# Organizations GTFS Status Report

In [1]:
from calitp.tables import tbl
from siuba import *
import pandas as pd

import utils



In [69]:
from IPython.display import Markdown, HTML, display_html, display

In [2]:
import altair as alt
from dla_utils import _dla_utils
from shared_utils import styleguide




In [3]:
from calitp import query_sql, magics

In [4]:
# Show up to 100 columns when a DataFrame is rendered, so wide query results are not truncated.
pd.options.display.max_columns = 100

In [47]:
# Per-district GTFS coverage summary.
# The CTE counts, for each Caltrans district, all organizations plus those flagged with
# complete static coverage and complete RT coverage; the outer SELECT turns those counts
# into percentages of the district's organization count.
# ORDER BY lives on the outer SELECT: an ORDER BY inside the CTE does not guarantee the
# order of the final result, so the rows would not reliably come back sorted by district.
pct_gtfs = query_sql(
    """
WITH district_counts AS (
    SELECT
        caltrans_district,
        COUNT(*) AS org_count,
        SUM(IF(complete_static_gtfs_coverage = 1, 1, 0)) AS gtfs_static_count,
        SUM(IF(complete_rt_coverage = 1, 1, 0)) AS gtfs_rt_count,
    FROM cal-itp-data-infra-staging.staging_views.airtable_orgs_filtered
    GROUP BY caltrans_district
)

SELECT
    caltrans_district,
    org_count,
    gtfs_static_count,
    gtfs_rt_count,
    100.0 * (gtfs_static_count / org_count) AS `percent_static`,
    100.0 * (gtfs_rt_count / org_count) AS `percent_rt`
FROM district_counts
ORDER BY caltrans_district
"""
)



In [46]:
# pct_compliant = query_sql(
# """

# WITH

# complete_status AS (
#     select
#         caltrans_district,
#         count(*) AS `count`
#     from  cal-itp-data-infra-staging.staging_views.airtable_orgs_filtered
#     where (complete_static_gtfs_coverage = 1)
#     group by caltrans_district
# ),

# all_status AS (
#     select
#         caltrans_district,
#         count(*) AS `count`
#     from  cal-itp-data-infra-staging.staging_views.airtable_orgs_filtered   
#     group by caltrans_district
# )

# SELECT
#     caltrans_district,
#     all_status.count as `all_org`,
#     complete_status.count as `gtfs_compliant`,
#     100.0 * (complete_status.count / all_status.count) AS `percent`
# FROM all_status 
# join complete_status 
#     using (caltrans_district)
# ORDER BY caltrans_district
    
# """)

In [7]:
# Load every row of the filtered organizations view and derive two Fares v2 helper columns:
#   * fares_v2_status_complete - 1 when the status text contains "published", 0 for the
#     listed not-yet-published statuses, NULL when no pattern matches.
#   * fares_v2_status2 - coarse label: Unknown / Needed / In works / Published.
# Branch order matters in both CASE expressions: the not-published patterns are tested
# before '%published%', so a status matching both is treated as not published.
# The '%in progress%' pattern is included in the 0 branch so that the flag agrees with
# fares_v2_status2 (which labels those rows 'In works') instead of leaving them NULL.
df = query_sql(
"""
WITH new_table AS (
SELECT
    *,
    CASE
        WHEN (fares_v2_status LIKE ('%Unknown%')
              or fares_v2_status is null
              or fares_v2_status LIKE ('%Blocked%')
              or fares_v2_status LIKE ('%to Publish%')
              or fares_v2_status LIKE ('%in progress%')
              or fares_v2_status LIKE ('%Need%')) then 0
        WHEN (fares_v2_status LIKE ('%published%')) then 1
    end AS `fares_v2_status_complete`,
    CASE
        WHEN (fares_v2_status LIKE ('%Unknown%') or fares_v2_status is null) then 'Unknown'
        WHEN (fares_v2_status LIKE ('%Blocked%') or fares_v2_status LIKE ('%Need%')) then 'Needed'
        WHEN (fares_v2_status LIKE ('%to Publish%') or fares_v2_status LIKE ('%in progress%')) then 'In works'
        WHEN (fares_v2_status LIKE ('%published%')) then 'Published'
    end AS `fares_v2_status2`,
FROM cal-itp-data-infra-staging.staging_views.airtable_orgs_filtered
)

SELECT
    *,
FROM new_table
""")



In [17]:
##check to see if the new column works
# (df>>filter(_.fares_v2_status_complete == 1)>>select(_.name,
#                                                      _.fares_v2_status,
#                                                     _.fares_v2_status_complete,
#                                                     _.fares_v2_status2))
## works

In [91]:
# Tally organizations for each (district, reporting category) pair; the tally column
# produced by count() is referenced as "n" in the chart call below.
reporting_category_counts = (
    df >> group_by(_.caltrans_district) >> count(_.reporting_category)
)

_dla_utils.bar_chart_nosubset(
    reporting_category_counts,
    "caltrans_district",
    "n",
    "reporting_category",
    chart_title="Organizations Reporting Category by District",
)

In [92]:
# Tally organizations for each (district, organization type) pair; the tally column
# produced by count() is referenced as "n" in the chart call below.
organization_type_counts = (
    df >> group_by(_.caltrans_district) >> count(_.organization_type)
)

_dla_utils.bar_chart_nosubset(
    organization_type_counts,
    "caltrans_district",
    "n",
    "organization_type",
    chart_title="Organizations Type by District",
)

## What Percent of Organizations are GTFS Compliant by District?

In [50]:
# Bar chart of the total organization count (org_count) for each Caltrans district.
_dla_utils.bar_chart_nosubset(
    pct_gtfs,
    "caltrans_district",
    "org_count",
    "caltrans_district",
    chart_title="Number of Organizations by District",
)

In [52]:
# Bar chart of how many organizations per district have complete static GTFS coverage.
_dla_utils.bar_chart_nosubset(
    pct_gtfs,
    "caltrans_district",
    "gtfs_static_count",
    "caltrans_district",
    chart_title="Number of GTFS Static Compliant Organizations by District",
)

In [53]:
# Bar chart of how many organizations per district have complete GTFS RT coverage.
_dla_utils.bar_chart_nosubset(
    pct_gtfs,
    "caltrans_district",
    "gtfs_rt_count",
    "caltrans_district",
    chart_title="Number of GTFS RT Compliant Organizations by District",
)

In [55]:
# Bar chart of the share (in percent) of each district's organizations that have
# complete static GTFS coverage.
_dla_utils.bar_chart_nosubset(
    pct_gtfs,
    "caltrans_district",
    "percent_static",
    "caltrans_district",
    chart_title="Percent of GTFS Static Compliant Organizations by District",
)

In [56]:
# Bar chart of the share (in percent) of each district's organizations that have
# complete GTFS RT coverage. The title names RT to match the plotted percent_rt column
# (it previously repeated the "Static" title of the preceding chart).
_dla_utils.bar_chart_nosubset(
    pct_gtfs,
    "caltrans_district",
    "percent_rt",
    "caltrans_district",
    chart_title="Percent of GTFS RT Compliant Organizations by District",
)

## Fares v2 Status by District

In [75]:
# Keep only the organizations whose Fares v2 status is flagged complete, then report
# how many there are as a rendered Markdown sentence.
fares_v2_published = df >> filter(_.fares_v2_status_complete == 1)

display(
    Markdown(
        f"There are **{len(fares_v2_published)}** "
        "Organizations with all Fares v2 Status as **fully published**"
    )
)

There are **73** Organizations with all Fares v2 Status as **fully published**

In [76]:
# Tally organizations for each (district, simplified Fares v2 status) pair; the tally
# column produced by count() is referenced as "n" in the chart call below.
fares_v2_status_counts = (
    df >> group_by(_.caltrans_district) >> count(_.fares_v2_status2)
)

_dla_utils.bar_chart_nosubset(
    fares_v2_status_counts,
    "caltrans_district",
    "n",
    "fares_v2_status2",
    chart_title="Fares v2 Status by District",
)