# GTFS RT & Payments Dashboard  

In [86]:
import pandas as pd
import intake
import altair as alt

In [8]:
# Open the intake catalog described by ./catalog.yml (path is relative to the
# notebook's working directory).
# NOTE(review): `catalog` is not referenced by any other cell visible in this
# notebook -- confirm it is still needed.
catalog = intake.open_catalog('./catalog.yml')

In [13]:
# Google Sheets CSV export (tqx=out:csv) of the sheet named "Data".
AGENCY_SHEET_CSV_URL = 'https://docs.google.com/spreadsheets/d/1qr49azk6p30mp96_7myKoO-Bb_bXMMn5ZzgbL-uPiPw/gviz/tq?tqx=out:csv&sheet=Data'

df = pd.read_csv(AGENCY_SHEET_CSV_URL)

In [21]:
# Keep only the agencies flagged 'Y' for static-GTFS potential.
# .copy() makes the filtered frame independent of the original, so the column
# assignments in later cells (e.g. df['tier'] = ...) do not raise
# SettingWithCopyWarning or write into an ambiguous view.
df = df[df['Potential for GTFS static (Y/N)'] == 'Y'].copy()
print(f"The Number of Agencies in CA w/ Potential for static GTFS is {len(df)}")

The Number of Agencies in CA w/ Potential for static GTFS is 224


In [34]:
# Percentage of agencies whose GTFS column is populated (non-null).
# The mean of a boolean Series is the fraction of True values; this avoids
# indexing a bool-labelled value_counts() result with the integer 0.
# NOTE(review): rows holding a placeholder value still count as "populated"
# here -- confirm that is the intended definition of "with static GTFS".
pct_with_gtfs = df['GTFS'].notna().mean() * 100

# value_counts() sorts descending, so position 0 is the count of the single
# most frequent GTFS value. .iloc makes the positional access explicit rather
# than relying on the deprecated integer-key fallback of Series[...].
# The message below assumes that most frequent value is 'not in google'.
most_common_gtfs_count = df['GTFS'].value_counts().iloc[0]

print(f"The percentage of agencies with static GTFS {pct_with_gtfs}, although {most_common_gtfs_count} agencies are 'not in google'")

The percentage of agencies with static GTFS 99.10714285714286, although 31 agencies are 'not in google'


In [61]:
# Parse the '2017 fare revenue total' text column into numbers.
# Both replacements are literal: "$" is a regex end-of-string anchor, and the
# default of `regex` in Series.str.replace differs between pandas versions, so
# regex=False is passed explicitly to guarantee the dollar sign is removed.
df['2017_fare_revenue'] = pd.to_numeric(
    df['2017 fare revenue total']
    .str.replace("$", "", regex=False)
    .str.replace(",", "", regex=False)
)

In [65]:
def assign_tier(row):
    """Return the tier label for one agency row.

    Agencies on a known electronic fare program get that program's tier.
    All other agencies are split on 2017 fare revenue: strictly above
    100,000 is "tier2", anything else is "tier3".

    Parameters
    ----------
    row : mapping-like
        Must support ``row['Electronic fare program']``; ``row['2017_fare_revenue']``
        is only read when the fare program is not one of the known programs.

    Returns
    -------
    str
        One of "tier1clipper", "tier1TAP", "tier1.5compass", "tier2", "tier3".
    """
    program_tiers = {
        'Clipper': "tier1clipper",
        'TAPcard': "tier1TAP",
        'Compasscard': "tier1.5compass",
    }
    revenue_cutoff = 100_000

    fare_program = row['Electronic fare program']
    if fare_program in program_tiers:
        return program_tiers[fare_program]

    # Revenue is only consulted for agencies outside the known fare programs.
    return "tier2" if row['2017_fare_revenue'] > revenue_cutoff else "tier3"

In [67]:
# Classify every agency: axis=1 makes apply() pass one row at a time to
# assign_tier, and the returned label is stored in the new 'tier' column.
df['tier'] = df.apply(assign_tier, axis=1)

In [71]:
# Save the tiered agency table as an Excel workbook in the working directory.
# No index=False is passed, so the DataFrame index is written as the first column.
df.to_excel('agencies_for_gtfs_with_tier.xlsx')

In [72]:
# Number of agencies in each tier, most common first.
df['tier'].value_counts()

tier3             90
tier2             75
tier1TAP          34
tier1clipper      23
tier1.5compass     2
Name: tier, dtype: int64

In [74]:
# Number of agencies per numeric 'Real-time status' code, most common first.
df['Real-time status'].value_counts()

2.0    71
4.0    70
1.0    40
6.0    27
3.0     8
5.0     8
Name: Real-time status, dtype: int64

In [76]:
# Long-form description for each numeric 'Real-time status' code.
mapping = {
    1: 'has static GTFS and GTFS-rt',
    2: 'has static GTFS and non-standard realtime',
    3: 'no static GTFS and has non-standard realtime',
    4: 'has static GTFS and no realtime data',
    5: 'has static GTFS and GTFS-rt in-process with realtime provider',
    6: 'no static GTFS and no realtime data',
}

In [94]:
# Coarse realtime bucket for each numeric 'Real-time status' code.
# NOTE(review): code 5 is bucketed as 'has_gtfs_rt' although `mapping` labels
# it as GTFS-rt "in-process" -- confirm that grouping is intended.
mapping_simple = {
    1: 'has_gtfs_rt',
    2: 'has_rt_non_standard',
    3: 'has_rt_non_standard',
    4: 'no_rt',
    5: 'has_gtfs_rt',
    6: 'no_rt',
}

In [95]:
# Translate the numeric 'Real-time status' codes into the long descriptions
# from `mapping`; replace() leaves any value that is not a key unchanged.
df['realtime_readable'] = df['Real-time status'].replace(mapping)

In [96]:
# Translate the numeric 'Real-time status' codes into the coarse buckets from
# `mapping_simple`; replace() leaves any value that is not a key unchanged.
df['realtime_simple'] = df['Real-time status'].replace(mapping_simple)

In [97]:
# Number of agencies for each (realtime_simple, tier) combination, largest first.
df[['realtime_simple','tier']].value_counts()

realtime_simple      tier          
no_rt                tier3             53
has_rt_non_standard  tier3             32
                     tier2             31
no_rt                tier2             25
has_gtfs_rt          tier2             19
no_rt                tier1TAP          17
has_gtfs_rt          tier1clipper      13
                     tier1TAP           9
has_rt_non_standard  tier1clipper       8
                     tier1TAP           8
has_gtfs_rt          tier3              5
no_rt                tier1clipper       2
has_gtfs_rt          tier1.5compass     2
dtype: int64

In [98]:
# Horizontal bar chart: one bar per realtime bucket, bar length = row count.
realtime_counts_chart = alt.Chart(df).mark_bar().encode(
    x=alt.X('count(realtime_simple):Q'),
    y=alt.Y('realtime_simple:N'),
)
realtime_counts_chart


In [101]:
# Same realtime-bucket bar chart, with each bar colored by agency tier.
realtime_by_tier_chart = alt.Chart(df).mark_bar().encode(
    x=alt.X('count(realtime_simple):Q'),
    y=alt.Y('realtime_simple:N'),
    color='tier:N',
)
realtime_by_tier_chart


In [113]:
# Names of the tier2 agencies that fall in the 'no_rt' realtime bucket.
is_no_rt = df['realtime_simple'] == 'no_rt'
is_tier2 = df['tier'] == 'tier2'
df.loc[is_no_rt & is_tier2, 'Agency Name'].values

array(['Banning Pass Transit', 'Camarillo Area Transit',
       'Clovis Transit System', 'Corona Cruiser',
       'El Monte Transportation Division',
       'Arcata & Mad River Transit System', 'Eureka Transit Service',
       'Morongo Basin Transit Authority', 'Desert Roadrunner',
       'Plumas Transit Systems', 'County Express',
       'Siskiyou Transit and General Express', 'Santa Maria Area Transit',
       'South County Transit Link', 'Tulare County Area Transit',
       'Tehama Rural Area eXpress',
       'Yosemite Area Regional Transportation System', 'Grapeline',
       'e-Tran', 'City of Lompoc Transit', 'Imperial Valley Transit',
       'Madera Area Express', 'Get Around Town Express', 'TRACER',
       'Delano Area Rapid Transit'], dtype=object)

In [None]:
# TODO: unfinished scratch cell. It previously contained `df['']`, a lookup of
# an empty column name that raises KeyError on Restart & Run All (unless a
# column literally named '' exists). Add the intended analysis or delete the cell.