In [1]:
import calitp.magics
import pandas as pd
from calitp import *
from calitp.tables import tbl
from siuba import *
import utils

pd.options.display.max_columns = 100
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)



## Black Cat

In [2]:
# Read in Black Cat: 5311 and 5310 recipients with projects in past 2 years
df_bc = pd.read_parquet(
    "gs://calitp-analytics-data/data-analyses/5311-5310/5311_5310_applicants.parquet", engine="auto"
)

In [3]:
# Clean up organization names
df_bc = utils.organization_cleaning(df_bc, 'organization_name')

In [4]:
# Clean up names to match Transit Stacks 
df_bc["organization_name"] = df_bc["organization_name"].replace(
    {
    
        "Tuolumne County Transit Agency": "Tuolumne County Transit Agency",
        "Omnitrans": "OmniTrans",
        'Eastern Sierra Transit Authority':'Eastern Sierra Transit Authority Community Routes',
        'Fresno County Rural Transit Agency':'Fresno County Rural Transit',
        'El Dorado County Transit Authority': 'El Dorado Transit',
        'Palo Verde Valley Transit Agency':'Palos Verdes Peninsula Transit Authority',
        'Kern Regional Transit':'Kern Transit',
        'Riverside Transit Agency':'Riverside Transit',
        'Tuolumne County Transit Agency ':'Tuolumne County Transit',
    }
)

In [5]:
# Clean up names to match Fare Systems
df_bc["organization_name"] = df_bc["organization_name"].replace(
    {
    
        "Amador Transit": 'Amador Regional Transit System',
        'Calaveras Transit Agency': 'Calaveras Transit',
        'Eastern Sierra Transit Authority Community Routes': 'Eastern Sierra Transit Authority',
        'Fresno County Rural Transit':'Fresno County Rural Transit Agency',
        'Marin County Transit District':'Marin Transit',
         'Redwood Coast Transit Authority':'Redding Area Bus Authority',
        'Riverside Transit':'Riverside Transit Agency',
        'Sunline Transit Agency':'SunLine Transit Agency',
        'Victor Valley Transit Authority':'Victor Valley Transit',
        'Yolo County Transportation District':'Yolobus',
    }
)

## Transit Stacks, Service Components

In [6]:
%%sql -o df_service_components
SELECT 
  service_name,
  product_name,
  component_name
FROM cal-itp-data-infra.mart_transit_database.dim_service_components


Unnamed: 0,service_name,product_name,component_name
0,Nevada County Connects,Swiftly Metronome,
1,Laguna Beach Trolley,Transign,Headsigns
2,Marin Transit,Twin Vision,Headsigns
3,Glendale Beeline,Twin Vision,Headsigns
4,Tri-Valley Wheels,Twin Vision,Headsigns
5,Beach Cities Transit,Twin Vision,Headsigns
6,Anaheim Resort Transportation,Twin Vision,Headsigns
7,Alhambra Community Transit,Twin Vision,Headsigns
8,Long Beach Transit,Trapeze Signage,Headsigns
9,OmniTrans sbX,Trapeze Signage,Headsigns


In [7]:
components_wanted = [
    "Mobile ticketing",
    "Cash Farebox",
    "Fare card system",
    "Ticket Vending Machines",
    "Contactless Payment Validators",
    "Payment processor",
]

In [8]:
# Filter for only fare related components
df_service_components2 = df_service_components[
    df_service_components["component_name"].isin(components_wanted)
]

In [9]:
df_service_components2 = utils.organization_cleaning(df_service_components2, "service_name")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[column_wanted] = (


In [64]:
len(df_service_components2)

132

In [65]:
comps = utils.summarize_rows(df_service_components2, 'service_name', 'component_name')

In [66]:
prods = utils.summarize_rows(df_service_components2, 'service_name', 'product_name')

In [67]:
df_service_components2 = pd.merge(
    comps,
    prods,
    how="inner",
    on = ['service_name']
    
)

In [68]:
# Merge transit stacks with BC
m1 = pd.merge(
    df_bc,
    df_service_components2,
    how="left",
    left_on=["organization_name"],
    right_on=["service_name"],
    indicator=True,
    validate='1:m'
)

In [69]:
m1.shape

(177, 6)

In [70]:
m1 = m1.drop(columns = ['_merge', 'service_name'])

In [71]:
m1[['product_name','component_name']] = m1[['product_name','component_name']].fillna('N/A')

In [72]:
len(m2)

177

In [73]:
# df_bc['organization_name'].sort_values().unique().tolist()

## Airtable - Fare Systems 
* Using CSV for now

In [74]:
df_fare = to_snakecase(pd.read_csv("gs://calitp-analytics-data/data-analyses/5311-5310/fare systems-Grid view.csv"))

In [75]:
cols_wanted = ['fare_system','electronic_fare_program','payment_accepted','ticket_pass_sales_methods', 'ticket_media', 'ticket_validation',]

In [76]:
df_fare2 = df_fare[cols_wanted]

In [77]:
df_fare3 = df_fare2.dropna(thresh=2)

In [78]:
df_fare2.shape, df_fare3.shape

((394, 6), (189, 6))

In [79]:
df_fare3.sample(5)

Unnamed: 0,fare_system,electronic_fare_program,payment_accepted,ticket_pass_sales_methods,ticket_media,ticket_validation
65,Willowbrook Demand Response,TAPcard,,,TAPcard,
205,Torrance Transit System,TAPcard,"cash, smartcard","onboardbus, online, phone, vendor, kiosk","TAPcard, tokens",fareboxtap
269,WestCAT,Clipper,"cash, smartcard, benefit","onboardbus, tcenter, kiosk, online, phone, mail, vendor",Clipper,
355,Blue and Gold Fleet,,,"online, station",,
160,San Luis Obispo Regional Transit Authority,,paypal,"onboardbus, online, vendor",customcard,magstrip


In [80]:
# Merge transit stacks with BC
m3 = pd.merge(
    m2,
    df_fare3,
    how="left",
    left_on=["organization_name"],
    right_on=["fare_system"],
    indicator=True,
    
)

In [81]:
m3['_merge'].value_counts()

left_only     146
both           31
right_only      0
Name: _merge, dtype: int64

In [82]:
# left =  m2.loc[m2['_merge'] == 'left_only']
# left['organization_name'].sort_values().unique().tolist()

In [83]:
# df_fare3['fare_system'].sort_values().unique().tolist()

In [86]:
cols_fare_systems = [  'electronic_fare_program', 'payment_accepted',
       'ticket_pass_sales_methods', 'ticket_media', 'ticket_validation',]

In [89]:
m3 = m3.drop(columns = ['fare_system'])

In [89]:
m3 = m3.drop(columns = ['fare_system'])