# 5311 and 5310 Applicant Payment First Look
* An agency in this analysis is a recipient of 5311 funds, 5310 funds, or both.

In [1]:
import calitp.magics
import pandas as pd
import utils
from calitp import *
from calitp.tables import tbl
from siuba import *

# Notebook display settings: show up to 100 columns, and do not truncate
# rows or cell contents when a frame is displayed.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)



## Black Cat

In [2]:
# Load the Black Cat extract: 5311 and 5310 recipients that had projects
# in the past 2 years.
df_bc = pd.read_parquet(
    path="gs://calitp-analytics-data/data-analyses/5311-5310/5311_5310_applicants.parquet",
    engine="auto",
)

In [3]:
# Run the shared name-cleaning helper over the Black Cat organization names
# (the exact normalization rules live in utils.organization_cleaning).
df_bc = df_bc.pipe(utils.organization_cleaning, "organization_name")

In [4]:
# Rename Black Cat organizations so they line up with the service names used
# in Transit Stacks (the service components table merged in further down).
#
# NOTE(review): the Fare Systems clean-up that follows maps
# "Eastern Sierra Transit Authority Community Routes" and
# "Fresno County Rural Transit" back to their original spellings before any
# merge runs, so those two renames never reach the Transit Stacks merge.
# Confirm which spelling each source needs.
# NOTE(review): "Palo Verde Valley Transit Agency" and "Palos Verdes Peninsula
# Transit Authority" look like two different agencies -- confirm this match.
df_bc["organization_name"] = df_bc["organization_name"].replace(
    {
        "Omnitrans": "OmniTrans",
        "Eastern Sierra Transit Authority": "Eastern Sierra Transit Authority Community Routes",
        "Fresno County Rural Transit Agency": "Fresno County Rural Transit",
        "El Dorado County Transit Authority": "El Dorado Transit",
        "Palo Verde Valley Transit Agency": "Palos Verdes Peninsula Transit Authority",
        "Kern Regional Transit": "Kern Transit",
        "Riverside Transit Agency": "Riverside Transit",
        # Tuolumne previously had an entry mapping the name to itself (a no-op),
        # and the real rename only matched a spelling with a trailing space.
        # Both spellings are mapped so the rename still applies if the name is
        # whitespace-trimmed upstream.
        "Tuolumne County Transit Agency": "Tuolumne County Transit",
        "Tuolumne County Transit Agency ": "Tuolumne County Transit",
    }
)

In [5]:
# Rename Black Cat organizations to the spellings used in the Fare Systems table.
#
# NOTE(review): the "Eastern Sierra ... Community Routes" and
# "Fresno County Rural Transit" entries undo renames applied by the Transit
# Stacks clean-up above. Both clean-ups edit the same column before any merge,
# so only the spellings set here are seen by the merges.
# NOTE(review): "Redwood Coast Transit Authority" and "Redding Area Bus
# Authority" look like two different agencies -- confirm this match.
df_bc["organization_name"] = df_bc["organization_name"].replace(
    to_replace={
        "Amador Transit": "Amador Regional Transit System",
        "Calaveras Transit Agency": "Calaveras Transit",
        "Eastern Sierra Transit Authority Community Routes": "Eastern Sierra Transit Authority",
        "Fresno County Rural Transit": "Fresno County Rural Transit Agency",
        "Marin County Transit District": "Marin Transit",
        "Redwood Coast Transit Authority": "Redding Area Bus Authority",
        "Sunline Transit Agency": "SunLine Transit Agency",
        "Victor Valley Transit Authority": "Victor Valley Transit",
        "Yolo County Transportation District": "Yolobus",
    }
)

In [6]:
# df_bc["organization_name"].sort_values().unique().tolist()

## Airtable: Transit Stacks, Service Components

In [7]:
%%sql -o df_service_components
-- Pull the service, product and component names from dim_service_components.
SELECT
  service_name,
  product_name,
  component_name
FROM `cal-itp-data-infra.mart_transit_database.dim_service_components`


Unnamed: 0,service_name,product_name,component_name
0,Nevada County Connects,Swiftly Metronome,
1,Laguna Beach Trolley,Transign,Headsigns
2,Tri-Valley Wheels,Twin Vision,Headsigns
3,Marin Transit,Twin Vision,Headsigns
4,Anaheim Resort Transportation,Twin Vision,Headsigns
5,Glendale Beeline,Twin Vision,Headsigns
6,Alhambra Community Transit,Twin Vision,Headsigns
7,Beach Cities Transit,Twin Vision,Headsigns
8,OmniTrans,Trapeze Signage,Headsigns
9,SJRTD Local,Trapeze Signage,Headsigns


In [55]:
# df_service_components["component_name"].sort_values().unique()

In [9]:
# Tag every column with its source table so its origin stays visible once
# this frame is merged with the others.
df_service_components = df_service_components.add_prefix(prefix="service_components_")

In [10]:
# Apply the same name-cleaning helper used on Black Cat to the service names,
# since service name is the join key against organization_name.
df_service_components = df_service_components.pipe(
    utils.organization_cleaning, "service_components_service_name"
)

In [11]:
# df_service_components["service_components_service_name"].sort_values().unique().tolist()

In [12]:
# Component names treated as fare-related. The CAD/AVL components are listed
# separately in cad_avl_comps.
fare_comps = [
    "Mobile ticketing", "Cash Farebox", "Fare card system",
    "Ticket Vending Machines", "Contactless Payment Validators", "Payment processor",
]

In [13]:
# Summarize the fare-related components per service. Judging by the merged
# sample shown later, components and products come back as comma-joined
# strings -- see utils.service_comps_summarize for the exact rules.
df_fare = df_service_components.pipe(utils.service_comps_summarize, fare_comps)

In [14]:
# Give the generic component/product columns fare-specific names so they do
# not collide with the CAD/AVL columns built from the same source table.
df_fare = df_fare.rename(
    {
        "service_components_component_name": "service_components_fare_component",
        "service_components_product_name": "service_components_fare_product",
    },
    axis="columns",
)

In [15]:
# Component names treated as CAD/AVL-related.
cad_avl_comps = ["AVL Software", "Location Sensors", "AVL On-board Computer"]

In [16]:
# Same summary as for the fare components, restricted to the CAD/AVL list.
df_cad_avl = df_service_components.pipe(utils.service_comps_summarize, cad_avl_comps)

In [17]:
# Give the generic component/product columns CAD/AVL-specific names so they
# do not collide with the fare columns built from the same source table.
df_cad_avl = df_cad_avl.rename(
    {
        "service_components_component_name": "service_components_AVL_CAD_component",
        "service_components_product_name": "service_components_AVL_CAD_product",
    },
    axis="columns",
)

In [18]:
# Distinct services with CAD/AVL components, then distinct services with
# fare components.
(
    df_cad_avl["service_components_service_name"].nunique(),
    df_fare["service_components_service_name"].nunique(),
)

(111, 132)

### Merge Fare Components

In [19]:
# Attach the fare components to the Black Cat organizations. This is a left
# join, so every Black Cat row is kept whether or not it finds a service;
# the _merge indicator records which rows matched.
m1 = df_bc.merge(
    df_fare,
    how="left",
    left_on="organization_name",
    right_on="service_components_service_name",
    indicator=True,
)

In [20]:
# (rows, columns) after merging the fare components onto Black Cat.
m1.shape

(177, 6)

In [21]:
# Row count after the left merge.
# NOTE(review): presumably eyeballed against the Black Cat row count to make
# sure the join did not duplicate organizations -- confirm.
len(m1)

177

In [22]:
# Remove the merge indicator and the right-hand join key; organization_name
# remains as the key for the following merges.
m1 = m1.drop(["_merge", "service_components_service_name"], axis="columns")

### Merge AVL/CAD Components 

In [23]:
# Attach the CAD/AVL components to the running result, again as a left join
# on the organization name so no Black Cat row is dropped.
m1 = m1.merge(
    df_cad_avl,
    how="left",
    left_on="organization_name",
    right_on="service_components_service_name",
)

In [24]:
# Remove the right-hand join key left behind by the CAD/AVL merge.
m1 = m1.drop(["service_components_service_name"], axis="columns")

In [25]:
# Spot-check a few merged rows. random_state pins the sample so a
# Restart & Run All shows the same rows every time.
m1.sample(6, random_state=0)

Unnamed: 0,5311_5310_overlap,organization_name,service_components_fare_component,service_components_fare_product,service_components_AVL_CAD_component,service_components_AVL_CAD_product
145,5310 only,Howard Prep,,,,
91,5310 only,Angel View,,,,
16,Both 5311 and 5310,Monterey-Salinas Transit,"Cash Farebox,Payment processor,Payment processor","Genfare Farebox (Unspecified),Transax,authorize.net",,
157,5310 only,OmniTrans,"Cash Farebox,Mobile ticketing,Payment processor,Ticket Vending Machines","Genfare Farebox (Unspecified),Token Transit Mobile Ticketing,FIS,GenFare TVM (Unspecified)",,
104,5310 only,Easy Lift Transportation,,,,
92,5310 only,ARC Imperial Valley,,,,


## Airtable - Fare Systems 
* Using CSV for now

In [26]:
# Read the Fare Systems CSV export and pass it through to_snakecase.
# NOTE(review): this rebinds df_fare, which until now held the fare service
# components. Re-running the earlier merge cells after this one would pick
# up this frame instead -- consider a distinct name.
df_fare = pd.read_csv(
    "gs://calitp-analytics-data/data-analyses/5311-5310/fare systems-Grid view.csv"
).pipe(to_snakecase)

In [27]:
# TODO: read Fare Systems from the warehouse instead of the CSV export.
# The draft query is kept as a comment rather than a string literal so the
# cell produces no output. It is a copy of the service components query and
# still selects from dim_service_components, so it needs the Fare Systems
# table and column list before it can replace the CSV read.
#
# %%sql -o df_fare
# SELECT
#   service_name,
#   product_name,
#   component_name
# FROM cal-itp-data-infra.mart_transit_database.dim_service_components

In [28]:
# Fare Systems columns kept for this analysis: the system name (used as the
# join key later) followed by its fare attributes.
cols_wanted = [
    "fare_system",
    "electronic_fare_program", "payment_accepted",
    "ticket_pass_sales_methods", "ticket_media", "ticket_validation",
]

In [29]:
# Keep every row but only the columns listed in cols_wanted.
df_fare2 = df_fare.loc[:, cols_wanted]

In [30]:
# Drop rows that have fewer than 2 non-NA values across the selected columns
# (presumably the system name plus at least one fare attribute -- confirm).
df_fare3 = df_fare2.dropna(axis="index", thresh=2)

In [31]:
# (rows, columns) before and after dropping the mostly-empty rows.
df_fare2.shape, df_fare3.shape

((394, 6), (189, 6))

In [32]:
# Tag every column with its source so its origin stays visible after merging.
df_fare3 = df_fare3.add_prefix(prefix="fare_systems_")

In [33]:
# Apply the shared name-cleaning helper to the fare system names, which are
# the join key against organization_name.
df_fare3 = df_fare3.pipe(utils.organization_cleaning, "fare_systems_fare_system")

In [34]:
# Spot-check a few cleaned Fare Systems rows. random_state pins the sample
# so a Restart & Run All shows the same rows every time.
df_fare3.sample(3, random_state=0)

Unnamed: 0,fare_systems_fare_system,fare_systems_electronic_fare_program,fare_systems_payment_accepted,fare_systems_ticket_pass_sales_methods,fare_systems_ticket_media,fare_systems_ticket_validation
311,Baldwin Park Transit,TAPcard,"cash, check","onboardbus, online, vendor, phone, kiosk","paper, TAPcard",
389,Amador Regional Transit System,,cash,,,
28,Lawndale Beat,,cash,onboardbus,customcard,


In [35]:
# Attach the Fare Systems attributes to the service-components result (m1).
# Left join on the organization name; the _merge indicator is inspected in
# the next cell to see how many organizations matched.
m2 = m1.merge(
    df_fare3,
    how="left",
    left_on="organization_name",
    right_on="fare_systems_fare_system",
    indicator=True,
)

In [36]:
# Match summary: "both" = organization found in Fare Systems,
# "left_only" = no Fare Systems row for that organization name.
m2["_merge"].value_counts()

left_only     145
both           32
right_only      0
Name: _merge, dtype: int64

In [37]:
# left =  m2.loc[m2['_merge'] == 'left_only']
# left['organization_name'].sort_values().unique().tolist()

In [38]:
# Remove the right-hand join key and the merge indicator now that the match
# counts have been checked.
m2 = m2.drop(["fare_systems_fare_system", "_merge"], axis="columns")

In [39]:
# Row count after the Fare Systems merge.
# NOTE(review): presumably checked by eye against the previous count to
# confirm the join added no duplicate rows -- confirm.
len(m2)

177

## Airtable - GTFS Status

In [40]:
%%sql -o df_orgs
-- Organization names with their ITP id and GTFS realtime status.
SELECT
  CAST(itp_id AS INT) AS itp_id,
  name,
  gtfs_realtime_status
FROM `cal-itp-data-infra.mart_transit_database.dim_organizations`

Unnamed: 0,itp_id,name,gtfs_realtime_status
0,,Trinity County Transportation Commission,RT Incomplete
1,385.0,San Bernardino County Transportation Authority,RT Incomplete
2,,Mono County Local Transportation Commission,RT Incomplete
3,,"American Logistics Company, LLC",RT Incomplete
4,,Westminster on Wheels Senior Transportation Program,RT Incomplete
5,,Humboldt County Association of Governments,RT Incomplete
6,,Castle Rock Associates,RT Incomplete
7,,ACE Parking,RT Incomplete
8,,Vivalon Inc.,RT Incomplete
9,242.0,Paratransit Inc.,RT Incomplete


In [41]:
# Apply the shared name-cleaning helper to the Airtable organization names.
df_orgs = df_orgs.pipe(utils.organization_cleaning, "name")

In [42]:
# Tag every column with its source so its origin stays visible after merging.
df_orgs = df_orgs.add_prefix(prefix="airtable_orgs_")

In [43]:
# Rename Airtable organizations to the spellings that appear in m2's
# organization_name, so the merge below can match them.
#
# NOTE(review): a few entries contain characters that are easy to miss: a
# zero-width space (\u200b) at the end of the Klamath Trinity target, a
# newline and doubled spaces in the Livermore key, and a trailing space after
# "Placer County Public Works". They presumably mirror the spellings in the
# data -- check the merge counts before tidying any of them.
df_orgs["airtable_orgs_name"] = df_orgs["airtable_orgs_name"].replace(
    to_replace={
        "Butte County Association of Governments": "Butte County Association of Governments Butte Regional Transit",
        "City of Corcoran": "City of Corcoran - Corcoran Area Transit",
        "City of Lafayette": "City of Lafayette: Lamorinda Sprit Van Program",
        "Sonoma County": "County of Sonoma",
        "Siskiyou County": "County of Siskiyou",
        "Tulare County": "Tulare County Regional Transportation Agency",
        "Nevada County": "County of Nevada Public Works",
        "Shasta County": "County of Shasta Department of Public Works",
        "El Dorado County Transit Authority": "El Dorado Transit",
        "Klamath Trinity Non-Emergency Transportation": "Klamath Trinity Non-Emergency Transportation\u200b",
        "Livermore  Amador\n  Valley Transit Authority": "Livermore Amador Valley Transit Authority",
        "Marin County Transit District": "Marin Transit",
        "Modoc Transportation Authority": "Modoc Transportation Agency",
        "On Lok": "On Lok Senior Health Services",
        "Placer County": "Placer County Public Works ",
        "Stanislaus County": "Stanislaus County Public Works - Transit Division",
        "Tehama County": "Tehama County Transit Agency",
        "Tuolumne County Transit Agency": "Tuolumne County Transit",
        "Victor Valley Transit Authority": "Victor Valley Transit",
        "Vivalon Inc.": "Vivalon",
        "Yurok Tribe": "Yurok Tribe Transit",
    }
)

In [44]:
# df_orgs.sort_values('airtable_orgs_name')

In [45]:
# Attach the Airtable organization fields (ITP id, GTFS realtime status) to
# m2. Left join on the organization name; the _merge indicator is inspected
# in the next cell to see how many organizations matched.
m3 = m2.merge(
    df_orgs,
    how="left",
    left_on="organization_name",
    right_on="airtable_orgs_name",
    indicator=True,
)

In [46]:
# Match summary: "both" = organization found in the Airtable organizations
# table, "left_only" = no row for that organization name.
m3["_merge"].value_counts()

both          120
left_only      57
right_only      0
Name: _merge, dtype: int64

In [47]:
# left = m3.loc[m3["_merge"] == "left_only"]
# left["organization_name"].sort_values().unique().tolist()

In [48]:
# df_orgs['airtable_orgs_name'].sort_values().unique().tolist()

In [49]:
# Remove the right-hand join key and the merge indicator now that the match
# counts have been checked.
m3 = m3.drop(["airtable_orgs_name", "_merge"], axis="columns")

In [53]:
# (rows, columns) of the fully merged table.
m3.shape

(177, 13)

## Export

In [50]:
# Columns whose missing values are replaced with "N/A" before export,
# grouped by the source they were merged in from.
columns_for_na = [
    # service components (Transit Stacks)
    "service_components_fare_component",
    "service_components_fare_product",
    "service_components_AVL_CAD_component",
    "service_components_AVL_CAD_product",
    # fare systems
    "fare_systems_electronic_fare_program",
    "fare_systems_payment_accepted",
    "fare_systems_ticket_pass_sales_methods",
    "fare_systems_ticket_media",
    "fare_systems_ticket_validation",
    # Airtable organizations
    "airtable_orgs_itp_id",
    "airtable_orgs_gtfs_realtime_status",
]

In [51]:
# Show unmatched values as the text "N/A" in the export instead of blanks.
m3[columns_for_na] = m3[columns_for_na].fillna(value="N/A")

In [52]:
# Final column tidy-up before export (rules are defined in utils.clean_up_columns).
m3 = m3.pipe(utils.clean_up_columns)

In [54]:
# Write the merged table to the "main" sheet of a workbook in the analysis
# bucket. Only this one frame is exported; its index is written as well.
with pd.ExcelWriter(
    "gs://calitp-analytics-data/data-analyses/5311-5310/5311_5310_payments.xlsx"
) as workbook:
    m3.to_excel(workbook, sheet_name="main", index=True)