# Consolidated application data

In [1]:
import re as re

import numpy as np
import pandas as pd

pd.options.display.max_columns = 50
pd.options.display.max_rows = 250
pd.set_option("display.max_colwidth", None)
pd.options.display.float_format = "{:.2f}".format

from itertools import chain

from calitp import *
from siuba import *

GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/consolidated_applications/"

In [2]:
data = to_snakecase(
    pd.read_excel(f"{GCS_FILE_PATH}Copy of Application_Review_Report_5_2_2022.xls")
)

In [3]:
data.shape

(346, 24)

In [4]:
data.isna().sum()

organization_name                            0
district                                     6
application_name                             0
year                                         0
application_status                           0
project_upin                                 0
project_category                             0
project_line_item__ali_                      0
project_description                          0
is_stimulus                                  0
consolidated_application                     0
total_expenses                               0
_5311_funds                                  7
_5311_f__funds                               7
_5311_cmaq_funds                             7
_5339_funds                                  7
federal_total                                0
other_fed_funds_total                        7
lctop__state__funds                          7
sb1__state_of_good_repair__state__funds      7
transit_development_act__state__funds        7
other_state_f

In [5]:
data.dtypes

organization_name                           object
district                                   float64
application_name                            object
year                                         int64
application_status                          object
project_upin                                object
project_category                            object
project_line_item__ali_                     object
project_description                         object
is_stimulus                                 object
consolidated_application                    object
total_expenses                               int64
_5311_funds                                float64
_5311_f__funds                             float64
_5311_cmaq_funds                           float64
_5339_funds                                float64
federal_total                                int64
other_fed_funds_total                      float64
lctop__state__funds                        float64
sb1__state_of_good_repair__stat

In [17]:
data = data.drop(columns=["application_name","consolidated_application"])

KeyError: "['application_name'] not found in axis"

## Organizations

In [7]:
# Remove any acronyms
data["organization_name"] = data["organization_name"].str.replace(
    "\s+\(.*$", "", regex=True
)

In [8]:
# Replace Ventura, read in weirdly
data["organization_name"] = data["organization_name"].replace(
    {
        "Ventura County Transportation Commission\xa0": "Ventura County Transportation Commission"
    }
)

In [9]:
# Looking to make sure there aren't any duplicates.
data.organization_name.unique()

array(['Alameda-Contra Costa Transit District', 'Amador Transit',
       'Butte County Association of Governments/ Butte Regional Transit',
       'Calaveras Transit Agency ', 'City of Arcata', 'City of Arvin',
       'City of Auburn', 'City of Banning', 'City of Beaumont',
       'City of California City', 'City of Chowchilla ', 'City of Clovis',
       'City of Corcoran - Corcoran Area Transit', 'City of Davis',
       'City of Dixon', 'City of Escalon ', 'City of Fairfield',
       'City of Fresno', 'City of Guadalupe', 'City of Los Angeles DOT',
       'City of Manteca', 'City of McFarland', 'City of Needles',
       'City of Ojai', 'City of Ridgecrest', 'City of Rio Vista',
       'City of Roseville', 'City of San Luis Obispo Transit',
       'City of Santa Rosa', 'City of Shafter', 'City of Solvang',
       'City of Taft', 'City of Tehachapi', 'City of Tracy',
       'City of Union City', 'City of Visalia', 'City of Wasco',
       'Colusa County Transit Agency', 'County Connectio

## Monetary Columns
* This column represents the different types of local funding a project can receive.
* Extract everything after the colons. 

In [10]:
data['local_total'] =  data['local_total'].str.split(': ').str[-1]

In [11]:
data['local_total'] = data['local_total'].str.replace(',', '', regex = True).str.replace('$', '', regex= True).fillna(0).astype('float')

In [13]:
data.loc[data["organization_name"] == "Eastern Sierra Transit Authority"]

Unnamed: 0,organization_name,district,year,application_status,project_upin,project_category,project_line_item__ali_,project_description,is_stimulus,consolidated_application,total_expenses,_5311_funds,_5311_f__funds,_5311_cmaq_funds,_5339_funds,federal_total,other_fed_funds_total,lctop__state__funds,sb1__state_of_good_repair__state__funds,transit_development_act__state__funds,other_state_funds,state_total,local_total
109,Eastern Sierra Transit Authority,9.0,2022,Submitted,BCG0003734,OP,300901,Operating Assistance-LCTOP Project 1,No,Yes,75566,0.0,0.0,0.0,0.0,0,0.0,59570.0,15996.0,0.0,0.0,75566.0,0.0
110,Eastern Sierra Transit Authority,9.0,2022,Submitted,BCG0003735,OP,300901,Operating Assistance-LCTOP Project 2,No,Yes,20474,0.0,0.0,0.0,0.0,0,0.0,20474.0,0.0,0.0,0.0,20474.0,0.0
111,Eastern Sierra Transit Authority,9.0,2022,Submitted,BCG0003737,CA,111215,Purchase Replacement Van LCTOP Project 3,No,Yes,45209,0.0,0.0,0.0,0.0,0,0.0,45209.0,0.0,0.0,0.0,45209.0,0.0
112,Eastern Sierra Transit Authority,9.0,2022,Submitted,BCG0003825,OP,300902,Operating Assistance Sliding Scale- 5311 FFY2022 Mono,No,Yes,209740,116049.0,0.0,0.0,0.0,116049,0.0,0.0,0.0,0.0,0.0,0.0,93691.0
113,Eastern Sierra Transit Authority,9.0,2022,Submitted,BCG0003826,OP,300902,Operating Assistance Sliding Scale- 5311 FFY 2023 Mono,No,Yes,213935,118370.0,0.0,0.0,0.0,118370,0.0,0.0,0.0,0.0,0.0,0.0,95565.0
114,Eastern Sierra Transit Authority,9.0,2022,Submitted,BCG0003827,OP,300902,Operating Assistance Sliding Scale-5311 FFY2022 Inyo,No,Yes,273893,151545.0,0.0,0.0,0.0,151545,0.0,0.0,0.0,0.0,0.0,0.0,122348.0
115,Eastern Sierra Transit Authority,9.0,2022,Submitted,BCG0003828,OP,300902,Operating Assistance Sliding Scale- 5311 FFY2023 Inyo,No,Yes,279369,154575.0,0.0,0.0,0.0,154575,0.0,0.0,0.0,0.0,0.0,0.0,124794.0
116,Eastern Sierra Transit Authority,9.0,2022,Submitted,BCG0003889,OP,300902,Operating Assistance Sliding Scale 5311(F) Lancaster FY2022,No,Yes,336838,0.0,186372.0,0.0,0.0,186372,0.0,0.0,0.0,126466.0,24000.0,150466.0,0.0
117,Eastern Sierra Transit Authority,9.0,2022,Submitted,BCG0003890,OP,300902,Operating Assistance Sliding Scale 5311(F) Reno FY2022,No,Yes,289782,0.0,160336.0,0.0,0.0,160336,0.0,0.0,0.0,129446.0,0.0,129446.0,0.0
118,Eastern Sierra Transit Authority,9.0,2022,Submitted,BCG0003972,CA,111203,Purchase Replacement Std 30-34 ft Bus 5339 FY2022,No,Yes,289757,0.0,0.0,0.0,246293.0,246293,0.0,0.0,0.0,0.0,0.0,0.0,43464.0


## Projects
* Organize all the different project types to broader categories.

In [14]:
data["project_description"] = data.project_description.str.lower()

In [15]:
data.project_description.nunique()

206

In [16]:
# Didn't Work
        Replacement = ['purchase','replacement']
        Operating_Assistance = ['assistance', 'operating','operational']
        Preventive = ['maintenance','preventive','preventive maintenance']
        Security = ['surveillance/security','surveillance','security']
        Replacing_Vehicles = ['automobiles (service)','replacement']
        Expansion = ['expanded', 'extension']
        Increase_Ridership = ['free fare','free fares','ridership','recovery','planning']
        Construction = ['construction','shelters', 'bus stop improvement']
        Planning = ['transit planning','planning']

IndentationError: unexpected indent (682450171.py, line 2)