# 5311 Rural Areas First Look
* Using data from Black Cat
* Downloaded all records from projects file off of Black Cat

### Clean up

In [30]:
import pandas as pd
import numpy as np
#import shared_utils

In [31]:
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/5311 /"
FILE_NAME = "Grant_Projects.xlsx"
df_5311 = pd.read_excel(f"{GCS_FILE_PATH}{FILE_NAME}")

In [32]:
f'There are {len(df_5311)} rows without any filtering'

'There are 2520 rows without any filtering'

In [33]:
#clean column names
df_5311.columns = df_5311.columns.str.replace('\n', ' ').str.replace(' ', '_')

In [34]:
#drop any potential duplicates
df_5311.drop_duplicates(inplace=True)

### Filtering out only for 5311 programs

In [35]:
df_5311['Funding_Program'].unique()

array(['Section 5311', '5310 Exp', '5310 Trad', '5311(f) Cont',
       '5339 (National)', '5339 (State)', 'CMAQ (FTA 5311)',
       'Section 5311(f)', 'Toll Credits', '5311(f) Round 2', 'CARES Act',
       'CARES Act (F)', 'ARPA', 'CRRSAA'], dtype=object)

In [36]:
subset = ['Section 5311', '5311(f) Cont','CMAQ (FTA 5311)', 'Section 5311(f)',  '5311(f) Round 2']

In [37]:
df_5311 = df_5311[df_5311.Funding_Program.isin(subset)]

In [38]:
f'There are {len(df_5311)} rows after filtering for only 5311'

'There are 798 rows after filtering for only 5311'

In [39]:
df_5311.columns

Index(['Grant_Fiscal_Year', 'Funding_Program', 'Grant_Number', 'Project_Year',
       'Organization_Name', 'UPIN', 'Description', 'ALI', 'Contract_Number',
       'AllocationAmount', 'Encumbered_Amount', 'ExpendedAmount',
       'ActiveBalance', 'ClosedOutBalance', 'Project_Status',
       'Project_Closed_By', 'Project_Closed_Date', 'Project_Closed_Time'],
      dtype='object')

In [None]:
f'There are {df_5311.Organization_Name.nunique()} total unique agencies in 5311'

In [40]:
df_5311 = df_5311.drop(columns = ['Project_Closed_By', 'Project_Closed_Date', 'Project_Closed_Time'])

In [41]:
#Convert to CSV
df_5311.to_csv("./5311_bus.csv", index = False) 

### Find only projects with bus replacement

In [42]:
#can see  bus is typed as BUS or Bus
df_5311.Description.unique().tolist()

['Operating Assistance',
 'Buy <30-Ft Bus For Expansion',
 'Purchase Replacement Van',
 'Purchase Expansion <30ft Bus',
 'Purchase Replacement Std 40 Ft Bus ',
 'Operating Assistance Sliding Scale',
 'Operating Assistance ',
 'Purchase Replacement < 30 Ft Bus',
 'Operating Assistance - Dial A Ride',
 'Operating Assistance Sliding Scale ',
 'Purchase Misc Communications Equip ',
 'FY 15/16 Section 5311 Operating Assistance (Federal Share 55.33%)',
 'Preventive Maintenance ',
 'Operating Assistance 5311 Dial-A-Ride',
 'Operating Assistance for Sacramento County rural Transit Program',
 'Operating Assistance Inyo',
 'Operating Assistance Mono',
 'Buy <30-Ft Bus For Expansion ',
 'Operating Assistance FTA 5311',
 'Rural Operating Assistance ',
 'Construction - Bus Shelters ',
 'Purchase Replacement Van ',
 'Operating Assistance for Tuolumne County Transit',
 'Rural Operating Assistance',
 'Alpine County Operating Assistance',
 'Purchase Replacement < 30 Ft Bus ',
 'FTA FY16/17 Section 5311

In [43]:
df_5311_bus = df_5311[(df_5311.Description.str.contains("Bus", case= False))]

In [44]:
f'There are {len(df_5311_bus)} rows after filtering for only 5311 & descriptions that contain keyword bus'

'There are 115 rows after filtering for only 5311 & descriptions that contain keyword bus'

In [45]:
#Checking nas
df_5311_bus.isna().sum()

Grant_Fiscal_Year     0
Funding_Program       0
Grant_Number          0
Project_Year          0
Organization_Name     0
UPIN                  0
Description           0
ALI                   0
Contract_Number      20
AllocationAmount      0
Encumbered_Amount     0
ExpendedAmount        0
ActiveBalance         0
ClosedOutBalance      0
Project_Status        0
dtype: int64

In [46]:
#drop columns with all NAs aka project closed by, closed date, and closed time
#df_5311_bus = df_5311_bus.drop(columns=['Project_Closed_By', 'Project_Closed_Date', 'Project_Closed_Time'])

### Finding unique agencies/organizations

In [47]:
unique_agencies = df_5311_bus['Organization_Name'].unique().tolist()

In [48]:
f'There are {len(unique_agencies)} total unique agencies that received 5311 funding & have buses in description'

'There are 24 total unique agencies that received 5311 funding & have buses in description'

### So I thought each record/grant recipient would have their own unique contract number but apparently not...Weird

In [49]:
print(df_5311_bus['Contract_Number'].nunique())

35


### Looking at some measures

In [50]:
#Look at count of funding program (lots of different ways to call  the same program)
df_5311_bus.Funding_Program.value_counts() 

Section 5311       77
CMAQ (FTA 5311)    30
Section 5311(f)     8
Name: Funding_Program, dtype: int64

In [51]:
#preview 
df_5311_bus.head(2)

Unnamed: 0,Grant_Fiscal_Year,Funding_Program,Grant_Number,Project_Year,Organization_Name,UPIN,Description,ALI,Contract_Number,AllocationAmount,Encumbered_Amount,ExpendedAmount,ActiveBalance,ClosedOutBalance,Project_Status
1,2011,Section 5311,CA-18-X047 | 0012000083,2016,Madera County,BCG0000283,Buy <30-Ft Bus For Expansion,111304,64BC17-00408,110663.0,110663.0,101352.02,9310.98,0,Open
4,2012,Section 5311,CA-18-X052 | 0012000304,2016,Madera County,BCG0000286,Purchase Expansion <30ft Bus,111304,64BC17-00480,22925.0,113319.0,22655.51,269.49,0,Open


#### Looking at data by year
* 2018 had the most projects followed by 2019 then 2021
* 2019 registered the largest amount of allocated money. 

In [52]:
year = df_5311_bus[['Grant_Fiscal_Year',"AllocationAmount",'Encumbered_Amount','ExpendedAmount', 'ActiveBalance','Organization_Name',]].groupby(['Grant_Fiscal_Year']).agg({"AllocationAmount": 'sum', 
'Encumbered_Amount': 'sum','ExpendedAmount': 'sum', 'ActiveBalance':'sum', 'Organization_Name':'count',})
year = year.rename(columns = {'Organization_Name': 'Number_of_Award_Recipients'})

In [53]:
year

Unnamed: 0_level_0,AllocationAmount,Encumbered_Amount,ExpendedAmount,ActiveBalance,Number_of_Award_Recipients
Grant_Fiscal_Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2011,110663.0,110663.0,101352.02,9310.98,1
2012,22925.0,113319.0,22655.51,269.49,1
2013,567790.0,981571.0,353046.51,214743.49,3
2014,556025.0,1159788.0,365484.39,190540.61,3
2015,126367.0,526572.0,0.0,126367.0,1
2016,951857.0,1272354.0,643057.62,308799.38,13
2017,3535960.0,4475890.0,3846724.46,-310764.46,32
2018,1259901.0,2246713.0,773442.0,486459.0,19
2019,1146142.28,1499806.28,132196.0,1013946.28,9
2020,1611958.0,2125602.0,0.0,1611958.0,18


### Looking at stuff by description, really annoying how the same category like "buy <30 ft bus for expansion' is split off
* Looks like most projects (58) are for Purchase Replacement < 30 Ft Bus	

In [54]:
description = df_5311_bus[["AllocationAmount",'Encumbered_Amount','ExpendedAmount', 'ActiveBalance','Description',]].groupby(['Description']).agg({"AllocationAmount": 'sum', 
'Encumbered_Amount': 'sum','ExpendedAmount': 'sum', 'ActiveBalance':'sum','Description': 'count'})
description = description.rename(columns = {'Description': 'Project_Count'})
description.sort_values(by='Project_Count', inplace=True, ascending=False)

In [55]:
description

Unnamed: 0_level_0,AllocationAmount,Encumbered_Amount,ExpendedAmount,ActiveBalance,Project_Count
Description,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Purchase Replacement < 30 Ft Bus,6903366.28,7396775.28,2674189.02,4229177.26,64
Purchase Replacement < 30 Ft Bus,597693.0,1197693.0,582806.76,14886.24,18
Purchase Replacement Std 40 Ft Bus,1241883.0,2295027.0,436550.0,805333.0,6
Purchase Replacement Std 35 Ft Bus,712305.0,712305.0,712305.0,0.0,5
Purchase Expansion <30ft Bus,454999.0,909998.0,449650.41,5348.59,4
Purchase Replacement Std 35 Ft Bus,329901.0,434595.0,0.0,329901.0,3
Buy 35-Ft Bus For Expansion,295304.0,160726.0,147977.0,147327.0,2
Purchase Replacement Std 35-39 Ft Bus,354012.0,0.0,0.0,354012.0,2
Buy <30-Ft Bus For Expansion,346075.0,346075.0,346075.0,0.0,2
Construction - Bus Shelters,29061.0,29827.0,13576.9,15484.1,2


### Look at the different organizations & the grant money they have received over the years
* Yuba-Sutter Transit Authority has received the most grants by agency

In [56]:
df_5311_bus.Organization_Name.value_counts()

Yuba-Sutter Transit Authority                   22
Mountain Area Regional Transit Authority        21
Sonoma County Transit                           20
Humboldt Transit Authority                       7
County of Tulare                                 6
Madera County                                    5
Morongo Basin Transit Authority                  4
Palo Verde Valley Transit Agency                 3
Kings County Area Public Transit Agency          3
City of Rio Vista                                3
Monterey-Salinas Transit                         2
Amador Transit                                   2
Tehama County Transit Agency                     2
City of Woodlake                                 2
City of Dixon                                    2
Trinity County Department of Transportation      2
City of Ojai                                     2
Redwood Coast Transit Authority                  1
City of Dinuba                                   1
Modoc Transportation Agency    

In [57]:
organization = df_5311_bus[['Grant_Fiscal_Year',"AllocationAmount",'Encumbered_Amount','ExpendedAmount', 'ActiveBalance','Organization_Name',]].groupby(['Organization_Name','Grant_Fiscal_Year']).agg({"AllocationAmount": 'sum', 
'Encumbered_Amount': 'sum','ExpendedAmount': 'sum', 'ActiveBalance':'sum'})

### Once we summarize the organization by year, it becomes more condensed

In [58]:
organization

Unnamed: 0_level_0,Unnamed: 1_level_0,AllocationAmount,Encumbered_Amount,ExpendedAmount,ActiveBalance
Organization_Name,Grant_Fiscal_Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Amador Transit,2020,211130.0,211130.0,0.0,211130.0
City of Dinuba,2018,132000.0,132000.0,132000.0,0.0
City of Dixon,2021,166436.0,0.0,0.0,166436.0
City of Ojai,2014,291536.0,291536.0,291536.0,0.0
City of Ojai,2021,354907.0,0.0,0.0,354907.0
City of Rio Vista,2019,75000.0,75000.0,0.0,75000.0
City of Rio Vista,2021,128208.0,0.0,0.0,128208.0
City of Woodlake,2018,0.0,0.0,0.0,0.0
County of Tulare,2017,352349.0,352349.0,352349.0,0.0
County of Tulare,2021,958259.0,0.0,0.0,958259.0
