# 5311 Rural Areas First Look
* Using data from Black Cat
* Downloaded all records from projects file off of Black Cat

### Clean up

In [None]:
import pandas as pd
import numpy as np
#import shared_utils

In [None]:
# Location of the Black Cat export in the analytics bucket.
# NOTE(review): the folder name ends with a space before the slash ("5311 /");
# confirm this matches the actual bucket layout.
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/5311 /"
FILE_NAME = "Grant_Projects.xlsx"

# Read the workbook into a DataFrame.
df_5311 = pd.read_excel(GCS_FILE_PATH + FILE_NAME)

In [None]:
# Report how many rows the raw export contains.
f'There are {df_5311.shape[0]} rows without any filtering'

In [None]:
# Normalise the headers: line breaks become spaces, then every space becomes
# an underscore.
headers = df_5311.columns.str.replace('\n', ' ')
df_5311.columns = headers.str.replace(' ', '_')

In [None]:
# Remove fully duplicated rows, if any.
df_5311 = df_5311.drop_duplicates()

### Filtering for 5311 programs only

In [None]:
# List the distinct Funding_Program labels present in the export.
df_5311.Funding_Program.unique()

In [None]:
# Funding_Program labels to keep in the analysis.
# NOTE(review): '5339 (National)' is kept alongside the 5311 labels even though
# this section is described as 5311-only; confirm that is intended.
subset = [
    'Section 5311',
    '5311(f) Cont',
    '5339 (National)',
    'Section 5311(f)',
    '5311(f) Round 2',
]

In [None]:
# Keep only the rows whose Funding_Program is one of the selected labels.
df_5311 = df_5311.loc[df_5311['Funding_Program'].isin(subset)]

In [None]:
# Report how many rows remain after the Funding_Program filter.
f'There are {df_5311.shape[0]} rows after filtering for only 5311'

In [None]:
# Show the cleaned column names of the filtered frame.
df_5311.columns

In [None]:
# Remove the three project-closure columns from the filtered frame.
df_5311 = df_5311.drop(
    labels=[
        'Project_Closed_By',
        'Project_Closed_Date',
        'Project_Closed_Time',
    ],
    axis="columns",
)

In [None]:
# Write the program-filtered projects to a local CSV, without the index.
# NOTE(review): the file is named "5311_bus.csv" but is written before the
# bus-description filter is applied; confirm which frame was meant to be saved.
df_5311.to_csv(path_or_buf="./5311_bus.csv", index=False)

### Find only projects with bus replacement

In [None]:
# Inspect the distinct descriptions; "bus" shows up as both "BUS" and "Bus".
df_5311['Description'].unique().tolist()

In [None]:
# Keep only projects whose description contains "bus" (case-insensitive, so
# "BUS", "Bus" and "bus" all match).
# na=False treats a missing description as "no match"; without it the mask
# holds NaN for those rows and boolean indexing raises a ValueError.
df_5311_bus = df_5311[
    df_5311.Description.str.contains("Bus", case=False, na=False)
]

In [None]:
# Report how many rows remain after the bus-description filter.
f'There are {df_5311_bus.shape[0]} rows after filtering for only 5311 & descriptions that contain keyword bus'

In [None]:
# Count missing values per column.
df_5311_bus.isnull().sum()

In [None]:
# Drop the project-closure columns (closed by, closed date, closed time).
# The same columns are already removed from df_5311 earlier in the notebook,
# so when the cells run top to bottom they no longer exist here and a plain
# drop raises KeyError. errors="ignore" makes this a no-op in that case while
# still dropping the columns if they are present.
df_5311_bus = df_5311_bus.drop(
    columns=['Project_Closed_By', 'Project_Closed_Date', 'Project_Closed_Time'],
    errors="ignore",
)

### Finding unique agencies/organizations

In [None]:
# Distinct organization names, in order of first appearance.
unique_agencies = df_5311_bus.Organization_Name.drop_duplicates().tolist()

### I expected each record/grant recipient to have its own unique contract number, but apparently that is not the case

In [None]:
# Number of distinct (non-missing) contract numbers among the bus projects.
print(df_5311_bus.Contract_Number.nunique(dropna=True))

### Looking at some measures

In [None]:
# Count rows per Funding_Program label (the same program appears under
# several different labels).
df_5311_bus['Funding_Program'].value_counts()

In [None]:
# Preview the first two rows.
df_5311_bus.iloc[:2]

#### Looking at data by year
* 2018 had the most projects followed by 2019 then 2021
* 2019 registered the largest amount of allocated money. 

In [None]:
# Summarise the bus projects per grant fiscal year: sum each dollar column and
# count the rows that have an organization name.
# NOTE(review): Number_of_Award_Recipients counts rows, not distinct
# organizations; confirm that is the intended meaning.
year = df_5311_bus.groupby('Grant_Fiscal_Year').agg(
    AllocationAmount=('AllocationAmount', 'sum'),
    Encumbered_Amount=('Encumbered_Amount', 'sum'),
    ExpendedAmount=('ExpendedAmount', 'sum'),
    ActiveBalance=('ActiveBalance', 'sum'),
    Number_of_Award_Recipients=('Organization_Name', 'count'),
)

In [None]:
# Display the per-fiscal-year summary table.
year

### Looking at projects by description. Annoyingly, the same category (e.g. "buy <30 ft bus for expansion") is split across several differently worded descriptions
* Looks like most projects (58) are for Purchase Replacement < 30 Ft Bus	

In [None]:
# Summarise the bus projects per description: sum each dollar column, count
# the projects, and list the most common descriptions first.
description = (
    df_5311_bus[
        [
            "AllocationAmount",
            'Encumbered_Amount',
            'ExpendedAmount',
            'ActiveBalance',
            'Description',
        ]
    ]
    .groupby(['Description'])
    .agg(
        {
            "AllocationAmount": 'sum',
            'Encumbered_Amount': 'sum',
            'ExpendedAmount': 'sum',
            'ActiveBalance': 'sum',
            'Description': 'count',
        }
    )
    .rename(columns={'Description': 'Project_Count'})
    .sort_values(by='Project_Count', ascending=False)
)

In [None]:
# Display the per-description summary, sorted by project count (descending).
description

### Look at the different organizations & the grant money they have received over the years
* Yuba-Sutter Transit Authority has received the most grants of any agency

In [None]:
# Count bus-project rows per organization.
df_5311_bus['Organization_Name'].value_counts()

In [None]:
# Sum each dollar column per organization and grant fiscal year.
organization = (
    df_5311_bus[
        [
            'Grant_Fiscal_Year',
            "AllocationAmount",
            'Encumbered_Amount',
            'ExpendedAmount',
            'ActiveBalance',
            'Organization_Name',
        ]
    ]
    .groupby(['Organization_Name', 'Grant_Fiscal_Year'])
    .agg(
        dict.fromkeys(
            ["AllocationAmount", 'Encumbered_Amount', 'ExpendedAmount', 'ActiveBalance'],
            'sum',
        )
    )
)

### Once we summarize the organization by year, it becomes more condensed

In [None]:
# Display the per-organization, per-fiscal-year summary table.
organization