# Grants Overview
***
* [Cleaning up and checking the data entered.](https://docs.google.com/spreadsheets/d/12pw6_2OSHKGksnLQlvwvo6P8VljzQ9pse7E9zGDSpg4/edit#gid=0)

In [28]:
import pandas as pd
import numpy as np
from siuba import *
from calitp import *
import altair as alt
import shared_utils 
from shared_utils import altair_utils
from shared_utils import geography_utils
from shared_utils import calitp_color_palette as cp
from shared_utils import styleguide
pd.options.display.max_columns = 100
pd.options.display.max_rows = 100
pd.set_option("display.max_colwidth", None)
from IPython.display import HTML, Image, Markdown, display, display_html

In [2]:
sheet_id = "12pw6_2OSHKGksnLQlvwvo6P8VljzQ9pse7E9zGDSpg4"
sheet_name = "current_grants"
url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}"
#https://docs.google.com/spreadsheets/d/12pw6_2OSHKGksnLQlvwvo6P8VljzQ9pse7E9zGDSpg4/edit#gid=0
df = to_snakecase(pd.read_csv(url)) 

In [3]:

#df = df.iloc[:,1:25]

In [4]:
#Fill in missing values for eligiblity that are null with 0 
eligiblity = ['eligibility:_transit',
       'eligibility:_municipality', 'eligibility:_school_district',
       'eligibility:_mpo', 'eligibility:_county_regional_govt__not_mpo',
       'eligibility:_other_local_entity__non_municipality_',
       'eligibility:_state_public_lands', 'eligibility:_federal_public_lands',
       'eligibility:_tribal_government', 'eligibility:_non_profit',
       'eligibility:_state_department_', 'eligibility:_for_profit_companies'] 

In [5]:
df[eligiblity] = df[eligiblity].fillna(0)

In [6]:
df['grant_program'].nunique()

75

In [44]:
df.columns

Index(['grant_#', 'division', 'grant_program', 'notice_of_funding',
       'fy_due_date', 'months_to_apply__c_d_', 'fy_cycle',
       'funding_amount_available', 'funding_amount_flags',
       'agency_eligibility', 'grant_type', 'federal_or_state_funded',
       'eligibility_restrictions', 'current__in_this_fy__or_expired',
       'eligibility:_transit', 'eligibility:_municipality',
       'eligibility:_school_district', 'eligibility:_mpo',
       'eligibility:_county_regional_govt__not_mpo',
       'eligibility:_other_local_entity__non_municipality_',
       'eligibility:_state_public_lands', 'eligibility:_federal_public_lands',
       'eligibility:_tribal_government', 'eligibility:_non_profit',
       'eligibility:_state_department_', 'eligibility:_for_profit_companies',
       'unnamed:_26', 'unnamed:_27', 'unnamed:_28', 'unnamed:_29',
       'unnamed:_30', 'unnamed:_31', 'unnamed:_32', 'unnamed:_33',
       'unnamed:_34', 'unnamed:_35', 'unnamed:_36', 'unnamed:_37',
       'total_e

In [7]:
df.iloc[:,0:10].sample(3)

Unnamed: 0,grant_#,division,grant_program,notice_of_funding,fy_due_date,months_to_apply__c_d_,fy_cycle,funding_amount_available,funding_amount_flags,agency_eligibility
35,36,Federal Highway Administration (DOT),Bridge Investment Program,"June 10, 2022","September 8, 2022",2,1 Year,2500000000,No Flags,•State\r\n •MPO (w/ pop. >200K)\r\n• Local government\r\n• Special purpose district or public authority with a transportation\r\nfunction\r\n• Federal land management agency\r\n• Tribal government
56,58,Federal Transit Administration (DOT),Public Transportation on Indian Reservations Formula,Formula funds are apportioned after appropriations are received.,Missing,Missing,Year of Apportionment plus two years\r,183250437,No Flags,Federally recognized Indian Tribes and Alaska Native Villages\n
72,74,Federal Highway Administration (DOT),Strategic Innovation for Revenue Collection\r\n(Set-aside),Missing,Missing,Missing,Available until expended\r,15000000,No Flags,"States, Metropolitan Planning Organizations, and Local governments"


## Looking at value counts to ensure the entered data makes sense
***

In [8]:
value_count_cols = ['division', 
       'months_to_apply__c_d_', 'fy_cycle','grant_type', 'federal_or_state_funded','funding_amount_flags']

In [9]:
#https://stackoverflow.com/questions/32589829/how-to-get-value-counts-for-multiple-columns-at-once-in-pandas-dataframe
for column in df[value_count_cols]:
     print("\n" + column)
     print(df[column].value_counts())


division
Federal Transit Administration (DOT)     18
Federal Highway Administration (DOT)     17
DLA                                      13
SB1                                       8
DRMT                                      8
Office of the Secretary (DOT)             6
SB1/DLA                                   2
Caltrans - Unknown Division               1
Clean CA                                  1
DLA                                       1
Federal Railroad Administration (DOT)     1
Name: division, dtype: int64

months_to_apply__c_d_
Missing    55
1           8
3           6
2           5
4           1
5           1
Name: months_to_apply__c_d_, dtype: int64

fy_cycle
1 Year                                                                                                                                      22
Available until expended                                                                                                                     7
2 Years                         

## Analyze how many grants x entity
***

In [10]:
eligibility = df[eligiblity].sum().sort_values().to_frame().reset_index().rename(columns = {'index':'Entity',0:'# of Grants'}).replace(r"_", " ", regex=True)

In [11]:
eligibility['Entity'] = eligibility['Entity'].replace(r"eligibility:", "", regex= True).str.title()

In [34]:
eligibility.style.hide(axis="index").set_properties(**{"background-color": "white"}).set_table_styles([dict(selector="th", props=[("text-align", "center")])]
).set_properties(**{"text-align": "center"})

Entity,# of Grants
For Profit Companies,2.0
School District,3.0
Federal Public Lands,4.0
State Public Lands,6.0
Non Profit,14.0
Transit,20.0
Other Local Entity Non Municipality,23.0
Mpo,32.0
Municipality,37.0
State Department,38.0


## Sums
***

In [50]:
df['funding_amount_available'].sum()

52515005298

In [51]:
sum_df = df.groupby('funding_amount_flags').agg({'funding_amount_available':'sum'})

In [52]:
sum_df["funding_amount_available"] = (
    "$"
    + (sum_df["funding_amount_available"].astype(float) / 1000000000)
    .round()
    .astype(str)
    + "B"
)

In [53]:
sum_df.reset_index()

Unnamed: 0,funding_amount_flags,funding_amount_available
0,Conflicting Amounts,$6.0B
1,Draft,$1.0B
2,Lump sum across all years,$1.0B
3,Nationwide,$12.0B
4,No Flags,$33.0B


In [54]:
fed_state = df.groupby('federal_or_state_funded').agg({'funding_amount_available':'sum'})
fed_state["funding_amount_available"] = (
    "$"
    + (fed_state["funding_amount_available"].astype(float) / 1000000000)
    .round()
    .astype(str)
    + "B"
)

In [55]:
fed_state

Unnamed: 0_level_0,funding_amount_available
federal_or_state_funded,Unnamed: 1_level_1
Both,$6.0B
Federal,$43.0B
Missing,$0.0B
State,$3.0B


## How much 'competition' for grants?
***
* Count number of eligible applicants per grant. 
* Rough draft.

In [56]:
#df["agency_eligibility"]  = df["agency_eligibility"].replace(r"•", ",", regex= True).replace(r"and", ",", regex= True)

In [57]:
df["agency_eligibility"]  = df["agency_eligibility"].replace(r"(•|and|;)", ",", regex= True)

In [58]:
df["total_eligible_applicants"] = (
    df["agency_eligibility"]
    .str.split(",+")
    .str.len()
    .groupby(df.grant_program)
    .transform("sum")
)

In [59]:
df[['agency_eligibility','total_eligible_applicants']].sort_values('total_eligible_applicants').sample(3)

Unnamed: 0,agency_eligibility,total_eligible_applicants
8,"Entities that operate fixed route bus service or that allocate funding to fixed route bus operators, , State or local governmental entities that operate fixed ro ute bus service that are eligible to receive direct grants under 5307 , 5311.",4
5,"States or local government authorities, private non-profit organizations, or operators of public transportation that receive a grant indirectly through a recipient.",3
65,"(A) A metropolitan planning organization, (B) a political subdivision of a\r\nState, (C) a federally recognized Tribal government, , (D) a multijurisdictional group\r\nof entities described in any of subparagraphs (A) through (C).",5


In [60]:
df['total_eligible_applicants'].value_counts()

3     14
1     14
2     12
4     10
5      6
8      5
7      3
11     3
10     2
9      2
6      1
16     1
12     1
22     1
23     1
Name: total_eligible_applicants, dtype: int64

In [61]:
df['total_eligible_applicants'].median()

3.0

In [62]:
df['total_eligible_applicants'].mean()

4.75