# Grants Overview
***
* [Cleaning up and checking the data entered.](https://docs.google.com/spreadsheets/d/12pw6_2OSHKGksnLQlvwvo6P8VljzQ9pse7E9zGDSpg4/edit#gid=0)

In [1]:
import pandas as pd
import numpy as np
from siuba import *
from calitp import *
import altair as alt
import shared_utils 
from shared_utils import altair_utils
from shared_utils import geography_utils
from shared_utils import calitp_color_palette as cp
from shared_utils import styleguide
pd.options.display.max_columns = 100
pd.options.display.max_rows = 100
pd.set_option("display.max_colwidth", None)
from IPython.display import HTML, Image, Markdown, display, display_html



In [2]:
sheet_id = "12pw6_2OSHKGksnLQlvwvo6P8VljzQ9pse7E9zGDSpg4"
sheet_name = "current_grants"
url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}"
df = to_snakecase(pd.read_csv(url)) 

In [3]:
#Delete unnecessary columns
df = df.iloc[:,1:26]

In [4]:
#Fill in missing values for eligiblity that are null with 0 
eligiblity = ['eligibility:_transit',
       'eligibility:_municipality', 'eligibility:_school_district',
       'eligibility:_mpo', 'eligibility:_county_regional_govt__not_mpo',
       'eligibility:_other_local_entity__non_municipality_',
       'eligibility:_state_public_lands', 'eligibility:_federal_public_lands',
       'eligibility:_tribal_government', 'eligibility:_non_profit',
       'eligibility:_state_department_', 'eligibility:_for_profit_companies'] 

In [5]:
df[eligiblity] = df[eligiblity].fillna(0)

In [6]:
df['grant_program'].nunique()

75

## Looking at value counts to ensure the entered data makes sense
***

In [61]:
value_count_cols = ['division', 
       'weeks_to_apply__c_d_', 'fy_cycle','grant_type', 'federal_or_state_funded','funding_amount_flags', 'notice_of_funding']

In [62]:
#https://stackoverflow.com/questions/32589829/how-to-get-value-counts-for-multiple-columns-at-once-in-pandas-dataframe
for column in df[value_count_cols]:
     print("\n" + column)
     print(df[column].value_counts())


division
Federal Transit Administration (DOT)     18
Federal Highway Administration (DOT)     17
DLA                                      13
SB1                                       8
DRMT                                      8
Office of the Secretary (DOT)             6
SB1/DLA                                   2
Caltrans - Unknown Division               1
Clean CA                                  1
DLA                                       1
Federal Railroad Administration (DOT)     1
Name: division, dtype: int64

weeks_to_apply__c_d_
Missing    49
9           5
8           4
13          4
15          3
4           3
7           1
10          1
18          1
24          1
12          1
17          1
11          1
14          1
Name: weeks_to_apply__c_d_, dtype: int64

fy_cycle
1 Year                                                                                                                                      22
Available until expended                                         

## Analyze how many grants x entity
***

In [12]:
eligibility = df[eligiblity].sum().sort_values().to_frame().reset_index().rename(columns = {'index':'Entity',0:'# of Grants'}).replace(r"_", " ", regex=True)

In [13]:
eligibility['Entity'] = eligibility['Entity'].replace(r"eligibility:", "", regex= True).str.title()

In [14]:
eligibility.style.hide(axis="index").set_properties(**{"background-color": "white"}).set_table_styles([dict(selector="th", props=[("text-align", "center")])]
).set_properties(**{"text-align": "center"})

Entity,# of Grants
For Profit Companies,2.0
School District,3.0
Federal Public Lands,4.0
State Public Lands,6.0
Non Profit,14.0
Transit,20.0
Other Local Entity Non Municipality,23.0
Mpo,32.0
Municipality,37.0
State Department,38.0


## Sums
***

In [15]:
df['funding_amount_available'].sum()

52515005298

In [36]:
sum_df = df.groupby(['funding_amount_flags', 'division']).agg({'funding_amount_available':'sum'})

In [37]:
sum_df["funding_amount_available"] = (
    "$"
    + (sum_df["funding_amount_available"].astype(float) / 1000000000)
    .round()
    .astype(str)
    + "B"
)

In [38]:
sum_df.reset_index()

Unnamed: 0,funding_amount_flags,division,funding_amount_available
0,Conflicting Amounts,SB1,$6.0B
1,Draft,SB1,$1.0B
2,Lump sum across all years,Federal Highway Administration (DOT),$1.0B
3,Lump sum across all years,Federal Transit Administration (DOT),$0.0B
4,Nationwide,Caltrans - Unknown Division,$2.0B
5,Nationwide,DRMT,$10.0B
6,No Flags,Clean CA,$0.0B
7,No Flags,DLA,$1.0B
8,No Flags,DLA,$0.0B
9,No Flags,DRMT,$1.0B


In [39]:
fed_state = df.groupby('federal_or_state_funded').agg({'funding_amount_available':'sum'})
fed_state["funding_amount_available"] = (
    "$"
    + (fed_state["funding_amount_available"].astype(float) / 1000000000)
    .round()
    .astype(str)
    + "B"
)

In [40]:
fed_state

Unnamed: 0_level_0,funding_amount_available
federal_or_state_funded,Unnamed: 1_level_1
Both,$6.0B
Federal,$43.0B
Missing,$0.0B
State,$3.0B


## How much 'competition' for grants?
***
* Count number of eligible applicants per grant. 


In [41]:
#Subset dataframe 
df_elgibility = df[['grant_program', 'eligibility:_transit',
       'eligibility:_municipality', 'eligibility:_school_district',
       'eligibility:_mpo', 'eligibility:_county_regional_govt__not_mpo',
       'eligibility:_other_local_entity__non_municipality_',
       'eligibility:_state_public_lands', 'eligibility:_federal_public_lands',
       'eligibility:_tribal_government', 'eligibility:_non_profit',
       'eligibility:_state_department_', 'eligibility:_for_profit_companies']]

In [42]:
#Sum up eligibility 
df_elgibility['total_eligible_applicants'] = df_elgibility.iloc[:, 1:13].sum(axis=1)

#Drop old columns to simplify dataframe
df_elgibility = df_elgibility[['grant_program','total_eligible_applicants']] 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [52]:
df_elgibility.sample(3)

Unnamed: 0,grant_program,total_eligible_applicants
45,Tribal Transportation Program,1.0
39,All Stations Accessibility Program,2.0
53,Bus and Bus Facilities Competitive Grants,5.0


In [44]:
df_elgibility['total_eligible_applicants'].value_counts()

1.0     17
5.0     16
2.0     13
4.0      8
3.0      8
6.0      5
8.0      3
0.0      3
10.0     2
11.0     1
Name: total_eligible_applicants, dtype: int64

In [45]:
df_elgibility['total_eligible_applicants'].median()

3.0

## NOFO and Due Dates


In [46]:
populated_dates = df[(df['notice_of_funding'] != 'Missing') & (df['fy_due_date'] != 'Missing') & (df['fy_due_date'] != 'Rolling')]


In [47]:
f'Only {len(populated_dates)} or  {(len(populated_dates)/len(df))}  rows have a complete date information' 

'Only 27 or  0.35526315789473684  rows have a complete date information'

In [48]:
populated_dates = populated_dates[['division', 'grant_program', 'notice_of_funding', 'fy_due_date',
       'weeks_to_apply__c_d_']]

In [49]:
populated_dates.sort_values('weeks_to_apply__c_d_')

Unnamed: 0,division,grant_program,notice_of_funding,fy_due_date,weeks_to_apply__c_d_
18,SB1,Caltrans Sustainable Transportation Planning: Strategic Partnership & Strategic Partnership -Transit,"December 1, 2020","February 12, 2021",10
43,Office of the Secretary (DOT),Local and Regional Project Assistance Grants (RAISE),"January 28, 2022","April 14, 2022",11
34,Federal Highway Administration (DOT),Nationally Significant Federal Lands and Tribal Project Program,"Oct 20, 2021","January 9, 2022",12
35,Federal Highway Administration (DOT),Bridge Investment Program,"June 10, 2022","September 8, 2022",13
22,DLA,Safe Route To School Program (SRTS),"March 16, 2022","June 15, 2022",13
15,SB1/DLA,Active Transportation Program,"March 16, 2022","June 15, 2022",13
12,SB1,Trade Corridor Enhancement Program,"August 17, 2022","November 18, 2022",13
75,Federal Transit Administration (DOT),Public Transportation on Indian Reservations Program; Tribal Transit Competitive Program,"February 16, 2022","May 25, 2022",14
11,SB1,​​Solutions for Congested Corridors Program,"August 17, 2022","December 2, 2022",15
3,DRMT,Transit Intercity and Rail Capital Program,"November 19, 2021","March 3, 2022",15


In [50]:
populated_dates['weeks_to_apply__c_d_'].value_counts()

9     5
8     4
13    4
15    3
4     3
7     1
10    1
18    1
24    1
12    1
17    1
11    1
14    1
Name: weeks_to_apply__c_d_, dtype: int64

In [51]:
populated_dates['weeks_to_apply__c_d_'].median()

10.0