# Grants Overview
***
* [Cleaning up and checking the data entered.](https://docs.google.com/spreadsheets/d/12pw6_2OSHKGksnLQlvwvo6P8VljzQ9pse7E9zGDSpg4/edit#gid=0)

In [1]:
import pandas as pd
from calitp_data_analysis.sql import to_snakecase

pd.options.display.max_columns = 100
pd.options.display.max_rows = 100
pd.set_option("display.max_colwidth", None)

In [2]:
sheet_id = "12pw6_2OSHKGksnLQlvwvo6P8VljzQ9pse7E9zGDSpg4"
sheet_name = "current_grants"
url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}"
df = to_snakecase(pd.read_csv(url))

In [3]:
# Delete unnecessary columns
df = df.iloc[:, 1:26]

In [4]:
# Fill in missing values for eligiblity that are null with 0
eligiblity = [
    "eligibility:_transit",
    "eligibility:_municipality",
    "eligibility:_school_district",
    "eligibility:_mpo",
    "eligibility:_county_regional_govt__not_mpo",
    "eligibility:_other_local_entity__non_municipality_",
    "eligibility:_state_public_lands",
    "eligibility:_federal_public_lands",
    "eligibility:_tribal_government",
    "eligibility:_non_profit",
    "eligibility:_state_department_",
    "eligibility:_for_profit_companies",
]

df[eligiblity] = df[eligiblity].fillna(0)

In [5]:
# Make sure grant programs are unique
df["grant_program"].nunique()

39

In [6]:
# Make sure length of df matches unique programs
len(df)

39

In [7]:
# Make sure each grant only has one value
df["grant_program"].value_counts()

Capital Investment Grants - 5309                                                            1
Prioritization Process Pilot Program                                                        1
Statewide Transportation Planning                                                           1
Urbanized Area Passenger Ferry Program                                                      1
Pilot Program for Transit Oriented Development                                              1
Public Transportation Technical Assistance and Workforce Development                        1
Public Transportation on Indian Reservations Competitive                                    1
National Rural Transportation Assistance Program\r                                          1
Pilot Program for Enhanced Mobility                                                         1
Tribal Transportation Facility Bridges\r\n(Bridge Formula Funding Set-Aside)\r              1
Rail Vehicle Replacement Grants                             

In [8]:
df.head(1)

Unnamed: 0,division,grant_program,notice_of_funding,fy_due_date,weeks_to_apply__c_d_,fy_cycle,funding_amount_available,flags,agency_eligibility,grant_type,federal_or_state_funded,eligibility_restrictions,current__in_this_fy_2022__or_expired,eligibility:_transit,eligibility:_municipality,eligibility:_school_district,eligibility:_mpo,eligibility:_county_regional_govt__not_mpo,eligibility:_other_local_entity__non_municipality_,eligibility:_state_public_lands,eligibility:_federal_public_lands,eligibility:_tribal_government,eligibility:_non_profit,eligibility:_state_department_,eligibility:_for_profit_companies
0,Caltrans - Unknown Division,Capital Investment Grants - 5309,Missing,Missing,Missing,1 Year,2300000000,Nationwide,Large Urban MPOs and Transit Agencies,Competitive,Federal,Missing,Current,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
df.columns

Index(['division', 'grant_program', 'notice_of_funding', 'fy_due_date',
       'weeks_to_apply__c_d_', 'fy_cycle', 'funding_amount_available', 'flags',
       'agency_eligibility', 'grant_type', 'federal_or_state_funded',
       'eligibility_restrictions', 'current__in_this_fy_2022__or_expired',
       'eligibility:_transit', 'eligibility:_municipality',
       'eligibility:_school_district', 'eligibility:_mpo',
       'eligibility:_county_regional_govt__not_mpo',
       'eligibility:_other_local_entity__non_municipality_',
       'eligibility:_state_public_lands', 'eligibility:_federal_public_lands',
       'eligibility:_tribal_government', 'eligibility:_non_profit',
       'eligibility:_state_department_', 'eligibility:_for_profit_companies'],
      dtype='object')

In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 39 entries, 0 to 38
Data columns (total 25 columns):
 #   Column                                              Non-Null Count  Dtype  
---  ------                                              --------------  -----  
 0   division                                            39 non-null     object 
 1   grant_program                                       39 non-null     object 
 2   notice_of_funding                                   39 non-null     object 
 3   fy_due_date                                         39 non-null     object 
 4   weeks_to_apply__c_d_                                39 non-null     object 
 5   fy_cycle                                            39 non-null     object 
 6   funding_amount_available                            39 non-null     int64  
 7   flags                                               39 non-null     object 
 8   agency_eligibility                                  39 non-null     object 
 9   g

## Looking at value counts to ensure the entered data makes sense
***

In [11]:
value_count_cols = [
    "division",
    "weeks_to_apply__c_d_",
    "fy_cycle",
    "grant_type",
    "federal_or_state_funded",
    "flags",
    "notice_of_funding",
    "fy_due_date",
]

In [12]:
# https://stackoverflow.com/questions/32589829/how-to-get-value-counts-for-multiple-columns-at-once-in-pandas-dataframe
for column in df[value_count_cols]:
    print("\n" + column)
    print(df[column].value_counts())


division
Federal Transit Administration (DOT)     16
Federal Highway Administration (DOT)     14
DRMT                                      4
US Department of Transportation (DOT)     3
Caltrans - Unknown Division               1
Federal Railroad Administration (DOT)     1
Name: division, dtype: int64

weeks_to_apply__c_d_
Missing    29
9           4
8           3
17          1
11          1
14          1
Name: weeks_to_apply__c_d_, dtype: int64

fy_cycle
1 Year                                    13
Available until expended                   5
Period of Availability: 4 year\r           3
Year of Apportionment plus three years     3
Period of Availability: 4 year             3
Year of Apportionment plus two years\r     2
Available until expended\r                 2
Year of allocation plus two years          2
One-time only                              2
2 Years                                    1
1 Year/Until Expended                      1
Year of Allocation plus five years\r       1


## Analyze how many grants available to organization types 
***

In [13]:
# Subset dataframe for only eligibility cols
eligibility = df[
    [
        "eligibility:_transit",
        "eligibility:_municipality",
        "eligibility:_school_district",
        "eligibility:_mpo",
        "eligibility:_county_regional_govt__not_mpo",
        "eligibility:_other_local_entity__non_municipality_",
        "eligibility:_state_public_lands",
        "eligibility:_federal_public_lands",
        "eligibility:_tribal_government",
        "eligibility:_non_profit",
        "eligibility:_state_department_",
        "eligibility:_for_profit_companies",
    ]
]

In [14]:
# Sum up the eligibility across
# Clean up values by removing _ and below removing eligiblity: 
eligibility = (
    df[eligiblity]
    .sum()
    .sort_values()
    .to_frame()
    .reset_index()
    .rename(columns={"index": "Entity", 0: "# of Grants"})
    .replace(r"_", " ", regex=True)
)

In [15]:
eligibility["Entity"] = (
    eligibility["Entity"].replace(r"eligibility:", "", regex=True).str.title()
)

In [16]:
eligibility.style.hide(axis="index").set_properties(
    **{"background-color": "white"}
).set_table_styles(
    [dict(selector="th", props=[("text-align", "center")])]
).set_properties(
    **{"text-align": "center"}
)

Entity,# of Grants
School District,0.0
Federal Public Lands,0.0
For Profit Companies,2.0
State Public Lands,3.0
Non Profit,9.0
Transit,11.0
Other Local Entity Non Municipality,11.0
Mpo,17.0
Municipality,18.0
County Regional Govt Not Mpo,19.0


## How much 'competition' for grants?
***
* Count number of eligible applicants per grant. 


In [17]:
# Subset dataframe
df_elgibility = df[
    [
        "grant_program",
        "eligibility:_transit",
        "eligibility:_municipality",
        "eligibility:_school_district",
        "eligibility:_mpo",
        "eligibility:_county_regional_govt__not_mpo",
        "eligibility:_other_local_entity__non_municipality_",
        "eligibility:_state_public_lands",
        "eligibility:_federal_public_lands",
        "eligibility:_tribal_government",
        "eligibility:_non_profit",
        "eligibility:_state_department_",
        "eligibility:_for_profit_companies",
    ]
]

In [18]:
# Sum up eligibility
df_elgibility["total_eligible_applicants"] = df_elgibility.iloc[:, 1:13].sum(axis=1)

# Drop old columns to simplify dataframe
df_elgibility = df_elgibility[["grant_program", "total_eligible_applicants"]]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_elgibility["total_eligible_applicants"] = df_elgibility.iloc[:, 1:13].sum(axis=1)


In [19]:
df_elgibility.sample(3)

Unnamed: 0,grant_program,total_eligible_applicants
3,Federal CMAQ (already in Consolidated Application),3.0
19,Bus and Bus Facilities Competitive Grants,5.0
6,Ferry Programs: Electric or Low Emitting Ferry Program,0.0


In [20]:
df_elgibility["total_eligible_applicants"].value_counts()

1.0     10
5.0      9
2.0      6
6.0      4
3.0      3
0.0      2
4.0      2
10.0     2
7.0      1
Name: total_eligible_applicants, dtype: int64

In [21]:
df_elgibility["total_eligible_applicants"].median()

3.0

## Sums
***

In [22]:
df["funding_amount_available"].sum()

28424926921

In [23]:
sum_df = df.groupby(
    [
        "division",
        "flags",
    ]
).agg({"funding_amount_available": "sum"})

In [24]:
sum_df["funding_amount_available"] = (
    "$"
    + (sum_df["funding_amount_available"].astype(float) / 1000000000)
    .round()
    .astype(str)
    + "B"
)

In [25]:
sum_df

Unnamed: 0_level_0,Unnamed: 1_level_0,funding_amount_available
division,flags,Unnamed: 2_level_1
Caltrans - Unknown Division,Nationwide,$2.0B
DRMT,Nationwide,$9.0B
Federal Highway Administration (DOT),Nationwide,$4.0B
Federal Highway Administration (DOT),No Flags,$0.0B
Federal Railroad Administration (DOT),Nationwide,$1.0B
Federal Transit Administration (DOT),Nationwide,$6.0B
US Department of Transportation (DOT),Nationwide,$7.0B


In [26]:
fed_state = df.groupby("federal_or_state_funded").agg(
    {"funding_amount_available": "sum"}
)
fed_state["funding_amount_available"] = (
    "$"
    + (fed_state["funding_amount_available"].astype(float) / 1000000000)
    .round()
    .astype(str)
    + "B"
)

In [27]:
fed_state

Unnamed: 0_level_0,funding_amount_available
federal_or_state_funded,Unnamed: 1_level_1
Federal,$28.0B


## NOFO/Open Date and Due Dates


In [28]:
# New subsetted df that excludes any missing and rolling dates
populated_dates = df[
    (df["notice_of_funding"] != "Missing")
    & (df["fy_due_date"] != "Missing")
    & (df["fy_due_date"] != "Rolling")
]

In [29]:
f"Only {len(populated_dates)} or  {(len(populated_dates)/len(df))}  rows have a complete date information"

'Only 10 or  0.2564102564102564  rows have a complete date information'

In [30]:
populated_dates = populated_dates[
    [
        "division",
        "grant_program",
        "notice_of_funding",
        "fy_due_date",
        "weeks_to_apply__c_d_",
    ]
]

In [31]:
populated_dates.sort_values("weeks_to_apply__c_d_")

Unnamed: 0,division,grant_program,notice_of_funding,fy_due_date,weeks_to_apply__c_d_
11,US Department of Transportation (DOT),Local and Regional Project Assistance Grants (RAISE),"January 28, 2022","April 14, 2022",11
36,Federal Transit Administration (DOT),Public Transportation on Indian Reservations Program; Tribal Transit Competitive Program,"February 16, 2022","May 25, 2022",14
10,Federal Railroad Administration (DOT),Consolidated Rail Infrastructure & Safety Improvements Grant Program,"August 1, 2021","November 29, 2021",17
2,DRMT,Rural Transit & Intercity Bus 5311 & 5311f (already in Consolidated Application),"March 1, 2022","April 29, 2022",8
3,DRMT,Federal CMAQ (already in Consolidated Application),"March 1, 2022","April 29, 2022",8
4,DRMT,Low or No Emission Vehicle Program 5339 (already in Consolidated Application),"March 1, 2022","April 29, 2022",8
24,Federal Transit Administration (DOT),Pilot Program for Transit Oriented Development,"May 26, 2022","July 25, 2022",9
34,Federal Transit Administration (DOT),Bus Exportable Power Systems,"April 12, 2022","June 13, 2022",9
35,Federal Transit Administration (DOT),Enhancing Mobility Innovation,"November 12, 2021","January 11, 2022",9
38,Federal Highway Administration (DOT),Fixing America's Surface Transportation (FAST) ACT,"June 22, 2021","August 23, 2021",9


In [32]:
populated_dates["weeks_to_apply__c_d_"].value_counts()

9     4
8     3
17    1
11    1
14    1
Name: weeks_to_apply__c_d_, dtype: int64

In [33]:
populated_dates["weeks_to_apply__c_d_"].median()

9.0