# Grants Overview
***
* [Cleaning up and checking the data entered.](https://docs.google.com/spreadsheets/d/12pw6_2OSHKGksnLQlvwvo6P8VljzQ9pse7E9zGDSpg4/edit#gid=0)

In [1]:
import altair as alt
import numpy as np
import pandas as pd
import shared_utils
from calitp import *
from shared_utils import altair_utils
from shared_utils import calitp_color_palette as cp
from shared_utils import geography_utils, styleguide
from siuba import *

pd.options.display.max_columns = 100
pd.options.display.max_rows = 100
pd.set_option("display.max_colwidth", None)
from IPython.display import HTML, Image, Markdown, display, display_html



In [2]:
sheet_id = "12pw6_2OSHKGksnLQlvwvo6P8VljzQ9pse7E9zGDSpg4"
sheet_name = "current_grants"
url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}"
df = to_snakecase(pd.read_csv(url))

In [3]:
# Delete unnecessary columns
df = df.iloc[:, 1:26]

In [4]:
# Fill in missing values for eligiblity that are null with 0
eligiblity = [
    "eligibility:_transit",
    "eligibility:_municipality",
    "eligibility:_school_district",
    "eligibility:_mpo",
    "eligibility:_county_regional_govt__not_mpo",
    "eligibility:_other_local_entity__non_municipality_",
    "eligibility:_state_public_lands",
    "eligibility:_federal_public_lands",
    "eligibility:_tribal_government",
    "eligibility:_non_profit",
    "eligibility:_state_department_",
    "eligibility:_for_profit_companies",
]

df[eligiblity] = df[eligiblity].fillna(0)

In [5]:
# Make sure grant programs are unique
df["grant_program"].nunique()

70

In [6]:
# Make sure length of df matches unique programs
len(df)

70

In [7]:
# Make sure each grant only has one value
df["grant_program"].value_counts()

5304 Metropolitan & Statewide Planning and NonMetropolitan Transportation Planning                      1
Congestion Relief Program                                                                               1
Statewide Transportation Planning                                                                       1
Public Transportation on Indian Reservations Formula                                                    1
Rail Vehicle Replacement Grants                                                                         1
Bus and Bus Facilities Competitive Grants                                                               1
Ferry Service for Rural Communities                                                                     1
Tribal High Priority Projects Program                                                                   1
Highway Research & Development Program                                                                  1
Pilot Program for Transit Oriented Development

In [8]:
df.head(1)

Unnamed: 0,division,grant_program,notice_of_funding,fy_due_date,weeks_to_apply__c_d_,fy_cycle,funding_amount_available,flags,agency_eligibility,grant_type,federal_or_state_funded,eligibility_restrictions,current__in_this_fy_2022__or_expired,eligibility:_transit,eligibility:_municipality,eligibility:_school_district,eligibility:_mpo,eligibility:_county_regional_govt__not_mpo,eligibility:_other_local_entity__non_municipality_,eligibility:_state_public_lands,eligibility:_federal_public_lands,eligibility:_tribal_government,eligibility:_non_profit,eligibility:_state_department_,eligibility:_for_profit_companies
0,SB1 and Federal Transit Administration,5304 Metropolitan & Statewide Planning and NonMetropolitan Transportation Planning,Missing,Missing,Missing,1 Year,3000000.0,No Flags,MPOs/RTPAs\n• Transit Agencies\n• Universities and Community Colleges\n• Native American Tribal Governments\n• Cities and Counties\n• Community-Based Organizations\n• Non-Profit Organizations (501.C.3)\n• Other Public Entities**,Formula,Federal,State Departments of Transportation (DOTs) and Metropolitan Planning Organizations (MPOs). Federal planning funds are first apportioned to State DOTs. State DOTs then allocate planning funding to MPOs.,Current,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0


In [9]:
df.columns

Index(['division', 'grant_program', 'notice_of_funding', 'fy_due_date',
       'weeks_to_apply__c_d_', 'fy_cycle', 'funding_amount_available', 'flags',
       'agency_eligibility', 'grant_type', 'federal_or_state_funded',
       'eligibility_restrictions', 'current__in_this_fy_2022__or_expired',
       'eligibility:_transit', 'eligibility:_municipality',
       'eligibility:_school_district', 'eligibility:_mpo',
       'eligibility:_county_regional_govt__not_mpo',
       'eligibility:_other_local_entity__non_municipality_',
       'eligibility:_state_public_lands', 'eligibility:_federal_public_lands',
       'eligibility:_tribal_government', 'eligibility:_non_profit',
       'eligibility:_state_department_', 'eligibility:_for_profit_companies'],
      dtype='object')

In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 70 entries, 0 to 69
Data columns (total 25 columns):
 #   Column                                              Non-Null Count  Dtype  
---  ------                                              --------------  -----  
 0   division                                            70 non-null     object 
 1   grant_program                                       70 non-null     object 
 2   notice_of_funding                                   70 non-null     object 
 3   fy_due_date                                         70 non-null     object 
 4   weeks_to_apply__c_d_                                70 non-null     object 
 5   fy_cycle                                            70 non-null     object 
 6   funding_amount_available                            68 non-null     float64
 7   flags                                               70 non-null     object 
 8   agency_eligibility                                  70 non-null     object 
 9   g

## Looking at value counts to ensure the entered data makes sense
***

In [11]:
value_count_cols = [
    "division",
    "weeks_to_apply__c_d_",
    "fy_cycle",
    "grant_type",
    "federal_or_state_funded",
    "flags",
    "notice_of_funding",
    "fy_due_date",
]

In [12]:
# https://stackoverflow.com/questions/32589829/how-to-get-value-counts-for-multiple-columns-at-once-in-pandas-dataframe
for column in df[value_count_cols]:
    print("\n" + column)
    print(df[column].value_counts())


division
Federal Transit Administration (DOT)      18
Federal Highway Administration (DOT)      17
DRMT                                       8
DLA                                        8
SB1                                        7
Office of the Secretary (DOT)              5
SB1/DLA                                    2
SB1 and Federal Transit Administration     1
Caltrans - Unknown Division                1
Clean CA                                   1
DLA                                        1
Federal Railroad Administration (DOT)      1
Name: division, dtype: int64

weeks_to_apply__c_d_
Missing    44
9           7
8           4
15          3
13          3
7           1
4           1
10          1
18          1
24          1
12          1
17          1
11          1
14          1
Name: weeks_to_apply__c_d_, dtype: int64

fy_cycle
1 Year                                                                                                                                      22
Available

## Analyze how many grants available to organization types 
***

In [13]:
# Subset dataframe for only eligibility cols
eligibility = df[
    [
        "eligibility:_transit",
        "eligibility:_municipality",
        "eligibility:_school_district",
        "eligibility:_mpo",
        "eligibility:_county_regional_govt__not_mpo",
        "eligibility:_other_local_entity__non_municipality_",
        "eligibility:_state_public_lands",
        "eligibility:_federal_public_lands",
        "eligibility:_tribal_government",
        "eligibility:_non_profit",
        "eligibility:_state_department_",
        "eligibility:_for_profit_companies",
    ]
]

In [14]:
# Sum up the eligibility across
# Clean up values by removing _ and below removing eligiblity: 
eligibility = (
    df[eligiblity]
    .sum()
    .sort_values()
    .to_frame()
    .reset_index()
    .rename(columns={"index": "Entity", 0: "# of Grants"})
    .replace(r"_", " ", regex=True)
)

In [15]:
eligibility["Entity"] = (
    eligibility["Entity"].replace(r"eligibility:", "", regex=True).str.title()
)

In [16]:
eligibility.style.hide(axis="index").set_properties(
    **{"background-color": "white"}
).set_table_styles(
    [dict(selector="th", props=[("text-align", "center")])]
).set_properties(
    **{"text-align": "center"}
)

Entity,# of Grants
For Profit Companies,2.0
School District,3.0
Federal Public Lands,3.0
State Public Lands,6.0
Non Profit,15.0
Other Local Entity Non Municipality,19.0
Transit,22.0
Mpo,30.0
Municipality,33.0
State Department,37.0


## How much 'competition' for grants?
***
* Count number of eligible applicants per grant. 


In [23]:
# Subset dataframe
df_elgibility = df[
    [
        "grant_program",
        "eligibility:_transit",
        "eligibility:_municipality",
        "eligibility:_school_district",
        "eligibility:_mpo",
        "eligibility:_county_regional_govt__not_mpo",
        "eligibility:_other_local_entity__non_municipality_",
        "eligibility:_state_public_lands",
        "eligibility:_federal_public_lands",
        "eligibility:_tribal_government",
        "eligibility:_non_profit",
        "eligibility:_state_department_",
        "eligibility:_for_profit_companies",
    ]
]

In [24]:
# Sum up eligibility
df_elgibility["total_eligible_applicants"] = df_elgibility.iloc[:, 1:13].sum(axis=1)

# Drop old columns to simplify dataframe
df_elgibility = df_elgibility[["grant_program", "total_eligible_applicants"]]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [25]:
df_elgibility.sample(3)

Unnamed: 0,grant_program,total_eligible_applicants
60,Tribal Transportation Facility Bridges\r\n(Bridge Formula Funding Set-Aside)\r,1.0
46,Ferry Service for Rural Communities,1.0
34,All Stations Accessibility Program,2.0


In [26]:
df_elgibility["total_eligible_applicants"].value_counts()

1.0     16
5.0     15
2.0     12
3.0      7
4.0      5
6.0      5
8.0      3
0.0      3
10.0     2
11.0     1
7.0      1
Name: total_eligible_applicants, dtype: int64

In [27]:
df_elgibility["total_eligible_applicants"].median()

3.0

## Sums
***

In [17]:
df["funding_amount_available"].sum()

50098305298.0

In [18]:
sum_df = df.groupby(
    [
        "division",
        "flags",
    ]
).agg({"funding_amount_available": "sum"})

In [19]:
sum_df["funding_amount_available"] = (
    "$"
    + (sum_df["funding_amount_available"].astype(float) / 1000000000)
    .round()
    .astype(str)
    + "B"
)

In [20]:
sum_df

Unnamed: 0_level_0,Unnamed: 1_level_0,funding_amount_available
division,flags,Unnamed: 2_level_1
Caltrans - Unknown Division,Nationwide,$2.0B
Clean CA,No Flags,$0.0B
DLA,Lump sum,$1.0B
DLA,No Flags,$1.0B
DLA,No Flags,$0.0B
DRMT,Estimate,$0.0B
DRMT,Nationwide,$9.0B
DRMT,No Flags,$1.0B
Federal Highway Administration (DOT),Lump sum across all years,$0.0B
Federal Highway Administration (DOT),Nationwide,$4.0B


In [21]:
fed_state = df.groupby("federal_or_state_funded").agg(
    {"funding_amount_available": "sum"}
)
fed_state["funding_amount_available"] = (
    "$"
    + (fed_state["funding_amount_available"].astype(float) / 1000000000)
    .round()
    .astype(str)
    + "B"
)

In [22]:
fed_state

Unnamed: 0_level_0,funding_amount_available
federal_or_state_funded,Unnamed: 1_level_1
Both,$6.0B
Federal,$41.0B
Missing,$0.0B
State,$3.0B


## NOFO/Open Date and Due Dates


In [28]:
# New subsetted df that excludes any missing and rolling dates
populated_dates = df[
    (df["notice_of_funding"] != "Missing")
    & (df["fy_due_date"] != "Missing")
    & (df["fy_due_date"] != "Rolling")
]

In [29]:
f"Only {len(populated_dates)} or  {(len(populated_dates)/len(df))}  rows have a complete date information"

'Only 26 or  0.37142857142857144  rows have a complete date information'

In [30]:
populated_dates = populated_dates[
    [
        "division",
        "grant_program",
        "notice_of_funding",
        "fy_due_date",
        "weeks_to_apply__c_d_",
    ]
]

In [31]:
populated_dates.sort_values("weeks_to_apply__c_d_")

Unnamed: 0,division,grant_program,notice_of_funding,fy_due_date,weeks_to_apply__c_d_
18,SB1,Caltrans Sustainable Transportation Planning: Strategic Partnership & Strategic Partnership -Transit,"December 1, 2020","February 12, 2021",10
38,Office of the Secretary (DOT),Local and Regional Project Assistance Grants (RAISE),"January 28, 2022","April 14, 2022",11
29,Federal Highway Administration (DOT),Nationally Significant Federal Lands and Tribal Project Program,"Oct 20, 2021","January 9, 2022",12
21,DLA,Safe Route To School Program (SRTS),"March 16, 2022","June 15, 2022",13
15,SB1/DLA,Active Transportation Program,"March 16, 2022","June 15, 2022",13
12,SB1,Trade Corridor Enhancement Program,"August 17, 2022","November 18, 2022",13
67,Federal Transit Administration (DOT),Public Transportation on Indian Reservations Program; Tribal Transit Competitive Program,"February 16, 2022","May 25, 2022",14
11,SB1,​​Solutions for Congested Corridors Program,"August 17, 2022","December 2, 2022",15
14,SB1,Local Partnership Program,"August 17, 2022","November 28, 2022",15
3,DRMT,Transit Intercity and Rail Capital Program,"November 19, 2021","March 3, 2022",15


In [32]:
populated_dates["weeks_to_apply__c_d_"].value_counts()

9     7
8     4
15    3
13    3
7     1
4     1
10    1
18    1
24    1
12    1
17    1
11    1
14    1
Name: weeks_to_apply__c_d_, dtype: int64

In [33]:
populated_dates["weeks_to_apply__c_d_"].median()

9.5