# TIRCP Program Allocation Plan
----


In [55]:
import pandas as pd
import math
from siuba import * 
import numpy as np
pd.options.display.max_columns = 50
pd.options.display.float_format = "{:.2f}".format
import datetime

In [56]:
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/tircp/"
FILE_NAME1 = "Raw_Project_Tracking_Sheet.xlsx"
project = pd.read_excel(f"{GCS_FILE_PATH}{FILE_NAME1}")
FILE_NAME2 = "Allocation_Agreement.xlsx"
allocation = pd.read_excel(f"{GCS_FILE_PATH}{FILE_NAME2}")

In [57]:
#cleaning up spaces in columns
project.columns = project.columns.str.strip().str.replace(' ', '_')
allocation.columns = allocation.columns.str.strip().str.replace(' ', '_')

In [58]:
#strip spaces in columns
project.columns = project.columns.map(lambda x: x.strip())
allocation.columns = allocation.columns.map(lambda x: x.strip())

## Keeping only relevant columns.

In [59]:
#subsetting for only columns of interest
df_project = project[['Award_Year', 'Project_#','Unallocated_Amount','Local_Agency','Project_Title','PPNO',
'TIRCP_Award_Amount_($)',]]

In [60]:
#subsetting for only columns of interest
df_allocation = allocation[['Award_Year','Award_Recipient', 'Implementing_Agency',
'Components', 'PPNO','Phase','Prior_Fiscal_Years_to_2020',
'Fiscal_Year_2020-2021', 'Fiscal_Year_2021-2022',
'Fiscal_Year_2022-2023', 'Fiscal_Year_2023-2024',
 'Fiscal_Year_2024-2025', 'Fiscal_Year_2025-2026',
'Fiscal_Year_2026-2027', 'Fiscal_Year_2027-2028',
'Fiscal_Year_2028-2029', 'Fiscal_Year_2029-2030','CTC_Financial_Resolution',
'Allocation_Date','Project_ID','SB1_Funding','GGRF_Funding','Allocation_Amount']]

# Cleaning Allocation Sheet 

In [61]:
#Filtering out for 2021, since that entry is blank
df_allocation = df_allocation.query("Award_Year != 2021")

In [62]:
#clean up columns in a loop
for i in ["Allocation_Date"]:
    df_allocation[i] = df_allocation[i].replace('/', '-', regex = True).replace('Complete', '', regex = True).replace('\n', '', regex=True).replace('Pending','TBD',regex= True).fillna('TBD')

## Clean up Dates

In [63]:
#changing some of the dates
df_allocation["Allocation_Date"].replace({'October 15, 2018\nSeptember 30, 2021': '2018-10-15 00:00:00',
 '2/1/2021\n\n10/31/2022':'2021-02-01 00:00:00', '45211':'2023-10-22', "FY 26/27": "2026-12-31", "08/12//20": '2020-08-12 00:00:00', 'FY 21/22': '2021-12-31',
'FY 22/23': '2022-12-31','FY 20/21': '2020-12-31', 'FY 23/24': '2023-12-31','FY 24/25': '2024-12-31','FY 25/26': '2025-12-31'}, inplace =True)

## Cleaning up PPNO, can only be 5 characters.

In [64]:
#remove the extra characters in PPNO in allocation to match the PPNO in project data frame bc there should only be five characters and numbers in each PPNO value
df_allocation = df_allocation.assign(
    PPNO_New = df_allocation['PPNO'].str.slice(start=0, stop=5)
)

In [65]:
#CSV with PPNO & Award Recipients
FILE_NAME2 = "Allocation_PPNO_Crosswalk.csv"
allocation_ppno = pd.read_csv(f"{GCS_FILE_PATH}{FILE_NAME2}")

In [66]:
allocation_ppno #printing to make sure it makes sense.

Unnamed: 0,Award_Year,PPNO_New2,Award_Recipient
0,2020,CP065,Los Angeles County Metropolitan Transportation...
1,2020,CP066,Los Angeles-San Diego-San Luis Obispo Rail Cor...
2,2016,1230,San Bernardino County Transportation Authority...
3,2018,1155,Transportation Agency for Monterey County


In [67]:
#Merge in Crosswalk 
df_allocation = pd.merge(df_allocation, allocation_ppno, on = ["Award_Year", "Award_Recipient"], how = "left")

In [68]:
#some values in PPNO and PPNO_New2 are strings, some are floats...so have to convert PPNO New 2 to strings
df_allocation.PPNO_New = df_allocation.apply(lambda x: x.PPNO_New if (str(x.PPNO_New2) == 'nan') else x.PPNO_New2, axis=1)

In [69]:
#drop old column
df_allocation = df_allocation.drop(['PPNO','PPNO_New2'], axis=1)

In [70]:
## Clean up TIRCP
df_allocation[['SB1_Funding','GGRF_Funding','Allocation_Amount']] = df_allocation[['SB1_Funding','GGRF_Funding','Allocation_Amount']].fillna(value=0)

# Cleaning Project Sheet



In [71]:
df_project.head(2)

Unnamed: 0,Award_Year,Project_#,Unallocated_Amount,Local_Agency,Project_Title,PPNO,TIRCP_Award_Amount_($)
0,2015,1,0,Antelope Valley Transit Authority (AVTA),Regional Transit Interconnectivity & Environme...,CP005,24403000.0
1,2015,2,0,Capitol Corridor Joint Powers Authority,Travel Time Reduction Project,CP012,4620000.0


## Filling NA for TIRCP and Expended Amounts

In [72]:
df_project[['TIRCP_Award_Amount_($)', 'Unallocated_Amount']] = df_project[['TIRCP_Award_Amount_($)', 'Unallocated_Amount']].fillna(value=0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


## Cleaning up PPNO Numbers based on Allocation Sheet

In [73]:
#slicing PPNO to be 5 characters
df_project = df_project.assign(PPNO_New = df_project['PPNO'].str.slice(start=0, stop=5)) 

In [74]:
#importing Excel crosswalk sheet
FILE_NAME3 = "Projects_PPNO.xlsx"
project_ppno = pd.read_excel(f"{GCS_FILE_PATH}{FILE_NAME3}")

In [75]:
#Merge in Crosswalk 
df_project2 = pd.merge(df_project, project_ppno, on = ["Award_Year", "Local_Agency"], how = "left")

In [76]:
#some values in PPNO and PPNO_New2 are strings, some are floats...so have to convert PPNO New 2 to strings
df_project2.PPNO_New = df_project2.apply(lambda x: x.PPNO_New if (str(x.PPNO_New2) == 'nan') else x.PPNO_New2, axis=1)

In [77]:
#making sure PPNO_New is a string 
df_project2 = df_project2.astype({'PPNO_New': 'str'})

In [78]:
PPNO_project = set(df_project2.PPNO_New.unique().tolist())
PPNO_allocation = set(df_allocation.PPNO_New.unique().tolist())

In [79]:
#checking for differences - none. yay. 
PPNO_project - PPNO_allocation

set()

In [80]:
#drop old column
df_project2 = df_project2.drop(['PPNO', 'PPNO_New2'], axis=1)

# Merging Project & Allocations
* Lots of missing values. 
* No difference when I tried to drop values

In [81]:
#merge on left for projects.
df_combined = df_allocation.merge(df_project2, how = "left", on = ["PPNO_New", "Award_Year"])

In [82]:
df_combined.shape

(319, 28)

In [83]:
df_combined.isna().sum()

Award_Year                      0
Award_Recipient                 0
Implementing_Agency             0
Components                      0
Phase                           1
Prior_Fiscal_Years_to_2020    187
Fiscal_Year_2020-2021         243
Fiscal_Year_2021-2022         261
Fiscal_Year_2022-2023         289
Fiscal_Year_2023-2024         301
Fiscal_Year_2024-2025         304
Fiscal_Year_2025-2026         311
Fiscal_Year_2026-2027         313
Fiscal_Year_2027-2028         315
Fiscal_Year_2028-2029         319
Fiscal_Year_2029-2030         319
CTC_Financial_Resolution      106
Allocation_Date                 0
Project_ID                    113
SB1_Funding                     0
GGRF_Funding                    0
Allocation_Amount               0
PPNO_New                        0
Project_#                      33
Unallocated_Amount             33
Local_Agency                   33
Project_Title                  33
TIRCP_Award_Amount_($)         33
dtype: int64

In [84]:
df_combined.dtypes

Award_Year                      int64
Award_Recipient                object
Implementing_Agency            object
Components                     object
Phase                          object
Prior_Fiscal_Years_to_2020    float64
Fiscal_Year_2020-2021         float64
Fiscal_Year_2021-2022         float64
Fiscal_Year_2022-2023         float64
Fiscal_Year_2023-2024         float64
Fiscal_Year_2024-2025         float64
Fiscal_Year_2025-2026         float64
Fiscal_Year_2026-2027         float64
Fiscal_Year_2027-2028         float64
Fiscal_Year_2028-2029         float64
Fiscal_Year_2029-2030         float64
CTC_Financial_Resolution       object
Allocation_Date                object
Project_ID                     object
SB1_Funding                   float64
GGRF_Funding                  float64
Allocation_Amount             float64
PPNO_New                       object
Project_#                     float64
Unallocated_Amount            float64
Local_Agency                   object
Project_Titl

### Filing in NA Project ID values & CTC Financial Resolution with TBD. Fill in Allocation Date with something random.

In [85]:
df_combined[['Project_ID','CTC_Financial_Resolution']] = df_combined[['Project_ID','CTC_Financial_Resolution']].fillna(value = 'TBD')

In [86]:
missing_date = pd.to_datetime('2100-01-01')
df_combined['Allocation_Date'] = df_combined['Allocation_Date'].fillna(missing_date)

# Breakout each year in own dataframe
* Do it in a loop later

In [89]:
#filtering out for year
df_2015 = df_combined.loc[df_combined['Award_Year'] == 2015]
df_2016 = df_combined.loc[df_combined['Award_Year'] == 2016]
df_2018 = df_combined.loc[df_combined['Award_Year'] == 2018]
df_2020 = df_combined.loc[df_combined['Award_Year'] == 2020]

# Mimic sheet


In [97]:
def pivot(df):
    df = df.groupby(['Award_Year','Project_#','TIRCP_Award_Amount_($)','Unallocated_Amount','Award_Recipient','Implementing_Agency',
    'Project_Title','PPNO_New', 'Components','Phase','Project_ID','CTC_Financial_Resolution','Allocation_Date']).agg({
    'Prior_Fiscal_Years_to_2020': 'max', 'Fiscal_Year_2020-2021': 'max',
    'Fiscal_Year_2021-2022': 'max', 'Fiscal_Year_2022-2023': 'max',
    'Fiscal_Year_2023-2024': 'max', 'Fiscal_Year_2024-2025': 'max',
    'Fiscal_Year_2025-2026': 'max', 'Fiscal_Year_2026-2027': 'max',
    'Fiscal_Year_2027-2028': 'max', 'Fiscal_Year_2028-2029': 'max',
    'Fiscal_Year_2029-2030': 'max', 'SB1_Funding': 'sum', 'GGRF_Funding':'sum', 'Allocation_Amount':'sum'})
    return df 

In [98]:
df_2015 = pivot(df_2015)
df_2016 = pivot(df_2016)
df_2018 = pivot(df_2018)
df_2020 = pivot(df_2020)

# Export into Excel

In [None]:
with pd.ExcelWriter("gs://calitp-analytics-data/data-analyses/tircp/TIRCP_PAP_2022.xlsx") as writer:
    df_2015.to_excel(writer, sheet_name="2015 Cycle 1", index=True)
    df_2016.to_excel(writer, sheet_name="2016 Cycle 2", index=True)
    df_2018.to_excel(writer, sheet_name="2018 Cycle 3", index=True)
    df_2020.to_excel(writer, sheet_name="2020 Cycle 4", index=True)