# TIRCP Program Allocation Plan
----


In [None]:
import pandas as pd
import math
from siuba import * 
import numpy as np
# Show up to 50 columns when a dataframe is displayed.
pd.options.display.max_columns = 50
# Render floats with two decimal places instead of scientific notation.
pd.options.display.float_format = "{:.2f}".format
import datetime

In [None]:
# Bucket folder that holds every input and output of this notebook.
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/tircp/"

# The two source workbooks: project tracking sheet and allocation agreement.
FILE_NAME1 = "Raw_Project_Tracking_Sheet.xlsx"
FILE_NAME2 = "Allocation_Agreement.xlsx"

project = pd.read_excel(GCS_FILE_PATH + FILE_NAME1)
allocation = pd.read_excel(GCS_FILE_PATH + FILE_NAME2)

In [None]:
# Trim surrounding whitespace from every column name and turn inner spaces
# into underscores, for both raw sheets.
for raw_sheet in (project, allocation):
    raw_sheet.columns = raw_sheet.columns.str.strip().str.replace(' ', '_')

In [None]:
# Strip leading/trailing whitespace from each column name of both sheets.
for raw_sheet in (project, allocation):
    raw_sheet.columns = raw_sheet.columns.map(lambda name: name.strip())

## Keeping only relevant columns.

In [None]:
# Keep only the project-sheet columns used in the allocation plan.
# An explicit copy is taken because columns of df_project are assigned to
# further down; without it pandas treats the subset as derived from `project`
# and emits SettingWithCopyWarning on those assignments.
df_project = project[['Award_Year', 'Project_#','Unallocated_Amount','Local_Agency','Project_Title','PPNO',
'TIRCP_Award_Amount_($)',]].copy()

In [None]:
# Columns of the allocation sheet that feed the allocation plan:
# identifiers, the per-fiscal-year amounts, and the funding breakdown.
allocation_columns = [
    'Award_Year', 'Award_Recipient', 'Implementing_Agency',
    'Components', 'PPNO', 'Phase',
    'Prior_Fiscal_Years_to_2020',
    'Fiscal_Year_2020-2021',
    'Fiscal_Year_2021-2022',
    'Fiscal_Year_2022-2023',
    'Fiscal_Year_2023-2024',
    'Fiscal_Year_2024-2025',
    'Fiscal_Year_2025-2026',
    'Fiscal_Year_2026-2027',
    'Fiscal_Year_2027-2028',
    'Fiscal_Year_2028-2029',
    'Fiscal_Year_2029-2030',
    'CTC_Financial_Resolution', 'Allocation_Date', 'Project_ID',
    'SB1_Funding', 'GGRF_Funding', 'Allocation_Amount',
]
df_allocation = allocation[allocation_columns]

# Cleaning Allocation Sheet 

In [None]:
#Filtering out for 2021, since that entry is blank
# .copy() makes the filtered frame independent of the frame it was selected
# from, so the column assignments that follow do not raise
# SettingWithCopyWarning.
df_allocation = df_allocation.query("Award_Year != 2021").copy()

In [None]:
# Normalise the free-text date column(s), one substitution at a time:
# slashes become dashes, the word 'Complete' and newlines are removed,
# 'Pending' becomes 'TBD', and missing values are filled with 'TBD'.
for date_col in ["Allocation_Date"]:
    tidied = df_allocation[date_col].replace('/', '-', regex=True)
    tidied = tidied.replace('Complete', '', regex=True)
    tidied = tidied.replace('\n', '', regex=True)
    tidied = tidied.replace('Pending', 'TBD', regex=True)
    df_allocation[date_col] = tidied.fillna('TBD')

## Clean up Dates

#changing some of the dates
# Whole-cell corrections for Allocation_Date entries that are not a single
# plain date (two dates in one cell, fiscal-year labels, a malformed date).
# NOTE(review): read as an Excel serial number, 45211 is 2023-10-12, but the
# mapping below uses 2023-10-22 -- confirm which date is intended.
date_fixes = {
    'October 15, 2018\nSeptember 30, 2021': '2018-10-15 00:00:00',
    '2/1/2021\n\n10/31/2022': '2021-02-01 00:00:00',
    '45211': '2023-10-22',
    "FY 26/27": "2026-12-31",
    "08/12//20": '2020-08-12 00:00:00',
    'FY 21/22': '2021-12-31',
    'FY 22/23': '2022-12-31',
    'FY 20/21': '2020-12-31',
    'FY 23/24': '2023-12-31',
    'FY 24/25': '2024-12-31',
    'FY 25/26': '2025-12-31',
}
# The generic Allocation_Date cleanup earlier in the notebook rewrites '/' as
# '-' and deletes newlines, so by this point the raw spellings above no longer
# occur in the column. Register the post-cleanup spelling of every key as well
# so the corrections apply whether or not that cleanup has already run.
date_fixes.update({
    raw.replace('/', '-').replace('\n', ''): fixed
    for raw, fixed in list(date_fixes.items())
})
# Assign the result back instead of calling replace(inplace=True) on a column
# selection, which is not guaranteed to modify df_allocation itself.
df_allocation["Allocation_Date"] = df_allocation["Allocation_Date"].replace(date_fixes)

## Cleaning up PPNO, can only be 5 characters.

In [None]:
# Keep only the first five characters of each allocation PPNO so it lines up
# with the PPNO in the project data frame, where a PPNO should be five
# characters/numbers long.
df_allocation = df_allocation.assign(
    PPNO_New=lambda frame: frame['PPNO'].str[:5]
)

In [None]:
# Crosswalk CSV with PPNO & Award Recipients.
# (FILE_NAME2 is re-pointed from the allocation workbook to the crosswalk.)
FILE_NAME2 = "Allocation_PPNO_Crosswalk.csv"
allocation_ppno = pd.read_csv(GCS_FILE_PATH + FILE_NAME2)

In [None]:
# Display the crosswalk as the cell output for a visual sanity check.
allocation_ppno #printing to make sure it makes sense.

In [None]:
# Attach the crosswalk columns to every allocation row, matching on award
# year and recipient; allocation rows without a crosswalk entry are kept.
df_allocation = df_allocation.merge(
    allocation_ppno,
    how="left",
    on=["Award_Year", "Award_Recipient"],
)

In [None]:
# Prefer the crosswalk value (PPNO_New2) and fall back to the sliced PPNO_New
# only where the crosswalk has no entry.
# isna() detects missing values directly (NaN and None), regardless of whether
# the remaining values are strings or floats, so the row-by-row
# str(...) == 'nan' comparison is not needed.
crosswalk_missing = df_allocation['PPNO_New2'].isna()
df_allocation['PPNO_New'] = df_allocation['PPNO_New'].where(
    crosswalk_missing, df_allocation['PPNO_New2']
)

In [None]:
# The raw PPNO and the crosswalk helper column are no longer needed now that
# PPNO_New holds the final value.
df_allocation = df_allocation.drop(columns=['PPNO', 'PPNO_New2'])

In [None]:
## Clean up TIRCP
# Missing funding amounts are treated as zero.
funding_cols = ['SB1_Funding', 'GGRF_Funding', 'Allocation_Amount']
df_allocation[funding_cols] = df_allocation[funding_cols].fillna(0)

# Cleaning Project Sheet



In [None]:
# Preview the first two rows of the project subset.
df_project.head(2)

## Filling NA for TIRCP Award and Unallocated Amounts

In [None]:
# Missing award / unallocated amounts are treated as zero.
amount_cols = ['TIRCP_Award_Amount_($)', 'Unallocated_Amount']
df_project[amount_cols] = df_project[amount_cols].fillna(0)

## Cleaning up PPNO Numbers based on Allocation Sheet

In [None]:
# PPNO_New is the project PPNO cut down to its first five characters.
df_project = df_project.assign(
    PPNO_New=lambda frame: frame['PPNO'].str[:5]
)

In [None]:
# Excel crosswalk sheet for the project PPNOs.
FILE_NAME3 = "Projects_PPNO.xlsx"
project_ppno = pd.read_excel(GCS_FILE_PATH + FILE_NAME3)

In [None]:
# Attach the crosswalk columns to every project row, matching on award year
# and local agency; project rows without a crosswalk entry are kept.
df_project2 = df_project.merge(
    project_ppno,
    how="left",
    on=["Award_Year", "Local_Agency"],
)

In [None]:
# Prefer the crosswalk value (PPNO_New2) and fall back to the sliced PPNO_New
# only where the crosswalk has no entry.
# isna() detects missing values directly (NaN and None), regardless of whether
# the remaining values are strings or floats, so the row-by-row
# str(...) == 'nan' comparison is not needed.
crosswalk_missing = df_project2['PPNO_New2'].isna()
df_project2['PPNO_New'] = df_project2['PPNO_New'].where(
    crosswalk_missing, df_project2['PPNO_New2']
)

In [None]:
# Force PPNO_New to string values so it has one consistent type.
df_project2 = df_project2.assign(
    PPNO_New=df_project2['PPNO_New'].astype(str)
)

In [None]:
# Distinct PPNO_New values on each side, as sets, for comparison.
PPNO_project = set(df_project2['PPNO_New'].unique())
PPNO_allocation = set(df_allocation['PPNO_New'].unique())

In [None]:
# PPNOs present in the project sheet but absent from the allocation sheet;
# an empty set means every project PPNO has a match.
PPNO_project.difference(PPNO_allocation)

In [None]:
# The raw PPNO and the crosswalk helper column are no longer needed now that
# PPNO_New holds the final value.
df_project2 = df_project2.drop(columns=['PPNO', 'PPNO_New2'])

# Merging Project & Allocations

In [None]:
# Left merge with the allocation rows as the base: every allocation row is
# kept and project columns are attached where PPNO_New and Award_Year match.
df_combined = pd.merge(
    df_allocation,
    df_project2,
    on=["PPNO_New", "Award_Year"],
    how="left",
)

In [None]:
# (rows, columns) of the merged frame.
df_combined.shape

In [None]:
# Number of missing values in each column of the merged frame.
df_combined.isna().sum()

### Filling in NA Project ID values & CTC Financial Resolution with TBD. Create new column to represent Award No

In [None]:
# Missing project IDs and financial resolutions are shown as 'TBD'.
tbd_cols = ['Project_ID', 'CTC_Financial_Resolution']
df_combined[tbd_cols] = df_combined[tbd_cols].fillna('TBD')

In [None]:
#missing_date = pd.to_datetime('2100-01-01')
#df_combined['Allocation_Date'] = df_combined['Allocation_Date'].fillna(missing_date)

In [None]:
# Award_No is "<award year>-<project number>", built from the string form of
# both columns.
award_year_text = df_combined["Award_Year"].astype(str)
project_number_text = df_combined["Project_#"].astype(str)
df_combined['Award_No'] = award_year_text + "-" + project_number_text

In [None]:
# Column dtypes of the merged frame.
df_combined.dtypes

In [None]:
# Rename columns to the headings used in the output sheet.
output_headings = {
    'Allocation_Amount': 'Total_Amount',
    'SB1_Funding': 'PTA-SB1_Amount',
    'Components': 'Separable_Phases/Components',
    'CTC_Financial_Resolution': 'Allocation_Resolution',
}
df_combined = df_combined.rename(columns=output_headings)

# Breakout each year in own dataframe

In [None]:
# One frame per award year (2015, 2016, 2018, 2020), each holding the rows of
# df_combined whose Award_Year equals that year.
df_2015, df_2016, df_2018, df_2020 = (
    df_combined.loc[df_combined['Award_Year'] == award_year]
    for award_year in (2015, 2016, 2018, 2020)
)

In [None]:
df2020.loc['Grand_total'] = (df_2020.sum(numeric_only=True))

In [None]:
df2020

# Mimic sheet


In [None]:
def pivot(df):
    """Collapse a cycle's rows to one row per project/phase/allocation.

    Rows are grouped on the project, recipient, phase and allocation
    identifier columns. Within each group the fiscal-year columns take their
    maximum and the three funding columns are summed.

    NOTE(review): ``groupby`` is called with its defaults, which in pandas
    exclude rows that have a missing value in any grouping column -- confirm
    that this is intended.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing every grouping and value column named below.

    Returns
    -------
    pandas.DataFrame
        Aggregated frame indexed by the grouping columns.
    """
    group_keys = [
        'Project_#', 'Award_No', 'TIRCP_Award_Amount_($)',
        'Unallocated_Amount', 'Award_Recipient', 'Implementing_Agency',
        'Project_Title', 'PPNO_New', 'Separable_Phases/Components',
        'Phase', 'Project_ID', 'Allocation_Resolution', 'Allocation_Date',
    ]
    # Fiscal-year columns: take the largest value within a group.
    max_cols = [
        'Prior_Fiscal_Years_to_2020',
        'Fiscal_Year_2020-2021',
        'Fiscal_Year_2021-2022',
        'Fiscal_Year_2022-2023',
        'Fiscal_Year_2023-2024',
        'Fiscal_Year_2024-2025',
        'Fiscal_Year_2025-2026',
        'Fiscal_Year_2026-2027',
        'Fiscal_Year_2027-2028',
        'Fiscal_Year_2028-2029',
        'Fiscal_Year_2029-2030',
    ]
    # Funding columns: add up within a group.
    sum_cols = ['PTA-SB1_Amount', 'GGRF_Funding', 'Total_Amount']

    how_to_aggregate = {col: 'max' for col in max_cols}
    how_to_aggregate.update({col: 'sum' for col in sum_cols})

    grouped = df.groupby(group_keys)
    return grouped.agg(how_to_aggregate)

In [None]:
# Aggregate each award-year frame with pivot(), in year order.
agg_df_2015, agg_df_2016, agg_df_2018, agg_df_2020 = map(
    pivot, (df_2015, df_2016, df_2018, df_2020)
)

# Export into Excel

In [None]:
# Sheet name -> aggregated frame, in the order the sheets are written.
cycle_sheets = {
    "2015 Cycle 1": agg_df_2015,
    "2016 Cycle 2": agg_df_2016,
    "2018 Cycle 3": agg_df_2018,
    "2020 Cycle 4": agg_df_2020,
}
# Write every cycle to its own sheet of one workbook, keeping the index.
with pd.ExcelWriter("gs://calitp-analytics-data/data-analyses/tircp/TIRCP_PAP_2022.xlsx") as writer:
    for sheet_label, cycle_frame in cycle_sheets.items():
        cycle_frame.to_excel(writer, sheet_name=sheet_label, index=True)