# Testing TIRCP data-cleaning and summary functions

In [9]:
import numpy as np
import pandas as pd
# Base GCS folder that the loader functions below prepend to each workbook file name.
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/tircp/"
# Show up to 50 columns when a DataFrame is displayed in a cell output.
pd.options.display.max_columns = 50

In [10]:
#Project Sheet
def project():
    """Load the raw TIRCP project tracking sheet and tidy it for analysis.

    Reads ``Raw_Project_Tracking_Sheet.xlsx`` from ``GCS_FILE_PATH`` and then:

    * trims the column names and replaces spaces with underscores,
    * adds ``PPNO_New`` holding the first five characters of ``PPNO``,
    * converts the monetary columns to numeric, with blanks *and*
      non-numeric entries set to 0,
    * renames the award / expended / unallocated columns with a
      ``project_sheet`` suffix to avoid confusion with the allocation sheet.

    Returns
    -------
    pandas.DataFrame
        The cleaned project sheet, one row per row of the workbook.
    """
    FILE_NAME1 = "Raw_Project_Tracking_Sheet.xlsx"
    df = pd.read_excel(f"{GCS_FILE_PATH}{FILE_NAME1}")

    # Trim surrounding whitespace first, then turn inner spaces into underscores.
    df.columns = df.columns.str.strip().str.replace(' ', '_', regex=False)

    # Keep only the first five characters of PPNO.
    df = df.assign(PPNO_New=df['PPNO'].str[:5])

    # Coerce BEFORE filling: to_numeric(errors='coerce') turns text placeholders
    # into NaN, so filling first would leave those NaN values in the output.
    proj_cols = ['TIRCP_Award_Amount_($)', 'Allocated_Amount', 'Expended_Amount',
                 'Unallocated_Amount', 'Total_Project_Cost', 'Other_Funds_Involved']
    df[proj_cols] = df[proj_cols].apply(pd.to_numeric, errors='coerce').fillna(0)

    # Rename to avoid confusion with the allocation sheet.
    df = df.rename(columns={'TIRCP_Award_Amount_($)': 'TIRCP_project_sheet',
                            'Expended_Amount': 'Expended_Amt_project_sheet',
                            'Unallocated_Amount': 'Unallocated_amt_project_sheet'})
    return df

In [11]:
# Run the project-sheet loader and preview the first cleaned row.
project_test = project()
project_test.head(1)

Unnamed: 0,Award_Year,Project_#,Local_Agency,Vendor_ID_#,Project_Title,PPNO,District,County,Key_Project_Elements,Master_Agreement_Number,Master_Agreement_Expiration_Date,Project_Manager,Regional_Coordinator,Technical_Assistance-CALTP_(Y/N),Technical_Assistance-Fleet_(Y/N),Technical_Assistance-Network_Integration_(Y/N),Technical_Assistance-Priority_Population_(Y/N),Total_Project_Cost,TIRCP_project_sheet,Allocated_Amount,Unallocated_amt_project_sheet,Percentge_Allocated,Expended_Amt_project_sheet,Other_Funds_Involved,Award_Cycle,Local_Agency_Address,Local_Agency_City,Local_Agency_Zip,Local_Agency_Contact,Local_Agency_Email,Local_Agency_Phone_Number,Comments/Additional_Contacts,PPNO_New
0,2015,1,Antelope Valley Transit Authority (AVTA),TBD,Regional Transit Interconnectivity & Environme...,CP005,7,LA,Purchase 13 60-foot articulated BRT buses and ...,64AVTA2015MA,2024-04-01,Yesenia,Ryan Greenway,,,,,39478000.0,24403000.0,24403000,0,1,21714177.53,0.0,1,"42210 6th Street West\nLancaster, CA 93534",Lancaster,93534,Judy Fry,Jfry@avta.com,(611) 729-2234,,CP005


In [12]:
# List the column names of the cleaned project sheet.
project_test.columns

Index(['Award_Year', 'Project_#', 'Local_Agency', 'Vendor_ID_#',
       'Project_Title', 'PPNO', 'District', 'County', 'Key_Project_Elements',
       'Master_Agreement_Number', 'Master_Agreement_Expiration_Date',
       'Project_Manager', 'Regional_Coordinator',
       'Technical_Assistance-CALTP_(Y/N)', 'Technical_Assistance-Fleet_(Y/N)',
       'Technical_Assistance-Network_Integration_(Y/N)',
       'Technical_Assistance-Priority_Population_(Y/N)', 'Total_Project_Cost',
       'TIRCP_project_sheet', 'Allocated_Amount',
       'Unallocated_amt_project_sheet', 'Percentge_Allocated',
       'Expended_Amt_project_sheet', 'Other_Funds_Involved', 'Award_Cycle',
       'Local_Agency_Address', 'Local_Agency_City', 'Local_Agency_Zip',
       'Local_Agency_Contact', 'Local_Agency_Email',
       'Local_Agency_Phone_Number', 'Comments/Additional_Contacts',
       'PPNO_New'],
      dtype='object')

In [13]:
def allocation():
    """Load the TIRCP allocation agreement sheet and tidy it for analysis.

    Reads ``Allocation_Agreement.xlsx`` from ``GCS_FILE_PATH`` and then:

    * trims the column names and replaces spaces with underscores,
    * adds ``PPNO_New`` holding the first five characters of ``PPNO``,
    * replaces the text ``'Deallocation'`` in ``Expended_Amount`` with 0,
    * renames ``3rd_Party_Award_Date`` to ``Third_Party_Award_Date``,
    * cleans the four date columns and parses them to ``datetime.date``
      values (anything unparseable becomes ``NaT``); the parsed columns are
      placed at the end of the frame,
    * converts the monetary columns to numeric, with blanks *and*
      non-numeric entries set to 0,
    * renames the allocation / expended amount columns with an
      ``Allocation_Sheet`` suffix to avoid confusion with the project sheet.

    Returns
    -------
    pandas.DataFrame
        The cleaned allocation sheet, one row per row of the workbook.
    """
    FILE_NAME2 = "Allocation_Agreement.xlsx"
    df = pd.read_excel(f"{GCS_FILE_PATH}{FILE_NAME2}")

    # Trim surrounding whitespace first, then turn inner spaces into underscores.
    df.columns = df.columns.str.strip().str.replace(' ', '_', regex=False)

    # Keep only the first five characters of PPNO.
    df = df.assign(PPNO_New=df['PPNO'].str.slice(start=0, stop=5))

    # Assign the result back instead of calling replace(inplace=True) on a
    # column selection, which does not reliably update the parent frame.
    df['Expended_Amount'] = df['Expended_Amount'].replace({'Deallocation': 0})

    # Rename third party award date so the name no longer starts with a digit.
    df = df.rename(columns={'3rd_Party_Award_Date': 'Third_Party_Award_Date'})

    # Normalise the text in all date columns at once: unify separators, drop
    # 'Complete' and newlines, and mark pending/blank cells as 'TBD'.
    alloc_dates = ["Allocation_Date", "Third_Party_Award_Date", "Completion_Date", "LED"]
    cleaned_dates = (
        df[alloc_dates]
        .replace('/', '-', regex=True)
        .replace('Complete', '', regex=True)
        .replace('\n', '', regex=True)
        .replace('Pending', 'TBD', regex=True)
        .fillna('TBD')
    )

    # Coerce to dates; placeholders such as 'TBD' end up as NaT.
    parsed_dates = {
        col: pd.to_datetime(cleaned_dates[col], errors="coerce").dt.date
        for col in alloc_dates
    }
    # Swap the raw date columns for the parsed ones (appended at the end).
    df = df.drop(columns=alloc_dates).assign(**parsed_dates)

    # Coerce BEFORE filling: to_numeric(errors='coerce') turns text placeholders
    # into NaN, so filling first would leave those NaN values in the output.
    allocation_monetary = ['SB1_Funding', 'Expended_Amount', 'Allocation_Amount',
                           'GGRF_Funding', 'Prior_Fiscal_Years_to_2020',
                           'Fiscal_Year_2020-2021', 'Fiscal_Year_2021-2022',
                           'Fiscal_Year_2022-2023', 'Fiscal_Year_2023-2024',
                           'Fiscal_Year_2024-2025', 'Fiscal_Year_2025-2026',
                           'Fiscal_Year_2026-2027', 'Fiscal_Year_2027-2028',
                           'Fiscal_Year_2028-2029', 'Fiscal_Year_2029-2030']
    df[allocation_monetary] = (
        df[allocation_monetary].apply(pd.to_numeric, errors='coerce').fillna(0)
    )

    # Rename columns that are similar to the project sheet to avoid confusion.
    df = df.rename(columns={'Allocation_Amount': 'Allocation_Amt_Allocation_Sheet',
                            'Expended_Amount': 'Expended_Amt_Allocation_Sheet'})
    return df

In [14]:
# Run the allocation-sheet loader and preview the first cleaned row.
allocation_test = allocation()

allocation_test.head(1)

Unnamed: 0,Award_Year,Project_#,Award_Recipient,Implementing_Agency,PPNO,Project_ID,EA,Components,Phase,Allocation_Amt_Allocation_Sheet,Expended_Amt_Allocation_Sheet,SB1_Funding,SB1_Budget_Year,GGRF_Funding,GGRF_Budget_Year,CTC_Financial_Resolution,CTC_Allocation_Amendment,CTC_Waiver,CTC_CalSTA_Waiver,PSA_#,CT_Document_#,Date_Branch_Chief_Receives_PSA,Date_Regional_Coordinator_Receives_PSA,Date_OC_Receives_PSA,Date_OPM_Receives_PSA,Date_Legal_Receives_PSA,Date_Returned_to_PM,Date_PSA_Sent_to_Local_Agency,Date_PSA_Approved_by_Local_Agency,Date_Signed_by_DRMT,PSA_Expiry_Date,LONP,Prior_Fiscal_Years_to_2020,Fiscal_Year_2020-2021,Fiscal_Year_2021-2022,Fiscal_Year_2022-2023,Fiscal_Year_2023-2024,Fiscal_Year_2024-2025,Fiscal_Year_2025-2026,Fiscal_Year_2026-2027,Fiscal_Year_2027-2028,Fiscal_Year_2028-2029,Fiscal_Year_2029-2030,Allocation_Comments,PSA_Comments,PPNO_New,Allocation_Date,Third_Party_Award_Date,Completion_Date,LED
0,2015,1.0,Antelope Valley Transit Authority,Antelope Valley Transit Authority,CP005,16000048,T343GA,Purchase 13 60-foot articulated BRT buses and ...,CONST,24403000.0,21714177.53,0.0,2015-16,24403000.0,,TIRCP-1516-02\n Tech. Correction May 2016 & Ju...,,,,"07AVTA2015PS \nJUL 26, 2016;\n\n07AVTA2015PS A...",07AVTA2015PS\n*Listed under Unit 3040\n,NaT,NaT,,,NaT,,,NaT,2021-02-02 00:00:00,NaT,,24403000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Program Supplement be Amended to show a correc...,,CP005,2015-10-22,2016-03-14,2022-03-30,2022-03-31


In [15]:
# List the column names of the cleaned allocation sheet.
allocation_test.columns

Index(['Award_Year', 'Project_#', 'Award_Recipient', 'Implementing_Agency',
       'PPNO', 'Project_ID', 'EA', 'Components', 'Phase',
       'Allocation_Amt_Allocation_Sheet', 'Expended_Amt_Allocation_Sheet',
       'SB1_Funding', 'SB1_Budget_Year', 'GGRF_Funding', 'GGRF_Budget_Year',
       'CTC_Financial_Resolution', 'CTC_Allocation_Amendment', 'CTC_Waiver',
       'CTC_CalSTA_Waiver', 'PSA_#', 'CT_Document_#',
       'Date_Branch_Chief_Receives_PSA',
       'Date_Regional_Coordinator_Receives_PSA', 'Date_OC_Receives_PSA',
       'Date_OPM_Receives_PSA', 'Date_Legal_Receives_PSA',
       'Date_Returned_to_PM', 'Date_PSA_Sent_to_Local_Agency',
       'Date_PSA_Approved_by_Local_Agency', 'Date_Signed_by_DRMT',
       'PSA_Expiry_Date', 'LONP', 'Prior_Fiscal_Years_to_2020',
       'Fiscal_Year_2020-2021', 'Fiscal_Year_2021-2022',
       'Fiscal_Year_2022-2023', 'Fiscal_Year_2023-2024',
       'Fiscal_Year_2024-2025', 'Fiscal_Year_2025-2026',
       'Fiscal_Year_2026-2027', 'Fiscal_Year_

In [18]:
def summary_SAR_table_two(df):
    """Build the per-award-year summary table with a grand-total column.

    Fully duplicated rows are dropped, then for each ``Award_Year`` the
    projects are counted (non-null ``Project_#``) and the award, allocated
    and expended amounts are summed. Three percentages are derived from
    those sums. The result is transposed so that award years are columns,
    and a ``Grand_Total`` column is appended: sums for the count and amount
    rows, percentages recomputed from the summed amounts.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Award_Year``, ``Project_#``, ``TIRCP_project_sheet``,
        ``Allocated_Amount`` and ``Expended_Amt_project_sheet``.

    Returns
    -------
    pandas.DataFrame
        Rows are the report measures in report order; columns are the award
        years followed by ``Grand_Total``.
    """
    # Source column -> name used in the report.
    report_names = {'Project_#': 'Number_of_Awarded_Projects',
                    'TIRCP_project_sheet': 'Award_Amount',
                    'Allocated_Amount': 'Amount_Allocated',
                    'Expended_Amt_project_sheet': 'Expended_Amount'}

    # One row per award year, with the three percentage measures added.
    yearly = (
        df.drop_duplicates()
        .groupby(['Award_Year'])
        .agg({'Project_#': 'count', 'TIRCP_project_sheet': 'sum',
              'Allocated_Amount': 'sum', 'Expended_Amt_project_sheet': 'sum'})
        .reset_index()
        .rename(columns=report_names)
        .assign(
            Expended_Percent_of_Awarded=lambda t: (t['Expended_Amount'] / t['Award_Amount']) * 100,
            Expended_Percent_of_Allocated=lambda t: (t['Expended_Amount'] / t['Amount_Allocated']) * 100,
            Percent_Allocated=lambda t: (t['Amount_Allocated'] / t['Award_Amount']) * 100,
        )
    )

    # Measures become rows, award years become columns.
    report = yearly.set_index('Award_Year').T

    # Only the count and monetary rows can be summed across years.
    summed_rows = ['Award_Amount', 'Amount_Allocated', 'Expended_Amount',
                   'Number_of_Awarded_Projects']
    report['Grand_Total'] = report.loc[summed_rows, :].sum(axis=1)

    # Overall percentages come from the summed amounts, not from the yearly ones.
    spent, allocated, awarded = (
        report.at[row, 'Grand_Total']
        for row in ('Expended_Amount', 'Amount_Allocated', 'Award_Amount')
    )
    report.at['Expended_Percent_of_Awarded', 'Grand_Total'] = (spent / awarded) * 100
    report.at['Expended_Percent_of_Allocated', 'Grand_Total'] = (spent / allocated) * 100
    report.at['Percent_Allocated', 'Grand_Total'] = (allocated / awarded) * 100

    # Put the rows in the order the report expects.
    row_order = ['Number_of_Awarded_Projects', 'Award_Amount', 'Amount_Allocated',
                 'Percent_Allocated', 'Expended_Amount', 'Expended_Percent_of_Awarded',
                 'Expended_Percent_of_Allocated']
    return report.reindex(row_order)

In [19]:
# Build the award-year summary table from the cleaned project sheet.
summary_SAR_table_two(project_test)

Award_Year,2015,2016,2018,2020,Grand_Total
Number_of_Awarded_Projects,14.0,15.0,28.0,17.0,74.0
Award_Amount,224328000.0,788444000.0,4251722000.0,500000000.0,5764494000.0
Amount_Allocated,224278000.0,422477800.0,1627802000.0,74030000.0,2348588000.0
Percent_Allocated,99.97771,53.58374,38.28571,14.806,40.74231
Expended_Amount,143556100.0,119517700.0,210456600.0,1823463.0,475353900.0
Expended_Percent_of_Awarded,63.99383,15.15868,4.949916,0.3646925,8.246239
Expended_Percent_of_Allocated,64.0081,28.28971,12.92889,2.46314,20.23999
