#  Loading in sheets

In [5]:
import numpy as np
import pandas as pd
import TIRCP_functions
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/tircp/"
pd.options.display.max_columns = 50
pd.options.display.float_format = "{:.0f}".format

In [6]:
###INPUT THIS IN
# Crosswalk tables are loaded once at notebook level; project() and
# allocation() below read them as globals when merging.
#Allocation PPNO Crosswalk (merged in allocation() on Award_Year + Award_Recipient)
FILE_NAME3 = "Allocation_PPNO_Crosswalk.csv"
allocation_ppno_crosswalk = pd.read_csv(f"{GCS_FILE_PATH}{FILE_NAME3}")

#Project PPNO Crosswalk (merged in project() on Award_Year + Local_Agency)
FILE_NAME4 = "Projects_PPNO.xlsx"
project_ppno_crosswalk = pd.read_excel(f"{GCS_FILE_PATH}{FILE_NAME4}")


In [7]:
#PASTE THIS IN
#Project Sheet
def project():
    """Load the raw project tracking sheet from GCS and return it cleaned.

    Steps, in order:
      1. Read ``Raw_Project_Tracking_Sheet.xlsx`` from ``GCS_FILE_PATH``.
      2. Normalise the column headers (trim whitespace, spaces -> underscores).
      3. Truncate ``PPNO`` to its first five characters, then override it with
         ``PPNO_New2`` from ``project_ppno_crosswalk`` wherever the crosswalk
         has a match on ``Award_Year`` + ``Local_Agency``.
      4. Coerce the monetary columns to numeric; missing or unparseable
         entries become 0.
      5. Rename the columns whose names clash with the allocation sheet.

    Relies on the notebook-level names ``GCS_FILE_PATH`` and
    ``project_ppno_crosswalk``.

    Returns:
        pandas.DataFrame: the cleaned project sheet, with ``PPNO`` as the last
        column. NOTE(review): the crosswalk merge is a left join, so duplicate
        crosswalk keys would duplicate project rows -- confirm keys are unique.
    """
    FILE_NAME1 = "Raw_Project_Tracking_Sheet.xlsx"
    df = pd.read_excel(f"{GCS_FILE_PATH}{FILE_NAME1}")
    # One pass is enough: strip the ends, then replace the remaining spaces.
    df.columns = df.columns.str.strip().str.replace(' ', '_')

    ### PPNO CLEAN UP ###
    # stripping PPNO down to <5 characters
    df = df.assign(PPNO_New = df['PPNO'].str.slice(start=0, stop=5))
    # Merge in Crosswalk
    df = pd.merge(df, project_ppno_crosswalk, on = ["Award_Year", "Local_Agency"], how = "left")
    # Keep the truncated PPNO unless the crosswalk supplies a replacement.
    df['PPNO_New'] = df['PPNO_New'].where(df['PPNO_New2'].isna(), df['PPNO_New2'])
    df = df.drop(['PPNO','PPNO_New2'], axis=1).rename(columns = {'PPNO_New':'PPNO'})

    ### MONETARY COLS CLEAN UP ###
    proj_cols = ['TIRCP_Award_Amount_($)', 'Allocated_Amount','Expended_Amount','Unallocated_Amount','Total_Project_Cost','Other_Funds_Involved']
    # Coerce first, fill second: errors='coerce' turns text into NaN, so
    # filling before the coercion would leave those NaNs in the result.
    df[proj_cols] = df[proj_cols].apply(pd.to_numeric, errors='coerce').fillna(0)

    # rename to avoid confusion with allocation sheet
    df = (df.rename(columns = {'TIRCP_Award_Amount_($)':'TIRCP_project_sheet',
                               'Expended_Amount': 'Expended_Amt_project_sheet',
                               'Unallocated_Amount':'Unallocated_amt_project_sheet'})
         )
    return df

In [8]:
project_test =project()

In [9]:
project_test = project()
project_test.shape

(74, 32)

In [10]:
# PASTE THIS IN 
def allocation():
    """Load the allocation agreement sheet from GCS and return it cleaned.

    Steps, in order:
      1. Read ``Allocation_Agreement.xlsx`` from ``GCS_FILE_PATH``.
      2. Normalise the column headers (trim whitespace, spaces -> underscores).
      3. Truncate ``PPNO`` to its first five characters, then override it with
         ``PPNO_New2`` from ``allocation_ppno_crosswalk`` wherever the
         crosswalk has a match on ``Award_Year`` + ``Award_Recipient``.
      4. Clean the four date columns and convert them to ``datetime.date``
         values; anything unparseable becomes ``NaT``.
      5. Coerce the monetary columns to numeric; missing or unparseable
         entries become 0.
      6. Rename the columns whose names clash with the project sheet.

    Relies on the notebook-level names ``GCS_FILE_PATH`` and
    ``allocation_ppno_crosswalk``.

    Returns:
        pandas.DataFrame: the cleaned allocation sheet. ``PPNO`` and the four
        date columns are the trailing columns, in that order.
    """
    FILE_NAME2 = "Allocation_Agreement.xlsx"
    df = pd.read_excel(f"{GCS_FILE_PATH}{FILE_NAME2}")
    # One pass is enough: strip the ends, then replace the remaining spaces.
    df.columns = df.columns.str.strip().str.replace(' ', '_')

    ### PPNO CLEAN UP ###
    # stripping PPNO down to <5 characters
    df = df.assign(PPNO_New = df['PPNO'].str.slice(start=0, stop=5))
    # Merge in Crosswalk
    df = pd.merge(df, allocation_ppno_crosswalk, on = ["Award_Year", "Award_Recipient"], how = "left")
    # Keep the truncated PPNO unless the crosswalk supplies a replacement.
    df['PPNO_New'] = df['PPNO_New'].where(df['PPNO_New2'].isna(), df['PPNO_New2'])
    # Drop old PPNO
    df = df.drop(['PPNO','PPNO_New2'], axis=1).rename(columns = {'PPNO_New': 'PPNO'})

    ### DATES CLEAN UP ###
    # rename third party award date
    df = df.rename(columns = {'3rd_Party_Award_Date':'Third_Party_Award_Date'})
    alloc_dates = ["Allocation_Date", "Third_Party_Award_Date", "Completion_Date", "LED"]
    # Tidy the free-text entries in all four columns at once.
    df[alloc_dates] = (df[alloc_dates]
        .replace('/', '-', regex = True)
        .replace('Complete', '', regex = True)
        .replace('\n', '', regex = True)
        .replace('Pending', 'TBD', regex = True)
        .fillna('TBD')
    )
    # coerce to dates; 'TBD' and other non-dates become NaT
    parsed_dates = {
        col: pd.to_datetime(df[col], errors="coerce").dt.date for col in alloc_dates
    }
    # Dropping the raw columns and re-adding the parsed ones moves the date
    # columns to the end of the frame, matching the previous column order.
    df = df.drop(alloc_dates, axis=1).assign(**parsed_dates)

    ### CLEAN UP MONETARY COLS ###
    # correcting string to 0 (assigned back: an in-place replace on a column
    # selection is a chained assignment and may not write through to df)
    df["Expended_Amount"] = df["Expended_Amount"].replace({'Deallocation': 0})
    allocation_monetary_cols = ['SB1_Funding','Expended_Amount','Allocation_Amount',
       'GGRF_Funding','Prior_Fiscal_Years_to_2020',
       'Fiscal_Year_2020-2021', 'Fiscal_Year_2021-2022',
       'Fiscal_Year_2022-2023', 'Fiscal_Year_2023-2024',
       'Fiscal_Year_2024-2025', 'Fiscal_Year_2025-2026',
       'Fiscal_Year_2026-2027', 'Fiscal_Year_2027-2028',
       'Fiscal_Year_2028-2029', 'Fiscal_Year_2029-2030']
    # Coerce first, fill second: errors='coerce' turns text into NaN, so
    # filling before the coercion would leave those NaNs in the result.
    df[allocation_monetary_cols] = (
        df[allocation_monetary_cols].apply(pd.to_numeric, errors='coerce').fillna(0)
    )
    # rename columns that are similar to project sheet to avoid confusion
    df = (df.rename(columns = {'Allocation_Amount':'Allocation_Amt_Allocation_Sheet',
                               'Expended_Amount': 'Expended_Amt_Allocation_Sheet'})
         )
    return df

In [11]:
allocation_test = allocation()

allocation_test.sample(5)

Unnamed: 0,Award_Year,Project_#,Award_Recipient,Implementing_Agency,Project_ID,EA,Components,Phase,Allocation_Amt_Allocation_Sheet,Expended_Amt_Allocation_Sheet,SB1_Funding,SB1_Budget_Year,GGRF_Funding,GGRF_Budget_Year,CTC_Financial_Resolution,CTC_Allocation_Amendment,CTC_Waiver,CTC_CalSTA_Waiver,PSA_#,CT_Document_#,Date_Branch_Chief_Receives_PSA,Date_Regional_Coordinator_Receives_PSA,Date_OC_Receives_PSA,Date_OPM_Receives_PSA,Date_Legal_Receives_PSA,Date_Returned_to_PM,Date_PSA_Sent_to_Local_Agency,Date_PSA_Approved_by_Local_Agency,Date_Signed_by_DRMT,PSA_Expiry_Date,LONP,Prior_Fiscal_Years_to_2020,Fiscal_Year_2020-2021,Fiscal_Year_2021-2022,Fiscal_Year_2022-2023,Fiscal_Year_2023-2024,Fiscal_Year_2024-2025,Fiscal_Year_2025-2026,Fiscal_Year_2026-2027,Fiscal_Year_2027-2028,Fiscal_Year_2028-2029,Fiscal_Year_2029-2030,Allocation_Comments,PSA_Comments,PPNO,Allocation_Date,Third_Party_Award_Date,Completion_Date,LED
223,2018,27,Southern California Regional Rail Authority,Los Angeles County Metropolitan Transportation...,20000207.0,R386GL,LINK US\n,R/W,68532000,0,38145000,,30387000,,TIRCP 1920-18 (supplemental),,,,07LACMTAPS-04,,NaT,NaT,,Resubmitted to OPM 8/3/2021,2021-08-06,8/6/2021\n2/18/2020,,NaT,2021-08-06 00:00:00,NaT,,68532000,0,0,0,0,0,0,0,0,0,0,AJ243:AV247AC243:AV247U243:AV247AW247AP245:AV2...,As of 12/2 awaiting PPR and info from local ag...,CP033,2020-06-25,NaT,2023-06-30,NaT
236,2018,27,Southern California Regional Rail Authority,Southern California Regional Rail Authority,21000202.0,,Burbank Junction Speed Improvements,CONST,16480000,0,9173000,,7307000,,TIRCP -2021-24,,,,07SCRRAPS-02 A3,,NaT,NaT,,2021-05-11 00:00:00,2021-05-13,2021-05-13 00:00:00,,NaT,2021-06-24 00:00:00,NaT,,0,16480000,0,0,0,0,0,0,0,0,0,,,CP033,2021-03-25,NaT,NaT,NaT
291,2020,11,San Diego Association of Governments (SANDAG),San Diego Metropolitan Transit System (MTS),21000084.0,R443GB,El Cajon Third Track,PS&E,720000,0,360000,2019-20,360000,,TIRCP-2021-04,,,,11SDMTSPS-03\n3/12/2021,11SDMTSPS-03,NaT,NaT,2021-01-27 00:00:00,2021-02-04 00:00:00,2021-02-04,,,2021-03-01,2021-03-12 00:00:00,NaT,,0,720000,0,0,0,0,0,0,0,0,0,,,CP069,2020-10-22,NaT,2023-06-30,2023-06-30
238,2018,27,Southern California Regional Rail Authority,Southern California Regional Rail Authority,21000203.0,R386GU,Chatsworth Station Improvements,CONST,1500000,0,835000,,665000,,TIRCP -2021-24,,,,07SCRRAPS-02 A3,,NaT,NaT,,2021-05-11 00:00:00,2021-05-13,2021-05-13 00:00:00,,NaT,2021-06-24 00:00:00,NaT,,0,1500000,0,0,0,0,0,0,0,0,0,,,CP033,2021-03-25,NaT,NaT,NaT
59,2016,15,San Joaquin Regional Rail Commission,San Joaquin Regional Rail Commission,,,Procurement of Two Tier IV locomotives,CONST,14698284,0,0,,0,,Senate Bill No. 132,,,,10SJRRCPS-03\nPending,,NaT,NaT,,,NaT,,,NaT,,NaT,,0,0,0,0,0,0,0,0,0,0,0,,,CP026,NaT,2020-02-07,2027-12-01,NaT


In [12]:
allocation_test.shape

(307, 49)

In [13]:
allocation_test.isna().sum()

Award_Year                                  0
Project_#                                   1
Award_Recipient                             1
Implementing_Agency                         1
Project_ID                                107
EA                                        122
Components                                  1
Phase                                       2
Allocation_Amt_Allocation_Sheet             0
Expended_Amt_Allocation_Sheet               0
SB1_Funding                                 0
SB1_Budget_Year                           140
GGRF_Funding                                0
GGRF_Budget_Year                          307
CTC_Financial_Resolution                  100
CTC_Allocation_Amendment                  307
CTC_Waiver                                307
CTC_CalSTA_Waiver                         307
PSA_#                                     113
CT_Document_#                             177
Date_Branch_Chief_Receives_PSA            304
Date_Regional_Coordinator_Receives

In [14]:
allocation_test.dtypes

Award_Year                                         int64
Project_#                                        float64
Award_Recipient                                   object
Implementing_Agency                               object
Project_ID                                        object
EA                                                object
Components                                        object
Phase                                             object
Allocation_Amt_Allocation_Sheet                  float64
Expended_Amt_Allocation_Sheet                    float64
SB1_Funding                                      float64
SB1_Budget_Year                                   object
GGRF_Funding                                     float64
GGRF_Budget_Year                                 float64
CTC_Financial_Resolution                          object
CTC_Allocation_Amendment                         float64
CTC_Waiver                                       float64
CTC_CalSTA_Waiver              

# Semi Annual Report

In [15]:
def summary_SAR_table_two(df):
    """Build the Semi Annual Report summary table, one column per award year.

    Args:
        df: project-level frame containing ``Award_Year``, ``Project_#``,
            ``TIRCP_project_sheet``, ``Allocated_Amount`` and
            ``Expended_Amt_project_sheet``.

    Returns:
        pandas.DataFrame: rows are the report metrics (project count, award,
        allocated and expended amounts, and three ratios); columns are the
        award years followed by ``Grand_Total``. Ratios are plain fractions,
        not multiplied by 100.
    """
    report_names = {
        'Project_#': 'Number_of_Awarded_Projects',
        'TIRCP_project_sheet': 'Award_Amount',
        'Allocated_Amount': 'Amount_Allocated',
        'Expended_Amt_project_sheet': 'Expended_Amount',
    }
    # Aggregate per award year after removing exact duplicate rows.
    yearly = (
        df.drop_duplicates()
        .groupby(['Award_Year'])
        .agg({
            'Project_#': 'count',
            'TIRCP_project_sheet': 'sum',
            'Allocated_Amount': 'sum',
            'Expended_Amt_project_sheet': 'sum',
        })
        .reset_index()
        .rename(columns=report_names)
    )
    # Per-year ratios.
    yearly = yearly.assign(
        Expended_Percent_of_Awarded=yearly['Expended_Amount'] / yearly['Award_Amount'],
        Expended_Percent_of_Allocated=yearly['Expended_Amount'] / yearly['Amount_Allocated'],
        Percent_Allocated=yearly['Amount_Allocated'] / yearly['Award_Amount'],
    )
    # Metrics become rows, award years become columns.
    table = yearly.set_index('Award_Year').T

    # Only the additive rows are summed across years; the ratio rows get a
    # NaN grand total here and are filled in from the summed amounts below.
    additive_rows = ['Award_Amount', 'Amount_Allocated', 'Expended_Amount', 'Number_of_Awarded_Projects']
    table['Grand_Total'] = table.loc[additive_rows, :].sum(axis=1)

    total_expended = table.at['Expended_Amount', 'Grand_Total']
    total_allocated = table.at['Amount_Allocated', 'Grand_Total']
    total_awarded = table.at['Award_Amount', 'Grand_Total']
    table.at['Expended_Percent_of_Awarded', 'Grand_Total'] = total_expended / total_awarded
    table.at['Expended_Percent_of_Allocated', 'Grand_Total'] = total_expended / total_allocated
    table.at['Percent_Allocated', 'Grand_Total'] = total_allocated / total_awarded

    # Row order used by the report.
    row_order = [
        'Number_of_Awarded_Projects',
        'Award_Amount',
        'Amount_Allocated',
        'Percent_Allocated',
        'Expended_Amount',
        'Expended_Percent_of_Awarded',
        'Expended_Percent_of_Allocated',
    ]
    return table.reindex(row_order)

In [16]:
summary_SAR_table_two(project_test)

Award_Year,2015,2016,2018,2020,Grand_Total
Number_of_Awarded_Projects,14,15,28,17,74
Award_Amount,224328000,788444000,4251722000,500000000,5764494000
Amount_Allocated,224278000,422477816,1627802000,74030000,2348587816
Percent_Allocated,100,54,38,15,41
Expended_Amount,143556085,119517738,210456650,1823463,475353935
Expended_Percent_of_Awarded,64,15,5,0,8
Expended_Percent_of_Allocated,64,28,13,2,20


In [36]:
# NOTE: a cell only displays its last expression, so the .shape result on the
# next line is computed and discarded; only the null counts are shown.
project_test.shape
# Count of missing values per column of the cleaned project sheet.
project_test.isna().sum()

Award_Year                                         0
Project_#                                          0
Local_Agency                                       0
Vendor_ID_#                                        0
Project_Title                                      0
District                                           8
County                                             4
Key_Project_Elements                               0
Master_Agreement_Number                            7
Master_Agreement_Expiration_Date                   7
Project_Manager                                    0
Regional_Coordinator                               3
Technical_Assistance-CALTP_(Y/N)                  19
Technical_Assistance-Fleet_(Y/N)                  24
Technical_Assistance-Network_Integration_(Y/N)    22
Technical_Assistance-Priority_Population_(Y/N)    22
Total_Project_Cost                                 0
TIRCP_project_sheet                                0
Allocated_Amount                              

In [18]:
allocation_test.shape

(307, 49)

In [33]:
#INPUT IN
def semi_annual_report():
    """Build the Semi Annual Report workbook and return its detail pivot.

    Loads the cleaned project and allocation sheets through
    ``TIRCP_functions``, builds the summary table with
    ``summary_SAR_table_two``, joins allocations to projects on ``PPNO`` +
    ``Award_Year``, derives two ratio columns and a before/after cutoff flag,
    and aggregates to one row per project component and phase.

    Side effects:
        Writes ``FUNCTION_TEST_TIRCP_SAR.xlsx`` under ``GCS_FILE_PATH`` with
        two sheets: ``Summary`` (summary table) and ``FY`` (detail pivot).

    Returns:
        pandas.DataFrame: the detail pivot, indexed by the grouping columns.
    """
    ### Load in sheets ###
    df_project = TIRCP_functions.project()
    df_allocation = TIRCP_functions.allocation()
    # Only keeping certain columns
    project_cols = ['Project_Manager','Award_Year', 'Project_#','Project_Title','PPNO',
                    'TIRCP_project_sheet','Expended_Amt_project_sheet','Allocated_Amount']
    allocation_cols = ['Expended_Amt_Allocation_Sheet','Allocation_Amt_Allocation_Sheet','Award_Year','Award_Recipient', 'Implementing_Agency','PPNO',
                       'Phase', 'LED','Allocation_Date','Completion_Date','Third_Party_Award_Date','Components']
    df_project = df_project[project_cols]
    df_allocation = df_allocation[allocation_cols]

    ### Summary ###
    summary_table_2 = summary_SAR_table_two(df_project)

    ### Join ###
    df_sar = (
        df_allocation.merge(df_project, how = "left", on = ["PPNO", "Award_Year"])
        .drop_duplicates()
    )

    ### Add % ###
    df_sar = df_sar.assign(
        Percent_of_Allocation_Expended = (df_sar['Expended_Amt_Allocation_Sheet']/df_sar['Allocation_Amt_Allocation_Sheet']),
        Percent_of_Award_Fully_Allocated = (df_sar['Allocated_Amount']/df_sar['TIRCP_project_sheet'])
    )

    ### Clean up % cols ###
    # NOTE(review): fillna(0) covers the 0/0 case only; a non-zero amount
    # divided by 0 yields inf, which is left untouched here.
    cols = ['Expended_Amt_Allocation_Sheet','Allocation_Amt_Allocation_Sheet','TIRCP_project_sheet','Expended_Amt_project_sheet','Percent_of_Allocation_Expended', 'Percent_of_Award_Fully_Allocated']
    df_sar[cols] = df_sar[cols].apply(pd.to_numeric, errors='coerce').fillna(0)
    # rename cols
    df_sar = df_sar.rename(columns = {'LED': 'Phase_Completion_Date', 'TIRCP_project_sheet': 'TIRCP_Award_Amount','Third_Party_Award_Date':'CON_Contract_Award_Date'})

    ### Clean Up Dates ###
    # Missing dates get a far-future placeholder so the date columns can be
    # used as groupby keys below (groupby drops rows with missing keys).
    missing_date = pd.to_datetime('2100-01-01')
    dates = ["Allocation_Date", "CON_Contract_Award_Date", "Completion_Date", "Phase_Completion_Date"]
    df_sar[dates] = df_sar[dates].fillna(missing_date)
    # force to date time
    df_sar[dates] = df_sar[dates].apply(pd.to_datetime)

    # Flag is 'X' when the allocation date is on or before 2020-07-31 and a
    # single space when it is later. Rows with no allocation date carry the
    # 2100-01-01 placeholder, so they are flagged with a space.
    cutoff = pd.Timestamp(2020, 7, 31)
    df_sar = df_sar.assign(
        Allocated_Before_July_31_2020 = np.where(df_sar['Allocation_Date'] > cutoff, ' ', 'X')
    )

    ### Pivot ###
    # NOTE(review): groupby drops rows whose key columns are missing (pandas
    # default dropna=True), so allocation rows with no matching project row,
    # or with no Components/Phase, are absent from the pivot -- confirm that
    # this is intended.
    group_keys = ['Award_Year','Project_#','Award_Recipient','Project_Title','Project_Manager','TIRCP_Award_Amount','Percent_of_Award_Fully_Allocated','Components','PPNO','Phase',
                  "Allocation_Date", "CON_Contract_Award_Date", "Completion_Date", "Phase_Completion_Date"]
    df_pivot = df_sar.groupby(group_keys).agg({
        'Allocation_Amt_Allocation_Sheet': 'sum',
        'Expended_Amt_Allocation_Sheet': 'sum',
        'Percent_of_Allocation_Expended': 'max',
        'Allocated_Before_July_31_2020': 'max',
    })

    ### GCS ###
    # Same bucket folder as the inputs; built from the shared constant so the
    # location is defined in one place.
    with pd.ExcelWriter(f"{GCS_FILE_PATH}FUNCTION_TEST_TIRCP_SAR.xlsx") as writer:
        summary_table_2.to_excel(writer, sheet_name="Summary", index=True)
        df_pivot.to_excel(writer, sheet_name="FY", index=True)
    return df_pivot

In [34]:
test_sar = semi_annual_report()

  return self._engine.is_unique
  indexer = self._engine.get_indexer(target._get_engine_target())


In [31]:
test_sar.tail(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0,Unnamed: 12_level_0,Unnamed: 13_level_0,Unnamed: 14_level_0,Allocation_Amt_Allocation_Sheet,Expended_Amt_Allocation_Sheet,Percent_of_Allocation_Expended,Allocated_Before_July_31_2020
Award_Year,Project_#,Award_Recipient,Project_Title,Project_Manager,TIRCP_Award_Amount,Percent_of_Award_Fully_Allocated,Implementing_Agency,Components,PPNO,Phase,Allocation_Date,CON_Contract_Award_Date,Completion_Date,Phase_Completion_Date,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2020,8,Los Angeles-San Diego-San Luis Obispo Rail Corridor Agency (LOSSAN),Building Up Control: LOSSAN Service Enhancement Program,Phil,38743000,0,Los Angeles-San Diego-San Luis Obispo Rail Corridor Agency (LOSSAN),San Diego County Maintenance and Layover Facility,CP066,PS&E,2100-01-01,2100-01-01,2025-12-31,2100-01-01,0,0,0,
2020,9,Sacramento Regional Transit District,Light Rail Modernization and Expansion of Low-Floor Fleet,Cinthia,23600000,1,Sacramento Regional Transit District,Acquisition of light rail vehicles,CP067,CONST,2021-08-19,2100-01-01,2100-01-01,2100-01-01,23600000,0,0,
2020,10,San Bernardino County Transportation Authority (SBCTA) and Omnitrans,West Valley Connector Bus Rapid Transit Phase 1 & ZEB Initiative,Daniela,15000000,0,San Bernardino County Transportation Authority (SBCTA) and Omnitrans,Acquisition of zero-emission buses,CP068,CONST,2100-01-01,2022-12-31,2023-12-01,2100-01-01,0,0,0,
2020,11,San Diego Association of Governments (SANDAG),SDConnect: San Diego Rail Improvement Program,Daniela,12100000,0,San Diego Association of Governments (SANDAG),Del Mar Bluffs Stabilization,CP069,CONST,2100-01-01,2022-12-31,2100-01-01,2100-01-01,0,0,0,
2020,11,San Diego Association of Governments (SANDAG),SDConnect: San Diego Rail Improvement Program,Daniela,12100000,0,San Diego Association of Governments (SANDAG),Del Mar Bluffs Stabilization,CP069,PS&E,2020-08-13,2100-01-01,2023-06-30,2023-06-30,3774000,0,0,
2020,11,San Diego Association of Governments (SANDAG),SDConnect: San Diego Rail Improvement Program,Daniela,12100000,0,San Diego Metropolitan Transit System (MTS),El Cajon Third Track,CP069,CONST,2100-01-01,2022-12-31,2100-01-01,2100-01-01,0,0,0,
2020,11,San Diego Association of Governments (SANDAG),SDConnect: San Diego Rail Improvement Program,Daniela,12100000,0,San Diego Metropolitan Transit System (MTS),El Cajon Third Track,CP069,PS&E,2020-10-22,2100-01-01,2023-06-30,2023-06-30,720000,0,0,
2020,12,San Francisco Municipal Transportation Agency,Core Capacity Program,Leela,41668000,0,San Francisco Municipal Transportation Agency,MUNI Forward J Line,CP070,CONST,2100-01-01,2025-12-31,2027-12-01,2100-01-01,0,0,0,
2020,12,San Francisco Municipal Transportation Agency,Core Capacity Program,Leela,41668000,0,San Francisco Municipal Transportation Agency,MUNI Forward M Line,CP070,CONST,2100-01-01,2025-12-31,2027-12-01,2100-01-01,0,0,0,
2020,15,Torrance Transit Department,Torrance Transit Bus Service Enhancement Program,Marissa Brown,6000000,1,Torrance Transit Department,Procurement of 7 zero-emission buses,CP073,CONST,2020-10-22,2021-03-01,2100-01-01,2100-01-01,6000000,0,0,


In [22]:
test_sar.isna().sum()

Allocation_Amt_Allocation_Sheet    0
Expended_Amt_Allocation_Sheet      0
Percent_of_Allocation_Expended     0
Allocated_Before_July_31_2020      0
dtype: int64

# Sheet for Tableau

In [23]:
# Categorize years and expended_percent_group into bins
def progress(df):
    """Label one project row as 'Behind', 'On Track' or 'Ahead'.

    Despite its name, ``df`` is a single row (anything indexable by column
    name); the function is meant to be used with ``DataFrame.apply(axis=1)``.

    The label depends on ``Award_Year`` and ``Expended_Percent_Group``:
    2015/2016 awards are 'Behind' in the "1-50" group and 'On Track' above it;
    2018/2020 awards are 'On Track' in the "1-50" group and 'Ahead' above it.
    Any other year or group yields "No Expenditures".
    """
    no_match = "No Expenditures"
    older_cohort = {"1-50": 'Behind', "51-70": 'On Track', "71-100": 'On Track'}
    newer_cohort = {"1-50": 'On Track', "51-70": 'Ahead', "71-100": 'Ahead'}
    labels_by_year = {
        2015: older_cohort,
        2016: older_cohort,
        2018: newer_cohort,
        2020: newer_cohort,
    }

    labels = labels_by_year.get(df['Award_Year'])
    if labels is None:
        # Unrecognised year: the percent group is never consulted.
        return no_match
    return labels.get(df['Expended_Percent_Group'], no_match)

In [24]:
#Categorize Expended Percent into bins
def expended_percent(row):
    """Bucket a row's ``Expended_Percent`` fraction into a labelled group.

    Args:
        row: object exposing a numeric ``Expended_Percent`` attribute.

    Returns:
        str: "No expenditure recorded" for exactly 0, "1-50" for values
        strictly between 0 and 0.50, "51-70" for other values below 0.71,
        and "71-100" for everything else.

    NOTE(review): negative values fall into "51-70" and NaN into "71-100",
    because only the upper bounds are checked after the first two tests --
    confirm whether such inputs can occur.
    """
    share = row.Expended_Percent
    if share == 0:
        return "No expenditure recorded"
    if 0 < share < .50:
        return "1-50"
    return "51-70" if share < 0.71 else "71-100"

In [25]:
project_test.columns

Index(['Award_Year', 'Project_#', 'Local_Agency', 'Vendor_ID_#',
       'Project_Title', 'District', 'County', 'Key_Project_Elements',
       'Master_Agreement_Number', 'Master_Agreement_Expiration_Date',
       'Project_Manager', 'Regional_Coordinator',
       'Technical_Assistance-CALTP_(Y/N)', 'Technical_Assistance-Fleet_(Y/N)',
       'Technical_Assistance-Network_Integration_(Y/N)',
       'Technical_Assistance-Priority_Population_(Y/N)', 'Total_Project_Cost',
       'TIRCP_project_sheet', 'Allocated_Amount',
       'Unallocated_amt_project_sheet', 'Percentge_Allocated',
       'Expended_Amt_project_sheet', 'Other_Funds_Involved', 'Award_Cycle',
       'Local_Agency_Address', 'Local_Agency_City', 'Local_Agency_Zip',
       'Local_Agency_Contact', 'Local_Agency_Email',
       'Local_Agency_Phone_Number', 'Comments/Additional_Contacts', 'PPNO'],
      dtype='object')

In [26]:
#INPUT INTO MY SCRIPT
#Script for the projects sheet that I inputted into Tableau
def tableau():
    """Build the project-level sheet that feeds the Tableau dashboard.

    Loads the cleaned project sheet through ``TIRCP_functions.project()``,
    keeps the reporting columns, and adds:
      * ``Expended_Percent``  -- expended / allocated, as a fraction
      * ``Allocated_Percent`` -- allocated / awarded, as a fraction
      * ``Expended_Percent_Group`` -- bucket label from ``expended_percent``
      * ``Progress`` -- status label from ``progress``

    Returns:
        pandas.DataFrame: one row per project with the columns above appended.
    """
    # Keeping only the columns we want
    keep_cols = ['Award_Year', 'Project_#','Local_Agency','Project_Title','PPNO',
                 'Key_Project_Elements','TIRCP_project_sheet','Allocated_Amount',
                 'Expended_Amt_project_sheet']
    # .copy() makes the subset an independent frame, so the column
    # assignments below do not raise SettingWithCopyWarning.
    df = TIRCP_functions.project()[keep_cols].copy()

    # Getting percentages & filling in with 0
    # NOTE(review): fillna covers the 0/0 case only; a non-zero amount divided
    # by 0 yields inf, which is left untouched and lands in the top bucket.
    df['Expended_Percent'] = df['Expended_Amt_project_sheet']/df['Allocated_Amount']
    df['Allocated_Percent'] = df['Allocated_Amount']/df['TIRCP_project_sheet']
    df[['Expended_Percent','Allocated_Percent']] = df[['Expended_Percent','Allocated_Percent']].fillna(value=0)

    # Categorizing expended percentage into bins
    df["Expended_Percent_Group"] = df.apply(expended_percent, axis=1)

    # Categorize years and expended_percent_group into bins
    df['Progress'] = df.apply(progress, axis = 1)
    return df

In [27]:

tableau_test = tableau()


In [28]:
type(tableau_test)

pandas.core.frame.DataFrame

In [29]:
tableau_test

Unnamed: 0,Award_Year,Project_#,Local_Agency,Project_Title,PPNO,Key_Project_Elements,TIRCP_project_sheet,Allocated_Amount,Expended_Amt_project_sheet,Expended_Percent,Allocated_Percent,Expended_Percent_Group,Progress
0,2015,1,Antelope Valley Transit Authority (AVTA),Regional Transit Interconnectivity & Environme...,CP005,Purchase 13 60-foot articulated BRT buses and ...,24403000,24403000,21714178,1,1,71-100,On Track
1,2015,2,Capitol Corridor Joint Powers Authority,Travel Time Reduction Project,CP012,Track and curve improvements between San Jose ...,4620000,4620000,4620000,1,1,71-100,On Track
2,2015,3,Los Angeles County Metropolitan Transportation...,Willowbrook/Rosa Parks Station & Blue Line Lig...,CP015,Replace Blue Line signal system; install new t...,38494000,38494000,38494000,1,1,71-100,On Track
3,2015,4,Los Angeles-San Diego-San Luis Obispo Rail Cor...,Pacific Surfliner Transit Transfer Program,CP007,LOSSAN and 12 transit agencies from San Luis O...,1675000,1675000,277840,0,1,1-50,Behind
4,2015,5,Montery-Salinas Transit,Monterey Bay Operations and Maintenance Facili...,CP013,Renovation and expansion of the Monterey maint...,10000000,10000000,0,0,1,No expenditure recorded,No Expenditures
...,...,...,...,...,...,...,...,...,...,...,...,...,...
69,2020,13,Santa Monica Big Blue Bus,"For People, Place and Planet: Connecting Ingle...",,Purchase 7 zero emission buses to enhance and ...,1105000,0,0,0,0,No expenditure recorded,No Expenditures
70,2020,14,Solano Transportation Authority,Solano Regional Transit Improvements Phase 2,,"Improve the frequency, reliability, and access...",10400000,2900000,0,0,0,No expenditure recorded,No Expenditures
71,2020,15,Torrance Transit Department,Torrance Transit Bus Service Enhancement Program,CP073,Purchase 7 electric buses to expand services o...,6000000,6000000,0,0,1,No expenditure recorded,No Expenditures
72,2020,16,Transit Joint Powers Authority of Merced County,Improving Air Quality & Economic Growth with E...,10-CP074,Purchases 3 zero-emission electric buses to in...,3112000,3112000,0,0,1,No expenditure recorded,No Expenditures


In [30]:
project_test.isna().sum()

Award_Year                                         0
Project_#                                          0
Local_Agency                                       0
Vendor_ID_#                                        0
Project_Title                                      0
District                                           8
County                                             4
Key_Project_Elements                               0
Master_Agreement_Number                            7
Master_Agreement_Expiration_Date                   7
Project_Manager                                    0
Regional_Coordinator                               3
Technical_Assistance-CALTP_(Y/N)                  19
Technical_Assistance-Fleet_(Y/N)                  24
Technical_Assistance-Network_Integration_(Y/N)    22
Technical_Assistance-Priority_Population_(Y/N)    22
Total_Project_Cost                                 0
TIRCP_project_sheet                                0
Allocated_Amount                              