# Cleaning up Grants Data
----

There are three sheets in the TIRCP workbook. However, it seems like the summary reports mostly rely on two: one called project tracking and one called allocation tracking, so I am only loading those in.


In [78]:
import pandas as pd
import math
!pip install openpyxl



In [79]:
#read in files 
project = pd.read_excel(open('Raw Project Tracking Sheet.xlsx','rb'), sheet_name='Project Tracking DRAFT')  
allocation = pd.read_excel(open('Raw Project Tracking Sheet.xlsx','rb'), sheet_name='Agreement Allocations DRAFT')  


In [80]:
#cleaning up spaces in columns
project.columns = project.columns.str.strip().str.replace(' ', '_')
allocation.columns = allocation.columns.str.strip().str.replace(' ', '_')

In [81]:
(project.columns)

Index(['Award_Year', 'Project_#', 'Local_Agency', 'Vendor_ID_#',
       'Project_Title', 'PPNO', 'District', 'County', 'Key_Project_Elements',
       'Master_Agreement_Number', 'Master_Agreement_Expiration_Date',
       'Project_Manager', 'Regional_Coordinator',
       'Technical_Assistance-CALTP_(Y/N)', 'Technical_Assistance-Fleet_(Y/N)',
       'Technical_Assistance-Network_Integration_(Y/N)',
       'Technical_Assistance-Priority_Population_(Y/N)', 'Total_Project_Cost',
       'TIRCP_Award_Amount_($)', 'Allocated_Amount', 'Unallocated_Amount',
       'Percentge_Allocated', 'Expended_Amount', 'Other_Funds_Involved',
       'Award_Cycle', 'Local_Agency_Address', 'Local_Agency_City',
       'Local_Agency_Zip', 'Local_Agency_Contact', 'Local_Agency_Email',
       'Local_Agency_Phone_Number', 'Comments/Additional_Contacts'],
      dtype='object')

In [82]:
(allocation.columns)

Index(['Award_Year', 'Project_#', 'Award_Recipient', 'Implementing_Agency',
       'PPNO', 'Project_ID', 'EA', 'Components', 'Phase', 'Allocation_Amount',
       'Expended_Amount', 'SB1_Funding', 'SB1_Budget_Year', 'GGRF_Funding',
       'GGRF_Budget_Year', 'CTC_Financial_Resolution',
       'CTC_Allocation_Amendment', 'CTC_Waiver', 'CTC_CalSTA_Waiver',
       'Allocation_Date', 'Completion_Date', 'PSA_#', 'CT_Document_#',
       '3rd_Party_Award_Date', 'LED', 'Date_Branch_Chief_Receives_PSA',
       'Date_Regional_Coordinator_Receives_PSA', 'Date_OC_Receives_PSA',
       'Date_OPM_Receives_PSA', 'Date_Legal_Receives_PSA',
       'Date_Returned_to_PM', 'Date_PSA_Sent_to_Local_Agency',
       'Date_PSA_Approved_by_Local_Agency', 'Date_Signed_by_DRMT',
       'PSA_Expiry_Date', 'LONP', 'Prior_Fiscal_Years_to_2020',
       'Fiscal_Year_2020-2021', 'Fiscal_Year_2021-2022',
       'Fiscal_Year_2022-2023', 'Fiscal_Year_2023-2024',
       'Fiscal_Year_2024-2025', 'Fiscal_Year_2025-2026',
    

### Note: Third party award date is called CON Contract Award Date in TIRCP SAR Attachment 

In [83]:
#subsetting for only columns of interest
df_project = project[['Award_Year', 'Project_#','Local_Agency','Project_Title','PPNO',
                      'Key_Project_Elements','TIRCP_Award_Amount_($)','Allocated_Amount','Expended_Amount']]

In [84]:
#subsetting for only columns of interest
df_allocation = allocation[['Award_Year','Award_Recipient', 'Implementing_Agency', 'PPNO','Phase',
                            'Allocation_Date','Completion_Date','3rd_Party_Award_Date','Components']]

In [85]:
#strip spaces in columns
df_project.columns = df_project.columns.map(lambda x: x.strip())
df_allocation.columns = df_allocation.columns.map(lambda x: x.strip())

In [86]:
#just testing to make sure everything looks okay...
#df_project.to_csv("./test_df_project.csv")
#df_allocation.to_csv("./test_df_allocation.csv")

# Cleaning Allocation Sheet 

## Cleaning up PPNO, can only be 5 characters.

In [87]:
#remove the extra characters in PPNO in allocation to match the PPNO in project data frame bc there should only be five characters and numbers in each PPNO value
df_allocation = df_allocation.assign(
    PPNO_New = df_allocation['PPNO'].str.slice(start=0, stop=5)
)

In [88]:
#CSV with PPNO & Award Recipients
allocation_ppno = pd.read_csv('Allocation_PPNO_Crosswalk.csv')

In [89]:
allocation_ppno #printing to make sure it makes sense.

Unnamed: 0,Award_Year,PPNO_New2,Award_Recipient
0,2020,CP065,Los Angeles County Metropolitan Transportation...
1,2020,CP066,Los Angeles-San Diego-San Luis Obispo Rail Cor...
2,2016,1230,San Bernardino County Transportation Authority...
3,2018,1155,Transportation Agency for Monterey County


In [90]:
#Filtering out for 2021, since that entry is blank
df_allocation = df_allocation.query("Award_Year != 2021")

In [91]:
#Merge in Crosswalk 
df_allocation = pd.merge(df_allocation, allocation_ppno, on = ["Award_Year", "Award_Recipient"], how = "left")

In [92]:
type(df_allocation.iloc[0].PPNO_New2)


float

In [93]:
#some values in PPNO and PPNO_New2 are strings, some are floats...so have to convert PPNO New 2 to strings
df_allocation.PPNO_New = df_allocation.apply(lambda x: x.PPNO_New if (str(x.PPNO_New2) == 'nan') else x.PPNO_New2, axis=1)

In [94]:
#drop old column
df_allocation = df_allocation.drop(['PPNO'], axis=1)

In [95]:
#renaming columns to something neater
df_allocation = df_allocation.rename(columns = {'PPNO_New':'PPNO', '3rd_Party_Award_Date':'Third_Party_Award_Date'})

## Cleaning up completion, allocation, & 3rd Party dates 

In [96]:
#cleaning up allocation dates
df_allocation.Allocation_Date.unique().tolist()

[datetime.datetime(2015, 10, 22, 0, 0),
 datetime.datetime(2016, 5, 19, 0, 0),
 datetime.datetime(2016, 6, 30, 0, 0),
 datetime.datetime(2015, 12, 10, 0, 0),
 datetime.datetime(2015, 8, 27, 0, 0),
 datetime.datetime(2016, 1, 21, 0, 0),
 datetime.datetime(2017, 6, 29, 0, 0),
 datetime.datetime(2016, 10, 20, 0, 0),
 datetime.datetime(2017, 8, 17, 0, 0),
 datetime.datetime(2018, 1, 31, 0, 0),
 datetime.datetime(2017, 1, 19, 0, 0),
 datetime.datetime(2016, 3, 17, 0, 0),
 datetime.datetime(2017, 3, 16, 0, 0),
 datetime.datetime(2017, 5, 17, 0, 0),
 datetime.datetime(2018, 8, 16, 0, 0),
 'TBD',
 datetime.datetime(2021, 6, 24, 0, 0),
 datetime.datetime(2016, 12, 8, 0, 0),
 datetime.datetime(2020, 6, 25, 0, 0),
 datetime.datetime(2019, 12, 5, 0, 0),
 datetime.datetime(2018, 10, 18, 0, 0),
 datetime.datetime(2021, 1, 28, 0, 0),
 nan,
 datetime.datetime(2018, 2, 1, 0, 0),
 datetime.datetime(2018, 5, 17, 0, 0),
 'FY 26/27',
 datetime.datetime(2017, 5, 18, 0, 0),
 datetime.datetime(2018, 6, 28, 0,

In [97]:
#Had to change FY to an actual date 
df_allocation["Allocation_Date"].replace({"FY 26/27": "2026-12-31", "08/12//20": '2020-08-12 00:00:00', 'FY 21/22': '2021-12-31',
                                         'FY 22/23': '2022-12-31','FY 20/21': '2020-12-31', 'FY 23/24': '2023-12-31','FY 24/25': '2024-12-31','FY 25/26': '2025-12-31'}, inplace =True)

In [98]:
#clean up columns in a loop
for i in ["Allocation_Date", "Third_Party_Award_Date", "Completion_Date"]:
    df_allocation[i] = df_allocation[i].replace('/', '-', regex = True).replace('Complete', '', regex = True).replace('\n', '', regex=True).replace('Pending','TBD',regex= True).fillna('TBD')

In [99]:
df_allocation.Completion_Date.unique().tolist()

[datetime.datetime(2022, 3, 30, 0, 0),
 '6-1-2019',
 datetime.datetime(2021, 6, 30, 0, 0),
 datetime.datetime(2018, 9, 30, 0, 0),
 '2-11-2018',
 '6-30-2020',
 datetime.datetime(2020, 9, 30, 0, 0),
 ' 6-30-2018',
 '6-29-2020',
 '11-1-2019',
 ' 12-10-2018',
 ' 11-13-2019',
 '3-30-2020',
 datetime.datetime(2022, 9, 30, 0, 0),
 datetime.datetime(2021, 12, 30, 0, 0),
 datetime.datetime(2021, 9, 30, 0, 0),
 '5-16-2020',
 datetime.datetime(2024, 6, 30, 0, 0),
 'TBD',
 'June 24. 2024',
 datetime.datetime(2022, 12, 30, 0, 0),
 datetime.datetime(2024, 6, 24, 0, 0),
 '11-21-20247-30-2025 (Q4)',
 datetime.datetime(2022, 6, 30, 0, 0),
 datetime.datetime(2019, 5, 21, 0, 0),
 datetime.datetime(2024, 7, 25, 0, 0),
 datetime.datetime(2021, 12, 31, 0, 0),
 datetime.datetime(2024, 1, 28, 0, 0),
 datetime.datetime(2022, 10, 31, 0, 0),
 datetime.datetime(2022, 1, 16, 0, 0),
 datetime.datetime(2018, 2, 1, 0, 0),
 datetime.datetime(2022, 8, 22, 0, 0),
 datetime.datetime(2022, 7, 31, 0, 0),
 '5-7-2020',
 date

In [100]:
#cleaning up completion dates
df_allocation["Completion_Date"].replace({ 
    'June 24. 2024': '2024-06-01 00:00:00',  
    '11/21/2024\n7/30/2025 (Q4)': '2024-11-21 00:00:00', 
    'Jun-26': '2026-01-01 00:00:00', 
     'Jun-29': '2029-06-01 00:00:00',
    'Complete\n11/12/2019': '2019-11-12 00:00:00' , 
    'Deallocated': '', 
    'Jun-28': '2028-06-01 00:00:00',  
    'Jun-25': '2025-06-01 00:00:00', 
    'Jun-23':'2023-06-01 00:00:00', 
    'Jun-27': '2027-06-01 00:00:00',
    'Jan-25': '2025-01-01 00:00:00',
    '11-21-20247-30-2025 (Q4)':'2025-07-30 00:00:00',
    '6-30-202112-31-2021': '2021-12-31 00:00:00',
    '6-1-2019': '2019-06-01 00:00:00',
    '2-11-2018': '2018-02-11 00:00:00',
     '6-30-2020': '2020-06-30 00:00:00',
    ' 6-30-2018': '2018-06-30 00:00:00',
     '6-29-2020': '2020-06-29 00:00:00',
     '11-1-2019': '2019-11-01 00:00:00',
     ' 12-10-2018': '2018-12-10 00:00:00',
     ' 11-13-2019': '2019-11-13 00:00:00',
     '3-30-2020':'2020-03-30 00:00:00',
    ' 6-30-2020': '2020-06-30 00:00:00',
    '11-12-2019': '2019-11-12 00:00:00',
    '1-31-2020': '2020-01-31 00:00:00',
    '8-30-2020': '2020-08-30 00:00:00',
    '5-16-2020': '2020,05-16 00:00:00',
     '5-7-2020': '2020-05-07 00:00:00'}, inplace =True)

In [101]:
#cleaning up 3rd Party dates
df_allocation["Third_Party_Award_Date"].replace({ 
'Augsut 12, 2021': '2021-08-12 00:00:00',
'43435': '2018-12-01 00:00:00',
'07-29-2020': '2020-07-29 00:00:00',
'43497' : '2019-02-01 00:00:00',
'TBD 6-24-2021' : 'TBD',
'TBD 6-30-2022' : 'TBD'
}, inplace =True)

In [102]:
#coercing to dates
df_allocation = df_allocation.assign(
    Allocation_Date_New = pd.to_datetime(df_allocation.Allocation_Date, errors="coerce").dt.date,
    Third_Party_Award_Date_New = pd.to_datetime(df_allocation.Third_Party_Award_Date, errors="coerce").dt.date,
    Completion_Date_New = pd.to_datetime(df_allocation.Completion_Date, errors="coerce").dt.date
)

In [103]:
df_allocation.dtypes

Award_Year                     int64
Award_Recipient               object
Implementing_Agency           object
Phase                         object
Allocation_Date               object
Completion_Date               object
Third_Party_Award_Date        object
Components                    object
PPNO                          object
PPNO_New2                     object
Allocation_Date_New           object
Third_Party_Award_Date_New    object
Completion_Date_New           object
dtype: object

In [104]:
#checking for nas
df_allocation.isna().sum()

Award_Year                      0
Award_Recipient                 0
Implementing_Agency             0
Phase                           1
Allocation_Date                 0
Completion_Date                 0
Third_Party_Award_Date          0
Components                      0
PPNO                            0
PPNO_New2                     292
Allocation_Date_New            83
Third_Party_Award_Date_New    145
Completion_Date_New            83
dtype: int64

In [105]:
df_allocation.shape

(306, 13)

## Final CSV Version

In [106]:
#drop old columns
df_allocation = df_allocation.drop(['PPNO_New2','Allocation_Date','Completion_Date','Third_Party_Award_Date'], axis=1)
#rename columns
df_allocation = df_allocation.rename(columns = {'Allocation_Date_New':'Allocation_Date', 'Completion_Date_New':'Completion_Date', 'Third_Party_Award_Date_New':'Third_Party_Award_Date'})

In [107]:
df_allocation.to_csv("./final_df_allocation.csv", index= False)

# Cleaning Project Sheet



In [108]:
df_project.columns

Index(['Award_Year', 'Project_#', 'Local_Agency', 'Project_Title', 'PPNO',
       'Key_Project_Elements', 'TIRCP_Award_Amount_($)', 'Allocated_Amount',
       'Expended_Amount'],
      dtype='object')

## Filling NA for TIRCP, Allocated, and Expended Amounts

In [109]:
df_project[['TIRCP_Award_Amount_($)', 'Allocated_Amount','Expended_Amount']] = df_project[['TIRCP_Award_Amount_($)', 'Allocated_Amount', 'Expended_Amount']].fillna(value=0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


In [110]:
#checking for nas
df_project.isna().sum()

Award_Year                0
Project_#                 0
Local_Agency              0
Project_Title             0
PPNO                      5
Key_Project_Elements      0
TIRCP_Award_Amount_($)    0
Allocated_Amount          0
Expended_Amount           0
dtype: int64

## Cleaning up PPNO Numbers based on Allocation Sheet

In [111]:
#slicing PPNO to be 5 characters
df_project = df_project.assign(PPNO_New = df_project['PPNO'].str.slice(start=0, stop=5)) 

In [112]:
#importing Excel crosswalk sheet
project_ppno = pd.read_excel(open('Projects_PPNO.xlsx','rb')) 

In [113]:
#just previewing
project_ppno

Unnamed: 0,Award_Year,PPNO_New2,Local_Agency
0,2020,CP060,Bay Area Rapid Transit District (BART)
1,2020,CP071,Santa Monica Big Blue Bus
2,2020,CP072,Solano Transportation Authority
3,2020,CP075,San Francisco Bay Area Water Emergency Transpo...
4,2020,CP074,Transit Joint Powers Authority of Merced County
5,2020,CP068,San Bernardino County Transportation Authority...
6,2016,1230,San Bernardino County Transportation Authority...
7,2018,1155,Transportation Agency for Monterey County (TAMC)
8,2018,CP053,Sacramento Regional Transit District (SacRT)
9,2015,CP002,Southern California Regional Rail Authority


In [114]:
#Merge in Crosswalk 
df_project2 = pd.merge(df_project, project_ppno, on = ["Award_Year", "Local_Agency"], how = "left")

In [115]:
#some values in PPNO and PPNO_New2 are strings, some are floats...so have to convert PPNO New 2 to strings
df_project2.PPNO_New = df_project2.apply(lambda x: x.PPNO_New if (str(x.PPNO_New2) == 'nan') else x.PPNO_New2, axis=1)

In [116]:
#making sure PPNO_New is a string 
df_project2 = df_project2.astype({'PPNO_New': 'str'})

In [117]:
PPNO_project = set(df_project2.PPNO_New.unique().tolist())
PPNO_allocation = set(df_allocation.PPNO.unique().tolist())

In [118]:
PPNO_project #looking at list

{'1155',
 '1230',
 '2320B',
 'CP001',
 'CP002',
 'CP003',
 'CP004',
 'CP005',
 'CP006',
 'CP007',
 'CP008',
 'CP010',
 'CP011',
 'CP012',
 'CP013',
 'CP014',
 'CP015',
 'CP016',
 'CP017',
 'CP019',
 'CP020',
 'CP021',
 'CP022',
 'CP023',
 'CP025',
 'CP026',
 'CP027',
 'CP028',
 'CP029',
 'CP030',
 'CP031',
 'CP032',
 'CP033',
 'CP034',
 'CP035',
 'CP036',
 'CP039',
 'CP041',
 'CP045',
 'CP046',
 'CP047',
 'CP048',
 'CP051',
 'CP053',
 'CP054',
 'CP055',
 'CP057',
 'CP058',
 'CP059',
 'CP060',
 'CP061',
 'CP062',
 'CP063',
 'CP064',
 'CP065',
 'CP066',
 'CP067',
 'CP068',
 'CP069',
 'CP070',
 'CP071',
 'CP072',
 'CP073',
 'CP074',
 'CP075',
 'CP076',
 'CP077',
 'CP078',
 'CP079',
 'CP080'}

In [119]:
PPNO_allocation #looking at list

{'1155',
 '1230',
 '2320B',
 'CP001',
 'CP002',
 'CP003',
 'CP004',
 'CP005',
 'CP006',
 'CP007',
 'CP008',
 'CP010',
 'CP011',
 'CP012',
 'CP013',
 'CP014',
 'CP015',
 'CP016',
 'CP017',
 'CP018',
 'CP019',
 'CP020',
 'CP021',
 'CP022',
 'CP023',
 'CP024',
 'CP025',
 'CP026',
 'CP027',
 'CP028',
 'CP029',
 'CP030',
 'CP031',
 'CP032',
 'CP033',
 'CP034',
 'CP035',
 'CP036',
 'CP039',
 'CP041',
 'CP042',
 'CP043',
 'CP045',
 'CP046',
 'CP047',
 'CP048',
 'CP051',
 'CP053',
 'CP054',
 'CP055',
 'CP057',
 'CP058',
 'CP059',
 'CP060',
 'CP061',
 'CP062',
 'CP063',
 'CP064',
 'CP065',
 'CP066',
 'CP067',
 'CP068',
 'CP069',
 'CP070',
 'CP071',
 'CP072',
 'CP073',
 'CP074',
 'CP075',
 'CP076',
 'CP077',
 'CP078',
 'CP079',
 'CP080'}

In [120]:
PPNO_project - PPNO_allocation #checking for differences - none. yay. 

set()

## Final CSV for project

In [121]:
#drop old column
df_project2 = df_project2.drop(['PPNO','PPNO_New2','Local_Agency'], axis=1)

In [122]:
#renaming PPNO 
df_project2 = df_project2.rename(columns = {'PPNO_New':'PPNO'})

In [123]:
df_project2.to_csv("./final_df_project.csv", index= False)

# Merging Project & Allocations
Merge on PPNO & Award_Year

In [124]:
#left merge to keep everything on the alllocation side since there are more records in that dataset 
df_combined2 = df_allocation.merge(df_project2, how = "left", on = ["PPNO", "Award_Year"])

In [125]:
df_combined2.shape

(319, 15)

In [126]:
df_combined2.drop_duplicates #dropping duplicates

<bound method DataFrame.drop_duplicates of      Award_Year                                    Award_Recipient  \
0          2015                 Antelope Valley Transit Authority    
1          2015            Capitol Corridor Joint Powers Authority   
2          2015  Los Angeles County Metropolitan Transportation...   
3          2015  Los Angeles-San Diego-San Luis Obispo Rail Cor...   
4          2015                           Monterey-Salinas Transit   
..          ...                                                ...   
314        2020                    Solano Transportation Authority   
315        2020                    Solano Transportation Authority   
316        2020                        Torrance Transit Department   
317        2020   Transit Joint Powers Authority for Merced County   
318        2020  San Francisco Bay Area Water Emergency Transpo...   

                                   Implementing_Agency  Phase  \
0                   Antelope Valley Transit Authori

In [127]:
df_combined2.head(2)

Unnamed: 0,Award_Year,Award_Recipient,Implementing_Agency,Phase,Components,PPNO,Allocation_Date,Third_Party_Award_Date,Completion_Date,Project_#,Project_Title,Key_Project_Elements,TIRCP_Award_Amount_($),Allocated_Amount,Expended_Amount
0,2015,Antelope Valley Transit Authority,Antelope Valley Transit Authority,CONST,Purchase 13 60-foot articulated BRT buses and ...,CP005,2015-10-22,2016-03-14,2022-03-30,1.0,Regional Transit Interconnectivity & Environme...,Purchase 13 60-foot articulated BRT buses and ...,24403000.0,24403000.0,21714177.53
1,2015,Capitol Corridor Joint Powers Authority,Capitol Corridor Joint Powers Authority,CONST,Track and curve improvements between San Jose ...,CP012,2016-05-19,2016-06-01,2019-06-01,2.0,Travel Time Reduction Project,Track and curve improvements between San Jose ...,4620000.0,4620000.0,4619999.9


In [128]:
df_combined2.shape

(319, 15)

In [129]:
df_combined2[['TIRCP_Award_Amount_($)', 'Allocated_Amount','Expended_Amount']] = df_combined2[['TIRCP_Award_Amount_($)', 'Allocated_Amount', 'Expended_Amount']].fillna(value=0)

In [130]:
df_combined2.isna().sum()

Award_Year                  0
Award_Recipient             0
Implementing_Agency         0
Phase                       1
Components                  0
PPNO                        0
Allocation_Date            90
Third_Party_Award_Date    154
Completion_Date            90
Project_#                  33
Project_Title              33
Key_Project_Elements       33
TIRCP_Award_Amount_($)      0
Allocated_Amount            0
Expended_Amount             0
dtype: int64

In [131]:
#dividing allocated amount and expended amount
df_combined2['Expended_Percent'] = df_combined2['Expended_Amount']/df_combined2['Allocated_Amount']
df_combined2['Allocated_Percent'] = df_combined2['Allocated_Amount']/df_combined2['TIRCP_Award_Amount_($)']

In [132]:
df_combined2.sample(2)

Unnamed: 0,Award_Year,Award_Recipient,Implementing_Agency,Phase,Components,PPNO,Allocation_Date,Third_Party_Award_Date,Completion_Date,Project_#,Project_Title,Key_Project_Elements,TIRCP_Award_Amount_($),Allocated_Amount,Expended_Amount,Expended_Percent,Allocated_Percent
100,2018,Los Angeles County Metropolitan Transportation...,Los Angeles County Metropolitan Transportation...,PA&ED,Vermont Transit Corridor,CP030,2021-03-13,2022-06-30,2029-06-01,10.0,Los Angeles Region Transit System Integration ...,Capital improvements that will broaden and mod...,1088499000.0,261200000.0,0.0,0.0,0.239963
94,2018,Livermore Amador Valley Transit Authority,Livermore Amador Valley Transit Authority,CONST,Network Integration,CP051,2018-12-06,2019-06-03,2021-12-31,9.0,Dublin/Pleasanton Capacity Improvement and Con...,Increase BART ridership through\nconstruction ...,20500000.0,20500000.0,157092.4,0.007663,1.0


In [133]:
len(df_combined2) #checking rows  before deleting duplicates

319

In [134]:
df_combined2 = df_combined2.drop_duplicates()

In [135]:
df_combined2.head(2)

Unnamed: 0,Award_Year,Award_Recipient,Implementing_Agency,Phase,Components,PPNO,Allocation_Date,Third_Party_Award_Date,Completion_Date,Project_#,Project_Title,Key_Project_Elements,TIRCP_Award_Amount_($),Allocated_Amount,Expended_Amount,Expended_Percent,Allocated_Percent
0,2015,Antelope Valley Transit Authority,Antelope Valley Transit Authority,CONST,Purchase 13 60-foot articulated BRT buses and ...,CP005,2015-10-22,2016-03-14,2022-03-30,1.0,Regional Transit Interconnectivity & Environme...,Purchase 13 60-foot articulated BRT buses and ...,24403000.0,24403000.0,21714177.53,0.889816,1.0
1,2015,Capitol Corridor Joint Powers Authority,Capitol Corridor Joint Powers Authority,CONST,Track and curve improvements between San Jose ...,CP012,2016-05-19,2016-06-01,2019-06-01,2.0,Travel Time Reduction Project,Track and curve improvements between San Jose ...,4620000.0,4620000.0,4619999.9,1.0,1.0


In [137]:
len(df_combined2)

288

In [138]:
df_combined2.to_csv("./df_combined2.csv", index = False) 