# Interim solution to address expenditure data issue for March 15 presentation

* Using data from Data Link.

In [1]:
import numpy as np
import pandas as pd
import TIRCP_functions
from siuba import *
from calitp import *

# Root folder in Google Cloud Storage that the workbooks are read from and written to.
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/tircp/"

# Display limits for this notebook: up to 50 columns / 120 rows, and never truncate cell text.
pd.set_option("display.max_columns", 50)
pd.set_option("display.max_rows", 120)
pd.set_option("display.max_colwidth", None)

E0310 21:25:22.269069078    1284 fork_posix.cc:70]           Fork support is only compatible with the epoll1 and poll polling strategies
E0310 21:25:22.762165564    1284 fork_posix.cc:70]           Fork support is only compatible with the epoll1 and poll polling strategies


## Expenditure information from the Enterprise Data Link data Linda pulled.

In [2]:
# Read the Enterprise Data Link export from GCS, then pass it through to_snakecase
# (presumably normalises the column labels to snake_case — the helper comes from a star import).
enterprise = "Enterprise.xlsx"
enterprise_raw = pd.read_excel(GCS_FILE_PATH + enterprise)
enterprise_df = to_snakecase(enterprise_raw)

In [3]:
# 'project' holds the project id; name it project_id so the later merge key is explicit.
enterprise_df = enterprise_df.rename({'project': 'project_id'}, axis="columns")

In [4]:
# Keep only the columns used in the rest of the notebook.
enterprise_keep_cols = ['fy', 'cycle', 'project_id', 'project_name', 'tot_exp']
enterprise_df = enterprise_df.loc[:, enterprise_keep_cols]

In [5]:
enterprise_df.head()

Unnamed: 0,fy,cycle,project_id,project_name,tot_exp
0,2016,One,16000007,Refurbishment of Seven Light Rail Vehicles,2182302.44
1,2016,One,16000008,South Bay Bus Rapid Transit (BRT) Project,30770.46
2,2016,One,16000041,Bravo! Route 560 Rapid Buses,1972865.52
3,2017,One,16000007,Refurbishment of Seven Light Rail Vehicles,3691437.55
4,2017,One,16000008,South Bay Bus Rapid Transit (BRT) Project,1646527.16


In [6]:
enterprise_df.shape

(254, 5)

In [7]:
f'There are {enterprise_df.project_id.nunique()} unique project IDS'

'There are 106 unique project IDS'

In [8]:
enterprise_df.isna().sum()

fy              0
cycle           0
project_id      0
project_name    0
tot_exp         0
dtype: int64

#### Aggregate by project name and each cycle, so each project has only one record for total expenses

####  <font color='red'> 88 projects but only 74 in TIRCP</font> 

In [9]:

# One row per (project name, cycle) with the summed expenditures.
enterprise_df_group = (
    enterprise_df
    .groupby(['project_name', 'cycle'], as_index=False)['tot_exp']
    .sum()
)

In [10]:
enterprise_df_group.project_name.nunique()

88

In [11]:
enterprise_df_group.shape

(88, 3)

In [12]:
enterprise_df_group

Unnamed: 0,project_name,cycle,tot_exp
0,#Electrify Anaheim (ZEV Bus Procurement),Three,13492703.78
1,#Electrify Anaheim: Changing the Transit Paradigm in So Cal,Three,1726121.55
2,#Electrify Anaheim: Changing the Transit Paradigm in So.CA,Three,610535.78
3,(2018: 11) All Aboard 2018: Transform SoCal Rail T (Fencing),Three,12562.03
4,ACE Near-Term Capacity Improvement Program,Two,428492.45
5,ACE Wayside Power Project,One,15000.0
6,AVTA Component- Southern California Zero Emission Regional C,Two,6481000.0
7,Accelerating Rail Modernization and Expansion (LRV Procure),Three,31090378.54
8,All Aboard 2018 (On-Time Performance Incentive),Three,6921214.22
9,All Aboard 2018 (Right of Way Fencing),Three,58994.67


#### Aggregate by project ID and each cycle, so each project has only one record for total expenses

In [13]:
# One row per (project id, cycle) with the summed expenditures across fiscal years.
enterprise_df_project_id = (
    enterprise_df
    .groupby(['project_id', 'cycle'])['tot_exp']
    .sum()
    .reset_index()
)

In [14]:
enterprise_df_project_id.shape

(106, 3)

In [15]:
enterprise_df_project_id

Unnamed: 0,project_id,cycle,tot_exp
0,16000007,One,6315208.89
1,16000008,One,4000000.0
2,16000009,One,38023039.68
3,16000041,One,2320000.0
4,16000048,One,22284205.53
5,16000119,One,277840.47
6,16000121,One,41181000.0
7,16000188,One,713000.0
8,16000237,One,11000000.0
9,16000238,One,6841000.0


## Merge in with allocation sheet of Excel workbook 
* There are 171 unique project IDs in our allocation sheet compared to 106 project IDs in Data Link.
* Lots of missing project IDs: 107 out of 307 rows of data.

In [16]:
# Read in the allocation sheet through the TIRCP_functions helper: it is the sheet
# that carries the Project IDs needed to link to the Data Link expenditures.
allocation = TIRCP_functions.allocation()

In [17]:
allocation.shape

(307, 49)

In [18]:
allocation.isna().sum()

Award_Year                                  0
Project_#                                   1
Award_Recipient                             1
Implementing_Agency                         1
Project_ID                                107
EA                                        122
Components                                  1
Phase                                       2
Allocation_Amt_Allocation_Sheet             0
Expended_Amt_Allocation_Sheet               0
SB1_Funding                                 0
SB1_Budget_Year                           140
GGRF_Funding                                0
GGRF_Budget_Year                          307
CTC_Financial_Resolution                  100
CTC_Allocation_Amendment                  307
CTC_Waiver                                307
CTC_CalSTA_Waiver                         307
PSA_#                                     113
CT_Document_#                             177
Date_Branch_Chief_Receives_PSA            304
Date_Regional_Coordinator_Receives

In [19]:
# Only the join keys are needed from the allocation sheet.
allocation_key_cols = ['Award_Year', 'Project_ID', 'PPNO']
allocation_df = allocation.loc[:, allocation_key_cols]

In [20]:
f'There are {allocation_df.Project_ID.nunique()} unique project ids in allocation'

'There are 171 unique project ids in allocation'

In [21]:
# Rows without a Project ID cannot be linked to Data Link, so drop them.
has_project_id = allocation_df['Project_ID'].notna()
allocation_df = allocation_df[has_project_id]

In [22]:
allocation_df.shape

(200, 3)

In [23]:
# Outer join so the `_merge` indicator shows which ids exist on only one side.
joined_alloc = allocation_df.merge(
    enterprise_df_project_id,
    how='outer',
    left_on='Project_ID',
    right_on='project_id',
    indicator=True,
)

In [24]:
joined_alloc._merge.value_counts()

both          123
left_only      77
right_only      2
Name: _merge, dtype: int64

In [25]:
joined_alloc.shape

(202, 7)

In [26]:
joined_alloc.head(100)

Unnamed: 0,Award_Year,Project_ID,PPNO,project_id,cycle,tot_exp,_merge
0,2015.0,16000048,CP005,16000048.0,One,22284205.53,both
1,2015.0,16000276,CP012,16000276.0,One,4619999.9,both
2,2016.0,16000276,CP023,16000276.0,One,4619999.9,both
3,2015.0,16000329,CP015,16000329.0,One,38494000.0,both
4,2015.0,16000119,CP007,16000119.0,One,277840.47,both
5,2015.0,16000275,CP013,16000275.0,One,10000000.0,both
6,2015.0,16000041,CP004,16000041.0,One,2320000.0,both
7,2015.0,16000007,CP001,16000007.0,One,6315208.89,both
8,2015.0,16000008,CP003,16000008.0,One,4000000.0,both
9,2015.0,16000188,CP008,16000188.0,One,713000.0,both


In [27]:
# Keep only ids found in both the allocation sheet and Data Link; the indicator is no longer needed.
matched_in_both = joined_alloc['_merge'] == 'both'
joined_alloc2 = joined_alloc[matched_in_both].drop('_merge', axis=1)

In [28]:
# Group so that there is only one row for each PPNO number & Award Year.
# Every allocation row carries the *full* Data Link total of its project, so an
# allocation sheet that lists the same project more than once under the same
# PPNO/Award_Year would have that total summed repeatedly. Collapse those repeats
# first so each project contributes its expenditures exactly once per PPNO/year.
# NOTE(review): a project id mapped to two different PPNOs (e.g. 16000276 ->
# CP012 and CP023 in the preview above) is still counted under both — confirm intent.
joined_alloc3 = (joined_alloc2
                  .drop_duplicates(subset=['PPNO', 'Award_Year', 'project_id'])
                  .groupby(['PPNO', 'Award_Year'])
                  .agg({'tot_exp':'sum'})
                  .reset_index()
                 )

In [29]:
joined_alloc3.shape

(51, 3)

In [30]:
# Tag every column with its origin so nothing clashes with the project sheet in the next join.
joined_alloc3 = joined_alloc3.rename(columns=lambda col: 'from_joined_alloc_df_' + col)

In [31]:
joined_alloc3

Unnamed: 0,from_joined_alloc_df_PPNO,from_joined_alloc_df_Award_Year,from_joined_alloc_df_tot_exp
0,1155,2018.0,486509.7
1,1230,2016.0,9204000.0
2,2320B,2018.0,500000.0
3,CP001,2015.0,6315208.89
4,CP002,2015.0,38023039.68
5,CP003,2015.0,4000000.0
6,CP004,2015.0,2320000.0
7,CP005,2015.0,22284205.53
8,CP006,2015.0,41181000.0
9,CP006,2016.0,45092000.0


In [32]:
# Cast Award_Year to int64 so it has the same dtype as the project sheet's Award_Year
# merge key. The result must be assigned back: astype returns a new frame, so the
# bare expression only displayed the dtypes of a throwaway copy.
joined_alloc3 = joined_alloc3.astype({'from_joined_alloc_df_Award_Year': 'int64'})
joined_alloc3.dtypes

from_joined_alloc_df_PPNO           object
from_joined_alloc_df_Award_Year      int64
from_joined_alloc_df_tot_exp       float64
dtype: object

## Joining original sheets on Award Year & PPNO

In [33]:
# Load the project sheet through the TIRCP_functions helper; it is the left side of
# the PPNO / Award_Year merge with the Data Link expenditures below.
project = TIRCP_functions.project()

In [34]:
project.columns

Index(['Award_Year', 'Project_#', 'Local_Agency', 'Vendor_ID_#',
       'Project_Title', 'District', 'County', 'Key_Project_Elements',
       'Master_Agreement_Number', 'Master_Agreement_Expiration_Date',
       'Project_Manager', 'Regional_Coordinator',
       'Technical_Assistance-CALTP_(Y/N)', 'Technical_Assistance-Fleet_(Y/N)',
       'Technical_Assistance-Network_Integration_(Y/N)',
       'Technical_Assistance-Priority_Population_(Y/N)', 'Total_Project_Cost',
       'TIRCP_project_sheet', 'Allocated_Amount',
       'Unallocated_amt_project_sheet', 'Percentge_Allocated',
       'Expended_Amt_project_sheet', 'Other_Funds_Involved', 'Award_Cycle',
       'Local_Agency_Address', 'Local_Agency_City', 'Local_Agency_Zip',
       'Local_Agency_Contact', 'Local_Agency_Email',
       'Local_Agency_Phone_Number', 'Comments/Additional_Contacts', 'PPNO'],
      dtype='object')

In [35]:
project.dtypes

Award_Year                                                 int64
Project_#                                                  int64
Local_Agency                                              object
Vendor_ID_#                                               object
Project_Title                                             object
District                                                  object
County                                                    object
Key_Project_Elements                                      object
Master_Agreement_Number                                   object
Master_Agreement_Expiration_Date                  datetime64[ns]
Project_Manager                                           object
Regional_Coordinator                                      object
Technical_Assistance-CALTP_(Y/N)                          object
Technical_Assistance-Fleet_(Y/N)                          object
Technical_Assistance-Network_Integration_(Y/N)            object
Technical_Assistance-Prio

In [36]:
# Left join keeps every project-sheet row; `_merge` flags the ones without Data Link expenditures.
project_keys = ['PPNO', 'Award_Year']
alloc_keys = ['from_joined_alloc_df_PPNO', 'from_joined_alloc_df_Award_Year']
final_join = project.merge(
    joined_alloc3,
    how='left',
    left_on=project_keys,
    right_on=alloc_keys,
    indicator=True,
)

In [37]:
final_join._merge.value_counts()

both          49
left_only     25
right_only     0
Name: _merge, dtype: int64

#### Check to see if PPNO matches across the original project sheet & the joined dataframe

In [38]:
final_join.shape

(74, 36)

In [39]:
final_join.PPNO.nunique()

70

In [40]:
project.PPNO.nunique()

70

In [41]:
# PPNOs present after the join but absent from the project sheet (expected: none).
PPNO_project = {*project['PPNO'].unique().tolist()}
PPNO_join = {*final_join['PPNO'].unique().tolist()}
PPNO_join.difference(PPNO_project)

set()

In [42]:
final_join.columns

Index(['Award_Year', 'Project_#', 'Local_Agency', 'Vendor_ID_#',
       'Project_Title', 'District', 'County', 'Key_Project_Elements',
       'Master_Agreement_Number', 'Master_Agreement_Expiration_Date',
       'Project_Manager', 'Regional_Coordinator',
       'Technical_Assistance-CALTP_(Y/N)', 'Technical_Assistance-Fleet_(Y/N)',
       'Technical_Assistance-Network_Integration_(Y/N)',
       'Technical_Assistance-Priority_Population_(Y/N)', 'Total_Project_Cost',
       'TIRCP_project_sheet', 'Allocated_Amount',
       'Unallocated_amt_project_sheet', 'Percentge_Allocated',
       'Expended_Amt_project_sheet', 'Other_Funds_Involved', 'Award_Cycle',
       'Local_Agency_Address', 'Local_Agency_City', 'Local_Agency_Zip',
       'Local_Agency_Contact', 'Local_Agency_Email',
       'Local_Agency_Phone_Number', 'Comments/Additional_Contacts', 'PPNO',
       'from_joined_alloc_df_PPNO', 'from_joined_alloc_df_Award_Year',
       'from_joined_alloc_df_tot_exp', '_merge'],
      dtype='object

### Checking out our 2 columns of expended amounts:
* From original project data sheet
* Second merge of Data Link with Allocations df

In [43]:
final_join[['Award_Year','PPNO','Project_Title', 'TIRCP_project_sheet',
            'Expended_Amt_project_sheet', 'from_joined_alloc_df_tot_exp', '_merge']]

Unnamed: 0,Award_Year,PPNO,Project_Title,TIRCP_project_sheet,Expended_Amt_project_sheet,from_joined_alloc_df_tot_exp,_merge
0,2015,CP005,Regional Transit Interconnectivity & Environmental Sustability,24403000.0,21714177.53,22284205.53,both
1,2015,CP012,Travel Time Reduction Project,4620000.0,4619999.9,4619999.9,both
2,2015,CP015,Willowbrook/Rosa Parks Station & Blue Line Light Rail Operational Improvements Project,38494000.0,38494000.0,38494000.0,both
3,2015,CP007,Pacific Surfliner Transit Transfer Program,1675000.0,277840.47,277840.47,both
4,2015,CP013,Monterey Bay Operations and Maintenance Facility/Salinas Transit Service Project,10000000.0,0.0,10000000.0,both
5,2015,CP004,Bravo! Route 560 Rapid Buses,2320000.0,0.0,2320000.0,both
6,2015,CP001,Sacramento Regional Transit's Refurbishment of 7 Light Rail Vehicles,6427000.0,0.0,6315208.89,both
7,2015,CP003,South Bay Bus Rapid Transit,4000000.0,4000000.0,4000000.0,both
8,2015,CP008,San Diego Metropolitan Transit System Trolley Capacity Improvements,31986000.0,28176000.0,31936000.0,both
9,2015,CP006,SFMTA Light Rail Vehicle Fleet Expansion,41181000.0,0.0,41181000.0,both


### Still missing 25 values - Manually replace them.
* Manual replacement: the missing values are filled in from the crosswalk file `Unmatched_Data_Crosswalk.xlsx`.

In [44]:
# Projects whose Data Link expenditure is still missing after the join.
expenditure_check_cols = ['Award_Year', 'PPNO', 'Project_Title', 'from_joined_alloc_df_tot_exp']
final_join2_subset = final_join.loc[:, expenditure_check_cols]
Nulls = final_join2_subset[final_join2_subset['from_joined_alloc_df_tot_exp'].isna()]

In [45]:
# Read in the manually built crosswalk of expenditures for the unmatched projects.
# NOTE(review): this is a relative local path, unlike the GCS reads above, so the
# notebook only runs where this file sits next to it — consider moving it under GCS_FILE_PATH.
crosswalk_expenditures = pd.read_excel('Unmatched_Data_Crosswalk.xlsx')

In [46]:
# Attach the manual crosswalk values to every project row (rows without a crosswalk entry get NaN).
crosswalk_keys = ["Award_Year", "PPNO", "Project_Title"]
final_join2 = final_join.merge(crosswalk_expenditures, how="left", on=crosswalk_keys)

In [47]:
# Fill missing Data Link expenditures directly from the manual crosswalk column.
# No placeholder dollar amount (previously 888) is written into the column, so a
# project without a crosswalk entry stays NaN instead of carrying a fake value.
final_join2['from_joined_alloc_df_tot_exp'] = (
    final_join2['from_joined_alloc_df_tot_exp'].fillna(final_join2['Expenses_Manual'])
)

In [48]:
# Map the crosswalk: wherever the expenditure still holds the 888 placeholder,
# take the manual crosswalk value instead; every other row is left as is.
expended_from_join = final_join2['from_joined_alloc_df_tot_exp']
final_join2['from_joined_alloc_df_tot_exp'] = expended_from_join.mask(
    expended_from_join == 888.0, final_join2['Expenses_Manual']
)

### Some expended values aren't really correct compared to SAR

In [49]:
# Hand-entered expenditures for projects whose joined value disagrees with the SAR.
# NOTE(review): rows are selected by PPNO alone, and a PPNO can appear under more than
# one award year (CP006 does in the check table further down) — confirm each override
# is meant for every row sharing that PPNO.
sar_expended_overrides = {
    'CP023': 8999000,
    'CP036': 80340000,
    'CP030': 1088499000,  # CP030 Cycle 3 2018 Lossan
}
for ppno, expended in sar_expended_overrides.items():
    final_join2.loc[final_join2['PPNO'] == ppno, "from_joined_alloc_df_tot_exp"] = expended

## Fill in $0 TIRCP Amount
* CP045
* CP046
* CP078

In [50]:
# Hand-entered TIRCP amounts for the projects listed above that were recorded as $0.
tircp_amount_overrides = {
    'CP045': 8641000,
    'CP046': 10788000,
    'CP078': 15000000,
}
for ppno, tircp_amount in tircp_amount_overrides.items():
    final_join2.loc[final_join2['PPNO'] == ppno, "TIRCP_project_sheet"] = tircp_amount


## Missing allocated amounts & unallocated - fill in w/ TIRCP amount.
* Clarified with Linda that these should be filled in with TIRCP amount

In [51]:
# Projects recorded with a $0 allocated amount.
missing_allocation = final_join2[final_join2['Allocated_Amount'].eq(0)]

In [52]:
# Narrow to the identifying and dollar columns needed to build the crosswalk.
missing_allocation_cols = [
    'Project_Title', 'PPNO', 'TIRCP_project_sheet',
    'Unallocated_amt_project_sheet',
    'Allocated_Amount', 'from_joined_alloc_df_tot_exp',
]
missing_allocation = missing_allocation.loc[:, missing_allocation_cols]

In [53]:
#export missing allocated amounts to csv to crosswalk in. 
#missing_allocation = missing_allocation.to_excel('missing_allocated_amounts.xlsx')

In [54]:
# Read in the manually built crosswalk of allocated amounts.
# NOTE(review): relative local path, unlike the GCS reads above — the file must sit
# next to the notebook; consider moving it under GCS_FILE_PATH.
crosswalk_allocated = pd.read_excel('Unmatched_Allocated_Crosswalk.xlsx')

In [55]:
# These crosswalk columns are not used by the merge that follows; dropping
# 'from_joined_alloc_df_tot_exp' also avoids a name clash with final_join2.
crosswalk_unused_cols = ['Unallocated_amt_manual', 'from_joined_alloc_df_tot_exp']
crosswalk_allocated = crosswalk_allocated.drop(crosswalk_unused_cols, axis=1)

In [56]:
# Attach the manual allocated amounts to the project rows.
# NOTE(review): the key omits Award_Year, unlike the expenditure crosswalk merge —
# confirm PPNO + Project_Title is unique in the crosswalk so no rows are duplicated.
final_join2 = final_join2.merge(crosswalk_allocated, how="left", on=["PPNO", "Project_Title"])

In [57]:
# Map the crosswalk: a $0 allocated amount is replaced by the manual value;
# every other row keeps its original allocated amount.
allocated_amount = final_join2['Allocated_Amount']
final_join2['Allocated_Amount'] = allocated_amount.mask(
    allocated_amount == 0, final_join2['Allocated_Amount_Manual']
)

In [58]:
#Check that everything mapped over: allocated, expenses, TIRCP
final_join2[['PPNO','TIRCP_project_sheet',
             'Allocated_Amount', 'from_joined_alloc_df_tot_exp']].sort_values('PPNO')

Unnamed: 0,PPNO,TIRCP_project_sheet,Allocated_Amount,from_joined_alloc_df_tot_exp
56,1155,10148000.0,500000.0,486509.7
24,1230,9204000.0,9204000.0,9204000.0
29,2320B,14000000.0,8000000.0,500000.0
6,CP001,6427000.0,6427000.0,6315209.0
12,CP002,41181000.0,41181000.0,38023040.0
7,CP003,4000000.0,4000000.0,4000000.0
5,CP004,2320000.0,2320000.0,2320000.0
0,CP005,24403000.0,24403000.0,22284210.0
25,CP006,45092000.0,45092000.0,45092000.0
46,CP006,26867000.0,26867000.0,26867000.0


## Running new dataframe with my Tableau function.

In [59]:
# Drop helper/duplicate columns that the Tableau function does not need.
tableau_unused_cols = [
    'Expended_Amt_project_sheet',
    'from_joined_alloc_df_PPNO',
    'from_joined_alloc_df_Award_Year',
    '_merge',
    'Unallocated_amt_project_sheet',
    'Allocated_Amount_Manual',
]
final_join3 = final_join2.drop(tableau_unused_cols, axis=1)

In [60]:
# Give the Data Link expenditures the project-sheet column name that tableau() expects.
final_join3 = final_join3.rename(
    {'from_joined_alloc_df_tot_exp': 'Expended_Amt_project_sheet'}, axis="columns"
)

In [96]:
# Columns carried into the Tableau extract, in output order.
_TABLEAU_COLUMNS = [
    'PPNO', 'Award_Year', 'Project_#', 'Local_Agency', 'Vendor_ID_#',
    'Project_Title', 'District', 'County', 'Key_Project_Elements',
    'Master_Agreement_Number', 'Master_Agreement_Expiration_Date',
    'Project_Manager', 'Regional_Coordinator',
    'Technical_Assistance-CALTP_(Y/N)', 'Technical_Assistance-Fleet_(Y/N)',
    'Technical_Assistance-Network_Integration_(Y/N)',
    'Technical_Assistance-Priority_Population_(Y/N)', 'Total_Project_Cost',
    'TIRCP_project_sheet', 'Allocated_Amount',
    'Percentge_Allocated',
    'Expended_Amt_project_sheet', 'Other_Funds_Involved',
]

# Progress label per (award year, expended-percent group). Any combination that is
# not listed here gets _PROGRESS_DEFAULT.
_PROGRESS_BY_YEAR = {
    2015: {"1-25": "Behind", "26-50": "Behind", "51-75": "Behind", "76-99": "On Track"},
    2016: {"1-25": "Behind", "26-50": "Behind", "51-75": "On Track", "76-99": "On Track"},
    2018: {"1-25": "Behind", "26-50": "On Track", "51-75": "On Track", "76-99": "Ahead"},
    2020: {"1-25": "Behind", "26-50": "On Track", "51-75": "Ahead", "76-99": "Ahead"},
}
_PROGRESS_DEFAULT = "100% of allocated funds spent"


def _expended_percent_group(pct):
    """Bin a Series of expended ratios into the '1-25' ... '76-99' / '100' labels."""
    conditions = [
        (pct > 0) & (pct < .26),
        (pct > .25) & (pct < .51),
        (pct > .50) & (pct < .76),
        (pct > .75) & (pct < 1.0),
    ]
    labels = ["1-25", "26-50", "51-75", "76-99"]
    # NOTE(review): everything else lands in "100" — that includes ratios of exactly 0
    # (nothing expended, or an undefined ratio that was zero-filled) as well as ratios
    # above 1. Confirm that is the intended label for those rows.
    return np.select(conditions, labels, default="100")


def _progress_labels(award_years, percent_groups):
    """Look up the progress label for each (award year, expended-percent group) pair."""
    # NOTE(review): award years other than 2015/2016/2018/2020 fall through to the
    # "100% of allocated funds spent" default regardless of how much was spent — confirm.
    return [
        _PROGRESS_BY_YEAR.get(year, {}).get(group, _PROGRESS_DEFAULT)
        for year, group in zip(award_years, percent_groups)
    ]


def _project_category(amounts):
    """Label each TIRCP amount Small / Medium / Large relative to the column's quartiles."""
    p25 = float(amounts.quantile(0.25))
    p75 = float(amounts.quantile(0.75))
    is_positive = amounts > 0
    conditions = [
        is_positive & (amounts < p25),
        # Inclusive on both ends: an amount exactly equal to a quartile is a real,
        # non-zero award and must not fall through to the "$0 recorded" label.
        is_positive & (amounts >= p25) & (amounts <= p75),
        is_positive & (amounts > p75),
    ]
    labels = ["Small", "Medium", "Large"]
    return np.select(conditions, labels, default="$0 recorded for TIRCP")


def tableau(df):
    """Build the Tableau extract from the joined project/expenditure frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain every column in ``_TABLEAU_COLUMNS``. It is not modified.

    Returns
    -------
    pandas.DataFrame
        The selected columns plus ``Expended_Percent``, ``Allocated_Percent``,
        ``Unallocated_Amount``, ``Expended_Percent_Group``, ``Progress`` and
        ``Project_Category``; ``Expended_Amt_project_sheet`` is renamed to
        ``Expended_Amount`` and ``TIRCP_project_sheet`` to ``TIRCP_Amount``.

    Raises
    ------
    KeyError
        If any of the expected columns is missing from ``df``.
    """
    # Work on an explicit copy so the new columns are written to our own frame;
    # assigning onto the bare column subset is what raised SettingWithCopyWarning.
    out = df[_TABLEAU_COLUMNS].copy()

    # Ratios of expended to allocated, and allocated to awarded.
    out['Expended_Percent'] = out['Expended_Amt_project_sheet'] / out['Allocated_Amount']
    out['Allocated_Percent'] = out['Allocated_Amount'] / out['TIRCP_project_sheet']

    # TIRCP amount that has not been allocated yet.
    out['Unallocated_Amount'] = out["TIRCP_project_sheet"] - out["Allocated_Amount"]

    # Undefined ratios (0/0 -> NaN, x/0 -> +/-inf) are reported as 0.
    pct_cols = ['Expended_Percent', 'Allocated_Percent']
    out[pct_cols] = out[pct_cols].replace([np.inf, -np.inf], np.nan).fillna(value=0)

    out["Expended_Percent_Group"] = _expended_percent_group(out['Expended_Percent'])
    out['Progress'] = _progress_labels(out['Award_Year'], out['Expended_Percent_Group'])
    out["Project_Category"] = _project_category(out['TIRCP_project_sheet'])

    # Rename cols to the names used in Tableau.
    out = out.rename(columns={'Expended_Amt_project_sheet': 'Expended_Amount',
                              'TIRCP_project_sheet': "TIRCP_Amount"})
    return out

In [97]:
# Build the Tableau extract from the joined frame that now carries the Data Link expenditures.
test = tableau(final_join3)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

In [64]:
# Show floats with two decimals. The earlier '{:,}' assignment was removed: it was
# overwritten on the very next line, so it never took effect.
pd.options.display.float_format = "{:.2f}".format

In [98]:
test[['Award_Year','PPNO','Project_Title', 'TIRCP_Amount','Allocated_Amount','Unallocated_Amount','Expended_Amount','Expended_Percent','Progress']].sort_values('Expended_Percent')

Unnamed: 0,Award_Year,PPNO,Project_Title,TIRCP_Amount,Allocated_Amount,Unallocated_Amount,Expended_Amount,Expended_Percent,Progress
59,2020,CP061,Sacramento Valley Station (SVS) Transit Center,3914000.0,720000.0,3194000.0,981.93,0.0,Behind
34,2018,CP079,Southwest Fresno Community Connector,7798000.0,7798000.0,0.0,63708.0,0.01,Behind
37,2018,CP051,Dublin/Pleasanton Capacity Improvement and Congestion Reduction Program,20500000.0,20500000.0,0.0,382718.05,0.02,Behind
45,2018,CP032,Blue Line Rail Corridor Transit Enhancements,40098000.0,32882000.0,7216000.0,1643395.45,0.05,Behind
26,2016,CP025,ACE Near-Term Capacity Improvement Program\n,16459000.0,8000000.0,8459000.0,428492.45,0.05,Behind
29,2018,2320B,Purchase Zero Emission High Capacity Buses to Support Transbay Tomorrow and Clean Corridors Plan,14000000.0,8000000.0,6000000.0,500000.0,0.06,Behind
55,2018,CP033,Southern California Optimized Rail Expansion (SCORE),875708000.0,364207000.0,511501000.0,33499300.6,0.09,Behind
32,2018,CP055,The Transbay Corridor Core Capacity Program: Vehicle Acquistion and Communications-Based Train Control System,318600000.0,368600000.0,-50000000.0,37515342.18,0.1,Behind
67,2020,CP069,SDConnect: San Diego Rail Improvement Program,12100000.0,4494000.0,7606000.0,644919.97,0.14,Behind
28,2016,CP026,SB 132 ACE Extension Lathrop to Ceres/Merced,400000000.0,102899816.0,297100184.0,16750768.0,0.16,Behind


In [94]:
test.Progress.value_counts()

100% of allocated funds spent    37
On Track                         16
Behind                           15
Ahead                             6
Name: Progress, dtype: int64

In [100]:
test.TIRCP_Amount.describe()

count           74.00
mean      78363824.32
std      191325631.46
min         200000.00
25%        8160250.00
50%       18229500.00
75%       40910250.00
max     1088499000.00
Name: TIRCP_Amount, dtype: float64

In [99]:
# Write the Tableau extract to the GCS folder as an Excel workbook, without the index column.
test.to_excel(f'{GCS_FILE_PATH}tableau_with_temporary_expenditure_sol.xlsx', index = False)