In [3]:
import matplotlib.pyplot as plt
import pandas as pd
import shared_utils

# set_option to increase max rows displayed to 200, to see entire df in 1 go/
pd.set_option("display.max_rows", 200)

## AGREEMENT ALLOCATIONS SHEET DATA

### Agreement Allocations - Read in Raw data

In [None]:
url = "gs://calitp-analytics-data/data-analyses/bus_procurement_cost/TIRCP Tracking Sheets 2_1-10-2024.xlsx"
sheet_name = "Agreement Allocations"

In [None]:
tircp = pd.read_excel(url, sheet_name)

### Agreement Allocations -Data Cleaning and QC

In [None]:
# reducing initialdf to first 11 columns.
tircp = tircp.iloc[:, :12]

In [None]:
# dictionary for column name update
new_col = [
    "award_year",
    "project_#",
    "grant_recipient",
    "implementing_agency",
    "ppno",
    "project_id",
    "ea",
    "components",
    "#_of_buses",
    "phase",
    "allocation_amount",
    "expended_amount",
]

In [None]:
tircp.columns = new_col
tircp.columns

In [None]:
tircp = tircp.drop("expended_amount", axis=1)

In [None]:
# fill NaN with zero?
# see if you can sum the bus column
tircp.agg({"#_of_buses": "sum"})
# nope this is correct

In [None]:
display(tircp.shape, list(tircp.columns), tircp.head())

In [None]:
tircp.grant_recipient.nunique()

In [None]:
# use strip to help combine names
tircp["grant_recipient"] = tircp["grant_recipient"].str.strip()

tircp.grant_recipient.nunique()

In [None]:
# see list of unique names
# may be able to consolidate a few
tircp.grant_recipient.sort_values().unique()

In [None]:
new_dict = {
    "Antelope Valley Transit Authority": "Antelope Valley Transit Authority (AVTA)",
    "Bay Area Rapid Transit District": "Bay Area Rapid Transit (BART)",
    "Capitol Corridor Joint Powers Authority": "Capitol Corridor Joint Powers Authority (CCJPA)",
    "Los Angeles County Metropolitan Transportation (LA Metro)": "Los Angeles County Metropolitan Transportation Authority (LA Metro)",
    "Los Angeles County Metropolitan Transportation Authority": "Los Angeles County Metropolitan Transportation Authority (LA Metro)",
    "Sacramento Regional Transit (SacRT)": "Sacramento Regional Transit District (SacRT)",
    "Sacramento Regional Transit District": "Sacramento Regional Transit District (SacRT)",
    "San Diego Metropolitan Transit System (SDMTS)": "San Diego Metropolitan Transit System (MTS)",
    "San Francisco Bay Area Water Emergency Transportation Authority": "San Francisco Bay Area Water Emergency Transportation Authority (WETA)",
    "San Francisco Municipal Transportation Agency": "San Francisco Municipal Transportation Authority (SFMTA)",
    "Santa Barbara County Association of Governments\n(SBCAG)": "Santa Barbara County Association of Governments (SBCAG)",
    "Santa Clara Valley Transportation Authority": "Santa Clara Valley Transportation Authority (VTA)",
    "Transportation Agency for Monterey County": "Transportation Agency for Monterey County (TAMC)",
}

In [None]:
# replace the values in grant_recipient using dict
# df.replace({'bus_desc': new_dict}, inplace=True)
tircp = tircp.replace({"grant_recipient": new_dict})

In [None]:
#see that some rows were consolidated
display(tircp.grant_recipient.nunique())

### Agreement Allocations-Export Cleaned data

In [None]:
tircp.to_csv(
    "gs://calitp-analytics-data/data-analyses/bus_procurement_cost/tircp_allocations_clean.csv"
)

### Agreement Allocations-Read in Cleaned data from GCS

In [None]:
tircp = pd.read_csv(
    "gs://calitp-analytics-data/data-analyses/bus_procurement_cost/tircp_allocations_clean.csv"
)

In [None]:
display(tircp.shape, tircp.columns, tircp.head())

### Agreement Allocations-Cost per Bus, per agency

In [None]:
# filer to project with bus count values
# caveat: some rows in "component" column state some variation of "purchased buses", but did not specify the amount of buses. 
# only rows stating the specificy number of buses purchased are included
only_bus = tircp[tircp['#_of_buses']>0]


In [None]:
display(only_bus.shape)

In [None]:
#aggregate # of buses and allocation by transit agency
bus_cost = only_bus.groupby('grant_recipient').agg({
    '#_of_buses':"sum",
    'allocation_amount':'sum'
}).reset_index()

In [None]:
bus_cost

In [None]:
bus_cost['cost_per_bus']= ((bus_cost['allocation_amount'])/(bus_cost['#_of_buses'])).astype('int64')

In [None]:
display(bus_cost.dtypes,bus_cost)

In [None]:
#exporting cost per bus
bus_cost.to_csv("gs://calitp-analytics-data/data-analyses/bus_procurement_cost/tircp_allocation_cost_per_bus.csv")

### Agreement Allocations - Stat analysis

In [None]:
bus_cost

In [None]:
plt.figure(
plt.hist(bus_cost['cost_per_bus'],density=True)
plt.show()

## PROJECT TRACKING SHEET DATA

### project tracking -  read raw data


In [4]:
url = "gs://calitp-analytics-data/data-analyses/bus_procurement_cost/TIRCP Tracking Sheets 2_1-10-2024.xlsx"
sheet_name = "Project Tracking"

In [5]:
project = pd.read_excel(url, sheet_name)

  warn(msg)


In [6]:
display(
    project.shape,
    project.columns,
    project.dtypes,
)

(124, 49)

Index(['Award Year', 'Project #', 'Grant Recipient', 'Project Title', 'PPNO',
       'District', 'County', 'Project Description', 'bus_count',
       'Master Agreement Number', 'Master Agreement Expiration Date',
       'Project Manager', 'Regional Coordinator',
       'Technical Assistance-CALITP (Y/N)', 'Technical Assistance-Fleet (Y/N)',
       'Technical Assistance-Network Integration (Y/N)',
       'Technical Assistance-Priority Population (Y/N)', 'Total Project Cost',
       'TIRCP Award Amount ($)', 'Allocated Amount', 'Unallocated Amount',
       'Percentage Allocated', 'Expended Amount', 'Other Funds Involved',
       'Award Cycle', 'Is SB1?', 'Is GGRF?', 'Is IIJA?', 'ON SHS?', 'CalITP',
       'Estimated TIRCP GHG Reductions', 'Estemated Project Completion',
       'Estimated TIRCP GHG Reductions2', 'Increased Ridership',
       'Service Integration', 'Improve Safety', 'Project Readiness',
       'Funding Leverage', 'Multi-Agency Coordination/Integration',
       'AB 1550 Com

Award Year                                          int64
Project #                                           int64
Grant Recipient                                    object
Project Title                                      object
PPNO                                               object
District                                           object
County                                             object
Project Description                                object
bus_count                                         float64
Master Agreement Number                            object
Master Agreement Expiration Date                   object
Project Manager                                    object
Regional Coordinator                               object
Technical Assistance-CALITP (Y/N)                  object
Technical Assistance-Fleet (Y/N)                   object
Technical Assistance-Network Integration (Y/N)     object
Technical Assistance-Priority Population (Y/N)     object
Total Project 

## Project Tracking- data cleaning

### data frame cleaning

In [7]:
#only keep first couple of columns
#tircp = tircp.iloc[:, :12]
project = project.iloc[:, :20]

In [8]:
list(project.columns)

['Award Year',
 'Project #',
 'Grant Recipient',
 'Project Title',
 'PPNO',
 'District',
 'County',
 'Project Description',
 'bus_count',
 'Master Agreement Number',
 'Master Agreement Expiration Date',
 'Project Manager',
 'Regional Coordinator',
 'Technical Assistance-CALITP (Y/N)',
 'Technical Assistance-Fleet (Y/N)',
 'Technical Assistance-Network Integration (Y/N)',
 'Technical Assistance-Priority Population (Y/N)',
 'Total Project Cost',
 'TIRCP Award Amount ($)',
 'Allocated Amount']

In [9]:
#drop specific columns
drop_col=[
 'Master Agreement Expiration Date',
 'Project Manager',
 'Regional Coordinator',
 'Technical Assistance-CALITP (Y/N)',
 'Technical Assistance-Fleet (Y/N)',
 'Technical Assistance-Network Integration (Y/N)',
 'Technical Assistance-Priority Population (Y/N)',]

In [10]:
project.drop(columns=drop_col, inplace=True)

In [12]:
len(project.columns)

13

In [13]:
#replace space with _ & lower everything
project.columns =project.columns.str.replace(' ','_')
project.columns=project.columns.str.lower()

In [14]:
#check work
project.columns

Index(['award_year', 'project_#', 'grant_recipient', 'project_title', 'ppno',
       'district', 'county', 'project_description', 'bus_count',
       'master_agreement_number', 'total_project_cost',
       'tircp_award_amount_($)', 'allocated_amount'],
      dtype='object')

### check columns
check values of all columns to see if:
-any duplicates values
-invalid int/str values


In [16]:
project.columns

Index(['award_year', 'project_#', 'grant_recipient', 'project_title', 'ppno',
       'district', 'county', 'project_description', 'bus_count',
       'master_agreement_number', 'total_project_cost',
       'tircp_award_amount_($)', 'allocated_amount'],
      dtype='object')

In [49]:
#function to check column information

def col_checker(col):
    display(f'Displaying column: {col}',
        len(project[col]),
        list(project[col].sort_values(ascending=True).unique())
           )

In [None]:
#col is OK, all numbers
col_checker('tircp_award_amount_($)')

In [None]:
#col is good, everything is a number
col_checker('total_project_cost')

In [None]:
#col is OK
col_checker('master_agreement_number')

In [None]:
#col is OK
col_checker('bus_count')

In [None]:
# column is OK
col_checker('project_description')

In [None]:
project[project['district']=='VAR']

In [None]:
#Project title OK, 
col_checker('project_title')

In [None]:
#award year OK
col_checker('award_year')

In [None]:
#project num OK
col_checker('project_#')

---

In [None]:
# DROP COL
#Col is OK
col_checker('allocated_amount')

In [None]:
# NEEDS CLEANING grant_recipient need to clean
col_checker('grant_recipient')

In [None]:
#may need to clean, there are rows that say '3, 4' 
col_checker('county')

In [None]:
#Move to cleaning, check what is 'VAR'. various?
#may be ok just check to make sure
project.district.unique()

In [None]:
#couldnt run col_checker, guessing because some PPNO numbers are inconsistent
#may need to clean, there is a ppno of CP052/CP053
project.ppno.unique()

### dropping allocated amount column

In [79]:
#dropping allocated amount column
project.drop(columns=['allocated_amount'], inplace=True)

In [80]:
#checking work
project.columns

Index(['award_year', 'project_#', 'grant_recipient', 'project_title', 'ppno',
       'district', 'county', 'project_description', 'bus_count',
       'master_agreement_number', 'total_project_cost',
       'tircp_award_amount_($)'],
      dtype='object')

### Clean `grant_recipient` column

In [83]:
list(project.grant_recipient.sort_values(ascending=True).unique())

['Alameda Contra Costa Transit District (AC Transit)',
 'Anaheim Transportation Network (ATN)',
 'Antelope Valley Transit Authority ',
 'Antelope Valley Transit Authority (AVTA)',
 'Antelope Valley Transit Authority (AVTA) & Long Beach Transit (LBT)',
 'Antelope Valley Transit Authority (AVTA) and Victor Valley Transit Agency',
 'Bay Area Rapid Transit (BART)',
 'Capitol Corridor Joint Powers Authority',
 'Capitol Corridor Joint Powers Authority (CCJPA)',
 'Capitol Corridor Joint Powers Authority (CCJPA) with City of Sacramento, City of Hercules, and Santa Cruz Metropolitan Transit District',
 'Capitol Corridor Joint Powers Authority, City of Sacramento, SacRT, & Downtown Railyard Venture',
 'City of Cupertino ',
 'City of Fresno',
 'City of Glendale and Arroyo Verdugo Communities',
 'City of Inglewood',
 'City of Los Angeles (LA DOT)',
 'City of Oakland',
 'City of Pasadena',
 'City of Richmond',
 'City of Santa Monica',
 'City of Simi Valley',
 'City of Torrance',
 'City of Wasco',
 

In [86]:
new_dict ={
 'Antelope Valley Transit Authority ':'Antelope Valley Transit Authority (AVTA)',
 'Humboldt Transit Authority':'Humboldt Transit Authority (HTA)',
 'Orange County Transportation Authority':'Orange County Transportation Authority (OCTA)',
 'Capitol Corridor Joint Powers Authority':'Capitol Corridor Joint Powers Authority (CCJPA)',
 'Los Angeles County Metropolitan Transportation Authority': 'Los Angeles County Metropolitan Transportation Authority (LA Metro)',
 'Monterey-Salinas Transit':'Monterey-Salinas Transit District (MST)',
 'Sacramento Regional Transit (SacRT)':'Sacramento Regional Transit District (SacRT)',
 'Sacramento Regional Transit District':'Sacramento Regional Transit District (SacRT)',
 'Sacramento Regional Transit District (SacRT) ':'Sacramento Regional Transit District (SacRT)',
 'San Diego Association of Governments': 'San Diego Association of Governments (SANDAG)',
 'Santa Clara Valley Transportation Authority (SCVTA)':'Santa Clara Valley Transportation Authority (VTA)',
 'Southern California  Regional Rail Authority (SCRRA)':'Southern California Regional Rail Authority (SCRRA - Metrolink)',
 'Southern California Regional Rail Authority':'Southern California Regional Rail Authority (SCRRA - Metrolink)',
}

In [87]:
#df.replace({'bus_desc': new_dict}, inplace=True)
project.replace({'grant_recipient': new_dict}, inplace=True)

In [None]:
#check work. looks good
list(project['grant_recipient'].sort_values().unique())

### Cleaning `county` column

In [89]:
col_checker('county')

'Displaying column: county'

124

['3, 4',
 'ALA',
 'CC',
 'FRE',
 'Fresno',
 'HUM',
 'KER',
 'LA',
 'LA ',
 'LA, RIV',
 'LAK',
 'MC',
 'MER',
 'MON',
 'NV',
 'OC',
 'ORA',
 'RIV',
 'SAC',
 'SB',
 'SBD',
 'SC',
 'SCL',
 'SD',
 'SF',
 'SJ',
 'SJ ',
 'SM',
 'SON',
 'SON, MRN',
 'TUL',
 'VAR',
 'VEN',
 'YUB']

In [90]:
project[project['county']=='3, 4']

Unnamed: 0,award_year,project_#,grant_recipient,project_title,ppno,district,county,project_description,bus_count,master_agreement_number,total_project_cost,tircp_award_amount_($)
3,2015,4,Los Angeles-San Diego-San Luis Obispo Rail Cor...,Pacific Surfliner Transit Transfer Program (De...,CP007,VAR,"3, 4",LOSSAN and 12 transit agencies from San Luis O...,,64LOSSANMA-A01,1675000,1675000


In [94]:
#change county value from '3, 4' to 'VAR' like the other rows.
project.at[3,'county']='VAR'

In [98]:
#check work
project.iloc[3]

award_year                                                              2015
project_#                                                                  4
grant_recipient            Los Angeles-San Diego-San Luis Obispo Rail Cor...
project_title              Pacific Surfliner Transit Transfer Program (De...
ppno                                                                   CP007
district                                                                 VAR
county                                                                   VAR
project_description        LOSSAN and 12 transit agencies from San Luis O...
bus_count                                                                NaN
master_agreement_number                                       64LOSSANMA-A01
total_project_cost                                                   1675000
tircp_award_amount_($)                                               1675000
Name: 3, dtype: object

### Cleaning `district`column
This is good as is, no cleaning requried. All rows with VAR district has VAR in county as well.

In [91]:
project.district.unique()

array([7, 4, 'VAR', 5, 12, 3, 11, 10, 6, 8, 2, 1], dtype=object)

In [92]:
project[project['district']=='VAR']

Unnamed: 0,award_year,project_#,grant_recipient,project_title,ppno,district,county,project_description,bus_count,master_agreement_number,total_project_cost,tircp_award_amount_($)
3,2015,4,Los Angeles-San Diego-San Luis Obispo Rail Cor...,Pacific Surfliner Transit Transfer Program (De...,CP007,VAR,"3, 4",LOSSAN and 12 transit agencies from San Luis O...,,64LOSSANMA-A01,1675000,1675000
20,2016,7,Los Angeles-San Diego-San Luis Obispo Rail Cor...,All Aboard: Transforming Southern California R...,CP043,VAR,VAR,LOSSAN Wide Network lntegratlon &\nStrategic i...,,64LOSSANMA-A01,350322000,82000000
26,2016,13,San Joaquin Regional Rail Commission (SJRRC)/ ...,ACE Near-Term Capacity Improvement Program\n,CP025,VAR,VAR,Lengthen platforms for 8 car trains and purcha...,,64SJRRCMA A1,18959000,23259000
38,2018,11,Los Angeles-San Diego-San Luis Obispo Rail Cor...,All Aboard 2018: Transforming SoCal Rail Travel,CP031Y,VAR,VAR,Improve on-time performance and rail corridor ...,,64LOSSANMA-A01,65570000,40412000
39,2018,12,Los Angeles-San Diego-San Luis Obispo Rail Cor...,Building Up: LOSSAN North Improvement Program,CP031,VAR,VAR,Improve on-time performance and rail corridor ...,,64LOSSANMA-A01,201669000,147930000
46,2018,19,San Joaquin Joint Powers Authority (SJJPA) & S...,Valley Rail,CP035Y,VAR,VAR,"Expand Commuter Rail Service between Ceres, Mo...",,64SJRRCMA A1,904600000,500500000
55,2018,28,Transportation Agency for Monterey County (TAMC),Extend rail service to Monterey County,1155A,VAR,VAR,This project will implement the extension of t...,,64TAMCMA,81519000,10148000
63,2020,8,Los Angeles-San Diego-San Luis Obispo Rail Cor...,Building Up Control: LOSSAN Service Enhancemen...,CP066,VAR,VAR,Increase ridership through service restructuri...,,64LOSSANMA-A01,87196969,38743000
97,2023,2,Capitol Corridor Joint Powers Authority (CCJPA...,Capitol Corridor Emerging Market Access Program,CP108,VAR,VAR,The project constructs a two‐story regional bu...,,Pending,163861000,30871000
104,2023,9,High Desert Corridor Joint Powers Agency (HDC ...,High Desert Intercity High-Speed Rail Corridor...,CP115,VAR,VAR,"Funding advances project development, includin...",,,57000000,8000000


### Clean `ppno` column
This should all be fine as is, no cleaning needed

In [None]:
list(project.ppno.unique())

In [102]:
project[project['ppno']=='CP052/CP053']

Unnamed: 0,award_year,project_#,grant_recipient,project_title,ppno,district,county,project_description,bus_count,master_agreement_number,total_project_cost,tircp_award_amount_($)
41,2018,14,Sacramento Regional Transit District (SacRT),Accelerating Rail Modernization and Expansion ...,CP052/CP053,3,SAC,Expanded service to Folsom. Combines\nwith pre...,,64SacRTMA,144350000,64350000


---

## Export cleaned Project df 

In [106]:
#exproject cleaned project df
project.to_csv('gs://calitp-analytics-data/data-analyses/bus_procurement_cost/tircp_project_clean.csv')

## Read in cleaned project data

In [108]:
project = pd.read_csv('gs://calitp-analytics-data/data-analyses/bus_procurement_cost/tircp_project_clean.csv')

In [114]:
#ensure df is able to read in
display(project.shape, project.columns)

(124, 13)

Index(['Unnamed: 0', 'award_year', 'project_#', 'grant_recipient',
       'project_title', 'ppno', 'district', 'county', 'project_description',
       'bus_count', 'master_agreement_number', 'total_project_cost',
       'tircp_award_amount_($)'],
      dtype='object')

### filter df for project descriptions that contain bus

In [111]:
bus_only = project[project['bus_count']>0]

In [112]:
#this looks correct
display(project.shape,
        bus_only.shape)

(124, 13)

(35, 13)

### export project- bus only df

In [115]:
bus_only.to_csv('gs://calitp-analytics-data/data-analyses/bus_procurement_cost/tircp_project_bus_only.csv')

### Read in project bus only data


In [116]:
bus_only= pd.read_csv('gs://calitp-analytics-data/data-analyses/bus_procurement_cost/tircp_project_bus_only.csv')

In [117]:
display(bus_only.shape,
        bus_only.columns,
        bus_only.head())

(35, 14)

Index(['Unnamed: 0.1', 'Unnamed: 0', 'award_year', 'project_#',
       'grant_recipient', 'project_title', 'ppno', 'district', 'county',
       'project_description', 'bus_count', 'master_agreement_number',
       'total_project_cost', 'tircp_award_amount_($)'],
      dtype='object')

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,award_year,project_#,grant_recipient,project_title,ppno,district,county,project_description,bus_count,master_agreement_number,total_project_cost,tircp_award_amount_($)
0,0,0,2015,1,Antelope Valley Transit Authority (AVTA),Regional Transit Interconnectivity & Environme...,CP005,7,LA,Purchase 13 60-foot articulated BRT buses and ...,13.0,64AVTA2015MA,39478000,24403000
1,5,5,2015,6,Orange County Transportation Authority (OCTA),Bravo! Route 560 Rapid Buses,CP004,12,ORA,Purchase five 40-foot CNG buses for BRT Route ...,40.0,64OCTAMA,2900000,2320000
2,11,11,2015,12,San Joaquin Regional Transit District (SJRTD),BRT Expansion: MLK Corridor and Crosstown Mine...,CP011,10,SJ,Bus rapid transit infrastructure along the MLK...,12.0,64SJRRCMA A1,19118776,6841000
3,16,16,2016,3,Foothill Transit,"Transforming California: Bus Electrification, ...",CP076,7,LA,Purchase 20 zero-emission buses to extend Rout...,20.0,64FOOTHILLMA,16580000,5000000
4,29,29,2018,2,Anaheim Transportation Network (ATN),#Electrify Anaheim: Changing the Transit Parad...,CP027,12,ORA,Deploys 40 zero-emission electric buses to dou...,40.0,64ATNMA A1,45201000,28617000


In [118]:
#inspect columns values.
list(bus_only['grant_recipient'].sort_values().unique())
#everything looks good

['Anaheim Transportation Network (ATN)',
 'Antelope Valley Transit Authority (AVTA)',
 'Antelope Valley Transit Authority (AVTA) & Long Beach Transit (LBT)',
 'City of Fresno',
 'City of Glendale and Arroyo Verdugo Communities',
 'City of Los Angeles (LA DOT)',
 'City of Pasadena',
 'City of Santa Monica',
 'City of Simi Valley',
 'City of Torrance',
 'City of Wasco',
 'Culver City',
 'Foothill Transit',
 'Humboldt Transit Authority (HTA)',
 'Humboldt Transit Authority (HTA) with Yurok Tribe and Redwood Coast Transit Authority',
 'Lake Transit Authority (LTA)',
 'Long Beach Transit (LBT)',
 'Los Angeles County Metropolitan Transportation Authority (LA Metro)',
 'Orange County Transportation Authority (OCTA)',
 'San Joaquin Regional Transit District (SJRTD)',
 'Santa Barbara Metropolitan Transit District (SBMTD) ',
 'Santa Cruz Metropolitan Transit District (Metro)',
 'Santa Monica Big Blue Bus',
 'Shasta Regional Transportation Agency (SRTA)',
 'Solano Transportation Authority (STA)',


### Consolidate up grant recipient name

### aggregate up

In [119]:
#aggregate # of buses and allocation by transit agency
#bus_cost = only_bus.groupby('grant_recipient').agg({
#    '#_of_buses':"sum",
#    'allocation_amount':'sum'
#}).reset_index()

bus_cost = bus_only.groupby('grant_recipient').agg({
    'bus_count':'sum',
    'tircp_award_amount_($)': 'sum'
}).reset_index()

In [120]:
#confirm aggregation worked
bus_cost

Unnamed: 0,grant_recipient,bus_count,tircp_award_amount_($)
0,Anaheim Transportation Network (ATN),65.0,51395000
1,Antelope Valley Transit Authority (AVTA),36.0,35735000
2,Antelope Valley Transit Authority (AVTA) & Lon...,7.0,13156000
3,City of Fresno,6.0,7798000
4,City of Glendale and Arroyo Verdugo Communities,27.0,34648000
5,City of Los Angeles (LA DOT),112.0,36104000
6,City of Pasadena,40.0,14424000
7,City of Santa Monica,113.0,26027000
8,City of Simi Valley,6.0,7053000
9,City of Torrance,10.0,96000000


## create new cost per bus column

In [6]:
bus_cost['cost_per_bus']= (bus_cost['tircp_award_amount_($)']/bus_cost['bus_count']).astype('int64')

In [7]:
#confirm new column was created and values were populated
bus_cost.sort_values('cost_per_bus')

Unnamed: 0,grant_recipient,bus_count,tircp_award_amount_($),cost_per_bus
22,Santa Monica Big Blue Bus,7.0,1105000,157857
7,City of Santa Monica,113.0,26027000,230327
12,Foothill Transit,20.0,5000000,250000
5,City of Los Angeles (LA DOT),112.0,36104000,322357
10,City of Wasco,3.0,1000000,333333
6,City of Pasadena,40.0,14424000,360600
19,San Joaquin Regional Transit District (SJRTD),12.0,6841000,570083
18,Orange County Transportation Authority (OCTA),73.0,41727000,571602
23,Shasta Regional Transportation Agency (SRTA),14.0,8641000,617214
11,Culver City,5.0,3247000,649400


## Export cost per bus via project tracking sheet to gcs

In [8]:
bus_cost.to_csv('gs://calitp-analytics-data/data-analyses/bus_procurement_cost/tircp_project_cost_per_bus.csv')