# Bus Cost Data Schema

In [21]:
import pandas as pd
from IPython.display import Markdown, display
from _bus_cost_utils import GCS_PATH

The Bus Cost Analysis takes in data from multiple sources, then cleans, filters, and merges it into an aggregated dataset that shows each transit agency's price per Zero-Emission Bus (ZEB).

Data was pulled from 3 sources:
- FTA FY 23 Bus Award List
- TIRCP Tracking Sheet
- DGS Usage Report

The final, combined output contains data related to bus size, propulsion type, bus count, and unit cost.

## FTA FY 23 Bus Award Data

In [62]:
# Raw FTA press-release export, loaded as-is (before cleaning).
fta_raw = pd.read_csv(
    f"{GCS_PATH}raw_data-analyses_bus_procurement_cost_fta_press_release_data_csv.csv"
)

In [67]:
# Column names of the raw FTA table, shown as a plain Python list.
fta_raw.columns.tolist()

['State',
 'Project Sponsor',
 'Project Title',
 'Description',
 'Funding',
 'approx # of buses',
 'project type',
 'propulsion type',
 'area served',
 'congressional districts',
 'FTA Region',
 'Bus/Low-No program']

In [3]:
# Cleaned, bus-only FTA table (the version used for the schema printed below).
fta = pd.read_parquet(
    f"{GCS_PATH}clean_fta_bus_only.parquet"
)

## TIRCP Data

In [63]:
# Raw TIRCP tracking workbook: which sheet to read, and from which file.
tircp_name = "Project Tracking"
file_name = "raw_TIRCP Tracking Sheets 2_1-10-2024.xlsx"

# Load only the project-tracking sheet from the workbook.
tircp_raw = pd.read_excel(
    f"{GCS_PATH}{file_name}",
    sheet_name=tircp_name,
)

  warn(msg)


In [68]:
# Column names of the raw TIRCP sheet, shown as a plain Python list.
tircp_raw.columns.tolist()

['Award Year',
 'Project #',
 'Grant Recipient',
 'Project Title',
 'PPNO',
 'District',
 'County',
 'Project Description',
 'bus_count',
 'Master Agreement Number',
 'Master Agreement Expiration Date',
 'Project Manager',
 'Regional Coordinator',
 'Technical Assistance-CALITP (Y/N)',
 'Technical Assistance-Fleet (Y/N)',
 'Technical Assistance-Network Integration (Y/N)',
 'Technical Assistance-Priority Population (Y/N)',
 'Total Project Cost',
 'TIRCP Award Amount ($)',
 'Allocated Amount',
 'Unallocated Amount',
 'Percentage Allocated',
 'Expended Amount',
 'Other Funds Involved',
 'Award Cycle',
 'Is SB1?',
 'Is GGRF?',
 'Is IIJA?',
 'ON SHS?',
 'CalITP',
 'Estimated TIRCP GHG Reductions',
 'Estemated Project Completion',
 'Estimated TIRCP GHG Reductions2',
 'Increased Ridership',
 'Service Integration',
 'Improve Safety',
 'Project Readiness',
 'Funding Leverage',
 'Multi-Agency Coordination/Integration',
 'AB 1550 Community Benefits',
 'Housing Co-Benefits',
 'Local Agency Address'

In [5]:
# Cleaned, bus-only TIRCP table (the version used for the schema printed below).
tircp = pd.read_parquet(
    f"{GCS_PATH}clean_tircp_bus_only.parquet"
)

## DGS Usage Report

In [65]:
# Raw DGS usage-report workbooks: (file name, sheet name) for each contract.
# NOTE(review): the space before ".xlsx" and the trailing space in "Proterra "
# are reproduced exactly from the original cell; presumably they match the
# real file and sheet names -- confirm before "tidying" them.
file_17c, sheet_17c = (
    "raw_17c compiled-Proterra Compiled Contract Usage Report .xlsx",
    "Proterra ",
)
file_17b, sheet_17b = (
    "raw_17b compiled.xlsx",
    "Usage Report Template",
)

# One raw frame per workbook; no cleaning is applied here.
dgs_17c = pd.read_excel(
    f"{GCS_PATH}{file_17c}",
    sheet_name=sheet_17c,
)
dgs_17b = pd.read_excel(
    f"{GCS_PATH}{file_17b}",
    sheet_name=sheet_17b,
)

In [69]:
# Column names of the raw 17c usage report, shown as a plain Python list.
dgs_17c.columns.tolist()

['Supplier Contract Usage ID',
 'Ordering Agency Name',
 'State (S) or Local (L) agency',
 'Purchasing Authority Number                    (for State departments)',
 'Agency Billing Code',
 'Purchase Order Number',
 'Purchase Order Date',
 'Delivery Date',
 'Contract Line Item Number (CLIN)                (RFP ID)',
 'UNSPSC Code\n(Version 10)',
 'Manufacturer Part Number (OEM #)',
 'Manufacturer (OEM)',
 'SKU # / Item #',
 'Item Description',
 'Unit of Measure',
 'Quantity in \nUnit of Measure\n',
 'EPP (Y/N)',
 'Quantity',
 'List Price/MSRP',
 'Index Date / Catalog Version',
 'Contract Unit Price',
 'Contract Discount',
 'Extended Contract Price Paid',
 'Core/ NonCore',
 'Group ID/ Segment ID']

In [7]:
# Cleaned, bus-only DGS table (the version used for the schema printed below).
dgs = pd.read_parquet(
    f"{GCS_PATH}clean_dgs_bus_only_w_options.parquet"
)

## Final output

In [8]:
# Merged analysis dataset, read from the "cleaned_no_outliers" parquet file.
final = pd.read_parquet(
    f"{GCS_PATH}cleaned_no_outliers_cpb_analysis_data_merge.parquet"
)

In [44]:
# Maps each source-specific column name to the common column name it shares
# with the other sources. Written as target -> sources so the many-to-one
# groupings are explicit; the comprehension flattens it to {source: target}
# in the same key order as before.
column_dict = {
    source_col: target_col
    for target_col, source_cols in {
        "bus_count": ("quantity",),
        "bus_size_type": ("new_bus_size", "new_bus_size_type"),
        "ppno": ("purchase_order_number",),
        "prop_type": ("new_prop_type", "new_prop_type_finder"),
        "transit_agency": (
            "grant_recipient",
            "ordering_agency_name",
            "project_sponsor",
        ),
        "total_cost": ("funding", "total_project_cost"),
    }.items()
    for source_col in source_cols
}

## Schema

[![](https://mermaid.ink/img/pako:eNqllF1v0zAUhv-K5esMwdoVmrtp09A0QBPbFapkuc5pa5b4GPtYkH78d5y462BJyseSizg-r3POefzGG66wAJ5zcJdaLp2sZobF6-r-nG23JyfbLbu6_nT-geVsxuN9CG7SsLk8OW2WzDr8CoqEt2g8OnZ7MyghTSV0oga-i6iwgmoLYqFNAa5XNA9eeL2GVthRFOCV05Y0mqEUqYjfFmtDkzFbBFNE3fPpJqHCYCgFdulxf_354naYUgp3OUXIhoQDpa0GQ72crME-eLa_4-M8Hvt9CRdCkuVBodDTk2JRohyGdPn-bhhRE-wCQhc3Pg6EXIJRtTCygl5KwamV9I2yXSJMqOa_WibV_i1E3prq4347arRO0GPMPUDpic4eQeq72ydFI3hN-zb__X_5X0O83Ah_ZYAjuP5g82G7piKa9MLGDY8Zu2WsvUIHol80RyyZjnXaucBApX70y45nvAJXSV3E07DdrBmnFVQw441lC-keGts2OhkI72qjeE4uQMYdhuWK5wtZ-vgWbCEJ9qfpYRYKTeg-psO2PXMzbqXh-Yb_4Pnp2_Gr0dloOp5Mz96dTqbjUcZrnr_eZXyNGL_yplV_acdN0t1PgTLRaA?type=png)](https://mermaid.live/edit#pako:eNqllF1v0zAUhv-K5esMwdoVmrtp09A0QBPbFapkuc5pa5b4GPtYkH78d5y462BJyseSizg-r3POefzGG66wAJ5zcJdaLp2sZobF6-r-nG23JyfbLbu6_nT-geVsxuN9CG7SsLk8OW2WzDr8CoqEt2g8OnZ7MyghTSV0oga-i6iwgmoLYqFNAa5XNA9eeL2GVthRFOCV05Y0mqEUqYjfFmtDkzFbBFNE3fPpJqHCYCgFdulxf_354naYUgp3OUXIhoQDpa0GQ72crME-eLa_4-M8Hvt9CRdCkuVBodDTk2JRohyGdPn-bhhRE-wCQhc3Pg6EXIJRtTCygl5KwamV9I2yXSJMqOa_WibV_i1E3prq4347arRO0GPMPUDpic4eQeq72ydFI3hN-zb__X_5X0O83Ah_ZYAjuP5g82G7piKa9MLGDY8Zu2WsvUIHol80RyyZjnXaucBApX70y45nvAJXSV3E07DdrBmnFVQw441lC-keGts2OhkI72qjeE4uQMYdhuWK5wtZ-vgWbCEJ9qfpYRYKTeg-psO2PXMzbqXh-Yb_4Pnp2_Gr0dloOp5Mz96dTqbjUcZrnr_eZXyNGL_yplV_acdN0t1PgTLRaA)

Each of the 3 data sources is cleaned and aggregated by transit agency, then merged into the final table. The main columns of interest are:
- transit_agency
- prop_type (propulsion type)
- bus_size_type
- total_cost
- bus_count
- cost_per_bus
- zscore_cost_per_bus

In [37]:
# Print the dtypes of each cleaned dataset and of the merged output, one
# titled section per dataset. The trailing space after "Data Schema:" and the
# four-space separator lines reproduce the original output byte-for-byte.
print(
    "Data Schema: \n    \n"
    + "\n    \n".join(
        f"{title}:\n{frame.dtypes}"
        for title, frame in (
            ("FTA FY 23 Bus Award", fta),
            ("TIRCP Tracking Sheet", tircp),
            ("DGS Usage Report", dgs),
            ("Final Output", final),
        )
    )
    + "\n"
)

Data Schema: 
    
FTA FY 23 Bus Award:
project_sponsor         object
project_title           object
new_prop_type_finder    object
new_bus_size_type       object
description             object
new_project_type        object
funding                  int64
bus_count                int64
dtype: object
    
TIRCP Tracking Sheet:
grant_recipient         object
ppno                    object
prop_type               object
bus_size_type           object
project_description     object
new_project_type        object
total_project_cost       int64
bus_count              float64
dtype: object
    
DGS Usage Report:
ordering_agency_name     object
purchase_order_number    object
quantity                  int64
new_prop_type            object
new_bus_size             object
source                   object
total_cost                int64
dtype: object
    
Final Output:
transit_agency          object
project_title           object
prop_type               object
bus_size_type           object
descr