# Bus Cost Data Schema

In [21]:
import pandas as pd
from IPython.display import Markdown, display
from _bus_cost_utils import GCS_PATH

The Bus Cost Analysis takes in data from multiple sources, then cleans, filters, and joins them to produce an aggregated dataset that shows a transit agency's price per Zero-Emission Bus (ZEB).

Data was pulled from three sources:
- FTA FY 23 Bus Award
- TIRCP Tracking Sheet
- DGS Usage Report

The final, compiled output contains data related to bus size, propulsion type, bus count, and cost.

## FTA FY 23 Bus Award Data

In [3]:
# Final FTA dataset: read the cleaned parquet file stored under GCS_PATH.
fta = pd.read_parquet(
    path=f"{GCS_PATH}clean_fta_bus_only.parquet",
)

## TIRCP Data

In [5]:
# Final TIRCP dataset: read the cleaned parquet file stored under GCS_PATH.
tircp = pd.read_parquet(
    path=f"{GCS_PATH}clean_tircp_bus_only.parquet",
)

## DGS Usage Report

In [7]:
# Final DGS dataset: read the cleaned parquet file stored under GCS_PATH.
dgs = pd.read_parquet(
    path=f"{GCS_PATH}clean_dgs_bus_only_w_options.parquet",
)

## Final output

In [8]:
# Merged analysis dataset: read the compiled parquet file stored under GCS_PATH.
final = pd.read_parquet(
    path=f"{GCS_PATH}cleaned_no_outliers_cpb_analysis_data_merge.parquet",
)

## Schema

[![](https://mermaid.ink/img/pako:eNqlVMtu2zAQ_BWBZ6VoY8etdQsSpAjQFkWTU2GAoKmVzUDisuQSrfz491Ki7biV5LykgyjOiLs7O9o1k5gDyxjYayUWVlQznYTr5v4y2WzOzjab5Ob22-WXJEtmLNwHcB2XzeXIKr1IjMUHkMSdQe3QDuKkqIQOquE3DwzDqTbAC6VzsL2kuXfcqRW0xA4jByetMqRQD4WISfzzsdI0GSeF13ng_b_dBJToNUVgGx_3tz-uvg9LFOGuSEFhTdyCVEbB_shjkYzGPuVMf7mnxdgX-xZRCEmUB4ZEd5RzUaIYVuj6892wPg3YVQdt6HpYcLEALWuuRdVTlbdyKVxDa_lc-2p-bJaY-C8flFZUn3baSYt1QIch9oBEj9Ls6o9Fd4ukYAGnaFfjC3-T11rh7RZ4VutPaPWEwYeNGpNownMTuh0idtNYOYkWeD9pjlgmKuRp5hw9lWpvli1LWQW2EioPE7Dt1IzREoLpWGPWHArhS2o821CFJ7yrtWQZWQ8ps-gXS5YVonThzZtcEOyG6GEXckVov8YZ247alBmhWbZmf1h2_nH8bnQxmo4n04tP55PpeJSymmXvtylbIYZTPrTsn-26Cbr9C7Zlz78?type=png)](https://mermaid.live/edit#pako:eNqlVMtu2zAQ_BWBZ6VoY8etdQsSpAjQFkWTU2GAoKmVzUDisuQSrfz491Ki7biV5LykgyjOiLs7O9o1k5gDyxjYayUWVlQznYTr5v4y2WzOzjab5Ob22-WXJEtmLNwHcB2XzeXIKr1IjMUHkMSdQe3QDuKkqIQOquE3DwzDqTbAC6VzsL2kuXfcqRW0xA4jByetMqRQD4WISfzzsdI0GSeF13ng_b_dBJToNUVgGx_3tz-uvg9LFOGuSEFhTdyCVEbB_shjkYzGPuVMf7mnxdgX-xZRCEmUB4ZEd5RzUaIYVuj6892wPg3YVQdt6HpYcLEALWuuRdVTlbdyKVxDa_lc-2p-bJaY-C8flFZUn3baSYt1QIch9oBEj9Ls6o9Fd4ukYAGnaFfjC3-T11rh7RZ4VutPaPWEwYeNGpNownMTuh0idtNYOYkWeD9pjlgmKuRp5hw9lWpvli1LWQW2EioPE7Dt1IzREoLpWGPWHArhS2o821CFJ7yrtWQZWQ8ps-gXS5YVonThzZtcEOyG6GEXckVov8YZ247alBmhWbZmf1h2_nH8bnQxmo4n04tP55PpeJSymmXvtylbIYZTPrTsn-26Cbr9C7Zlz78)

In [37]:
# Print the column dtypes of each loaded dataset, one labelled section per
# dataset, in the order: three sources first, merged output last.
schema_sections = [
    ("FTA FY 23 Bus Award", fta.dtypes),
    ("TIRCP Tracking Sheet", tircp.dtypes),
    ("DGS Usage Report", dgs.dtypes),
    ("Final Output", final.dtypes),
]

# Sections are separated by a whitespace-only line (four spaces), which keeps
# the printed layout identical to a hand-written multi-line report.
section_text = "    \n".join(
    f"{label}:\n{dtypes}\n" for label, dtypes in schema_sections
)

print(f"Data Schema: \n    \n{section_text}")

Data Schema: 
    
FTA FY 23 Bus Award:
project_sponsor         object
project_title           object
new_prop_type_finder    object
new_bus_size_type       object
description             object
new_project_type        object
funding                  int64
bus_count                int64
dtype: object
    
TIRCP Tracking Sheet:
grant_recipient         object
ppno                    object
prop_type               object
bus_size_type           object
project_description     object
new_project_type        object
total_project_cost       int64
bus_count              float64
dtype: object
    
DGS Usage Report:
ordering_agency_name     object
purchase_order_number    object
quantity                  int64
new_prop_type            object
new_bus_size             object
source                   object
total_cost                int64
dtype: object
    
Final Output:
transit_agency          object
project_title           object
prop_type               object
bus_size_type           object
descr

In [44]:
# Each standardized column name, paired with the source-specific column names
# that map onto it. Order matters: it fixes the insertion order of
# `column_dict` below, and therefore how the mapping is displayed.
_sources_by_target = {
    "bus_count": ["quantity"],
    "bus_size_type": ["new_bus_size", "new_bus_size_type"],
    "ppno": ["purchase_order_number"],
    "prop_type": ["new_prop_type", "new_prop_type_finder"],
    "transit_agency": ["grant_recipient", "ordering_agency_name", "project_sponsor"],
    "total_cost": ["funding", "total_project_cost"],
}

# Flatten into a {source column name: standardized column name} lookup.
column_dict = {
    source_name: target_name
    for target_name, source_names in _sources_by_target.items()
    for source_name in source_names
}

In [59]:
# Print a heading, then end the cell on the mapping so the notebook displays it.
print("Dictionary for columns names:")
column_dict


Dictionary for columns names:


{'quantity': 'bus_count',
 'new_bus_size': 'bus_size_type',
 'new_bus_size_type': 'bus_size_type',
 'purchase_order_number': 'ppno',
 'new_prop_type': 'prop_type',
 'new_prop_type_finder': 'prop_type',
 'grant_recipient': 'transit_agency',
 'ordering_agency_name': 'transit_agency',
 'project_sponsor': 'transit_agency',
 'funding': 'total_cost',
 'total_project_cost': 'total_cost'}