## Add NTD Data to Section 1

In [1]:
import _operators_prep as op_prep
import pandas as pd
import _section1_utils

In [2]:
# Notebook-wide pandas display settings, all set through the same API.
pd.set_option("display.max_columns", 100)  # show up to 100 columns
pd.set_option("display.float_format", "{:.2f}".format)  # two decimals for floats
pd.set_option("display.max_rows", None)  # never truncate rows
pd.set_option("display.max_colwidth", None)  # never truncate cell text

In [3]:
import os
from calitp_data_analysis.sql import query_sql
from calitp_data_analysis.tables import tbls
from siuba import *

### NTD
* Warehouse table docs: https://dbt-docs.calitp.org/#!/model/model.calitp_warehouse.dim_annual_ntd_agency_information
* FTA source data (2022 Annual Database Agency Information): https://www.transit.dot.gov/ntd/data-product/2022-annual-database-agency-information

In [4]:
def load_ntd(year: int) -> pd.DataFrame:
    """
    Query the NTD annual agency information table for California agencies
    in one report year and collapse the result to one row per agency.

    Args:
        year: value matched against the table's `year` column.

    Returns:
        DataFrame with `agency_name` as the first column followed by the
        selected NTD attributes, one row per distinct `agency_name`.
    """
    ntd_fields = [
        _.number_of_state_counties,
        _.uza_name,
        _.density,
        _.number_of_counties_with_service,
        _.state_admin_funds_expended,
        _.service_area_sq_miles,
        _.population,
        _.service_area_pop,
        _.subrecipient_type,
        _.primary_uza,
        _.reporter_type,
        _.organization_type,
        _.agency_name,
        _.voms_pt,
        _.voms_do,
        _.ntd_id,
    ]

    # Only current California records for the requested year are pulled.
    agency_info = (
        tbls.mart_ntd.dim_annual_ntd_agency_information()
        >> filter(_.year == year, _.state == "CA", _._is_current == True)
        >> select(*ntd_fields)
        >> collect()
    )

    # Order rows on every column (nulls last) before collapsing duplicates.
    ordered = agency_info.sort_values(
        by=agency_info.columns.tolist(), na_position="last"
    )

    # GroupBy.first() takes the first non-null entry of each column, so an
    # agency that appears on several rows is reduced to a single row.
    return ordered.groupby("agency_name").first().reset_index()

In [5]:
# Load the 2022 NTD agency information for California, one row per agency_name.
ntd_df = load_ntd(2022)

In [6]:
# Preview the first two agencies to check the selected NTD columns.
ntd_df.head(2)

Unnamed: 0,agency_name,number_of_state_counties,uza_name,density,number_of_counties_with_service,state_admin_funds_expended,service_area_sq_miles,population,service_area_pop,subrecipient_type,primary_uza,reporter_type,organization_type,voms_pt,voms_do,ntd_id
0,Access Services,,"Los Angeles--Long Beach--Anaheim, CA",7476.0,,,1621.0,12237376.0,11638106.0,,,Full Reporter,Public Agency or Authority of Transit Service,719.0,,90157
1,Alameda-Contra Costa Transit District,,"San Francisco--Oakland, CA",6943.0,,,364.0,3515933.0,1586454.0,,,Full Reporter,Public Agency or Authority of Transit Service,147.0,399.0,90014


In [7]:
# Example organization used to look up an operator profile.
org_name = "City of Fairfield"

In [9]:
# Fetch the operator profile for org_name through the Section 1 helper module.
op_df = _section1_utils.load_operator_profiles(org_name)

In [10]:
# Preview the operator profile columns (at most two rows).
op_df.head(2)

Unnamed: 0,schedule_gtfs_dataset_key,# Routes,# Trips,# Shapes,# Stops,# Arrivals,Operator Service Miles,Avg Arrivals per Stop,# Downtown Local Route Types,# Local Route Types,# Coverage Route Types,# Rapid Route Types,# Express Route Types,# Rail Route Types,Transit Operator,Organization ID,Organization,Date
13,0f5e1b251db53223200c5bfc365d33f2,5,185,11,124,2948,41.73,23.77,0,2,8,9,0,0,Bay Area 511 Fairfield and Suisun Transit Schedule,recot6qBamlOoLcrM,City of Fairfield,2024-04-17


### Mobility Marketplace provider map
* https://dbt-docs.calitp.org/#!/model/model.calitp_warehouse.dim_mobility_mart_providers

In [None]:
def load_mobility() -> pd.DataFrame:
    """
    Pull the Mobility Marketplace provider table and collapse it to one
    row per agency.

    Returns:
        DataFrame with `agency_name` as the first column followed by the
        selected provider attributes, one row per distinct `agency_name`.
    """
    provider_fields = [
        _.agency_name,
        _.counties_served,
        _.hq_city,
        _.hq_county,
        _.is_public_entity,
        _.is_publicly_operating,
        _.funding_sources,
        _.on_demand_vehicles_at_max_service,
        _.vehicles_at_max_service,
    ]

    providers = (
        tbls.mart_transit_database.dim_mobility_mart_providers()
        >> select(*provider_fields)
        >> collect()
    )

    # First pass: order on every column, nulls last.
    by_all_columns = providers.sort_values(
        by=providers.columns.tolist(), na_position="last"
    )

    # Second pass: rows with the largest vehicle counts come first.
    by_fleet_size = by_all_columns.sort_values(
        by=["on_demand_vehicles_at_max_service", "vehicles_at_max_service"],
        ascending=[False, False],
    )

    # GroupBy.first() takes the first non-null entry of each column, so an
    # agency that appears on several rows is reduced to a single row.
    return by_fleet_size.groupby("agency_name").first().reset_index()

#### Merge mobility w/ NTD

In [None]:
def merge_ntd_mobility(year: int) -> pd.DataFrame:
    """
    Inner-join the Mobility Marketplace providers to the NTD agency
    information on `agency_name`, then rename a handful of agencies.

    Args:
        year: NTD report year, passed through to `load_ntd`.

    Returns:
        DataFrame holding the mobility and NTD columns for agencies present
        in both tables, with `agency_name` rewritten per the mapping below.
    """
    ntd = load_ntd(year)
    mobility = load_mobility()

    merged = pd.merge(mobility, ntd, how="inner", on="agency_name")

    # Renames applied after the join.
    # NOTE(review): presumably these are the spellings used for
    # organization_name in the operator profiles - confirm.
    agency_dict = {
        "City of Fairfield, California": "City of Fairfield",
        "Livermore / Amador Valley Transit Authority": "Livermore-Amador Valley Transit Authority",
        "Nevada County Transit Services": "Nevada County",
        "Omnitrans": "OmniTrans",
    }
    merged["agency_name"] = merged["agency_name"].replace(agency_dict)

    # TODO(review): an unfinished statement (`m1.agency_name = m1.`) followed
    # the rename and was a SyntaxError; it has been removed. Confirm whether
    # further cleanup of agency_name was intended.
    return merged

In [None]:
# Keep only the organization_name column from the operators_with_rt() output.
op_profiles = op_prep.operators_with_rt()[['organization_name']]

In [None]:
# List the distinct organization names, e.g. to compare against agency_name spellings.
op_profiles.organization_name.unique()

In [None]:
# Build the merged mobility + NTD table for report year 2022.
ntd_mobility_df = merge_ntd_mobility(2022)

In [None]:
# List the distinct agency_name values containing "Los Angeles" in the merged table.
ntd_mobility_df.loc[ntd_mobility_df.agency_name.str.contains("Los Angeles")].agency_name.unique()

#### Merge mobility w/ relevant operators

In [None]:
def ntd_operator_info(year: int) -> pd.DataFrame:
    """
    Attach the merged NTD + mobility attributes to the operators returned
    by `op_prep.operators_with_rt()`.

    Args:
        year: NTD report year, passed through to `merge_ntd_mobility`.

    Returns:
        DataFrame with one row per match between `organization_name` and
        `agency_name`; both name columns are kept in the result.
    """
    agency_attributes = merge_ntd_mobility(year)
    operator_names = op_prep.operators_with_rt()[["organization_name"]]

    # Inner join: operators with no identically spelled agency are dropped.
    return pd.merge(
        operator_names,
        agency_attributes,
        how="inner",
        left_on=["organization_name"],
        right_on=["agency_name"],
    )

In [None]:
# Display the operator-matched NTD + mobility table for report year 2022.
ntd_operator_info(2022)

### Dim Organizations
* https://dbt-docs.calitp.org/#!/model/model.calitp_warehouse.dim_organizations

In [None]:
# organizations = tbls.mart_transit_database.dim_organizations() >> collect()

In [None]:
# organizations.head(2)