# Revenue Vehicles for NTD agencies with no GTFS RT

* `dim_annual_ntd_agency_information`
* `dim_organizations`

[Slack thread](https://cal-itp.slack.com/archives/C02H6JUSS9L/p1688662549443129)

In [1]:
import pandas as pd
from calitp_data_analysis.tables import tbls
from calitp_data_analysis.sql import to_snakecase
from siuba import *



In [2]:
pd.options.display.max_columns = 100
pd.options.display.float_format = "{:.2f}".format
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

### Organizations

In [3]:
def dim_orgs()->pd.DataFrame:
    # https://dbt-docs.calitp.org/#!/model/model.calitp_warehouse.dim_organizations
    df = (tbls.mart_transit_database.dim_organizations() 
        >> filter(_._is_current == True)
        >> select(_.ntd_id, _.name, 
                  _.gtfs_static_status, _.gtfs_realtime_status)
        >> collect()
        >> distinct()
       )    
    
    df = df[df.ntd_id.notna()].reset_index(drop=True)
    
    return df

In [4]:
orgs = dim_orgs()

In [5]:
orgs.name.value_counts().head()

Yurok Tribe              1
Solano County Transit    1
City of West Covina      1
City of Baldwin Park     1
City of Bell Gardens     1
Name: name, dtype: int64

In [6]:
orgs.shape

(222, 4)

### NTD

In [7]:
def ntd_data(year: int) -> pd.DataFrame:
    
    df = (tbls.mart_ntd.dim_annual_ntd_agency_information() 
          >> filter(_.year == year, _.state == "CA")
          >> select(_.ntd_id, _.total_voms, _.agency_name, _.state)
          >> collect()
         )

    return df

In [8]:
ntd = ntd_data(2021)

In [9]:
ntd.head()

Unnamed: 0,ntd_id,total_voms,agency_name,state
0,90003,512.0,San Francisco Bay Area Rapid Transit District,CA
1,90004,91.0,Golden Empire Transit District,CA
2,90006,80.0,Santa Cruz Metropolitan Transit District,CA
3,90007,72.0,City of Modesto,CA
4,90008,128.0,City of Santa Monica,CA


In [10]:
df = pd.merge(
    orgs,
    ntd,
    on = "ntd_id",
    how = "outer",
    validate = "m:1",
    indicator=True
)

df._merge.value_counts()

both          212
right_only     26
left_only      10
Name: _merge, dtype: int64

In [11]:
# Who hasn't merged on?
df[df._merge=="right_only"].agency_name.unique()

array(['Metropolitan Transportation Commission',
       'Los Angeles County Dept. of Public Works - Athens Shuttle Service',
       'Los Angeles County Department of Public Works - Avocado Heights',
       'Los Angeles County Department of Public Works - East Valinda',
       'Los Angeles County Dept. of Public Works - Florence-Firestone',
       'Los Angeles County Dept. of Public Works - King Medical Center Shuttle Service',
       'Los Angeles County Dept. of Public Works - Lennox Shuttle',
       'Los Angeles County Department of Public Works - South Whittier',
       'Los Angeles County Department of Public Works - Whittier',
       'Los Angeles County Dept. of Public Works - Willowbrook Shuttle',
       'Los Angeles County Dept. of Public Works - Willowbrook et al.',
       'Riverfront Joint Powers Authority',
       'SACRAMENTO AREA COUNCIL OF GOVERNMENTS FINANCING CORPORATION',
       'Pomona Valley Transportation Authority', 'Elk Valley Rancheria',
       'Los Angeles County M

In [29]:
df.sample()

Unnamed: 0,ntd_id,name,gtfs_static_status,gtfs_realtime_status,total_voms,agency_name,state,_merge
114,90289,City of Rosemead,Static OK,RT Incomplete,7.0,City of Rosemead,CA,both


In [30]:
df.loc[df.ntd_id == "9R02-91116"]

Unnamed: 0,ntd_id,name,gtfs_static_status,gtfs_realtime_status,total_voms,agency_name,state,_merge
1,9R02-91116,Alpine County,Static OK,RT Incomplete,1.0,Alpine County Local Transportation Commission,CA,both


In [12]:
no_rt_vehicles = df[df.gtfs_realtime_status == "RT Incomplete"].total_voms.sum()
yes_rt_vehicles = df[df.gtfs_realtime_status == "RT OK"].total_voms.sum()

In [13]:
no_rt_vehicles / (no_rt_vehicles + yes_rt_vehicles)

0.2779102240991758

In [14]:
yes_rt_vehicles

10601.0

In [28]:
no_rt_vehicles

4080.0

### Revenue Vehicles

In [15]:
rev_vehicle_url = "gs://calitp-analytics-data/data-analyses/2021-Annual-Database-Files/2021 Revenue Vehicle Inventory.xlsx"

In [16]:
rev_vehicle = to_snakecase(pd.read_excel(rev_vehicle_url))

In [17]:
rev_vehicle.columns

Index(['ntd_id', 'agency_name', 'reporter_type', 'reporting_module',
       'group_plan_sponsor_ntdid', 'group_plan_sponsor_name', 'modes',
       'revenue_vehicle_inventory_id', 'agency_fleet_id',
       'modetos_vehicles_operated_in_maximum_service', 'total_fleet_vehicles',
       'dedicated_fleet', 'vehicle_type', 'ownership_type', 'funding_source',
       'manufacture_year', 'rebuild_year', 'type_of_last_renewal',
       'useful_life_benchmark', 'manufacturer',
       'other_manufacturer_description', 'model', 'active_fleet_vehicles',
       'ada_fleet_vehicles', 'emergency_contingency_vehicles', 'fuel_type',
       'vehicle_length', 'seating_capacity', 'standing_capacity',
       'total_miles_on_active_vehicles_during_period',
       'average_lifetime_miles_per_active_vehicles',
       'no_capital_replacement_flag', 'separate_asset_flag',
       'event_data_recorders', 'emergency_lighting_system_design',
       'emergency_signage', 'emergency_path_marking',
       'automated_vehic

In [18]:
rev_vehicle2 = rev_vehicle.groupby(['ntd_id']).agg({'total_fleet_vehicles':'sum','active_fleet_vehicles':'sum'}).reset_index()

In [19]:
rev_vehicle2.sample(2)

Unnamed: 0,ntd_id,total_fleet_vehicles,active_fleet_vehicles
894,90041,115,111
1895,5R05-50293,15,15


In [22]:
pd.merge(orgs, rev_vehicle2, on ='ntd_id', how = 'outer', indicator = True)[['_merge']].value_counts()

_merge    
right_only    2714
left_only      164
both            58
dtype: int64

In [23]:
m1 = pd.merge(orgs, rev_vehicle2, on ='ntd_id', how = 'left')
m1 = m1.drop_duplicates()

In [24]:
m1.shape

(222, 6)

In [25]:
m1[['total_fleet_vehicles','active_fleet_vehicles']] = m1[['total_fleet_vehicles','active_fleet_vehicles']].fillna(0)

In [27]:
m1.loc[m1.total_fleet_vehicles != 0].head(10)

Unnamed: 0,ntd_id,name,gtfs_static_status,gtfs_realtime_status,total_fleet_vehicles,active_fleet_vehicles
1,9R02-91116,Alpine County,Static OK,RT Incomplete,3.0,3.0
4,9R02-91041,City of Dixon,Static OK,RT Incomplete,13.0,13.0
5,9R02-99426,City of Wasco,Static OK,RT Incomplete,3.0,3.0
6,9R02-91119,Plumas County,Static OK,RT Incomplete,11.0,10.0
10,9R02-91040,City of Dinuba,Static OK,RT Incomplete,10.0,10.0
18,9R02-91082,Mariposa County,Static OK,RT Incomplete,10.0,10.0
19,9R02-91101,Town of Truckee,Static OK,RT Incomplete,8.0,8.0
21,9R02-91002,City of Corcoran,Static OK,RT Incomplete,6.0,6.0
25,9R02-91079,City of Woodlake,Static OK,RT Incomplete,3.0,3.0
30,9R02-91110,City of McFarland,Static OK,RT Incomplete,2.0,2.0
