# Adding & Calulating Door Data for 5311 Agencies

### Script work and analysis for caculating fleet age and doors for agencies

* adding door counts using [Eric Dasmalchi's Notebook](https://github.com/cal-itp/data-analyses/blob/572e255f997e7fa0969c88caf23a7791070ad28a/lossan_validators/lossan_validators.ipynb) count for the known 5311 Agencies
 
Eric's estimates: 
`({'bus':2, 'articulated_bus':3,
'over_the_road_bus':1,
'vintage_historic_trolley':2,
'trolleybus':2,
'van':1,
'cutaway':1, 
'automobile':1,
minivan':1,
'sport_utility_vehicle:':1,
'lrt_stn':6, 'hrt_stn':15,
'la_union_stn':40,
'commuter_rail_stn':6,
'interchange':20, ## interchange is for places like 7th/Metro
'ferry_stn':4})`

In [1]:
import numpy as np
import pandas as pd
from siuba import *
import altair as alt
import altair_saver
from calitp import *
from plotnine import *
import intake



In [2]:
from shared_utils import geography_utils

In [3]:
import _data_prep

In [64]:
#df = data_prep.load_grantprojects()
#vehicles = _data_prep.clean_vehicles_data()

In [68]:
# vehicles.head(2)

In [69]:
vehicles = _data_prep.load_vehicle_data()
vehicles2 = _data_prep.initial_cleaning(vehicles)

In [70]:
vehicles2

Unnamed: 0,agency,city,state,legacy_ntd_id,ntd_id,organization_type,reporter_type,primary_uza_population,agency_voms,vehicle_type,...,_21_25,_26_30,_31_60,_60plus,total_vehicles,average_age_of_fleet__in_years_,average_lifetime_miles_per_vehicle,_0_9,_10_12,vehicle_groups
25,Los Angeles County Metropolitan Transportation...,Los Angeles,CA,9154,90154,Independent Public Agency or Authority of Tran...,Full Reporter,12150996,3482,Van,...,0,0,0,0,1208,1.440397,3.145661e+04,1208,0,Van
26,Los Angeles County Metropolitan Transportation...,Los Angeles,CA,9154,90154,Independent Public Agency or Authority of Tran...,Full Reporter,12150996,3482,Trucks and other Rubber Tire Vehicles (Service),...,8,5,6,1,961,,,467,273,Service
27,Los Angeles County Metropolitan Transportation...,Los Angeles,CA,9154,90154,Independent Public Agency or Authority of Tran...,Full Reporter,12150996,3482,Steel Wheel Vehicles (Service),...,2,0,0,0,8,,,5,1,Service
28,Los Angeles County Metropolitan Transportation...,Los Angeles,CA,9154,90154,Independent Public Agency or Authority of Tran...,Full Reporter,12150996,3482,Light Rail Vehicle,...,6,9,0,0,286,7.234266,5.161625e+05,193,42,Train
29,Los Angeles County Metropolitan Transportation...,Los Angeles,CA,9154,90154,Independent Public Agency or Authority of Tran...,Full Reporter,12150996,3482,Heavy Rail Passenger Car,...,62,28,0,0,94,23.382979,1.426841e+06,0,0,Train
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7550,City of Sierra Madre,Sierra Madre,CA,,A0003-99447,"City, County or Local Government Unit or Depar...",Reduced Asset Reporter,0,0,Cutaway,...,0,0,0,0,3,12.000000,0.000000e+00,0,3,Van
7551,City of El Segundo,El Segundo,CA,,A0003-99449,"City, County or Local Government Unit or Depar...",Reduced Asset Reporter,0,0,Cutaway,...,0,0,0,0,5,12.200000,0.000000e+00,3,0,Van
7552,City of Hawaiian Gardens,Hawaiian Gardens,CA,,A0003-99450,"City, County or Local Government Unit or Depar...",Reduced Asset Reporter,0,0,Minivan,...,0,0,0,0,4,4.000000,0.000000e+00,3,1,Van
7553,City of Hawaiian Gardens,Hawaiian Gardens,CA,,A0003-99450,"City, County or Local Government Unit or Depar...",Reduced Asset Reporter,0,0,Bus,...,0,0,0,0,4,9.250000,0.000000e+00,2,0,Bus


# Function Work

## Grouping Vehicles

In [19]:
vehicles2>>count(_.vehicle_type)>>arrange(-_.n)

Unnamed: 0,vehicle_type,n
9,Cutaway,170
4,Bus,133
20,Trucks and other Rubber Tire Vehicles (Service),94
3,Automobiles (Service),79
21,Van,64
14,Minivan,63
16,Over-the-road Bus,24
0,Articulated Bus,16
2,Automobile,11
17,Sports Utility Vehicle,10


In [None]:
#rerunning loading vehicle data with the updated function in data_prep

In [22]:
vehicles2>>group_by(_.vehicle_groups)>>count(_.vehicle_type)

Unnamed: 0,vehicle_groups,vehicle_type,n
0,Automobiles,Automobile,11
1,Automobiles,Sports Utility Vehicle,10
2,Bus,Articulated Bus,16
3,Bus,Bus,133
4,Bus,Double Decker Bus,4
5,Bus,Over-the-road Bus,24
6,Bus,Trolleybus,1
7,Other,Ferryboat,4
8,Other,Other,1
9,Service,Automobiles (Service),79


In [23]:
#vehicles2 = vehicles2.rename(columns={'_60+': '_60plus'})
vehicles2.head()

Unnamed: 0,agency,city,state,legacy_ntd_id,ntd_id,organization_type,reporter_type,primary_uza_population,agency_voms,vehicle_type,...,_21_25,_26_30,_31_60,_60plus,total_vehicles,average_age_of_fleet__in_years_,average_lifetime_miles_per_vehicle,_0_9,_10_12,vehicle_groups
25,Los Angeles County Metropolitan Transportation...,Los Angeles,CA,9154,90154,Independent Public Agency or Authority of Tran...,Full Reporter,12150996,3482,Van,...,0,0,0,0,1208,1.440397,31456.61,1208,0,Van
26,Los Angeles County Metropolitan Transportation...,Los Angeles,CA,9154,90154,Independent Public Agency or Authority of Tran...,Full Reporter,12150996,3482,Trucks and other Rubber Tire Vehicles (Service),...,8,5,6,1,961,,,467,273,Service
27,Los Angeles County Metropolitan Transportation...,Los Angeles,CA,9154,90154,Independent Public Agency or Authority of Tran...,Full Reporter,12150996,3482,Steel Wheel Vehicles (Service),...,2,0,0,0,8,,,5,1,Service
28,Los Angeles County Metropolitan Transportation...,Los Angeles,CA,9154,90154,Independent Public Agency or Authority of Tran...,Full Reporter,12150996,3482,Light Rail Vehicle,...,6,9,0,0,286,7.234266,516162.5,193,42,Train
29,Los Angeles County Metropolitan Transportation...,Los Angeles,CA,9154,90154,Independent Public Agency or Authority of Tran...,Full Reporter,12150996,3482,Heavy Rail Passenger Car,...,62,28,0,0,94,23.382979,1426841.0,0,0,Train


In [24]:
types = (vehicles2
    >>select(_.agency, _.vehicle_groups, _._0_9, _._10_12, _._13_15, _._16_20, _._21_25, _._26_30, _._31_60, _._60plus))

In [25]:
types['sum_type'] = types[['_0_9', '_10_12', '_13_15', '_16_20', '_21_25','_26_30','_31_60','_60plus']].sum(axis=1)

In [26]:
types = (types>>select(_.agency, _.vehicle_groups, _.sum_type))

In [27]:
#https://towardsdatascience.com/pandas-pivot-the-ultimate-guide-5c693e0771f3
types = (types.pivot_table(index="agency", columns="vehicle_groups", values="sum_type", aggfunc=np.sum, fill_value=0)).reset_index()

In [28]:
types

vehicle_groups,agency,Automobiles,Bus,Other,Service,Train,Van
0,Access Services,12,0,0,17,0,900
1,"Alameda-Contra Costa Transit District, dba: AC...",0,641,0,161,0,240
2,Alpine County Local Transportation Commission,0,0,0,0,0,2
3,Altamont Corridor Express,0,0,0,10,33,0
4,Amador Regional Transit System,0,0,0,3,0,21
...,...,...,...,...,...,...,...
213,Western Contra Costa Transit Authority,2,55,0,5,0,20
214,"Yolo County Transportation District, dba: Yolobus",0,64,0,9,0,12
215,Yosemite Area Regional Transportation System,0,10,0,0,0,0
216,Yuba-Sutter Transit Authority,0,35,0,1,0,16


## How many agencies have vehicles over 15 years old?

In [None]:
# summary of the vehicle counts

In [29]:
age =geography_utils.aggregate_by_geography(vehicles2, 
                       group_cols = ["agency"],
                       sum_cols = ["total_vehicles", "_0_9","_10_12", "_13_15", "_16_20","_21_25","_26_30","_31_60","_60plus"],
                       mean_cols = ["average_age_of_fleet__in_years_", "average_lifetime_miles_per_vehicle"]
                                      ).sort_values(["agency","total_vehicles"], ascending=[True, True])

In [30]:
age

Unnamed: 0,agency,_0_9,_10_12,_13_15,_16_20,_21_25,_26_30,_31_60,_60plus,total_vehicles,average_age_of_fleet__in_years_,average_lifetime_miles_per_vehicle
2,Access Services,918,9,2,0,0,0,0,0,929,5.964027,154171.849925
6,"Alameda-Contra Costa Transit District, dba: AC...",708,109,103,112,8,2,0,0,1042,8.244583,256035.690100
203,Alpine County Local Transportation Commission,1,1,0,0,0,0,0,0,2,9.000000,0.000000
67,Altamont Corridor Express,8,4,5,16,10,0,0,0,43,19.000000,499140.935350
125,Amador Regional Transit System,18,3,1,1,1,0,0,0,24,4.065359,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
47,Western Contra Costa Transit Authority,56,12,7,6,1,0,0,0,82,7.480000,223090.097300
50,"Yolo County Transportation District, dba: Yolobus",48,21,0,8,8,0,0,0,85,7.554598,328419.482767
132,Yosemite Area Regional Transportation System,9,1,0,0,0,0,0,0,10,7.600000,0.000000
66,Yuba-Sutter Transit Authority,48,3,0,1,0,0,0,0,52,4.035256,134500.657200


In [31]:
age>>select(_.agency,_.average_age_of_fleet__in_years_)>>arrange(-_.average_age_of_fleet__in_years_)

Unnamed: 0,agency,average_age_of_fleet__in_years_
25,"Peninsula Corridor Joint Powers Board, dba: Ca...",28.864771
3,"City and County of San Francisco, dba: San Fra...",20.247938
8,Santa Clara Valley Transportation Authority,19.841354
4,San Diego Metropolitan Transit System,19.713625
67,Altamont Corridor Express,19.000000
...,...,...
26,San Joaquin Council,1.034883
157,City of Bell Gardens,1.000000
199,Elk Valley Rancheria,1.000000
7,San Diego Association of Governments,0.783883


In [32]:
print(f"the average fleet age of Rural Operators in California is {age.average_age_of_fleet__in_years_.mean()}")

the average fleet age of Rural Operators in California is 7.0317539832406135


In [33]:
print(f"there are {(len(age>>filter(_.average_age_of_fleet__in_years_<=5)))} rural operators with an average fleet age less than 5 years in California")

there are 58 rural operators with an average fleet age less than 5 years in California


In [34]:
print(f"there are {(len(age>>filter(_.average_age_of_fleet__in_years_>=10)))} rural operators with an average fleet age more than 5 years in California")

there are 31 rural operators with an average fleet age more than 5 years in California


In [35]:
age>>filter(_.average_age_of_fleet__in_years_>=10)>>select(_.agency)


Unnamed: 0,agency
67,Altamont Corridor Express
197,Chemehuevi Indian Tribe
3,"City and County of San Francisco, dba: San Fra..."
167,City of Bellflower
168,City of Beverly Hills
188,City of California City
97,City of Cerritos
151,"City of Corcoran, dba: Corcoran Area Transit"
149,City of Cudahy
215,City of El Segundo


In [36]:
older = (age.query('_21_25 != 0 or _26_30 != 0 or _31_60 != 0 or _60plus!=0'))

In [37]:
print(f"There are {len(older>>count(_.agency))} agencies with vehicles over 15 years old")

There are 36 agencies with vehicles over 15 years old


In [38]:
older.describe()

Unnamed: 0,_0_9,_10_12,_13_15,_16_20,_21_25,_26_30,_31_60,_60plus,total_vehicles,average_age_of_fleet__in_years_,average_lifetime_miles_per_vehicle
count,36.0,36.0,36.0,36.0,36.0,36.0,36.0,36.0,36.0,36.0,36.0
mean,283.166667,58.027778,23.361111,28.694444,16.138889,6.361111,19.388889,2.305556,437.444444,10.537363,286812.1
std,650.194059,134.723948,56.848245,61.27867,40.30041,16.54285,82.316906,12.644184,919.584951,5.521069,258076.7
min,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,3.439955,0.0
25%,21.0,4.75,1.0,0.75,1.0,0.0,0.0,0.0,41.5,6.996267,149048.8
50%,52.0,19.5,6.5,3.0,2.0,0.5,0.0,0.0,84.5,8.726812,253338.9
75%,178.0,53.25,16.0,32.75,8.5,1.25,1.0,0.0,384.25,12.676372,344382.7
max,3668.0,798.0,324.0,283.0,218.0,84.0,479.0,76.0,5142.0,28.864771,1293594.0


In [39]:
older["sum_15plus"] = older[["_16_20","_21_25","_26_30","_31_60","_60plus"]].sum(axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [40]:
older>>select(_.agency, _.sum_15plus)>>arrange(-_.sum_15plus)

Unnamed: 0,agency,sum_15plus
9,San Francisco Bay Area Rapid Transit District,624
3,"City and County of San Francisco, dba: San Fra...",385
0,Los Angeles County Metropolitan Transportation...,352
8,Santa Clara Valley Transportation Authority,319
25,"Peninsula Corridor Joint Powers Board, dba: Ca...",217
19,"Southern California Regional Rail Authority, d...",149
6,"Alameda-Contra Costa Transit District, dba: AC...",122
12,"Sacramento Regional Transit District, dba: Sac...",114
17,North County Transit District,100
4,San Diego Metropolitan Transit System,60


* Nine agencies have more than 100 vehicles older than 15 years

In [41]:
older = (older>>select(_.agency, _.sum_15plus))

In [42]:
age = pd.merge(age, older, on=['agency'], how='left')

In [43]:
age

Unnamed: 0,agency,_0_9,_10_12,_13_15,_16_20,_21_25,_26_30,_31_60,_60plus,total_vehicles,average_age_of_fleet__in_years_,average_lifetime_miles_per_vehicle,sum_15plus
0,Access Services,918,9,2,0,0,0,0,0,929,5.964027,154171.849925,
1,"Alameda-Contra Costa Transit District, dba: AC...",708,109,103,112,8,2,0,0,1042,8.244583,256035.690100,122.0
2,Alpine County Local Transportation Commission,1,1,0,0,0,0,0,0,2,9.000000,0.000000,
3,Altamont Corridor Express,8,4,5,16,10,0,0,0,43,19.000000,499140.935350,26.0
4,Amador Regional Transit System,18,3,1,1,1,0,0,0,24,4.065359,0.000000,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
213,Western Contra Costa Transit Authority,56,12,7,6,1,0,0,0,82,7.480000,223090.097300,7.0
214,"Yolo County Transportation District, dba: Yolobus",48,21,0,8,8,0,0,0,85,7.554598,328419.482767,16.0
215,Yosemite Area Regional Transportation System,9,1,0,0,0,0,0,0,10,7.600000,0.000000,
216,Yuba-Sutter Transit Authority,48,3,0,1,0,0,0,0,52,4.035256,134500.657200,


## Get a count of doors

In [None]:
# vehicles.sample()

In [None]:
# vehicles>>count(_.vehicle_type)>>arrange(-_.n)

In [44]:
#counting the number of doors on each individual vehicle. 
d = {
    'Articulated Bus':3,
    'Automobile':1,
    'Automated Guideway Vehicle':2,
    'Automobiles (Service)':0,
    'Bus':2,
    'Cable Car':2,
    'Commuter Rail Locomotive':0,
    'Commuter Rail Passenger Coach':2,
    'Commuter Rail Self-Propelled Passenger Car':2,
    'Cutaway':1,
    'Double Decker Bus':2,
    'Ferryboat':4,
    'Heavy Rail Passenger Car':2,
    'Light Rail Vehicle':2,
    'Minivan':1,
    'Other':0,
    'Over-the-road Bus':1,
    'Sports Utility Vehicle':1,
    'Steel Wheel Vehicles (Service)':0,
    'Trolleybus':2,
    'Trucks and other Rubber Tire Vehicles (Service)':0,
    'Van':1,
    'Vintage Trolley':2
}

# # dictionary = {
# #     'Articulated Bus':'articulated_bus',
# #     'Automobiles':'automobile',
# #     'Automated Guideway Vehicle':'automated_guideway_vehicle',
# #     'Automobiles (Service)':'automobile_service',
# #     'Bus':'bus',
# #     'Cable Car':'cable_car',
# #     'Commuter Rail Locomotive':'commuter_rail_locomotive',
# #     'Commuter Rail Passenger Coach':'commuter_rail_passenger_coach',
# #     'Commuter Rail Self-Propelled Passenger Car':'commuter_rail_selfp_passenger_coach',
# #     'Cutaway':'cutaway',
# #     'Double Decker Bus':'double_decker_bus',
# #     'Ferryboat':'ferryboat',
# #     'Heavy Rail Passenger Car':'heavy_rail_passenger_car',
# #     'Light Rail Vehicle':'light_rail_vehicle',
# #     'Minivan':'minivan',
# #     'Other':'other',
# #     'Over-the-road Bus':'over_the_road_bus',
# #     'Sports Utility Vehicle':'sport_utility_vehicle',
# #     'Steel Wheel Vehicles (Service)':'steel_wheel_vehicles_service',
# #     'Trolleybus':'trolleybus',
# #     'Trucks and other Rubber Tire Vehicles (Service)':'trucks_and_other_rubber_tire_vehicles_service',
# #     'Van':'van',
# #     'Vintage Trolley':'vintage_historic_trolley'
# # }

In [45]:
d2 = {'van':1,
'service':0,
'train':2,
'bus':2,
'other':0,
'automobiles':1}

In [46]:
vehicles2['doortype'] = (vehicles2['vehicle_groups'].map(d))

In [None]:
#pd.to_numeric(vehicles['doortype'])

In [47]:
vehicles2['door_count']= vehicles2['total_vehicles']*vehicles2['doortype']

In [48]:
vehicles2.sample()

Unnamed: 0,agency,city,state,legacy_ntd_id,ntd_id,organization_type,reporter_type,primary_uza_population,agency_voms,vehicle_type,...,_31_60,_60plus,total_vehicles,average_age_of_fleet__in_years_,average_lifetime_miles_per_vehicle,_0_9,_10_12,vehicle_groups,doortype,door_count
1790,Transit Joint Powers Authority for Merced Coun...,Merced,CA,9173,90173,Independent Public Agency or Authority of Tran...,Full Reporter,136969,53,Bus,...,0,0,23,6.608696,331758.3043,23,0,Bus,2.0,46.0


In [49]:
doors = geography_utils.aggregate_by_geography(vehicles2, 
                       group_cols = ["agency"],
                       sum_cols = ["door_count"],
                       mean_cols = ["door_count"], 
                       nunique_cols = ["vehicle_type"])
doors.rename(columns={'door_count_x': 'sum_doors', 'door_count_y': 'avg_doors'}, inplace=True)

In [50]:
doors.sample(5)

Unnamed: 0,agency,sum_doors,avg_doors,vehicle_type
148,City of Azusa,8.0,4.0,2
140,"City of Baldwin Park, dba: Baldwin Park Transit",15.0,7.5,2
71,El Dorado County Transit Authority,84.0,21.0,6
211,City of South El Monte,7.0,2.333333,4
1,Orange County Transportation Authority,1648.0,329.6,8


## Adding dfs together

In [51]:
agency_counts = pd.merge(age, doors, on=['agency'], how='left')

In [52]:
agency_counts.head()

Unnamed: 0,agency,_0_9,_10_12,_13_15,_16_20,_21_25,_26_30,_31_60,_60plus,total_vehicles,average_age_of_fleet__in_years_,average_lifetime_miles_per_vehicle,sum_15plus,sum_doors,avg_doors,vehicle_type
0,Access Services,918,9,2,0,0,0,0,0,929,5.964027,154171.849925,,900.0,300.0,6
1,"Alameda-Contra Costa Transit District, dba: AC...",708,109,103,112,8,2,0,0,1042,8.244583,256035.6901,122.0,1522.0,253.666667,8
2,Alpine County Local Transportation Commission,1,1,0,0,0,0,0,0,2,9.0,0.0,,2.0,1.0,2
3,Altamont Corridor Express,8,4,5,16,10,0,0,0,43,19.0,499140.93535,26.0,0.0,,5
4,Amador Regional Transit System,18,3,1,1,1,0,0,0,24,4.065359,0.0,2.0,21.0,7.0,5


## Trying Counts another way

In [53]:
types.head()

vehicle_groups,agency,Automobiles,Bus,Other,Service,Train,Van
0,Access Services,12,0,0,17,0,900
1,"Alameda-Contra Costa Transit District, dba: AC...",0,641,0,161,0,240
2,Alpine County Local Transportation Commission,0,0,0,0,0,2
3,Altamont Corridor Express,0,0,0,10,33,0
4,Amador Regional Transit System,0,0,0,3,0,21


In [54]:
d2 = {'van':1,
'service':0,
'train':2,
'bus':2,
'other':0,
'automobiles':1}

In [55]:
types['automobiles_door']= (types['Automobiles']*2)
types['bus_doors']= (types['Bus']*2)
types['train_doors']=(types['Train']*2)
types['van_doors']=(types['Van']*1)

In [56]:
types.head()

vehicle_groups,agency,Automobiles,Bus,Other,Service,Train,Van,automobiles_door,bus_doors,train_doors,van_doors
0,Access Services,12,0,0,17,0,900,24,0,0,900
1,"Alameda-Contra Costa Transit District, dba: AC...",0,641,0,161,0,240,0,1282,0,240
2,Alpine County Local Transportation Commission,0,0,0,0,0,2,0,0,0,2
3,Altamont Corridor Express,0,0,0,10,33,0,0,0,66,0
4,Amador Regional Transit System,0,0,0,3,0,21,0,0,0,21


In [57]:
types["doors_sum"] = types[["automobiles_door","bus_doors","train_doors","van_doors"]].sum(axis=1)

In [58]:
types.head()

vehicle_groups,agency,Automobiles,Bus,Other,Service,Train,Van,automobiles_door,bus_doors,train_doors,van_doors,doors_sum
0,Access Services,12,0,0,17,0,900,24,0,0,900,924
1,"Alameda-Contra Costa Transit District, dba: AC...",0,641,0,161,0,240,0,1282,0,240,1522
2,Alpine County Local Transportation Commission,0,0,0,0,0,2,0,0,0,2,2
3,Altamont Corridor Express,0,0,0,10,33,0,0,0,66,0,66
4,Amador Regional Transit System,0,0,0,3,0,21,0,0,0,21,21


In [59]:
agency_counts2 = pd.merge(age, types, on=['agency'], how='left')

In [60]:
agency_counts2.head()

Unnamed: 0,agency,_0_9,_10_12,_13_15,_16_20,_21_25,_26_30,_31_60,_60plus,total_vehicles,...,Bus,Other,Service,Train,Van,automobiles_door,bus_doors,train_doors,van_doors,doors_sum
0,Access Services,918,9,2,0,0,0,0,0,929,...,0,0,17,0,900,24,0,0,900,924
1,"Alameda-Contra Costa Transit District, dba: AC...",708,109,103,112,8,2,0,0,1042,...,641,0,161,0,240,0,1282,0,240,1522
2,Alpine County Local Transportation Commission,1,1,0,0,0,0,0,0,2,...,0,0,0,0,2,0,0,0,2,2
3,Altamont Corridor Express,8,4,5,16,10,0,0,0,43,...,0,0,10,33,0,0,0,66,0,66
4,Amador Regional Transit System,18,3,1,1,1,0,0,0,24,...,0,0,3,0,21,0,0,0,21,21


## Put it in a script:

In [61]:
def get_age_and_doors(df):   

    df = df.rename(columns={'_60+': '_60plus'})
    
    age = geography_utils.aggregate_by_geography(df, 
                           group_cols = ["agency"],
                           sum_cols = ["total_vehicles", "_0_9","_10_12", "_13_15", "_16_20","_21_25","_26_30","_31_60","_60plus"],
                           mean_cols = ["average_age_of_fleet__in_years_", "average_lifetime_miles_per_vehicle"]
                                          ).sort_values(["agency","total_vehicles"], ascending=[True, True])
    

    older = (age.query('_21_25 != 0 or _26_30 != 0 or _31_60 != 0 or _60plus!=0'))
    older["sum_15plus"] = older[["_16_20","_21_25","_26_30","_31_60","_60plus"]].sum(axis=1)
    older = (older>>select(_.agency, _.sum_15plus))

    age = pd.merge(age, older, on=['agency'], how='left')

    types = (df
        >>select(_.agency, _.vehicle_groups, _._0_9, _._10_12, _._13_15, _._16_20, _._21_25, _._26_30, _._31_60, _._60plus))
    types['sum_type'] = types[['_0_9', '_10_12', '_13_15', '_16_20', '_21_25','_26_30','_31_60','_60plus']].sum(axis=1)
    #https://towardsdatascience.com/pandas-pivot-the-ultimate-guide-5c693e0771f3
    types = (types.pivot_table(index="agency", columns="vehicle_groups", values="sum_type", aggfunc=np.sum, fill_value=0)).reset_index()

    types['automobiles_door']= (types['Automobiles']*2)
    types['bus_doors']= (types['Bus']*2)
    types['train_doors']=(types['Train']*2)
    types['van_doors']=(types['Van']*1)

    types["doors_sum"] = types[["automobiles_door","bus_doors","train_doors","van_doors"]].sum(axis=1)

    agency_counts = pd.merge(age, types, on=['agency'], how='left')
    
    # df['doortype'] = (df['vehicle_type'].map(d))
    # df['door_count']= df['total_vehicles']*df['doortype']

#     doors = geography_utils.aggregate_by_geography(df, 
#                            group_cols = ["agency"],
#                            sum_cols = ["door_count"],
#                            mean_cols = ["door_count"], 
#                            nunique_cols = ["vehicle_type"])
#     doors.rename(columns={'door_count_x': 'sum_doors', 'door_count_y': 'avg_doors'}, inplace=True)

#     agency_counts = pd.merge(age, doors, on=['agency'], how='left')

    return agency_counts

In [62]:
agency_counts = get_age_and_doors(vehicles2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [63]:
agency_counts

Unnamed: 0,agency,_0_9,_10_12,_13_15,_16_20,_21_25,_26_30,_31_60,_60plus,total_vehicles,...,Bus,Other,Service,Train,Van,automobiles_door,bus_doors,train_doors,van_doors,doors_sum
0,Access Services,918,9,2,0,0,0,0,0,929,...,0,0,17,0,900,24,0,0,900,924
1,"Alameda-Contra Costa Transit District, dba: AC...",708,109,103,112,8,2,0,0,1042,...,641,0,161,0,240,0,1282,0,240,1522
2,Alpine County Local Transportation Commission,1,1,0,0,0,0,0,0,2,...,0,0,0,0,2,0,0,0,2,2
3,Altamont Corridor Express,8,4,5,16,10,0,0,0,43,...,0,0,10,33,0,0,0,66,0,66
4,Amador Regional Transit System,18,3,1,1,1,0,0,0,24,...,0,0,3,0,21,0,0,0,21,21
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
213,Western Contra Costa Transit Authority,56,12,7,6,1,0,0,0,82,...,55,0,5,0,20,4,110,0,20,134
214,"Yolo County Transportation District, dba: Yolobus",48,21,0,8,8,0,0,0,85,...,64,0,9,0,12,0,128,0,12,140
215,Yosemite Area Regional Transportation System,9,1,0,0,0,0,0,0,10,...,10,0,0,0,0,0,20,0,0,20
216,Yuba-Sutter Transit Authority,48,3,0,1,0,0,0,0,52,...,35,0,1,0,16,0,70,0,16,86


In [None]:
#agency_counts.to_parquet("agency_door_vehicle_counts.parquet")