# Analysis of US Department of Commerce (USDOC) American Community Survey on Commute, from (2010-2019)
### for predicting ideal Robotaxi network locations
The goal of this analysis is to best predict ideal locations for deploying Tesla's Robotaxi network. Using the USDOC data, ideal Robotaxi service locations are best characterized using trends in usage of public transportation per state and usage of taxi. It may also be useful to look at carpool usage trends.
_____
## Import data
Data downloaded from US Bureau of Transportation Statistics (USBTS), which retrieved data from the USDOC annual study on Commute. Data needed to be converted to UTF-8 to be readable by pandas.


In [94]:
import pandas as pd
import us

raw = pd.read_csv('data/commute_survey_data_utf8.csv')
raw = raw.rename(columns={'Commute mode share (percent)': 'percent'})

# Exclude US state record, so only values for individual states
raw = raw[raw.State != 'United States']
raw = raw.reset_index()

index = 0
for i in raw['State']:
    raw.loc[index, 'StateAbbr'] = us.states.lookup(i).abbr
    index += 1
raw



Unnamed: 0,index,State,Mode,Year,percent,StateAbbr
0,0,Alabama,Bicycle,2010,0.001190,AL
1,1,Alaska,Bicycle,2010,0.013226,AK
2,2,Arizona,Bicycle,2010,0.008507,AZ
3,3,Arkansas,Bicycle,2010,0.000608,AR
4,4,California,Bicycle,2010,0.010050,CA
...,...,...,...,...,...,...
3572,3635,Puerto Rico,Public transportation,2019,0.011859,PR
3573,3636,Puerto Rico,Bicycle,2019,0.001498,PR
3574,3637,Puerto Rico,Walked,2019,0.027974,PR
3575,3638,Puerto Rico,"Taxi, motorcycle, or other",2019,0.014367,PR


## Analysis
The data is already properly formatted for analysis, so no cleaning or restructuring is needed.

In [91]:
import plotly.express as px

# create working table in case any changes need to be made
wrk = raw.copy()

# visualize usage
import numpy as np
fig = px.sunburst(wrk, path=['State', 'Mode'], values='percent', color='State',
                  color_continuous_scale='RdBu', maxdepth=2)
fig.show()

In [102]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots


state_count = wrk.groupby(['StateAbbr', 'Year']).size().reset_index(name='total')

years = wrk.Year.unique()

rows = 5
cols = 2
fig1 = make_subplots(
    rows=rows, cols=cols,
    specs = [[{'type': 'choropleth'} for c in np.arange(cols)] for r in np.arange(rows)],
    subplot_titles = list([str(x) for x in years]))

for i, year in enumerate(years):
    result = state_count[['StateAbbr', 'total']][state_count.Year == year]
    fig1.add_trace(go.Choropleth(
        locations=result.StateAbbr,
        z = result.total,
        locationmode = 'USA-states', # set of locations match entries in `locations`
        marker_line_color='white',
        zmin = 0,
        zmax = max(state_count['total']),
        colorbar_title = "Percent Using Public Transport",
    ), row = i//cols+1, col = i%cols+1)

fig1.update_layout(
    title_text = 'Percent of Respondents Using Public Transit ',
    **{'geo' + str(i) + '_scope': 'usa' for i in [''] + np.arange(2,rows*cols+1).tolist()},
)

for index, trace in enumerate(fig.data):
    fig.data[index].hovertemplate = 'State: %{location}<br>Percent: %{z:.2f}<extra></extra>'
fig1.show()

KeyError: False