### Importing Libraries 

In [2]:
# visualizations
from data_processing import process_data, smoothTriangle
import warnings
import pandas as pd
import zipfile
import numpy as np
import colorlover as cl
import plotly.figure_factory as ff
from plotly.offline import iplot, init_notebook_mode
import plotly.graph_objs as go
from plotly import tools
import chart_studio.plotly as py
import plotly_express as px
init_notebook_mode(connected=True)
# eda
# tweaks
%load_ext autoreload
%autoreload 2
# silence every Python warning for the rest of the notebook
warnings.filterwarnings('ignore')
# pandas table display settings
pd.set_option('display.width', 500)
pd.set_option('display.max_columns', 100)
pd.set_option('display.notebook_repr_html', True)
# use the fully-qualified key: the short form 'precision' also matches
# 'styler.format.precision' on newer pandas and raises OptionError
pd.set_option('display.precision', 5)

# Importing Processed Data

In [3]:
# load the processed dataframe for each year 2015-2018, keyed by year
accidents = {}
for year in range(2015, 2019):
    accidents[year] = process_data(year)
# concatenate once, after every year has been loaded (doing this inside the
# loop rebuilt the combined frame on every iteration for no benefit)
accidents_1815 = pd.concat(accidents.values())

# Visualizations 

## I. Top 5 States with Highest Fatality Counts in 2018

In [11]:
# boolean mask selecting the rows whose date falls in 2018
mask_18 = accidents_1815.date.dt.year == 2018
# total fatalities per state in 2018; the aggregation is named by the string
# 'sum' because passing the Python builtin `sum` to agg is deprecated in
# recent pandas releases
state_fatals_18 = accidents_1815[mask_18].groupby(['state']).agg(
    {'fatals': 'sum'}).reset_index()

In [23]:
# bar chart of the five states with the highest total fatalities in 2018
# (head() keeps the first five rows after sorting in descending order)
fig = px.bar(
    state_fatals_18.sort_values('fatals', ascending=False).head(),
    x='state',
    y='fatals',
    title='Traffic Fatalities, USA (2018)',
    labels={'state': 'State', 'fatals': 'Fatalities'},
    color_discrete_sequence=px.colors.qualitative.Set1,
    template='plotly_white',
)
# override the bar colour and soften it slightly
fig.update_traces(marker_opacity=0.8, marker_color='rgb(178,171,210)')
fig.show()

## IV. Time Series

In [4]:
# resample to calendar days and total the fatalities per day for the time
# series plot; the string 'sum' is used because passing the Python builtin
# `sum` to agg is deprecated in recent pandas releases
accidents_1815_sum = accidents_1815.set_index('date').resample('D').agg(
    {'fatals': 'sum'}).reset_index()

In [5]:
# Daily fatality counts with two smoothed trend lines overlaid.
fig = go.Figure()

# raw daily series, drawn faint so the smoothed lines stand out
fig.add_trace(go.Scatter(x=accidents_1815_sum.date, y=accidents_1815_sum.fatals,
                         name="Fatalities[Daily]",
                         line_color='rgb(130,109,186)', opacity=0.3))

# one smoothed trace per (legend name, smoothing degree, line colour);
# smoothTriangle comes from data_processing -- presumably a triangular
# moving average whose second argument is the window degree (TODO confirm)
smoothed_specs = [
    ('Fatalities[Weekly]', 7, 'rgb(178,171,210)'),
    ('Fatalities[Monthly]', 30, 'rgb(45,0,75)'),
]
for trace_name, degree, colour in smoothed_specs:
    fig.add_trace(go.Scatter(x=accidents_1815_sum.date,
                             y=smoothTriangle(accidents_1815_sum.fatals, degree),
                             mode='lines',
                             # colour goes on the line itself: marker size and
                             # symbol draw nothing when mode='lines'
                             line_color=colour,
                             name=trace_name))

# the subtitle tag is closed with </sub> (it was left open as a second <sub>)
fig.update_layout(title_text='<br>Traffic Fatalities Per Day in USA (2015-2018)<br>'
                  '<sub>Fatality Count</sub>',
                  xaxis_rangeslider_visible=True, width=1000, height=500)
fig.layout.template = 'plotly_white'
fig.show()

# EDA & Visualizations 

In [None]:
# NOTE(review): accidents_18 was not defined anywhere earlier in the notebook,
# so this cell failed on a fresh kernel. It is assumed to be the 2018 rows of
# the combined frame -- confirm. Copied so the new column is added to an
# independent frame rather than to a slice of accidents_1815.
accidents_18 = accidents_1815[accidents_1815.date.dt.year == 2018].copy()

# hover text for the map scatter points: "<state> - <Month> <day>, <n> Killed"
accidents_18['text'] = (
    accidents_18.state + ' - '
    + accidents_18.date.dt.month_name() + ' '
    + accidents_18.date.dt.day.astype(str) + ', '
    + accidents_18.fatals.astype(str) + ' Killed'
)

In [None]:
# split the 2018 records by fatality count: exactly one vs. more than one
accidents_18_f1 = accidents_18.loc[accidents_18['fatals'].eq(1)]
accidents_18_f2 = accidents_18.loc[accidents_18['fatals'].gt(1)]

In [None]:
# 2018 U.S. population estimates by state (Census Bureau)
# https://www.census.gov/newsroom/press-kits/2018/pop-estimates-national-state.html
us_pop_2018 = pd.read_csv('../info/2018_POP ESTIMATES.csv')
# Attach the population data to the per-state fatality totals. The merge
# starts from the two base columns only, so re-running this cell gives the
# same result instead of stacking POP_x / POP_y columns onto an
# already-merged frame.
state_fatals_18 = (
    state_fatals_18[['state', 'fatals']]
    .merge(us_pop_2018, left_on='state', right_on='STATE_CODE')
    .drop('STATE_CODE', axis=1)
)
# fatalities per 100,000 residents, rounded to 2 decimals
state_fatals_18['percap_fatals'] = np.round(
    state_fatals_18.fatals / state_fatals_18.POP * 100000, 2)

In [None]:
# Purples sequential palette interpolated to 17 colours for the choropleth
purples = px.colors.sequential.Purples
colorscale = cl.interp(purples, 17)

# base layer: states shaded by fatalities per 100,000 people
fig = go.Figure()
fig.add_trace(go.Choropleth(
    locationmode='USA-states',
    locations=state_fatals_18['state'],
    z=state_fatals_18['percap_fatals'],
    colorscale=colorscale,
    marker=dict(line=dict(color='white')),
    colorbar=dict(title="Killed per 100,000"),
))

# overlay: one point per record with exactly one fatality
fig.add_trace(go.Scattergeo(
    name='Fatalities = 1',
    lon=accidents_18_f1.longitud,
    lat=accidents_18_f1.latitude,
    text=accidents_18_f1.text,
    marker=dict(
        # point size grows with the square root of the fatality count
        size=accidents_18_f1.fatals ** 0.5 * 5,
        opacity=0.5,
        color='hsl(243.5, 29.5%, 64.5%)'),
))

# overlay: one point per record with more than one fatality
fig.add_trace(go.Scattergeo(
    name='Fatalities > 1',
    lon=accidents_18_f2.longitud,
    lat=accidents_18_f2.latitude,
    text=accidents_18_f2.text,
    marker=dict(
        size=accidents_18_f2.fatals ** 0.5 * 5,
        opacity=0.5,
        color='hsl(270.0, 100.0%, 24.0%)'),
))

fig.update_layout(
    title_text='Traffic Fatalities per 100,000 People in United States (2018)',
    width=1000,
    height=600,
    legend=dict(orientation="h"),
    geo=dict(
        scope='usa',
        projection=dict(type='albers usa')),
)

fig.show()

In [None]:
# bar chart of the five states with the highest fatalities per 100,000 people
fig = px.bar(
    state_fatals_18.sort_values('percap_fatals', ascending=False).head(5),
    x='state',
    y='percap_fatals',
    title='Traffic Fatalities by State per 100,000 People, USA (2018)',
    labels={'state': 'State', 'percap_fatals': 'Fatalities per 100k People'},
    color_discrete_sequence=px.colors.qualitative.Set1,
    template='plotly_white',
)
# override the bar colour and soften it slightly
fig.update_traces(marker_opacity=0.8, marker_color='rgb(84,39,136)')
fig.show()

In [None]:
# Doughnut chart for confusion-matrix counts.
# NOTE(review): the counts below and the 'Title ???' layout title look like
# placeholders -- confirm before publishing.
labels = ['True Positive', 'True Negative', 'False Positive', 'False Negative']
values = [4500, 2500, 1053, 500]

# slice colours come from the Agsunset sequential palette
marker_colors = px.colors.sequential.Agsunset

# a non-zero `hole` turns the pie into a doughnut
fig = go.Figure()
fig.add_trace(go.Pie(
    labels=labels,
    values=values,
    hole=.5,
    marker=dict(colors=marker_colors),
    title='Confusion Matrix',
))

fig.update_layout(
    title_text='Title ???',
    width=700,
    height=700,
    legend=dict(orientation="v"),
)

fig.show()

### Test Visualization

In [None]:
# Scatter of per-capita fatalities by state: sober drivers (x) vs drunk
# drivers (y), with a dotted x = y reference line.
# NOTE(review): traffic_data, state_population and us_states are not defined
# anywhere in the visible notebook -- they must exist before this cell runs.
# NOTE(review): the division assumes both groupby results contain every state
# in the same order as state_population / us_states; a state with no rows in
# one of the filters would shift the arrays out of alignment -- confirm.

# traffic fatalities from sober drivers per 100,000 people in state
sober_perstate = np.asarray(traffic_data[traffic_data.drunk_drivers == 0].groupby(
    'state')['fatalities'].sum())
sober_percapita = np.round(sober_perstate / state_population * 100000, 2)

# traffic fatalities from drunk drivers per 100,000 people in state
drunk_perstate = np.asarray(traffic_data[traffic_data.drunk_drivers > 0].groupby(
    'state')['fatalities'].sum())
drunk_percapita = np.round(drunk_perstate / state_population * 100000, 2)

# one labelled marker per state
trace_dot = go.Scatter(
    x=sober_percapita,
    y=drunk_percapita,
    text=us_states,
    mode='markers+text',
    # 'bottom' alone is not an accepted textposition value for scatter traces
    textposition='bottom center',
    hoverinfo='x+y+text',
    marker=dict(
        color='rgb(215, 0, 0)',
        size=8)
)

# dotted x = y reference line
trace_dash = go.Scatter(
    x=[0.1, 10.5],
    y=[0.1, 10.5],
    mode='lines',
    hoverinfo='none',
    line=dict(
        color='rgb(68, 68, 68)',
        width=1.5,
        dash='dot')
)

layout = go.Layout(
    title='Traffic Fatalities per Capita by Driver Intoxication '
    'in United States (2015)',
    showlegend=False,
    xaxis=dict(
        title='Fatalities per 100,000 People (Sober Drivers)',
        range=[0.1, 18.5],
        # tickmode='linear' places ticks from tick0 in steps of dtick; it
        # replaces the legacy `autotick=False`, which current plotly rejects
        tickmode='linear',
        tick0=2,
        dtick=2,
        showline=True,
        showgrid=False
    ),
    yaxis=dict(
        title='Fatalities per 100,000 People (Drunk Drivers)',
        range=[0.1, 10],
        tickmode='linear',
        tick0=2,
        dtick=2,
        showline=True,
        showgrid=False)
)

data = [trace_dot, trace_dash]
figure = dict(data=data, layout=layout)
iplot(figure)