# Neural Network Model

 * [Imports and Model Specific Cleaning](#IMSC)
 * [Model](#mod)
 * [Save Model](#sm)

<a id='IMSC'></a>
## Imports and Model Specific Cleaning

In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split,GridSearchCV

from tensorflow.keras.models import Sequential,load_model
from tensorflow.keras.layers import Dense,Dropout
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical

In [2]:
# set view columns to 250
pd.set_option('display.max_columns',250)

In [3]:
# read in data
accidents = pd.read_csv('../project-5/data/severity_model_data/selected.csv',index_col = 0)

In [4]:
# create balanced classes (severity 1 is the smallest with 29312 instances)
sev_1 = accidents[accidents['severity'] == 1]
sev_2 = accidents[accidents['severity'] == 2].head(29312)
sev_3 = accidents[accidents['severity'] == 3].head(29312)
sev_4 = accidents[accidents['severity'] == 4].head(29312)
selected = pd.concat([sev_1,sev_2,sev_3,sev_4], axis = 0).reset_index()

In [5]:
# print only columns with nulls and their null values
for col in selected.columns:
    if selected[col].isnull().sum() > 0:
        print(col)
        print(selected[col].isnull().sum())

temperaturef
1810
humidity
2086
visibilitymi
2084
wind_speedmph
16969
precipitationin
76191
weather_condition
2172
wind_chillf
73105


In [6]:
# map side
selected['side'] = selected['side'].map({'R':1,'L':0})

In [7]:
# fill NaN values for side column
selected['side'] = selected['side'].fillna(1)

In [8]:
# fill NaN values for precipitationin column
selected['precipitationin'] = selected['precipitationin'].fillna(0)

In [9]:
# fill NaN values for temperaturef column
selected['temperaturef'].fillna(selected['temperaturef'].median(), inplace=True)

In [10]:
# fill NaN values for humidity column
selected['humidity'].fillna(100.0, inplace=True)

In [11]:
# fill NaN values for visibilitymi column
selected['visibilitymi'].fillna(selected['visibilitymi'].median(), inplace=True)

In [12]:
# fill NaN values for weather_condition column
selected['weather_condition'].fillna('Fair', inplace=True)

In [13]:
# caluculate wind_chillf column for nan values if temp and wind chill are provided
selected['wind_chillf'] = selected.apply(lambda row: row[
    'temperaturef'] - row['wind_speedmph'] if np.isnan(row['wind_chillf']) else row['wind_chillf'], axis=1)

In [14]:
# fill NaN values for wind_speedmph column
selected['wind_speedmph'].fillna(0, inplace=True)

In [15]:
# fill NaN values for wind_chillf column
selected['wind_chillf'].fillna(selected['temperaturef'], inplace=True)

In [16]:
# dummify columns
selected = pd.get_dummies(selected,columns = ['start_hour','month','end_hour','county','weather_condition'])

In [17]:
# drop index columns
selected.drop(columns = ['index','county.1'],inplace = True)

In [18]:
selected.head()

Unnamed: 0,severity,temperaturef,humidity,visibilitymi,wind_speedmph,precipitationin,traffic_signal,side,crossing,junction,stop,distancemi,wind_chillf,traffic_signal.1,sunrise_sunset,civil_twilight,nautical_twilight,astronomical_twilight,total_duration,start_hour_0,start_hour_1,start_hour_2,start_hour_3,start_hour_4,start_hour_5,start_hour_6,start_hour_7,start_hour_8,start_hour_9,start_hour_10,start_hour_11,start_hour_12,start_hour_13,start_hour_14,start_hour_15,start_hour_16,start_hour_17,start_hour_18,start_hour_19,start_hour_20,start_hour_21,start_hour_22,start_hour_23,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,month_10,month_11,month_12,end_hour_0,end_hour_1,end_hour_2,end_hour_3,end_hour_4,end_hour_5,end_hour_6,end_hour_7,end_hour_8,end_hour_9,end_hour_10,end_hour_11,end_hour_12,end_hour_13,end_hour_14,end_hour_15,end_hour_16,end_hour_17,end_hour_18,end_hour_19,end_hour_20,end_hour_21,end_hour_22,end_hour_23,county_AcadiaLA,county_AccomackVA,county_AdaID,county_AdairIA,county_AdamsCO,county_AdamsIA,county_AdamsID,county_AdamsIN,county_AdamsNE,county_AdamsOH,county_AdamsPA,county_AdamsWA,county_AdamsWI,county_AddisonVT,county_AikenSC,county_AlachuaFL,county_AlamanceNC,county_AlamedaCA,county_AlamosaCO,county_AlbanyNY,county_AlbanyWY,county_AlbemarleVA,county_AlconaMI,county_AlexanderNC,county_AlexandriaVA,county_AllamakeeIA,county_AlleganMI,county_AlleganyMD,county_AlleganyNY,county_AlleghanyVA,county_AlleghenyPA,county_AllenIN,county_AllenOH,county_AlpenaMI,county_AlpineCA,county_AmadorCA,county_AmeliaVA,county_AmherstVA,county_AndersonKY,county_AndersonSC,county_AndersonTN,county_AndrewMO,county_AndroscogginME,county_Anne ArundelMD,county_AnokaMN,county_AnsonNC,...,county_WillIL,county_WilliamsOH,county_WilliamsonIL,county_WilliamsonTN,county_WilliamsonTX,county_WilsonNC,county_WilsonTN,county_WindhamCT,county_WindhamVT,county_WindsorVT,county_WinnebagoIA,county_WinnebagoIL,county_WinnebagoWI,county_WinneshiekIA,county_WinonaMN,county_WiseVA,county_WolfeKY,county_WoodOH,county_WoodWI,county_WoodWV,county_WoodburyIA,county_WoodfordKY,county_WorcesterMA,county_WorcesterMD,county_WorthGA,county_WorthIA,county_WrightIA,county_WrightMN,county_WrightMO,county_WyandotOH,county_WyandotteKS,county_WyomingNY,county_WyomingPA,county_WytheVA,county_YadkinNC,county_YakimaWA,county_YalobushaMS,county_YamhillOR,county_YatesNY,county_YavapaiAZ,county_YellowstoneMT,county_YoakumTX,county_YoloCA,county_YorkME,county_YorkNE,county_YorkPA,county_YorkSC,county_YorkVA,county_YubaCA,county_YumaAZ,county_YumaCO,weather_condition_Blowing Dust,weather_condition_Blowing Dust / Windy,weather_condition_Blowing Sand,weather_condition_Blowing Snow,weather_condition_Clear,weather_condition_Cloudy,weather_condition_Cloudy / Windy,weather_condition_Drizzle,weather_condition_Fair,weather_condition_Fair / Windy,weather_condition_Fog,weather_condition_Fog / Windy,weather_condition_Freezing Rain / Windy,weather_condition_Hail,weather_condition_Haze,weather_condition_Haze / Windy,weather_condition_Heavy Drizzle,weather_condition_Heavy Rain,weather_condition_Heavy Rain / Windy,weather_condition_Heavy Snow,weather_condition_Heavy Snow / Windy,weather_condition_Heavy T-Storm,weather_condition_Heavy T-Storm / Windy,weather_condition_Heavy Thunderstorms and Rain,weather_condition_Heavy Thunderstorms and Snow,weather_condition_Light Drizzle,weather_condition_Light Drizzle / Windy,weather_condition_Light Freezing Drizzle,weather_condition_Light Freezing Fog,weather_condition_Light Freezing Rain,weather_condition_Light Haze,weather_condition_Light Ice Pellets,weather_condition_Light Rain,weather_condition_Light Rain / Windy,weather_condition_Light Rain Shower,weather_condition_Light Rain Shower / Windy,weather_condition_Light Rain Showers,weather_condition_Light Rain with Thunder,weather_condition_Light Sleet,weather_condition_Light Snow,weather_condition_Light Snow / Windy,weather_condition_Light Snow Showers,weather_condition_Light Thunderstorms and Rain,weather_condition_Mist,weather_condition_Mostly Cloudy,weather_condition_Mostly Cloudy / Windy,weather_condition_N/A Precipitation,weather_condition_Overcast,weather_condition_Partly Cloudy,weather_condition_Partly Cloudy / Windy,weather_condition_Patches of Fog,weather_condition_Rain,weather_condition_Rain / Windy,weather_condition_Rain Shower,weather_condition_Rain Showers,weather_condition_Scattered Clouds,weather_condition_Shallow Fog,weather_condition_Showers in the Vicinity,weather_condition_Smoke,weather_condition_Snow,weather_condition_Snow / Windy,weather_condition_Squalls / Windy,weather_condition_T-Storm,weather_condition_T-Storm / Windy,weather_condition_Thunder,weather_condition_Thunder / Windy,weather_condition_Thunder and Hail,weather_condition_Thunder in the Vicinity,weather_condition_Thunderstorm,weather_condition_Thunderstorms and Rain,weather_condition_Volcanic Ash,weather_condition_Widespread Dust,weather_condition_Wintry Mix,weather_condition_Wintry Mix / Windy
0,1,33.1,82.0,7.0,5.8,0.0,0,1.0,0,0,0,0.0,27.7,0,1,1,1,1,45.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1,46.0,86.0,7.0,11.5,0.08,0,0.0,0,0,0,0.0,40.6,0,1,1,1,1,30.0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,1,66.0,42.0,10.0,6.9,0.0,0,1.0,0,0,0,0.0,59.1,0,0,0,0,0,45.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,1,71.6,50.0,10.0,0.0,0.0,0,1.0,0,0,0,0.0,71.6,0,1,1,1,1,30.0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,1,55.4,38.0,10.0,0.0,0.0,1,1.0,0,0,0,0.01,55.4,1,1,1,1,1,30.0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


<a id='mod'></a>
## Model
### Keras

In [17]:
# create a features variable
features = [col for col in selected.columns if col not in ['severity']]

In [18]:
# set X and y variables
X_nn = selected[features]
y_nn = selected[['severity']]

In [19]:
# subtract 1 from severity so it to_categorical will work
y_nn['severity'] = y_nn['severity'] -1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  y_nn['severity'] = y_nn['severity'] -1


In [20]:
# train_test_split data
X_nn_train,X_nn_test,y_nn_train,y_nn_test = train_test_split(X_nn,y_nn,random_state = 42)

In [21]:
# to_categorical y data since it is a multiclassification
y_nn_train = to_categorical(y_nn_train,4)
y_nn_test = to_categorical(y_nn_test,4)

In [22]:
# scale data
sc = StandardScaler()
X_nn_train_sc = sc.fit_transform(X_nn_train)
X_nn_test_sc = sc.transform(X_nn_test)

In [23]:
# define a function that will go in the scikit-learn wrapper w/ arbitrary layers, neurons
def model_func(hidden_layers,layer_odd_neurons = 20,layer_even_neurons = 10, 
               layer_odd_dropout = 0.5,layer_even_dropout=0.5,e_stop = False):
    
    # instantiate Sequential
    model = Sequential()
    
    # set up hidden layers
    for x in range(hidden_layers):
        if x == 0: # input layer
            model.add(Dense(units = layer_even_neurons, activation = 'relu', 
                            input_shape = (X_nn_train_sc.shape[1],)))
        elif x%2 == 0: # even hidden layers
            model.add(Dense(units = layer_even_neurons,activation = 'relu'))
            model.add(Dropout(layer_even_dropout))
        else: # odd hidden layers
            model.add(Dense(units = layer_odd_neurons,activation = 'relu'))
            model.add(Dropout(layer_odd_dropout))
        
    # output layer
    model.add(Dense(4, activation = 'softmax'))
    
    # compile model
    model.compile(loss = 'categorical_crossentropy',optimizer = 'adam',metrics = ['accuracy'])
    
    # add EarlyStopping
    if e_stop == True:
        early_nn_stop = EarlyStopping(monitor = 'val_loss',min_delta = 0, 
                                   patience = 5,verbose = 1,mode = 'auto')
    else:
        pass

    return model

# originally used these parameters
#     {'epochs':[100],
#     'hidden_layers': [5,7],
#     'layer_odd_neurons':[8,16],
#     'layer_even_neurons':[16,32],
#     'layer_odd_dropout':[0.3,0.5],
#     'layer_even_dropout':[0.2,0.5],
#     'e_stop': [False,True]}

# got these params as the gs_nn.best_params_ and ran twice to get the best model
params = {
    'e_stop': [True],
    'epochs': [100],
    'hidden_layers': [2],
    'layer_even_dropout': [0.5],
    'layer_even_neurons': [32],
    'layer_odd_dropout': [0.3],
    'layer_odd_neurons': [16]
}

In [24]:
#wrap in a KerasClassifier object
nn = KerasClassifier(build_fn = model_func,epochs = 10,batch_size = 512,verbose = 1)

In [25]:
# set up GridSearch
gs_nn = GridSearchCV(estimator = nn,param_grid = params,cv = 5,n_jobs = -1)

In [26]:
# fit model
gs_nn.fit(X_nn_train_sc, y_nn_train)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

GridSearchCV(cv=5,
             estimator=<tensorflow.python.keras.wrappers.scikit_learn.KerasClassifier object at 0x7fdcbd815670>,
             n_jobs=-1,
             param_grid={'e_stop': [True], 'epochs': [100],
                         'hidden_layers': [2], 'layer_even_dropout': [0.5],
                         'layer_even_neurons': [32], 'layer_odd_dropout': [0.3],
                         'layer_odd_neurons': [16]})

In [27]:
# get cross val score
gs_nn.best_score_

0.8740447521209717

In [28]:
# get test score
gs_nn.score(X_nn_test_sc,y_nn_test)



0.880253791809082

In [29]:
# get train score
gs_nn.score(X_nn_train_sc,y_nn_train)



0.9046351909637451

<a id='sm'></a>
### Save Model

In [None]:
gs_nn_scores = {
    'train score': gs_nn.score(X_nn_train_sc,y_nn_train),
    'test score': gs_nn.score(X_nn_test_sc,y_nn_test)
}

In [None]:
nn = pd.DataFrame(gs_nn_scores,index = ['Neural Network'])

In [None]:
nn.to_csv('./data/severity_model_data/nn_results.csv')

In [None]:
# save model
gs_nn.best_estimator_.model.save('gs_neural_net')

In [None]:
# instantiate model
model = Sequential()

In [None]:
# load model to check to see it works
model = load_model('./gs_neural_net/')