# Neural Network
This notebook trains a neural network to predict crash severity. Crash severity is divided into five categories:

K. Fatal Injury

A. Severe Injury

B. Visible Injury

C. Nonvisible Injury

O. Property Damage Only

In [1]:
!pip install tensorflow



In [2]:
import pandas as pd
import re
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder

from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as ImbPipeline

from sklearn.metrics import classification_report, accuracy_score, roc_auc_score, confusion_matrix

import seaborn as sns
import matplotlib.pyplot as plt

from tensorflow import keras


In [None]:
# Load the dataset from crash_data.csv (path is relative to the working
# directory the notebook is run from).
df = pd.read_csv("crash_data.csv")

In [None]:
# Display (n_rows, n_columns) of the loaded frame as a quick sanity check.
df.shape

In [None]:
# Separate the prediction target from the predictor columns.
target_column = 'Crash_Severity'
y = df[target_column]
X = df.drop(columns=[target_column])

In [None]:
# Encode the severity labels as integer class ids for the network.
# The fitted encoder is kept in `le`; `le.classes_` holds the original labels,
# index-aligned with the integer codes stored in `y_enc`.
le = LabelEncoder()
y_enc = le.fit_transform(y)

# Print the encoded label array as a quick check of the encoding.
print(y_enc)

In [None]:
# Hold out 30% of the rows as the test set.
# The split is stratified on the encoded label so that the train and test sets
# keep the same class proportions; without it, rare severity classes can end up
# under- or over-represented in the test set purely by chance.
# NOTE(review): stratification requires at least two rows per class.
# random_state is fixed so the split is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_enc,
    test_size=0.30,
    stratify=y_enc,
    random_state=42
)

In [None]:
# Column groups routed to each preprocessing branch.
numeric_feats = ['Vehicle_Count', 'Speed_Posted_max']
categorical_feats = [
    'Intersection_Type',
    'Area_Type',
    'Roadway_Alignment',
    'Light_Condition',
    'Weather_Condition',
    'Roadway_Surface_Condition',
    'Roadway_Surface_Type',
    'Traffic_Control_Type',
    'Traffic_Control_Status',
    'Collision_Type',
    'First_Harmful_Event',
    'day_of_week',
    'time_of_day',
]

# Numeric branch: fill gaps with the column median, then standardize.
numeric_pipe = Pipeline(steps=[
    ('impute', SimpleImputer(strategy='median')),
    ('scale', StandardScaler()),
])

# Categorical branch: replace gaps with the literal category 'missing', then
# one-hot encode. Categories unseen during fitting are ignored at transform
# time instead of raising.
categorical_pipe = Pipeline(steps=[
    ('impute', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Combine both branches; columns not listed above are not passed through
# (ColumnTransformer is left at its default `remainder`).
preprocessor = ColumnTransformer(transformers=[
    ('num', numeric_pipe, numeric_feats),
    ('cat', categorical_pipe, categorical_feats),
])

# Data Preparation

In [None]:
# Human-readable class names, index-aligned with the integer codes in y_enc.
# They are taken from the fitted LabelEncoder rather than hard-coded: the
# encoder assigns codes in sorted label order, so a hand-written list can
# silently mislabel classes in reports and confusion matrices.
y_class_names = [str(label) for label in le.classes_]

# Fit the preprocessing on the training split only, then apply the fitted
# transform to the test split so no test information leaks into the fit.
X_train_processed = preprocessor.fit_transform(X_train)
X_test_processed = preprocessor.transform(X_test)
y_train_processed = y_train
y_test_processed = y_test

# Number of leading training rows set aside for validation during fitting.
N_VALID = 5000
assert X_train_processed.shape[0] > N_VALID, (
    f"Training split has {X_train_processed.shape[0]} rows; "
    f"need more than {N_VALID} to carve out a validation set."
)

# Features and labels are sliced at the same positions so rows stay aligned.
# NOTE(review): this relies on train_test_split having shuffled the rows
# (its default) — confirm if the split call is ever changed.
X_valid = X_train_processed[:N_VALID]
new_X_train = X_train_processed[N_VALID:]
y_valid = y_train[:N_VALID]
new_y_train = y_train[N_VALID:]



# Model Creation

In [None]:
# Network dimensions are derived from the data rather than hard-coded.
n_features = new_X_train.shape[1]   # width of the preprocessed feature matrix
n_classes = len(le.classes_)        # one softmax unit per encoded severity class

# Fully connected classifier: two hidden ReLU layers followed by a softmax
# output with exactly one unit per class (previously fixed at 10 units, which
# left output units that correspond to no real class).
# The input is declared with keras.Input; the feature matrix is already 2-D
# (rows x features), so no Flatten layer is needed.
model = keras.models.Sequential()
model.add(keras.Input(shape=(n_features,)))
model.add(keras.layers.Dense(300, activation="relu"))
model.add(keras.layers.Dense(100, activation="relu"))
model.add(keras.layers.Dense(n_classes, activation="softmax"))

model.summary()

In [None]:
# Configure training. The sparse variants of the loss and accuracy are used
# because the targets are integer class ids, not one-hot vectors.
sgd_optimizer = keras.optimizers.SGD()  # library-default hyperparameters
model.compile(
    optimizer=sgd_optimizer,
    loss=keras.losses.sparse_categorical_crossentropy,
    metrics=[keras.metrics.sparse_categorical_accuracy],
)

# Training

In [None]:
# Train for 30 epochs in mini-batches of 64, evaluating on the held-out
# validation slice after each epoch. The returned History object keeps the
# per-epoch metrics used for plotting.
history = model.fit(
    x=new_X_train,
    y=new_y_train,
    validation_data=(X_valid, y_valid),
    epochs=30,
    batch_size=64,
)

# Results

In [None]:
# Plot every metric recorded during training (one line per history key)
# against the epoch index.
fig, ax = plt.subplots(figsize=(8, 5))
pd.DataFrame(history.history).plot(ax=ax)
ax.set_title("Training history")
ax.set_xlabel("Epoch")
ax.set_ylabel("Metric value")
# Anchor the axis at 0 but leave the top auto-scaled: cross-entropy loss can
# exceed 1, and a fixed 0-1 range would clip those curves out of view.
ax.set_ylim(bottom=0)
ax.grid(True)
plt.show()