In [1]:
import os
import math
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import GaussianNB
from tensorflow.keras.optimizers import Adam
from sklearn.neighbors import KNeighborsClassifier
from Classification_Project_DE_Module import DE_fn
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_score

In [2]:
if os.path.exists('./2018_Accidents_UK_Final.csv'):
    x = np.nan
else:
    DE_fn()

In [3]:
df = pd.read_csv('./2018_Accidents_UK_Final.csv', low_memory=False)

In [4]:
shuffled_df = shuffle(df)

In [5]:
df_copy = shuffled_df[['number_of_vehicles','number_of_casualties','day_of_week_0','day_of_week_1','day_of_week_2',
        'first_road_class_0','first_road_class_1','first_road_class_2','road_type_0','road_type_1',
        'road_type_2','junction_detail_0','junction_detail_1','junction_detail_2','junction_detail_3',
        'junction_control_0','junction_control_1','junction_control_2','pedestrian_crossing_human_control_0',
        'pedestrian_crossing_human_control_1','pedestrian_crossing_physical_facilities_0',
        'pedestrian_crossing_physical_facilities_1','pedestrian_crossing_physical_facilities_2','light_conditions_0',
        'light_conditions_1','light_conditions_2','weather_conditions_0','weather_conditions_1','weather_conditions_2',
        'road_surface_conditions_0','road_surface_conditions_1','trunk_road_flag_0','trunk_road_flag_1',
        'week_number_0', 'week_number_1', 'week_number_2','week_number_3', 'week_number_4',
        'week_number_5','special_conditions_at_site_None','special_conditions_at_site_Rare',
        'carriageway_hazards_None', 'carriageway_hazards_Rare','urban_or_rural_area_Rural',
        'urban_or_rural_area_Urban','week_end', 'time_in_mins']]

In [6]:
# We will write here the feature that will be feeded into the models
X = df_copy
y = shuffled_df['accident_severity']

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

k = round(math.sqrt(len(df_copy)))
knn = KNeighborsClassifier(n_neighbors=k)

cv_scores = cross_val_score(knn, X_train, y_train, cv=10)
print("Cross-Validation Scores:", cv_scores)
print("Mean CV Accuracy:", cv_scores.mean())

knn.fit(X_train, y_train)

y_pred = knn.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Cross-Validation Scores: [0.79919679 0.79919679 0.79919679 0.79919679 0.79919679 0.79909381
 0.79909381 0.79917611 0.79917611 0.79917611]
Mean CV Accuracy: 0.7991699879345208
Accuracy: 0.7986159739671294


In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
naive_bayes = GaussianNB()
naive_bayes.fit(X_train, y_train)
y_pred = naive_bayes.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.4652551797998105


In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
logreg = LogisticRegression(max_iter=1000)
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.7991102689788689


In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

input_dim = X_train.shape[1]
num_classes = len(set(y_train))

model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(input_dim,)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

learning_rate = 0.0001

model.compile(optimizer=Adam(learning_rate=learning_rate),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(X_train, y_train, epochs=15, batch_size=1000, verbose=1)

_, accuracy = model.evaluate(X_test, y_test, verbose=1)
print("Accuracy:", accuracy)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Accuracy: 0.7958149909973145
