# Imports


In [1]:
import numpy as np 
import pandas as pd
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,accuracy_score,precision_score,recall_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from imblearn.over_sampling import SMOTENC

Using TensorFlow backend.


# Loading the dataset

In [2]:
# Read the accidents CSV; the relative path is resolved against the
# notebook's current working directory.
csv_path = 'teuonot.csv'
df = pd.read_csv(csv_path)

In [3]:
# Preview the first five rows to sanity-check the columns that were loaded.
df.head(n=5)

Unnamed: 0,THUM_GEOGRAFI,SUG_DEREH,HODESH_TEUNA,SHAA,SUG_YOM,YOM_LAYLA,YOM_BASHAVUA,HUMRAT_TEUNA,SUG_TEUNA,HAD_MASLUL,RAV_MASLUL,MEHIRUT_MUTERET,TKINUT,ROHAV,SIMUN_TIMRUR,TEURA,MEZEG_AVIR,PNE_KVISH
0,1,2,1,75,4,5,5,2,2,0,3,1,1,2,3,3,1,1
1,1,4,7,67,4,1,5,3,11,0,4,1,1,0,3,1,1,1
2,1,2,6,82,4,5,2,3,1,9,0,0,0,0,5,6,9,9
3,1,1,7,54,4,1,4,1,4,0,3,1,1,3,3,1,1,1
4,1,2,3,47,4,1,1,3,2,1,0,1,1,0,3,1,1,1


# Setting features and Target

In [4]:
# The accident-severity column is the label; every other column becomes a feature.
target_col = 'HUMRAT_TEUNA'
y = df[target_col].values
X = df.drop(columns=[target_col]).values
# NOTE(review): the head() preview shows an 'Unnamed: 0' column that looks like
# a saved row index. It is still part of X here -- confirm whether it should be
# used as a feature (dropping it would shift positional column indices).


# Checking for imbalance in the data and fixing it using SMOTENC

In [5]:
# Number of rows per severity code in the target column:
#   1 = deadly accident, 2 = hard accident, 3 = easy accident
severity_counts = df['HUMRAT_TEUNA'].value_counts()
for severity_code in (1, 2, 3):
    # .get(..., 0) keeps the output at 0 for a code that never occurs
    print(severity_counts.get(severity_code, 0))

323
1939
11073


In [6]:
smote = SMOTENC(categorical_features=[0,16],sampling_strategy='not majority')
X,y = smote.fit_sample(X,y)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=42)

# Running classification models

In [8]:
# Candidate classifiers, keyed by the short name printed in the results.
# The estimators that draw random numbers are seeded so that a
# Restart & Run All reproduces the same scores.
# NOTE(review): SVC, LogisticRegression and KNN are sensitive to feature scale
# and receive the raw integer-coded features here -- consider scaling/encoding.
models = {'svm':SVC(),
         'dtree':DecisionTreeClassifier(random_state=42),
         'rfc' : RandomForestClassifier(n_estimators=100, random_state=42),
         'xgb':XGBClassifier(random_state=42),
         'log':LogisticRegression(),
         'KNN':KNeighborsClassifier()}


In [9]:
# Fit every candidate on the training fold and report its hold-out metrics.
for name, model in models.items():
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    # Macro averaging weights each severity class equally, regardless of size.
    accuracy = accuracy_score(y_test, pred)
    macro_precision = precision_score(y_test, pred, average='macro')
    macro_recall = recall_score(y_test, pred, average='macro')

    print(f'____the results of {name}_____:')
    print(f'Accuracy:{accuracy:.2f}')
    print(f'Precision:{macro_precision:.2f}')
    print(f'Recall:{macro_recall:.2f}')
    print('\n')
    # Per-class precision / recall / f1 breakdown
    print(classification_report(y_test, pred))
    print('\n')
    



____the results of svm_____:
Accuracy:0.87
Precision:0.87
Recall:0.87


              precision    recall  f1-score   support

           1       0.93      0.99      0.96      2199
           2       0.81      0.86      0.83      2220
           3       0.86      0.74      0.80      2225

    accuracy                           0.87      6644
   macro avg       0.87      0.87      0.86      6644
weighted avg       0.87      0.87      0.86      6644



____the results of dtree_____:
Accuracy:0.84
Precision:0.84
Recall:0.84


              precision    recall  f1-score   support

           1       0.89      0.92      0.91      2199
           2       0.79      0.78      0.79      2220
           3       0.84      0.81      0.82      2225

    accuracy                           0.84      6644
   macro avg       0.84      0.84      0.84      6644
weighted avg       0.84      0.84      0.84      6644



____the results of rfc_____:
Accuracy:0.92
Precision:0.92
Recall:0.92


              pr



____the results of log_____:
Accuracy:0.47
Precision:0.46
Recall:0.47


              precision    recall  f1-score   support

           1       0.50      0.61      0.55      2199
           2       0.42      0.24      0.30      2220
           3       0.47      0.58      0.52      2225

    accuracy                           0.47      6644
   macro avg       0.46      0.47      0.46      6644
weighted avg       0.46      0.47      0.46      6644



____the results of KNN_____:
Accuracy:0.82
Precision:0.85
Recall:0.82


              precision    recall  f1-score   support

           1       0.86      1.00      0.92      2199
           2       0.73      0.95      0.83      2220
           3       0.94      0.51      0.67      2225

    accuracy                           0.82      6644
   macro avg       0.85      0.82      0.81      6644
weighted avg       0.85      0.82      0.81      6644



