Building the 4 classification models

In [141]:
import pandas as pd
import numpy as np
from sklearn import tree
from sklearn import ensemble
from sklearn import neighbors
import xgboost
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


In [142]:
# Load data
df = pd.read_excel('../data/clean_data.xlsx')
X = df.drop('flood', axis=1)
y = df['flood']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [143]:
# importing and building the Decision Tree model 
dtree = tree.DecisionTreeClassifier()
dtree.fit(X_train_scaled,y_train)

p_1=dtree.predict(X_train_scaled)
print("Training accuracy through the decision tree method is:", accuracy_score(y_train, p_1))
p1 = dtree.predict(X_test_scaled)
print("The accuracy through the decision tree method is:", accuracy_score(y_test, p1))


Training accuracy through the decision tree method is: 1.0
The accuracy through the decision tree method is: 1.0


In [144]:
# importing and building the Random Forest model 
Rf = ensemble.RandomForestClassifier()
Rf.fit(X_train_scaled,y_train)
p_2=dtree.predict(X_train_scaled)
print("Training accuracy through the random forest method is:", accuracy_score(y_train, p_2))
p2 = Rf.predict(X_test_scaled)
print("The accuracy through the random forest method is:",accuracy_score(y_test, p2))




Training accuracy through the random forest method is: 1.0
The accuracy through the random forest method is: 1.0


In [145]:
# importing and building the Knn model 
knn = neighbors.KNeighborsClassifier()
knn.fit(X_train_scaled,y_train)
p_3=knn.predict(X_train_scaled)
print("Training accuracy through the Knn method is:",accuracy_score(y_train, p_3))
p3 = knn.predict(X_test_scaled)
print("The accuracy through the random forest method is:",accuracy_score(y_test, p3))


Training accuracy through the Knn method is: 0.9239130434782609
The accuracy through the random forest method is: 0.9130434782608695


In [146]:
# importing and building the Xgb model 
xgb = xgboost.XGBClassifier()
xgb.fit(X_train_scaled,y_train)
p_4=xgb.predict(X_train_scaled)
print("Training accuracy through the Xgb method is:",accuracy_score(y_train, p_3))
p4 = xgb.predict(X_test_scaled)
print("The accuracy through the Xgb method is:",accuracy_score(y_test, p3))


Training accuracy through the Xgb method is: 0.9239130434782609
The accuracy through the Xgb method is: 0.9130434782608695


Evaluating Performance of the Model

In [147]:
print(classification_report(y_test,p1))
print(confusion_matrix(y_test,p1))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        20
           1       1.00      1.00      1.00         3

    accuracy                           1.00        23
   macro avg       1.00      1.00      1.00        23
weighted avg       1.00      1.00      1.00        23

[[20  0]
 [ 0  3]]


In [148]:
print(classification_report(y_test,p2))
print(confusion_matrix(y_test,p2))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        20
           1       1.00      1.00      1.00         3

    accuracy                           1.00        23
   macro avg       1.00      1.00      1.00        23
weighted avg       1.00      1.00      1.00        23

[[20  0]
 [ 0  3]]


In [149]:
print(classification_report(y_test,p3))
print(confusion_matrix(y_test,p3))

              precision    recall  f1-score   support

           0       0.91      1.00      0.95        20
           1       1.00      0.33      0.50         3

    accuracy                           0.91        23
   macro avg       0.95      0.67      0.73        23
weighted avg       0.92      0.91      0.89        23

[[20  0]
 [ 2  1]]


In [150]:
print(classification_report(y_test,p4))
print(confusion_matrix(y_test,p4))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        20
           1       1.00      1.00      1.00         3

    accuracy                           1.00        23
   macro avg       1.00      1.00      1.00        23
weighted avg       1.00      1.00      1.00        23

[[20  0]
 [ 0  3]]


In [151]:
import pickle

In [152]:
with open('xgb_model.pkl', 'wb') as f:
    pickle.dump(xgb, f)