In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
dataset = pd.read_csv("dataset.csv")

In [3]:
dataset.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,48,49,50,51,52,53,54,55,56,57
0,0.0,0.64,0.64,0.0,0.32,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.778,0.0,0.0,3.756,61.0,278.0,1.0
1,0.21,0.28,0.5,0.0,0.14,0.28,0.21,0.07,0.0,0.94,...,0.0,0.132,0.0,0.372,0.18,0.048,5.114,101.0,1028.0,1.0
2,0.06,0.0,0.71,0.0,1.23,0.19,0.19,0.12,0.64,0.25,...,0.01,0.143,0.0,0.276,0.184,0.01,9.821,485.0,2259.0,1.0
3,0.0,0.0,0.0,0.0,0.63,0.0,0.31,0.63,0.31,0.63,...,0.0,0.137,0.0,0.137,0.0,0.0,3.537,40.0,191.0,1.0
4,0.0,0.0,0.0,0.0,0.63,0.0,0.31,0.63,0.31,0.63,...,0.0,0.135,0.0,0.135,0.0,0.0,3.537,40.0,191.0,1.0


In [4]:
X = dataset.iloc[:, :-1].values
Y = dataset.iloc[:, -1:].values

In [5]:
print(X.shape, Y.shape)

(4601, 57) (4601, 1)


In [6]:
sc = StandardScaler()
X_norm = sc.fit_transform(X)

In [7]:
X_train, X_test, Y_train, Y_test = train_test_split(X_norm, Y, test_size = 0.20, random_state = 0)

In [8]:
lr_model = LogisticRegression()
lr_model.fit(X_train, Y_train)
Y_pred_lr = lr_model.predict(X_test)

  y = column_or_1d(y, warn=True)


In [9]:
results = confusion_matrix(Y_test, Y_pred_lr)
acc_score = accuracy_score(Y_test, Y_pred_lr)
class_report = classification_report(Y_test, Y_pred_lr)
print("Confusion Matrix:\n", results)
print("Accuracy: ", acc_score)

Confusion Matrix:
 [[505  33]
 [ 49 334]]
Accuracy:  0.9109663409337676


In [10]:
dt_model = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)
dt_model.fit(X_train, Y_train)
Y_pred_dt = dt_model.predict(X_test)

In [11]:
results = confusion_matrix(Y_test, Y_pred_dt)
acc_score = accuracy_score(Y_test, Y_pred_dt)
class_report = classification_report(Y_test, Y_pred_dt)
print("Confusion Matrix:\n", results)
print("Accuracy: ", acc_score)

Confusion Matrix:
 [[500  38]
 [ 43 340]]
Accuracy:  0.9120521172638436


In [12]:
nb_model = GaussianNB()
nb_model.fit(X_train, Y_train)
Y_pred_nb = nb_model.predict(X_test)

  y = column_or_1d(y, warn=True)


In [13]:
results = confusion_matrix(Y_test, Y_pred_nb)
acc_score = accuracy_score(Y_test, Y_pred_nb)
class_report = classification_report(Y_test, Y_pred_nb)
print("Confusion Matrix:\n", results)
print("Accuracy: ", acc_score)

Confusion Matrix:
 [[382 156]
 [ 28 355]]
Accuracy:  0.8002171552660152


In [14]:
rf_model = RandomForestClassifier(n_estimators = 15, criterion = 'entropy', random_state = 0)
rf_model.fit(X_train, Y_train)
Y_pred_rf = rf_model.predict(X_test)

  


In [15]:
results = confusion_matrix(Y_test, Y_pred_rf)
acc_score = accuracy_score(Y_test, Y_pred_rf)
class_report = classification_report(Y_test, Y_pred_nb)
print("Confusion Matrix:\n", results)
print("Accuracy: ", acc_score)

Confusion Matrix:
 [[518  20]
 [ 29 354]]
Accuracy:  0.9467969598262758


In [16]:
knn_model = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
knn_model.fit(X_train, Y_train)
Y_pred_knn = knn_model.predict(X_test)

  


In [17]:
results = confusion_matrix(Y_test, Y_pred_knn)
acc_score = accuracy_score(Y_test, Y_pred_knn)
class_report = classification_report(Y_test, Y_pred_knn)
print("Confusion Matrix:\n", results)
print("Accuracy: ", acc_score)

Confusion Matrix:
 [[510  28]
 [ 50 333]]
Accuracy:  0.9153094462540716


In [18]:
svm_model = SVC(kernel = 'rbf', random_state = 0)
svm_model.fit(X_train, Y_train)
Y_pred_svm = svm_model.predict(X_test)

  y = column_or_1d(y, warn=True)


In [19]:
results = confusion_matrix(Y_test, Y_pred_svm)
acc_score = accuracy_score(Y_test, Y_pred_svm)
class_report = classification_report(Y_test, Y_pred_svm)
print("Confusion Matrix:\n", results)
print("Accuracy: ", acc_score)

Confusion Matrix:
 [[513  25]
 [ 43 340]]
Accuracy:  0.9261672095548317


In [20]:
Y_pred_ensemble = (Y_pred_lr + Y_pred_dt + Y_pred_nb + Y_pred_rf + Y_pred_knn + Y_pred_svm)/6

In [21]:
Y_pred_ensemble = (Y_pred_ensemble>=0.5)*1

In [22]:
results = confusion_matrix(Y_test, Y_pred_ensemble)
acc_score = accuracy_score(Y_test, Y_pred_ensemble)
class_report = classification_report(Y_test, Y_pred_ensemble)
print("Confusion Matrix:\n", results)
print("Accuracy: ", acc_score)

Confusion Matrix:
 [[513  25]
 [ 27 356]]
Accuracy:  0.9435396308360477
