## Classification Models


In [3]:
##################################
# IMPORTING BASE AND API LIBRARIES
##################################

from pathlib import Path
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot


# Pre-processing
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler


# Model evaluation & metrics
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score


# Classification models
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

#### Create target and features for train and test

In [None]:
# Extract the underlying value arrays of the train and test frames so
# that columns can be sliced by position in the following cells.
train_array, test_array = final_df.values, final_test_df.values

In [None]:
# Target vector: the first column (index 0) of every row.
Y_train, Y_test = train_array[:, 0], test_array[:, 0]

In [None]:
# Feature matrix: every column after the first, for every row.
X_train, X_test = train_array[:, 1:], test_array[:, 1:]

In [None]:
# Sanity check: show the train and test feature-matrix shapes, one per line.
print(X_train.shape, X_test.shape, sep="\n")

In [None]:
# Short aliases for the training features and target.
X, Y = X_train, Y_train

#### Classification Algos

In [None]:
# Classification: Logistic Regression, LDA, k-nearest neighbors, CART,
# Naive Bayes, & SVM

# Candidate classifiers, each constructed with its default hyperparameters.
models = [
    ('LR', LogisticRegression()),
    ('LDA', LinearDiscriminantAnalysis()),
    ('KNN', KNeighborsClassifier()),
    ('CART', DecisionTreeClassifier()),
    ('NB', GaussianNB()),
    ('SVM', SVC()),
]

# One seeded splitter shared by all models so each is scored on the same
# 10 folds. shuffle=True is required for random_state to take effect:
# recent scikit-learn releases raise ValueError when random_state is set
# while shuffle is left at its default of False.
kfold = KFold(n_splits=10, shuffle=True, random_state=7)

# evaluate each model in turn
results = []        # per-model arrays of fold accuracies (boxplot input)
names = []          # model labels, in the same order as `results`
best = -np.inf      # highest mean CV accuracy seen so far
best_model = None   # estimator that achieved `best`
for name, model in models:
    cv_results = cross_val_score(model, X, Y, cv=kfold, scoring='accuracy')
    mean_accuracy = cv_results.mean()
    results.append(cv_results)
    names.append(name)
    # Strict '>' keeps the earliest model when two means tie.
    if mean_accuracy > best:
        best = mean_accuracy
        best_model = model
    # Printed as "<name>: <mean accuracy> (<std of fold accuracies>)".
    msg = "%s: %f (%f)" % (name, mean_accuracy, cv_results.std())
    print(msg)

In [None]:
# boxplot algorithm comparison
# One box per entry of `results`, labelled on the x-axis with the matching
# entry of `names`.
fig = pyplot.figure()
ax = fig.add_subplot(111)
ax.boxplot(results)
ax.set_xticklabels(names)
fig.suptitle('Algorithm Comparison')
pyplot.show()

In [None]:
# Take the estimator that achieved the highest mean cross-validation
# accuracy and fit it on the full training set.
model = best_model
model.fit(X_train, Y_train)


In [None]:
# Predict the target for every row of the test feature matrix using the
# fitted model; compared against Y_test in the following cells.
predictions = model.predict(X_test)

In [None]:
# Print the classification report
# The report is kept under its own name so that `results` (the list of
# cross-validation score arrays read by the boxplot cell) is not replaced
# by a string when cells are re-run out of order.
report = classification_report(Y_test, predictions)
print("Classification Report")
print(report)

In [None]:
# Calculating the confusion matrix
cm = confusion_matrix(Y_test, predictions)
# The fixed row/column labels assume a binary target (a 2x2 matrix).
cm_df = pd.DataFrame(
    cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"]
)

# Calculating the accuracy score as a whole-number percentage.
# Scale to percent BEFORE rounding: rounding first and multiplying by 100
# afterwards re-introduces floating-point noise
# (e.g. round(0.57, 2) * 100 == 56.99999999999999).
acc_score = round(accuracy_score(Y_test, predictions) * 100, 0)

In [None]:
# Report the test-set accuracy (already scaled to a percentage above).
print(f"Accuracy Score: {acc_score}%")