# **Setup**

## Import notebooks

In [1]:
%%capture
# Note the python import here
import reuse, sys

# This is the Ipython hook
sys.meta_path.append(reuse.NotebookFinder())
from feature_selection import X_train, y_train, X_test, y_test

## Import libraries

In [2]:
import pandas as pd
import numpy as np

from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn import svm
from sklearn.svm import SVC
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier, StackingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.cluster import KMeans
from sklearn import preprocessing
from sklearn.metrics import classification_report

# **Test models**

## SVM

In [3]:
# Support-vector classifier; only the random seed is set, everything else is default.
model = svm.SVC(random_state=0)
model.fit(X_train, y_train)

# Predict on the test features and pair each prediction with its true label.
predictions = model.predict(X_test)
df = pd.DataFrame(dict(actual=y_test, predicted=predictions))

# Text report: per-class precision / recall / F1 plus overall accuracy.
result = classification_report(y_true=df['actual'], y_pred=df['predicted'])
print(f"SVM\n{result}")

SVM
              precision    recall  f1-score   support

           0       0.81      0.95      0.87     11360
           1       0.67      0.29      0.41      3700

    accuracy                           0.79     15060
   macro avg       0.74      0.62      0.64     15060
weighted avg       0.77      0.79      0.76     15060



## KNN

In [4]:
# Seed NumPy's global RNG before fitting.
# NOTE(review): KNeighborsClassifier is given no random_state here, so this seed
# may have no effect on the result -- confirm before removing.
np.random.seed(0)

# k-nearest-neighbours classifier with default settings.
model = KNeighborsClassifier()
model.fit(X_train, y_train)

# Predict on the test features and pair each prediction with its true label.
predictions = model.predict(X_test)
df = pd.DataFrame(dict(actual=y_test, predicted=predictions))

# Text report: per-class precision / recall / F1 plus overall accuracy.
result = classification_report(y_true=df['actual'], y_pred=df['predicted'])
print(f"KNN\n{result}")

KNN
              precision    recall  f1-score   support

           0       0.92      0.82      0.87     11360
           1       0.59      0.79      0.68      3700

    accuracy                           0.82     15060
   macro avg       0.76      0.81      0.78     15060
weighted avg       0.84      0.82      0.82     15060



## Naive Bayes

In [5]:
# Gaussian Naive Bayes classifier with default settings.
model = GaussianNB()
model.fit(X_train, y_train)

# Predict on the test features and pair each prediction with its true label.
predictions = model.predict(X_test)
df = pd.DataFrame(dict(actual=y_test, predicted=predictions))

# Text report: per-class precision / recall / F1 plus overall accuracy.
result = classification_report(y_true=df['actual'], y_pred=df['predicted'])
print(f"Naive Bayes\n{result}")

Naive Bayes
              precision    recall  f1-score   support

           0       0.87      0.90      0.88     11360
           1       0.65      0.58      0.61      3700

    accuracy                           0.82     15060
   macro avg       0.76      0.74      0.75     15060
weighted avg       0.82      0.82      0.82     15060



## AdaBoost

In [6]:
# AdaBoost ensemble; seeded so repeated runs build the same ensemble.
model = AdaBoostClassifier(random_state=0)
model.fit(X_train, y_train)

# Predict on the test features and pair each prediction with its true label.
predictions = model.predict(X_test)
df = pd.DataFrame(dict(actual=y_test, predicted=predictions))

# Text report: per-class precision / recall / F1 plus overall accuracy.
result = classification_report(y_true=df['actual'], y_pred=df['predicted'])
print(f"Adaboost\n{result}")

Adaboost
              precision    recall  f1-score   support

           0       0.94      0.79      0.86     11360
           1       0.57      0.85      0.68      3700

    accuracy                           0.81     15060
   macro avg       0.76      0.82      0.77     15060
weighted avg       0.85      0.81      0.82     15060



## Bagging

In [7]:
# Bagging ensemble of two SVC members, seeded for repeatability.
# The member estimator is passed positionally on purpose: the keyword was
# `base_estimator` in older scikit-learn and is `estimator` in current releases
# (the old keyword has been removed), while the first positional slot is the
# member estimator in both, so this form runs on either version.
# NOTE(review): n_estimators=2 is very small for bagging -- presumably chosen to
# bound SVC training time; confirm.
model = BaggingClassifier(SVC(), n_estimators=2, random_state=0).fit(X_train, y_train)

# Predict on the test features and pair each prediction with its true label.
predictions = model.predict(X_test)
df = pd.DataFrame({'actual': y_test, 'predicted': predictions})

# Text report: per-class precision / recall / F1 plus overall accuracy.
result = classification_report(df['actual'], df['predicted'])
print(f"Bagging\n{result}")

Bagging
              precision    recall  f1-score   support

           0       0.81      0.95      0.87     11360
           1       0.68      0.29      0.41      3700

    accuracy                           0.79     15060
   macro avg       0.74      0.62      0.64     15060
weighted avg       0.78      0.79      0.76     15060



## Stacking

In [8]:
# Stacked ensemble: AdaBoost and Gaussian Naive Bayes are the base learners,
# and a logistic regression is the final estimator that combines them.
model = StackingClassifier(
    estimators=[
        ('ab', AdaBoostClassifier(random_state=0)),
        ('nb', GaussianNB()),
    ],
    final_estimator=LogisticRegression(random_state=0),
)
model.fit(X_train, y_train)

# Predict on the test features and pair each prediction with its true label.
predictions = model.predict(X_test)
df = pd.DataFrame(dict(actual=y_test, predicted=predictions))

# Text report: per-class precision / recall / F1 plus overall accuracy.
result = classification_report(y_true=df['actual'], y_pred=df['predicted'])
print(f"Stacking\n{result}")

Stacking
              precision    recall  f1-score   support

           0       0.90      0.85      0.87     11360
           1       0.61      0.70      0.65      3700

    accuracy                           0.82     15060
   macro avg       0.75      0.78      0.76     15060
weighted avg       0.83      0.82      0.82     15060

