# Importing the libraries

In [None]:
import numpy as np
import pandas as pd
from keras.datasets import mnist
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier, StackingClassifier
from sklearn.metrics import classification_report, precision_recall_fscore_support

# Preprocessing

## Importing the dataset

In [None]:
# Load MNIST through Keras; load_data() yields a (train, test) pair,
# each of which is an (images, labels) pair.
train_split, test_split = mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


## Data reshaping

In [None]:
# Flatten each image into one feature row: keep the sample axis and let
# NumPy infer the flattened length (-1).
X_train = X_train.reshape(len(X_train), -1)
X_test = X_test.reshape(len(X_test), -1)

## Data normalization

In [None]:
# Scale pixel intensities from [0, 255] down to [0, 1].
# The range check makes this cell idempotent: X_train / X_test are rebound in
# place, so re-running the cell without the guard would divide by 255 a second
# time and silently shrink every feature.
PIXEL_MAX = 255.0
if X_train.max() > 1.0:
  X_train = X_train / PIXEL_MAX
if X_test.max() > 1.0:
  X_test = X_test / PIXEL_MAX

## Generating sample dataset

In [None]:
# Stratified subsample of the training set: for every label keep the first
# round(count / SAMPLE_DIVISOR) occurrences, in their original order.
SAMPLE_DIVISOR = 10

# Map each label that actually occurs in y_train to the positions where it
# occurs. Iterating np.unique(y_train) directly (instead of range(n_labels))
# avoids assuming the labels are exactly 0..n_labels-1.
classes = {int(label): np.where(y_train == label)[0] for label in np.unique(y_train)}

# Per-class quota taken with a slice rather than an element-by-element loop;
# classes are concatenated in ascending label order.
sample_indices = np.concatenate(
  [indices[:round(len(indices) / SAMPLE_DIVISOR)] for indices in classes.values()]
)

# One fancy-indexing step yields arrays (rather than Python lists of rows)
# that are passed to the estimators' fit() calls.
X_train_sample = X_train[sample_indices]
y_train_sample = y_train[sample_indices]

# Training the classification models on the sampled training set

## Default Decision Tree

In [None]:
# Decision tree with default hyperparameters, fitted on the subsample.
# random_state is pinned because scikit-learn permutes the candidate features
# at every split, so an unseeded tree can differ between runs and the
# reported score would not be reproducible.
classifier_DT = DecisionTreeClassifier(random_state=42)
classifier_DT.fit(X_train_sample, y_train_sample)

DecisionTreeClassifier()

## Default Sigmoid SVC 

In [None]:
# Support-vector classifier using the sigmoid kernel; every other
# hyperparameter is left at the library default.
svc_kernel = 'sigmoid'
classifier_SVC = SVC(kernel=svc_kernel)
classifier_SVC.fit(X_train_sample, y_train_sample)

SVC(kernel='sigmoid')

## Default Logistic Regression

In [None]:
# Logistic regression on the subsample.
# With the default max_iter the solver stopped at its iteration limit
# ("TOTAL NO. of ITERATIONS REACHED LIMIT"), i.e. the reported model was not
# converged. The features are already scaled to [0, 1], so the remaining
# remedy suggested by the warning is a larger iteration budget.
classifier_LR = LogisticRegression(max_iter=1000)
classifier_LR.fit(X_train_sample, y_train_sample)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


LogisticRegression()

## KNN (n_neighbors = 245)

In [None]:
# k-nearest-neighbours classifier that votes over the 245 closest
# training samples.
KNN_NEIGHBORS = 245
classifier_KNN = KNeighborsClassifier(n_neighbors=KNN_NEIGHBORS)
classifier_KNN.fit(X_train_sample, y_train_sample)

KNeighborsClassifier(n_neighbors=245)

## Bagging

In [None]:
# Bagging ensemble of 10 decision trees.
# The base learner is passed positionally on purpose: the keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 and the old name
# was removed in 1.4, whereas the first positional slot works on both.
# random_state seeds the bootstrap draws (and the trees) so the score is
# reproducible.
classifier_bag = BaggingClassifier(DecisionTreeClassifier(), n_estimators=10, random_state=42)
classifier_bag.fit(X_train_sample, y_train_sample)

BaggingClassifier(base_estimator=DecisionTreeClassifier())

## Stacking

In [None]:
# Stacked ensemble: four base learners whose predictions feed a logistic
# regression meta-learner (cv=10 controls the cross-validation used to
# generate those predictions).
# - max_iter is raised on both logistic regressions because the default
#   iteration budget triggers lbfgs convergence warnings on this data.
# - the decision tree is seeded so the stack is reproducible.
estimators = [
  ('DT', DecisionTreeClassifier(random_state=42)),
  ('SVC', SVC(kernel='sigmoid')),
  ('LR', LogisticRegression(max_iter=1000)),
  ('KNN', KNeighborsClassifier(n_neighbors=245)),
]
classifier_stack = StackingClassifier(
  estimators=estimators,
  final_estimator=LogisticRegression(max_iter=1000),
  cv=10,
)
classifier_stack.fit(X_train_sample, y_train_sample)

## Adaptive Boosting

In [None]:
# AdaBoost over 10 decision trees.
# The base learner is passed positionally on purpose: the keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 and the old name
# was removed in 1.4, whereas the first positional slot works on both.
# random_state makes the run reproducible.
# NOTE(review): the base tree has no depth limit, so it can fit the training
# sample perfectly; boosting then presumably stops after the first round and
# behaves like a single tree (the recorded score is close to the plain
# decision tree's). Consider a shallow tree (e.g. max_depth=1-3) - confirm.
classifier_Ada = AdaBoostClassifier(DecisionTreeClassifier(), n_estimators=10, random_state=42)
classifier_Ada.fit(X_train_sample, y_train_sample)

AdaBoostClassifier(base_estimator=DecisionTreeClassifier(), n_estimators=10)

# Models evaluation

## Default Decision Tree

In [None]:
# Evaluate the decision tree on the test set. The metric helper returns
# (precision, recall, F-score, support); index 2 is the weighted F1, which
# is reported as a percentage rounded to two decimals.
y_pred_DT = classifier_DT.predict(X_test)
prfs_DT = precision_recall_fscore_support(y_test, y_pred_DT, average='weighted')
score_DT = round(prfs_DT[2] * 100, 2)
print(f"Decision Tree F1-score: {score_DT}%")

Decision Tree F1-score: 77.86%


## Default Sigmoid SVC 

In [None]:
# Evaluate the sigmoid SVC on the test set. The metric helper returns
# (precision, recall, F-score, support); index 2 is the weighted F1, which
# is reported as a percentage rounded to two decimals.
y_pred_SVC = classifier_SVC.predict(X_test)
prfs_SVC = precision_recall_fscore_support(y_test, y_pred_SVC, average='weighted')
score_SVC = round(prfs_SVC[2] * 100, 2)
print(f"Sigmoid SVC F1-score: {score_SVC}%")

Sigmoid SVC F1-score: 84.08%


## Default Logistic Regression

In [None]:
# Evaluate logistic regression on the test set. The metric helper returns
# (precision, recall, F-score, support); index 2 is the weighted F1, which
# is reported as a percentage rounded to two decimals.
y_pred_LR = classifier_LR.predict(X_test)
prfs_LR = precision_recall_fscore_support(y_test, y_pred_LR, average='weighted')
score_LR = round(prfs_LR[2] * 100, 2)
print(f"Logistic regression F1-score: {score_LR}%")

Logistic regression F1-score: 89.69%


## KNN (n_neighbors = 245)

In [None]:
# Evaluate KNN on the test set. The metric helper returns
# (precision, recall, F-score, support); index 2 is the weighted F1, which
# is reported as a percentage rounded to two decimals.
y_pred_KNN = classifier_KNN.predict(X_test)
prfs_KNN = precision_recall_fscore_support(y_test, y_pred_KNN, average='weighted')
score_KNN = round(prfs_KNN[2] * 100, 2)
print(f"KNN F1-score: {score_KNN}%")

KNN F1-score: 80.6%


## Bagging

In [None]:
# Evaluate the bagging ensemble on the test set. The metric helper returns
# (precision, recall, F-score, support); index 2 is the weighted F1, which
# is reported as a percentage rounded to two decimals.
y_pred_bag = classifier_bag.predict(X_test)
prfs_bag = precision_recall_fscore_support(y_test, y_pred_bag, average='weighted')
score_bag = round(prfs_bag[2] * 100, 2)
print(f"Bagging F1-score: {score_bag}%")

Bagging F1-score: 89.55%


## Stacking

In [None]:
# Evaluate the stacked ensemble on the test set. The metric helper returns
# (precision, recall, F-score, support); index 2 is the weighted F1, which
# is reported as a percentage rounded to two decimals.
y_pred_stack = classifier_stack.predict(X_test)
prfs_stack = precision_recall_fscore_support(y_test, y_pred_stack, average='weighted')
score_stack = round(prfs_stack[2] * 100, 2)
print(f"Stacking F1-score: {score_stack}%")

Stacking F1-score: 91.27%


## Adaptive Boosting

In [None]:
# Evaluate AdaBoost on the test set. The metric helper returns
# (precision, recall, F-score, support); index 2 is the weighted F1, which
# is reported as a percentage rounded to two decimals.
y_pred_Ada = classifier_Ada.predict(X_test)
prfs_Ada = precision_recall_fscore_support(y_test, y_pred_Ada, average='weighted')
score_Ada = round(prfs_Ada[2] * 100, 2)
print(f"AdaBoost F1-score: {score_Ada}%")

AdaBoost F1-score: 78.35%


## Results comparison

In [None]:
# Collect every model's weighted F1 percentage into a one-row comparison
# table; the column order follows the order of the pairs below.
model_scores = [
  ('Decision Tree', score_DT),
  ('Sigmoid SVC', score_SVC),
  ('Logistic regression', score_LR),
  ('KNN', score_KNN),
  ('Bagging', score_bag),
  ('Stacking', score_stack),
  ('AdaBoost', score_Ada),
]
# Scores are stored as display strings with a trailing percent sign.
f1_scores = {model: str(score) + '%' for model, score in model_scores}
df = pd.DataFrame(data=f1_scores, index=['F1-scores'])
df

Unnamed: 0,Decision Tree,Sigmoid SVC,Logistic regression,KNN,Bagging,Stacking,AdaBoost
F1-scores,77.86%,84.08%,89.69%,80.6%,89.55%,91.27%,78.35%
