# Data Dictionary:
## **1. Download Data**
> train_dataset    |   torchvision.datasets.mnist.FashionMNIST<br>
eval_dataset       |   torchvision.datasets.mnist.FashionMNIST

## **2. Prepare Data**


> train_images     |   numpy array (60000, 784)
<br>train_labels   |   numpy array (60000, )
<br>eval_images    |   numpy array (10000, 784)
<br>eval_labels    |   numpy array (10000, )
<br> standardized_train_images | numpy array (60000, 784)
<br> standardized_eval_images |   numpy array (10000, 784)




# **To-Do**
1. Test PCA with sharpened data
2. Test Feature selection on PCs

Add in F1 Score & Confusion matrix

In [0]:
# a = []
# while(1):
#     a.append('1')

# Importing of Libraries

In [3]:
import torchvision
from PIL import Image

import pandas as pd
import numpy as np
import scipy as sp
from scipy import ndimage
from scipy.stats import norm
import scipy.signal as sig

from sklearn.preprocessing import MinMaxScaler
from sklearn import metrics
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from skimage import io
from skimage.feature import hog
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import learning_curve
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import StratifiedKFold
from sklearn.feature_selection import RFECV

from matplotlib.pyplot import imshow
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

import time, random, math


  import pandas.util.testing as tm


In [0]:
%matplotlib inline

# Functions

# Data Extraction

In [5]:
# download the Fashion MNIST training data
train_dataset = torchvision.datasets.FashionMNIST(root='./', train=True, download=True)

# download the Fashion MNIST evaluation data
eval_dataset = torchvision.datasets.FashionMNIST(root='./', train=False, download=True)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./FashionMNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Extracting ./FashionMNIST/raw/train-images-idx3-ubyte.gz to ./FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./FashionMNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Extracting ./FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./FashionMNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Extracting ./FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Extracting ./FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./FashionMNIST/raw
Processing...
Done!





In [0]:
labels_dict = {0: "T-shirt/top",
               1: "Trouser",
               2: "Pullover",
               3: "Dress",
               4: "Coat",
               5: "Sandal",
               6: "Shirt",
               7: "Sneaker",
               8: "Bag",
               9: "Ankle boot"}

# Data Preparation

**Reshape data from 28x28 into 784**

In [0]:
train_images = train_dataset.data.numpy().reshape(-1,28*28)
train_labels = train_dataset.targets.data.numpy()

In [0]:
train_images.shape

(60000, 784)

In [0]:
train_labels.shape

(60000,)

In [0]:
eval_images = eval_dataset.data.numpy().reshape(-1,28*28)
eval_labels = eval_dataset.targets.data.numpy()

In [0]:
eval_images.shape

(10000, 784)

In [0]:
eval_labels.shape

(10000,)

**Standardize data**

In [0]:
standardized_train_images = StandardScaler().fit_transform(train_images)
standardized_eval_images = StandardScaler().fit_transform(eval_images)

# Support Vector Machine

In [0]:
# optimal number for random_state
random_seed = 42

**Run SVM on Untreated Data**

In [0]:
svm = SVC(kernel='rbf', random_state=random_seed)

In [0]:
# cvs = cross_val_score(svm, train_images, train_labels,scoring='precision_macro')
# print np.mean(cvs)

In [10]:
%%time

svm.fit(train_images,train_labels)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=42, shrinking=True, tol=0.001,
    verbose=False)

In [11]:
y_pred_1 = svm.predict(eval_images)

print('Model classification accuracy: {}%'.format(str(metrics.accuracy_score(eval_labels, y_pred_1) * 100)))
f1 = f1_score(eval_labels, y_pred_1,average='macro')
print('F1 score: %f' % f1)

Model classification accuracy: 88.28%
F1 score: 0.882265


**Run SVM on Standardized Data**

In [0]:
svm = SVC(kernel='rbf', random_state=random_seed)

In [0]:
cvs = cross_val_score(svm, train_images, train_labels,scoring='precision_macro')
print(np.mean(cvs))

0.8890649801213758


In [13]:
%%time

svm.fit(standardized_train_images,train_labels)

CPU times: user 14min 35s, sys: 163 ms, total: 14min 35s
Wall time: 14min 35s


SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=42, shrinking=True, tol=0.001,
    verbose=False)

In [14]:
y_pred_2 = svm.predict(standardized_eval_images)

print('Model classification accuracy: {}%'.format(str(metrics.accuracy_score(eval_labels, y_pred_2) * 100)))
f1 = f1_score(eval_labels, y_pred_2,average='macro')
print('F1 score: %f' % f1)

Model classification accuracy: 88.36%
F1 score: 0.882864


**Run SVM with PCA**

In [0]:
pca = PCA()

pca.n_components = 11

pca_train_images = pca.fit_transform(standardized_train_images)
pca_eval_images = pca.fit_transform(standardized_eval_images)

In [0]:
svm = SVC(kernel='rbf', C=1000, random_state=random_seed)

In [17]:
%%time

svm.fit(pca_train_images,train_labels)

CPU times: user 3min 4s, sys: 327 ms, total: 3min 4s
Wall time: 3min 4s


SVC(C=1000, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=42, shrinking=True, tol=0.001,
    verbose=False)

In [18]:
y_pred_3 = svm.predict(pca_eval_images)

print('Model classification accuracy: {}%'.format(str(metrics.accuracy_score(eval_labels, y_pred_3) * 100)))
f1 = f1_score(eval_labels, y_pred_3,average='macro')
print('F1 score: %f' % f1)

Model classification accuracy: 80.53%
F1 score: 0.803440


**SVM GridSearchCV**

Tuning RBF C Values

In [0]:
c_values = [0.000001, 0.0001, 0.1, 1, 100, 10000]
gamma_values = ['auto','scale']
degree_values = np.arange(1, 6, 1)
# kernel_types = ['linear','rbf','poly','sigmoid']
# param_grid = dict(kernel=kernel_types, gamma=gamma_range, C=c_range,degree=degree_values)
# tuning_param_grid = [{'kernel':['linear'], 'C':c_values}]
tuning_param_grid = [{'kernel':['rbf'], 'C':c_values, 'gamma':gamma_values}]
                    #  {'kernel':['poly'],'degree':degree_values, 'C':c_values, 'gamma':gamma_values}]
                    #  {'kernel':['sigmoid'], 'C':c_values, 'gamma':gamma_values}]
svm = SVC()
cv = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=random_seed)

In [20]:
%%time

grid = GridSearchCV(svm, param_grid=tuning_param_grid, cv=cv, verbose=10, n_jobs=-1)
grid.fit(pca_train_images,train_labels)

Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed: 14.5min
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed: 21.8min
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed: 36.5min
[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed: 42.1min
[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed: 50.2min
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed: 59.2min
[Parallel(n_jobs=-1)]: Done  53 tasks      | elapsed: 69.5min
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed: 102.7min remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed: 102.7min finished


CPU times: user 1min 3s, sys: 421 ms, total: 1min 4s
Wall time: 1h 43min 42s


In [22]:
print ("The best classifier is: " , grid.best_estimator_)
print ('Score of best classifier :', grid.score(pca_eval_images,eval_labels))

The best classifier is:  SVC(C=100, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)
Score of best classifier : 0.8112


In [0]:
# grid.best_param.get("kernel")
# grid.best_param.get("gamma")
# grid.best_param.get("C")
# grid.best_param.get("degree")
# grid.best_param

In [23]:
for param, score in zip(grid.cv_results_['params'], grid.cv_results_['mean_test_score']):
    print(param, score)

{'C': 1e-06, 'gamma': 'auto', 'kernel': 'rbf'} 0.32330000000000003
{'C': 1e-06, 'gamma': 'scale', 'kernel': 'rbf'} 0.6828666666666667
{'C': 0.0001, 'gamma': 'auto', 'kernel': 'rbf'} 0.32330000000000003
{'C': 0.0001, 'gamma': 'scale', 'kernel': 'rbf'} 0.6828666666666667
{'C': 0.1, 'gamma': 'auto', 'kernel': 'rbf'} 0.6247166666666666
{'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'} 0.7956166666666667
{'C': 1, 'gamma': 'auto', 'kernel': 'rbf'} 0.7856500000000001
{'C': 1, 'gamma': 'scale', 'kernel': 'rbf'} 0.82825
{'C': 100, 'gamma': 'auto', 'kernel': 'rbf'} 0.7752666666666667
{'C': 100, 'gamma': 'scale', 'kernel': 'rbf'} 0.8544333333333334
{'C': 10000, 'gamma': 'auto', 'kernel': 'rbf'} 0.7752333333333334
{'C': 10000, 'gamma': 'scale', 'kernel': 'rbf'} 0.8441000000000001


Increase C tuning of RBF C parameter

In [0]:
c_values = [150,175,200,225,250]
gamma_values = ['scale']
degree_values = np.arange(1, 6, 1)
# kernel_types = ['linear','rbf','poly','sigmoid']
# param_grid = dict(kernel=kernel_types, gamma=gamma_range, C=c_range,degree=degree_values)
# tuning_param_grid = [{'kernel':['linear'], 'C':c_values}]
tuning_param_grid = [{'kernel':['rbf'], 'C':c_values, 'gamma':gamma_values}]

In [35]:
%%time

grid = GridSearchCV(svm, param_grid=tuning_param_grid, cv=cv, verbose=10, n_jobs=-1)
grid.fit(pca_train_images,train_labels)

Fitting 5 folds for each of 5 candidates, totalling 25 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  2.9min
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:  4.5min
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:  7.7min
[Parallel(n_jobs=-1)]: Done  21 out of  25 | elapsed:  9.4min remaining:  1.8min
[Parallel(n_jobs=-1)]: Done  25 out of  25 | elapsed: 10.6min finished


CPU times: user 1min 14s, sys: 310 ms, total: 1min 14s
Wall time: 11min 48s


In [36]:
print ("The best classifier is: " , grid.best_estimator_)
print ('Score of best classifier :', grid.score(pca_eval_images,eval_labels))

The best classifier is:  SVC(C=150, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)
Score of best classifier : 0.81


In [37]:
for param, score in zip(grid.cv_results_['params'], grid.cv_results_['mean_test_score']):
    print(param, score)

{'C': 150, 'gamma': 'scale', 'kernel': 'rbf'} 0.8548333333333333
{'C': 175, 'gamma': 'scale', 'kernel': 'rbf'} 0.8547
{'C': 200, 'gamma': 'scale', 'kernel': 'rbf'} 0.8546166666666666
{'C': 225, 'gamma': 'scale', 'kernel': 'rbf'} 0.8543666666666667
{'C': 250, 'gamma': 'scale', 'kernel': 'rbf'} 0.8543833333333334


Tuning Poly Parameters

In [0]:
c_values = [0.00001, 0.1, 1, 150, 1000]
gamma_values = ['scale']
degree_values = np.arange(1, 6, 1)
# kernel_types = ['linear','rbf','poly','sigmoid']
# param_grid = dict(kernel=kernel_types, gamma=gamma_range, C=c_range,degree=degree_values)
# tuning_param_grid = [{'kernel':['linear'], 'C':c_values}]
tuning_param_grid = [{'kernel':['poly'], 'C':c_values, 'gamma':gamma_values, 'degree':degree_values}]

In [40]:
%%time

poly_grid = GridSearchCV(svm, param_grid=tuning_param_grid, cv=cv, verbose=10, n_jobs=-1)
poly_grid.fit(pca_train_images,train_labels)

Fitting 5 folds for each of 25 candidates, totalling 125 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed: 11.0min
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed: 16.5min
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed: 27.2min
[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed: 33.0min
[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed: 35.9min
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed: 39.0min
[Parallel(n_jobs=-1)]: Done  53 tasks      | elapsed: 42.1min
[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed: 44.2min
[Parallel(n_jobs=-1)]: Done  77 tasks      | elapsed: 48.4min
[Parallel(n_jobs=-1)]: Done  90 tasks      | elapsed: 55.1min
[Parallel(n_jobs=-1)]: Done 105 tasks      | elapsed: 73.4min
[Parallel(n_jobs=-1)]: Done 125 out of 125 | elapsed: 132.1min finished


CPU times: user 9min 23s, sys: 488 ms, total: 9min 24s
Wall time: 2h 21min 31s


In [41]:
print ("The best classifier is: " , poly_grid.best_estimator_)
print ('Score of best classifier :', poly_grid.score(pca_eval_images,eval_labels))

The best classifier is:  SVC(C=1000, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='poly',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)
Score of best classifier : 0.7994


In [42]:
for param, score in zip(poly_grid.cv_results_['params'], poly_grid.cv_results_['mean_test_score']):
    print(param, score)

{'C': 1e-05, 'degree': 1, 'gamma': 'scale', 'kernel': 'poly'} 0.5223000000000001
{'C': 1e-05, 'degree': 2, 'gamma': 'scale', 'kernel': 'poly'} 0.4073666666666667
{'C': 1e-05, 'degree': 3, 'gamma': 'scale', 'kernel': 'poly'} 0.3287833333333333
{'C': 1e-05, 'degree': 4, 'gamma': 'scale', 'kernel': 'poly'} 0.21523333333333333
{'C': 1e-05, 'degree': 5, 'gamma': 'scale', 'kernel': 'poly'} 0.17116666666666663
{'C': 0.1, 'degree': 1, 'gamma': 'scale', 'kernel': 'poly'} 0.7710166666666668
{'C': 0.1, 'degree': 2, 'gamma': 'scale', 'kernel': 'poly'} 0.7806833333333334
{'C': 0.1, 'degree': 3, 'gamma': 'scale', 'kernel': 'poly'} 0.7810666666666667
{'C': 0.1, 'degree': 4, 'gamma': 'scale', 'kernel': 'poly'} 0.7496166666666666
{'C': 0.1, 'degree': 5, 'gamma': 'scale', 'kernel': 'poly'} 0.7256333333333334
{'C': 1, 'degree': 1, 'gamma': 'scale', 'kernel': 'poly'} 0.7885333333333333
{'C': 1, 'degree': 2, 'gamma': 'scale', 'kernel': 'poly'} 0.8109
{'C': 1, 'degree': 3, 'gamma': 'scale', 'kernel': 'poly'

In [0]:
c_values = [750, 1000, 2000]
gamma_values = ['scale']
# degree_values = np.arange(1, 6, 1)
degree_values = [3]
# kernel_types = ['linear','rbf','poly','sigmoid']
# param_grid = dict(kernel=kernel_types, gamma=gamma_range, C=c_range,degree=degree_values)
# tuning_param_grid = [{'kernel':['linear'], 'C':c_values}]
tuning_param_grid = [{'kernel':['poly'], 'C':c_values, 'gamma':gamma_values, 'degree':degree_values}]

In [18]:
%%time

poly_grid_2 = GridSearchCV(svm, param_grid=tuning_param_grid, cv=cv, verbose=10, n_jobs=-1)
poly_grid_2.fit(pca_train_images,train_labels)

Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed: 16.1min
[Parallel(n_jobs=-1)]: Done  10 out of  15 | elapsed: 30.9min remaining: 15.4min
[Parallel(n_jobs=-1)]: Done  12 out of  15 | elapsed: 41.9min remaining: 10.5min
[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed: 54.7min finished


CPU times: user 18min 33s, sys: 329 ms, total: 18min 33s
Wall time: 1h 13min 13s


In [19]:
print ("The best classifier is: " , poly_grid_2.best_estimator_)
print ('Score of best classifier :', poly_grid_2.score(pca_eval_images,eval_labels))

The best classifier is:  SVC(C=2000, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='poly',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)
Score of best classifier : 0.7979


In [21]:
for param, score in zip(poly_grid_2.cv_results_['params'], poly_grid_2.cv_results_['mean_test_score']):
    print(param, score)

{'C': 750, 'degree': 3, 'gamma': 'scale', 'kernel': 'poly'} 0.84555
{'C': 1000, 'degree': 3, 'gamma': 'scale', 'kernel': 'poly'} 0.8456833333333333
{'C': 2000, 'degree': 3, 'gamma': 'scale', 'kernel': 'poly'} 0.8459


In [0]:
c_values = [2000, 5000, 10000]
gamma_values = ['scale']
# degree_values = np.arange(1, 6, 1)
degree_values = [3]
# kernel_types = ['linear','rbf','poly','sigmoid']
# param_grid = dict(kernel=kernel_types, gamma=gamma_range, C=c_range,degree=degree_values)
# tuning_param_grid = [{'kernel':['linear'], 'C':c_values}]
tuning_param_grid = [{'kernel':['poly'], 'C':c_values, 'gamma':gamma_values, 'degree':degree_values}]

In [0]:
%%time

poly_grid_3 = GridSearchCV(svm, param_grid=tuning_param_grid, cv=cv, verbose=10, n_jobs=-1)
poly_grid_3.fit(pca_train_images,train_labels)

Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed: 42.5min


In [0]:
print ("The best classifier is: " , poly_grid_3.best_estimator_)
print ('Score of best classifier :', poly_grid_3.score(pca_eval_images,eval_labels))

In [0]:
for param, score in zip(poly_grid_3.cv_results_['params'], poly_grid_3.cv_results_['mean_test_score']):
    print(param, score)

**Recursive feature elimination**


m = RFECV(RandomForestClassifier(), scoring='accuracy')
m.fit(X, y)