In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
%matplotlib inline

# Import from sklearn lib 
from sklearn.metrics import classification_report

# Import from pylearn_ml191 lib 
from pylearn_ml191.linear_regression import SoftmaxRegression
from pylearn_ml191.dimentional_reduction import PCA

# Fix random seed
np.random.seed(21)

import warnings
warnings.filterwarnings("ignore")

## Data Loader

In [2]:
plain_train_set = pd.read_csv("./mnist_data/train_set_plain.csv", header=None)
plain_val_set = pd.read_csv("./mnist_data/val_set_plain.csv", header=None)
plain_test_set = pd.read_csv("./mnist_data/val_set_plain.csv", header=None)

extracted_train_set = pd.read_csv("./mnist_data/train_set_extracted.csv", header=None)
extracted_val_set = pd.read_csv("./mnist_data/val_set_extracted.csv", header=None)
extracted_test_set = pd.read_csv("./mnist_data/val_set_extracted.csv", header=None)

In [3]:
plain_train_set = plain_train_set.values
plain_val_set = plain_val_set.values 
plain_test_set = plain_test_set.values

extracted_train_set = extracted_train_set.values 
extracted_val_set = extracted_val_set.values 
extracted_test_set = extracted_test_set.values

In [4]:
num_features_plain = plain_train_set.shape[1] - 1
num_features_extracted = extracted_train_set.shape[1] - 1 
print("Number original features: ", num_features_plain)
print("Number extracted features by CNN: ", num_features_extracted)

Number original features:  784
Number extracted features by CNN:  50


In [5]:
plain_train_features = plain_train_set[:, :num_features_plain]
plain_train_target = plain_train_set[:, num_features_plain]

plain_val_features = plain_val_set[:, :num_features_plain]
plain_val_target = plain_val_set[:, num_features_plain]

plain_test_features = plain_test_set[:, :num_features_plain]
plain_test_target = plain_test_set[:, num_features_plain]

extracted_train_features = extracted_train_set[:, :num_features_extracted]
extracted_train_target = extracted_train_set[:, num_features_extracted]
extracted_val_features = extracted_val_set[:, :num_features_extracted]
extracted_val_target = extracted_val_set[:, num_features_extracted]
extracted_test_features = extracted_test_set[:, :num_features_extracted]
extracted_test_target = extracted_test_set[:, num_features_extracted]

In [6]:
print("Plain data set:")
print("========================")
print("Train set: ")
print(plain_train_features.shape)
print(plain_train_target.shape)
print("------------------------")
print("Dev/Val set: ")
print(plain_val_features.shape)
print(plain_val_target.shape)
print("------------------------")
print("Test set: ")
print(plain_test_features.shape)
print(plain_test_target.shape)

print("\n")

print("Extracted data set:")
print("========================")
print("Train set: ")
print(extracted_train_features.shape)
print(extracted_train_target.shape)
print("------------------------")
print("Dev/Val set: ")
print(extracted_val_features.shape)
print(extracted_val_target.shape)
print("------------------------")
print("Test set: ")
print(extracted_test_features.shape)
print(extracted_test_target.shape)

Plain data set:
Train set: 
(50000, 784)
(50000,)
------------------------
Dev/Val set: 
(10000, 784)
(10000,)
------------------------
Test set: 
(10000, 784)
(10000,)


Extracted data set:
Train set: 
(50000, 50)
(50000,)
------------------------
Dev/Val set: 
(10000, 50)
(10000,)
------------------------
Test set: 
(10000, 50)
(10000,)


## Run on plain data set

In [7]:
%%time
classifier = SoftmaxRegression(num_classes=10, use_features_extractor=False)
history = classifier.fit(plain_train_features, plain_train_target, 
                         plain_val_features, plain_val_target,
                         max_steps=10000,
                         step_to_lr_decay=3000,
                         lr_decay=0.5,
                         batch_size=64, 
                         lr=4e-3, 
                         min_W_diff=1e-5,
                         verbose=200)

[Step: 200] Train-loss: 1.222657546183608, Val-loss: 1.2340570294907685
[Step: 400] Train-loss: 1.386364921940733, Val-loss: 0.9654684669068513
[Step: 600] Train-loss: 0.3025618962926608, Val-loss: 0.8825955847263796
[Step: 800] Train-loss: 0.7755798668761295, Val-loss: 0.845781594316727
[Step: 1000] Train-loss: 0.48550257053887996, Val-loss: 0.8123020901646454
[Step: 1200] Train-loss: 0.7509877575016007, Val-loss: 0.7683984914996647
[Step: 1400] Train-loss: 0.7192505239575575, Val-loss: 0.7458643381243852
[Step: 1600] Train-loss: 0.8960887533188935, Val-loss: 0.7532096705543584
[Step: 1800] Train-loss: 0.7474224004255632, Val-loss: 0.776563115729936
[Step: 2000] Train-loss: 0.7596355536195363, Val-loss: 0.703642314517599
[Step: 2200] Train-loss: 0.4560685727878856, Val-loss: 0.6668540436193755
[Step: 2400] Train-loss: 1.0406376094456204, Val-loss: 0.6554108398819152
[Step: 2600] Train-loss: 0.6933733690374054, Val-loss: 0.6338908414092711
[Step: 2800] Train-loss: 0.4907242358535622, V

In [8]:
predictions = classifier.eval(plain_test_features, categorical=True)
print(classification_report(plain_test_target, predictions))

              precision    recall  f1-score   support

         0.0       0.95      0.95      0.95       987
         1.0       0.94      0.96      0.95      1155
         2.0       0.87      0.88      0.87       972
         3.0       0.89      0.85      0.87      1027
         4.0       0.91      0.89      0.90      1012
         5.0       0.84      0.84      0.84       863
         6.0       0.94      0.94      0.94      1019
         7.0       0.91      0.91      0.91       999
         8.0       0.84      0.83      0.83       977
         9.0       0.85      0.87      0.86       989

    accuracy                           0.90     10000
   macro avg       0.89      0.89      0.89     10000
weighted avg       0.90      0.90      0.89     10000



## Run on features extracted from pretrained CNN on MNIST

In [9]:
%%time
classifier = SoftmaxRegression(num_classes=10, use_features_extractor=False)
history = classifier.fit(extracted_train_features, extracted_train_target, 
                         extracted_val_features, extracted_val_target, 
                         max_steps=10000,
                         step_to_lr_decay=3000,
                         lr_decay=0.5,
                         batch_size=64, 
                         lr=4e-3, 
                         min_W_diff=1e-5,
                         verbose=10)

[Step: 10] Train-loss: 2.431145220776398, Val-loss: 1.9128000192581567
[Step: 20] Train-loss: 0.33394817904258356, Val-loss: 0.38804332603425923
[Step: 30] Train-loss: 0.29347505734305235, Val-loss: 0.31534853296741905
[Step: 40] Train-loss: 0.4267453791784826, Val-loss: 0.3014880467849264
[Step: 50] Train-loss: 0.20049361397134605, Val-loss: 0.23789017839237156
[Step: 60] Train-loss: 0.1842431445806834, Val-loss: 0.19937038256606318
[Step: 70] Train-loss: 0.22556401885167557, Val-loss: 0.182598456381533
[Step: 80] Train-loss: 0.274340455274613, Val-loss: 0.17802297322595517
[Step: 90] Train-loss: 0.06415024941631156, Val-loss: 0.17159039247465468
[Step: 100] Train-loss: 0.17377152409144278, Val-loss: 0.1617344358056598
[Step: 110] Train-loss: 0.47413212547545613, Val-loss: 0.15059174531022215
[Step: 120] Train-loss: 0.4699488885223151, Val-loss: 0.15215742033897312
[Step: 130] Train-loss: 0.3110670438425659, Val-loss: 0.14325265468425377
[Step: 140] Train-loss: 0.2820642077628989, Val

In [10]:
predictions = classifier.eval(extracted_test_features, categorical=True)
print(classification_report(extracted_test_target, predictions))

              precision    recall  f1-score   support

         0.0       0.98      0.99      0.99       987
         1.0       0.98      0.97      0.98      1155
         2.0       0.96      0.98      0.97       972
         3.0       0.98      0.97      0.98      1027
         4.0       0.98      0.98      0.98      1012
         5.0       0.97      0.98      0.98       863
         6.0       0.98      0.99      0.98      1019
         7.0       0.96      0.98      0.97       999
         8.0       0.98      0.96      0.97       977
         9.0       0.97      0.97      0.97       989

    accuracy                           0.98     10000
   macro avg       0.98      0.98      0.98     10000
weighted avg       0.98      0.98      0.98     10000



## Dimentional Reduction 

In [11]:
%%time
pca = PCA(n_components=100)
plain_train_features_new = pca.fit_transform(plain_train_features)
plain_val_features_new = pca.transform(plain_val_features)
plain_test_features_new = pca.transform(plain_test_features)

CPU times: user 2.92 s, sys: 424 ms, total: 3.35 s
Wall time: 1.83 s


In [12]:
%%time
classifier = SoftmaxRegression(num_classes=10, use_features_extractor=False)
history = classifier.fit(plain_train_features_new, plain_train_target, 
                         plain_val_features_new, plain_val_target, 
                         max_steps=10000,
                         step_to_lr_decay=3000,
                         lr_decay=0.5,
                         batch_size=128, 
                         lr=4e-3, 
                         min_W_diff=1e-5,
                         verbose=200)

[Step: 200] Train-loss: (0.4676094858030694-0j), Val-loss: (0.7713511357410786+0j)
[Step: 400] Train-loss: (0.626555268064406-0j), Val-loss: (0.5968683793234283+0j)
[Step: 600] Train-loss: (0.7050300318266907-0j), Val-loss: (0.508990413939314+0j)
[Step: 800] Train-loss: (0.361955468353638-0j), Val-loss: (0.4460728663878713+0j)
[Step: 1000] Train-loss: (0.5230690161617236-0j), Val-loss: (0.44099704105884857+0j)
[Step: 1200] Train-loss: (0.4438966759612844-0j), Val-loss: (0.4024341596359742+0j)
[Step: 1400] Train-loss: (0.4666239023659501-0j), Val-loss: (0.38476903264658263+0j)
[Step: 1600] Train-loss: (0.39903517228672936-0j), Val-loss: (0.37861386610691705+0j)
[Step: 1800] Train-loss: (0.4146599616393418-0j), Val-loss: (0.37313172543151946+0j)
[Step: 2000] Train-loss: (0.3333583089214729-0j), Val-loss: (0.3741742390294824+0j)
[Step: 2200] Train-loss: (0.36241296991031174-0j), Val-loss: (0.37780923889727575+0j)
[Step: 2400] Train-loss: (0.3747339720222896-0j), Val-loss: (0.3935621279819

In [13]:
predictions = classifier.eval(plain_test_features_new, categorical=True)
print(classification_report(plain_test_target, predictions))

              precision    recall  f1-score   support

         0.0       0.95      0.97      0.96       987
         1.0       0.93      0.97      0.95      1155
         2.0       0.92      0.90      0.91       972
         3.0       0.91      0.90      0.91      1027
         4.0       0.91      0.93      0.92      1012
         5.0       0.90      0.84      0.87       863
         6.0       0.94      0.96      0.95      1019
         7.0       0.92      0.94      0.93       999
         8.0       0.88      0.84      0.86       977
         9.0       0.89      0.89      0.89       989

    accuracy                           0.92     10000
   macro avg       0.92      0.91      0.91     10000
weighted avg       0.92      0.92      0.92     10000

