In [None]:
import numpy as np
import numpy.random as npr
import matplotlib.pyplot as plt
%matplotlib inline
# Use matplotlib's 'bmh' style sheet for every figure created after this point.
plt.style.use('bmh')

# Load the held-out test set from .npy files in the working directory.
# The data matrix is transposed right after loading; the labels are used as stored.
X_test = np.transpose(np.load('data_test.npy'))
t_test = np.load('labels_test.npy')

# Quick sanity check on the shapes of the two arrays.
print(X_test.shape, t_test.shape)

(2880, 90000) (2880,)


In [None]:
from sklearn.feature_selection import RFE
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.svm import SVC
from sklearn.linear_model import Lasso, LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

from PIL import Image

from scipy import stats
from sklearn.metrics import accuracy_score

import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so it also hides any warnings that
# sklearn raises during fitting or scoring (e.g. when a cross-validation split
# fails and its score is replaced by NaN) — consider narrowing it while debugging.
warnings.filterwarnings("ignore")

In [None]:
# Downsample each image: reshape the flat row to 300x300, let PIL resize it to
# 28x28, then flatten the result into one 784-long row of the output matrix.
n_images = len(X_test)
X_test_sampled = np.empty((n_images, 28*28))
for row, flat_pixels in enumerate(X_test):
    thumbnail = Image.fromarray(flat_pixels.reshape(300,300)).resize((28,28))
    X_test_sampled[row] = np.reshape(thumbnail, (784,))

# Show the shapes before and after downsampling.
X_test.shape, X_test_sampled.shape

((2880, 90000), (2880, 784))

In [None]:
# Min-max scale every pixel column of the downsampled test images.
# NOTE(review): the scaler is fitted on the test images themselves, not on the
# training data the saved models were fitted with; if the models expect features
# scaled by a training-time scaler, that scaler should be loaded and applied
# here instead — confirm against the training notebook.
X_test_scaled = MinMaxScaler().fit(X_test_sampled).transform(X_test_sampled)

In [None]:
import joblib

# Restore the previously saved estimator from 'rfe_log_reg.pkl' in the working directory.
# (Presumably an RFE-wrapped logistic regression, judging by the name — confirm.)
# joblib.load unpickles the file, so only load artifacts from a trusted source.
rfe_log_reg = joblib.load('rfe_log_reg.pkl')

In [None]:
# 5-fold cross-validated accuracy on the scaled test set.
# Note: cross_val_score clones the estimator and refits it on each training
# split, so these scores do not measure the fitted model loaded from disk.
scores = cross_val_score(rfe_log_reg, X_test_scaled, t_test, scoring='accuracy', cv=5)

# Two-sided Student-t confidence interval for the mean fold accuracy:
# centre = sample mean, scale = standard error (sample std with ddof=1 over sqrt(n)).
confidence = 0.95
n_folds = len(scores)
fold_mean = scores.mean()
fold_sem = scores.std(ddof=1) / np.sqrt(n_folds)
stats.t.interval(confidence, n_folds - 1, loc=fold_mean, scale=fold_sem)

(0.37744180338351324, 0.3982526410609312)

In [None]:
# Predict labels with the loaded model and report its accuracy on the test set as a percentage.
y_test = rfe_log_reg.predict(X_test_scaled)
test_accuracy_pct = accuracy_score(t_test, y_test)*100
print('Accuracy in Test:', test_accuracy_pct, '%')

Accuracy in Test: 42.395833333333336 %


In [None]:
# Restore the previously saved estimator from 'rfe_l1.pkl' in the working directory.
# (Presumably an RFE-wrapped L1/Lasso model, judging by the name — confirm.)
# joblib.load unpickles the file, so only load artifacts from a trusted source.
rfe_l1 = joblib.load('rfe_l1.pkl')

In [None]:
# Local import: make_scorer is only needed by this cell.
from sklearn.metrics import make_scorer


def _int_cast_accuracy(y_true, y_pred):
    """Accuracy after casting the predictions to integer labels.

    Mirrors how this notebook decodes rfe_l1's output before calling
    accuracy_score (``predict(...).astype('int')``), so the cross-validated
    score and the held-out score are computed the same way.
    """
    return accuracy_score(y_true, np.asarray(y_pred).astype('int'))


# 5-fold cross-validated accuracy on the scaled test set.
# The plain 'accuracy' scorer produced (nan, nan) here — presumably because
# rfe_l1 emits continuous predictions that the classification metric rejects,
# leaving every fold score as NaN. Scoring through the integer cast avoids that.
# Note: cross_val_score clones the estimator and refits it on each training
# split, so these scores do not measure the fitted model loaded from disk.
scores = cross_val_score(rfe_l1, X_test_scaled, t_test,
                        scoring=make_scorer(_int_cast_accuracy),
                        cv=5)

# Two-sided Student-t confidence interval for the mean fold accuracy
# (scale = standard error of the mean, using the sample std with ddof=1).
confidence = 0.95
stats.t.interval(confidence,
                 len(scores)-1,
                 loc = scores.mean(),
                 scale=scores.std(ddof=1)/np.sqrt(len(scores)))

(nan, nan)

In [None]:
# Predict with the loaded model, cast the outputs to integer labels, and
# report the accuracy on the test set as a percentage.
# NOTE(review): astype('int') truncates toward zero rather than rounding to the
# nearest label — confirm this matches how the model was evaluated in training.
raw_predictions = rfe_l1.predict(X_test_scaled)
y_test = raw_predictions.astype('int')
test_accuracy_pct = accuracy_score(t_test, y_test)*100
print('Accuracy in Test:', test_accuracy_pct, '%')

Accuracy in Test: 10.729166666666666 %


In [None]:
# Restore the previously saved estimator from 'svm_PCA.pkl' in the working directory.
# (Presumably a PCA + SVM pipeline, judging by the name — confirm.)
# joblib.load unpickles the file, so only load artifacts from a trusted source.
svm_PCA = joblib.load('svm_PCA.pkl')

In [None]:
# 5-fold cross-validated accuracy on the scaled test set.
# Note: cross_val_score clones the estimator and refits it on each training
# split, so these scores do not measure the fitted model loaded from disk.
scores = cross_val_score(svm_PCA, X_test_scaled, t_test, scoring='accuracy', cv=5)

# Two-sided Student-t confidence interval for the mean fold accuracy:
# centre = sample mean, scale = standard error (sample std with ddof=1 over sqrt(n)).
confidence = 0.95
n_folds = len(scores)
fold_mean = scores.mean()
fold_sem = scores.std(ddof=1) / np.sqrt(n_folds)
stats.t.interval(confidence, n_folds - 1, loc=fold_mean, scale=fold_sem)

(0.40733407018583145, 0.437110374258613)

In [None]:
# Predict labels with the loaded model and report its accuracy on the test set as a percentage.
# NOTE(review): if this figure is far below the cross-validated accuracy of the
# same estimator refitted on this data, the saved model may expect differently
# preprocessed inputs than X_test_scaled — verify against the training notebook.
y_test = svm_PCA.predict(X_test_scaled)
test_accuracy_pct = accuracy_score(t_test, y_test)*100
print('Accuracy in Test:', test_accuracy_pct, '%')

Accuracy in Test: 10.243055555555555 %


In [None]:
# Restore the previously saved estimator from 'grid_pipe_manifold.pkl' in the working directory.
# (Presumably a grid-searched pipeline with a manifold-learning step, judging by the name — confirm.)
# joblib.load unpickles the file, so only load artifacts from a trusted source.
grid_pipe_manifold = joblib.load('grid_pipe_manifold.pkl')

In [None]:
# 5-fold cross-validated accuracy on the scaled test set.
# Note: cross_val_score clones the estimator and refits it on each training
# split, so these scores do not measure the fitted model loaded from disk.
scores = cross_val_score(grid_pipe_manifold, X_test_scaled, t_test, scoring='accuracy', cv=5)

# Two-sided Student-t confidence interval for the mean fold accuracy:
# centre = sample mean, scale = standard error (sample std with ddof=1 over sqrt(n)).
confidence = 0.95
n_folds = len(scores)
fold_mean = scores.mean()
fold_sem = scores.std(ddof=1) / np.sqrt(n_folds)
stats.t.interval(confidence, n_folds - 1, loc=fold_mean, scale=fold_sem)

(0.11291103617799035, 0.11972785271089854)

In [None]:
# Predict labels with the loaded model and report its accuracy on the test set as a percentage.
y_test = grid_pipe_manifold.predict(X_test_scaled)
test_accuracy_pct = accuracy_score(t_test, y_test)*100
print('Accuracy in Test:', test_accuracy_pct, '%')

Accuracy in Test: 12.083333333333334 %
