In [1]:
### Comparison of Other Methods

In [2]:
# load necessary modules
from scipy import io 
import numpy as np
import matplotlib.pyplot as plt
from scipy.sparse.linalg import svds
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

In [3]:
# load datasets and matrices from usps.mat
data = io.loadmat('usps.mat')
train_patterns = data['train_patterns']
test_patterns = data['test_patterns']
train_labels = data['train_labels']
test_labels = data['test_labels']

In [4]:
# check shape of loaded matrices
print(train_patterns.shape)
print(test_patterns.shape)
print(train_labels.shape)
print(test_labels.shape) # they look as outlined

(256, 4649)
(256, 4649)
(10, 4649)
(10, 4649)


In [5]:
# data preparation
y_train = np.argmax(train_labels, axis=0) # index values corresponding to '1' to get true labels
X_train = pd.DataFrame(train_patterns.T) # transpose to convert each row into a single digit entry
y_train = pd.DataFrame(y_train) 

y_test = np.argmax(test_labels, axis=0) # same for test data
X_test = pd.DataFrame(test_patterns.T)
y_test = pd.DataFrame(y_test)

In [6]:
### Logistic Regression

In [7]:
model = LogisticRegression() # create a logistic regression model
model.fit(X_train, y_train)  # fit the model on the training data
y_pred = model.predict(X_test) # make predictions using testing data
accuracy = accuracy_score(y_test, y_pred) # get accuracy score
conf_matrix = confusion_matrix(y_test, y_pred) # get confusion matrix 

  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [8]:
print(accuracy) # print 

0.9425682942568294


In [9]:
print(conf_matrix) # print

[[757   1   6   3   3   7   1   0   7   1]
 [  0 641   0   1   0   0   3   0   2   0]
 [  2   1 415  12  11   3   2   3   4   1]
 [  5   2   9 378   2   9   0   5   8   0]
 [  4   3   8   0 410   3   5   1   2   7]
 [  1   1   2   9   4 319   9   0   7   3]
 [  2   1   3   0   3   3 399   0   3   0]
 [  0   0   0   1   5   0   0 386   1   9]
 [  4   1  10   5   4   3   2   2 300   0]
 [  0   2   1   1   6   1   0  10   1 377]]


In [10]:
### Support Vector Machines
svm_model = SVC(kernel='linear') # using linear kernel for SVM (can use others such as polynomial as well)
svm_model.fit(X_train, y_train) # fit the model on the training data
y_pred = svm_model.predict(X_test) # predict
accuracy = accuracy_score(y_test, y_pred) # accuracy rates
conf_matrix = confusion_matrix(y_test, y_pred) # confusion matrix 

  y = column_or_1d(y, warn=True)


In [11]:
print(accuracy)

0.9518175951817596


In [12]:
print(conf_matrix)

[[766   0   5   2   3   5   2   1   2   0]
 [  0 643   0   1   0   0   2   0   1   0]
 [  3   1 429   6   7   4   0   1   2   1]
 [  3   0  15 380   2  12   0   0   5   1]
 [  3   3   9   0 418   3   0   3   1   3]
 [  3   0   3  11   3 328   5   0   2   0]
 [  5   0   3   0   2   5 399   0   0   0]
 [  1   1   1   0   6   1   0 383   2   7]
 [  5   1   6   7   3   1   1   2 305   0]
 [  0   0   5   2   4   2   0  12   0 374]]
