In [2]:
import numpy as np
from sklearn import svm, metrics
import gzip
import time

# Method to calculate the accuracy
def calcAccuracy(pred, actual):
    count = 0
    acc = 0.0
#   x = np.argmax(pred, axis=1)

    for i in range (0,pred.shape[0]):
        if(pred[i] == actual[i]):
            count+=1
            
    acc = (count*100)/pred.shape[0]
    return acc

t = time.time()

In [8]:
# 1.EXTRACTING THE MNIST DATASETS
total_img_train = 60000
total_img_test = 10000
img_dim = 28

# Reading the training data features from the MNIST dataset
print("********* EXTRACTING THE MNIST DATASET *********")
print("\n Reading Training Data Features ...")
X_train = gzip.open('train-images-idx3-ubyte.gz', 'r')
X_train.read(16)
flatten_size = img_dim*img_dim
X_train_buffer = X_train.read(img_dim * img_dim * total_img_train)
X_train_data = np.frombuffer(X_train_buffer, dtype=np.uint8).astype(np.float32)
x_train = X_train_data.reshape(total_img_train, img_dim * img_dim)

# Reading the training data labels from the MNIST dataset
print(" Reading Training Data Labels ...")
Y_train = gzip.open('train-labels-idx1-ubyte.gz','r')
Y_train.read(8)
Y_train_buffer = Y_train.read(1 * total_img_train)
y_train = np.frombuffer(Y_train_buffer, dtype=np.uint8).astype(np.int64)

# Reading the test data features from the MNIST dataset
print("\n Reading Test Data Features ...")
X_test = gzip.open('t10k-images-idx3-ubyte.gz', 'r')
X_test.read(16)
X_test_buffer = X_test.read(img_dim * img_dim * total_img_test)
X_test_data = np.frombuffer(X_test_buffer, dtype=np.uint8).astype(np.float32)
x_test = X_test_data.reshape(total_img_test, img_dim * img_dim)

# Reading the test data labels from the MNIST dataset
print(" Reading Test Data Labels ...")
Y_test = gzip.open('t10k-labels-idx1-ubyte.gz','r')
Y_test.read(8)
Y_test_buffer = Y_test.read(1 * total_img_test)
y_test = np.frombuffer(Y_test_buffer, dtype=np.uint8).astype(np.int64)

print("\n ********* DATASET EXTRACTION COMPLETE *********")

********* EXTRACTING THE MNIST DATASET *********

 Reading Training Data Features ...
 Reading Training Data Labels ...

 Reading Test Data Features ...
 Reading Test Data Labels ...

 ********* DATASET EXTRACTION COMPLETE *********


In [10]:
# 2.BUILDING THE MODEL CLASSIFIER
print("********* BUILDING THE MODEL CLASSIFIER *********")
# Initializing and training the SVC classifier on the training data
classifier = svm.LinearSVC()
classifier.fit(x_train, y_train)

# Fetching the classifier prediction
pred = classifier.predict(x_test)
print("\nTime:  ", time.time() - t)

# Calculating the accuracy of the model classifier
accuracy = calcAccuracy(pred, y_test)
print("Accuracy: ", accuracy)

print("\n********* BUILDING THE CLASSIFIER COMPLETE*********")

********* BUILDING THE MODEL CLASSIFIER *********

Time:   3939.7899131774902
Accuracy:  88.99

********* BUILDING THE CLASSIFIER COMPLETE*********




In [11]:
# 3.PERFORMING CROSS-VALIDATION AND HYPERPARAMETER TUNING
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler

print("********* PERFORMING CROSS-VALIDATION AND HYPERPARAMETER TUNING *********")
# Defining the steps for the cross-validation
steps = [('scaler', StandardScaler()), ('SVM', SVC(kernel='poly'))]

# Pushing the steps to the pipeline
pipeline = Pipeline(steps)

# Defining different C and gamma param values for performing hyperparameter tuning
C_params = [0.001, 0.1, 0.5, 10, 100]
gamma_params = [0.01, 0.1, 1, 5, 10]
parameters = {'SVM__C':C_params, 'SVM__gamma':gamma_params}
#parameters = {'SVM__C': [0.001], 'SVM__gamma': [0.1]}

# Using GridSearch CV for finding the best param values
classifier = GridSearchCV(pipeline, param_grid=parameters, cv=5)

classifier.fit(x_train, y_train)
y_pred = classifier.predict(x_test)
accuracy = calcAccuracy(y_pred, y_test)

print("\n Time elapse (for Hyperparameter Tuning): ", time.time() - t)
print (" Best Parameter (found using GridSearchCV): ", classifier.best_params_)
print(" Score: ", classifier.score(x_test, y_test))
print("\n Accuracy (calculated using self-defined method): ", accuracy)
print("\n********* CROSS-VALIDATION AND HYPERPARAMETER TUNING COMPLETE *********")

********* PERFORMING CROSS-VALIDATION AND HYPERPARAMETER TUNING *********


KeyboardInterrupt: 