In [4]:
import numpy as np
from scipy.io import loadmat
from scipy.optimize import minimize
#import matplotlib.pyplot as plt
from sklearn import metrics

def preprocess(mat_path='mnist_all.mat', n_validation=1000):
    """
    Load the per-digit MNIST arrays from a MATLAB file and split them into
    training, validation and test sets.

    Input:
        mat_path: path of the MATLAB file. It must contain one 2-D array per
          digit under the keys 'train0'..'train9' and 'test0'..'test9', with
          one image per row.
        n_validation: number of leading rows of every 'train<i>' array that
          are held out as validation data. The remaining rows of each array
          form the training set.

    Output:
        train_data: matrix of the training set. Each row is the feature
          vector of an image.
        train_label: column vector (N x 1) with the digit of each training row.
        validation_data: matrix of the validation set, one image per row.
        validation_label: column vector with the digit of each validation row.
        test_data: matrix of the test set, one image per row.
        test_label: column vector with the digit of each test row.

        All six arrays are float64. Rows are grouped by digit, in order 0..9.
        Feature columns whose standard deviation over the training set is at
        most 0.001 are removed from all three data matrices, and the
        remaining values are divided by 255.

    Raises:
        ValueError: if n_validation is negative or larger than the number of
          rows of the smallest 'train<i>' array.
    """
    mat = loadmat(mat_path)  # loads the MAT object as a dictionary

    train_parts = [mat["train" + str(i)] for i in range(10)]
    test_parts = [mat["test" + str(i)] for i in range(10)]

    smallest = min(part.shape[0] for part in train_parts)
    if not 0 <= n_validation <= smallest:
        raise ValueError(
            "n_validation must be between 0 and %d, got %r" % (smallest, n_validation)
        )

    # The first n_validation rows of each digit are held out; the rest train.
    validation_data = np.vstack([part[:n_validation] for part in train_parts]).astype(np.float64)
    train_data = np.vstack([part[n_validation:] for part in train_parts]).astype(np.float64)
    test_data = np.vstack(test_parts).astype(np.float64)

    # Labels follow the same digit-by-digit row order as the data matrices.
    digits = np.arange(10, dtype=np.float64)
    validation_label = np.repeat(digits, n_validation).reshape(-1, 1)
    train_label = np.repeat(
        digits, [part.shape[0] - n_validation for part in train_parts]
    ).reshape(-1, 1)
    test_label = np.repeat(digits, [part.shape[0] for part in test_parts]).reshape(-1, 1)

    # Delete features which don't provide any useful information for
    # classifiers. The decision is made on the training set only and the same
    # columns are then dropped everywhere.
    informative = np.std(train_data, axis=0) > 0.001

    # Scale data to 0 and 1
    train_data = train_data[:, informative] / 255.0
    validation_data = validation_data[:, informative] / 255.0
    test_data = test_data[:, informative] / 255.0

    return train_data, train_label, validation_data, validation_label, test_data, test_label


def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), applied element-wise."""
    decay = np.exp(-z)
    return 1.0 / (decay + 1.0)


def blrObjFunction(initialWeights, *args):
    """
    blrObjFunction computes 2-class Logistic Regression error function and
    its gradient.

    Input:
        initialWeights: the weight vector (w_k) with D + 1 entries, bias
            weight first. Any shape holding D + 1 values is accepted.
        train_data: the data matrix of size N x D
        labeli: the label vector (y_k) with N entries where each entry is
            either 0 or 1, the label of the corresponding feature vector

    Output:
        error: the scalar value of error function of 2-class logistic
            regression (negative log-likelihood averaged over the N rows)
        error_grad: 1-D array with D + 1 entries, the gradient of the error
            function with respect to the weights
    """
    train_data, labeli = args

    n_data = train_data.shape[0]
    labels = np.asarray(labeli, dtype=np.float64).reshape(n_data, 1)
    weights = np.asarray(initialWeights, dtype=np.float64).reshape(-1, 1)

    # Prepend a column of ones so the first weight acts as the bias term.
    data_with_bias = np.hstack((np.ones((n_data, 1)), train_data))
    z = np.dot(data_with_bias, weights)

    # log(sigmoid(z)) = -log(1 + exp(-z)) and log(1 - sigmoid(z)) =
    # -log(1 + exp(z)). Evaluating them through logaddexp keeps both finite
    # when the sigmoid saturates, where log(0) would turn the error into NaN.
    log_posterior = -np.logaddexp(0.0, -z)
    log_complement = -np.logaddexp(0.0, z)
    error = -np.sum(labels * log_posterior + (1.0 - labels) * log_complement) / n_data

    posterior = np.exp(log_posterior)
    error_grad = np.dot(data_with_bias.T, posterior - labels).ravel() / n_data

    return error, error_grad


def blrPredict(W, data):
    """
     blrPredict predicts the label of data given the data and parameter W
     of Logistic Regression

     Input:
         W: the matrix of weight of size (D + 1) x K. Each column is the
         weight vector (bias weight first) of one Logistic Regression
         classifier.
         data: the data matrix of size N x D

     Output:
         label: integer array of size N x 1 holding, for every row of data,
         the index of the classifier with the highest score

    """
    n_data = data.shape[0]

    # Prepend a column of ones so the first row of W acts as the bias term.
    data_with_bias = np.hstack((np.ones((n_data, 1)), data))
    scores = np.dot(data_with_bias, W)

    # The sigmoid is strictly increasing, so the class with the largest raw
    # score is also the class with the largest posterior. Comparing raw
    # scores avoids ties at exactly 1.0 when the sigmoid saturates.
    label = np.argmax(scores, axis=1).reshape((n_data, 1))

    return label





"""
Script for Logistic Regression
"""
train_data, train_label, validation_data, validation_label, test_data, test_label = preprocess()

# number of classes
n_class = 10

# number of training samples
n_train = train_data.shape[0]

# number of features
n_feature = train_data.shape[1]

# One-hot encoding of the training labels: Y[r, k] is 1 when row r has label k.
Y = (train_label == np.arange(n_class)).astype(float)

# One-vs-all Logistic Regression: one binary classifier per class, each
# fitted with the conjugate-gradient method of scipy.optimize.minimize.
W = np.zeros((n_feature + 1, n_class))
# minimize() documents x0 as a 1-D array of shape (n,), so start from a flat
# vector rather than an (n, 1) column.
initialWeights = np.zeros(n_feature + 1)
opts = {'maxiter': 100}
for i in range(n_class):
    labeli = Y[:, i].reshape(n_train, 1)
    args = (train_data, labeli)
    nn_params = minimize(blrObjFunction, initialWeights, jac=True, args=args, method='CG', options=opts)
    W[:, i] = nn_params.x.reshape((n_feature + 1,))

# Find the accuracy on Training Dataset
predicted_label = blrPredict(W, train_data)
print('\n Training set Accuracy:' + str(100 * np.mean((predicted_label == train_label).astype(float))) + '%')
# confusion_matrix expects 1-D label arrays, hence the ravel() calls.
cm = metrics.confusion_matrix(train_label.ravel(), predicted_label.ravel())
print(cm)

# Find the accuracy on Validation Dataset
predicted_label = blrPredict(W, validation_data)
print('\n Validation set Accuracy:' + str(100 * np.mean((predicted_label == validation_label).astype(float))) + '%')
cm = metrics.confusion_matrix(validation_label.ravel(), predicted_label.ravel())
print(cm)


# Find the accuracy on Testing Dataset
predicted_label = blrPredict(W, test_data)
print('\n Testing set Accuracy:' + str(100 * np.mean((predicted_label == test_label).astype(float))) + '%')
cm = metrics.confusion_matrix(test_label.ravel(), predicted_label.ravel())
print(cm)



 Training set Accuracy:84.908%
[[4832    1   15    8   10   18   25    7    2    5]
 [   2 5649   33   12    3   21    4   11    0    7]
 [  36   42 4590   70   53   27   55   68    1   16]
 [  21   26  134 4658    9  149   21   45    1   67]
 [   9   20   24    5 4569   13   27   12    0  163]
 [  47   18   34  131   47 3966   89   19    8   62]
 [  26   12   28    2   19   75 4746    4    3    3]
 [  12   22   50   11   44   11    3 4972    0  140]
 [ 136  293  840 1004  193 1317  133   56   39  840]
 [  26   22   14   88  164   45    1  156    0 4433]]

 Validation set Accuracy:83.74%
[[979   0   1   3   1   8   6   1   0   1]
 [  0 979   4   4   2   8   0   1   0   2]
 [ 11  18 894  22  13   5  14  15   1   7]
 [  4   9  31 892   4  26   4  13   0  17]
 [  1   5   7   2 942   3   7   0   0  33]
 [  9   9   8  40  19 886  17   2   0  10]
 [  7   3   7   0   6  15 959   2   0   1]
 [  3   4   9   1  15   0   0 926   0  42]
 [ 32  76 194 224  33 250  42   6  11 132]
 [ 10   3   5  20

In [None]:
"""
Script for Support Vector Machine
"""
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from sklearn import metrics

print('\n\n--------------SVM-------------------\n\n')

# scikit-learn expects 1-D target arrays; flatten the (N, 1) label columns once.
svm_train_label = train_label.ravel()
svm_validation_label = validation_label.ravel()
svm_test_label = test_label.ravel()

# Linear kernel, all other parameters left at their defaults.
SVM_linear = SVC(kernel='linear')
SVM_linear.fit(train_data, svm_train_label)
print('training  ' + str(100 * SVM_linear.score(train_data, svm_train_label)))
print('validation ' + str(100 * SVM_linear.score(validation_data, svm_validation_label)))
print('testing   ' + str(100 * SVM_linear.score(test_data, svm_test_label)))

# RBF kernel with gamma fixed to 1.
SVM_RBF_1 = SVC(kernel='rbf', gamma=1.0)
SVM_RBF_1.fit(train_data, svm_train_label)
print('training  ' + str(100 * SVM_RBF_1.score(train_data, svm_train_label)))
print('validation  ' + str(100 * SVM_RBF_1.score(validation_data, svm_validation_label)))
print('testing  ' + str(100 * SVM_RBF_1.score(test_data, svm_test_label)))

# RBF kernel with gamma='auto'.
SVM_RBF_AUTO = SVC(kernel='rbf', gamma='auto')
SVM_RBF_AUTO.fit(train_data, svm_train_label)
print('training  ' + str(100 * SVM_RBF_AUTO.score(train_data, svm_train_label)))
print('validation  ' + str(100 * SVM_RBF_AUTO.score(validation_data, svm_validation_label)))
print('testing  ' + str(100 * SVM_RBF_AUTO.score(test_data, svm_test_label)))

# RBF kernel with the library's default gamma, sweeping the penalty C.
c_vals = np.array([1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
train_accuracy = np.zeros(len(c_vals))
valid_accuracy = np.zeros(len(c_vals))
test_accuracy = np.zeros(len(c_vals))

for i, c in enumerate(c_vals):
    # C must be passed by keyword: SVC's constructor parameters are
    # keyword-only.
    x = SVC(C=float(c), kernel='rbf')
    x.fit(train_data, svm_train_label)
    train_accuracy[i] = 100 * x.score(train_data, svm_train_label)
    valid_accuracy[i] = 100 * x.score(validation_data, svm_validation_label)
    test_accuracy[i] = 100 * x.score(test_data, svm_test_label)
    print('Train Accuracy for: ', c, ' -> ', train_accuracy[i])
    print('Validation Accuracy for: ', c, ' -> ', valid_accuracy[i])
    print('Test Accuracy for: ', c, ' -> ', test_accuracy[i])


plt.plot(c_vals, train_accuracy, 'b', label='Training Accuracy', linewidth=3)
plt.plot(c_vals, valid_accuracy, 'g', label='Validation Accuracy', linewidth=3)
plt.plot(c_vals, test_accuracy, 'y', label='Test Accuracy', linewidth=3)

plt.title('Variation of Accuracies with C Values')
plt.ylabel('Accuracies')
plt.xlabel('C values')
plt.legend(loc='lower right')
plt.grid(True, color='k')

# Save before showing: once show() returns the figure may already have been
# released, in which case savefig() would write an empty image.
plt.savefig('myfig')
plt.show()
plt.close()




--------------SVM-------------------




In [2]:
import numpy as np
from scipy.io import loadmat
from scipy.optimize import minimize
from sklearn import metrics



def preprocess(mat_path='mnist_all.mat', n_validation=1000):
    """
    Load the per-digit MNIST arrays from a MATLAB file and split them into
    training, validation and test sets.

    Input:
        mat_path: path of the MATLAB file. It must contain one 2-D array per
          digit under the keys 'train0'..'train9' and 'test0'..'test9', with
          one image per row.
        n_validation: number of leading rows of every 'train<i>' array that
          are held out as validation data. The remaining rows of each array
          form the training set.

    Output:
        train_data: matrix of the training set. Each row is the feature
          vector of an image.
        train_label: column vector (N x 1) with the digit of each training row.
        validation_data: matrix of the validation set, one image per row.
        validation_label: column vector with the digit of each validation row.
        test_data: matrix of the test set, one image per row.
        test_label: column vector with the digit of each test row.

        All six arrays are float64. Rows are grouped by digit, in order 0..9.
        Feature columns whose standard deviation over the training set is at
        most 0.001 are removed from all three data matrices, and the
        remaining values are divided by 255.

    Raises:
        ValueError: if n_validation is negative or larger than the number of
          rows of the smallest 'train<i>' array.
    """
    mat = loadmat(mat_path)  # loads the MAT object as a dictionary

    train_parts = [mat["train" + str(i)] for i in range(10)]
    test_parts = [mat["test" + str(i)] for i in range(10)]

    smallest = min(part.shape[0] for part in train_parts)
    if not 0 <= n_validation <= smallest:
        raise ValueError(
            "n_validation must be between 0 and %d, got %r" % (smallest, n_validation)
        )

    # The first n_validation rows of each digit are held out; the rest train.
    validation_data = np.vstack([part[:n_validation] for part in train_parts]).astype(np.float64)
    train_data = np.vstack([part[n_validation:] for part in train_parts]).astype(np.float64)
    test_data = np.vstack(test_parts).astype(np.float64)

    # Labels follow the same digit-by-digit row order as the data matrices.
    digits = np.arange(10, dtype=np.float64)
    validation_label = np.repeat(digits, n_validation).reshape(-1, 1)
    train_label = np.repeat(
        digits, [part.shape[0] - n_validation for part in train_parts]
    ).reshape(-1, 1)
    test_label = np.repeat(digits, [part.shape[0] for part in test_parts]).reshape(-1, 1)

    # Delete features which don't provide any useful information for
    # classifiers. The decision is made on the training set only and the same
    # columns are then dropped everywhere.
    informative = np.std(train_data, axis=0) > 0.001

    # Scale data to 0 and 1
    train_data = train_data[:, informative] / 255.0
    validation_data = validation_data[:, informative] / 255.0
    test_data = test_data[:, informative] / 255.0

    return train_data, train_label, validation_data, validation_label, test_data, test_label



def mlrObjFunction(params, *args):
    """
    mlrObjFunction computes multi-class Logistic Regression error function and
    its gradient.

    Input:
        params: the weights, (D + 1) * K values that are reshaped to a
                (D + 1) x K matrix (bias weights in the first row, one column
                per class)
        train_data: the data matrix of size N x D
        labeli: the label matrix of size N x K where row r is the one-hot
                encoding of the label of the corresponding feature vector

    Output:
        error: the scalar value of error function of multi-class logistic
               regression (cross-entropy averaged over the N rows)
        error_grad: 1-D array with (D + 1) * K entries, the gradient of the
                    error function flattened in the same order as params
    """
    train_data, labeli = args
    n_data, n_feature = train_data.shape
    labels = np.asarray(labeli, dtype=np.float64)
    # The number of classes is taken from the label matrix instead of a
    # module-level global, so the function does not depend on script state.
    n_class = labels.shape[1]

    w = np.asarray(params, dtype=np.float64).reshape(n_feature + 1, n_class)

    # Prepend a column of ones so the first row of w acts as the bias term.
    data_with_bias = np.hstack((np.ones((n_data, 1)), train_data))
    scores = np.dot(data_with_bias, w)

    # Log-softmax with the row maximum subtracted first: the result is
    # mathematically unchanged but exp() can no longer overflow to inf, which
    # would turn the posterior (inf / inf) and the error into NaN.
    shifted = scores - np.max(scores, axis=1, keepdims=True)
    log_theta = shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))

    error = -np.sum(labels * log_theta) / n_data

    theta = np.exp(log_theta)
    error_grad = np.dot(data_with_bias.T, theta - labels).flatten() / n_data

    return error, error_grad


def mlrPredict(W, data):
    """
     mlrPredict predicts the label of data given the data and parameter W
     of multi-class Logistic Regression

     Input:
         W: the matrix of weight of size (D + 1) x K. Each column is the
         weight vector (bias weight first) of one class.
         data: the data matrix of size N x D

     Output:
         label: float array of size N x 1 holding, for every row of data,
         the index of the class with the highest score

    """
    n_data = data.shape[0]

    # Prepend a column of ones so the first row of W acts as the bias term.
    data_with_bias = np.hstack((np.ones((n_data, 1)), data))
    scores = np.dot(data_with_bias, W)

    # Softmax preserves the ordering of the scores within a row, so the
    # arg-max can be taken on the raw scores. This skips exp(), which can
    # overflow to inf and yield NaN posteriors for large scores.
    label = np.argmax(scores, axis=1).astype(np.float64).reshape(n_data, 1)

    return label


"""
Script for Extra Credit Part
"""

train_data, train_label, validation_data, validation_label, test_data, test_label = preprocess()

n_feature = train_data.shape[1]

# number of classes
n_class = 10

# number of training samples
n_train = train_data.shape[0]

# One-hot encoding of the training labels: Y[r, k] is 1 when row r has label k.
Y = (train_label == np.arange(n_class)).astype(float)


# FOR EXTRA CREDIT ONLY
# minimize() documents x0 as a 1-D array of shape (n,), so the
# (D + 1) x n_class weight matrix is handed over flattened; mlrObjFunction
# reshapes it back on every call.
initialWeights_b = np.zeros((n_feature + 1) * n_class)
opts_b = {'maxiter': 100}

args_b = (train_data, Y)
nn_params = minimize(mlrObjFunction, initialWeights_b, jac=True, args=args_b, method='CG', options=opts_b)
W_b = nn_params.x.reshape((n_feature + 1, n_class))

# Find the accuracy on Training Dataset
predicted_label_b = mlrPredict(W_b, train_data)
print('\n Training set Accuracy:' + str(100 * np.mean((predicted_label_b == train_label).astype(float))) + '%')
# confusion_matrix expects 1-D label arrays, hence the ravel() calls.
cm = metrics.confusion_matrix(train_label.ravel(), predicted_label_b.ravel())
print(cm)

# Find the accuracy on Validation Dataset
predicted_label_b = mlrPredict(W_b, validation_data)
print('\n Validation set Accuracy:' + str(100 * np.mean((predicted_label_b == validation_label).astype(float))) + '%')
cm = metrics.confusion_matrix(validation_label.ravel(), predicted_label_b.ravel())
print(cm)

# Find the accuracy on Testing Dataset
predicted_label_b = mlrPredict(W_b, test_data)
print('\n Testing set Accuracy:' + str(100 * np.mean((predicted_label_b == test_label).astype(float))) + '%')
cm = metrics.confusion_matrix(test_label.ravel(), predicted_label_b.ravel())
print(cm)


 Training set Accuracy:93.448%
[[4786    1   12    7   11   33   30    7   32    4]
 [   1 5592   26   17    6   19    2   13   58    8]
 [  23   45 4503   72   58   24   59   53  108   13]
 [  14   18   95 4654    4  148   15   39  105   39]
 [   8   20   21    7 4576    6   42   13   24  125]
 [  39   13   36  117   34 3963   68   18  102   31]
 [  23   11   29    1   24   52 4758    2   16    2]
 [   8   16   49   18   34    9    4 4989   14  124]
 [  22   75   51  103   16  113   23   16 4387   45]
 [  17   18    9   55  126   30    2  134   42 4516]]

 Validation set Accuracy:92.48%
[[975   0   1   3   2   7   3   2   6   1]
 [  0 972   3   2   1   5   0   2  13   2]
 [ 10  13 896  22  13   4  11   9  18   4]
 [  1   7  23 902   3  28   2  12  13   9]
 [  1   4   8   3 941   1  10   2   7  23]
 [  9   4   6  37  17 884  14   2  22   5]
 [  9   2   4   1   7  12 957   1   6   1]
 [  2   3   9   0   9   1   0 931   3  42]
 [ 13  17  19  27   9  20  19   2 868   6]
 [  4   3   5  14