In [1]:
import struct
import numpy as np
import array
import time
import scipy.sparse
import scipy.optimize
import pandas as pd

In [8]:
""" The Softmax Regression class """

class SoftmaxRegression(object):
    
    """ Initialize parameters of the Regressor object """
    
    def __init__(self, input_size, num_classes, lam):
        
        """ Initialize parameters of the Regressor object """

        self.input_size = input_size   # input vector size
        self.num_classes = num_classes  # number of classes
        self.lam = lam                  # weight decay parameter
        
        """ Randomly initialize the class weights """
        
        rand = np.random.RandomState(int(time.time()))

        rand = np.random.RandomState(10)
        self.theta = 0.0005 * rand.rand(num_classes, input_size)
        
#         self.theta = 0.005 * np.asarray(rand.normal(size = (num_classes * input_size, 1)))
        
    def get_ground_truth(self, labels):
        """ Returns the groundtruth matrix for a set of labels"""

        """ Prepare data needed to construct ground truth matrix """
        labels = np.array(labels).flatten()
        data = np.ones(len(labels))
        indptr = np.arange(len(labels)+1)
        
        """ Compute the groundtruth matrix and return """
        
        ground_truth = scipy.sparse.csr_matrix((data, labels, indptr))
        ground_truth = np.transpose(ground_truth.todense())
        
        return ground_truth
    
    def softmax_cost(self, theta, input, labels):
        """ Returns the cost and gradient of 'theta' at a particular 'theta' """
        
        """ Compute the groundtruth matrix """
        ground_truth = self.get_ground_truth(labels)
        
        """ Reshape 'theta' for ease of computation """
        
        theta = theta.reshape(self.num_classes, self.input_size)
        
        """ Compute the class probabilities for each example """
        
        print 'X', input
        print 'X.T[0]', input.T[0]
        print 'X.shape', input.shape
        print 'X.sum()', input.sum()
        print 'theta', theta
        print 'theta.shape', theta.shape
        print 'theta.sum()', theta.sum()
        
        theta_x = np.dot(theta, input)
        print 'theta_x', theta_x
        print 'theta_x.sum()', theta_x.sum()
        print 'theta_x.shape', theta_x.shape
        hypothesis = np.exp(theta_x)
        print 'h', hypothesis
        print 'summation', np.sum(hypothesis, axis = 0)
        probabilities = hypothesis / np.sum(hypothesis, axis = 0)
        print 'probabilities', probabilities
        """ Compute the traditional cost term """
        
        cost_examples = np.multiply(ground_truth, np.log(probabilities))
        traditional_cost = -(np.sum(cost_examples) / input.shape[1])
        
        """ Compute the weight decay term """
        
        theta_squared = np.multiply(theta, theta)
        weight_decay = 0.5 * self.lam * np.sum(theta_squared)
        
        """ Add both terms to get the cost """
        cost = traditional_cost + weight_decay
        
        """ Compute the unroll 'theta' gradient """
        
        theta_grad = -np.dot(ground_truth - probabilities, np.transpose(input))
        theta_grad = theta_grad / input.shape[1] + self.lam * theta
        theta_grad = np.array(theta_grad)
        theta_grad = theta_grad.flatten()
        
        return [cost, theta_grad]
    
    def softmax_predict(self, theta, input):
        
        """ Returns predicted classes for a set of inputs """
        
        """ Reshape 'theta' for ease of computation """
        
        theta = theta.reshape(self.num_classes, self.input_size)
        
        """ Compute the class probabilities for each example """
        
        theta_x = np.dot(theta, input)
        hypothesis = np.exp(theta_x)
        probabilities = hypothesis / np.sum(hypothesis, axis = 0)
        
        """ Give the predictions based on probability values """
        
        predictions = np.zeros((input.shape[1], 1))
        predictions[:,0] = np.argmax(probabilities, axis=0)
        
        return predictions
    
def load_mnist_images(file_name):

    """ Open the file """

    image_file = open(file_name, 'rb')

    """ Read header information from the file """

    head1 = image_file.read(4)
    head2 = image_file.read(4)
    head3 = image_file.read(4)
    head4 = image_file.read(4)

    """ Format the header information for useful data """

    num_examples = struct.unpack('>I', head2)[0]
    num_rows     = struct.unpack('>I', head3)[0]
    num_cols     = struct.unpack('>I', head4)[0]
    

    """ Initialize dataset as array of zeros """

    dataset = np.zeros((num_rows * num_cols, num_examples))

    """ Read the actual image data """

    images_raw = array.array('B', image_file.read())
    image_file.close()

    """ Arrange the data in columns """

    for i in range(num_examples):

        limit1 = num_rows * num_cols * i
        limit2 = num_rows * num_cols * (i + 1)

        dataset[:, i] = images_raw[limit1 : limit2]

    """ Normalize and return the dataset """

    return dataset / 255

def load_mnist_labels(file_name):

    """ Open the file """

    label_file = open(file_name, 'rb')

    """ Read header information from the file """

    head1 = label_file.read(4)
    head2 = label_file.read(4)

    """ Format the header information for useful data """
    
    num_examples = struct.unpack('>I', head2)[0]
    
    """ Initialize data labels as array of zeros """

    labels = np.zeros((num_examples, 1), dtype = np.int)

    """ Read the label data """

    labels_raw = array.array('b', label_file.read())
    label_file.close()

    """ Copy and return the label data """

    labels[:, 0] = labels_raw[:]

    return labels
        
def execute_softmax_regression():

    """ Initialize parameters of the Regressor """

    input_size = 784       # input vector size
    num_classes = 10       # number of classes
    lam = 0.0001           # weight decay parameter
    max_iterations = 100   # number of optimization iterations

    """ Load MNIST training images and labels """

    training_data = load_mnist_images('./train-images-idx3-ubyte')
    training_labels = load_mnist_labels('./train-labels-idx1-ubyte')

    """ Initialize Softmax Regressor with the above parameters """

    regressor = SoftmaxRegression(input_size, num_classes, lam)
    
    """ Run the L-BFGS algorithm to get the optimal parameter values """
    
    opt_solution = scipy.optimize.minimize(
        regressor.softmax_cost,
        regressor.theta,
        args = (
            training_data,
            training_labels,
        ),
        method = 'L-BFGS-B',
        jac = True,
        options = {
            'maxiter': max_iterations
        }
    )
    opt_theta = opt_solution.x

    """ Load MNIST test images and labels """

    test_data = load_mnist_images('./t10k-images-idx3-ubyte')
    test_labels = load_mnist_labels('./t10k-labels-idx1-ubyte')

    """ Obtain predictions from the trained model """

    predictions = regressor.softmax_predict(opt_theta, test_data)

    """ Print accuracy of the trained model """

    correct = test_labels[:, 0] == predictions[:, 0]
    print """Accuray :""", np.mean(correct)


In [9]:

""" Initialize parameters of the Regressor """

input_size = 784       # input vector size
num_classes = 10       # number of classes
lam = 0.0001           # weight decay parameter
max_iterations = 1   # number of optimization iterations

""" Load MNIST training images and labels """

training_data = load_mnist_images('./train-images-idx3-ubyte')[:,:1000]
training_labels = load_mnist_labels('./train-labels-idx1-ubyte')[:,:1000]

""" Initialize Softmax Regressor with the above parameters """

regressor = SoftmaxRegression(input_size, num_classes, lam)

""" Run the L-BFGS algorithm to get the optimal parameter values """

opt_solution = scipy.optimize.minimize(
    regressor.softmax_cost,
    regressor.theta,
    args = (
        training_data,
        training_labels,
    ),
    method = 'L-BFGS-B',
    jac = True,
    options = {
        'maxiter': max_iterations
    }
)
# opt_theta = opt_solution.x

# """ Load MNIST test images and labels """

# test_data = load_mnist_images('./t10k-images-idx3-ubyte')
# test_labels = load_mnist_labels('./t10k-labels-idx1-ubyte')

# """ Obtain predictions from the trained model """

# predictions = regressor.softmax_predict(opt_theta, test_data)

# """ Print accuracy of the trained model """

# correct = test_labels[:, 0] == predictions[:, 0]
# print """Accuray :""", np.mean(correct)



X [[ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 ..., 
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]]
X.T[0] [ 0.          0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.          0.
  0.          0.     

ValueError: operands could not be broadcast together with shapes (10,60000) (10,1000) 

In [60]:
# def get_ground_truth(self, labels):
#     """ Returns the groundtruth matrix for a set of labels"""

#     """ Prepare data needed to construct ground truth matrix """
#     labels = np.array(labels).flatten()
#     data = np.ones(len(labels))
#     indptr = np.arange(len(labels)+1)

#     """ Compute the groundtruth matrix and return """

#     ground_truth = scipy.sparse.csr_matrix((data, labels, indptr))
#     ground_truth = np.transpose(ground_truth.todense())

#     return ground_truth

# training_labels

print labels
labels = np.array(training_labels).flatten()
data = np.ones(len(labels))
indptr = np.arange(len(labels)+1)
# print indptr

ground_truth = scipy.sparse.csr_matrix((data, labels, indptr))
# print ground_truth.todense()
ground_truth = np.transpose(ground_truth.todense()).T
# print ground_truth[0]
print ground_truth.shape

[5 0 4 ..., 5 6 8]
(60000, 10)


In [59]:
def to_one_hot(y):
    m = y.shape[0]
    
    K = len(np.unique(y))
    one_hot = np.zeros(shape=(m, K))
    for i, row in enumerate(one_hot):
        if i < 10:
            idx = y[i]
            row[idx] = 1
    return one_hot
# print to_one_hot(labels)[0]
one_hot_labels = to_one_hot(labels)
print one_hot_labels.shape

(60000, 10)


In [4]:

input_size = 784       # input vector size
num_classes = 10       # number of classes
lam = 0.0001           # weight decay parameter
max_iterations = 100   # number of optimization iterations

""" Load MNIST training images and labels """

training_data = load_mnist_images('./train-images-idx3-ubyte')
training_labels = load_mnist_labels('./train-labels-idx1-ubyte')

""" Initialize Softmax Regressor with the above parameters """

# regressor = SoftmaxRegression(input_size, num_classes, lam)

' Initialize Softmax Regressor with the above parameters '

In [None]:
regressor.softmax_cost(self, theta, training_data, training_labels):

In [4]:
df = pd.read_csv('./mnist_train.csv')[:1000]
# test = pd.read_csv('./mnist_test.csv', header=None)

matrix = df.as_matrix()
print matrix.shape
m = matrix.shape[0]
y = matrix[:,0:1]
X = matrix[:,1:]

# matrix_test = test.as_matrix()
# X_test = matrix_test[:,1:]
# m_test = X_test.shape[0]
# y_test = matrix_test[:,0:1]


(1000, 785)


NameError: name 'test' is not defined

In [12]:
n_of_features = X.shape[1]
k = len(np.unique(y))
rand = np.random.RandomState(10)
theta = 0.0005 * rand.rand(n_of_features, k)

In [29]:
input_size = 784       # input vector size
num_classes = 10       # number of classes
lam = 0.0001           # weight decay parameter
regressor = SoftmaxRegression(input_size, num_classes, lam)
cost, gradient = regressor.softmax_cost(theta, X.T, y.T)

# print 'cost', cost
# print 'gradient', gradient

X [[0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 0 0]
 ..., 
 [0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 0 0]]
X.T[0] [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0  51 159 253 159  50   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0  48 238 252 252 252 237   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0  54 227 253 252 239 233 252  57   6   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0  10  60 224 252 253 252

In [29]:
print training_data

NameError: name 'training_data' is not defined

In [7]:
print training_data.shape
print training_labels.shape

(784, 60000)
(60000, 1)


In [9]:
training_data = None
training_labels = None

In [13]:
print X[0]
print y.T
print theta

[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0  51 159 253 159  50   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0  48 238 252 252 252 237   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0  54 227 253 252 239 233 252  57   6   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0  10  60 224 252 253 252 202  84 252
 253 122   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0 163 252 252 252 253 252 252  96 189 253 167   

In [32]:
print X
# print y
print theta

[[0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 0 0]
 ..., 
 [0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 0 0]]
[[  3.85660322e-04   1.03759747e-05   3.16824117e-04 ...,   3.80265356e-04
    8.45554183e-05   4.41699071e-05]
 [  3.42679909e-04   4.76696673e-04   1.97413316e-06 ...,   1.45938034e-04
    4.58887061e-04   3.57287892e-04]
 [  2.71272184e-04   7.10850238e-05   1.86670380e-04 ...,   2.56569121e-04
    3.25198591e-04   3.00519477e-04]
 ..., 
 [  3.48713905e-04   1.56844888e-04   1.02725541e-04 ...,   9.49769320e-05
    3.15085083e-04   4.70179917e-04]
 [  4.94031613e-05   2.77312323e-04   1.47121887e-04 ...,   1.56144800e-04
    4.21690755e-04   4.74061754e-04]
 [  4.46830018e-04   1.25052283e-04   4.76457935e-05 ...,   4.98374494e-04
    1.15055453e-04   4.47175715e-04]]


In [31]:
print np.dot(X, theta)

[[ 7.02750991  7.65829303  8.22492154 ...,  7.57622251  7.71207028
   7.86756045]
 [ 4.59951021  5.17384386  4.93871541 ...,  4.73542002  4.88209247
   5.10557152]
 [ 3.966941    4.10703155  4.34924595 ...,  3.72894257  4.59960936
   4.79950616]
 ..., 
 [ 4.94369233  5.86620593  5.6433769  ...,  4.93204326  5.88177398
   5.57196998]
 [ 6.09087626  6.60208114  7.0593024  ...,  5.8579878   6.36091286
   6.73509632]
 [ 5.75268281  6.29468164  6.62473448 ...,  5.72080664  5.6044771
   6.28099406]]


In [41]:
print theta.shape
theta_reshaped = theta.reshape(num_classes, input_size)
# print theta_reshaped.shape
# np.dot(theta_reshaped, X.T)
print theta_reshaped
print theta.T

(784, 10)
[[  3.85660322e-04   1.03759747e-05   3.16824117e-04 ...,   4.66530193e-04
    2.79538175e-04   3.08552539e-04]
 [  2.34494602e-04   4.97414883e-04   4.66736855e-04 ...,   3.08559443e-04
    2.61600180e-04   1.31760154e-04]
 [  2.66439515e-04   1.67756526e-04   1.53673573e-04 ...,   5.97942741e-05
    1.59763830e-04   2.88624047e-04]
 ..., 
 [  4.30791245e-04   3.23250704e-04   1.18635170e-04 ...,   4.50958641e-04
    2.83464425e-04   1.39533453e-04]
 [  2.12040461e-04   6.69079080e-05   3.39008463e-04 ...,   4.56860688e-04
    3.74694615e-04   2.45380228e-04]
 [  3.84250382e-04   1.95418337e-05   1.75991613e-04 ...,   4.98374494e-04
    1.15055453e-04   4.47175715e-04]]
[[  3.85660322e-04   3.42679909e-04   2.71272184e-04 ...,   3.48713905e-04
    4.94031613e-05   4.46830018e-04]
 [  1.03759747e-05   4.76696673e-04   7.10850238e-05 ...,   1.56844888e-04
    2.77312323e-04   1.25052283e-04]
 [  3.16824117e-04   1.97413316e-06   1.86670380e-04 ...,   1.02725541e-04
    1.47121

In [None]:
import tensorflow