## Logistic Regression with Gradient Descent

In [220]:
from sklearn.datasets import fetch_mldata
from sklearn import preprocessing
import matplotlib.pyplot as plt
import numpy as np

In [259]:
# Load the 'MNIST original' dataset, using ./data as the local data directory.
# NOTE(review): fetch_mldata is no longer available in newer scikit-learn
# releases (fetch_openml is its replacement) — confirm the installed version
# before re-running this cell.
mnist = fetch_mldata('MNIST original', data_home='./data')

In [352]:
# Sanity check: dimensions of the feature matrix, then of the label vector
for field in ('data', 'target'):
    print(mnist[field].shape)

(70000, 784)
(70000,)


In [353]:
# Copy the features and labels out of the dataset object.
X = np.array(mnist.data)
# Labels get an extra axis at position 1 so they can be stacked next to X.
Y = np.expand_dims(np.array(mnist.target), axis=1)

In [354]:
# Select examples with label "0" or "1" and then shuffle the dataset.
# The mask is computed on the labels first so that only the selected rows are
# stacked into one array, instead of stacking the whole dataset and filtering
# afterwards.
selected = (Y[:, 0] == 0) | (Y[:, 0] == 1)
All = np.hstack((X[selected], Y[selected]))  # last column holds the label

# Shuffle the rows in place with a fixed seed so the train/test split taken
# from this array is the same on every run; a local generator is used so the
# global NumPy random state is left untouched.
np.random.RandomState(0).shuffle(All)

In [355]:
# Train/test split: the first N_TRAIN shuffled rows are used for training,
# the remaining rows for testing. The last column of All is the label.
N_TRAIN = 12000

train_X = All[:N_TRAIN, :-1]
train_Y = All[:N_TRAIN, -1][:, np.newaxis]  # Add an extra axis

test_X = All[N_TRAIN:, :-1]
test_Y = All[N_TRAIN:, -1][:, np.newaxis]

# Normalize the data. The scaler is fitted on the training rows only and the
# same per-feature mean/std is then applied to the test rows, so both sets go
# through the identical transformation (scaling the test set with its own
# statistics would feed the model differently-scaled inputs at test time).
scaler = preprocessing.StandardScaler().fit(train_X)
train_X = scaler.transform(train_X)
test_X = scaler.transform(test_X)

# Add Intercept: a constant column of ones appended as the last feature
ones_train = np.ones((len(train_X), 1))
ones_test = np.ones((len(test_X), 1))

train_X = np.hstack((train_X, ones_train))
test_X = np.hstack((test_X, ones_test))

print("Training Dimension ",train_X.shape)
print("Testing Dimension ", test_X.shape)

print('Training label: ', train_Y.shape)
print('Testing label: ', test_Y.shape)

Training Dimension  (12000, 785)
Testing Dimension  (2780, 785)
Training label:  (12000, 1)
Testing label:  (2780, 1)


In [365]:
# Small random starting weights: one per column of train_X, as a column vector
theta = 0.001 * np.random.rand(train_X.shape[1], 1)
theta.shape

(785, 1)

In [357]:
# Just testing: evaluate the gradient expression once and check its shape.
# The sigmoid is written inline because the sigmoid/sig_vec helpers are only
# defined in a later cell, so calling them here fails on a fresh
# top-to-bottom run of the notebook.
probs = 1 / (1 + np.exp(-(train_X @ theta)))
gr = ((probs - train_Y).T @ train_X).T
gr.shape

(785, 1)

In [366]:
# Stopping tolerance: training ends once the cost changes by less than this
error = 1e-2
# Step size applied to the gradient in each update
ita = 1e-3

In [359]:
def sigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), evaluated without overflow.

    Parameters
    ----------
    z : scalar or array_like
        Input value(s); converted to a float array internally.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Element-wise sigmoid with the same shape as ``z`` (a NumPy scalar
        when ``z`` is a scalar).
    """
    z = np.asarray(z, dtype=float)
    # exp is only ever taken of a non-positive number, so it cannot overflow.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- the same
    # function, written so that each branch only needs exp(-|z|).
    out = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    return out[()]  # unwrap 0-d results into a scalar; n-d arrays pass through


# sigmoid already operates element-wise on whole arrays, so the "vectorized"
# name is simply an alias; this avoids np.vectorize's per-element Python calls.
sig_vec = sigmoid

In [364]:
def cost_function(theta, train_X, train_Y):
    """Logistic-regression cost (negative log-likelihood) for 0/1 labels.

    For logits ``z = train_X @ theta`` the cost is
    ``sum(log(1 + exp(z)) - y * z)``, whose gradient with respect to theta is
    ``train_X.T @ (sigmoid(z) - train_Y)`` -- the expression get_gradient
    evaluates -- so descending that gradient decreases this value.

    Parameters
    ----------
    theta : numpy.ndarray, shape (n_features, 1)
        Current weights.
    train_X : numpy.ndarray, shape (n_samples, n_features)
        Design matrix.
    train_Y : numpy.ndarray, shape (n_samples, 1)
        Labels, each 0 or 1.

    Returns
    -------
    numpy.float64
        Total cost over all samples (non-negative up to rounding).
    """
    logits = train_X @ theta
    # logaddexp(0, z) evaluates log(1 + exp(z)) without overflowing for
    # large |z|, unlike log(exp(z) + 1).
    return np.sum(np.logaddexp(0, logits) - train_Y * logits)

In [361]:
def get_gradient(train_X, train_Y, theta):
    """Evaluate ``train_X.T @ (sig_vec(train_X @ theta) - train_Y)``.

    Parameters
    ----------
    train_X : array
        Design matrix, multiplied on the right by ``theta``.
    train_Y : array
        Labels, subtracted from the sigmoid of the logits.
    theta : array
        Current weights.

    Returns
    -------
    array
        The product above, transposed so that it has one row per column of
        ``train_X``.
    """
    residual = sig_vec(train_X @ theta) - train_Y
    return (residual.T @ train_X).T

In [362]:
def train(train_X, train_Y, theta, error, ita, max_iter=None):
    """Run gradient descent until the cost stops changing by more than ``error``.

    Each iteration takes the step ``theta - ita * get_gradient(...)`` and
    compares the cost before and after it.

    Parameters
    ----------
    train_X, train_Y : arrays
        Training inputs and labels, passed through to get_gradient and
        cost_function.
    theta : array
        Starting weights; not modified in place.
    error : float
        Convergence tolerance on the absolute change in cost between two
        consecutive iterates.
    ita : float
        Step size multiplying the gradient.
    max_iter : int or None, optional
        Upper bound on the number of accepted updates. ``None`` (the default)
        means no bound, i.e. the loop runs until the tolerance is met.

    Returns
    -------
    array
        The last accepted weights: the iterate from which the next step
        changed the cost by less than ``error`` (or the iterate reached when
        ``max_iter`` updates have been made).
    """
    # The cost of the current iterate is carried across iterations so that
    # cost_function runs once per step instead of twice.
    current_cost = cost_function(theta, train_X, train_Y)
    updates = 0
    while max_iter is None or updates < max_iter:
        gradient = get_gradient(train_X, train_Y, theta)
        new_theta = theta - ita * gradient
        new_cost = cost_function(new_theta, train_X, train_Y)
        if abs(current_cost - new_cost) < error:
            break
        theta, current_cost = new_theta, new_cost
        updates += 1
    return theta

In [None]:
# Fit the weights by gradient descent, starting from the initial theta.
# TODO: need to fix exp overflow
theta_ret = train(train_X, train_Y, theta, error=error, ita=ita)

In [143]:
def test(data_test_pre, theta):
    result = sig_vec(data_test_pre[:,:-1] @ theta)
    return len(np.where(result > 0.5)) / len(data_test_pre)