# Logistic Regression

In [1]:
import os
import pickle

import idx2numpy
import matplotlib.pyplot as plt
import numpy as np
from sklearn.utils import shuffle


## Import Data

In [2]:
# Read the four IDX files (train/test images and labels) from ./data into arrays.
load_idx = idx2numpy.convert_from_file

train_images = load_idx('./data/train-images-idx3-ubyte')
train_labels = load_idx('./data/train-labels-idx1-ubyte')
test_images = load_idx('./data/t10k-images-idx3-ubyte')
test_labels = load_idx('./data/t10k-labels-idx1-ubyte')


### Reshape the image stack from a 3D array to a 2D matrix (one flattened image per row)


In [3]:
# Fixed seed so that every run (including Restart & Run All) draws the same
# training subset. Without it, models loaded from the pickle cache would be
# scored against a different random subset than the one they were fitted on.
RANDOM_SEED = 0

# Flatten every image into a single row: (n_images, ...) -> (n_images, n_pixels).
# One reshape replaces stacking the images one by one.
X_train = np.reshape(train_images, (len(train_images), -1))
y_train = train_labels
X_test = np.reshape(test_images, (len(test_images), -1))
y_test = test_labels

# Shuffle images and labels in unison, then keep only the first `train_cap`
# samples to bound the training time.
X_train, y_train = shuffle(X_train, y_train, random_state=RANDOM_SEED)
train_cap = 5000
X_train = X_train[:train_cap, :]
y_train = y_train[:train_cap]
dataset_size = len(y_train)
print(dataset_size)


5000


In [4]:
# Pickle cache files for the trained per-digit models, one file per penalty.
# NOTE(review): the L2 file is named 'q2_12.sav' (digits "12") while the L1 file
# is 'q2_l1.sav' -- possibly a typo for 'q2_l2.sav'. Left unchanged so that an
# already-saved cache file is still found; confirm before renaming.
logit_l2_model_file = './saved_model/q2_12.sav'
logit_l1_model_file = './saved_model/q2_l1.sav'

In [5]:
from sklearn.linear_model import LogisticRegression

### L2 Norm


In [6]:
def logistic_regression(penalty='l2'):
    """Return ten one-vs-rest logistic-regression models, one per digit 0-9.

    A pickled list of models is loaded from the cache file for ``penalty`` if
    that file exists. Otherwise ten binary classifiers ("is digit i" vs. "is
    not") are fitted on the notebook-level ``X_train`` / ``y_train``, written
    to the cache file, and returned.

    Note: the cache is keyed by penalty only. If the training data changes,
    delete the cache file to force retraining.

    Parameters
    ----------
    penalty : {'l1', 'l2'}, default 'l2'
        Regularisation penalty passed to ``LogisticRegression``.

    Returns
    -------
    list
        Ten fitted classifiers; element ``i`` predicts whether a sample is
        digit ``i``.

    Raises
    ------
    ValueError
        If ``penalty`` is neither ``'l1'`` nor ``'l2'``. Previously any other
        value silently read from / overwrote the L1 cache file.
    """
    if penalty not in ('l1', 'l2'):
        raise ValueError("penalty must be 'l1' or 'l2', got %r" % (penalty,))

    model_file = logit_l2_model_file if penalty == 'l2' else logit_l1_model_file

    # SECURITY: pickle.load executes arbitrary code embedded in the file; only
    # load cache files that this notebook wrote itself.
    try:
        with open(model_file, 'rb') as cache:
            loaded_model = pickle.load(cache)
    except FileNotFoundError:
        pass  # no cache yet -> fall through and train from scratch
    else:
        print('local model returned')
        return loaded_model

    regs = []
    for i in range(10):
        print('training digit ..', i)
        # Binary target: 1 where the label is digit i, 0 otherwise.
        y_train_ = (y_train == i).astype(int)
        # liblinear supports both 'l1' and 'l2'; the newer default solver
        # ('lbfgs') rejects 'l1', so pin the solver explicitly.
        reg = LogisticRegression(penalty=penalty, solver='liblinear')
        reg.fit(X_train, y_train_)
        regs.append(reg)

    # Make sure the target directory exists so the trained models are not lost
    # to a FileNotFoundError at save time.
    os.makedirs(os.path.dirname(model_file) or '.', exist_ok=True)
    with open(model_file, 'wb') as cache:
        pickle.dump(regs, cache)
    print('Fresh model returned')
    return regs


In [7]:
# Per-digit classifiers with the default L2 penalty (loaded from the pickle
# cache when the file exists, trained and saved otherwise).
reg_l2 = logistic_regression()

local model returned


In [8]:
# Report, for each digit, the accuracy of its L2 classifier on the binary
# "is this digit i?" task, first on the training subset and then on the test set.
for i in range(10):
    # Targets for digit i: 1 where the label equals i, 0 elsewhere.
    y_train_ = (y_train == i).astype(int)
    train_accuracy = reg_l2[i].score(X_train, y_train_)
    print('Train Accuracy for %sth digit: %s' % (i, train_accuracy))

    y_test_ = (y_test == i).astype(int)
    test_accuracy = reg_l2[i].score(X_test, y_test_)
    print('Test Accuracy for %sth digit: %s\n' % (i, test_accuracy))



Train Accuracy for 0th digit: 0.9846
Test Accuracy for 0th digit: 0.9834

Train Accuracy for 1th digit: 0.9838
Test Accuracy for 1th digit: 0.9854

Train Accuracy for 2th digit: 0.9488
Test Accuracy for 2th digit: 0.9487

Train Accuracy for 3th digit: 0.9452
Test Accuracy for 3th digit: 0.9467

Train Accuracy for 4th digit: 0.9762
Test Accuracy for 4th digit: 0.971

Train Accuracy for 5th digit: 0.953
Test Accuracy for 5th digit: 0.9457

Train Accuracy for 6th digit: 0.9832
Test Accuracy for 6th digit: 0.9791

Train Accuracy for 7th digit: 0.9714
Test Accuracy for 7th digit: 0.9666

Train Accuracy for 8th digit: 0.9146
Test Accuracy for 8th digit: 0.917

Train Accuracy for 9th digit: 0.9278
Test Accuracy for 9th digit: 0.9207



### L1 Norm

In [9]:
# Per-digit classifiers with the L1 penalty (loaded from the pickle cache when
# the file exists, trained and saved otherwise).
reg_l1 = logistic_regression(penalty='l1')

local model returned


In [10]:
# Report, for each digit, the accuracy of its L1 classifier on the binary
# "is this digit i?" task, first on the training subset and then on the test set.
for i in range(10):
    # Targets for digit i: 1 where the label equals i, 0 elsewhere.
    y_train_ = (y_train == i).astype(int)
    train_accuracy = reg_l1[i].score(X_train, y_train_)
    print('Train Accuracy for %sth digit: %s' % (i, train_accuracy))

    y_test_ = (y_test == i).astype(int)
    test_accuracy = reg_l1[i].score(X_test, y_test_)
    print('Test Accuracy for %sth digit: %s\n' % (i, test_accuracy))


Train Accuracy for 0th digit: 0.985
Test Accuracy for 0th digit: 0.9866

Train Accuracy for 1th digit: 0.9892
Test Accuracy for 1th digit: 0.9896

Train Accuracy for 2th digit: 0.9652
Test Accuracy for 2th digit: 0.9647

Train Accuracy for 3th digit: 0.9464
Test Accuracy for 3th digit: 0.9457

Train Accuracy for 4th digit: 0.9756
Test Accuracy for 4th digit: 0.9718

Train Accuracy for 5th digit: 0.9562
Test Accuracy for 5th digit: 0.9539

Train Accuracy for 6th digit: 0.9796
Test Accuracy for 6th digit: 0.9733

Train Accuracy for 7th digit: 0.973
Test Accuracy for 7th digit: 0.9706

Train Accuracy for 8th digit: 0.9122
Test Accuracy for 8th digit: 0.9172

Train Accuracy for 9th digit: 0.9346
Test Accuracy for 9th digit: 0.9302

