## Logistic regression for fashion-MNIST dataset

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Data Preparation

In [2]:
# Read the train and test splits from CSV files in the working directory
data_train, data_test = (
    pd.read_csv(csv_path)
    for csv_path in ('fashion-mnist_train.csv', 'fashion-mnist_test.csv')
)

In [3]:
# Column 0 is taken as the class label; every remaining column is a feature
X_train, Y_train = data_train.values[:, 1:], data_train.values[:, 0]
X_test, Y_test = data_test.values[:, 1:], data_test.values[:, 0]

In [4]:
# Normalization of data: cast to float32 first, then scale in place.
# NOTE(review): dividing by 255 presumably maps 8-bit pixel intensities
# into [0, 1] -- confirm against the dataset.
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255

## Defining functions for logistic regression

In [5]:
# Logistic regression function
# The function takes the samples X, the labels Y and the number of iterations T, with a default learning rate of 0.01
# The function returns the weights obtained by minimizing the error with stochastic gradient descent

def logistic(X, Y, T, learning_rate=0.01):
    """Fit a binary logistic-regression weight vector by stochastic gradient descent.

    Args:
        X: 2-D array of samples, one row per sample.
        Y: 1-D array of labels aligned with the rows of X
            (presumably -1/+1, as produced by ``labels_new`` -- confirm).
        T: number of single-sample update steps to perform.
        learning_rate: step size applied to every update.

    Returns:
        1-D weight array with one entry per column of X. It starts at zero,
        so ``T == 0`` returns an all-zero vector.
    """
    n_samples, n_features = X.shape[0], X.shape[1]
    W = np.zeros((n_features,))
    for _ in range(T):
        # Draw one sample index at random for this step
        pick = np.random.choice(n_samples, size=1)
        sample = X[pick][0]
        label = Y[pick]
        # Gradient of log(1 + exp(-y * w.x)) with respect to w for this sample
        margin = label[0] * np.dot(sample, W)
        gradient = -1 * (label * sample) / (1 + np.exp(margin))
        W = W - learning_rate * gradient
    return W

In [6]:
# The function uses a one-vs-rest approach: samples of class k are relabelled +1 and all other samples -1

def labels_new(Y, k):
    """Build one-vs-rest labels for class ``k``.

    Args:
        Y: 1-D array-like of class labels.
        k: the class treated as positive.

    Returns:
        A new array of the same length as Y holding +1 where the label,
        truncated to an integer, equals ``k`` and -1 elsewhere. Y itself is
        not modified. The dtype of Y is kept when it can represent -1
        (signed integer or float); otherwise a signed integer array is
        returned so that -1 does not wrap around.
    """
    labels = np.asarray(Y)
    # One vectorised comparison instead of a Python-level loop per element
    signed = np.where(labels.astype(np.int64) == k, 1, -1)
    keeps_sign = (
        np.issubdtype(labels.dtype, np.signedinteger)
        or np.issubdtype(labels.dtype, np.floating)
    )
    if keeps_sign:
        # Preserve the caller's dtype where that is safe
        signed = signed.astype(labels.dtype)
    return signed

In [7]:
# sigmoid function 

def sigmoid(x):
    """Return the logistic function 1 / (1 + e^(-x)), applied element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

# The prob function applies the sigmoid to compute, for each input in X, the probability of belonging to each class

def prob(W, X):
    """Compute per-class probabilities for every sample in X.

    Args:
        W: sequence of weight vectors, one per class.
        X: 2-D array of samples, one row per sample.

    Returns:
        A list with one entry per weight vector in W; entry ``i`` is
        ``sigmoid(np.dot(X, W[i]))``.
    """
    # Iterate over the classifiers actually supplied rather than assuming
    # exactly 10, so any number of classes works.
    return [sigmoid(np.dot(X, weights)) for weights in W]

# The following function calculates the predicted class from the maximum probability
def predicted_class(p):
    """Pick, for every sample, the class with the highest probability.

    Args:
        p: 2-D array indexed as ``p[class, sample]``.

    Returns:
        A list with one class index per sample; ties resolve to the lowest
        class index.
    """
    # A single argmax down the class axis replaces the per-column loop
    return list(np.argmax(p, axis=0))

# Accuracy of the prediction is calculated by comparing the predicted class and actual class
def accuracy(Y_pred, Y):
    """Return the fraction of positions where Y_pred matches Y.

    Args:
        Y_pred: predicted classes, indexable by position.
        Y: actual classes; its length determines how many positions are compared.

    Returns:
        Number of matching positions divided by ``len(Y)``.
    """
    total = len(Y)
    hits = sum(1 for idx in range(total) if Y_pred[idx] == Y[idx])
    return hits / total

## Performing logistic Regression

In [8]:
class regression:
    """One-vs-rest logistic-regression classifier built on the module-level helpers.

    Both methods are stateless: they can be called on the class itself
    (``regression.fit(...)``) or on an instance, and the trained weights are
    returned to the caller rather than stored.
    """

    @staticmethod
    def fit(X, Y, k, n_iter=None):  # fits the functions
        """Train one binary classifier per class.

        Args:
            X: training samples, passed through to ``logistic``.
            Y: class labels; relabelled for each class by ``labels_new``.
            k: number of classes; the classes trained are ``0 .. k-1``.
            n_iter: number of SGD iterations per classifier. When omitted,
                the module-level name ``T`` is used, which is how the
                existing cells drive this method.

        Returns:
            A list of ``k`` weight vectors; index ``i`` belongs to class ``i``.
        """
        # Fall back to the global T only when no explicit count is given,
        # so existing three-argument calls keep working unchanged.
        iterations = T if n_iter is None else n_iter
        return [logistic(X, labels_new(Y, i), iterations) for i in range(k)]

    @staticmethod
    def score(X, Y, W):  # calculates score of regression
        """Return the fraction of samples in X whose predicted class equals Y.

        Args:
            X: samples to classify.
            Y: actual class labels for X.
            W: weight vectors as returned by ``fit``.
        """
        probs = prob(W, X)
        Y_pred = predicted_class(np.array(probs))
        return accuracy(Y_pred, Y)

In [None]:
# Sweep over the number of SGD iterations and record the test accuracy of each run.
# NOTE: the loop variable must stay named T -- regression.fit is called without an
# iteration count and picks up T from module scope.
score=[]
for T in [50,100,500,1000,5000,10000,50000,100000,500000]:
    classifier = regression
    # Train the 10 one-vs-rest classifiers with the current T
    W_log = classifier.fit(X_train, Y_train, 10)
    # Accuracy on the test split for this T
    a=classifier.score(X_test, Y_test, W_log)
    score.append(a)

In [None]:
# Training the algorithm
# NOTE(review): fit() is called without an iteration count and takes it from the
# module-level T, so this cell trains with whatever value T was last assigned
# (500000 if the sweep cell ran to completion just before) -- confirm this is intended.

classifier = regression
W_log = classifier.fit(X_train, Y_train, 10)

## Testing the Implemented Logistic Regression

In [None]:
# Plot the recorded accuracies against the iteration counts used in the sweep
s = score
plt.plot(s)
plt.title("Regression analyses", fontsize=15)
plt.xlabel('No. of iterations', fontsize=15)
plt.ylabel('Accuracy', fontsize=15)
# plt.plot(s) places the points at x = 0..8; relabel those ticks with the
# iteration count each point corresponds to.
positions = (0, 1, 2, 3, 4, 5, 6, 7, 8)
labels = ('50', '100', '500', '1000', '5000', '10000', '50000', '100000', '500000')
plt.xticks(positions, labels)
# The `b=` keyword of grid() was renamed to `visible=` and later removed from
# Matplotlib, so pass the flag positionally; that works on old and new
# releases alike. True switches the grid on explicitly instead of relying on
# the toggle behaviour of None.
plt.grid(True, which='major', axis='both')