In [1]:
import numpy as np
import glob
import imageio as magic
import pandas as pd
import collections
from sklearn.model_selection import train_test_split

In [14]:

class LogisticRegression:

    def __init__(self):
        print("Logistic Regression")

    def get_sigmoid(self, X, W):
        return 1 / (1 + np.exp(- self.get_hypothesis(X, W)))

    def get_hypothesis(self, X, W):
        return X @ W.T

    def __get_cost(self, X, Y, W, lamda):
        return -(1.0 / len(X)) * (np.sum((Y * np.log(self.get_sigmoid(X, W))) + ((1 - Y) * np.log(1 - self.get_sigmoid(X, W)))) - lamda)

    def __get_gradient(self, X, Y, W, lamda):
        return (1.0 / len(X)) * (np.sum(X * (self.get_hypothesis(X,W) - Y), axis=0) + (lamda * np.sum(W)))

    def __logistic_regression(self, X, Y, W, alpha, max_iterations, lamda):
        
        for i in range(max_iterations):
            
            W = W - alpha * self.__get_gradient(X, Y, W, lamda)
            cost = self.__get_cost(X, Y, W, lamda)
            
            if i % 100 == 0:
                print("Cost: ", cost)
            
        return W, cost

    def train(self, X, Y, W, alpha, max_iterations, lamda=0):
        return self.__logistic_regression(X, Y, W, alpha, max_iterations, lamda)

    def validate(self, X, Y, W):
        return self.__get_cost(X, Y, W, 0)

    def test(self, X, Y, W, lamda=0):
        return self.__get_cost(X, Y, W, 0)
    
    def predict(self,X,W):
        return self.get_sigmoid(X,W)

In [15]:

image_data = []
label = []
for file_name in glob.iglob('/home/lognod/Desktop/nhcd/numerals/**/*.jpg', recursive=True):
    image_array = magic.imread(file_name,as_gray=True)
    label=int(file_name[-12:-11])
    pixel_data = (255.0-image_array.flatten())/256.0
    pixel_data = np.append(label,pixel_data)
    image_data.append(pixel_data)


image_data = np.array(image_data)
np.random.shuffle(image_data)
image_data_pd = pd.DataFrame(image_data)
image_data_pd.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,775,776,777,778,779,780,781,782,783,784
0,6.0,0.0,0.046875,0.0,0.003906,0.007812,0.03125,0.039062,0.21875,0.664062,...,0.0,0.0,0.109375,0.277344,0.421875,0.539062,0.730469,0.582031,0.324219,0.117188
1,6.0,0.003906,0.023438,0.011719,0.007812,0.015625,0.0,0.0,0.035156,0.0,...,0.0625,0.023438,0.164062,0.40625,0.332031,0.105469,0.011719,0.007812,0.003906,0.0
2,0.0,0.011719,0.109375,0.0,0.046875,0.015625,0.0,0.15625,0.304688,0.515625,...,0.0,0.0,0.035156,0.0,0.0,0.035156,0.0,0.0,0.0,0.0
3,4.0,0.089844,0.5625,0.929688,0.988281,0.984375,0.804688,0.1875,0.039062,0.0,...,0.503906,0.480469,0.40625,0.417969,0.421875,0.246094,0.109375,0.113281,0.015625,0.003906
4,6.0,0.019531,0.027344,0.003906,0.0,0.019531,0.027344,0.015625,0.027344,0.015625,...,0.0,0.3125,0.339844,0.0,0.011719,0.027344,0.007812,0.0,0.0,0.0


In [16]:
X = image_data_pd.iloc[:,1:]
ones = np.ones([len(X),1])
X = np.concatenate((ones,X), axis = 1)
Y = image_data_pd.iloc[:,0:1].values
print(X.shape)
print(Y)
X_train,X_rest,Y_train,Y_rest =  train_test_split(X,Y,test_size=0.4)
X_validate,X_test,Y_validate,Y_test = train_test_split(X_rest,Y_rest,test_size=0.5)

(2880, 785)
[[6.]
 [6.]
 [0.]
 ...
 [3.]
 [0.]
 [5.]]


In [17]:
print(X_train.shape[0])
W = np.zeros((1,len(X_train[0,:])))
print(W.shape)

1728
(1, 785)


In [18]:
logistic_regression = LogisticRegression()
weight_list =[]
cost_list = []

for i in range(10):
    W = np.zeros((1,len(X_train[0,:])))
    print("Learning: ", float(i))
    Y_train_one = (Y_train == float(i)).astype(int)
    weight,cost = logistic_regression.train(X_train,Y_train_one,W,0.01,100,0)
    weight_list.append(weight.flatten())
    cost_list.append(cost)

Logistic Regression
Learning:  0.0
Cost:  0.7114695611221895
Learning:  1.0
Cost:  0.7092414544955561
Learning:  2.0
Cost:  0.7094634392518312
Learning:  3.0
Cost:  0.7095644544662125
Learning:  4.0
Cost:  0.7081073839956243
Learning:  5.0
Cost:  0.7075633866432153
Learning:  6.0
Cost:  0.7080642813426173
Learning:  7.0
Cost:  0.7086935081319748
Learning:  8.0
Cost:  0.7063661614146622
Learning:  9.0
Cost:  0.7089755260545648


In [19]:
weights = np.array(weight_list)
weights.shape

(10, 785)

In [20]:
weights_data = pd.DataFrame(weights)
weights_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,775,776,777,778,779,780,781,782,783,784
0,-0.005805,-0.001008,-0.002114,-0.001812,-0.000529,-0.000704,-6.2e-05,0.000745,0.002024,0.003493,...,-0.000544,-0.001261,-0.001627,-0.001583,-0.001902,-0.002299,-0.002387,-0.002268,-0.001885,-0.000423
1,0.005526,0.000132,-0.000585,-0.001375,-0.001439,-0.000886,-0.000413,0.000276,0.000186,0.000404,...,-0.000107,6.6e-05,-0.000433,-0.000336,3.9e-05,0.000114,2.4e-05,0.000948,0.001407,-3.1e-05
2,0.006165,-0.002173,-0.002818,-0.00296,-0.002694,-0.002695,-0.002923,-0.002754,-0.002083,-0.001365,...,-0.001154,-0.00094,-0.00114,-0.000839,-0.000694,-0.000883,0.000213,0.000639,0.002572,0.001965
3,0.003363,-0.001128,-0.001796,-0.001637,-0.000872,2.4e-05,0.001106,0.001765,0.001766,0.001892,...,-0.001879,-0.001159,-0.000575,-0.000529,-0.000527,-7.7e-05,0.000746,0.004805,0.007894,0.004266
4,0.006847,0.003534,0.004248,0.004005,0.00464,0.003496,0.002499,0.001079,0.000273,-0.001467,...,0.004528,0.003747,0.003893,0.003135,0.00139,-0.000293,-0.001359,-0.002778,-0.003537,-0.001959


In [21]:
weights_data.to_csv("/home/lognod/MiniML/mini_logistic_without_regularization.csv")

In [22]:
print(weights.shape)
logistic_regression.validate(X_validate,Y_validate,weights)

(10, 785)


3.166438892252952

In [23]:
logistic_regression = LogisticRegression()
weights_2 = pd.read_csv("/home/lognod/MiniML/mini_logistic_with_regularization.csv")
y =[]
for i in range(10):
    y.append(logistic_regression.predict(X_test[26,:],weights_2.iloc[i,1:]))

Logistic Regression


In [24]:
y

[0.4953133191104629,
 0.5173177685364324,
 0.4884820586836548,
 0.5324840126875459,
 0.5138344148173045,
 0.49708518115291883,
 0.5781435215174765,
 0.4967140471910474,
 0.5050302815024996,
 0.5213652776239386]

In [25]:
prediction = np.where(y == np.amax(y))
print(prediction[0])

[6]


In [26]:
test = X_test[26,1:]

In [27]:
image = test.reshape(-1,28)
magic.imwrite('/home/lognod/predicted.png', image)


