In [2]:
import numpy as np
import glob
import imageio as magic
import pandas as pd
import collections
from sklearn.model_selection import train_test_split

In [3]:

class LogisticRegression:

    def __init__(self):
        print("Logistic Regression")

    def get_sigmoid(self, X, W):
        return 1 / (1 + np.exp(- self.get_hypothesis(X, W)))

    def get_hypothesis(self, X, W):
        return X @ W.T

    def __get_cost(self, X, Y, W, lamda):
        return -(1.0 / len(X)) * np.sum((Y * np.log(self.get_sigmoid(X, W))) + ((1 - Y) * np.log(1 - self.get_sigmoid(X, W))))

    def __get_gradient(self, X, Y, W, lamda):
        return (1.0 / len(X)) * (np.sum(X * (self.get_hypothesis(X,W) - Y), axis=0) + (lamda * W))

    def __logistic_regression(self, X, Y, W, alpha, max_iterations, lamda):
        
        for i in range(max_iterations):
            
            W = W - alpha * self.__get_gradient(X, Y, W, lamda)
            cost = self.__get_cost(X, Y, W, lamda)
            
            if i % 100 == 0:
                print("Cost: ", cost)
            
        return W, cost

    def train(self, X, Y, W, alpha, max_iterations, lamda=0):
        return self.__logistic_regression(X, Y, W, alpha, max_iterations, lamda)

    def validate(self, X, Y, W):
        return self.__get_cost(X, Y, W, 0)

    def test(self, X, Y, W, lamda=0):
        return self.__get_cost(X, Y, W, 0)

In [4]:

image_data = []
label = []
for file_name in glob.iglob('/home/lognod/Desktop/nhcd/numerals/**/*.jpg', recursive=True):
    image_array = magic.imread(file_name,as_gray=True)
    label=int(file_name[-12:-11])
    pixel_data = (255.0-image_array.flatten())/256.0
    pixel_data = np.append(label,pixel_data)
    image_data.append(pixel_data)


image_data = np.array(image_data)
np.random.shuffle(image_data)
image_data_pd = pd.DataFrame(image_data)
image_data_pd.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,775,776,777,778,779,780,781,782,783,784
0,8.0,0.019531,0.039062,0.0,0.0,0.09375,0.0,0.0,0.054688,0.0,...,0.011719,0.0,0.039062,0.003906,0.0,0.007812,0.03125,0.015625,0.0,0.0
1,6.0,0.007812,0.003906,0.0,0.0,0.0,0.019531,0.03125,0.03125,0.035156,...,0.0,0.0,0.019531,0.011719,0.0,0.035156,0.183594,0.472656,0.445312,0.0625
2,5.0,0.003906,0.0,0.035156,0.0,0.019531,0.019531,0.0,0.335938,0.914062,...,0.0,0.0,0.019531,0.039062,0.027344,0.0,0.027344,0.03125,0.371094,0.289062
3,4.0,0.011719,0.0,0.0,0.03125,0.03125,0.0,0.0,0.0,0.0,...,0.054688,0.035156,0.0,0.0,0.0,0.019531,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.003906,0.027344,0.011719,0.0,0.0,0.027344,0.007812,...,0.0,0.0,0.011719,0.019531,0.042969,0.054688,0.007812,0.003906,0.003906,0.0


In [5]:
X = image_data_pd.iloc[:,1:]
ones = np.ones([len(X),1])
X = np.concatenate((ones,X), axis = 1)
Y = image_data_pd.iloc[:,0:1].values
print(X.shape)
print(Y)
X_train,X_rest,Y_train,Y_rest =  train_test_split(X,Y,test_size=0.4)
X_validate,X_test,Y_validate,Y_test = train_test_split(X_rest,Y_rest,test_size=0.5)

(2880, 785)
[[8.]
 [6.]
 [5.]
 ...
 [1.]
 [3.]
 [6.]]


In [6]:
print(X_train.shape[0])
W = np.zeros((1,len(X_train[0,:])))
print(W.shape)

1728
(1, 785)


In [7]:
logistic_regression = LogisticRegression()
weight_list =[]
cost_list = []

for i in range(10):
    W = np.zeros((1,len(X_train[0,:])))
    print("Learning: ", float(i))
    Y_train_one = (Y_train == float(i)).astype(int)
    weight,cost = logistic_regression.train(X_train,Y_train_one,W,0.01,10000,0)
    weight_list.append(weight.flatten())
    cost_list.append(cost)
    
print(weight_list)

Logistic Regression
Learning:  0.0
Cost:  0.7114335930412885
Cost:  0.680951986825746
Cost:  0.6782222123996109
Cost:  0.6771981809170187
Cost:  0.6766117668903164
Cost:  0.6762034823387227
Cost:  0.6758879074211205
Cost:  0.6756289519902757
Cost:  0.6754085531756812
Cost:  0.6752164131130821
Cost:  0.675046025038507
Cost:  0.6748929465175869
Cost:  0.6747539719125262
Cost:  0.674626696957294
Cost:  0.6745092685701114
Cost:  0.674400229580961
Cost:  0.6742984161692208
Cost:  0.6742028868287977
Cost:  0.6741128714211615
Cost:  0.6740277336884484
Cost:  0.6739469431387549
Cost:  0.6738700536476474
Cost:  0.6737966869776633
Cost:  0.6737265199590957
Cost:  0.6736592744322835
Cost:  0.6735947092950628
Cost:  0.6735326141695716
Cost:  0.6734728043245541
Cost:  0.6734151165779915
Cost:  0.6733594059701855
Cost:  0.6733055430460781
Cost:  0.6732534116221126
Cost:  0.6732029069406372
Cost:  0.6731539341359465
Cost:  0.6731064069522558
Cost:  0.6730602466663981
Cost:  0.6730153811777325
Cost:  

In [8]:
weights = np.array(weight_list)
weights.shape


(10, 785)

In [9]:
weights_data = pd.DataFrame(weights)
weights_data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,775,776,777,778,779,780,781,782,783,784
0,-0.093899,0.007686,0.006268,0.029002,0.038011,0.023101,0.016737,0.026905,0.02194,0.01353,...,0.013784,0.00604,-0.003927,0.000533,0.018539,0.010618,-0.010772,-0.021299,0.008064,0.052841
1,0.143591,0.015261,0.00658,-0.023896,-0.021621,-0.012301,-0.015924,0.005058,0.00515,-0.018671,...,0.013339,0.016487,-0.017371,-0.009563,-0.025478,-0.020061,-0.00354,0.012468,-0.012866,-0.037208
2,0.024719,-0.006504,-0.014556,-0.005924,-0.030119,-0.059757,-0.03304,0.022934,0.005104,0.020113,...,-0.010566,-0.026817,-0.010781,0.05096,0.034587,-0.029773,0.008987,-0.018238,-0.026118,0.020069
3,-0.004265,0.021673,-0.019892,-0.030679,-0.010598,0.016354,0.022799,0.024701,0.035129,0.008978,...,-0.000651,0.011287,0.027622,0.02073,0.015739,0.026429,-0.0228,0.036264,0.124849,0.092299
4,0.085713,0.016676,-0.034388,-0.019079,0.048122,0.032352,-0.021156,-0.025811,0.016404,-0.01741,...,0.035385,0.038111,0.056347,0.041209,0.02694,0.007264,-0.014304,-0.020074,-0.020532,-0.019915
5,0.256595,-0.019591,0.069562,0.03199,0.035145,0.015315,0.029077,0.001397,-0.045106,0.007254,...,-0.045057,0.003489,-0.015376,-0.051374,0.001601,0.035554,-0.014873,0.004903,-0.018248,0.005597
6,0.140375,-0.019416,-0.028669,0.003312,-0.00644,-0.035486,-0.024957,-0.018621,-0.018145,0.022911,...,0.012634,-0.02408,-0.029732,-0.036787,-0.030121,0.017947,0.064896,0.021048,0.00641,-0.013966
7,0.064408,-0.003796,-0.003605,-0.015067,-0.012236,0.021723,0.0086,-0.007718,-0.008847,-0.044447,...,0.002931,0.009233,0.014634,0.009524,-0.0226,-0.017069,-0.015763,-0.016221,-0.012662,-0.018381
8,0.179981,0.031347,0.023136,0.0114,-0.030317,-0.008773,0.023627,-0.01205,-0.019274,-0.042605,...,-0.014031,0.003042,0.019952,0.00287,0.015603,0.009954,0.030447,0.002107,-0.032947,-0.023913
9,0.03924,-0.021949,0.014818,0.014394,-0.010183,0.012599,-0.009466,-0.011402,0.012598,0.046739,...,0.006811,-0.020159,-0.024891,-0.014034,-0.021365,-0.020366,-0.006032,0.009189,0.019956,-0.00103


In [10]:
weights_data.to_csv("/home/lognod/mini_logistic2.csv")

In [None]:
for i in range(10):
    logistic_regression.validate(X_validate[0,:],Y_validate)