In [32]:
import sklearn
import numpy as np
import copy
import time
import math

In [33]:
from sklearn.datasets import load_iris
iris = sklearn.datasets.load_iris()
X = iris.data[:100]
Y = iris.target[:100]

In [34]:
class LogisticRegression():
    
    def __init__(self,p):
        '''
        p: number of features
        '''
        self.intercept = 0
        self.p = p
        self.w = [0,0,0,0]
        self.learning_rate = 0.001
        
        self.max_iter = 2000
        self.stopping_criterion = pow(10,-5)
    
    def dot(self,x):
        ans = self.intercept
        for i in range(self.p):
            ans += x[i] * self.w[i]
        return ans
    
    def sigma(self,x):
        return  math.exp(self.dot(x))/(1+math.exp(self.dot(x)))

    def C(self,x,y):
        return math.log(1+math.exp(self.dot(x))) - y * self.dot(x)
    
    def Loss_function(self,X,Y):
        n = len(X)
        loss = 0
        for i in range(n):
            loss+= self.C(X[i],Y[i])
        return loss/n

    def gradient(self,X,Y):
        n = len(X) #nb of observations
        g = [0 for i in range(self.p+1)]
        for i in range(n):
            g[0] += math.exp(self.dot(X[i]))/(1+math.exp(self.dot(X[i]))) - Y[i]
        g[0] /= n
        for j in range(self.p): #nb of features
            for i in range(n): #loop on all observations
                g[1+j] += X[i,j] * (math.exp(self.dot(X[i]))/(1+math.exp(self.dot(X[i]))) - Y[i])
            g[j] /= n
        return g

    def descent(self,X,Y):
        g = self.gradient(X,Y)
        self.intercept = self.intercept - self.learning_rate * g[0]
        for j in range(self.p):
            self.w[j] = self.w[j] - self.learning_rate * g[j+1]

    def learn(self,X, Y):
        start_time = time.time()
        for i in range(self.max_iter):
            self.descent(X,Y)
            #print(self.Loss_function(X,Y))
        print("Best coefficients are", self.intercept, self.w,"\nwith Lose",self.Loss_function(X,Y))
        print("We needed %.2f seconds to fit the model" %(time.time() - start_time ))

    def score(self,X,Y):
        sum = 0
        n = len(X)
        for i in range(n):
            yp = round(self.sigma(X[i]))
            if (yp == Y[i]):
                sum +=1
        print("Score is", sum/n*100, "%")

In [35]:
model = LogisticRegression(4)

model.learn(X,Y)
model.score(X,Y)

Best coefficients are -0.0011322794604707762 [-0.48938185248622096, -0.45009497349481514, 0.06923831933948571, 5.589625416699037] 
with Lose 0.06635242170403859
We needed 5.40 seconds to fit the model
Score is 100.0 %
