Import important libraries

In [15]:
import numpy as np
import pandas as pd
import random

Read dataset using pandas

In [16]:
# Load the dataset from the CSV file in the working directory and preview its first five rows.
df = pd.read_csv("big_logistic_regression_dataset.csv")
df.head(5)

Unnamed: 0,Feature_1,Feature_2,Feature_3,Feature_4,Feature_5,Target
0,0.496714,-0.138264,0.647689,1.52303,-0.234153,0
1,-0.234137,1.579213,0.767435,-0.469474,0.54256,1
2,-0.463418,-0.46573,0.241962,-1.91328,-1.724918,1
3,-0.562288,-1.012831,0.314247,-0.908024,-1.412304,0
4,1.465649,-0.225776,0.067528,-1.424748,-0.544383,1


In [17]:
# Show the DataFrame dimensions as (rows, columns).
df.shape

(20000, 6)

Split the dataset into training and test sets

In [18]:
def train_test(x,y,test_size=0.2,random_state=1):
    """Shuffle paired samples and split them into training and test sets.

    Parameters
    ----------
    x : iterable of samples (e.g. a 2-D array whose rows are samples).
    y : iterable of labels, one per sample in ``x``.
    test_size : fraction of the samples placed in the test split.
    random_state : seed for the shuffle; the same seed gives the same split.

    Returns
    -------
    tuple of four numpy arrays: ``(x_train, x_test, y_train, y_test)``.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` differ in length, or if they are empty.
    """
    x_rows=list(x)
    y_rows=list(y)
    # zip() would silently truncate to the shorter input and misalign the data.
    if len(x_rows)!=len(y_rows):
        raise ValueError(
            f"x and y must have the same length, got {len(x_rows)} and {len(y_rows)}"
        )
    if not x_rows:
        raise ValueError("cannot split an empty dataset")

    data=list(zip(x_rows,y_rows))
    # A private generator yields the same permutation as seeding the module-level
    # RNG, but leaves the global `random` state untouched for the rest of the program.
    random.Random(random_state).shuffle(data)
    x_s,y_s=zip(*data)

    # Index of the first test sample; everything before it is training data.
    split=int(len(x_s)*(1-test_size))
    x_tr=x_s[:split]
    x_te=x_s[split:]
    y_tr=y_s[:split]
    y_te=y_s[split:]
    return np.array(x_tr),np.array(x_te),np.array(y_tr),np.array(y_te)

# Every column except 'Target' is a model input; 'Target' holds the labels.
x = df.drop('Target', axis=1).values
y = df.loc[:, 'Target'].values
x_train, x_test, y_train, y_test = train_test(
    x,
    y,
    test_size=0.2,
    random_state=2,
)

Implementing Logistic Regression from scratch

In [19]:
class LogisticRegression:
    """Binary logistic regression trained with full-batch gradient descent."""

    def __init__(self,learning_rate=0.1,epochs=1001):
        """Store the hyperparameters; the model parameters are created by ``fit``.

        Parameters
        ----------
        learning_rate : step size applied to each gradient update.
        epochs : number of gradient-descent iterations run by ``fit``.
        """
        self.weights=None
        self.learning_rate=learning_rate
        self.epochs=epochs
        self.bias=None
        # Cost recorded at every epoch of the most recent ``fit`` call.
        self.costs = []

    def sigmoid(self,z):
        """Return the logistic function 1 / (1 + exp(-z)) element-wise.

        The exponential is only ever taken of a non-positive number, so large
        negative inputs do not overflow the way ``np.exp(-z)`` would.
        """
        z=np.asarray(z,dtype=float)
        e=np.exp(-np.abs(z))  # always in (0, 1]
        # z >= 0: 1/(1+exp(-z));  z < 0: exp(z)/(1+exp(z)) -- the same value.
        return np.where(z>=0,1/(1+e),e/(1+e))

    def cost_function(self,y,h):
        """Return the mean binary cross-entropy between labels ``y`` and probabilities ``h``."""
        # Keep probabilities away from exactly 0 and 1 so the logs stay finite.
        h = np.clip(h, 1e-15, 1 - 1e-15)
        return -np.mean(y*np.log(h)+(1-y)*np.log(1-h))

    def fit(self,x,y):
        """Learn ``weights`` and ``bias`` from the training data.

        Parameters
        ----------
        x : 2-D array-like of shape (n_samples, n_features).
        y : array-like of n_samples binary labels; it is flattened to 1-D.

        Raises
        ------
        ValueError
            If ``x`` is not 2-D or ``y`` does not hold one label per row of ``x``.
        """
        x=np.asarray(x,dtype=float)
        # Flatten so a column vector (n, 1) cannot broadcast against h into (n, n).
        y=np.asarray(y,dtype=float).reshape(-1)
        if x.ndim!=2:
            raise ValueError(f"x must be 2-D (n_samples, n_features), got shape {x.shape}")
        n_sample,feature=x.shape
        if y.shape[0]!=n_sample:
            raise ValueError(
                f"x and y disagree on sample count: {n_sample} vs {y.shape[0]}"
            )

        self.weights=np.zeros(feature)
        self.bias=0.0
        # Start a fresh history so refitting does not append to an earlier run.
        self.costs=[]

        for _ in range(self.epochs):
            z=x@self.weights+self.bias
            h=self.sigmoid(z)

            # The cost is logged before this epoch's parameter update.
            self.costs.append(self.cost_function(y,h))

            # Gradients of the mean cross-entropy w.r.t. weights and bias.
            residual=h-y
            dw=(1/n_sample)*(x.T@residual)
            db=(1/n_sample)*np.sum(residual)

            self.weights-=self.learning_rate*dw
            self.bias-=self.learning_rate*db

    def probab(self,x):
        """Return the predicted probability of class 1 for each row of ``x``."""
        z=x@self.weights+self.bias
        return self.sigmoid(z)

    def predict(self,x):
        """Return a list of 0/1 labels; a row is labelled 1 when its probability exceeds 0.5."""
        s=self.probab(x)
        return (s>0.5).astype(int).tolist()



In [20]:
# Train the classifier on the training split, then label the held-out test rows.
lr = LogisticRegression(epochs=500, learning_rate=0.01)
lr.fit(x_train, y_train)
y_predict = lr.predict(x_test)

Accuracy of the model

In [22]:
def accuracy(y_true, y_pred):
    """Return the fraction of predictions that equal the true labels.

    Both inputs are converted to flat numpy arrays first, so plain Python
    lists are compared element by element instead of as whole objects, and a
    column vector is compared against a flat vector position by position.

    Raises
    ------
    ValueError
        If the two inputs do not contain the same number of labels.
    """
    true_labels = np.asarray(y_true).ravel()
    predicted_labels = np.asarray(y_pred).ravel()
    if true_labels.size != predicted_labels.size:
        raise ValueError(
            f"y_true and y_pred must have the same number of labels, "
            f"got {true_labels.size} and {predicted_labels.size}"
        )
    return np.mean(true_labels == predicted_labels)

# Report the test-set accuracy as a percentage.
print("Accuracy is:", 100 * accuracy(y_test, y_predict))

Accuracy is: 99.875
