## Importing the libraries

In [1]:
import pandas as pd
import math
import random

In [2]:
df = pd.read_csv('logistic_regression.csv')

split_ratio = 0.6 #train-test

print(df)

     x1    x2  t
0  0.80  0.21  1
1  0.38  0.50  0
2  0.40  0.20  0
3  0.50  0.80  1
4  0.90  0.90  1
5  0.78  0.56  1
6  0.23  0.65  0
7  0.43  0.25  0
8  0.57  0.88  1
9  0.80  0.19  0


In [3]:
def train_test_split(dataset,split):
    
    list_of_records = dataset.to_numpy().tolist()
    
    number_of_records = list(dataset.shape)[0]
    
    train_set_length = int(round(split*number_of_records,0))
    test_set_length = number_of_records-train_set_length
    
    train_set = []
    test_set = []
    
    for i in range(train_set_length):
        train_set.append(0)
    
    for i in range(test_set_length):
        test_set.append(0)

    train_set_above = []
    test_set_above = []
    
    train_set_below = []
    test_set_below = []
    
    
    records_lt_threshold = [record for record in list_of_records if record[-1]==0]
    
    records_gt_threshold = [record for record in list_of_records if record[-1]==1]
    
    train_set_length = int(round(split*len(records_lt_threshold),0))
    test_set_length = len(records_lt_threshold)-train_set_length
    
    #split the zero outputs according to split ratio
    
    for record in records_lt_threshold:
        test_or_train = round(random.uniform(0,1),1)
        if len(train_set_below) == train_set_length:
            test_set_below.append(record)
        elif len(test_set_below) == test_set_length:
            train_set_below.append(record)
        elif test_or_train < split:
            train_set_below.append(record)
        else:
            test_set_below.append(record)
    
    #split the one outputs according to split ratio between train and test
    
    train_set_length = int(round(split*len(records_gt_threshold),0))
    test_set_length = len(records_gt_threshold)-train_set_length
    
    for record in records_gt_threshold:
        test_or_train = round(random.uniform(0,1),1)
        if len(train_set_above) == train_set_length:
            test_set_above.append(record)
        elif len(test_set_above) == test_set_length:
            train_set_above.append(record)
        elif test_or_train < split:
            train_set_above.append(record)
        else:
            test_set_above.append(record)
        
        
    index_above = 0
    index_below = 0
    
    #randomly assign the zero and one outputs to training set
    
    for i in range(len(train_set)):
        above_or_below = round(random.uniform(0,1),1)
        if index_above == len(train_set_above):
            train_set[i] = train_set_below[index_below]
            index_below += 1
        elif index_below == len(train_set_below):
            train_set[i] = train_set_above[index_above]
            index_above += 1
        elif above_or_below < 0.5:
            train_set[i] = train_set_below[index_below]
            index_below += 1
        else:
            train_set[i] = train_set_above[index_above]
            index_above += 1
    
    index_above = 0
    index_below = 0
    
    #randomly assign the zero and one outputs to test set
    for i in range(len(test_set)):
        above_or_below = round(random.uniform(0,1),1)
        if index_above == len(test_set_above):
            test_set[i] = test_set_below[index_below]
            index_below += 1
        elif index_below == len(test_set_below):
            test_set[i] = test_set_above[index_above]
            index_above += 1
        elif above_or_below < 0.5:
            test_set[i] = test_set_below[index_below]
            index_below += 1
        else:
            test_set[i] = test_set_above[index_above]
            index_above += 1
    
    return train_set,test_set

In [4]:
def sigmoid_function(x):
    return 1/(1+math.exp(-x))

In [5]:
def loss_function(target,predicted):
    return -target*math.log(predicted)-(1-target)*math.log(1-predicted) 

In [6]:
def find_changes(data):
    x1=data[0]
    x2=data[1]
    target=data[-2]
    predicted = data[-1]
    change = (predicted-target)
    loss_w1 = change*x1
    loss_w2 = change*x2
    loss_b = change
    return loss_w1,loss_w2,loss_b

In [7]:
def update_coefficients(data,w1,w2,b,learning_rate):
    derivate_loss_w1, derivate_loss_w2, derivate_loss_b = find_changes(data=data)
    w1 += (-learning_rate*derivate_loss_w1)
    w2 += (-learning_rate*derivate_loss_w2)
    b += (-learning_rate*derivate_loss_b)
    return (w1,w2,b)

In [13]:
def logistic_regression(dataset,split):
    
    w1 = random.uniform(0,1)
    w2 = random.uniform(0,1)
    b = 1
    learning_rate = 0.1
    
    train,test = train_test_split(dataset,split)
    
    iterations = 1000

    while iterations >= 0:
        for row in train:
            row_copy = row
            predicted = sigmoid_function(w1*row_copy[0]+w2*row_copy[1]+b)
            row_copy.append(predicted)
            w1,w2,b = update_coefficients(row_copy,w1,w2,b,learning_rate)
        iterations -= 1

    return w1,w2,b,test

In [14]:
w1,w2,b,test_set = logistic_regression(dataset=df,split=split_ratio)

print(w1,w2,b)

0.1311930880131156 0.5615327495068186 0.8375016083387408


## Testing

In [16]:
test_result = []

for record in test_set:
    output = sigmoid_function(record[0]*w1+record[1]*w2+b)
    test_result.append([record[-1],output])

for result in test_result:
    print(f'{result[0]}->{result[1]}')

1.0->0.7427627665877139
0.0->0.7742859480104796
1.0->0.7945093715105791
0.0->0.7406111239846894
