#### Logistic Regression using NumPy

---

In [None]:
# Importing libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from math import exp 

# Default size applied to every figure drawn in this notebook
# (matplotlib interprets the tuple as width, height in inches).
plt.rcParams["figure.figsize"] = (10, 6)

In [None]:
# Read the CSV and keep its 'x' and 'y' columns under the names used for training below
data = pd.read_csv("data_small.csv")
X_train = data['x']
y_train = data['y']

# Show the frame's (rows, columns), then preview its first 20 rows as the cell output
print(data.shape)
data.head(20)

##### Training using Gradient Descent

---

In [None]:
# Creating the logistic regression model

# Function to make predictions (the sigmoid / logistic function)
def predict(X, b0, b1):
    """Return sigmoid(b0 + b1 * x) for every x in X.

    Parameters
    ----------
    X : array-like of numbers
        One-dimensional collection of inputs (list, NumPy array, pandas Series, ...).
    b0 : float
        Intercept of the linear term.
    b1 : float
        Slope of the linear term.

    Returns
    -------
    numpy.ndarray
        One float per element of X, each equal to 1 / (1 + exp(-(b0 + b1 * x))).

    Notes
    -----
    The sigmoid is evaluated piecewise so that exp() is only ever called on
    non-positive arguments. A direct 1 / (1 + exp(-z)) overflows for strongly
    negative z (math.exp raises OverflowError), which aborts training as soon
    as the parameters become large.
    """
    z = b0 + b1 * np.atleast_1d(np.asarray(X, dtype=float))
    proba = np.empty_like(z)

    non_negative = z >= 0
    # z >= 0: exp(-z) <= 1, so the textbook form is safe.
    proba[non_negative] = 1.0 / (1.0 + np.exp(-z[non_negative]))
    # z < 0: rewrite as exp(z) / (1 + exp(z)) so the exponent stays negative.
    exp_z = np.exp(z[~non_negative])
    proba[~non_negative] = exp_z / (1.0 + exp_z)
    return proba

# Function to train the model
def logistic_regression(X, Y, learning_rate=0.1, total_epochs=20, report_edge=5):
    """Fit b0 and b1 of sigmoid(b0 + b1 * x) with batch gradient descent.

    Starting from b0 = b1 = 0, each epoch moves both parameters against the
    summed gradient of the cross-entropy loss and prints a progress report
    for the first and last few epochs.

    Parameters
    ----------
    X : array-like of numbers
        One-dimensional inputs.
    Y : array-like of numbers
        Targets, same length as X (expected to be 0/1 labels).
    learning_rate : float, default 0.1
        Step size applied to the summed (not averaged) gradients.
    total_epochs : int, default 20
        Number of parameter updates to perform.
    report_edge : int, default 5
        Epochs numbered below this value, or within this many of the end,
        are printed. Epoch 0 (the initial state) is always printed.

    Returns
    -------
    tuple
        (b0, b1) after the last update.
    """
    # Work on plain float arrays so arithmetic is positional, not index-aligned.
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y, dtype=float)

    # Initializing parameters
    b0 = 0
    b1 = 0

    D_b0, D_b1, loss_function = _gradients_and_loss(X, Y, b0, b1)
    _print_epoch_report(0, b0, b1, D_b0, D_b1, loss_function)

    for epoch in range(1, total_epochs + 1):
        b0 = b0 - learning_rate * D_b0  # update parameters
        b1 = b1 - learning_rate * D_b1
        D_b0, D_b1, loss_function = _gradients_and_loss(X, Y, b0, b1)

        # Print only the first/last `report_edge` iterations
        if epoch < report_edge or epoch > total_epochs - report_edge:
            _print_epoch_report(epoch, b0, b1, D_b0, D_b1, loss_function)

    return b0, b1


def _gradients_and_loss(X, Y, b0, b1):
    """Return (D_b0, D_b1, loss) of the cross-entropy at parameters (b0, b1)."""
    y_pred = np.asarray(predict(X, b0, b1), dtype=float)  # y_hat from the sigmoid
    error = y_pred - Y
    D_b0 = np.sum(error)      # Derivative of loss wrt b0
    D_b1 = np.sum(error * X)  # Derivative of loss wrt b1

    # Natural log: it is the loss whose derivatives are the two sums above.
    # Clipping keeps log() finite when a prediction saturates at exactly 0 or 1.
    eps = np.finfo(float).eps
    safe_pred = np.clip(y_pred, eps, 1 - eps)
    loss = -np.sum(Y * np.log(safe_pred) + (1 - Y) * np.log(1 - safe_pred))
    return D_b0, D_b1, loss


def _print_epoch_report(epoch, b0, b1, D_b0, D_b1, loss_function):
    """Print the parameters, gradient sums and loss for one epoch."""
    print(f' Paramteres of b0 = {b0} and b1 = {b1}')
    print(f' Sum of partial derivatives of b0 = {D_b0} and b1 = {D_b1}')
    print(f' Loss function = {loss_function}')
    print(f'---- End of epoch: {epoch}')
    print()

In [None]:
# Execute the main function to train the model and print the results;
# the returned intercept (b0) and slope (b1) are kept for the evaluation cells below.
b0, b1 = logistic_regression(X_train, y_train)

##### Evaluate model

---

In [None]:
# Final sigmoid outputs for every training input, using the fitted parameters
y_pred = predict(X_train, b0, b1)

# Turn each sigmoid output into a hard label: 1 when it is at least 0.5, otherwise 0
y_pred_class = np.where(y_pred >= 0.5, 1, 0).tolist()

In [None]:
# Plot y_pred vs y: sigmoid outputs (red) drawn over the true labels

plt.clf()
ax = plt.gca()  # axes of the freshly cleared current figure

ax.scatter(X_train, y_train, label='y')
ax.scatter(X_train, y_pred, c="red", label='y_hat')

ax.set_title('y_pred vs y')
ax.set_xlabel('X')
ax.set_ylabel('Class/Predicted')
ax.legend()

plt.show()

In [None]:
# Plot y_pred labeled vs y: thresholded 0/1 predictions (red) drawn over the true labels

plt.clf()
plt.scatter(X_train, y_train, label='y')
plt.scatter(X_train, y_pred_class, c="red", label='y_pred labeled')

plt.ylabel('Class/Predicted')
plt.xlabel('X')
# The title names the thresholded labels so this figure cannot be mistaken
# for a plot of the raw sigmoid outputs.
plt.title('y_pred labeled vs y')
plt.legend()
plt.show()

# The accuracy: share of thresholded predictions that equal the true label.
# Note that `accuracy` stores the raw number of matches; the ratio is only formed when printing.
accuracy = sum(
    1
    for predicted, actual in zip(y_pred_class, y_train)
    if predicted == actual
)
print(f"Accuracy = {accuracy / len(y_pred_class)}")

#### Logistic Regression using scikit-learn

In [None]:
from sklearn.linear_model import LogisticRegression

# References: https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html

# scikit-learn wants a 2-D feature matrix (n_samples, n_features) and a 1-D target (n_samples,).
# Passing the target as a column vector makes fit() emit a DataConversionWarning,
# so the two shapes are prepared once here and reused for fit, predict and score.
X_train_2d = X_train.values.reshape(-1, 1)
y_train_1d = y_train.values.ravel()

# Create an instance and fit the model
solver_list = ['lbfgs', 'liblinear', 'newton-cg', 'sag', 'saga']
lr_model = LogisticRegression(solver=solver_list[0])
lr_model.fit(X_train_2d, y_train_1d)

# Making predictions (hard class labels) on the training inputs
y_pred_sk = lr_model.predict(X_train_2d)

# Predicted labels (red) drawn over the true labels
plt.clf()
plt.scatter(X_train, y_train, label='y')
plt.scatter(X_train, y_pred_sk, c="red", label='y_pred labeled')
plt.ylabel('Class/Predicted')
plt.xlabel('X')
plt.title('y_pred labeled vs y (scikit-learn)')
plt.legend()
plt.show()

# Accuracy: score() returns the mean accuracy of the model on the given data
print(f"Accuracy = {lr_model.score(X_train_2d, y_train_1d)}")