In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [12]:
# Sigmoid function
def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` of ``z``, element-wise.

    Parameters
    ----------
    z : scalar or NumPy array
        Input value(s).

    Returns
    -------
    Whatever ``1 / (1 + np.exp(-z))`` evaluates to for ``z``; every value
    lies in the closed interval [0, 1].
    """
    # For very negative z, exp(-z) overflows to +inf and the quotient becomes
    # exactly 0.0, which is the correct saturated value.  Silence only that
    # overflow warning so long training runs are not flooded with it.
    with np.errstate(over='ignore'):
        return 1 / (1 + np.exp(-z))

In [13]:
# function to calculate the cost (binary cross-entropy / log loss, not MSE)
def compute_cost(X, y, theta, eps=1e-15):
    """Mean binary cross-entropy of the logistic model ``sigmoid(X.dot(theta))``.

    Parameters
    ----------
    X : feature matrix
        Must support ``X.dot(theta)``.
    y : label vector
        The formula treats it as a 0/1 target (assumption: labels are
        encoded as 0 and 1 -- confirm against the data-loading cell).
    theta : coefficient vector
    eps : float, optional
        Predicted probabilities are clipped to ``[eps, 1 - eps]`` before the
        logarithm is taken.

    Returns
    -------
    The summed per-sample loss divided by ``len(y)``.
    """
    m = len(y)
    # Without clipping, a prediction that saturates to exactly 0.0 or 1.0
    # yields log(0) = -inf, and the 0 * -inf term of a correctly classified
    # sample turns the whole sum into nan.
    y_pred = np.clip(sigmoid(X.dot(theta)), eps, 1 - eps)
    cost = np.sum(-y * np.log(y_pred) - (1 - y) * np.log(1 - y_pred)) / m
    return cost

In [14]:
# function to calculate the F1 score and the accuracy
def compute_f1_score(X, y, theta):
    """Score the thresholded predictions for ``X`` against the labels ``y``.

    Parameters
    ----------
    X : feature matrix
    y : label vector compared against 0 and 1
    theta : coefficient vector

    Returns
    -------
    tuple
        ``(f1_score, accuracy)``.  Any ratio whose denominator is zero
        (no predicted positives, no actual positives, or no samples) is
        reported as ``0.0`` instead of nan.
    """
    # Reuse the shared prediction helper rather than repeating the cutoff logic.
    y_pred = predict(X, theta)

    tp = np.sum((y == 1) & (y_pred == 1))
    tn = np.sum((y == 0) & (y_pred == 0))
    fp = np.sum((y == 0) & (y_pred == 1))
    fn = np.sum((y == 1) & (y_pred == 0))

    predicted_positive = tp + fp
    actual_positive = tp + fn
    total = tp + tn + fp + fn

    # Guard every division: a model that predicts a single class would
    # otherwise produce 0/0 = nan together with a RuntimeWarning.
    precision = tp / predicted_positive if predicted_positive > 0 else 0.0
    recall = tp / actual_positive if actual_positive > 0 else 0.0
    accuracy = (tp + tn) / total if total > 0 else 0.0

    if precision + recall > 0:
        f1_score = 2 * precision * recall / (precision + recall)
    else:
        f1_score = 0.0
    return f1_score, accuracy

In [15]:
# predict function
def predict(X, theta, threshold=0.5):
    """Turn the model's probabilities for ``X`` into hard 0/1 labels.

    Parameters
    ----------
    X : feature matrix
        Must support ``X.dot(theta)``.
    theta : coefficient vector
    threshold : float, optional
        Probabilities greater than or equal to this value map to 1, all
        others to 0.  Defaults to 0.5.

    Returns
    -------
    numpy.ndarray
        Array of 0s and 1s produced by ``np.where``.
    """
    probabilities = sigmoid(X.dot(theta))
    return np.where(probabilities >= threshold, 1, 0)

In [16]:
# Cost history: notebook-level list, created empty
cost_history = list()

In [17]:
# perform gradient descent
def gradient_descent(X, y, theta, alpha, num_iterations):
    """Fit logistic-regression coefficients with batch gradient descent.

    Parameters
    ----------
    X : feature matrix
        Must support ``X.dot(theta)`` and ``X.T.dot(error)``.
    y : label vector
    theta : initial coefficient vector (left unmodified)
    alpha : learning rate (step size)
    num_iterations : number of update steps to perform

    Returns
    -------
    tuple
        ``(theta, cost_history, f1_score)`` where ``theta`` is the fitted
        coefficient array, ``cost_history`` is a list holding the value of
        ``compute_cost`` after each update of this run, and ``f1_score`` is
        whatever ``compute_f1_score`` returns for the final coefficients
        (an ``(f1, accuracy)`` pair).
    """
    # Work on a float copy so the caller's array is not updated in place.
    theta = np.array(theta, dtype=float)
    # Fresh list per call: repeated runs must not pile up in one shared history.
    history = []
    m = len(X)

    for _ in range(num_iterations):
        error = sigmoid(X.dot(theta)) - y
        gradient = X.T.dot(error) / m
        theta -= alpha * gradient
        history.append(compute_cost(X, y, theta))

    # Only the final score is returned, so compute it once after the loop.
    # This also keeps the function usable when num_iterations == 0.
    f1_score = compute_f1_score(X, y, theta)
    return theta, history, f1_score

In [18]:
# perform gradient descent with regularization (L2)
def gradient_descent_regularization(X, y, theta, alpha, num_iterations, lambd):
    """Fit logistic-regression coefficients with L2-regularized gradient descent.

    The first coefficient (``theta[0]``) is updated without shrinkage; all
    remaining coefficients are scaled by ``1 - alpha * lambd / len(X)`` on
    every step before the gradient step is applied.

    Parameters
    ----------
    X : feature matrix
        Must support ``X.dot(theta)`` and ``X.T.dot(error)``.
    y : label vector
    theta : initial coefficient vector (left unmodified)
    alpha : learning rate (step size)
    num_iterations : number of update steps to perform
    lambd : L2 regularization strength

    Returns
    -------
    tuple
        ``(theta, cost_history, f1_score)`` where ``theta`` is the fitted
        coefficient array, ``cost_history`` is a list holding the value of
        ``compute_cost`` after each update of this run (the penalty term is
        not included in that value), and ``f1_score`` is whatever
        ``compute_f1_score`` returns for the final coefficients
        (an ``(f1, accuracy)`` pair).
    """
    # Work on a float copy so the caller's array is not updated in place.
    theta = np.array(theta, dtype=float)
    # Fresh list per call: repeated runs must not pile up in one shared history.
    history = []
    m = len(X)
    # Loop-invariant weight-decay factor applied to the penalized coefficients.
    shrink = 1 - alpha * lambd / m

    for _ in range(num_iterations):
        error = sigmoid(X.dot(theta)) - y
        gradient = X.T.dot(error) / m
        theta[0] -= alpha * gradient[0]
        theta[1:] = shrink * theta[1:] - alpha * gradient[1:]
        history.append(compute_cost(X, y, theta))

    # Only the final score is returned, so compute it once after the loop.
    # This also keeps the function usable when num_iterations == 0.
    f1_score = compute_f1_score(X, y, theta)
    return theta, history, f1_score

In [19]:
# plot the cost function as a function of the iteration
def plot_cost_function(cost_history):
    """Draw the recorded cost values against their iteration index and show the plot.

    Parameters
    ----------
    cost_history : sequence of cost values, one entry per iteration
    """
    iterations = range(len(cost_history))
    plt.plot(iterations, cost_history)
    plt.ylabel('Cost')
    plt.xlabel('Iteration')
    plt.show()

In [None]:


# hyperparameters
# NOTE(review): presumably passed to gradient_descent /
# gradient_descent_regularization in a later cell that is not visible
# here -- confirm.
# alpha: learning rate (step size)
alpha = 0.001
# num_iterations: number of update steps; NOTE(review): 800k steps may make
# this notebook slow to re-run end to end.
num_iterations = 800000
# lambd: L2 regularization strength
lambd = 0.1

# initialize coefficients
# One zero coefficient per column of X.  X is not defined in any of the
# visible cells, so this cell relies on X already existing in the kernel --
# TODO confirm which cell creates it.
theta = np.zeros(X.shape[1])
