### Importing Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

### Loading, Cleaning, and Splitting the Dataset

In [None]:
df = pd.read_csv('The_Cancer_data_1500_V2.csv')

# Drop any rows with missing values
df = df.dropna()

# Separate features (X) and target variable (y)
X = df.drop('Outcome', axis=1).values
y = df['Outcome'].values

from sklearn.model_selection import train_test_split

# Split the data into training and test sets (70% training, 30% testing).
# The split is done on the raw features, BEFORE standardization, so that the
# scaling statistics below are computed without looking at any test row.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardize features using statistics of the training set only; computing
# the mean/std on the full dataset would leak test information into training.
feature_mean = np.mean(X_train, axis=0)
feature_std = np.std(X_train, axis=0)
# A constant column has std == 0; divide by 1 instead so it becomes all zeros
# rather than NaN (0 / 0).
feature_std = np.where(feature_std == 0, 1.0, feature_std)

X_train = (X_train - feature_mean) / feature_std
X_test = (X_test - feature_mean) / feature_std
# Keep X standardized as well (same training-set statistics) for any later use.
X = (X - feature_mean) / feature_std

### Defining Functions for Logistic Regression with Regularization

In [None]:
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), element-wise.

    Args:
        z: A scalar or array-like of real numbers.

    Returns:
        A NumPy float scalar for scalar input, otherwise an array with the
        same shape as ``z``; every value lies in [0, 1].
    """
    z = np.asarray(z)
    if not np.issubdtype(z.dtype, np.floating):
        z = z.astype(float)

    # Only ever exponentiate a non-positive number: exp(-|z|) is in (0, 1],
    # so it cannot overflow the way exp(-z) does for large negative z.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- the same function.
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))

    # Indexing with an empty tuple turns a 0-d array back into a scalar and
    # leaves arrays of higher rank unchanged.
    return result[()]

def loss_function(X, y, theta, lambda_reg):
    """Return the L2-regularized logistic-regression (cross-entropy) loss.

    Args:
        X: Design matrix with one row per sample; ``X.dot(theta)`` must give
            one score per sample.
        y: Array of 0/1 labels, one per sample.
        theta: Parameter vector. ``theta[0]`` is excluded from the penalty.
        lambda_reg: L2 regularization strength.

    Returns:
        The mean cross-entropy over the samples plus
        ``lambda_reg / (2 * m) * sum(theta[1:] ** 2)``, where ``m = len(y)``.
    """
    m = len(y)
    scores = X.dot(theta)

    # With h = sigmoid(z):  -[y*log(h) + (1-y)*log(1-h)] == log(1 + e^z) - y*z.
    # Evaluating the right-hand side via logaddexp never takes log(0), so the
    # loss stays finite even when the sigmoid would saturate to exactly 0 or 1.
    per_sample = np.logaddexp(0, scores) - y * scores

    reg_term = (lambda_reg / (2 * m)) * np.sum(theta[1:] ** 2)  # regularization term
    return np.sum(per_sample) / m + reg_term

def gradient_descent(X, y, theta, alpha, lambda_reg, num_iters):
    """Fit logistic-regression parameters with batch gradient descent.

    Args:
        X: Design matrix, one row per sample.
        y: Array of labels, one per sample.
        theta: Initial parameter vector; it is rebound, not modified in place.
        alpha: Learning rate (step size).
        lambda_reg: L2 regularization strength; ``theta[0]`` is not penalized.
        num_iters: Number of update steps to perform.

    Returns:
        A tuple ``(theta, loss_history)`` holding the final parameters and the
        regularized loss recorded after each update.
    """
    n_samples = len(y)
    losses = []

    for _ in range(num_iters):
        # Prediction error of the current parameters on every sample.
        residual = sigmoid(X.dot(theta)) - y
        data_grad = (1 / n_samples) * X.T.dot(residual)

        # Penalty gradient with a zero in the first slot, so theta[0] is
        # never shrunk by the regularizer.
        penalized = np.concatenate(([0], theta[1:]))
        penalty_grad = (lambda_reg / n_samples) * penalized

        theta = theta - alpha * (data_grad + penalty_grad)

        # Record the loss of the freshly updated parameters.
        losses.append(loss_function(X, y, theta, lambda_reg))

    return theta, losses


### Train the Regularized Logistic Regression Model on Training Data

In [None]:
# Prepend a column of ones to both splits so theta[0] acts as the intercept
X_train = np.concatenate((np.ones((len(X_train), 1)), X_train), axis=1)
X_test = np.concatenate((np.ones((len(X_test), 1)), X_test), axis=1)

# Hyperparameters: learning rate, L2 regularization strength, iteration count
alpha, lambda_reg, num_iters = 0.01, 1, 1000

# Start from the all-zero parameter vector (one entry per column of X_train)
theta = np.zeros(X_train.shape[1])

# Fit on the training split; loss_history holds one loss value per iteration
theta, loss_history = gradient_descent(
    X_train, y_train, theta, alpha, lambda_reg, num_iters
)

### Evaluate the Model on Test Data and Plot Loss Function

In [None]:
def predict(X, theta):
    """Return 0/1 class predictions for the rows of ``X``.

    A row is labelled 1 when its sigmoid output is at least 0.5, else 0.
    """
    probabilities = sigmoid(X.dot(theta))
    is_positive = probabilities >= 0.5
    return is_positive.astype(int)

# Regularized loss of the fitted parameters on the held-out test split
test_loss = loss_function(X_test, y_test, theta, lambda_reg)
print(f"Test Loss: {test_loss}")

# Training-loss curve: one point per gradient-descent iteration (1-based)
iterations = np.arange(1, num_iters + 1)
loss_axes = plt.figure().add_subplot(111)
loss_axes.plot(iterations, loss_history, color='b', label='Training Loss')
loss_axes.set_xlabel('Iterations')
loss_axes.set_ylabel('Loss Function')
loss_axes.set_title('Loss Function vs. Iterations')
loss_axes.legend()
loss_axes.grid(True)
plt.show()
