# FOML_ASSIGNMENT_4

    Name: ANTALA AVIRAJ (CS24MTECH14011)

###    Question 5_a) Logistic Regression:

In [6]:
import numpy as np
import pandas as pd

# Logistic (sigmoid) function to compute probabilities
def logistic_function(x, theta):
    """Return sigmoid(x . theta), the modelled probability of class 1.

    Parameters
    ----------
    x : array_like
        Feature vector or matrix; combined with ``theta`` via ``np.dot``.
    theta : array_like
        Parameter vector.

    Returns
    -------
    numpy.ndarray or numpy.float64
        ``1 / (1 + exp(-z))`` for ``z = np.dot(x, theta)``, element-wise,
        with the same shape as ``z``.

    Notes
    -----
    The textbook form ``1 / (1 + exp(-z))`` overflows inside ``exp`` when
    ``z`` is a large negative number and emits a RuntimeWarning. Here the
    exponential is only ever taken of a non-positive number, so it stays in
    (0, 1] and cannot overflow.
    """
    z = np.dot(x, theta)  # Linear combination of features and parameters
    decay = np.exp(-np.abs(z))  # exp of a non-positive value: never overflows
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- same function
    probs = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # [()] turns a 0-d result back into a scalar and leaves real arrays as-is
    return probs[()]

# Cross-entropy error calculation
def cross_entropy(y_true, y_pred):
    """Mean binary cross-entropy between labels and predicted probabilities.

    Parameters
    ----------
    y_true : numpy.ndarray
        Target labels (0 or 1).
    y_pred : array_like
        Predicted probabilities of class 1.

    Returns
    -------
    float
        ``-mean(y * log(p) + (1 - y) * log(1 - p))`` where ``p`` is
        ``y_pred`` clipped to ``[1e-10, 1 - 1e-10]``.
    """
    tiny = 1e-10
    # Keep probabilities strictly inside (0, 1) so neither log sees zero
    p = np.clip(y_pred, tiny, 1 - tiny)
    per_sample = y_true * np.log(p) + (1 - y_true) * np.log(1 - p)
    return -np.mean(per_sample)

# Gradient descent optimization
def gradient_descent(X, y, theta, learning_rate, iterations=1000, tolerance=1e-6):
    """Fit logistic-regression parameters by batch gradient descent.

    Parameters
    ----------
    X : numpy.ndarray
        Design matrix with one row per sample.
    y : array_like
        Target labels, one per row of ``X``.
    theta : numpy.ndarray
        Initial parameter vector.
    learning_rate : float
        Step size applied to the gradient.
    iterations : int, optional
        Maximum number of updates to perform.
    tolerance : float, optional
        Stop early once the L2 norm of a parameter update falls below this.

    Returns
    -------
    numpy.ndarray
        The parameters after the final update. If the loop runs out of
        iterations before the tolerance is met, the last iterate is
        returned as-is.
    """
    n_samples = len(y)
    for _ in range(iterations):
        residual = logistic_function(X, theta) - y
        grad = np.dot(X.T, residual) / n_samples
        candidate = theta - learning_rate * grad
        # Measure the step before overwriting theta
        step_size = np.linalg.norm(candidate - theta, ord=2)
        theta = candidate
        if step_size < tolerance:
            break
    return theta


###    Question 5_b_i) Logistic Regression:



#### Logistic Model:
$$
P(\hat{y} = 1 \mid x_1, x_2) = \frac{1}{1 + \exp\left(-(\theta_0 + \theta_1 x_1 + \theta_2 x_2)\right)}
$$

<br>

#### Cross-Entropy Error Function:
$$
E(\theta) = -\frac{1}{n} \sum_{i=1}^{n} \left[ y_i \log P(\hat{y}_i = 1 \mid x_{i1}, x_{i2}) + (1 - y_i) \log\left(1 - P(\hat{y}_i = 1 \mid x_{i1}, x_{i2})\right) \right]
$$


###    Question 5_b_ii) Logistic Regression:

In [7]:
# Training set: two features (x1, x2) and a binary label y
train_data = {
    'x1': [0.346, 0.303, 0.358, 0.602, 0.790, 0.611],
    'x2': [0.780, 0.439, 0.729, 0.863, 0.753, 0.965],
    'y': [0, 0, 0, 1, 1, 1]
}
train_df = pd.DataFrame(train_data)

# Starting parameters [theta_0, theta_1, theta_2] and gradient-descent step size
theta = np.array([-1, 1.5, 0.5])
learning_rate = 0.1

# Design matrix: a leading column of ones (bias term) followed by x1 and x2
X_train = np.column_stack([np.ones(len(train_df)), train_df[['x1', 'x2']].to_numpy()])
y_train = train_df['y'].to_numpy()

# A single gradient-descent step, written out so every intermediate is visible
y_pred_train = logistic_function(X_train, theta)   # current predicted probabilities
error = y_pred_train - y_train                     # prediction minus label
gradient = X_train.T.dot(error) / len(y_train)     # mean gradient of the loss w.r.t. theta
theta_updated = theta - learning_rate * gradient   # move against the gradient

# Report the parameters after this first update
print("Updated Theta after one iteration:", theta_updated, sep="\n")


Updated Theta after one iteration:
[-1.00316626  1.50535086  0.50196867]


In [8]:
# Held-out test set with the same columns as the training data
test_data = {
    'x1': [0.959, 0.750, 0.395, 0.823, 0.761, 0.844],
    'x2': [0.382, 0.306, 0.760, 0.764, 0.874, 0.435],
    'y': [0, 0, 0, 1, 1, 1]
}
test_df = pd.DataFrame(test_data)

# Fit on the training set. gradient_descent is called with its default
# iteration cap and tolerance, so it stops at whichever is reached first.
theta_converged = gradient_descent(X_train, y_train, theta, learning_rate)

# Test design matrix: bias column of ones followed by x1 and x2
X_test = np.column_stack([np.ones(len(test_df)), test_df[['x1', 'x2']].to_numpy()])
y_test = test_df['y'].to_numpy()

# Threshold the predicted probabilities at 0.5 to obtain boolean class labels
y_pred_test = logistic_function(X_test, theta_converged) >= 0.5

# Counts shared by the metrics below
predicted_pos = np.sum(y_pred_test == 1)
actual_pos = np.sum(y_test == 1)
true_pos = np.sum((y_pred_test == 1) & (y_test == 1))

accuracy = np.mean(y_pred_test == y_test) * 100  # percentage of correct labels
# Fall back to 0 when the denominator would be zero
precision = true_pos / predicted_pos if predicted_pos > 0 else 0
recall = true_pos / actual_pos if actual_pos > 0 else 0

# Assemble the report line by line, then print it in one go
rule = "=" * 35
report_lines = [
    rule,
    "       Evaluation Metrics Report       ",
    rule,
    f"{'Metric':<15}{'Value':>20}",
    "-" * 35,
    f"{'Accuracy':<15}{accuracy:>20.2f}%",
    f"{'Precision':<15}{precision:>20.2f}",
    f"{'Recall':<15}{recall:>20.2f}",
    rule,
]
print("\n".join(report_lines))


       Evaluation Metrics Report       
Metric                        Value
-----------------------------------
Accuracy                      66.67%
Precision                      0.60
Recall                         1.00
