In [1]:
# Import libraries
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [2]:
# Step 1: Load the Iris dataset
iris = load_iris()

# Pull out the feature matrix (X) and the target vector (y) in one go
X, y = iris.data, iris.target

In [3]:
# Step 2: Preprocess the data
# Convert to binary classification: Iris Setosa (original class 0) is mapped
# to label 1, every other species is mapped to label 0
y_binary = np.equal(y, 0).astype(int)

In [4]:
# Step 3: Split the dataset
# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_binary,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Step 4: Define the cost function (logistic loss)
def logistic_loss(y_true, y_pred):
    """Return the per-sample binary cross-entropy (logistic) loss.

    Parameters
    ----------
    y_true : array-like
        True binary labels (0 or 1).
    y_pred : array-like
        Predicted probabilities of the positive class; must broadcast
        against ``y_true``.

    Returns
    -------
    numpy.ndarray
        Element-wise loss ``-(y*log(p) + (1-y)*log(1-p))`` (a NumPy scalar
        for scalar inputs). The values are NOT averaged; call ``.mean()``
        on the result to obtain a single scalar cost.
    """
    epsilon = 1e-15   # Small constant to avoid log(0)
    # Coerce to float arrays so plain Python lists/tuples work as well
    # (``1 - y_true`` would raise TypeError for a list).
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.clip(np.asarray(y_pred, dtype=float), epsilon, 1 - epsilon)
    return -(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
# Sanity-check the loss on a small hand-made example
y_true = np.array([1, 0, 1, 0, 1])  # ground-truth binary labels
y_pred = np.array([0.9, 0.1, 0.8, 0.2, 0.7])  # predicted probabilities

# Evaluate the loss for each (label, probability) pair
loss = logistic_loss(y_true=y_true, y_pred=y_pred)

# Show the per-sample losses
print("Logistic Loss:", loss)

Logistic Loss: [0.10536052 0.10536052 0.22314355 0.22314355 0.35667494]


In [6]:
# Step 5: Define the training function (Gradient Descent)
# This function takes input data, a learning rate, a number of iterations,
# and a regularization parameter as arguments
def train_logistic_regression(X, y, learning_rate, num_iterations, reg_param):
    """Fit a logistic regression model with full-batch gradient descent.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Training features.
    y : array-like of length n_samples
        Binary targets (0 or 1).
    learning_rate : float
        Step size applied to every gradient update.
    num_iterations : int
        Number of gradient-descent updates to perform.
    reg_param : float
        L2 regularization strength. The penalty gradient is
        ``(reg_param / n_samples) * W``; the bias is not regularized.

    Returns
    -------
    (W, b) : tuple
        ``W`` is the weight vector of shape (n_features,), ``b`` the bias.

    Raises
    ------
    ValueError
        If ``X`` is not 2-D, is empty, or its sample count differs from ``y``.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).ravel()
    if X.ndim != 2:
        raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
    n_samples, n_features = X.shape
    if n_samples == 0:
        raise ValueError("X must contain at least one sample")
    if y.shape[0] != n_samples:
        raise ValueError("X and y must have the same number of samples")

    W = np.zeros(n_features)
    b = 0.0

    for _ in range(num_iterations):
        # Raw predictions (log-odds)
        z = np.dot(X, W) + b
        # Sigmoid without overflow: only exp(-|z|) <= 1 is ever evaluated.
        # For z >= 0 this is 1 / (1 + exp(-z)); for z < 0 it is the
        # algebraically equal exp(z) / (1 + exp(z)).
        exp_neg_abs = np.exp(-np.abs(z))
        y_pred = np.where(z >= 0, 1.0, exp_neg_abs) / (1.0 + exp_neg_abs)

        # Gradients of the mean logistic loss
        error = y_pred - y
        dw = (1 / n_samples) * np.dot(X.T, error)
        db = (1 / n_samples) * np.sum(error)

        # L2 regularization (weights only)
        dw += (reg_param / n_samples) * W

        # Update W and b (weights and bias)
        W -= learning_rate * dw
        b -= learning_rate * db

    return W, b

In [7]:
# Step 6: Train the model
learning_rate = 0.01   # gradient-descent step size
num_iterations = 1000  # number of gradient-descent updates
reg_param = 0.1        # regularization parameter
# Fit the logistic regression weights and bias on the training split
W, b = train_logistic_regression(
    X_train,
    y_train,
    learning_rate=learning_rate,
    num_iterations=num_iterations,
    reg_param=reg_param,
)

In [8]:
# Step 7: Define the prediction function
def predict(X, W, b, threshold=0.5):
    """Predict binary class labels with a trained logistic regression model.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Samples to classify.
    W : array-like of shape (n_features,)
        Learned weight vector.
    b : float
        Learned bias term.
    threshold : float, default 0.5
        A sample is labelled 1 when its predicted probability is strictly
        greater than this value. The default reproduces ``np.round`` on the
        probability (NumPy rounds exactly 0.5 down to 0).

    Returns
    -------
    numpy.ndarray of int
        Predicted labels (0 or 1), one per sample.
    """
    z = np.dot(X, W) + b
    # Sigmoid without overflow: only exp(-|z|) <= 1 is ever evaluated.
    exp_neg_abs = np.exp(-np.abs(z))
    y_prob = np.where(z >= 0, 1.0, exp_neg_abs) / (1.0 + exp_neg_abs)
    return (y_prob > threshold).astype(int)

In [9]:
# Step 8: Predict on the test set
# Apply the trained weights and bias to the held-out samples
y_pred_test = predict(X=X_test, W=W, b=b)

In [10]:
# Step 9: Evaluate the model's performance
# All three metrics compare the true test labels with the predicted ones
confusion = confusion_matrix(y_true=y_test, y_pred=y_pred_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_test)
report = classification_report(y_true=y_test, y_pred=y_pred_test)

In [12]:
# Print the results
print("Final Accuracy:", accuracy)
print("Confusion Matrix:\n", confusion)
print("Classification Report:\n", report)
# Reading the confusion matrix (rows = true class, columns = predicted class;
# y_binary is 1 for Setosa, so Setosa is the positive class):
#   20 true negatives (TN): non-Setosa samples predicted as non-Setosa,
#   10 true positives (TP): Setosa samples predicted as Setosa,
#   and no false positives (FP) or false negatives (FN),
# which confirms the perfect accuracy.

Final Accuracy: 1.0
Confusion Matrix:
 [[20  0]
 [ 0 10]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        20
           1       1.00      1.00      1.00        10

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

