In [13]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_openml

# 1. Load the Titanic passenger table from OpenML as a pandas DataFrame
titanic = fetch_openml(name="titanic", version=1, as_frame=True)
df = titanic.frame

# 2. Preprocessing
# Keep the three numeric predictors plus the label, then discard every row
# that has a missing value in any of them (the training code cannot handle NaN).
feature_cols = ['age', 'fare', 'pclass']
target_col = 'survived'
df = df[feature_cols + [target_col]].dropna()

X = df[feature_cols].values
# Labels as an integer column vector of shape (n, 1)
y = df[[target_col]].astype(int).values.reshape(-1, 1)

# Standardize every feature column: subtract its mean, divide by its std.
# Features on a common scale keep the gradient-descent steps well behaved.
X_mean = X.mean(axis=0)
X_std = X.std(axis=0)
X_scaled = (X - X_mean) / X_std

# Prepend a column of ones so the first weight plays the role of the intercept
bias_column = np.ones((X_scaled.shape[0], 1))
X_scaled = np.hstack([bias_column, X_scaled])

# 3. Mathematical Functions
def sigmoid(z):
    """Return the logistic function 1 / (1 + e^-z), evaluated element-wise.

    The textbook formula overflows inside ``np.exp(-z)`` when ``z`` is a large
    negative number (NumPy emits a RuntimeWarning). This version only ever
    exponentiates ``-|z|``, which is never positive, and selects the
    algebraically equivalent expression for each sign of ``z``.

    Args:
        z: Scalar or array-like of real numbers.

    Returns:
        Values in [0, 1] with the same shape as ``z`` (a NumPy scalar when
        ``z`` is a scalar).
    """
    z = np.asarray(z)
    e = np.exp(-np.abs(z))  # exponent is always <= 0, so this cannot overflow
    # z >= 0: 1 / (1 + e^-z)        z < 0: e^z / (1 + e^z)
    out = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    return out[()]  # unwrap 0-d results to a scalar; real arrays pass through

def binary_cross_entropy(y_true, y_pred):
    """Return the mean binary cross-entropy of predictions against labels.

    Predicted probabilities are clipped to [1e-9, 1 - 1e-9] before the
    logarithms are taken, so log(0) is never evaluated.
    """
    tiny = 1e-9
    p = np.clip(y_pred, tiny, 1 - tiny)
    positive_term = y_true * np.log(p)            # contributes where label == 1
    negative_term = (1 - y_true) * np.log(1 - p)  # contributes where label == 0
    return -np.mean(positive_term + negative_term)

# 4. Logistic Regression Training
def train_logistic_regression(X, y, lr=0.1, epochs=3000):
    """Fit logistic-regression weights with full-batch gradient descent.

    Weights start at zero and are updated ``epochs`` times using the gradient
    of the mean binary cross-entropy. The loss is printed every 500 epochs.

    Args:
        X: Design matrix of shape (n_samples, n_features). No intercept column
            is added here; include one in ``X`` if an intercept is wanted.
        y: Binary labels, one per row of ``X``. Accepted as shape
            (n_samples,) or (n_samples, 1).
        lr: Gradient-descent step size.
        epochs: Number of weight updates to perform.

    Returns:
        Weight column vector of shape (n_features, 1).

    Raises:
        ValueError: If the number of labels differs from the number of rows
            in ``X``.
    """
    n_samples, n_features = X.shape

    # Force labels into an (n, 1) column. A 1-D y would otherwise broadcast
    # against the (n, 1) predictions into an (n, n) matrix and silently
    # produce a wrong gradient.
    y = np.asarray(y).reshape(-1, 1)
    if y.shape[0] != n_samples:
        raise ValueError(
            f"y has {y.shape[0]} labels but X has {n_samples} rows"
        )

    # Initialize weights as a column vector
    w = np.zeros((n_features, 1))

    for epoch in range(epochs):
        z = X @ w
        y_pred = sigmoid(z)

        # Gradient of the mean cross-entropy:
        # (y_pred - y) is (n, 1), X.T is (features, n) -> dw is (features, 1)
        dw = (1/n_samples) * (X.T @ (y_pred - y))
        w -= lr * dw

        if epoch % 500 == 0:
            # Loss reported here uses the predictions made before this update
            loss = binary_cross_entropy(y, y_pred)
            print(f"Epoch {epoch:4d}, Loss: {loss:.4f}")
    return w

# 5. Execution: fit the weights on the standardized design matrix
weights = train_logistic_regression(
    X_scaled,
    y,
    lr=0.1,
    epochs=3000,
)

# 6. Prediction Logic
def predict_proba(X, w):
    """Return sigmoid(X @ w): the model's predicted probability for each row of X."""
    logits = X @ w
    return sigmoid(logits)

def predict_class(y_prob, threshold=0.5):
    """Convert probabilities into hard 0/1 class labels.

    Args:
        y_prob: Scalar, sequence, or array of probabilities.
        threshold: Cut-off; probabilities greater than or equal to it map
            to 1, everything else to 0.

    Returns:
        Integer labels with the same shape as ``y_prob``.
    """
    # np.greater_equal compares element-wise even for plain lists/tuples and
    # Python floats, where the bare `>=` operator would raise or return a
    # built-in bool with no `.astype`.
    return np.greater_equal(y_prob, threshold).astype(int)

y_prob = predict_proba(X_scaled, weights)
y_pred = predict_class(y_prob)

# 7. Evaluate: fraction of training rows whose predicted label matches the truth
accuracy = np.mean(y_pred == y)
print(f"\nFinal Training Accuracy: {accuracy * 100:.2f}%")

# 8. Inspect specific predictions
print("\n--- Sample Predictions ---")
# Flatten the column-shaped arrays so each row yields plain scalars, and slice
# instead of indexing so a dataset with fewer than five rows does not raise
# IndexError.
sample_rows = zip(y_prob.ravel()[:5], y_pred.ravel()[:5], y.ravel()[:5])
for i, (prob, predicted, actual) in enumerate(sample_rows):
    status = "Survived" if actual == 1 else "Died"
    pred_status = "Survived" if predicted == 1 else "Died"
    print(f"Passenger {i+1} | Prob: {prob:.3f} | Pred: {pred_status} | Actual: {status}")

  warn(


Epoch    0, Loss: 0.6931
Epoch  500, Loss: 0.5983
Epoch 1000, Loss: 0.5983
Epoch 1500, Loss: 0.5983
Epoch 2000, Loss: 0.5983
Epoch 2500, Loss: 0.5983

Final Training Accuracy: 68.33%

--- Sample Predictions ---
Passenger 1 | Prob: 0.808 | Pred: Survived | Actual: Survived
Passenger 2 | Prob: 0.903 | Pred: Survived | Actual: Survived
Passenger 3 | Prob: 0.899 | Pred: Survived | Actual: Died
Passenger 4 | Prob: 0.764 | Pred: Survived | Actual: Died
Passenger 5 | Prob: 0.795 | Pred: Survived | Actual: Died
