In [26]:
import numpy as np
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.metrics import f1_score

# Step 1: Load the cleaned dataset and build the feature matrix
RANDOM_SEED = 42  # one seed for both the split and the weight init

data = pd.read_excel('cleaned_dataset.xlsx')
y = data['Class']
# Keep only the columns whose name contains the transport prefix, cast to int.
transport_columns = [col for col in data.columns if 'Type_of_Transport_Used_' in col]
transport_data = data[transport_columns]
transport = transport_data.astype(int).to_numpy()

# Split the data into training and testing sets (80% / 20%)
X_train, X_test, y_train, y_test = train_test_split(
    transport, y, test_size=0.2, random_state=RANDOM_SEED
)

# Step 2: Initialize parameters
n_features = X_train.shape[1]
n_classes = len(np.unique(y))  # number of distinct labels found in 'Class'
# Draw the initial weights from a seeded generator; with the unseeded global
# RNG every kernel restart would start training from different weights.
rng = np.random.default_rng(RANDOM_SEED)
W = rng.standard_normal((n_features, n_classes)) * 0.01  # Small random weights
b = np.zeros((1, n_classes))  # Bias initialized to zero

# Step 3: Define helper functions
def softmax(z):
    """Turn each row of a 2-D array of logits into a probability distribution.

    The row-wise maximum is subtracted before exponentiating so that large
    logits do not overflow; the shift cancels out in the normalization.
    """
    row_peak = np.max(z, axis=1, keepdims=True)
    unnormalized = np.exp(z - row_peak)
    row_total = np.sum(unnormalized, axis=1, keepdims=True)
    return unnormalized / row_total

def mean_squared_error(y_true, y_pred):
    """Return the mean of the squared element-wise differences."""
    residual = y_true - y_pred
    return np.mean(np.square(residual))

def compute_gradients(X, y_true, y_prob, y_pred):
    """Gradients of the mean softmax cross-entropy w.r.t. weights and bias.

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
        Input features.
    y_true : array-like of length n_samples
        Class labels, assumed to be 1-based (1..n_classes); they are shifted
        by one to index the one-hot targets.
    y_prob : array, shape (n_samples, n_classes)
        Predicted class probabilities.
    y_pred : unused
        Accepted only so existing call sites keep working; the gradient does
        not depend on it.

    Returns
    -------
    dW : array, shape (n_features, n_classes)
    db : array, shape (1, n_classes)
    """
    n_samples = X.shape[0]
    # Read the class count from the probabilities instead of a notebook-level
    # global, so the function works on its own.
    n_classes = y_prob.shape[1]

    # One-hot targets, built once and shared by both gradients.
    targets = np.eye(n_classes)[np.asarray(y_true).astype(int) - 1]
    error = y_prob - targets

    dW = (1 / n_samples) * np.dot(X.T, error)
    db = (1 / n_samples) * np.sum(error, axis=0, keepdims=True)

    return dW, db

# Step 4: Training loop (every epoch uses the full training set)
learning_rate = 0.1
n_epochs = 1000

for epoch in range(n_epochs):
    # Forward pass: logits -> class probabilities -> expected class value
    logits = X_train @ W + b
    y_prob = softmax(logits)
    y_pred = (y_prob * np.arange(1, n_classes + 1)).sum(axis=1)

    # MSE between the labels and the expected class value; only used for the
    # progress printout below
    loss = mean_squared_error(y_train, y_pred)

    # Backward pass, then in-place parameter update
    dW, db = compute_gradients(X_train, y_train, y_prob, y_pred)
    W -= learning_rate * dW
    b -= learning_rate * db

    # Report the loss every 100 epochs
    if not epoch % 100:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

# Step 5: Evaluate on test set
logits_test = X_test @ W + b
y_prob_test = softmax(logits_test)
y_pred_test = (y_prob_test * np.arange(1, n_classes + 1)).sum(axis=1)
test_loss = mean_squared_error(y_test, y_pred_test)
print(f"Test Loss: {test_loss:.4f}")
# Round the expected class value to the nearest integer before scoring
y_pred_test_rounded = np.round(y_pred_test)
f1 = f1_score(y_test, y_pred_test_rounded, average='weighted')
print(f"F1 Score: {f1:.4f}")

Epoch 0, Loss: 0.6925
Epoch 100, Loss: 0.6668
Epoch 200, Loss: 0.6635
Epoch 300, Loss: 0.6626
Epoch 400, Loss: 0.6624
Epoch 500, Loss: 0.6623
Epoch 600, Loss: 0.6623
Epoch 700, Loss: 0.6623
Epoch 800, Loss: 0.6623
Epoch 900, Loss: 0.6623
Test Loss: 0.6770
F1 Score: 0.2201


In [31]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

# Define a function to train and evaluate the Softmax Regression
def train_softmax(X_train, y_train, X_val, y_val, learning_rate, lambda_reg, batch_size, n_epochs=1000):
    """Train softmax regression with mini-batch gradient descent and score it.

    Parameters
    ----------
    X_train, X_val : array-like, shape (n_samples, n_features)
        Training and validation features.
    y_train, y_val : array-like of class labels
        Labels may take any values (for example 1..K); they are mapped to
        column indices internally and predictions are mapped back.
    learning_rate : float
        Step size of each gradient update.
    lambda_reg : float
        L2 penalty coefficient applied to the weights (not the bias).
    batch_size : int
        Number of consecutive rows per mini-batch.
    n_epochs : int, default 1000
        Number of passes over the training data.

    Returns
    -------
    f1 : float
        Weighted F1 score of the trained model on the validation set.
    loss : float
        Regularized cross-entropy of the last mini-batch processed
        (NaN when no update was performed).
    """
    X_train = np.asarray(X_train)
    X_val = np.asarray(X_val)
    y_train = np.asarray(y_train).ravel()
    n_samples, n_features = X_train.shape

    # Map the observed labels to 0..K-1. Indexing np.eye(K) with the raw labels
    # only works when they are already exactly 0..K-1; 1-based labels would
    # run past the last row.
    classes = np.unique(y_train)
    n_classes = len(classes)
    y_index = np.searchsorted(classes, y_train)
    y_train_onehot = np.eye(n_classes)[y_index]

    # Initialize weights and bias
    W = np.random.randn(n_features, n_classes) * 0.01
    b = np.zeros((1, n_classes))

    loss = float("nan")  # defined even if the loops below never run

    # Training loop
    for epoch in range(n_epochs):
        # Mini-batch gradient descent over consecutive slices
        for i in range(0, n_samples, batch_size):
            X_batch = X_train[i:i + batch_size]
            y_batch = y_train_onehot[i:i + batch_size]
            n_batch = X_batch.shape[0]  # the final batch can be shorter

            # Forward pass
            logits = np.dot(X_batch, W) + b
            exp_logits = np.exp(logits - np.max(logits, axis=1, keepdims=True))  # Stability trick
            probs = exp_logits / np.sum(exp_logits, axis=1, keepdims=True)

            # Compute loss (cross-entropy with L2 regularization)
            loss = -np.mean(np.sum(y_batch * np.log(probs + 1e-9), axis=1)) + (lambda_reg / 2) * np.sum(W ** 2)

            # Gradients, averaged over the rows actually in this batch
            error = probs - y_batch
            dW = np.dot(X_batch.T, error) / n_batch + lambda_reg * W
            db = np.sum(error, axis=0, keepdims=True) / n_batch

            # Parameter updates
            W -= learning_rate * dW
            b -= learning_rate * db

    # Validation F1 score
    logits_val = np.dot(X_val, W) + b
    probs_val = np.exp(logits_val - np.max(logits_val, axis=1, keepdims=True))
    probs_val /= np.sum(probs_val, axis=1, keepdims=True)
    # Translate winning column indices back to the original label values.
    y_pred = classes[np.argmax(probs_val, axis=1)]
    # Weighted averaging: the default average='binary' is rejected for
    # multiclass targets.
    f1 = f1_score(y_val, y_pred, average='weighted')

    return f1, loss


In [None]:
from sklearn.model_selection import KFold

# Define cross-validation function
def cross_validate_softmax(X, y, learning_rate, lambda_reg, batch_size, n_folds=5, n_epochs=500):
    """Return the mean validation F1 of `train_softmax` over shuffled K-fold splits.

    X and y are converted to NumPy arrays and split into `n_folds` folds
    (shuffled, random_state=42). A model is trained on each training part
    with the given hyperparameters and scored on the held-out part.
    """
    splitter = KFold(n_splits=n_folds, shuffle=True, random_state=42)
    features = np.array(X)
    labels = np.array(y)

    # train_softmax returns (f1, loss); only the F1 is kept per fold.
    fold_scores = [
        train_softmax(
            features[train_idx], labels[train_idx],
            features[val_idx], labels[val_idx],
            learning_rate, lambda_reg, batch_size, n_epochs,
        )[0]
        for train_idx, val_idx in splitter.split(X)
    ]

    return np.mean(fold_scores)

# Define hyperparameters
learning_rates = [0.001, 0.01, 0.1]
lambda_regs = [1e-5, 1e-4, 1e-3]
batch_sizes = [32, 64, 128]

# Start below any attainable score: F1 is never negative, so the first
# evaluated combination is always recorded. Starting at 0 with a strict ">"
# would leave best_hyperparams empty if every combination scored exactly 0.
best_f1 = float("-inf")
best_hyperparams = {}

# Perform cross-validation for each combination of hyperparameters
for lr in learning_rates:
    for reg in lambda_regs:
        for batch in batch_sizes:
            f1 = cross_validate_softmax(X_train, y_train, lr, reg, batch)
            print(f"LR: {lr}, Lambda: {reg}, Batch Size: {batch}, Cross-Validated f1 score: {f1:.4f}")
            if f1 > best_f1:
                best_f1 = f1
                best_hyperparams = {'learning_rate': lr, 'lambda_reg': reg, 'batch_size': batch}

print("Best Hyperparameters:", best_hyperparams)
print("Best Cross-Validated f1 score:", best_f1)

In [None]:
import optuna
# Objective function for hyperparameter optimization
def objective(trial):
    """Optuna objective: sample hyperparameters and return the cross-validated F1.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample `learning_rate` and `lambda_reg` (log scale) and
        `batch_size` (categorical).

    Returns
    -------
    float
        Mean F1 from `cross_validate_softmax` on the notebook-level
        `X_train` / `y_train`, using its default fold and epoch settings.
    """
    # suggest_float(..., log=True) replaces suggest_loguniform, which Optuna
    # has deprecated since v3.0.
    learning_rate = trial.suggest_float("learning_rate", 1e-4, 1, log=True)
    lambda_reg = trial.suggest_float("lambda_reg", 1e-5, 1e-1, log=True)
    batch_size = trial.suggest_categorical("batch_size", [32, 64, 128])

    # Score with cross-validation on the training split: no X_val / y_val is
    # defined at notebook level, so referencing them fails on a fresh kernel.
    return cross_validate_softmax(X_train, y_train, learning_rate, lambda_reg, batch_size)

# Run Optuna optimization
# Seed the sampler so its random draws are repeatable. Runs are only fully
# identical if the objective itself is deterministic as well.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=50)

# Best hyperparameters and result
print("Best Hyperparameters:", study.best_params)
# The objective returns an F1 score, so label it as such rather than "accuracy".
print("Best F1 Score:", study.best_value)
