In [2]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_svmlight_file
import itertools

# Load the scaled diabetes dataset
X, y = load_svmlight_file("scaled.txt")
X = pd.DataFrame(X.toarray())

features = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age']
X.columns = features
y[y == -1] = 0

# Replace 0 values with the median (excluding 0)
for column in X.columns:
    median = X[X[column] != 0][column].median()
    X[column] = X[column].replace(0, median)

# Split the dataset into a training set (80%) and a test set (20%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert data to PyTorch tensors
X_train = torch.FloatTensor(X_train.values)
X_test = torch.FloatTensor(X_test.values)
y_train = torch.FloatTensor(y_train)
y_test = torch.FloatTensor(y_test)

activation_functions = [
    nn.ReLU(),          # Rectified Linear Unit (ReLU)
    nn.Sigmoid(),       # Sigmoid
    nn.Tanh()           # Hyperbolic Tangent (Tanh)
]

optimizers = [
    optim.SGD,         # Stochastic Gradient Descent (SGD)
    optim.Adam,        # Adam
    optim.RMSprop,     # RMSprop
    optim.Adagrad,     # Adagrad
    optim.Adadelta,    # Adadelta
    optim.Adamax      # Adamax
]

loss_functions = [
    nn.MSELoss(),      # Mean Squared Error (MSE)
    nn.L1Loss(),       # Mean Absolute Error (MAE)
    nn.SmoothL1Loss()  # Huber Loss
]

learning_rates = [0.0001, 0.001, 0.01, 0.1]
weight_decay_grid = [0.0, 0.001, 0.01, 0.1]
l1_lambda_grid = [0.0, 0.001, 0.01, 0.1]
dropout_probs = [0.0, 0.2, 0.4, 0.6]  


epochs = 100
patience = 10

input_size = X_train.shape[1]
hidden_sizes = [32, 64, 128]
output_size = 1

class MLPModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, activation, dropout_prob):
        super(MLPModel, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            activation,
            nn.Dropout(p=dropout_prob),  # Dropout layer
            nn.Linear(hidden_size, hidden_size),
            activation,
            nn.Dropout(p=dropout_prob),  # Dropout layer
            nn.Linear(hidden_size, output_size)
        )

        # Initialization
        for layer in self.layers:
            if isinstance(layer, nn.Linear):
                nn.init.xavier_uniform_(layer.weight)
                nn.init.zeros_(layer.bias)

    def forward(self, x):
        return self.layers(x)

def train_and_evaluate(optimizer_class, loss_function, lr, weight_decay, l1_lambda, activation, input_size, hidden_size, output_size, dropout_prob):
    model = MLPModel(input_size, hidden_size, output_size, activation, dropout_prob)

    optimizer = optimizer_class(model.parameters(), lr=lr, weight_decay=weight_decay)

    l1_lambda = l1_lambda
    loss_function = L1RegularizedLoss(base_loss=loss_function, l1_lambda=l1_lambda)

    best_accuracy = 0
    best_epoch = 0
    best_model_weights = None

    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train)
        outputs = torch.sigmoid(outputs)
        loss = loss_function(outputs, y_train.view(-1, 1))
        loss.backward()
        optimizer.step()

        model.eval()
        with torch.no_grad():
            y_test_pred = (model(X_test) >= 0.5).float()
            test_accuracy = accuracy_score(y_test.numpy(), y_test_pred.numpy())

        if test_accuracy > best_accuracy:
            best_accuracy = test_accuracy
            best_epoch = epoch
            best_model_weights = model.state_dict()
        elif epoch - best_epoch > patience:
            break

    if best_model_weights is not None:
        model.load_state_dict(best_model_weights)

    result = {
        'Optimizer': optimizer_class.__name__,
        'LossFunction': loss_function.__class__.__name__,
        'Activation': activation.__class__.__name__,
        'LearningRate': lr,
        'WeightDecay': weight_decay,
        'L1Lambda': l1_lambda,
        'HiddenSize': hidden_size,
        'DropoutProbability': dropout_prob,
        'Accuracy': best_accuracy
    }

    return result

class L1RegularizedLoss(nn.Module):
    def __init__(self, base_loss, l1_lambda):
        super(L1RegularizedLoss, self).__init__()
        self.base_loss = base_loss
        self.l1_lambda = l1_lambda

    def forward(self, outputs, targets):
        base_loss = self.base_loss(outputs, targets)
        l1_reg = 0
        for param in self.parameters():
            l1_reg += torch.norm(param, p=1)
        total_loss = base_loss + self.l1_lambda * l1_reg
        return total_loss

# Perform grid search
for optimizer_class, loss_function, lr, weight_decay, l1_lambda, activation, dropout_prob, hidden_size in itertools.product(optimizers, loss_functions, learning_rates, weight_decay_grid, l1_lambda_grid, activation_functions, dropout_probs, hidden_sizes):
    result = train_and_evaluate(optimizer_class, loss_function, lr, weight_decay, l1_lambda, activation, input_size, hidden_size, output_size, dropout_prob)
    results.append(result)

# Find the highest accuracy model
best_result = max(results, key=lambda x: x['Accuracy'])

print("Best Model Hyperparameters:")
print(f"Optimizer: {best_result['Optimizer']}")
print(f"Loss Function: {best_result['LossFunction']}")
print(f"Activation Function: {best_result['Activation']}")
print(f"Learning Rate: {best_result['LearningRate']}")
print(f"Weight Decay (L2 Regularization): {best_result['WeightDecay']}")
print(f"L1 Lambda (L1 Regularization): {best_result['L1Lambda']}")
print(f"Hidden Size: {best_result['HiddenSize']}")
print(f"Dropout Probability: {best_result['DropoutProbability']}")
print("\nBest Model Performance:")
print(f"Accuracy: {best_result['Accuracy']:.3f}")

Best Model Hyperparameters:
Optimizer: RMSprop
Loss Function: L1RegularizedLoss
Activation Function: Tanh
Learning Rate: 0.0001
Weight Decay (L2 Regularization): 0.001
L1 Lambda (L1 Regularization): 0.01
Hidden Size: 64
Dropout Probability: 0.4

Best Model Performance:
Accuracy: 0.851
