In [14]:
import copy
import itertools

import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from torch.utils.data import DataLoader, TensorDataset

# Read the wine-quality table (the CSV uses ';' as its field separator)
file_path = 'winequality-red.csv'
data = pd.read_csv(file_path, sep=';')

# The "quality" column is the target; every remaining column is a feature
y = data["quality"].values
X = data.drop(columns=["quality"]).values

# Map the raw quality scores onto contiguous class indices 0..K-1
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Hold out 20% of the rows for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only, then apply it to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Wrap the arrays as tensors: float32 features, int64 class labels
X_train_tensor, X_test_tensor = (
    torch.tensor(features, dtype=torch.float32) for features in (X_train, X_test)
)
y_train_tensor, y_test_tensor = (
    torch.tensor(labels, dtype=torch.long) for labels in (y_train, y_test)
)

# Create DataLoader objects
def create_dataloader(X, y, batch_size, shuffle=True):
    """Wrap paired feature/label tensors in a batched ``DataLoader``.

    Args:
        X: Feature tensor; its first dimension indexes samples.
        y: Label tensor with the same first-dimension length as ``X``.
        batch_size: Number of samples per batch.
        shuffle: Whether to reshuffle the samples on every pass. Defaults to
            ``True`` (the historical behavior); pass ``False`` for a
            deterministic, in-order loader, e.g. for evaluation.

    Returns:
        A ``DataLoader`` yielding ``(X_batch, y_batch)`` tuples.
    """
    dataset = TensorDataset(X, y)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

# Define MLP model for classification
class MLPClassifier(nn.Module):
    """Fully connected feed-forward classifier that outputs raw class logits.

    The network is ``Linear -> activation`` repeated once per entry of
    ``hidden_layers``, followed by a final ``Linear`` producing ``num_classes``
    scores. No softmax is applied to the output.

    Args:
        input_size: Number of input features.
        hidden_layers: Iterable of hidden-layer widths; may be empty, in which
            case the model is a single linear layer.
        activation_fn: Either an ``nn.Module`` instance (used as a template and
            deep-copied for every hidden layer) or a zero-argument callable
            such as ``nn.ReLU`` that returns a fresh activation module.
        num_classes: Number of output classes.
    """

    def __init__(self, input_size, hidden_layers, activation_fn, num_classes):
        super().__init__()
        layers = []
        current_size = input_size
        for hidden_neurons in hidden_layers:
            layers.append(nn.Linear(current_size, hidden_neurons))
            layers.append(self._new_activation(activation_fn))
            current_size = hidden_neurons
        layers.append(nn.Linear(current_size, num_classes))  # Output layer for classification
        self.model = nn.Sequential(*layers)

    @staticmethod
    def _new_activation(activation_fn):
        """Return an activation module that is private to one layer."""
        if isinstance(activation_fn, nn.Module):
            # Copy instead of reusing the caller's object: a parameterized
            # activation (e.g. nn.PReLU) would otherwise share its weights
            # across layers and across every model built from that object.
            return copy.deepcopy(activation_fn)
        return activation_fn()

    def forward(self, x):
        """Return the class logits for the input batch ``x``."""
        return self.model(x)

# Hyperparameter search space: every combination of the values below is tried
hidden_layer_configs = [
    [4],
    [16, 16],
]
activation_functions = dict(relu=nn.ReLU(), tanh=nn.Tanh())
epochs_list = [10, 25]
learning_rates = [1e-2, 1e-3]
batch_sizes = [64, 128]

# Training stops once the evaluation loss moves by less than this between epochs
early_stopping_threshold = 1e-3

def _evaluate(model, loader, criterion, device):
    """Return ``(mean_loss, accuracy)`` of ``model`` over every sample in ``loader``."""
    model.eval()
    correct = 0
    total = 0
    total_loss = 0.0
    with torch.no_grad():
        for batch_X, batch_y in loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            outputs = model(batch_X)
            # Weight each batch loss by its size so that a shorter final
            # batch does not skew the per-sample average.
            total_loss += criterion(outputs, batch_y).item() * batch_X.size(0)
            _, predicted = torch.max(outputs, 1)
            total += batch_y.size(0)
            correct += (predicted == batch_y).sum().item()
    return total_loss / total, correct / total


def train_and_evaluate(
    hidden_layers, activation_fn, epochs, learning_rate, batch_size
):
    """Train one MLP configuration with SGD and score it on the test split.

    Reads the module-level ``X_train_tensor`` / ``y_train_tensor`` and
    ``X_test_tensor`` / ``y_test_tensor`` tensors, ``label_encoder`` (for the
    number of classes) and ``early_stopping_threshold``.

    Args:
        hidden_layers: Hidden-layer widths passed to ``MLPClassifier``.
        activation_fn: Activation passed to ``MLPClassifier``.
        epochs: Maximum number of training epochs; must be at least 1.
        learning_rate: SGD learning rate.
        batch_size: Batch size used for both the training and test loaders.

    Returns:
        ``(avg_loss, accuracy)`` measured on the test split after the last
        epoch that was run.

    Raises:
        ValueError: If ``epochs`` is less than 1 (there would be no metrics
            to report).
    """
    if epochs < 1:
        raise ValueError(f"epochs must be at least 1, got {epochs}")

    train_loader = create_dataloader(X_train_tensor, y_train_tensor, batch_size)
    test_loader = create_dataloader(X_test_tensor, y_test_tensor, batch_size)

    # Use GPU if available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Move the model to its device *before* building the optimizer, so the
    # optimizer is constructed over the parameters that are actually trained.
    model = MLPClassifier(
        X_train_tensor.shape[1], hidden_layers, activation_fn, len(label_encoder.classes_)
    ).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    # Loss of the previous epoch (not the best so far): training stops as soon
    # as two consecutive epochs differ by less than the threshold.
    prev_loss = float('inf')
    for _ in range(epochs):
        model.train()
        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

        # NOTE: the same held-out split drives early stopping and provides the
        # reported metrics.
        avg_loss, accuracy = _evaluate(model, test_loader, criterion, device)

        if abs(prev_loss - avg_loss) < early_stopping_threshold:
            break
        prev_loss = avg_loss

    return avg_loss, accuracy

# Run experiments and collect results.
# itertools.product walks the grid in the same order as nested for-loops would
# (the right-most argument varies fastest), without the five-level indentation.
search_grid = itertools.product(
    hidden_layer_configs,
    activation_functions.items(),
    epochs_list,
    learning_rates,
    batch_sizes,
)

results = []
for (
    hidden_layers,
    (activation_name, activation_fn),
    epochs,
    learning_rate,
    batch_size,
) in search_grid:
    avg_loss, accuracy = train_and_evaluate(
        hidden_layers, activation_fn, epochs, learning_rate, batch_size
    )
    # One row per configuration: the settings tried plus the resulting metrics
    results.append(
        {
            "hidden_layers": hidden_layers,
            "activation": activation_name,
            "epochs": epochs,
            "learning_rate": learning_rate,
            "batch_size": batch_size,
            "avg_loss": avg_loss,
            "accuracy": accuracy,
        }
    )

# Convert results to DataFrame
results_df = pd.DataFrame(results)
results_df.to_csv("optimized_classification_results.csv", index=False)
print("Experiments completed. Results saved to 'optimized_classification_results.csv'.")


Experiments completed. Results saved to 'optimized_classification_results.csv'.
