In [1]:
import torch
import torch.nn.functional as F  
from torch import optim 
from torch import nn
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import numpy as np
import torch.nn.init as init
import random
import time
from sklearn.model_selection import KFold
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler

### Class for the Neural Network

In [2]:
class NN(nn.Module):
    def __init__(self, input_size, num_classes, hidden_layer_sizes, activation_function, apply_softmax = False):
        super(NN, self).__init__()

        seed = 18
        torch.manual_seed(seed)

        self.activation = activation_function
        self.apply_softmax = apply_softmax

        # Input layer
        self.input_layer = nn.Linear(input_size, hidden_layer_sizes[0])
        init.kaiming_uniform_(self.input_layer.weight, mode='fan_in', nonlinearity=activation_function.__name__)


        # Hidden layers
        self.hidden_layers = nn.ModuleList([
            nn.Linear(hidden_layer_sizes[i], hidden_layer_sizes[i + 1])
            for i in range(len(hidden_layer_sizes) - 1)
        ])

        for layer in self.hidden_layers:
            init.kaiming_uniform_(layer.weight, mode='fan_in', nonlinearity=activation_function.__name__)


        # Output layer
        self.output_layer = nn.Linear(hidden_layer_sizes[-1], num_classes)
        init.kaiming_uniform_(self.output_layer.weight, mode='fan_in', nonlinearity=activation_function.__name__)


    def forward(self, x):
        x = x.float()
        x = self.activation(self.input_layer(x))

        # Process through hidden layers
        for layer in self.hidden_layers:
            x = self.activation(layer(x))

        if self.apply_softmax:
            x = F.softmax(self.output_layer(x), dim=1)
        else:
            x = self.output_layer(x)

        return x
    

### Function to train the model

In [3]:
def train_model(model, train_loader, optimizer, criterion, num_epochs):
    for epoch in range(num_epochs):
        total_loss = 0.0  # Initialize total loss for the epoch
        num_batches = len(train_loader)

        for batch_idx, (data, targets) in enumerate(train_loader):
            data = data.reshape(data.shape[0], -1)

            scores = model(data)
            loss = criterion(scores, targets)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()  # Accumulate the batch loss

        average_loss = total_loss / num_batches
        print(f"Epoch {epoch + 1}/{num_epochs}, Average Loss: {average_loss}")
    

### Function to calculate the accuracy

In [4]:
def check_accuracy(loader, model):

    num_correct = 0
    num_samples = 0
    model.eval()

    with torch.no_grad():
        for x, y in loader:

            x = x.reshape(x.shape[0], -1)

            scores = model(x)
            _, predictions = scores.max(1)

            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)

    model.train()
    return num_correct / num_samples


### Function to split the data into train and test

In [5]:
def train_test_split(data: pd.DataFrame, target_label : str, test_size=0.2, return_torch=None, DoSmote = False):
        
    # split the data into train and test
    train = data.sample(frac=(1-test_size),random_state=200)
    test = data.drop(train.index)
    
    # split the train and test into X and Y
    train_X = train.drop([target_label], axis=1).values
    train_Y = train[target_label].values
    if DoSmote == True:
        sm = SMOTE(random_state=42, k_neighbors= 3)
        train_X, train_Y = sm.fit_resample(train_X, train_Y)
    test_X = test.drop([target_label], axis=1).values
    test_Y = test[target_label].values
    
    if return_torch:
        train_X = torch.tensor(train_X)
        train_Y = torch.tensor(train_Y)
        test_X = torch.tensor(test_X)
        test_Y = torch.tensor(test_Y)
    
    return train_X, train_Y, test_X, test_Y

## Function for Grid Search

In [6]:
def grid_search_cv(hidden_layer_sizes_list, activation_functions, learning_rates, batch_sizes, num_epochs_list, train_loader, NumbOfClasses, k_folds=5, use_scaling=True):
    best_accuracy = 0.0
    best_combination = None
    results = []

    kf = KFold(n_splits=k_folds, shuffle=True, random_state=42)

    if use_scaling:
        scaler = StandardScaler()
        scaler.fit(train_loader.dataset.tensors[0].numpy())

    for hidden_layer_sizes in hidden_layer_sizes_list:
        for activation_function in activation_functions:
            for learning_rate in learning_rates:
                for batch_size in batch_sizes:
                    for num_epochs in num_epochs_list:

                        fold_accuracies = []
                        fold_training_times = []

                        for train_index, test_index in kf.split(train_loader.dataset):
                            X_train, X_test = train_loader.dataset.tensors[0][train_index], train_loader.dataset.tensors[0][test_index]
                            y_train, y_test = train_loader.dataset.tensors[1][train_index], train_loader.dataset.tensors[1][test_index]

                            model = NN(input_size=train_loader.dataset.tensors[0].shape[1],
                                       num_classes=NumbOfClasses,
                                       hidden_layer_sizes=hidden_layer_sizes,
                                       activation_function=activation_function)
                            criterion = nn.CrossEntropyLoss()
                            optimizer = optim.Adam(model.parameters(), lr=learning_rate)

                        
                            if use_scaling:
                                X_train_scaled = torch.tensor(scaler.transform(X_train.numpy()))
                                X_test_scaled = torch.tensor(scaler.transform(X_test.numpy()))
                            else:
                                X_train_scaled, X_test_scaled = X_train, X_test

                            train_start_time = time.time()
                            train_model(model, DataLoader(TensorDataset(X_train_scaled, y_train), batch_size=batch_size, shuffle=True), optimizer, criterion, num_epochs)
                            train_end_time = time.time()
                            fold_training_times.append(train_end_time - train_start_time)

                            accuracy_test = check_accuracy(DataLoader(TensorDataset(X_test_scaled, y_test), batch_size=batch_size, shuffle=False), model)
                            fold_accuracies.append(accuracy_test.item())

                        avg_accuracy = np.mean(fold_accuracies)
                        avg_training_time = np.mean(fold_training_times)

                        result = {
                            'Hidden Layer Sizes': hidden_layer_sizes,
                            'Activation Function': activation_function.__name__,
                            'Learning Rate': learning_rate,
                            'Batch Size': batch_size,
                            'Number of Epochs': num_epochs,
                            'Average Accuracy': avg_accuracy,
                            'Average Training Time': avg_training_time
                        }

                        results.append(result)

                        if avg_accuracy > best_accuracy:
                            best_accuracy = avg_accuracy
                            best_combination = result

    results_df = pd.DataFrame(results)
    return results_df, best_accuracy, best_combination

## Testing the model on the wine quality dataset

#### Loading the dataset

In [7]:
wine_quality = pd.read_csv('./preprocessed-datasets/wine_quality_prepro.csv', index_col=0)
wine_quality.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,class,wine_type
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,1
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5,1
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5,1
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6,1
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,1


#### Splitting the dataset into training and testing sets, converting to PyTorch tensors and creating PyTorch DataLoaders

In [8]:
smote_in = True
train_X, train_Y, test_X, test_Y = train_test_split(wine_quality, "class", return_torch=True, DoSmote = smote_in)

dataset = TensorDataset(train_X, train_Y)
train_loader = DataLoader(dataset, batch_size=32, shuffle=False)

dataset = TensorDataset(test_X, test_Y)
test_loader = DataLoader(dataset, batch_size=32, shuffle=False)


#### Creating the model, training and testing

In [9]:
input_size = train_X.shape[1] # number of features in wine quality dataset
NumbOfClasses = 10 # 10 classes in wine quality dataset
learning_rate = 0.01
batch_size = 64
num_epochs = 10
hidden_layer_sizes = [25,30]
activation_function = F.tanh

model = NN(input_size=train_X.shape[1], num_classes=NumbOfClasses, hidden_layer_sizes=hidden_layer_sizes, activation_function=activation_function)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

train_model(model, train_loader, optimizer, criterion, num_epochs)

print(f"Accuracy on training set: {check_accuracy(train_loader, model)}")
print(f"Accuracy on test set: {check_accuracy(test_loader, model)}")

Epoch 1/10, Average Loss: 1.360640812674075
Epoch 2/10, Average Loss: 2.306261344772773
Epoch 3/10, Average Loss: 2.420518517726096
Epoch 4/10, Average Loss: 2.344191050121728
Epoch 5/10, Average Loss: 2.3038992153937516
Epoch 6/10, Average Loss: 2.353399555964451
Epoch 7/10, Average Loss: 2.239859869013744
Epoch 8/10, Average Loss: 2.184582583185279
Epoch 9/10, Average Loss: 2.3706629784815596
Epoch 10/10, Average Loss: 2.2964973737261776
Accuracy on training set: 0.1428571492433548
Accuracy on test set: 0.0013071895809844136


## Testing the model on the congressional voting dataset

#### Loading the dataset

In [10]:
cong_voting = pd.read_csv('./preprocessed-datasets/CongressionVoting_prepro.csv')
# encode class value democrat as 1 and republican as 0
cong_voting['class'] = cong_voting['class'].map({'democrat': 1, 'republican': 0})
cong_voting.head()

Unnamed: 0,ID,handicapped-infants,water-project-cost-sharing,adoption-of-the-budget-resolution,physician-fee-freeze,el-salvador-aid,religious-groups-in-schools,anti-satellite-test-ban,aid-to-nicaraguan-contras,mx-missile,immigration,synfuels-crporation-cutback,education-spending,superfund-right-to-sue,crime,duty-free-exports,export-administration-act-south-africa,class
0,140,1.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1
1,383,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1
2,201,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1
3,297,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0
4,309,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0


#### Splitting the dataset into training and testing sets, converting to PyTorch tensors and creating PyTorch DataLoaders

In [11]:
smote_in = False
train_X, train_Y, test_X, test_Y = train_test_split(cong_voting, "class", return_torch=True, DoSmote=smote_in)

dataset = TensorDataset(train_X, train_Y)
train_loader = DataLoader(dataset, batch_size=32, shuffle=False)

dataset = TensorDataset(test_X, test_Y)
test_loader = DataLoader(dataset, batch_size=32, shuffle=False)

#### Creating the model, training and testing

In [12]:
input_size = train_X.shape[1] # number of features in congr voting dataset
NumbOfClasses = 2 # 2 classes in congr voting dataset
learning_rate = 0.01
batch_size = 64
num_epochs = 10
hidden_layer_sizes = [25,30]
activation_function = F.tanh

model = NN(input_size=train_X.shape[1], num_classes=NumbOfClasses, hidden_layer_sizes=hidden_layer_sizes, activation_function=activation_function)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

train_model(model, train_loader, optimizer, criterion, num_epochs)

print(f"Accuracy on training set: {check_accuracy(train_loader, model)}")
print(f"Accuracy on test set: {check_accuracy(test_loader, model)}")

Epoch 1/10, Average Loss: 0.8203917642434438
Epoch 2/10, Average Loss: 0.6987653970718384
Epoch 3/10, Average Loss: 0.6630895932515463
Epoch 4/10, Average Loss: 0.5717843721310297
Epoch 5/10, Average Loss: 0.4445936580499013
Epoch 6/10, Average Loss: 0.3994954625765483
Epoch 7/10, Average Loss: 0.31182117760181427
Epoch 8/10, Average Loss: 0.26471030960480374
Epoch 9/10, Average Loss: 0.2563413182894389
Epoch 10/10, Average Loss: 0.14053159393370152
Accuracy on training set: 0.959770143032074
Accuracy on test set: 0.9069767594337463


## Testing the model on bank marketing dataset

#### Loading and preparing dataset

In [13]:
bank_marketing = pd.read_csv('./preprocessed-datasets/bank_marketing_prepro.csv')

In [14]:
column_to_move = 'class'

# Move class to the last index
columns = [col for col in bank_marketing.columns if col != column_to_move] + [column_to_move]
bank_marketing = bank_marketing[columns]

bank_marketing.drop('Unnamed: 0', axis=1,inplace=True)

bank_marketing.head()

Unnamed: 0,age,default,housing,loan,campaign,pdays,previous,emp.var.rate,cons.price.idx,cons.conf.idx,...,education_basic.9y,education_high.school,education_illiterate,education_professional.course,education_university.degree,education_unknown,poutcome_failure,poutcome_nonexistent,poutcome_success,class
0,56,0.0,0.0,0.0,1,999,0,1.1,93.994,-36.4,...,0,0,0,0,0,0,0,1,0,0
1,57,0.0,0.0,0.0,1,999,0,1.1,93.994,-36.4,...,0,1,0,0,0,0,0,1,0,0
2,37,0.0,1.0,0.0,1,999,0,1.1,93.994,-36.4,...,0,1,0,0,0,0,0,1,0,0
3,40,0.0,0.0,0.0,1,999,0,1.1,93.994,-36.4,...,0,0,0,0,0,0,0,1,0,0
4,56,0.0,0.0,1.0,1,999,0,1.1,93.994,-36.4,...,0,1,0,0,0,0,0,1,0,0


In [15]:
bank_marketing.columns

Index(['age', 'default', 'housing', 'loan', 'campaign', 'pdays', 'previous',
       'emp.var.rate', 'cons.price.idx', 'cons.conf.idx', 'euribor3m',
       'nr.employed', 'job_blue-collar', 'job_management', 'job_other',
       'job_self-employed', 'job_serivces', 'job_technician',
       'marital_divorced', 'marital_married', 'marital_single',
       'marital_unknown', 'education_basic.4y', 'education_basic.6y',
       'education_basic.9y', 'education_high.school', 'education_illiterate',
       'education_professional.course', 'education_university.degree',
       'education_unknown', 'poutcome_failure', 'poutcome_nonexistent',
       'poutcome_success', 'class'],
      dtype='object')

#### Splitting the dataset into training and testing sets, converting to PyTorch tensors and creating PyTorch DataLoaders

In [16]:
smote_in = True
train_X, train_Y, test_X, test_Y = train_test_split(bank_marketing, "class", return_torch=True, DoSmote=smote_in)

dataset = TensorDataset(train_X, train_Y)
train_loader = DataLoader(dataset, batch_size=32, shuffle=False)

dataset = TensorDataset(test_X, test_Y)
test_loader = DataLoader(dataset, batch_size=32, shuffle=False)

#### Creating the model, training and testing

In [17]:
input_size = train_X.shape[1] 
NumbOfClasses = 2 
learning_rate = 0.01
batch_size = 64
num_epochs = 10
hidden_layer_sizes = [25,30]
activation_function = F.tanh

model = NN(input_size=train_X.shape[1], num_classes=NumbOfClasses, hidden_layer_sizes=hidden_layer_sizes, activation_function=activation_function)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

train_model(model, train_loader, optimizer, criterion, num_epochs)

print(f"Accuracy on training set: {check_accuracy(train_loader, model)}")
print(f"Accuracy on test set: {check_accuracy(test_loader, model)}")

Epoch 1/10, Average Loss: 0.18898988196431674
Epoch 2/10, Average Loss: 0.2269750215684658
Epoch 3/10, Average Loss: 0.24861385295906963
Epoch 4/10, Average Loss: 0.2453736917733667
Epoch 5/10, Average Loss: 0.24322431265023542
Epoch 6/10, Average Loss: 0.24812340481806466
Epoch 7/10, Average Loss: 0.2489310940900721
Epoch 8/10, Average Loss: 0.24892018978450195
Epoch 9/10, Average Loss: 0.2489189351923551
Epoch 10/10, Average Loss: 0.24891852699443404
Accuracy on training set: 0.5
Accuracy on test set: 0.11689730733633041


# Test Grid search over all three datasets

It was not possible for us to iterate over all datasets because of the large amount of runtime

### Bank Marketing

In [18]:
hidden_layer_sizes_list = [[5],[10],[25, 30], [20, 25, 30]]
activation_functions = [F.tanh, F.relu, F.sigmoid]
learning_rates = [0.01, 0.001, 0.0001]
batch_sizes = [64]
num_epochs_list = [10]

dataset = bank_marketing
smote_in = True
train_X, train_Y, test_X, test_Y = train_test_split(dataset, "class", return_torch=True, DoSmote=smote_in)

train_data = TensorDataset(train_X, train_Y)
train_loader = DataLoader(train_data, batch_size=32, shuffle=False)

test_data = TensorDataset(test_X, test_Y)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

input_size = train_X.shape[1]

NumbOfClasses = 2

grid_results_bank, best_accuracy, best_combination = grid_search_cv(
    hidden_layer_sizes_list, activation_functions, learning_rates, batch_sizes, num_epochs_list, train_loader, NumbOfClasses,
    k_folds=5, use_scaling=True)

grid_results_bank['dataset'] = 'bank_marketing'

Epoch 1/10, Average Loss: 0.561470707135448
Epoch 2/10, Average Loss: 0.5323791904120497
Epoch 3/10, Average Loss: 0.5181172480348681
Epoch 4/10, Average Loss: 0.5003397330804601
Epoch 5/10, Average Loss: 0.4887983540042502
Epoch 6/10, Average Loss: 0.4738469987781973
Epoch 7/10, Average Loss: 0.4665976938654165
Epoch 8/10, Average Loss: 0.4607784700035397
Epoch 9/10, Average Loss: 0.4581564534150186
Epoch 10/10, Average Loss: 0.4565220564359524
Epoch 1/10, Average Loss: 0.5661592637417746
Epoch 2/10, Average Loss: 0.5315083074357991
Epoch 3/10, Average Loss: 0.512279006260666
Epoch 4/10, Average Loss: 0.4964455619780092
Epoch 5/10, Average Loss: 0.48734276025184514
Epoch 6/10, Average Loss: 0.4809280618048105
Epoch 7/10, Average Loss: 0.4749727556125714
Epoch 8/10, Average Loss: 0.47019399755476604
Epoch 9/10, Average Loss: 0.46767020758872474
Epoch 10/10, Average Loss: 0.4662129480819233
Epoch 1/10, Average Loss: 0.5600329651370075
Epoch 2/10, Average Loss: 0.5189289188450151
Epoch 3

In [19]:
grid_results_bank

Unnamed: 0,Hidden Layer Sizes,Activation Function,Learning Rate,Batch Size,Number of Epochs,Average Accuracy,Average Training Time,dataset
0,[5],tanh,0.01,64,10,0.795699,12.364878,bank_marketing
1,[5],tanh,0.001,64,10,0.755577,12.158159,bank_marketing
2,[5],tanh,0.0001,64,10,0.718461,11.684069,bank_marketing
3,[5],relu,0.01,64,10,0.770642,11.399968,bank_marketing
4,[5],relu,0.001,64,10,0.744679,11.911103,bank_marketing
5,[5],relu,0.0001,64,10,0.731579,11.358967,bank_marketing
6,[5],sigmoid,0.01,64,10,0.754432,11.386674,bank_marketing
7,[5],sigmoid,0.001,64,10,0.743364,11.465565,bank_marketing
8,[5],sigmoid,0.0001,64,10,0.735661,11.650078,bank_marketing
9,[10],tanh,0.01,64,10,0.820842,11.603699,bank_marketing


### Wine quality

In [20]:
hidden_layer_sizes_list = [[5],[10],[25, 30], [20, 25, 30]]
activation_functions = [F.tanh, F.relu, F.sigmoid]
learning_rates = [0.01, 0.001, 0.0001]
batch_sizes = [64]
num_epochs_list = [10]

dataset = wine_quality
smote_in = True
train_X, train_Y, test_X, test_Y = train_test_split(dataset, "class", return_torch=True, DoSmote=smote_in)

train_data = TensorDataset(train_X, train_Y)
train_loader = DataLoader(train_data, batch_size=32, shuffle=False)

test_data = TensorDataset(test_X, test_Y)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

input_size = train_X.shape[1]

NumbOfClasses = 10

grid_results_wine, best_accuracy, best_combination = grid_search_cv(
    hidden_layer_sizes_list, activation_functions, learning_rates, batch_sizes, num_epochs_list, train_loader, NumbOfClasses,
    k_folds=5, use_scaling=True)

grid_results_wine['dataset'] = 'wine_quality'

Epoch 1/10, Average Loss: 1.7219862341880798
Epoch 2/10, Average Loss: 1.280973960052837
Epoch 3/10, Average Loss: 1.205176176446857
Epoch 4/10, Average Loss: 1.1582312806688173
Epoch 5/10, Average Loss: 1.1346585115399024
Epoch 6/10, Average Loss: 1.1224412605015919
Epoch 7/10, Average Loss: 1.1128332702198414
Epoch 8/10, Average Loss: 1.1035884320735931
Epoch 9/10, Average Loss: 1.0937506610696965
Epoch 10/10, Average Loss: 1.0909036566512753
Epoch 1/10, Average Loss: 1.7293281326390275
Epoch 2/10, Average Loss: 1.2808340510936698
Epoch 3/10, Average Loss: 1.1957109991950219
Epoch 4/10, Average Loss: 1.153467654278784
Epoch 5/10, Average Loss: 1.1347701561571373
Epoch 6/10, Average Loss: 1.1221584695758242
Epoch 7/10, Average Loss: 1.1129617194334667
Epoch 8/10, Average Loss: 1.10279958988681
Epoch 9/10, Average Loss: 1.0953863967548718
Epoch 10/10, Average Loss: 1.0903006846254522
Epoch 1/10, Average Loss: 1.7211476299497817
Epoch 2/10, Average Loss: 1.286142404633339
Epoch 3/10, Av

In [21]:
grid_results_wine

Unnamed: 0,Hidden Layer Sizes,Activation Function,Learning Rate,Batch Size,Number of Epochs,Average Accuracy,Average Training Time,dataset
0,[5],tanh,0.01,64,10,0.544861,3.524143,wine_quality
1,[5],tanh,0.001,64,10,0.463002,3.439461,wine_quality
2,[5],tanh,0.0001,64,10,0.213747,3.510652,wine_quality
3,[5],relu,0.01,64,10,0.539978,3.760917,wine_quality
4,[5],relu,0.001,64,10,0.494768,4.17119,wine_quality
5,[5],relu,0.0001,64,10,0.207216,7.93257,wine_quality
6,[5],sigmoid,0.01,64,10,0.516708,8.525369,wine_quality
7,[5],sigmoid,0.001,64,10,0.454062,6.224549,wine_quality
8,[5],sigmoid,0.0001,64,10,0.150149,4.097279,wine_quality
9,[10],tanh,0.01,64,10,0.582018,5.518821,wine_quality


### Congressional Voting

In [22]:
hidden_layer_sizes_list = [[5],[10],[25, 30], [20, 25, 30]]
activation_functions = [F.tanh, F.relu, F.sigmoid]
learning_rates = [0.01, 0.001, 0.0001]
batch_sizes = [64]
num_epochs_list = [10]

dataset = cong_voting
smote_in = False
train_X, train_Y, test_X, test_Y = train_test_split(dataset, "class", return_torch=True,DoSmote=smote_in)

train_data = TensorDataset(train_X, train_Y)
train_loader = DataLoader(train_data, batch_size=32, shuffle=False)

test_data = TensorDataset(test_X, test_Y)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

input_size = train_X.shape[1]

NumbOfClasses = 2

grid_results_voting, best_accuracy, best_combination = grid_search_cv(
    hidden_layer_sizes_list, activation_functions, learning_rates, batch_sizes, num_epochs_list, train_loader, NumbOfClasses,
    k_folds=5, use_scaling=True)

grid_results_voting['dataset'] = 'cong_voting'

Epoch 1/10, Average Loss: 1.074934760729472
Epoch 2/10, Average Loss: 0.9035237630208334
Epoch 3/10, Average Loss: 0.7592998743057251
Epoch 4/10, Average Loss: 0.5777890086174011
Epoch 5/10, Average Loss: 0.5432573358217875
Epoch 6/10, Average Loss: 0.40527822573979694
Epoch 7/10, Average Loss: 0.3490559160709381
Epoch 8/10, Average Loss: 0.3456357717514038
Epoch 9/10, Average Loss: 0.3522815903027852
Epoch 10/10, Average Loss: 0.2685555915037791
Epoch 1/10, Average Loss: 1.021221439043681
Epoch 2/10, Average Loss: 0.8432358503341675
Epoch 3/10, Average Loss: 0.6551554203033447
Epoch 4/10, Average Loss: 0.6221448183059692
Epoch 5/10, Average Loss: 0.47259073456128436
Epoch 6/10, Average Loss: 0.4194834232330322
Epoch 7/10, Average Loss: 0.3748304347197215
Epoch 8/10, Average Loss: 0.300258606672287
Epoch 9/10, Average Loss: 0.3336060742537181
Epoch 10/10, Average Loss: 0.26849499841531116
Epoch 1/10, Average Loss: 1.0098368724187214
Epoch 2/10, Average Loss: 0.8441717823346456
Epoch 3/

In [23]:
grid_results_voting

Unnamed: 0,Hidden Layer Sizes,Activation Function,Learning Rate,Batch Size,Number of Epochs,Average Accuracy,Average Training Time,dataset
0,[5],tanh,0.01,64,10,0.896471,0.141268,cong_voting
1,[5],tanh,0.001,64,10,0.287731,0.122248,cong_voting
2,[5],tanh,0.0001,64,10,0.212773,0.124714,cong_voting
3,[5],relu,0.01,64,10,0.890756,0.12625,cong_voting
4,[5],relu,0.001,64,10,0.304706,0.119831,cong_voting
5,[5],relu,0.0001,64,10,0.235798,0.114162,cong_voting
6,[5],sigmoid,0.01,64,10,0.913782,0.058026,cong_voting
7,[5],sigmoid,0.001,64,10,0.264706,0.057321,cong_voting
8,[5],sigmoid,0.0001,64,10,0.166891,0.05444,cong_voting
9,[10],tanh,0.01,64,10,0.936639,0.057579,cong_voting


#### Merging

In [24]:
full_results_df = pd.concat([grid_results_voting,grid_results_wine,grid_results_bank], ignore_index=True)
full_results_df

Unnamed: 0,Hidden Layer Sizes,Activation Function,Learning Rate,Batch Size,Number of Epochs,Average Accuracy,Average Training Time,dataset
0,[5],tanh,0.0100,64,10,0.896471,0.141268,cong_voting
1,[5],tanh,0.0010,64,10,0.287731,0.122248,cong_voting
2,[5],tanh,0.0001,64,10,0.212773,0.124714,cong_voting
3,[5],relu,0.0100,64,10,0.890756,0.126250,cong_voting
4,[5],relu,0.0010,64,10,0.304706,0.119831,cong_voting
...,...,...,...,...,...,...,...,...
103,"[20, 25, 30]",relu,0.0010,64,10,0.827725,17.058871,bank_marketing
104,"[20, 25, 30]",relu,0.0001,64,10,0.747481,17.138449,bank_marketing
105,"[20, 25, 30]",sigmoid,0.0100,64,10,0.832132,17.476922,bank_marketing
106,"[20, 25, 30]",sigmoid,0.0010,64,10,0.748727,18.439620,bank_marketing


In [28]:
full_results_df.to_csv('./results/cv_grid_search_results.csv', index=False)

### Top Perfroming Configurations

In [26]:
top_models_rows = []

for dataset in full_results_df['dataset'].unique():
    top_models_rows.extend(full_results_df[full_results_df['dataset'] == dataset].nlargest(2, 'Average Accuracy').iterrows())

top_models_rows_data = [row[1] for row in top_models_rows]

top_models_df = pd.DataFrame(top_models_rows_data).reset_index(drop=True)

top_models_df

Unnamed: 0,Hidden Layer Sizes,Activation Function,Learning Rate,Batch Size,Number of Epochs,Average Accuracy,Average Training Time,dataset
0,"[25, 30]",tanh,0.01,64,10,0.965546,0.066926,cong_voting
1,"[25, 30]",relu,0.01,64,10,0.965378,0.069432,cong_voting
2,"[20, 25, 30]",tanh,0.01,64,10,0.737303,7.681701,wine_quality
3,"[25, 30]",tanh,0.01,64,10,0.726525,6.269039,wine_quality
4,"[25, 30]",relu,0.01,64,10,0.852202,14.773023,bank_marketing
5,"[20, 25, 30]",relu,0.01,64,10,0.841663,17.163013,bank_marketing
