In [1]:
import torch
import torch.nn.functional as F  
from torch import optim 
from torch import nn
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import numpy as np
import torch.nn.init as init
import random
import time
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from nn_implementation import *

## Function for Local Search

In [2]:
def local_search_cv(num_iterations, initial_configuration, param_ranges, train_loader, NumbOfClasses, k_folds=5, use_scaling=True):
    best_f1 = 0.0
    best_combination = initial_configuration
    current_configuration = initial_configuration
    train_times = []
    results = []
    
    kf = KFold(n_splits=5, shuffle=True, random_state=42)

    if use_scaling:
        scaler = StandardScaler()
        scaler.fit(train_loader.dataset.tensors[0].numpy())
        
    for _ in range(num_iterations):
        # Small changes to the current configuration
        new_configuration = {
            'Hidden Layer Sizes': [
                max(1, size + random.randint(-1, 1)) for size in current_configuration['Hidden Layer Sizes']
            ],
            'Activation Function': random.choice(param_ranges['activation_functions']),
            'Learning Rate': max(param_ranges['min_lr'], min(param_ranges['max_lr'], current_configuration['Learning Rate'] + random.uniform(-0.01, 0.01))),
            'Batch Size': random.choice(param_ranges['batch_sizes']),
            'Number of Epochs': max(1, current_configuration['Number of Epochs'] + random.randint(-1, 1))
        }
        fold_accuracies = []
        fold_f1s = []
        training_times = []

        for train_index, test_index in kf.split(train_loader.dataset):
            
            X_train, X_test = train_loader.dataset.tensors[0][train_index], train_loader.dataset.tensors[0][test_index]
            y_train, y_test = train_loader.dataset.tensors[1][train_index], train_loader.dataset.tensors[1][test_index]

            model = NN(input_size=train_loader.dataset.tensors[0].shape[1],
                       num_classes=NumbOfClasses,
                       hidden_layer_sizes=new_configuration['Hidden Layer Sizes'],
                       activation_function=new_configuration['Activation Function'])
            criterion = nn.CrossEntropyLoss()
            optimizer = optim.Adam(model.parameters(), lr=new_configuration['Learning Rate'])

            if use_scaling:
                X_train_scaled = torch.tensor(scaler.transform(X_train.numpy()))
                X_test_scaled = torch.tensor(scaler.transform(X_test.numpy()))
            else:
                X_train_scaled, X_test_scaled = X_train, X_test
                 
            train_start_time = time.time()        
            train_model(model, DataLoader(TensorDataset(X_train_scaled, y_train), batch_size=new_configuration['Batch Size'], shuffle=True), optimizer, criterion, new_configuration['Number of Epochs'])
            train_end_time = time.time()
            train_time = train_end_time - train_start_time
          
            accuracy_test, f1_test = check_accuracy(DataLoader(TensorDataset(X_test_scaled, y_test), batch_size=new_configuration['Batch Size'], shuffle=False), model)
            fold_accuracies.append(accuracy_test)
            training_times.append(train_time)
            fold_f1s.append(f1_test)

        avg_accuracy = np.mean(fold_accuracies)
        avg_f1 = np.mean(fold_f1s)
        avg_train_time = np.mean(training_times)

        result_entry = {
                'Hidden Layer Sizes': new_configuration['Hidden Layer Sizes'],
                'Activation Function': new_configuration['Activation Function'].__name__,
                'Learning Rate': new_configuration['Learning Rate'],
                'Batch Size': new_configuration['Batch Size'],
                'Number of Epochs': new_configuration['Number of Epochs'],
                'Average Accuracy': avg_accuracy,
                'Average F1': avg_f1,
                'Average Training Time': avg_train_time
        }

        results.append(result_entry)

        if f1_test > best_f1:
            best_f1 = f1_test
            best_combination = new_configuration
            current_configuration = new_configuration  # Update the current configuration

    results_df = pd.DataFrame(results)
    return best_combination, best_f1, results_df

## Loading / preparing data

### Wine Quality

In [3]:
wine_quality = pd.read_csv('./preprocessed-datasets/wine_quality_prepro.csv', index_col=0)
wine_quality.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,class,wine_type
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,1
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5,1
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5,1
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6,1
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,1


### Congressional Voting

In [4]:
cong_voting = pd.read_csv('./preprocessed-datasets/CongressionVoting_prepro.csv')
# encode class value democrat as 1 and republican as 0
cong_voting['class'] = cong_voting['class'].map({'democrat': 1, 'republican': 0})
cong_voting.head()

Unnamed: 0,ID,handicapped-infants,water-project-cost-sharing,adoption-of-the-budget-resolution,physician-fee-freeze,el-salvador-aid,religious-groups-in-schools,anti-satellite-test-ban,aid-to-nicaraguan-contras,mx-missile,immigration,synfuels-crporation-cutback,education-spending,superfund-right-to-sue,crime,duty-free-exports,export-administration-act-south-africa,class
0,140,1.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1
1,383,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1
2,201,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1
3,297,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0
4,309,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0


### Bank Marketing

In [5]:
bank_marketing = pd.read_csv('./preprocessed-datasets/bank_marketing_prepro.csv')
column_to_move = 'class'

# Move class to the last index
columns = [col for col in bank_marketing.columns if col != column_to_move] + [column_to_move]
bank_marketing = bank_marketing[columns]

bank_marketing.drop('Unnamed: 0', axis=1,inplace=True)
bank_marketing.head()

Unnamed: 0,age,default,housing,loan,campaign,pdays,previous,emp.var.rate,cons.price.idx,cons.conf.idx,...,education_basic.9y,education_high.school,education_illiterate,education_professional.course,education_university.degree,education_unknown,poutcome_failure,poutcome_nonexistent,poutcome_success,class
0,56,0.0,0.0,0.0,1,999,0,1.1,93.994,-36.4,...,0,0,0,0,0,0,0,1,0,0
1,57,0.0,0.0,0.0,1,999,0,1.1,93.994,-36.4,...,0,1,0,0,0,0,0,1,0,0
2,37,0.0,1.0,0.0,1,999,0,1.1,93.994,-36.4,...,0,1,0,0,0,0,0,1,0,0
3,40,0.0,0.0,0.0,1,999,0,1.1,93.994,-36.4,...,0,0,0,0,0,0,0,1,0,0
4,56,0.0,0.0,1.0,1,999,0,1.1,93.994,-36.4,...,0,1,0,0,0,0,0,1,0,0


### Testing Local search

In [6]:
dataset = cong_voting

#train_X, train_Y, test_X, test_Y = train_test_split(dataset, "class", return_torch=True, DoSmote=smote_in)

#train_data = TensorDataset(train_X, train_Y)
#train_loader = DataLoader(train_data, batch_size=32, shuffle=False)

#test_data = TensorDataset(test_X, test_Y)
#test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

X = dataset.drop(["class"], axis=1).values
Y = dataset["class"].values
data = TensorDataset(torch.tensor(X), torch.tensor(Y))
data_loader = DataLoader(data, batch_size=32, shuffle=False)

input_size = X.shape[1]


NumbOfClasses = 2

initial_configuration = {
    'Hidden Layer Sizes': [25, 30],
    'Activation Function': F.relu,
    'Learning Rate': 0.001,
    'Batch Size': 64,
    'Number of Epochs': 10
}

param_ranges = {
    'min_hidden': 5,
    'max_hidden': 50,
    'min_layers': 1,
    'max_layers': 3,
    'activation_functions': [F.relu, F.tanh, F.sigmoid],
    'min_lr': 0.001,
    'max_lr': 0.1,
    'batch_sizes': [32, 64, 128],
    'num_epochs': [5, 10, 15]
}

num_iterations = 50

best_combination, best_f1, results_df_local = local_search_cv(
    num_iterations, initial_configuration, param_ranges, data_loader, NumbOfClasses, k_folds=5, use_scaling=True
)



Epoch 1/10, Average Loss: 0.2541090324521065
Epoch 2/10, Average Loss: 0.22072048485279083
Epoch 3/10, Average Loss: 0.21271023899316788
Epoch 4/10, Average Loss: 0.20390965789556503
Epoch 5/10, Average Loss: 0.19372616708278656
Epoch 6/10, Average Loss: 0.18042687326669693
Epoch 7/10, Average Loss: 0.19293379038572311
Epoch 8/10, Average Loss: 0.1994716003537178
Epoch 9/10, Average Loss: 0.15065941214561462
Epoch 10/10, Average Loss: 0.16667014360427856
Epoch 1/10, Average Loss: 0.22540652006864548
Epoch 2/10, Average Loss: 0.20452119410037994
Epoch 3/10, Average Loss: 0.1765819489955902
Epoch 4/10, Average Loss: 0.2014598771929741
Epoch 5/10, Average Loss: 0.18791567534208298
Epoch 6/10, Average Loss: 0.15833815932273865
Epoch 7/10, Average Loss: 0.1542723923921585
Epoch 8/10, Average Loss: 0.14193421602249146
Epoch 9/10, Average Loss: 0.1294546015560627
Epoch 10/10, Average Loss: 0.11521000787615776
Epoch 1/10, Average Loss: 0.24179722368717194
Epoch 2/10, Average Loss: 0.1909792460

In [7]:
print("Best Combination:", best_combination)
print("Best F1:", best_f1)

Best Combination: {'Hidden Layer Sizes': [23, 31], 'Activation Function': <function tanh at 0x10dc3dee0>, 'Learning Rate': 0.0050058918158679975, 'Batch Size': 128, 'Number of Epochs': 10}
Best F1: 1.0


In [8]:
results_df_local

Unnamed: 0,Hidden Layer Sizes,Activation Function,Learning Rate,Batch Size,Number of Epochs,Average Accuracy,Average F1,Average Training Time
0,"[24, 31]",tanh,0.001,128,10,0.931078,0.931784,0.079168
1,"[23, 32]",relu,0.001,32,10,0.935835,0.936216,0.189263
2,"[22, 32]",tanh,0.001,64,10,0.857294,0.858783,0.146165
3,"[23, 31]",tanh,0.005006,128,10,0.949577,0.949985,0.223138
4,"[24, 30]",relu,0.01147,64,9,0.949471,0.949189,0.140051
5,"[22, 30]",relu,0.013399,128,11,0.94482,0.944867,0.103684
6,"[24, 30]",sigmoid,0.001772,128,10,0.631395,0.489206,0.069295
7,"[23, 32]",tanh,0.002135,64,11,0.935518,0.936207,0.128085
8,"[22, 31]",relu,0.001,32,9,0.917336,0.918225,0.149306
9,"[24, 32]",sigmoid,0.002547,32,11,0.912896,0.913233,0.182445


## Local search over all datasets

In [9]:
datasets = {'wine_quality': wine_quality, 'cong_voting': cong_voting, 'bank_marketing': bank_marketing}

initial_configuration = {
    'Hidden Layer Sizes': [25, 30],
    'Activation Function': F.tanh,
    'Learning Rate': 0.01,
    'Batch Size': 64,
    'Number of Epochs': 10
}

param_ranges = {
    'min_hidden': 5,
    'max_hidden': 50,
    'min_layers': 1,
    'max_layers': 3,
    'activation_functions': [F.relu, F.tanh, F.sigmoid],
    'min_lr': 0.001,
    'max_lr': 0.1,
    'batch_sizes': [32, 64, 128],
    'num_epochs': [5, 10, 15]
}

num_iterations = 50


all_local_results = []

for dataset_name, dataset in datasets.items():
    if dataset_name == 'wine_quality' or dataset_name == 'bank_marketing':
        smote_in = True
    else:
        smote_in = False
    
    #train_X, train_Y, test_X, test_Y = train_test_split(dataset, "class", return_torch=True, DoSmote=smote_in)

    #train_data = TensorDataset(train_X, train_Y)
    #train_loader = DataLoader(train_data, batch_size=32, shuffle=False)

    #test_data = TensorDataset(test_X, test_Y)
    #test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

    X = dataset.drop(["class"], axis=1).values
    Y = dataset["class"].values
    data = TensorDataset(torch.tensor(X), torch.tensor(Y))
    data_loader = DataLoader(data, batch_size=32, shuffle=False)

    input_size = X.shape[1]


    if dataset_name == 'wine_quality':
        NumbOfClasses = 10 # theoretical number of 'targets' is 10 but in praxis only 7 are present
    else:
        NumbOfClasses = len(np.unique(Y))

    best_combination, best_f1, results_df_local = local_search_cv(
    num_iterations, initial_configuration, param_ranges, data_loader, NumbOfClasses, k_folds=5, use_scaling=True)
    
    results_df_local['dataset'] = dataset_name
    all_local_results.append(results_df_local)

Epoch 1/11, Average Loss: 1.2208691406540755
Epoch 2/11, Average Loss: 1.0971437314661538
Epoch 3/11, Average Loss: 1.0791592183636456
Epoch 4/11, Average Loss: 1.059327723049536
Epoch 5/11, Average Loss: 1.0509784250724605
Epoch 6/11, Average Loss: 1.0420888836790876
Epoch 7/11, Average Loss: 1.0366420702236454
Epoch 8/11, Average Loss: 1.0262067768631913
Epoch 9/11, Average Loss: 1.0221656887996486
Epoch 10/11, Average Loss: 1.01390230510293
Epoch 11/11, Average Loss: 1.0066067445568923
Epoch 1/11, Average Loss: 1.216762826937001
Epoch 2/11, Average Loss: 1.0935691246172277
Epoch 3/11, Average Loss: 1.070687241670562
Epoch 4/11, Average Loss: 1.0579424260593042
Epoch 5/11, Average Loss: 1.0506658197903052
Epoch 6/11, Average Loss: 1.0444228343847322
Epoch 7/11, Average Loss: 1.028574056014782
Epoch 8/11, Average Loss: 1.0246578055184061
Epoch 9/11, Average Loss: 1.0219368622070406
Epoch 10/11, Average Loss: 1.0178320785848105
Epoch 11/11, Average Loss: 1.0078002803209352
Epoch 1/11, 

In [10]:
all_local_results_df = pd.concat(all_local_results, ignore_index=True)
all_local_results_df = pd.DataFrame(all_local_results_df)


In [11]:
all_local_results_df

Unnamed: 0,Hidden Layer Sizes,Activation Function,Learning Rate,Batch Size,Number of Epochs,Average Accuracy,Average F1,Average Training Time,dataset
0,"[24, 30]",sigmoid,0.013762,64,11,0.558105,0.526884,2.165981,wine_quality
1,"[24, 31]",tanh,0.022259,32,12,0.550563,0.526648,5.264510,wine_quality
2,"[23, 31]",tanh,0.023461,128,11,0.546254,0.527366,1.851085,wine_quality
3,"[23, 32]",relu,0.012282,32,13,0.561802,0.544837,4.951335,wine_quality
4,"[22, 33]",relu,0.018211,32,14,0.549642,0.526481,6.234913,wine_quality
...,...,...,...,...,...,...,...,...,...
145,"[27, 33]",sigmoid,0.008207,64,9,0.898296,0.873509,16.827931,bank_marketing
146,"[26, 31]",relu,0.001000,128,10,0.896815,0.873836,10.173205,bank_marketing
147,"[27, 32]",relu,0.012224,128,8,0.897422,0.871447,8.963481,bank_marketing
148,"[27, 33]",tanh,0.005936,64,9,0.896742,0.876085,16.207057,bank_marketing


In [12]:
all_local_results_df.to_csv('./results/cv_local_search_results.csv', index=False)

In [13]:
top_models_rows = []

for dataset in all_local_results_df['dataset'].unique():
    top_models_rows.extend(all_local_results_df[all_local_results_df['dataset'] == dataset].nlargest(2, 'Average Accuracy').iterrows())

top_models_rows_data = [row[1] for row in top_models_rows]

top_models_df = pd.DataFrame(top_models_rows_data).reset_index(drop=True)

top_models_df

Unnamed: 0,Hidden Layer Sizes,Activation Function,Learning Rate,Batch Size,Number of Epochs,Average Accuracy,Average F1,Average Training Time,dataset
0,"[23, 32]",relu,0.016112,128,13,0.570572,0.545108,1.517483,wine_quality
1,"[23, 32]",tanh,0.007813,64,13,0.567648,0.547346,2.128746,wine_quality
2,"[25, 27]",tanh,0.005852,32,11,0.967759,0.967782,0.123304,cong_voting
3,"[25, 28]",relu,0.016316,32,10,0.963319,0.963276,0.112215,cong_voting
4,"[27, 33]",relu,0.010166,32,10,0.898781,0.872917,16.808335,bank_marketing
5,"[27, 32]",sigmoid,0.015724,64,9,0.898757,0.872923,10.659402,bank_marketing
