In [13]:
import torch
import torch.nn.functional as F  
from torch import optim 
from torch import nn
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import numpy as np
import torch.nn.init as init
import random
import time
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from nn_implementation import *

## Function for Local Search

In [14]:
def local_search_cv(num_iterations, initial_configuration, param_ranges, train_loader, NumbOfClasses, k_folds=5, use_scaling=True):
    best_f1 = 0.0
    best_combination = initial_configuration
    current_configuration = initial_configuration
    train_times = []
    results = []
    
    kf = KFold(n_splits=5, shuffle=True, random_state=42)

    if use_scaling:
        scaler = StandardScaler()
        scaler.fit(train_loader.dataset.tensors[0].numpy())
        
    for _ in range(num_iterations):
        # Small changes to the current configuration
        new_configuration = {
            'Hidden Layer Sizes': [
                max(1, size + random.randint(-1, 1)) for size in current_configuration['Hidden Layer Sizes']
            ],
            'Activation Function': random.choice(param_ranges['activation_functions']),
            'Learning Rate': max(param_ranges['min_lr'], min(param_ranges['max_lr'], current_configuration['Learning Rate'] + random.uniform(-0.01, 0.01))),
            'Batch Size': random.choice(param_ranges['batch_sizes']),
            'Number of Epochs': max(1, current_configuration['Number of Epochs'] + random.randint(-1, 1))
        }
        fold_accuracies = []
        fold_f1s = []
        training_times = []

        for train_index, test_index in kf.split(train_loader.dataset):
            
            X_train, X_test = train_loader.dataset.tensors[0][train_index], train_loader.dataset.tensors[0][test_index]
            y_train, y_test = train_loader.dataset.tensors[1][train_index], train_loader.dataset.tensors[1][test_index]

            model = NN(input_size=train_loader.dataset.tensors[0].shape[1],
                       num_classes=NumbOfClasses,
                       hidden_layer_sizes=new_configuration['Hidden Layer Sizes'],
                       activation_function=new_configuration['Activation Function'])
            criterion = nn.CrossEntropyLoss()
            optimizer = optim.Adam(model.parameters(), lr=new_configuration['Learning Rate'])

            if use_scaling:
                X_train_scaled = torch.tensor(scaler.transform(X_train.numpy()))
                X_test_scaled = torch.tensor(scaler.transform(X_test.numpy()))
            else:
                X_train_scaled, X_test_scaled = X_train, X_test
                 
            train_start_time = time.time()        
            train_model(model, DataLoader(TensorDataset(X_train_scaled, y_train), batch_size=new_configuration['Batch Size'], shuffle=True), optimizer, criterion, new_configuration['Number of Epochs'])
            train_end_time = time.time()
            train_time = train_end_time - train_start_time
          
            accuracy_test, f1_test = check_accuracy(DataLoader(TensorDataset(X_test_scaled, y_test), batch_size=new_configuration['Batch Size'], shuffle=False), model)
            fold_accuracies.append(accuracy_test)
            training_times.append(train_time)
            fold_f1s.append(f1_test)

        avg_accuracy = np.mean(fold_accuracies)
        avg_f1 = np.mean(fold_f1s)
        avg_train_time = np.mean(training_times)

        result_entry = {
                'Hidden Layer Sizes': new_configuration['Hidden Layer Sizes'],
                'Activation Function': new_configuration['Activation Function'].__name__,
                'Learning Rate': new_configuration['Learning Rate'],
                'Batch Size': new_configuration['Batch Size'],
                'Number of Epochs': new_configuration['Number of Epochs'],
                'Average Accuracy': avg_accuracy,
                'Average F1': avg_f1,
                'Average Training Time': avg_train_time
        }

        results.append(result_entry)

        if f1_test > best_f1:
            best_f1 = f1_test
            best_combination = new_configuration
            current_configuration = new_configuration  # Update the current configuration

    results_df = pd.DataFrame(results)
    return best_combination, best_f1, results_df

## Loading / preparing data

### Wine Quality

In [15]:
wine_quality = pd.read_csv('./preprocessed-datasets/wine_quality_prepro.csv', index_col=0)
wine_quality.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,class,wine_type
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,1
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5,1
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5,1
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6,1
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,1


### Congressional Voting

In [16]:
cong_voting = pd.read_csv('./preprocessed-datasets/CongressionVoting_prepro.csv')
# encode class value democrat as 1 and republican as 0
cong_voting['class'] = cong_voting['class'].map({'democrat': 1, 'republican': 0})
cong_voting.head()

Unnamed: 0,ID,handicapped-infants,water-project-cost-sharing,adoption-of-the-budget-resolution,physician-fee-freeze,el-salvador-aid,religious-groups-in-schools,anti-satellite-test-ban,aid-to-nicaraguan-contras,mx-missile,immigration,synfuels-crporation-cutback,education-spending,superfund-right-to-sue,crime,duty-free-exports,export-administration-act-south-africa,class
0,140,1.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1
1,383,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1
2,201,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1
3,297,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0
4,309,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0


### Bank Marketing

In [17]:
bank_marketing = pd.read_csv('./preprocessed-datasets/bank_marketing_prepro.csv')
column_to_move = 'class'

# Move class to the last index
columns = [col for col in bank_marketing.columns if col != column_to_move] + [column_to_move]
bank_marketing = bank_marketing[columns]

bank_marketing.drop('Unnamed: 0', axis=1,inplace=True)
bank_marketing.head()

Unnamed: 0,age,default,housing,loan,campaign,pdays,previous,emp.var.rate,cons.price.idx,cons.conf.idx,...,education_basic.9y,education_high.school,education_illiterate,education_professional.course,education_university.degree,education_unknown,poutcome_failure,poutcome_nonexistent,poutcome_success,class
0,56,0.0,0.0,0.0,1,999,0,1.1,93.994,-36.4,...,0,0,0,0,0,0,0,1,0,0
1,57,0.0,0.0,0.0,1,999,0,1.1,93.994,-36.4,...,0,1,0,0,0,0,0,1,0,0
2,37,0.0,1.0,0.0,1,999,0,1.1,93.994,-36.4,...,0,1,0,0,0,0,0,1,0,0
3,40,0.0,0.0,0.0,1,999,0,1.1,93.994,-36.4,...,0,0,0,0,0,0,0,1,0,0
4,56,0.0,0.0,1.0,1,999,0,1.1,93.994,-36.4,...,0,1,0,0,0,0,0,1,0,0


### Testing Local search

In [18]:
dataset = cong_voting

#train_X, train_Y, test_X, test_Y = train_test_split(dataset, "class", return_torch=True, DoSmote=smote_in)

#train_data = TensorDataset(train_X, train_Y)
#train_loader = DataLoader(train_data, batch_size=32, shuffle=False)

#test_data = TensorDataset(test_X, test_Y)
#test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

X = dataset.drop(["class"], axis=1).values
Y = dataset["class"].values
data = TensorDataset(torch.tensor(X), torch.tensor(Y))
data_loader = DataLoader(data, batch_size=32, shuffle=False)

input_size = X.shape[1]


NumbOfClasses = 2

initial_configuration = {
    'Hidden Layer Sizes': [25, 30],
    'Activation Function': F.relu,
    'Learning Rate': 0.001,
    'Batch Size': 64,
    'Number of Epochs': 10
}

param_ranges = {
    'min_hidden': 5,
    'max_hidden': 50,
    'min_layers': 1,
    'max_layers': 3,
    'activation_functions': [F.relu, F.tanh, F.sigmoid],
    'min_lr': 0.001,
    'max_lr': 0.1,
    'batch_sizes': [32, 64, 128],
    'num_epochs': [5, 10, 15]
}

num_iterations = 50

best_combination, best_f1, results_df_local = local_search_cv(
    num_iterations, initial_configuration, param_ranges, data_loader, NumbOfClasses, k_folds=5, use_scaling=True
)



Epoch 1/9, Average Loss: 1.1439792116483052
Epoch 2/9, Average Loss: 1.0000627239545186
Epoch 3/9, Average Loss: 0.8494235475858053
Epoch 4/9, Average Loss: 0.7388467192649841
Epoch 5/9, Average Loss: 0.6501200993855795
Epoch 6/9, Average Loss: 0.568446695804596
Epoch 7/9, Average Loss: 0.5038002530733744
Epoch 8/9, Average Loss: 0.4595282773176829
Epoch 9/9, Average Loss: 0.4187895854314168
Epoch 1/9, Average Loss: 1.0623035033543904
Epoch 2/9, Average Loss: 0.9240962068239847
Epoch 3/9, Average Loss: 0.7888694405555725
Epoch 4/9, Average Loss: 0.664382259051005
Epoch 5/9, Average Loss: 0.5817477504412333
Epoch 6/9, Average Loss: 0.5188534160455068
Epoch 7/9, Average Loss: 0.4531663755575816
Epoch 8/9, Average Loss: 0.395596315463384
Epoch 9/9, Average Loss: 0.35818377137184143
Epoch 1/9, Average Loss: 1.0996410846710205
Epoch 2/9, Average Loss: 0.9563032388687134
Epoch 3/9, Average Loss: 0.8162670135498047
Epoch 4/9, Average Loss: 0.7206334074338278
Epoch 5/9, Average Loss: 0.6378455

KeyboardInterrupt: 

In [26]:
print("Best Combination:", best_combination)
print("Best F1:", best_f1)

Best Combination: {'Hidden Layer Sizes': [22, 28], 'Activation Function': <function tanh at 0x000001CA8817AD40>, 'Learning Rate': 0.011267827629393996, 'Batch Size': 128, 'Number of Epochs': 10}
Best Accuracy: tensor(1.)


In [27]:
results_df_local

Unnamed: 0,Hidden Layer Sizes,Activation Function,Learning Rate,Batch Size,Number of Epochs,Average Accuracy,Average Training Time
0,"[24, 29]",sigmoid,0.001,64,11,0.558824,0.065888
1,"[23, 29]",sigmoid,0.001,32,10,0.558824,0.070072
2,"[23, 29]",relu,0.001,128,10,0.794118,0.042366
3,"[23, 28]",sigmoid,0.006041,128,11,0.941176,0.046875
4,"[22, 28]",tanh,0.011268,128,10,1.0,0.039804
5,"[23, 27]",relu,0.01409,64,9,0.911765,0.049102
6,"[23, 29]",tanh,0.009778,128,10,0.941176,0.040652
7,"[22, 28]",relu,0.007984,64,11,0.970588,0.062218
8,"[21, 29]",sigmoid,0.01432,128,9,0.852941,0.040927
9,"[22, 27]",sigmoid,0.004652,32,11,0.882353,0.08206


## Local search over all datasets

In [19]:
datasets = {'wine_quality': wine_quality, 'cong_voting': cong_voting, 'bank_marketing': bank_marketing}

initial_configuration = {
    'Hidden Layer Sizes': [25, 30],
    'Activation Function': F.relu,
    'Learning Rate': 0.01,
    'Batch Size': 32,
    'Number of Epochs': 10
}

param_ranges = {
    'min_hidden': 5,
    'max_hidden': 50,
    'min_layers': 1,
    'max_layers': 3,
    'activation_functions': [F.relu, F.tanh, F.sigmoid],
    'min_lr': 0.001,
    'max_lr': 0.1,
    'batch_sizes': [32, 64, 128],
    'num_epochs': [5, 10, 15]
}

num_iterations = 50


all_local_results = []

for dataset_name, dataset in datasets.items():
    if dataset_name == 'wine_quality' or dataset_name == 'bank_marketing':
        smote_in = True
    else:
        smote_in = False
    
    #train_X, train_Y, test_X, test_Y = train_test_split(dataset, "class", return_torch=True, DoSmote=smote_in)

    #train_data = TensorDataset(train_X, train_Y)
    #train_loader = DataLoader(train_data, batch_size=32, shuffle=False)

    #test_data = TensorDataset(test_X, test_Y)
    #test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

    X = dataset.drop(["class"], axis=1).values
    Y = dataset["class"].values
    data = TensorDataset(torch.tensor(X), torch.tensor(Y))
    data_loader = DataLoader(data, batch_size=32, shuffle=False)

    input_size = X.shape[1]


    if dataset_name == 'wine_quality':
        NumbOfClasses = 10 # theoretical number of 'targets' is 10 but in praxis only 7 are present
    else:
        NumbOfClasses = len(np.unique(Y))

    best_combination, best_f1, results_df_local = local_search_cv(
    num_iterations, initial_configuration, param_ranges, data_loader, NumbOfClasses, k_folds=5, use_scaling=True)
    
    results_df_local['dataset'] = dataset_name
    all_local_results.append(results_df_local)

Epoch 1/9, Average Loss: 1.1833768867276198
Epoch 2/9, Average Loss: 1.058368110217931
Epoch 3/9, Average Loss: 1.0466419326747123
Epoch 4/9, Average Loss: 1.0251545182035013
Epoch 5/9, Average Loss: 1.0178479098834874
Epoch 6/9, Average Loss: 1.0025088315361117
Epoch 7/9, Average Loss: 1.0040231976041034
Epoch 8/9, Average Loss: 0.9888390576912581
Epoch 9/9, Average Loss: 0.9852977555953651
Epoch 1/9, Average Loss: 1.1735128094813576
Epoch 2/9, Average Loss: 1.052497517843188
Epoch 3/9, Average Loss: 1.0364684700234537
Epoch 4/9, Average Loss: 1.0235919052837816


KeyboardInterrupt: 

In [29]:
all_local_results_df = pd.concat(all_local_results, ignore_index=True)
all_local_results_df = pd.DataFrame(all_local_results_df)


In [30]:
all_local_results_df

Unnamed: 0,Hidden Layer Sizes,Activation Function,Learning Rate,Batch Size,Number of Epochs,Average Accuracy,Average Training Time,dataset
0,"[25, 31]",sigmoid,0.007763,32,10,0.680089,6.519650,wine_quality
1,"[26, 30]",relu,0.006508,32,10,0.722575,7.076098,wine_quality
2,"[26, 29]",sigmoid,0.010781,128,10,0.652505,2.985102,wine_quality
3,"[27, 29]",tanh,0.006458,32,9,0.720355,6.217975,wine_quality
4,"[25, 30]",relu,0.001000,32,9,0.654724,6.108246,wine_quality
...,...,...,...,...,...,...,...,...
145,"[25, 29]",tanh,0.021298,128,8,0.834742,7.187644,bank_marketing
146,"[25, 28]",tanh,0.024133,64,7,0.803143,9.593440,bank_marketing
147,"[27, 29]",tanh,0.022888,128,6,0.819882,5.363683,bank_marketing
148,"[27, 27]",sigmoid,0.026978,128,8,0.825177,7.696947,bank_marketing


In [33]:
all_local_results_df.to_csv('./results/cv_local_search_results.csv', index=False)

In [32]:
top_models_rows = []

for dataset in all_local_results_df['dataset'].unique():
    top_models_rows.extend(all_local_results_df[all_local_results_df['dataset'] == dataset].nlargest(2, 'Average Accuracy').iterrows())

top_models_rows_data = [row[1] for row in top_models_rows]

top_models_df = pd.DataFrame(top_models_rows_data).reset_index(drop=True)

top_models_df

Unnamed: 0,Hidden Layer Sizes,Activation Function,Learning Rate,Batch Size,Number of Epochs,Average Accuracy,Average Training Time,dataset
0,"[24, 30]",tanh,0.016325,64,11,0.739379,4.710157,wine_quality
1,"[24, 30]",tanh,0.007228,32,11,0.737159,6.833775,wine_quality
2,"[24, 32]",tanh,0.014946,128,9,0.970588,0.034791,cong_voting
3,"[25, 32]",tanh,0.009191,64,8,0.970588,0.042795,cong_voting
4,"[26, 28]",relu,0.020118,128,7,0.850286,6.241501,bank_marketing
5,"[26, 28]",relu,0.0162,64,8,0.847041,10.612522,bank_marketing
