In [8]:
from sklearn.model_selection import KFold
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import f1_score
import torch
import numpy as np
import pandas as pd
import time

In [9]:
# --- Wine quality: the first CSV column is used as the index ---
wine_quality = pd.read_csv('./preprocessed-datasets/wine_quality_prepro.csv', index_col=0)

# --- Congressional voting: recode the label column (democrat -> 1, republican -> 0) ---
cong_voting = pd.read_csv('./preprocessed-datasets/CongressionVoting_prepro.csv').assign(
    **{'class': lambda frame: frame['class'].map({'democrat': 1, 'republican': 0})}
)

# --- Bank marketing: put the label column last and drop the 'Unnamed: 0' column ---
column_to_move = 'class'
bank_marketing = pd.read_csv('./preprocessed-datasets/bank_marketing_prepro.csv')

# Every column except the label, in file order, followed by the label itself
columns = [name for name in bank_marketing.columns if name != column_to_move]
columns.append(column_to_move)

bank_marketing = bank_marketing[columns].drop(columns='Unnamed: 0')

In [10]:
def train_test_split(data: pd.DataFrame, target_label : str, test_size=0.2, return_torch=None):
    """Separate a DataFrame into a feature matrix and a target vector.

    Despite its name, this function does not partition the rows into train
    and test subsets; it only separates the target column from the remaining
    columns.

    Parameters
    ----------
    data : pd.DataFrame
        Table containing the feature columns and the target column.
    target_label : str
        Name of the target column in ``data``.
    test_size : float, default 0.2
        Accepted for backward compatibility; not used by this function.
    return_torch : bool or None, default None
        If truthy, return ``torch.Tensor`` objects instead of NumPy arrays.

    Returns
    -------
    tuple
        ``(X, Y)`` where ``X`` holds the values of every column except
        ``target_label`` and ``Y`` holds the values of ``target_label``.
        Both are NumPy arrays by default, or tensors when ``return_torch``
        is truthy.

    Raises
    ------
    KeyError
        If ``target_label`` is not a column of ``data``.
    """
    # Separate the features from the target column
    X = data.drop([target_label], axis=1).values
    Y = data[target_label].values

    if return_torch:
        # Return the tensors themselves; previously they were built but
        # discarded, so callers always received NumPy arrays.
        return torch.tensor(X), torch.tensor(Y)

    return X, Y

In [11]:
def MLP(X,y, num_splits = 5, random_state = 42):
    """Cross-validate a scikit-learn ``MLPClassifier`` and report mean scores.

    The data is divided with a shuffled ``KFold``; for every fold a fresh
    classifier is fitted on the training part and scored with the weighted
    F1 metric on both the training and the held-out part.

    Parameters
    ----------
    X : array-like
        Feature matrix. Must support indexing with an integer index array
        (``X[train_index]``), e.g. a NumPy array.
    y : array-like
        Target vector, indexable the same way as ``X``.
    num_splits : int, default 5
        Number of cross-validation folds.
    random_state : int, default 42
        Seed used both for shuffling the folds and for the classifier, so
        that repeated runs give the same scores.

    Returns
    -------
    tuple of float
        ``(mean train F1, mean test F1, mean fit time in seconds)``,
        each averaged over the folds.
    """
    kf = KFold(n_splits=num_splits, shuffle=True, random_state=random_state)

    train_f1 = []
    test_f1 = []
    train_times = []

    for train_index, test_index in kf.split(X, y):

        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # Seed the classifier as well; otherwise weight initialisation and
        # batch shuffling differ between runs and the scores are not reproducible.
        mlp_model = MLPClassifier(max_iter=2000, random_state=random_state)

        # Time only the fit call; perf_counter is a monotonic clock meant
        # for measuring intervals.
        t0 = time.perf_counter()
        mlp_model.fit(X_train, y_train)
        train_times.append(time.perf_counter() - t0)

        # Predict on the training and the held-out part of this fold
        mlp_predictions_train = mlp_model.predict(X_train)
        mlp_predictions_test = mlp_model.predict(X_test)

        # Weighted F1 score for this fold
        train_f1.append(f1_score(y_train, mlp_predictions_train, average='weighted'))
        test_f1.append(f1_score(y_test, mlp_predictions_test, average='weighted'))

    return np.mean(train_f1), np.mean(test_f1), np.mean(train_times)

In [12]:
datasets = {
    'wine_quality': wine_quality,
    'congression_voting': cong_voting,
    'bank_marketing': bank_marketing
}

# Collect one record per dataset and build the DataFrame once at the end.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0,
# and growing a frame row by row copies it on every iteration.
records = []
for dataset_name, dataset in datasets.items():
    X, y = train_test_split(dataset, 'class')
    # Only the held-out F1 and the fit time are reported; the train F1 is discarded.
    _, test_f1, train_time = MLP(X,y)
    records.append({'method':'MLP', 'dataset':dataset_name, 'Average F1':test_f1, 'Average Training Time':train_time})

# Passing the column list keeps the column order fixed even if there are no records.
results = pd.DataFrame(records, columns=["method",'dataset','Average F1','Average Training Time'])

In [13]:
# Display the per-dataset results table as the cell output.
results

Unnamed: 0,method,dataset,Average F1,Average Training Time
0,MLP,wine_quality,0.46777,1.773693
1,MLP,congression_voting,0.949067,0.597549
2,MLP,bank_marketing,0.864294,2.533217


In [14]:
# Persist the results table to CSV. to_csv is called with its defaults here,
# so the DataFrame index is written as well.
results.to_csv('./results/MLP.csv')