## Data preparement

In [1]:
import pandas as pd
import time
import matplotlib.pyplot as plt
from collections import Counter
from sklearn.preprocessing import LabelEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split

"""Read dataframe"""
df1 = pd.read_csv("./ELECTROENCEPHALOGRAM_OF_SENSITIVITY_BODIES/specified_finger.csv",
                  delimiter=";", decimal=".", header=0)
df2 = pd.read_csv("./ELECTROENCEPHALOGRAM_OF_SENSITIVITY_BODIES/thumb_finger.csv",
                  delimiter=";", decimal=".", header=0)

"""Preprocessing data"""
#Addition of target variable
df1["target"] = "specified_finger"
df2["target"] = "thumb_finger"
#Concatenation dataframes

specified_finger = df1.sample(frac=0.2)
thumb_finger = df2.sample(frac=0.2)

print(specified_finger.shape)
print(thumb_finger.shape)

frames = [specified_finger, thumb_finger]

df = pd.concat(frames)

print(df.shape)

#Removing duplicate rows
df.drop_duplicates(subset=None, keep='first', inplace=True)
#Reset index of dataframe
df.reset_index(drop=True, inplace=True)
#Return column names of dataframe
column_names = list(df.columns)
#Checking data balance
target_var = df[column_names[-1]]
balance = Counter(target_var)
#Initialization of encoder
labelencoder = LabelEncoder()
#Encoding target variable
df["target"] = labelencoder.fit_transform(df["target"])

(17990, 17)
(18007, 17)
(35997, 17)


## Splitting to test & train

In [2]:
"""Splitting independent 'x' and dependent 'y'  variables of dataframe 'df'"""
x = df[column_names[0:len(column_names)-1]]
#Standardization or mean removal and variance scaling
x = (x-x.mean())/x.std()
#Converting to numpy array
x = x.values
#Create target variable
y = df[column_names[len(column_names)-1]].values

"""Splitting on training sample and test sample"""
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

# MLP model

## MLP training

In [3]:
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
from sklearn.neural_network import MLPClassifier

mlp = MLPClassifier(activation='logistic', batch_size=25, max_iter=200)

start_time = time.time()

mlp.fit(x_train, y_train)

end_time = time.time()
execution_time = end_time - start_time
print(f"Execution time: {execution_time} seconds")

y_pred = mlp.predict(x_test)
y_pred_prob = mlp.predict_proba(x_test)

acc_score = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred)

acc_score, f1, roc_auc

Execution time: 68.66433954238892 seconds




(0.7770833333333333, 0.7732089868588385, 0.7770317761088088)

# KAN model

## Data preparing

In [8]:
import numpy as np
num_classes = 2

one_hot_encoded = np.eye(num_classes)[y]

"""Splitting on training sample and test sample"""
x_train, x_test, y_train, y_test = train_test_split(x, one_hot_encoded, test_size=0.2)

y_train[:5]

array([[0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.]])

In [9]:
import torch

dtype = torch.get_default_dtype()
device = "cpu"
    
x_train = torch.from_numpy(x_train).type(dtype).to(device)
y_train = torch.from_numpy(y_train).type(dtype).to(device)

x_test = torch.from_numpy(x_test).type(dtype).to(device)
y_test = torch.from_numpy(y_test).type(dtype).to(device)

custom_dataset = {
    'train_input': x_train,
    'test_input': x_test,
    'train_label': y_train,
    'test_label': y_test
}

## KAN training

In [10]:
from kan import KAN

model = KAN(width=[16, 5, 3, 2], grid=5, k=3)

def train_acc():
    return torch.mean((torch.argmax(model(custom_dataset['train_input'])[:,0]) == custom_dataset['train_label'][:,0]).type(dtype))

def test_acc():
    return torch.mean((torch.argmax(model(custom_dataset['test_input'])[:,0]) == custom_dataset['test_label'][:,0]).type(dtype))

start_time = time.time()

results = model.fit(custom_dataset, opt="LBFGS", steps=100,
                    batch=x_test.shape[0], loss_fn=torch.nn.CrossEntropyLoss(), update_grid = False)
end_time = time.time()

checkpoint directory created: ./model
saving model version 0.0


| train_loss: 6.56e-01 | test_loss: 6.87e-01 | reg: 4.92e+01 | : 100%|█| 100/100 [03:04<00:00,  1.85

saving model version 0.1





In [11]:
execution_time = end_time - start_time
execution_time

184.56576991081238

In [12]:
logits = model(x_test)
logits[:10]
print(logits.shape)
y_pred = torch.argmax(logits, dim=1)

y_pred[:10]

torch.Size([7200, 2])


tensor([0, 1, 0, 1, 1, 1, 0, 1, 0, 1])

## KAN accuracy testing

In [13]:
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
import numpy as np

y_test_numpy = torch.argmax(y_test, dim=1).detach().numpy()
y_pred_numpy = y_pred.detach().numpy()
#y_pred_numpy = np.where((y_pred_numpy < 0.5) | (y_pred_numpy < 0), 0, 1)

print(np.unique(y_pred_numpy))

#Compute the 'accuracy-score'
acc_score = accuracy_score(y_test_numpy, y_pred_numpy)
#Compute the 'f1-score'
f1 = f1_score(y_test_numpy, y_pred_numpy)
#Compute the 'roc-auc'
roc_auc = roc_auc_score(y_test_numpy, y_pred_numpy)

acc_score, f1, roc_auc

[0 1]


(0.7438888888888889, 0.7384397163120567, 0.7440116558469517)