In [1]:
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split

### Preparing data

In [2]:
# Location of MACCS_melanin_classes.csv. The original local path stays as the
# default so existing runs behave exactly as before; set the MELANIN_MACCS_CSV
# environment variable to load the file from elsewhere without editing this cell.
MACCS_CSV_PATH = os.environ.get(
    "MELANIN_MACCS_CSV",
    r'C:\Users\Anastasia\Documents\GitHub2\ophthalmic_drugs\Analysis\melanin\data_preprossesing\MACCS_melanin_classes.csv',
)
df = pd.read_csv(MACCS_CSV_PATH)


In [3]:
# Inputs are the first 166 columns by position; the target is the 'Class' column.
# NOTE(review): assumes 'Class' is not one of those first 166 columns — confirm
# against the CSV layout.
X, y = (
    np.array(df.iloc[:, :166]),
    np.array(df.loc[:, 'Class']),
)

In [4]:
# Hold out 20% of the rows as the test split; random_state is pinned to 10.
Xtrain, Xtest, ytrain, ytest = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=10,
)

### Train KAN

In [5]:
from kan.KAN import KAN

In [6]:
# Wrap the NumPy splits as int64 tensors under the keys that are later passed
# to model.train(). Labels are reshaped to a single column (n_rows, 1).
dataset = {
    'train_input': torch.tensor(Xtrain, dtype=torch.int64),
    'train_label': torch.tensor(ytrain.reshape(-1, 1), dtype=torch.int64),
    'test_input': torch.tensor(Xtest, dtype=torch.int64),
    'test_label': torch.tensor(ytest.reshape(-1, 1), dtype=torch.int64),
}

# From this point X and y name the *training tensors*, replacing the full
# NumPy arrays they were bound to before.
X, y = dataset['train_input'], dataset['train_label']


In [7]:
# Sanity check: (rows, columns) of the training inputs.
dataset['train_input'].size()

torch.Size([624, 166])

In [8]:
# Sanity check: labels should be a single column with one row per training sample.
dataset['train_label'].size()

torch.Size([624, 1])

In [34]:
# The first width entry (166) matches the 166 input columns; grid and seed are fixed.
model = KAN(
    width=[166, 1, 1],
    grid=10,
    seed=42,
)

In [35]:
def train_acc():
    return torch.mean((torch.round(model(dataset['train_input'])[:,0]) == dataset['train_label'][:,0]).float())

def test_acc():
    return torch.mean((torch.round(model(dataset['test_input'])[:,0]) == dataset['test_label'][:,0]).float())

# Fit with L-BFGS for 20 steps, tracking train_acc and test_acc as metrics.
results = model.train(
    dataset,
    opt="LBFGS",
    steps=20,
    metrics=(train_acc, test_acc),
    lamb=0.1,
    lamb_entropy=0.0,
)
# Display the last recorded value of each accuracy metric.
(results['train_acc'][-1], results['test_acc'][-1])

train loss: 3.91e-01 | test loss: 4.24e-01 | reg: 2.61e+00 : 100%|██| 20/20 [00:36<00:00,  1.81s/it]


(0.7836538553237915, 0.7564102411270142)

In [19]:
import itertools

# Candidate values for each hyperparameter; every combination is trained once.
lamb_values = [0.01, 0.1, 0.0]
lamb_entropy_values = [0.0, 10.0, 100.0]
grid_values = [1, 5, 10, 20]
seed_values = [1, 42]

best_accuracy = 0
best_params = {}
best_model = None    # network that produced best_accuracy (None if no run beat 0)
trial_records = []   # one entry per combination so the whole search can be inspected

# NOTE(review): candidates are ranked by accuracy on dataset['test_input'], so
# best_accuracy is an optimistic estimate of generalisation. Ranking on a
# separate validation split would be needed for an unbiased test score.
for lamb_val, lamb_entropy_val, grid_val, seed_val in itertools.product(
    lamb_values, lamb_entropy_values, grid_values, seed_values
):
    # train_acc/test_acc read the global name `model`, so every candidate has
    # to be bound to that name before training.
    model = KAN(width=[166,1,1], grid=grid_val, k=3, seed=seed_val)
    results = model.train(
        dataset,
        opt="LBFGS",
        steps=10,
        metrics=(train_acc, test_acc),
        lamb=lamb_val,
        lamb_entropy=lamb_entropy_val,
    )

    train_accuracy = results['train_acc'][-1]
    test_accuracy = results['test_acc'][-1]

    params = {'lamb': lamb_val, 'lamb_entropy': lamb_entropy_val, 'grid': grid_val, 'seed': seed_val}
    trial_records.append({**params, 'train_acc': train_accuracy, 'test_acc': test_accuracy})

    # Strict '>' keeps the earliest combination when accuracies tie.
    if test_accuracy > best_accuracy:
        best_accuracy = test_accuracy
        best_params = params
        best_model = model

# Full table of the search: one row per combination with its final accuracies.
search_results = pd.DataFrame(trial_records)

print("Best parameters found:")
print(best_params)
print("Best test accuracy:", best_accuracy)


train loss: 4.02e-01 | test loss: 4.28e-01 | reg: 6.59e+00 : 100%|██| 10/10 [00:12<00:00,  1.27s/it]
train loss: 3.90e-01 | test loss: 4.17e-01 | reg: 6.33e+00 : 100%|██| 10/10 [00:11<00:00,  1.18s/it]
train loss: 3.46e-01 | test loss: 4.15e-01 | reg: 4.50e+00 : 100%|██| 10/10 [00:18<00:00,  1.86s/it]
train loss: 3.37e-01 | test loss: 4.13e-01 | reg: 4.08e+00 : 100%|██| 10/10 [00:18<00:00,  1.86s/it]
train loss: 3.11e-01 | test loss: 4.20e-01 | reg: 3.45e+00 : 100%|██| 10/10 [00:23<00:00,  2.37s/it]
train loss: 4.04e-01 | test loss: 4.38e-01 | reg: 6.54e+00 : 100%|██| 10/10 [00:16<00:00,  1.62s/it]
train loss: nan | test loss: nan | reg: nan : 100%|█████████████████| 10/10 [00:26<00:00,  2.63s/it]
train loss: 4.07e-01 | test loss: 4.28e-01 | reg: 7.97e+00 : 100%|██| 10/10 [00:19<00:00,  1.94s/it]
train loss: 4.32e-01 | test loss: 4.64e-01 | reg: 4.86e+01 : 100%|██| 10/10 [00:17<00:00,  1.76s/it]
train loss: 5.53e-01 | test loss: 6.22e-01 | reg: 5.18e+01 : 100%|██| 10/10 [00:17<00:00,  

Best parameters found:
{'lamb': 0.1, 'lamb_entropy': 0.0, 'grid': 10, 'seed': 42}
Best test accuracy: 0.7692307829856873



