In [2]:
import pandas as pd

# Veriyi yükleme
file_path = './data.xlsx'
data = pd.read_excel(file_path)

# Verinin genel yapısını inceleme
print(data.head())
print(data.info())
print(data.describe())


   age  gender  nobet_tipi  nobet_frequnency  disease_duration  \
0   52       1           1                 0               3.0   
1   19       1           1                 2               8.0   
2   55       1           1                 0               0.4   
3   29       1           1                 2              25.0   
4   71       1           1                 2               6.0   

   marital_status  se_history  nbt_uyku  nbt_uyaniklik  head_trauma  \
0               1           0         1              0            0   
1               0           0         0              1            1   
2               1           1         0              1            0   
3               0           0         0              1            1   
4               1           0         0              1            1   

   interceptive_delivery  febril_convulsion  epilepsy_family_history  \
0                      0                  0                        0   
1                      0        

### Data processing, Vectorization and PCA

In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import PCA

# TF-IDF vektörizasyonu
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(data['eeg_sonucu'])

# PCA ile boyut indirgeme
pca = PCA(n_components=1)
eeg_numeric = pca.fit_transform(tfidf_matrix.toarray())

# Yeni nümerik değeri veri setine ekleme
data['eeg_numeric'] = eeg_numeric
data = data.drop(columns=['eeg_sonucu'])


### SMOTE Oversampling

In [4]:
from imblearn.over_sampling import SMOTE

# Input ve output ayrıştırma
X = data.drop(columns=['nobet_tipi'])
y = data['nobet_tipi']

# SMOTE kullanarak veri artırma
smote = SMOTE()
X_resampled, y_resampled = smote.fit_resample(X, y)


#### First MLP Model

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split
from sklearn.metrics import f1_score, recall_score, accuracy_score

# Veri setini PyTorch tensörlerine dönüştürme
X_tensor = torch.tensor(X_resampled.values, dtype=torch.float32)
y_tensor = torch.tensor(y_resampled.values, dtype=torch.long)

dataset = TensorDataset(X_tensor, y_tensor)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# MLP modeli oluşturma
class MLP(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        return x

input_dim = X_tensor.shape[1]
hidden_dim = 128
output_dim = len(data['nobet_tipi'].unique())

model = MLP(input_dim, hidden_dim, output_dim)

# Loss ve optimizer belirleme
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Modeli eğitme
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    for inputs, labels in train_dataloader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}')

# Modeli değerlendirme fonksiyonu
def evaluate_model(model, dataloader):
    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for inputs, labels in dataloader:
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.numpy())
            all_labels.extend(labels.numpy())
    f1 = f1_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    accuracy = accuracy_score(all_labels, all_preds)
    return f1, recall, accuracy

f1, recall, accuracy = evaluate_model(model, test_dataloader)
print(f'F1 Score: {f1}, Recall: {recall}, Accuracy: {accuracy}')


  from .autonotebook import tqdm as notebook_tqdm


Epoch 1/10, Loss: 0.6164752840995789
Epoch 2/10, Loss: 0.5590840578079224
Epoch 3/10, Loss: 0.5784189105033875
Epoch 4/10, Loss: 0.24356107413768768
Epoch 5/10, Loss: 0.5700127482414246
Epoch 6/10, Loss: 0.2463884949684143
Epoch 7/10, Loss: 0.24983526766300201
Epoch 8/10, Loss: 0.20380906760692596
Epoch 9/10, Loss: 0.1159171536564827
Epoch 10/10, Loss: 0.1682053804397583
F1 Score: 0.8908880017825423, Recall: 0.8908296943231441, Accuracy: 0.8908296943231441


### Second model with Optuna

In [7]:
import optuna
from sklearn.metrics import f1_score, recall_score, accuracy_score

# MLP modeli
class DeepMLP(nn.Module):
    def __init__(self, input_dim, hidden_dim, hidden_layers, output_dim, dropout_rate):
        super(DeepMLP, self).__init__()
        layers = []
        layers.append(nn.Linear(input_dim, hidden_dim))
        layers.append(nn.ReLU())
        layers.append(nn.Dropout(dropout_rate))
        
        for _ in range(hidden_layers - 1):
            layers.append(nn.Linear(hidden_dim, hidden_dim))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout_rate))
        
        layers.append(nn.Linear(hidden_dim, output_dim))
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        return self.network(x)

# Optuna ile hiperparametre optimizasyonu
def objective(trial):
    input_dim = X_tensor.shape[1]
    hidden_dim = trial.suggest_int('hidden_dim', 64, 512)
    hidden_layers = trial.suggest_int('hidden_layers', 1, 5)
    dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
    lr = trial.suggest_float('lr', 1e-5, 1e-2)

    model = DeepMLP(input_dim, hidden_dim, hidden_layers, output_dim, dropout_rate)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    num_epochs = 10
    for epoch in range(num_epochs):
        model.train()
        for inputs, labels in train_dataloader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    f1, recall, accuracy = evaluate_model(model, test_dataloader)
    return f1

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

print('Best trial:')
trial = study.best_trial
print(f'F1 Score: {trial.value}')
print('Best hyperparameters: ', trial.params)

# En iyi hiperparametrelerle model eğitme
best_params = trial.params
model = DeepMLP(input_dim, best_params['hidden_dim'], best_params['hidden_layers'], output_dim, best_params['dropout_rate'])

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=best_params['lr'])

num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    for inputs, labels in train_dataloader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}')

f1, recall, accuracy = evaluate_model(model, test_dataloader)
print(f'F1 Score: {f1}, Recall: {recall}, Accuracy: {accuracy}')


[I 2024-08-01 09:47:25,118] A new study created in memory with name: no-name-177309be-3620-433a-b816-d18ecc52d643
[I 2024-08-01 09:47:25,845] Trial 0 finished with value: 0.9737721051104047 and parameters: {'hidden_dim': 137, 'hidden_layers': 2, 'dropout_rate': 0.48728865590840265, 'lr': 0.0033883196726438095}. Best is trial 0 with value: 0.9737721051104047.
[I 2024-08-01 09:47:28,591] Trial 1 finished with value: 0.97814926432341 and parameters: {'hidden_dim': 377, 'hidden_layers': 5, 'dropout_rate': 0.25679902168633306, 'lr': 0.0006938671157206147}. Best is trial 1 with value: 0.97814926432341.
[I 2024-08-01 09:47:29,139] Trial 2 finished with value: 0.9562868418506743 and parameters: {'hidden_dim': 171, 'hidden_layers': 2, 'dropout_rate': 0.3785001309084586, 'lr': 0.00017669580874701227}. Best is trial 1 with value: 0.97814926432341.
[I 2024-08-01 09:47:30,201] Trial 3 finished with value: 0.9563285468274827 and parameters: {'hidden_dim': 181, 'hidden_layers': 5, 'dropout_rate': 0.1

Best trial:
F1 Score: 0.9825294186647899
Best hyperparameters:  {'hidden_dim': 289, 'hidden_layers': 5, 'dropout_rate': 0.45289865164618615, 'lr': 0.001781886623652632}
Epoch 1/20, Loss: 0.4646921455860138
Epoch 2/20, Loss: 0.13262349367141724
Epoch 3/20, Loss: 0.2951509952545166
Epoch 4/20, Loss: 0.3275938034057617
Epoch 5/20, Loss: 0.2633124887943268
Epoch 6/20, Loss: 0.3078048527240753
Epoch 7/20, Loss: 0.15478119254112244
Epoch 8/20, Loss: 0.10209742933511734
Epoch 9/20, Loss: 0.03725193440914154
Epoch 10/20, Loss: 0.11200488358736038
Epoch 11/20, Loss: 0.07284894585609436
Epoch 12/20, Loss: 0.10156287252902985
Epoch 13/20, Loss: 0.1469414234161377
Epoch 14/20, Loss: 0.05503818392753601
Epoch 15/20, Loss: 0.06185167655348778
Epoch 16/20, Loss: 0.054849348962306976
Epoch 17/20, Loss: 0.15650558471679688
Epoch 18/20, Loss: 0.03896467387676239
Epoch 19/20, Loss: 0.06823410838842392
Epoch 20/20, Loss: 0.028532199561595917
F1 Score: 0.9693914336861672, Recall: 0.9694323144104804, Accura

In [10]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

# Geliştirilmiş MLP modeli (baz model)
class ImprovedMLP(nn.Module):
    def __init__(self, input_dim, hidden_dim, hidden_layers, output_dim, dropout_rate):
        super(ImprovedMLP, self).__init__()
        layers = []
        layers.append(nn.Linear(input_dim, hidden_dim))
        layers.append(nn.BatchNorm1d(hidden_dim))
        layers.append(nn.ReLU())
        layers.append(nn.Dropout(dropout_rate))
        
        for _ in range(hidden_layers - 1):
            layers.append(nn.Linear(hidden_dim, hidden_dim))
            layers.append(nn.BatchNorm1d(hidden_dim))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout_rate))
        
        layers.append(nn.Linear(hidden_dim, output_dim))
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        return self.network(x)

# Temel modelleri eğitme
input_dim = X_tensor.shape[1]
hidden_dim = best_params['hidden_dim']
hidden_layers = best_params['hidden_layers']
dropout_rate = best_params['dropout_rate']
lr = best_params['lr']
output_dim = len(data['nobet_tipi'].unique())

def train_mlp_model(X_train, y_train, X_val, y_val):
    model = ImprovedMLP(input_dim, hidden_dim, hidden_layers, output_dim, dropout_rate)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    num_epochs = 20
    best_model = None
    best_val_loss = np.inf

    for epoch in range(num_epochs):
        model.train()
        for inputs, labels in DataLoader(TensorDataset(X_train, y_train), batch_size=32, shuffle=True):
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, labels in DataLoader(TensorDataset(X_val, y_val), batch_size=32, shuffle=False):
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
        val_loss /= len(X_val)
        
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model = model.state_dict()

    model.load_state_dict(best_model)
    return model

# Veriyi eğitim ve doğrulama setlerine ayırma
X_train_val, X_test, y_train_val, y_test = train_test_split(X_tensor, y_tensor, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.2, random_state=42)

# Baz modelleri eğitme
mlp_model = train_mlp_model(X_train, y_train, X_val, y_val)
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train_val.numpy(), y_train_val.numpy())
svm_model = SVC(probability=True, random_state=42).fit(X_train_val.numpy(), y_train_val.numpy())

# Tahminleri meta model için hazırlama
def get_predictions(models, X):
    mlp_model, rf_model, svm_model = models
    mlp_model.eval()
    with torch.no_grad():
        mlp_preds = mlp_model(X).numpy()
    rf_preds = rf_model.predict_proba(X.numpy())
    svm_preds = svm_model.predict_proba(X.numpy())
    return np.hstack([mlp_preds, rf_preds, svm_preds])

meta_train_preds = get_predictions([mlp_model, rf_model, svm_model], X_train_val)
meta_test_preds = get_predictions([mlp_model, rf_model, svm_model], X_test)

# Meta model eğitme
meta_model = LogisticRegression(random_state=42)
meta_model.fit(meta_train_preds, y_train_val.numpy())

# Meta model ile tahmin yapma
meta_preds = meta_model.predict(meta_test_preds)

# Performans ölçütleri
f1 = f1_score(y_test.numpy(), meta_preds, average='weighted')
recall = recall_score(y_test.numpy(), meta_preds, average='weighted')
accuracy = accuracy_score(y_test.numpy(), meta_preds)

print(f'F1 Score: {f1}, Recall: {recall}, Accuracy: {accuracy}')


F1 Score: 0.9738580278257338, Recall: 0.9737991266375546, Accuracy: 0.9737991266375546


### Thirs Model with XGBoost

In [14]:
import xgboost as xgb
from sklearn.neighbors import KNeighborsClassifier

# Geliştirilmiş MLP modeli (baz model)
class ImprovedMLP(nn.Module):
    def __init__(self, input_dim, hidden_dim, hidden_layers, output_dim, dropout_rate):
        super(ImprovedMLP, self).__init__()
        layers = []
        layers.append(nn.Linear(input_dim, hidden_dim))
        layers.append(nn.BatchNorm1d(hidden_dim))
        layers.append(nn.ReLU())
        layers.append(nn.Dropout(dropout_rate))
        
        for _ in range(hidden_layers - 1):
            layers.append(nn.Linear(hidden_dim, hidden_dim))
            layers.append(nn.BatchNorm1d(hidden_dim))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout_rate))
        
        layers.append(nn.Linear(hidden_dim, output_dim))
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        return self.network(x)

# Temel modelleri eğitme
input_dim = X_tensor.shape[1]
hidden_dim = best_params['hidden_dim']
hidden_layers = best_params['hidden_layers']
dropout_rate = best_params['dropout_rate']
lr = best_params['lr']
output_dim = len(data['nobet_tipi'].unique())

def train_mlp_model(X_train, y_train, X_val, y_val):
    model = ImprovedMLP(input_dim, hidden_dim, hidden_layers, output_dim, dropout_rate)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    num_epochs = 20
    best_model = None
    best_val_loss = np.inf

    for epoch in range(num_epochs):
        model.train()
        for inputs, labels in DataLoader(TensorDataset(X_train, y_train), batch_size=32, shuffle=True):
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, labels in DataLoader(TensorDataset(X_val, y_val), batch_size=32, shuffle=False):
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
        val_loss /= len(X_val)
        
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model = model.state_dict()

    model.load_state_dict(best_model)
    return model

# Veriyi eğitim ve doğrulama setlerine ayırma
X_train_val, X_test, y_train_val, y_test = train_test_split(X_tensor, y_tensor, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.2, random_state=42)

# Baz modelleri eğitme
mlp_model = train_mlp_model(X_train, y_train, X_val, y_val)
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train_val.numpy(), y_train_val.numpy())
svm_model = SVC(probability=True, random_state=42).fit(X_train_val.numpy(), y_train_val.numpy())
knn_model = KNeighborsClassifier(n_neighbors=5).fit(X_train_val.numpy(), y_train_val.numpy())

# Tahminleri meta model için hazırlama
def get_predictions(models, X):
    mlp_model, rf_model, svm_model, knn_model = models
    mlp_model.eval()
    with torch.no_grad():
        mlp_preds = mlp_model(X).numpy()
    rf_preds = rf_model.predict_proba(X.numpy())
    svm_preds = svm_model.predict_proba(X.numpy())
    knn_preds = knn_model.predict_proba(X.numpy())
    return np.hstack([mlp_preds, rf_preds, svm_preds, knn_preds])

meta_train_preds = get_predictions([mlp_model, rf_model, svm_model, knn_model], X_train_val)
meta_test_preds = get_predictions([mlp_model, rf_model, svm_model, knn_model], X_test)

# Meta model eğitme
meta_model = xgb.XGBClassifier(random_state=42)
meta_model.fit(meta_train_preds, y_train_val.numpy())

# Meta model ile tahmin yapma
meta_preds = meta_model.predict(meta_test_preds)

# Performans ölçütleri
f1 = f1_score(y_test.numpy(), meta_preds, average='weighted')
recall = recall_score(y_test.numpy(), meta_preds, average='weighted')
accuracy = accuracy_score(y_test.numpy(), meta_preds)

print(f'F1 Score: {f1}, Recall: {recall}, Accuracy: {accuracy}')


Note: You have installed the 'manylinux2014' variant of XGBoost. Certain features such as GPU algorithms or federated learning are not available. To use these features, please upgrade to a recent Linux distro with glibc 2.28+, and install the 'manylinux_2_28' variant.


F1 Score: 0.9825537877939624, Recall: 0.982532751091703, Accuracy: 0.982532751091703


### Anyone who might use this repository should cite the work ©  
### All rights belong to the writers (Dr. Cevher ÖZDEN and Dr. Pınar Bengi BOZ)