In [1]:
# Optional Google Colab bootstrap (disabled). Uncomment to run from Google Drive:
# from google.colab import drive
# drive.mount('/gdrive')
# %cd /gdrive
# root_project = "./MyDrive/Colab Notebooks/OBF_Psychiatric_a_motor_activity_dataset_of_patients_diagnosed_with_major_depression_schizophrenia_and_ADHD"
#
# import sys
# sys.path.append(root_project)

import torch

# Report up front whether PyTorch can see a CUDA device.
cuda_available = torch.cuda.is_available()
print("CUDA Available:", cuda_available)



CUDA Available: True


In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from sklearn.model_selection import GroupKFold, train_test_split 

from torch.utils.data import Dataset, DataLoader, random_split



import os
import glob
import logging
from typing import Dict, List, Tuple, Optional, Union
from deep_learning_models import SimpleNN, MoreComplexNN
from deep_learning_models import train_model, evaluate_model, plot_loss_function
from obf_class import OBFDataset
# Root logger: INFO and above, each record stamped with time and level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Dataset location, relative to the notebook's working directory.
data_directory = './obf_psy_dataset/'

# Groups handed to OBFDataset by the training cells below.
groups_to_include = [
    'control',
    'depression',
    'schizophrenia',
]





## Simple models

In [None]:
# Train, evaluate, save and plot a feed-forward classifier on single-step samples (seq_len=1).

# --- Reproducibility ---
# Fixed seed so weight initialisation, batch shuffling and the train/test split repeat across runs.
SEED = 42
torch.manual_seed(SEED)

# --- Data ---
dataset = OBFDataset(data_directory, groups_to_include, save=True, seq_len=1)  # Use seq_len=1 for now

# 80/20 split into training and testing sets. The split gets its own seeded generator so the
# partition does not depend on how much of the global RNG stream was consumed beforehand.
# NOTE(review): this splits individual samples at random; if one participant contributes several
# samples they can end up on both sides of the split (the GroupKFold cell further down groups on
# 'number' to avoid that) — confirm whether that matters for this baseline.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(
    dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SEED),
)

# Create data loaders
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# --- Model Definition and Training ---
input_size = dataset.features.shape[1]
hidden_size_1 = 64
hidden_size_2 = 64  # only needed by the MoreComplexNN alternative below
class_names = dataset.get_classes()  # fetched once; reused for the output size and the report
num_classes = len(class_names)
activation_function = nn.Sigmoid()
model = SimpleNN(input_size, hidden_size_1, num_classes, activation_function=activation_function)
# model = MoreComplexNN(input_size, hidden_size_1, hidden_size_2, num_classes,activation_function=activation_function)
model_name = 'SimpleNN_sigmoid_dropout_regularization_Adam_64_scheduler'

# --- Device (CPU or GPU) ---
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
# optimizer = optim.SGD(model.parameters(), lr=0.001, weight_decay=1e-5)
scheduler = StepLR(optimizer, step_size=30, gamma=0.1)  # Reduce LR by 0.1 every 30 epochs

num_epochs = 300  # increased for better convergence
# NOTE(review): test_loader is passed to train_model (presumably as the validation loader) and is
# also what evaluate_model reports on below, so there is no separate held-out set here.
train_losses, val_losses = train_model(
    model,
    train_loader,
    criterion,
    optimizer,
    num_epochs,
    test_loader,
    scheduler=scheduler,
    device=device,
)

# --- Evaluation ---
evaluate_model(model, test_loader, class_names, model_name, device=device)

# --- Saving the Model ---
# Build the path once so the file written and the message printed cannot drift apart.
model_path = './models/' + model_name + '_pytorch_model.pth'
os.makedirs('./models', exist_ok=True)  # Ensure the models directory exists
torch.save(model.state_dict(), model_path)
print(f"Model saved to '{model_path}'")

# --- Plotting Training and Validation Loss ---
plot_loss_function(train_losses, val_losses, model_name)

## Advanced models

In [3]:
from models import LSTMModel, CNN1DModel, TransformerModel

# Cross-validate a sequence model with GroupKFold: one fresh model is trained, evaluated,
# saved and plotted per fold.

# Base seed; each fold adds its index so folds differ from each other but repeat across runs.
SEED = 42

# Use seq_len > 1 for LSTM, CNN, Transformer
dataset = OBFDataset(data_directory, groups_to_include, seq_len=7, use_seq=True)  # use seq and GroupKFold
# NOTE(review): the last recorded run of this cell ended with
#   RuntimeError: shape '[-1, 7, 21]' is invalid for input of size 29127
# 29127 = 21 * 1387 and 1387 is not a multiple of seq_len=7, so some reshape is presumably being
# applied to a row count that was not trimmed to whole windows. The raising line is not visible
# from this cell — locate it (dataset or model code) before trusting results from this cell.

# --- GroupKFold Cross-Validation ---
group_kfold = GroupKFold(n_splits=5)  # Use 5 folds
groups = dataset.data['number'].values  # Use 'number' for grouping

# --- Fold-invariant setup (computed once instead of on every fold) ---
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
input_size = dataset.features.shape[2]  # (batch, seq_len, features)
class_names = dataset.get_classes()
num_classes = len(class_names)
num_epochs = 30
os.makedirs('./models', exist_ok=True)

for fold, (train_index, test_index) in enumerate(group_kfold.split(dataset.features, dataset.labels, groups)):
    fold_number = fold + 1
    print(f"Fold {fold_number}")

    # Seed before building samplers and the model so their randomness is repeatable.
    torch.manual_seed(SEED + fold)

    train_sampler = torch.utils.data.SubsetRandomSampler(train_index)
    test_sampler = torch.utils.data.SubsetRandomSampler(test_index)

    train_loader = DataLoader(dataset, batch_size=32, sampler=train_sampler)
    test_loader = DataLoader(dataset, batch_size=32, sampler=test_sampler)

    # --- Model Definition (Choose One) ---
    # 1. LSTM
    # model = LSTMModel(input_size, hidden_size=64, num_layers=2, num_classes=num_classes, device = device)
    # 2. CNN
    # model = CNN1DModel(input_size, num_classes)
    # 3. Transformer
    model = TransformerModel(input_size, num_classes)
    model.to(device)

    # Name reports and plots after the class actually instantiated above; the previous
    # hard-coded 'LSTMModel' prefix mislabelled runs of the other architectures.
    model_name = f'{type(model).__name__}__Adam_64_model_fold_{fold_number}'

    # --- Loss and Optimizer ---
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # --- Training ---
    # No validation loader is passed, so val_losses is expected to come back empty —
    # TODO confirm plot_loss_function copes with that.
    train_losses, val_losses = train_model(model, train_loader, criterion, optimizer, num_epochs, device=device)

    # --- Evaluation ---
    evaluate_model(model, test_loader, class_names, model_name, device=device)

    # --- (Optional) Save Model ---
    # (You could save the model after each fold, or only save the best-performing model)
    torch.save(model.state_dict(), f'./models/model_fold_{fold_number}.pth')

    plot_loss_function(train_losses, val_losses, model_name)
    

RuntimeError: shape '[-1, 7, 21]' is invalid for input of size 29127

In [4]:
# Display the validation losses left in the kernel by the most recent train_model call.
val_losses

[]