In [1]:
import os
import numpy as np
import torch
import pandas as pd
from glob import glob
from scipy.io import loadmat
import matplotlib.pyplot as plt
from scipy.signal import butter, filtfilt, iirnotch, periodogram
from sklearn.preprocessing import StandardScaler
from scipy.stats import kurtosis
from torch.utils.data import random_split
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import gc
from sklearn.metrics import r2_score
from scipy.io import savemat
import joblib
from scipy.signal import hilbert
from sklearn.decomposition import PCA
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import os
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from typing import Literal

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using Device:", device)

2025-05-23 22:27:50.445845: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-23 22:27:50.551219: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1748053670.593486  105431 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1748053670.605425  105431 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1748053670.703964  105431 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

Using Device: cuda


In [4]:
# Noise Filters
def butter_bandpass(lowcut, highcut, fs, order=5):
    nyq = fs / 2.0
    low = lowcut / nyq
    high = highcut / nyq
    b, a = butter(order, [low, high], btype='band')
    return b, a

def bandpass_filter(data, lowcut=1.0, highcut=200.0, fs=1000.0, order=4):
    b, a = butter_bandpass(lowcut, highcut, fs, order=order)
    return filtfilt(b, a, data, axis=0)

# Apply after bandpass
def notch_filter(data, freq=60.0, fs=1000.0, quality=30.0):
    b, a = iirnotch(freq, quality, fs)
    return filtfilt(b, a, data, axis=0)

# Noise Metrics for evaluation
def compute_rmse(true, estimate):
    return np.sqrt(np.mean((true - estimate) ** 2))

# Kurtosis signal reduction > 0 shows a denoised signal
def proportion_of_positive_kurtosis_signals(kurtosis_raw, kurtosis_denoised):
    return (np.array([(kurtosis_raw - kurtosis_denoised) > 0]).sum() / len(kurtosis_raw)) * 100

# Use a Standard scaler to reduce the mean to 0 and std to 1

In [5]:
# Computing the power envelope of each channel

def band_power_envelope(ecog_signal: np.ndarray, lowcut: float, highcut: float, fs: float = 1000.0, order: int = 4) -> np.ndarray:
    """Computes band-limited envelope via Hilbert transform.
    Parameters
    ----------
    self.ecog_signal : np.ndarray (T, channels)
        This is the ecog signal that has been filtered.
    lowcut : float
        This is the lower band limit in Hz.
    highcut : float
        This is the upper band limit in Hz.
    fs : float, optional
        This is the frequency of the sample., by default 1000.0
    order : int, optional
        This is the Butterworth order, by default 4
    Returns
    -------
    np.ndarray
        envelope
    """
    # 1. Narrowband bandpass
    b, a = butter_bandpass(lowcut, highcut, fs, order=order)
    narrow = filtfilt(b, a, ecog_signal, axis=0)
    # 2. Hilbert transform to get analytic signal
    analytic = hilbert(narrow, axis=0)
    # 3. Envelope = absolute value
    envelope = np.abs(analytic)
    return envelope

def multiband_features(ecog_raw: np.ndarray, fs: float = 1000.0) -> np.ndarray:
    """Builds concatenated band-power features for μ, β, and high-gamma.
    Parameters
    ----------
    ecog_raw : np.ndarray
        (T, 64)
    fs : float, optional
        Frequency of the sample, by default 1000.0
    Returns
    -------
    np.ndarray
        features: (T, 64, 3) (μ, β, high-gamma per electrode)
    """
    mu_env = band_power_envelope(ecog_raw, lowcut=8.0, highcut=13.0, fs=fs)
    beta_env = band_power_envelope(ecog_raw, lowcut=13.0, highcut=30.0, fs=fs)
    hg_env = band_power_envelope(ecog_raw, lowcut=70.0, highcut=200.0, fs=fs)
    # Concatenate along channel dimension
    return np.concatenate([mu_env, beta_env, hg_env], axis=1)


In [6]:
def create_overlapping_windows(ecog_values: np.ndarray, motion_values: np.ndarray, window_size: int = 20, hop_size: int = 10):
    """Builds overlapping windows to increase sample count and capture smoother transitions.

    Parameters
    ----------
    ecog_values : np.ndarray
        (T, features)
    motion_values : np.ndarray
        (T_motion, 3)_
    window_size : int, optional
        number of timepoints per window, by default 20
    hop_size : int, optional
        step bewteen windows, by default 10
    """
    num_samples, num_features = ecog_values.shape
    max_windows = (num_samples - window_size) // hop_size + 1
    X_list = []
    y_list = []
    for w in range(max_windows):
        start = w * hop_size
        end = start + window_size
        if end > num_samples:
            break
        # Assign label as motion at center of window (or last timepoint)
        X_list.append(ecog_values[start:end, :])
        y_list.append(motion_values[min(end -1, motion_values.shape[0] -1), :])
    X = np.stack(X_list, axis=0)
    y = np.stack(y_list, axis=0)
    return X, y        


In [7]:
# Model definitions
class EcogMotionDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]
    
# CNN/LSTM hybrid
class EcogToMotionNet(nn.Module):
    def __init__(self):
        super().__init__()

        # CNN component: outputs 256 channels
        self.convolv = nn.Sequential(
            nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.BatchNorm1d(128),
            nn.ReLU(inplace=True),
            nn.Conv1d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
            nn.BatchNorm1d(128),
            nn.ReLU(inplace=True),
            nn.Conv1d(in_channels=128, out_channels=256, kernel_size=3, padding=1),  # Fixed to 256 channels
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.3)
        )

        # Bi-LSTM component (2 Layers)
        self.lstm = nn.LSTM(input_size=256, hidden_size=128, num_layers=2, batch_first=True, bidirectional=True)

        self.attn_weight = nn.Linear(2 * 128, 1, bias=False)

        # Fully connected layer
        self.fc = nn.Sequential(
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.3),
            nn.Linear(2*128, 3)  # Matches hidden_size=128
        )

    def forward(self, x):
        # Input shape: (batch, 20, 64)
        x = x.permute(0, 2, 1)  # Shape: (batch, 64, 20)
        x = self.convolv(x)      # Shape: (batch, 256, 20)
        x = x.permute(0, 2, 1)   # Shape: (batch, 20, 256)

        lstm_out, (h_n, c_n) = self.lstm(x)  # lstm_out shape: (batch, 20, 128)

        # Compute attention scores
        # Flatten across features: attn_score[i, t] = wT * h_{i, t}
        # Then softmax over t to get α_{i, t}
        attn_scores = self.attn_weight(lstm_out).squeeze(-1)
        attn_weights = torch.softmax(attn_scores, dim=1)
        # Weighted sum of LSTM outputs:
        attn_applied = torch.bmm(attn_weights.unsqueeze(1), lstm_out).squeeze(1)

        # Regression to 3D motion
        output = self.fc(attn_applied)
        return output

# Linear Model
class LinearEcogToMotionNet(nn.Module):
    def __init__(self, input_channels = 64, sequence_length = 20, output_dim = 3):
        super().__init__()
        self.linear = nn.Linear(input_channels * sequence_length, output_dim)

    def forward(self, x):
        x = x.permute(0, 2, 1)
        x = x.flatten(start_dim=1)
        x = self.linear(x)
        return x

# LSTM
class EcogLSTM(nn.Module):
    def __init__(self, input_size = 64, hidden_size = 128, num_layers = 1, output_size = 3):
        super().__init__()
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        lstm_out, (h_n, c_n) = self.lstm(x) # lstm_out shape: (batch_size, seq_len, hidden_size)
        last_output = lstm_out[:, -1, :]
        output = self.fc(last_output)
        return output
 

In [8]:
def predict_and_export(model, data_loader, device, output_file_path):
    model.eval()
    all_preds, all_targets = [], []

    with torch.no_grad():
        for inputs, targets in data_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            all_preds.append(outputs.cpu().numpy())
            all_targets.append(targets.cpu().numpy())
    predictions = np.concatenate(all_preds, axis=0)
    targets = np.concatenate(all_targets, axis=0)
    
    # Save as .mat file for visualization
    savemat(output_file_path, {
        "predictions":predictions,
        "targets": targets
    })
    print("Saved predictions to ecog_predictions.mat")

    return predictions, targets

In [9]:
class PreprocessData:
    def __init__(self, ecog_file_path, motion_file_path):
        self.ecog_file_path = ecog_file_path
        self.motion_file_path = motion_file_path
        self.ecog_data = None
        self.motion_data = None
        self.filtered_ecog = None
        self.scaled_ecog = None
        self.X = None
        self.y = None
        self.scaler = None

    def process(self, eval=False, window_size=20, duration_limit=900):
        self.read_data()
        self.common_average_reference()
        self.filter_signal(eval=eval)
        self.format_data(window_size=window_size, duration_limit=duration_limit)
        return self.X, self.y
    
    def read_data(self):
        self.ecog_data = pd.read_csv(self.ecog_file_path)
        self.motion_data = pd.read_csv(self.motion_file_path)
        return self

    def common_average_reference(self):
        # Subtract the common mean from the signals 
        common_average_reference = np.mean(self.ecog_data.drop(["Time", "Fs"], axis=1).values, axis=1, keepdims=1)
        ecog_data_values = self.ecog_data[self.ecog_data.columns[1:-1]].values
        ecog_data_common_mean_subtracted = ecog_data_values - common_average_reference
        self.ecog_data[self.ecog_data.columns[1:-1]] = ecog_data_common_mean_subtracted
        del ecog_data_values, ecog_data_common_mean_subtracted, common_average_reference
        gc.collect()
        return self

    def filter_signal(self, eval=False):
        ecog_raw = self.ecog_data[self.ecog_data.columns[1:-1]].values

        # Apply filters
        filtered = bandpass_filter(ecog_raw, lowcut=1.0, highcut=200.0, fs=1000.0, order=4)
        denoised = notch_filter(filtered, freq=60, fs=1000.0)

        # Evaluate filters
        if eval:
            kurt_raw = kurtosis(ecog_raw, axis=0, fisher=True)
            kurt_denoised = kurtosis(denoised, axis=0, fisher=True)
            proportion_of_positive_kurtosis_signals(kurt_raw, kurt_denoised)
            compute_rmse(ecog_raw, denoised)

        # Compute Power Envelopes
        features = multiband_features(denoised, fs=1000.0) # shape (T, 192)

        # Identify the principal components of the network
        pca = PCA(n_components = 64, random_state=42)
        reduced = pca.fit_transform(features)

        # Scale
        self.scaler = StandardScaler()
        self.scaled_ecog = self.scaler.fit_transform(reduced)

        # Replace in DataFrame
        self.ecog_data = self.ecog_data.copy()
        self.ecog_data[self.ecog_data.columns[1:-1]] = self.scaled_ecog

        # Clean memory
        del ecog_raw, filtered, denoised
        gc.collect()
        return self

    def format_data(self, window_size=20, duration_limit=900):
        ecog_df = self.ecog_data[self.ecog_data["Time"] <= duration_limit]
        motion_df = self.motion_data[self.motion_data["Motion_time"] <= duration_limit]

        ecog_values = ecog_df.drop(columns=["Fs", "Time"]).values
        motion_values = motion_df.drop(columns=["Fsm", "Motion_time"]).values

        print(f"motion_values.shape: {motion_values.shape}")

        # Smooth the signal
        X, y = create_overlapping_windows(ecog_values, motion_values, window_size=20, hop_size=10)
        print(f"y.shape: {y.shape}")
        self.X, self.y = X, y
        
        print(self.X.shape)
        print(self.y.shape)
        
        # Clean up
        del ecog_values, motion_values
        gc.collect()

    def save(self):
        output_file_path_base = self.ecog_file_path.strip("ecog_data.csv")
        joblib.dump(self.scaler, output_file_path_base + "scaler_ecog.pkl")
        np.save(output_file_path_base + "X.npy", self.X)
        np.save(output_file_path_base + "y.npy", self.y)


In [10]:
def train_model(model, device, train_loader, val_loader=None, epochs=20, model_name="model", example_input=torch.rand(1,20,64), checkpoint_dir="models/"):
    model.to(device)
    criterion = nn.MSELoss()
    optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-5)
    scheduler = ReduceLROnPlateau(optimizer, mode="min", factor=0.5, patience=5)
    writer = SummaryWriter(log_dir='runs/' + model_name)
    best_val_loss = float('inf')
    early_stop_counter = 0
    patience = 10 # epochs
    
    # Add the model graph to TensorBoard using example_input
    if example_input is not None:
        writer.add_graph(model, example_input.to(device))

    train_losses = []
    val_losses = []
    r2_scores = []
    
    for epoch in range(epochs):
        # Train
        model.train()
        running_train_loss = 0.0
        for X_batch, y_batch in train_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)
            optimizer.zero_grad()
            preds = model(X_batch)
            loss = criterion(preds, y_batch)
            loss.backward()
            optimizer.step()
            running_train_loss += loss.item() * X_batch.size(0)
        avg_train_loss = running_train_loss / len(train_loader.dataset)
        train_losses.append(avg_train_loss)
        writer.add_scalar("Loss/Train", avg_train_loss, epoch)
        writer.add_scalar("Learning Rate", optimizer.param_groups[0]['lr'], epoch)
            
        if val_loader is not None:
            # Validate
            model.eval()
            running_val_loss = 0.0
            all_preds = []
            all_targets = []
            with torch.no_grad():
                for X_batch, y_batch in val_loader:
                    X_batch = X_batch.to(device)
                    y_batch = y_batch.to(device)
                    preds = model(X_batch)
                    loss = criterion(preds, y_batch)
                    running_val_loss += loss.item() * X_batch.size(0)
                    all_preds.append(preds.cpu())
                    all_targets.append(y_batch.cpu())
            all_preds = torch.cat(all_preds).numpy()
            all_targets = torch.cat(all_targets).numpy()
            r2 = r2_score(all_targets, all_preds)
            r2_scores.append(r2)
            avg_val_loss = running_val_loss / len(val_loader.dataset)
            val_losses.append(avg_val_loss)
            
            # Log to TensorBoard
            
            writer.add_scalar("Loss/Validation", avg_val_loss, epoch)
            writer.add_scalar("R2/Validation", r2, epoch)
            

            print(f"{model_name} Epoch {epoch+1}/{epochs} | Train Loss: {avg_train_loss:.6f} | Val Loss: {avg_val_loss:.6f} | R2: {r2:.6f}")

            scheduler.step(avg_val_loss)

            # Save best model checkpoint
            if avg_val_loss < best_val_loss - 1e-5:
                best_val_loss = avg_val_loss
                early_stop_counter = 0
                print(f"Model Checkpoint | epoch: {epoch} | best_val_loss: {best_val_loss}")
                torch.save(model.state_dict(), checkpoint_dir + model_name + ".pth")
            else:
                early_stop_counter += 1
                if early_stop_counter >= patience:
                    print(f"Early stopping at epoch {epoch+1}")
                    break
    
    writer.close()
    return train_losses, val_losses, r2_scores


In [11]:
def plot_losses(losses_dict):
    plt.figure(figsize=(10,6))
    for model_name, (train_losses, val_losses) in losses_dict.items():
        plt.plot(train_losses, label=f"{model_name} Train")
        plt.plot(val_losses, label=f"{model_name} Val")
    plt.xlabel("Epoch")
    plt.ylabel("Loss (MSE)")
    plt.title("Training and Validation Loss Curves")
    plt.legend()
    plt.grid(True)
    plt.show()


In [12]:
os.path.join(os.getcwd(), "src/", "motor_cortex/data/")

'/home/linux-pc/gh/CRCNS/src/motor_cortex/data/'

In [14]:
motion_data_file_l = glob(os.path.join(os.getcwd(), "src/", "motor_cortex/data/data/", "**", "motion*.csv"), recursive=True)
ecog_data_file_l = glob(os.path.join(os.getcwd(), "src/", "motor_cortex/data/data/", "**", "ecog*.csv"), recursive=True)

In [28]:
# df = pd.read_csv(motion_data_file_l[9])
# df.drop(columns=["Left_Wrist_X.1", "Left_Wrist_Y.1", "Left_Wrist_Z.1"], inplace=True)

In [29]:
# # Create zero column data for ipsilateral data (right-wrist, left is zero)
# for index in range(6, 16):
#     current_motion_data_file = motion_data_file_l[index]
#     current_motion_data_file_df = pd.read_csv(current_motion_data_file)
#     current_motion_data_file_df_update = create_six_motion_outputs_for_df(current_motion_data_file_df, wrist="RIGHT")
#     current_motion_data_file_df_update.to_csv(current_motion_data_file, index=False)

In [None]:
# # Create zero column data for contralateral data (left-wrist, right is zero)
# for index in range(16, 26):
#     current_motion_data_file = motion_data_file_l[index]
#     current_motion_data_file_df = pd.read_csv(current_motion_data_file)
#     current_motion_data_file_df_update = create_six_motion_outputs_for_df(current_motion_data_file_df, wrist="LEFT")
#     current_motion_data_file_df_update.to_csv(current_motion_data_file, index=False)
    

# Reading only the Right Wrist data and training to detect depending on the Right Wrist

## Bilateral Data

In [None]:
motion_data_file_l[0]

In [None]:
motion_data_file = motion_data_file_l[0]

In [None]:
ecog_data_file = ecog_data_file_l[0]

In [None]:
motion_data_file

In [None]:
ecog_data_file

In [None]:
motion_data_bilateral_2018_07_12_S1 = pd.read_csv(motion_data_file)

In [None]:
motion_data_bilateral_2018_07_12_S1

In [None]:
ecog_data_bilateral_2018_07_12_S1 = pd.read_csv(ecog_data_file)
channel_data = ecog_data_bilateral_2018_07_12_S1.columns[1:-1].values
ecog_data_bilateral_2018_07_12_S1[channel_data].values

In [None]:
print(motion_data_bilateral_2018_07_12_S1)

In [None]:
ecog_data_bilateral_2018_07_12_S1["Time"]

In [None]:
plt.plot(ecog_data_bilateral_2018_07_12_S1["Time"], ecog_data_bilateral_2018_07_12_S1[channel_data].values)

In [None]:
ecog_channels = ecog_data_bilateral_2018_07_12_S1.columns[1:-1]

In [None]:
ecog_data_bilateral_2018_07_12_S1[ecog_channels].values

In [None]:
motion_data_bilateral_2018_07_12_S1_left_wrist = motion_data_bilateral_2018_07_12_S1[motion_data_bilateral_2018_07_12_S1.columns[1:4]]

In [None]:
motion_data_bilateral_2018_07_12_S1_left_wrist

In [None]:
motion_data_bilateral_2018_07_12_S1_left_wrist

In [None]:
ecog_data_bilateral_2018_07_12_S1[channel_data]

In [None]:
del ecog_data_bilateral_2018_07_12_S1

## Ipsilateral Data (Right Wrist)

In [15]:
INDEX = 12

In [21]:
current_ecog_data_file = ecog_data_file_l[INDEX]
current_motion_data_file = motion_data_file_l[INDEX]

In [22]:
df = pd.read_csv(motion_data_file_l[INDEX])

In [23]:
df[df["Motion_time"] <= 900]

Unnamed: 0,Fsm,Left_Wrist_X,Left_Wrist_Y,Left_Wrist_Z,Motion_time,Right_Wrist_X,Right_Wrist_Y,Right_Wrist_Z
0,50,0.0,0.0,0.0,0.000,-0.401104,0.230454,-0.160554
1,50,0.0,0.0,0.0,0.019,-0.406948,0.233907,-0.160399
2,50,0.0,0.0,0.0,0.040,-0.402722,0.231508,-0.160204
3,50,0.0,0.0,0.0,0.059,-0.412511,0.239396,-0.159490
4,50,0.0,0.0,0.0,0.080,-0.418708,0.244420,-0.159540
...,...,...,...,...,...,...,...,...
44981,50,0.0,0.0,0.0,899.903,-0.600712,0.416428,-0.260001
44982,50,0.0,0.0,0.0,899.923,-0.599108,0.415988,-0.260355
44983,50,0.0,0.0,0.0,899.943,-0.604163,0.419191,-0.259465
44984,50,0.0,0.0,0.0,899.963,-0.602424,0.418739,-0.259521


In [None]:
# ecog_data = pd.read_csv(current_ecog_data_file)
# common_average_reference = np.mean(ecog_data.drop(["Time", "Fs"], axis=1).values, axis=1, keepdims=1)
# # ecog_data[ecog_data[1:-1]
# ecog_data_common_mean_subtracted = ecog_data_values - common_average_reference
# ecog_data[ecog_data.columns[1:-1]] -= np.mean(ecog_data.drop(["Time", "Fs"], axis=1).values, axis=1, keepdims=1)
# np.mean(ecog_data.drop(["Time", "Fs"], axis=1).values, axis=1, keepdims=1)
# ecog_data -= np.mean(ecog_data.drop(columns=["Time", "Fs"]), axis=1, keepdims=True)

In [24]:
preprocessor = PreprocessData(current_ecog_data_file, current_motion_data_file)

In [19]:
import cProfile
import pstats
with cProfile.Profile() as pr:
    preprocessor = PreprocessData(current_ecog_data_file, current_motion_data_file)
    X, y = preprocessor.process()
stats = pstats.Stats(pr)
stats.sort_stats(pstats.SortKey.TIME).print_stats(20)


motion_values.shape: (44986, 6)
y.shape: (89999, 6)
(89999, 20, 64)
(89999, 6)
         502540 function calls (501267 primitive calls) in 52.986 seconds

   Ordered by: internal time
   List reduced from 803 to 20 due to restriction <20>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        6   34.133    5.689   34.133    5.689 {built-in method scipy.fft._pocketfft.pypocketfft.c2c}
       10   10.938    1.094   10.938    1.094 {built-in method scipy.signal._sigtools._linear_filter}
        2    2.673    1.336    2.813    1.406 /home/linux-pc/anaconda3/envs/torch/lib/python3.10/site-packages/pandas/io/parsers/c_parser_wrapper.py:222(read)
        3    0.547    0.182   34.684   11.561 /home/linux-pc/anaconda3/envs/torch/lib/python3.10/site-packages/scipy/signal/_signaltools.py:2318(hilbert)
      209    0.524    0.003    0.526    0.003 /home/linux-pc/anaconda3/envs/torch/lib/python3.10/site-packages/pandas/core/array_algos/take.py:120(_take_nd_ndarray)
      35

<pstats.Stats at 0x7faee4693a90>

In [None]:
preprocessor.save()

In [70]:
preprocessor = PreprocessData(current_ecog_data_file, current_motion_data_file)
X, y = preprocessor.process()

motion_values.shape: (44986, 6)
y.shape: (89999, 6)
(89999, 20, 64)
(89999, 6)


In [31]:
pd.read_csv(current_ecog_data_file).shape

(920498, 66)

In [55]:
X.shape

(89999, 20, 64)

In [56]:
y.shape

(89999, 6)

In [28]:
pd.read_csv(current_motion_data_file).shape

(46227, 8)

In [None]:
# Preprocess Ipsilateral Data
# for index in range(6, 16):
#     preprocessor = PreprocessData(ecog_data_file_l[index], motion_data_file_l[index])
#     X, y = preprocessor.process()
#     preprocessor.save()

motion_values.shape: (44983, 3)
y.shape: (89999, 3)
(89999, 20, 64)
(89999, 3)
motion_values.shape: (44989, 3)
y.shape: (89999, 3)
(89999, 20, 64)
(89999, 3)
motion_values.shape: (44982, 3)
y.shape: (89999, 3)
(89999, 20, 64)
(89999, 3)
motion_values.shape: (44990, 3)
y.shape: (89999, 3)
(89999, 20, 64)
(89999, 3)
motion_values.shape: (44991, 3)
y.shape: (89999, 3)
(89999, 20, 64)
(89999, 3)
motion_values.shape: (44980, 3)
y.shape: (89999, 3)
(89999, 20, 64)
(89999, 3)
motion_values.shape: (44986, 3)
y.shape: (89999, 3)
(89999, 20, 64)
(89999, 3)
motion_values.shape: (44991, 3)
y.shape: (89999, 3)
(89999, 20, 64)
(89999, 3)
motion_values.shape: (44989, 3)
y.shape: (89999, 3)
(89999, 20, 64)
(89999, 3)
motion_values.shape: (44984, 3)
y.shape: (89999, 3)
(89999, 20, 64)
(89999, 3)


In [None]:
ecog_data_file_l[16:ecog_data_file_l.__len__()]

In [None]:
# # Preprocess Contralateral Data
# for index in range(16, ecog_data_file_l.__len__()):
#     preprocessor = PreprocessData(ecog_data_file_l[index], motion_data_file_l[index])
#     X, y = preprocessor.process()
#     preprocessor.save()

motion_values.shape: (44992, 3)
y.shape: (89999, 3)
(89999, 20, 64)
(89999, 3)
motion_values.shape: (44978, 3)
y.shape: (89999, 3)
(89999, 20, 64)
(89999, 3)
motion_values.shape: (44990, 3)
y.shape: (89999, 3)
(89999, 20, 64)
(89999, 3)
motion_values.shape: (44987, 3)
y.shape: (89999, 3)
(89999, 20, 64)
(89999, 3)
motion_values.shape: (44985, 3)
y.shape: (89999, 3)
(89999, 20, 64)
(89999, 3)
motion_values.shape: (44989, 3)
y.shape: (89999, 3)
(89999, 20, 64)
(89999, 3)
motion_values.shape: (44990, 3)
y.shape: (89999, 3)
(89999, 20, 64)
(89999, 3)
motion_values.shape: (44991, 3)
y.shape: (89999, 3)
(89999, 20, 64)
(89999, 3)
motion_values.shape: (44990, 3)
y.shape: (89999, 3)
(89999, 20, 64)
(89999, 3)
motion_values.shape: (44988, 3)
y.shape: (89999, 3)
(89999, 20, 64)
(89999, 3)


In [14]:
# Read in the data
processed_data_l_X = glob(os.path.join('/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/', '**', "**", "X.npy"))
processed_data_l_y = glob(os.path.join('/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/', '**', "**", "y.npy"))

In [15]:
processed_data_l_X

['/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-10_(S7)/X.npy',
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-24_(S10)/X.npy',
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-04-29_(S1)/X.npy',
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-06_(S6)/X.npy',
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-03_(S5)/X.npy',
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-03_(S3)/X.npy',
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-03_(S4)/X.npy',
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-10_(S8)/X.npy',
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-17_(S9)/X.npy',
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-04-29_(S2)/X.npy',
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Contralateral/2018-03-15_(S1)/X.npy',
 '/home/linux-pc/g

In [16]:
processed_data_l_y

['/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-10_(S7)/y.npy',
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-24_(S10)/y.npy',
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-04-29_(S1)/y.npy',
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-06_(S6)/y.npy',
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-03_(S5)/y.npy',
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-03_(S3)/y.npy',
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-03_(S4)/y.npy',
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-10_(S8)/y.npy',
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-17_(S9)/y.npy',
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-04-29_(S2)/y.npy',
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Contralateral/2018-03-15_(S1)/y.npy',
 '/home/linux-pc/g

In [None]:
# # Perform K-Fold Cross Validation
# iterator = iter(processed_data_l)

# for X, y in zip(iterator, iterator):
#     print(X)
#     print(y)

# Create k-fold cross validation
# select the best model
# make a prediction
# visualize the predictions in matlab
# create a live demo
# deploy demo onto the web
# share results for testing with real people


In [17]:
# Define K-fold sets
test_list_X = []
train_list_X = []
test_list_y = []
train_list_y = []

for i in range(len(processed_data_l_X)):
    test_list_X.append(processed_data_l_X[i])
    test_list_y.append(processed_data_l_y[i])
    train_X = [x for idx, x in enumerate(processed_data_l_X) if idx != i]
    train_y = [y for idx, y in enumerate(processed_data_l_y) if idx != i]
    train_list_X.append(train_X)
    train_list_y.append(train_y)

In [18]:
INDEX = 0

In [19]:
X = np.load(train_list_X[INDEX][0])
y = np.load(train_list_y[INDEX][0])

In [20]:
train_list_X

[['/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-24_(S10)/X.npy',
  '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-04-29_(S1)/X.npy',
  '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-06_(S6)/X.npy',
  '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-03_(S5)/X.npy',
  '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-03_(S3)/X.npy',
  '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-03_(S4)/X.npy',
  '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-10_(S8)/X.npy',
  '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-17_(S9)/X.npy',
  '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-04-29_(S2)/X.npy',
  '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Contralateral/2018-03-15_(S1)/X.npy',
  '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Contralateral/2018-04-12_(S3)/X.npy',
  '/h

In [None]:
train_list_y[0]

In [None]:
X.shape

In [None]:
y.shape

In [None]:
# Creating Train and Validation Sets
dataset = EcogMotionDataset(X, y)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_ds, val_ds = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=64)

In [None]:
# Defining the model

# Train Linear Model
input_channels = X.shape[2]
sequence_length = X.shape[1]
model = LinearEcogToMotionNet(input_channels, sequence_length)

# Train 1D CNN
# model = EcogToMotionNet()

# Train LSTM
# model = EcogLSTM(input_size=64, hidden_size=128, num_layers=1, output_size=3)


## Linear Model

In [None]:
# Linear Model 

# Train Linear Model
input_channels = X.shape[2]
sequence_length = X.shape[1]
linear_model = LinearEcogToMotionNet(input_channels, sequence_length)

linear_model.to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(linear_model.parameters(), lr=1e-3)

# Training loop
for epoch in range(20):
    linear_model.train()
    train_loss = 0.0
    for X_batch, y_batch in train_loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)
        optimizer.zero_grad()
        preds = linear_model(X_batch)
        loss = criterion(preds, y_batch)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * X_batch.size(0)
    train_loss /= len(train_loader.dataset)
    linear_model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)
            preds = linear_model(X_batch)
            loss = criterion(preds, y_batch)
            val_loss += loss.item() * X_batch.size(0)
        val_loss /= len(val_loader.dataset)
    print(f"Epoch {epoch+1} | Train Loss: {train_loss:.4f} | Val.Loss: {val_loss:.4f}")

## Convolutional Neural Network

In [None]:
# Train 1D CNN
cnn_model = EcogToMotionNet()

cnn_model.to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(cnn_model.parameters(), lr=1e-3)

# Training loop
for epoch in range(20):
    cnn_model.train()
    train_loss = 0.0
    for X_batch, y_batch in train_loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)
        optimizer.zero_grad()
        preds = cnn_model(X_batch)
        loss = criterion(preds, y_batch)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * X_batch.size(0)
    train_loss /= len(train_loader.dataset)
    cnn_model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)
            preds = cnn_model(X_batch)
            loss = criterion(preds, y_batch)
            val_loss += loss.item() * X_batch.size(0)
        val_loss /= len(train_loader.dataset)
    print(f"Epoch {epoch+1} | Train Loss: {train_loss:.4f} | Val.Loss: {val_loss:.4f}")

## Long Short-Term Memory Model

In [None]:
lstm_model = EcogLSTM(input_size=64, hidden_size=128, num_layers=1, output_size=3)
lstm_model.to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(lstm_model.parameters(), lr=1e-3)

# Training loop
for epoch in range(20):
    lstm_model.train()
    train_loss = 0.0
    for X_batch, y_batch in train_loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)
        optimizer.zero_grad()
        preds = lstm_model(X_batch)
        loss = criterion(preds, y_batch)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * X_batch.size(0)
    train_loss /= len(train_loader.dataset)
    lstm_model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)
            preds = lstm_model(X_batch)
            loss = criterion(preds, y_batch)
            val_loss += loss.item() * X_batch.size(0)
        val_loss /= len(val_loader.dataset)
    print(f"Epoch {epoch+1} | Train Loss: {train_loss:.4f} | Val.Loss: {val_loss:.4f}")

# Refined Model Training

### Single Session Training

In [35]:
# Example usage:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define Training and Test data
# Read in the data
processed_data_l_X = glob(os.path.join('/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/', '**', "**", "X.npy"))
processed_data_l_y = glob(os.path.join('/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/', '**', "**", "y.npy"))
# Define K-fold sets
test_list_X = []
train_list_X = []
test_list_y = []
train_list_y = []

for i in range(len(processed_data_l_X)):
    test_list_X.append(processed_data_l_X[i])
    test_list_y.append(processed_data_l_y[i])
    train_X = [x for idx, x in enumerate(processed_data_l_X) if idx != i]
    train_y = [y for idx, y in enumerate(processed_data_l_y) if idx != i]
    train_list_X.append(train_X)
    train_list_y.append(train_y)

# Load a single specific dataset
# K-Fold 0 uses session 7 as the test set:
KFOLD = 0
SESSION_SET = 6

"""
['/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-24_(S10)/X.npy', 0
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-04-29_(S1)/X.npy', 1
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-06_(S6)/X.npy', 2
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-03_(S5)/X.npy', 3
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-03_(S3)/X.npy', 4
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-03_(S4)/X.npy', 5
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-10_(S8)/X.npy', 6 
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-17_(S9)/X.npy', 7
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-04-29_(S2)/X.npy'] 8
"""

X = np.load(train_list_X[KFOLD][SESSION_SET])
y = np.load(train_list_y[KFOLD][SESSION_SET])

# X = np.load(test_list_X[0]) # Identify the test set
# y = np.load(test_list_y[0]) # Identify the test set

# Creating Train and Validation Sets
dataset = EcogMotionDataset(X, y)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_ds, val_ds = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=64)

# Assuming train_loader, val_loader, criterion are defined

# 2. CNN_LSTM Hybrid Model
hybrid_model = EcogToMotionNet()
criterion = nn.MSELoss()
hybrid_train_losses, hybrid_val_losses, hybrid_r2 = train_model(hybrid_model, device, train_loader, val_loader, epochs=100, model_name="Hybrid_CNN_LSTM_ipsilateral_3_output")


Hybrid_CNN_LSTM_ipsilateral_3_output Epoch 1/100 | Train Loss: 0.053915 | Val Loss: 0.046009 | R2: 0.019645
Model Checkpoint | epoch: 0 | best_val_loss: 0.046008632621520926
Hybrid_CNN_LSTM_ipsilateral_3_output Epoch 2/100 | Train Loss: 0.051276 | Val Loss: 0.044893 | R2: 0.043653
Model Checkpoint | epoch: 1 | best_val_loss: 0.04489347511278983
Hybrid_CNN_LSTM_ipsilateral_3_output Epoch 3/100 | Train Loss: 0.047520 | Val Loss: 0.045381 | R2: 0.033413
Hybrid_CNN_LSTM_ipsilateral_3_output Epoch 4/100 | Train Loss: 0.043741 | Val Loss: 0.048289 | R2: -0.031997
Hybrid_CNN_LSTM_ipsilateral_3_output Epoch 5/100 | Train Loss: 0.038546 | Val Loss: 0.034776 | R2: 0.262292
Model Checkpoint | epoch: 4 | best_val_loss: 0.03477576765102438
Hybrid_CNN_LSTM_ipsilateral_3_output Epoch 6/100 | Train Loss: 0.035500 | Val Loss: 0.033362 | R2: 0.292818
Model Checkpoint | epoch: 5 | best_val_loss: 0.03336158778186008
Hybrid_CNN_LSTM_ipsilateral_3_output Epoch 7/100 | Train Loss: 0.033531 | Val Loss: 0.0321

### Contralateral Single Session Training

In [16]:
# Example usage:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define Training and Test data
# Read in the data
movement_direction = "Contralateral"

processed_data_l_X = sorted(glob(os.path.join('/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/', movement_direction, "**", "X.npy")))
processed_data_l_y = sorted(glob(os.path.join('/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/', movement_direction, "**", "y.npy")))

In [17]:
processed_data_l_X[0]

'/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Contralateral/2018-03-15_(S1)/X.npy'

In [19]:
for index in range(len(processed_data_l_X)):
    X = np.load(processed_data_l_X[index])
    y = np.load(processed_data_l_y[index])

    # Creating Train and Validation Sets
    dataset = EcogMotionDataset(X, y)
    train_size = int(0.8 * len(dataset))
    val_size = len(dataset) - train_size 
    train_ds, val_ds = random_split(dataset, [train_size, val_size])

    train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
    val_loader = DataLoader(val_ds, batch_size=64)
    
    # 2. CNN_LSTM Hybrid Model
    hybrid_model = EcogToMotionNet()
    Hybrid_CNN_LSTM_contralateral_3_output_session_2 Epoch 79/100 | Train Loss: 0.007047 | Val Loss: 0.011237 | R2: 0.813044
    Model Checkpoint | epoch: 78 | best_val_loss: 0.011236593591857753
    Hybrid_CNN_LSTM_contralateral_3_output_session_2 Epoch 80/100 | Train Loss: 0.006915 | Val Loss: 0.011991 | R2: 0.801362
    Hybrid_CNN_LSTM_contralateral_3_output_session_2 Epoch 81/100 | Train Loss: 0.007074 | Val Loss: 0.012819 | R2: 0.788208
    criterion = nn.MSELoss()
    hybrid_train_losses, hybrid_val_losses, hybrid_r2 = train_model(hybrid_model, device, train_loader, val_loader, epochs=100, model_name=f"Hybrid_CNN_LSTM_contralateral_3_output_session_{index}")



Hybrid_CNN_LSTM_contralateral_3_output_session_0 Epoch 1/100 | Train Loss: 0.076476 | Val Loss: 0.065103 | R2: 0.018215
Model Checkpoint | epoch: 0 | best_val_loss: 0.06510252501567204
Hybrid_CNN_LSTM_contralateral_3_output_session_0 Epoch 2/100 | Train Loss: 0.071910 | Val Loss: 0.061534 | R2: 0.082820
Model Checkpoint | epoch: 1 | best_val_loss: 0.06153363577918046
Hybrid_CNN_LSTM_contralateral_3_output_session_0 Epoch 3/100 | Train Loss: 0.065237 | Val Loss: 0.053672 | R2: 0.201713
Model Checkpoint | epoch: 2 | best_val_loss: 0.053671987336542874
Hybrid_CNN_LSTM_contralateral_3_output_session_0 Epoch 4/100 | Train Loss: 0.054486 | Val Loss: 0.050726 | R2: 0.250228
Model Checkpoint | epoch: 3 | best_val_loss: 0.05072632818379336
Hybrid_CNN_LSTM_contralateral_3_output_session_0 Epoch 5/100 | Train Loss: 0.048533 | Val Loss: 0.046457 | R2: 0.319657
Model Checkpoint | epoch: 4 | best_val_loss: 0.0464571131248441
Hybrid_CNN_LSTM_contralateral_3_output_session_0 Epoch 6/100 | Train Loss: 

In [None]:
# Define K-fold sets
test_list_X = []
train_list_X = []
test_list_y = []
train_list_y = []

for i in range(len(processed_data_l_X)):
    test_list_X.append(processed_data_l_X[i])
    test_list_y.append(processed_data_l_y[i])
    train_X = [x for idx, x in enumerate(processed_data_l_X) if idx != i]
    train_y = [y for idx, y in enumerate(processed_data_l_y) if idx != i]
    train_list_X.append(train_X)
    train_list_y.append(train_y)

# Load a single specific dataset
# K-Fold 0 uses session 7 as the test set:
KFOLD = 0
SESSION_SET = 6

"""
['/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-24_(S10)/X.npy',0
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-04-29_(S1)/X.npy', 1
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-06_(S6)/X.npy', 2
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-03_(S5)/X.npy', 3
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-03_(S3)/X.npy', 4
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-03_(S4)/X.npy', 5
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-10_(S8)/X.npy', 6 
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-17_(S9)/X.npy', 7
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-04-29_(S2)/X.npy'] 8
"""

X = np.load(train_list_X[KFOLD][SESSION_SET])
y = np.load(train_list_y[KFOLD][SESSION_SET])

# X = np.load(test_list_X[0]) # Identify the test set
# y = np.load(test_list_y[0]) # Identify the test set

# Creating Train and Validation Sets
dataset = EcogMotionDataset(X, y)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_ds, val_ds = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=64)

# Assuming train_loader, val_loader, criterion are defined

### K-Fold Cross Validation Training

In [14]:
import os
import numpy as np
import torch
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from glob import glob
from sklearn.metrics import r2_score
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load session paths
processed_data_l_X = sorted(glob('/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/**/X.npy', recursive=True))
processed_data_l_y = sorted(glob('/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/**/y.npy', recursive=True))

# Create train-test splits (leave-one-session-out)
results = []
os.makedirs("checkpoints", exist_ok=True)
os.makedirs("runs", exist_ok=True)

patience = 10
min_delta = 1e-4

for KFOLD in range(len(processed_data_l_X)):
    print(f"\n=== Fold {KFOLD} ===")

    test_X_path = processed_data_l_X[KFOLD]
    test_y_path = processed_data_l_y[KFOLD]

    train_X_paths = [x for i, x in enumerate(processed_data_l_X) if i != KFOLD]
    train_y_paths = [y for i, y in enumerate(processed_data_l_y) if i != KFOLD]

    # Initialize model and optimizer
    model = EcogToMotionNet().to(device)
    optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-5)
    scheduler = ReduceLROnPlateau(optimizer, mode="min", factor=0.5, patience=5)
    criterion = nn.MSELoss()

    writer = SummaryWriter(log_dir=f"runs/fold_{KFOLD}")
    best_val_loss = float("inf")

    # Sequential training over sessions
    for session_idx, (X_path, y_path) in enumerate(zip(train_X_paths, train_y_paths)):
        print(f"Training on Session {session_idx + 1}/{len(train_X_paths)}")
        X = np.load(X_path)
        y = np.load(y_path)
        dataset = EcogMotionDataset(X, y)
        loader = DataLoader(dataset, batch_size=64, shuffle=True)

        model.train()
        epochs_no_improve = 0
        best_loss = float("inf")

        for epoch in range(80):
            running_loss = 0.0
            for X_batch, y_batch in loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                optimizer.zero_grad()
                preds = model(X_batch)
                loss = criterion(preds, y_batch)
                loss.backward()
                optimizer.step()
                running_loss += loss.item() * X_batch.size(0)

            avg_loss = running_loss / len(loader.dataset)
            writer.add_scalar(f"Loss/Train_Session_{session_idx}", avg_loss, epoch)
            print(f"Session {session_idx} Epoch {epoch+1} - Train Loss: {avg_loss:.6f}")
            scheduler.step(avg_loss)

            # Save checkpoint if best loss
            if avg_loss < best_loss - min_delta:
                best_loss = avg_loss
                epochs_no_improve = 0
                torch.save({
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': best_loss
                }, f"checkpoints/model_fold_{KFOLD}_session_{session_idx}.pt")
                print(f"Checkpoint saved for fold {KFOLD}, session {session_idx}, epoch {epoch+1}")
            else:
                epochs_no_improve += 1

            if epochs_no_improve >= patience:
                print(f"Early stopping at epoch {epoch+1} for session {session_idx}")
                break

    # Evaluate on test session
    X_test = np.load(test_X_path)
    y_test = np.load(test_y_path)
    test_dataset = EcogMotionDataset(X_test, y_test)
    test_loader = DataLoader(test_dataset, batch_size=64)

    model.eval()
    total_loss = 0
    total_r2 = 0
    total_samples = 0

    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            total_loss += loss.item() * inputs.size(0)
            total_r2 += r2_score(targets.cpu().numpy(), outputs.cpu().numpy()) * inputs.size(0)
            total_samples += inputs.size(0)

    avg_test_loss = total_loss / total_samples
    avg_test_r2 = total_r2 / total_samples
    print(f"Fold {KFOLD} - Test Loss: {avg_test_loss:.4f}, R^2: {avg_test_r2:.4f}")

    writer.add_scalar("Loss/Test", avg_test_loss, 0)
    writer.add_scalar("R2/Test", avg_test_r2, 0)

    # Final model save
    torch.save({
        'model_state_dict': model.state_dict(),
        'test_loss': avg_test_loss,
        'test_r2': avg_test_r2
    }, f"checkpoints/model_fold_{KFOLD}_final.pt")

    writer.close()
    results.append((KFOLD, avg_test_loss, avg_test_r2))

# Summary of all folds
print("\n=== Summary Across All Folds ===")
for fold, loss, r2 in results:
    print(f"Fold {fold} | Test Loss: {loss:.4f} | R^2: {r2:.4f}")


=== Fold 0 ===
Training on Session 1/9
Session 0 Epoch 1 - Train Loss: 0.051085
Checkpoint saved for fold 0, session 0, epoch 1
Session 0 Epoch 2 - Train Loss: 0.050288
Checkpoint saved for fold 0, session 0, epoch 2
Session 0 Epoch 3 - Train Loss: 0.048895
Checkpoint saved for fold 0, session 0, epoch 3
Session 0 Epoch 4 - Train Loss: 0.045292
Checkpoint saved for fold 0, session 0, epoch 4
Session 0 Epoch 5 - Train Loss: 0.035813
Checkpoint saved for fold 0, session 0, epoch 5
Session 0 Epoch 6 - Train Loss: 0.029496
Checkpoint saved for fold 0, session 0, epoch 6
Session 0 Epoch 7 - Train Loss: 0.026231
Checkpoint saved for fold 0, session 0, epoch 7
Session 0 Epoch 8 - Train Loss: 0.024574
Checkpoint saved for fold 0, session 0, epoch 8
Session 0 Epoch 9 - Train Loss: 0.021405
Checkpoint saved for fold 0, session 0, epoch 9
Session 0 Epoch 10 - Train Loss: 0.021470
Session 0 Epoch 11 - Train Loss: 0.019468
Checkpoint saved for fold 0, session 0, epoch 11
Session 0 Epoch 12 - Train

In [None]:
# 1. LSTM
# lstm_model = EcogLSTM(input_size=64, hidden_size=128, num_layers=1, output_size=3)
# lstm_optimizer = torch.optim.Adam(lstm_model.parameters(), lr=1e-3)
# lstm_train_losses, lstm_val_losses = train_model(lstm_model, train_loader, val_loader, criterion, lstm_optimizer, device, epochs=20, model_name="LSTM")

# 3. Linear
# input_channels = X.shape[2]
# sequence_length = X.shape[1]
# linear_model = LinearEcogToMotionNet(input_channels, sequence_length)
# linear_optimizer = torch.optim.Adam(linear_model.parameters(), lr=1e-3)
# linear_train_losses, linear_val_losses = train_model(linear_model, train_loader, val_loader, criterion, linear_optimizer, device, epochs=20, model_name="Linear")


In [None]:

# # Plot all losses together
# plot_losses({
#     # "LSTM": (lstm_train_losses, lstm_val_losses),
#     "CNN": (cnn_train_losses, cnn_val_losses),
#     # "Linear": (linear_train_losses, linear_val_losses)
# })



## Loading the model and making predictions

In [36]:
# Recreate the model structure
hybrid_model = EcogToMotionNet()
hybrid_model.load_state_dict(torch.load("models/Hybrid_CNN_LSTM_ipsilateral_3_output.pth"))
hybrid_model.to(device)
hybrid_model.eval()

EcogToMotionNet(
  (convolv): Sequential(
    (0): Conv1d(64, 128, kernel_size=(3,), stride=(1,), padding=(1,))
    (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,))
    (4): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): Conv1d(128, 256, kernel_size=(3,), stride=(1,), padding=(1,))
    (7): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU(inplace=True)
    (9): Dropout(p=0.3, inplace=False)
  )
  (lstm): LSTM(256, 128, num_layers=2, batch_first=True, bidirectional=True)
  (attn_weight): Linear(in_features=256, out_features=1, bias=False)
  (fc): Sequential(
    (0): ReLU(inplace=True)
    (1): Dropout(p=0.3, inplace=False)
    (2): Linear(in_features=256, out_features=3, bias=True)
  )
)

In [38]:
# Define K-fold sets
test_list_X = []
train_list_X = []
test_list_y = []
train_list_y = []

for i in range(len(processed_data_l_X)):
    test_list_X.append(processed_data_l_X[i])
    test_list_y.append(processed_data_l_y[i])
    train_X = [x for idx, x in enumerate(processed_data_l_X) if idx != i]
    train_y = [y for idx, y in enumerate(processed_data_l_y) if idx != i]
    train_list_X.append(train_X)
    train_list_y.append(train_y)

In [None]:
# test_list_X[1]

In [None]:
# test_list_y[1]

In [None]:
# train_list_X[0][1]

In [None]:
# Trained on the 8th session and predicting on the first session

In [None]:
# current_ecog_data_file = ecog_data_file_l[8]
# current_motion_data_file = motion_data_file_l[8]
# preprocessor = PreprocessData(current_ecog_data_file, current_motion_data_file)
# X, y = preprocessor.process()
# preprocessor.save()

In [37]:
# Trained using data from session 8; Testing using data from session 1
X = np.load(train_list_X[0][1])
y = np.load(train_list_y[0][1])
# scaler = joblib.load(train_list_X[0][1].strip("X.npy") + "scaler_ecog.pkl")

In [38]:
X.shape

(89999, 20, 64)

In [39]:
y.shape

(89999, 3)

In [40]:
# Creating Train and Validation Sets
dataset = EcogMotionDataset(X, y)
test_loader = DataLoader(dataset, batch_size=64, shuffle = True)

In [41]:
# Example Call
hybrid_model.to(device)
output_file_path = train_list_X[0][1].strip("X.npy") + "ecog_predictions.mat"
predictions, targets = predict_and_export(hybrid_model, test_loader, device, output_file_path)

Saved predictions to ecog_predictions.mat


In [57]:
output_file_path


'/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-04-29_(S1)/ecog_predictions.mat'

In [42]:
predictions[0]

array([-0.23674075,  0.13311261, -0.3458554 ], dtype=float32)

In [43]:
compute_rmse(targets[0], predictions[0])

np.float32(0.094205596)