In [52]:
import os
import numpy as np
import torch
import pandas as pd
from glob import glob
from scipy.io import loadmat
import matplotlib.pyplot as plt
from scipy.signal import butter, filtfilt, iirnotch, periodogram
from sklearn.preprocessing import StandardScaler
from scipy.stats import kurtosis
from torch.utils.data import random_split
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import gc
from sklearn.metrics import r2_score
from scipy.io import savemat
import joblib
from scipy.signal import hilbert
from sklearn.decomposition import PCA
from torch.utils.tensorboard import SummaryWriter
import torch
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import os
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from typing import Literal

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using Device:", device)

Using Device: cuda


In [17]:
# Noise Filters
def butter_bandpass(lowcut, highcut, fs, order=5):
    """Design a Butterworth band-pass filter.

    Parameters
    ----------
    lowcut : float
        Lower band edge in Hz.
    highcut : float
        Upper band edge in Hz.
    fs : float
        Sampling frequency in Hz.
    order : int, optional
        Butterworth order passed to ``scipy.signal.butter``, by default 5

    Returns
    -------
    tuple
        ``(b, a)`` numerator / denominator coefficients.
    """
    nyquist = fs / 2.0
    # scipy expects the band edges as fractions of the Nyquist frequency.
    normalized_band = [lowcut / nyquist, highcut / nyquist]
    numerator, denominator = butter(order, normalized_band, btype='band')
    return numerator, denominator

def bandpass_filter(data, lowcut=1.0, highcut=200.0, fs=1000.0, order=4):
    """Zero-phase Butterworth band-pass filter applied along axis 0.

    Parameters
    ----------
    data : array-like
        Signal to filter; filtering runs along the first axis.
    lowcut, highcut : float, optional
        Band edges in Hz, by default 1.0 and 200.0
    fs : float, optional
        Sampling frequency in Hz, by default 1000.0
    order : int, optional
        Butterworth order, by default 4

    Returns
    -------
    np.ndarray
        Filtered signal with the same shape as ``data``.
    """
    coefficients = butter_bandpass(lowcut, highcut, fs, order=order)
    # filtfilt runs the filter forwards and backwards, cancelling the phase shift.
    return filtfilt(*coefficients, data, axis=0)

# Apply after bandpass
def notch_filter(data, freq=60.0, fs=1000.0, quality=30.0):
    """Zero-phase IIR notch filter applied along axis 0.

    Parameters
    ----------
    data : array-like
        Signal to filter; filtering runs along the first axis.
    freq : float, optional
        Frequency to remove in Hz, by default 60.0
    fs : float, optional
        Sampling frequency in Hz, by default 1000.0
    quality : float, optional
        Quality factor handed to ``scipy.signal.iirnotch``, by default 30.0

    Returns
    -------
    np.ndarray
        Filtered signal with the same shape as ``data``.
    """
    notch_b, notch_a = iirnotch(freq, quality, fs)
    suppressed = filtfilt(notch_b, notch_a, data, axis=0)
    return suppressed

# Noise Metrics for evaluation
def compute_rmse(true, estimate):
    """Return the root-mean-square error between ``true`` and ``estimate``.

    The mean is taken over every element of the difference.
    """
    residual = true - estimate
    mean_squared_error = np.mean(residual ** 2)
    return np.sqrt(mean_squared_error)

# A positive kurtosis reduction (raw - denoised > 0) is counted as a denoised channel
def proportion_of_positive_kurtosis_signals(kurtosis_raw, kurtosis_denoised):
    """Percentage of signals whose kurtosis dropped after denoising.

    Parameters
    ----------
    kurtosis_raw : np.ndarray
        Kurtosis of each raw signal.
    kurtosis_denoised : np.ndarray
        Kurtosis of each denoised signal, in the same order.

    Returns
    -------
    float
        ``100 * (number of entries with raw - denoised > 0) / len(kurtosis_raw)``.
    """
    kurtosis_dropped = (kurtosis_raw - kurtosis_denoised) > 0
    improved_count = np.array([kurtosis_dropped]).sum()
    fraction_improved = improved_count / len(kurtosis_raw)
    return fraction_improved * 100

# Features are later standardized to zero mean and unit variance with StandardScaler (see PreprocessData.filter_signal)

In [18]:
# Computing the power envelope of each channel

def band_power_envelope(ecog_signal: np.ndarray, lowcut: float, highcut: float, fs: float = 1000.0, order: int = 4) -> np.ndarray:
    """Compute the band-limited amplitude envelope via the Hilbert transform.

    Parameters
    ----------
    ecog_signal : np.ndarray (T, channels)
        Filtered ECoG signal; all processing runs along axis 0.
    lowcut : float
        Lower band limit in Hz.
    highcut : float
        Upper band limit in Hz.
    fs : float, optional
        Sampling frequency in Hz, by default 1000.0
    order : int, optional
        Butterworth order, by default 4

    Returns
    -------
    np.ndarray
        Envelope (magnitude of the analytic signal), same shape as the input.
    """
    # Isolate the band of interest with a zero-phase band-pass.
    coeff_b, coeff_a = butter_bandpass(lowcut, highcut, fs, order=order)
    band_limited = filtfilt(coeff_b, coeff_a, ecog_signal, axis=0)
    # The magnitude of the analytic signal is the instantaneous amplitude.
    return np.abs(hilbert(band_limited, axis=0))

def multiband_features(ecog_raw: np.ndarray, fs: float = 1000.0) -> np.ndarray:
    """Build concatenated band envelopes for the μ, β and high-gamma bands.

    Parameters
    ----------
    ecog_raw : np.ndarray
        (T, channels) signal.
    fs : float, optional
        Sampling frequency in Hz, by default 1000.0

    Returns
    -------
    np.ndarray
        (T, 3 * channels): the μ envelopes of every channel, followed by the
        β envelopes, followed by the high-gamma envelopes.
    """
    # (lowcut, highcut) in Hz, in output order: μ, β, high-gamma.
    band_edges = ((8.0, 13.0), (13.0, 30.0), (70.0, 200.0))
    envelopes = [
        band_power_envelope(ecog_raw, lowcut=low_hz, highcut=high_hz, fs=fs)
        for low_hz, high_hz in band_edges
    ]
    # Stack the three bands side by side along the channel axis.
    return np.concatenate(envelopes, axis=1)


In [19]:
def create_overlapping_windows(ecog_values: np.ndarray, motion_values: np.ndarray, window_size: int = 20, hop_size: int = 10):
    """Build overlapping windows to increase sample count and capture smoother transitions.

    Each window is labelled with the row of ``motion_values`` whose index equals the
    index of the window's last ECoG sample (clamped to the last available row), so
    ``motion_values`` should be sampled on the same time grid as ``ecog_values``.

    Parameters
    ----------
    ecog_values : np.ndarray
        (T, features)
    motion_values : np.ndarray
        (T_motion, n_targets)
    window_size : int, optional
        number of timepoints per window, by default 20
    hop_size : int, optional
        step between consecutive window starts, by default 10

    Returns
    -------
    X : np.ndarray
        (n_windows, window_size, features)
    y : np.ndarray
        (n_windows, n_targets)
        Both arrays have zero rows when the recording is shorter than one window.

    Raises
    ------
    ValueError
        If ``window_size`` or ``hop_size`` is not positive, or if windows exist
        but ``motion_values`` has no rows to label them with.
    """
    if window_size <= 0 or hop_size <= 0:
        raise ValueError("window_size and hop_size must be positive integers")

    ecog_values = np.asarray(ecog_values)
    motion_values = np.asarray(motion_values)
    num_samples = ecog_values.shape[0]

    # Number of complete windows that fit; clamp at zero for short recordings.
    num_windows = max(0, (num_samples - window_size) // hop_size + 1)
    if num_windows == 0:
        # np.stack cannot handle an empty list, so return correctly shaped empties.
        X = np.empty((0, window_size) + ecog_values.shape[1:], dtype=ecog_values.dtype)
        y = np.empty((0,) + motion_values.shape[1:], dtype=motion_values.dtype)
        return X, y

    if motion_values.shape[0] == 0:
        raise ValueError("motion_values must contain at least one row")

    starts = np.arange(num_windows) * hop_size
    ends = starts + window_size
    X = np.stack([ecog_values[start:end] for start, end in zip(starts, ends)], axis=0)

    # Label = motion at the window's last timepoint, clamped to the last motion row.
    label_rows = np.minimum(ends - 1, motion_values.shape[0] - 1)
    y = motion_values[label_rows]
    return X, y


In [20]:
# Model definitions
class EcogMotionDataset(Dataset):
    """Torch dataset pairing ECoG windows with their motion targets.

    Both inputs are copied into ``float32`` tensors at construction time and
    indexed in lock-step.
    """

    def __init__(self, X, y):
        self.X, self.y = (torch.tensor(values, dtype=torch.float32) for values in (X, y))

    def __len__(self):
        # Number of samples is the length of the first axis of X.
        return len(self.X)

    def __getitem__(self, idx):
        window, target = self.X[idx], self.y[idx]
        return window, target
    
# CNN/LSTM hybrid
class EcogToMotionNet(nn.Module):
    """CNN + Bi-LSTM + attention regressor from ECoG windows to motion coordinates.

    Parameters
    ----------
    input_channels : int, optional
        Number of features per timepoint in the input window, by default 64
    hidden_size : int, optional
        Hidden size of each LSTM direction, by default 128
    output_dim : int, optional
        Number of regressed outputs, by default 3
    dropout : float, optional
        Dropout probability used after the CNN and before the output layer,
        by default 0.3

    Notes
    -----
    Sub-module names and their order are unchanged, so ``state_dict`` keys of
    checkpoints trained with the default arguments still load.
    """

    def __init__(self, input_channels: int = 64, hidden_size: int = 128, output_dim: int = 3, dropout: float = 0.3):
        super().__init__()

        # CNN component: outputs 256 channels, sequence length preserved (padding=1).
        self.convolv = nn.Sequential(
            nn.Conv1d(in_channels=input_channels, out_channels=128, kernel_size=3, padding=1),
            nn.BatchNorm1d(128),
            nn.ReLU(inplace=True),
            nn.Conv1d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
            nn.BatchNorm1d(128),
            nn.ReLU(inplace=True),
            nn.Conv1d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout)
        )

        # Bi-LSTM component (2 layers); output width is 2 * hidden_size.
        self.lstm = nn.LSTM(input_size=256, hidden_size=hidden_size, num_layers=2, batch_first=True, bidirectional=True)

        # One attention score per timestep, computed from the Bi-LSTM output.
        self.attn_weight = nn.Linear(2 * hidden_size, 1, bias=False)

        # Fully connected regression head.
        self.fc = nn.Sequential(
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),
            nn.Linear(2 * hidden_size, output_dim)
        )

    def forward(self, x):
        """Map a batch of windows ``(batch, seq_len, input_channels)`` to ``(batch, output_dim)``."""
        x = x.permute(0, 2, 1)   # (batch, input_channels, seq_len) for Conv1d
        x = self.convolv(x)      # (batch, 256, seq_len)
        x = x.permute(0, 2, 1)   # (batch, seq_len, 256) for the batch-first LSTM

        lstm_out, _ = self.lstm(x)  # (batch, seq_len, 2 * hidden_size)

        # attn_scores[i, t] = w^T h_{i, t}; softmax over t gives the weights α_{i, t}.
        attn_scores = self.attn_weight(lstm_out).squeeze(-1)
        attn_weights = torch.softmax(attn_scores, dim=1)
        # Attention-weighted sum of the LSTM outputs over time.
        attn_applied = torch.bmm(attn_weights.unsqueeze(1), lstm_out).squeeze(1)

        # Regression to the motion coordinates.
        return self.fc(attn_applied)

# Linear Model
class LinearEcogToMotionNet(nn.Module):
    """Single linear layer over the flattened (channel-major) input window."""

    def __init__(self, input_channels = 64, sequence_length = 20, output_dim = 3):
        super().__init__()
        flattened_size = input_channels * sequence_length
        self.linear = nn.Linear(flattened_size, output_dim)

    def forward(self, x):
        # (batch, seq, channels) -> (batch, channels, seq) so features are channel-major.
        channel_major = torch.permute(x, (0, 2, 1))
        flattened = torch.flatten(channel_major, start_dim=1)
        return self.linear(flattened)

# LSTM
class EcogLSTM(nn.Module):
    """LSTM regressor that predicts from the output at the final timestep."""

    def __init__(self, input_size = 64, hidden_size = 128, num_layers = 1, output_size = 3):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # sequence_out: (batch_size, seq_len, hidden_size); the final states are not needed.
        sequence_out, _ = self.lstm(x)
        final_step = sequence_out[:, -1, :]
        return self.fc(final_step)
 

In [21]:
def predict_and_export(model, data_loader, device, output_file_path):
    """Run ``model`` over ``data_loader`` and save predictions and targets to a .mat file.

    Parameters
    ----------
    model : torch.nn.Module
        Model to evaluate; it is switched to eval mode.
    data_loader : iterable
        Yields ``(inputs, targets)`` batches of tensors.
    device : torch.device
        Device the inputs are moved to before the forward pass.
    output_file_path : str
        Destination of the .mat file, written with keys ``"predictions"`` and
        ``"targets"``.

    Returns
    -------
    tuple of np.ndarray
        ``(predictions, targets)`` concatenated over all batches, in loader order.

    Raises
    ------
    ValueError
        If ``data_loader`` yields no batches.
    """
    model.eval()
    pred_batches, target_batches = [], []

    with torch.no_grad():
        for inputs, targets in data_loader:
            outputs = model(inputs.to(device))
            pred_batches.append(outputs.cpu().numpy())
            target_batches.append(targets.cpu().numpy())

    if not pred_batches:
        raise ValueError("data_loader yielded no batches; nothing to predict or export")

    predictions = np.concatenate(pred_batches, axis=0)
    targets = np.concatenate(target_batches, axis=0)

    # Save as .mat file for visualization
    savemat(output_file_path, {
        "predictions": predictions,
        "targets": targets
    })
    # Report the real destination instead of a fixed file name.
    print(f"Saved predictions to {output_file_path}")

    return predictions, targets

In [22]:
class PreprocessData:
    """Turn one session's ECoG and motion CSV files into windowed training arrays.

    Pipeline run by :meth:`process`: read CSVs -> common average reference ->
    band-pass + notch -> band envelopes -> PCA -> standard scaling -> windowing.

    The ECoG CSV is expected to contain a ``Time`` column, with the channel
    columns at positions ``1:-1``; the motion CSV is expected to contain
    ``Motion_time`` (and optionally ``Fsm``) next to the coordinate columns.
    """

    # Sampling rate handed to the filters, in Hz.
    SAMPLING_RATE_HZ = 1000.0

    def __init__(self, ecog_file_path, motion_file_path):
        self.ecog_file_path = ecog_file_path
        self.motion_file_path = motion_file_path
        self.ecog_data = None
        self.motion_data = None
        self.filtered_ecog = None
        self.scaled_ecog = None
        self.X = None
        self.y = None
        self.scaler = None
        # Filled by filter_signal(eval=True): {"kurtosis_improved_pct", "rmse"}.
        self.filter_metrics = None

    def _channel_columns(self):
        """Return the ECoG channel column names (every column except the first and last)."""
        return list(self.ecog_data.columns[1:-1])

    def process(self, eval=False, window_size=20, duration_limit=900, hop_size=10):
        """Run the full pipeline and return ``(X, y)``.

        Parameters
        ----------
        eval : bool, optional
            When True, filter-quality metrics are stored in ``self.filter_metrics``.
        window_size : int, optional
            Timepoints per window, by default 20
        duration_limit : float, optional
            Only rows with time <= this value are kept, by default 900
        hop_size : int, optional
            Step between window starts, by default 10
        """
        self.read_data()
        self.common_average_reference()
        self.filter_signal(eval=eval)
        self.format_data(window_size=window_size, duration_limit=duration_limit, hop_size=hop_size)
        return self.X, self.y

    def read_data(self):
        """Load the ECoG and motion CSV files into DataFrames."""
        self.ecog_data = pd.read_csv(self.ecog_file_path)
        self.motion_data = pd.read_csv(self.motion_file_path)
        return self

    def common_average_reference(self):
        """Subtract the per-sample mean across channels from every channel.

        Only the channel columns are touched, so ``Time`` and the other
        non-channel columns keep their values.
        """
        channels = self._channel_columns()
        values = self.ecog_data[channels].to_numpy(dtype=float)
        values = values - values.mean(axis=1, keepdims=True)
        self.ecog_data = self.ecog_data.copy()
        self.ecog_data[channels] = values
        return self

    def filter_signal(self, eval=False):
        """Filter the channels, extract band envelopes, reduce with PCA and scale.

        The channel columns of ``self.ecog_data`` are replaced by the scaled
        principal components (one component per original channel column).
        """
        fs = self.SAMPLING_RATE_HZ
        channels = self._channel_columns()
        ecog_raw = self.ecog_data[channels].to_numpy(dtype=float)

        # Apply filters
        filtered = bandpass_filter(ecog_raw, lowcut=1.0, highcut=200.0, fs=fs, order=4)
        denoised = notch_filter(filtered, freq=60, fs=fs)

        # Evaluate filters and keep the results instead of discarding them.
        if eval:
            kurt_raw = kurtosis(ecog_raw, axis=0, fisher=True)
            kurt_denoised = kurtosis(denoised, axis=0, fisher=True)
            self.filter_metrics = {
                "kurtosis_improved_pct": proportion_of_positive_kurtosis_signals(kurt_raw, kurt_denoised),
                "rmse": compute_rmse(ecog_raw, denoised),
            }

        # Compute power envelopes: shape (T, 3 * n_channels)
        features = multiband_features(denoised, fs=fs)

        # Reduce back to one component per channel column so the result fits the
        # channel columns of the DataFrame below.
        pca = PCA(n_components=len(channels), random_state=42)
        reduced = pca.fit_transform(features)

        # Scale
        self.scaler = StandardScaler()
        self.scaled_ecog = self.scaler.fit_transform(reduced)

        # Replace in DataFrame
        self.ecog_data = self.ecog_data.copy()
        self.ecog_data[channels] = self.scaled_ecog

        # Clean memory
        del ecog_raw, filtered, denoised, features, reduced
        gc.collect()
        return self

    def format_data(self, window_size=20, duration_limit=900, hop_size=10):
        """Cut the processed ECoG into windows and label each with time-aligned motion.

        The motion stream has far fewer rows than the ECoG stream, so each ECoG
        sample is first paired with the most recent motion sample at or before
        its timestamp; windows are then labelled from that aligned array.
        """
        ecog_df = self.ecog_data[self.ecog_data["Time"] <= duration_limit]
        motion_df = self.motion_data[self.motion_data["Motion_time"] <= duration_limit]
        if len(motion_df) == 0:
            raise ValueError("no motion samples fall within duration_limit")

        ecog_values = ecog_df[ecog_df.columns[1:-1]].to_numpy()

        # Pick the coordinate columns by name; the column layout differs between files.
        # NOTE(review): assumes every column other than Fsm / Motion_time is a coordinate.
        target_columns = [col for col in motion_df.columns if col not in ("Fsm", "Motion_time")]
        motion_values = motion_df[target_columns].to_numpy()

        # NOTE(review): assumes "Time" and "Motion_time" share the same clock and unit
        # (both are compared against the same duration_limit above) — confirm.
        ecog_times = ecog_df["Time"].to_numpy()
        motion_times = motion_df["Motion_time"].to_numpy()
        time_order = np.argsort(motion_times, kind="stable")  # searchsorted needs sorted input
        motion_times = motion_times[time_order]
        motion_values = motion_values[time_order]
        nearest_prior = np.searchsorted(motion_times, ecog_times, side="right") - 1
        nearest_prior = np.clip(nearest_prior, 0, len(motion_times) - 1)
        motion_aligned = motion_values[nearest_prior]

        X, y = create_overlapping_windows(ecog_values, motion_aligned, window_size=window_size, hop_size=hop_size)
        self.X, self.y = X, y

        print(self.X.shape)
        print(self.y.shape)

        # Clean up
        del ecog_values, motion_values, motion_aligned
        gc.collect()

    def save(self):
        """Write the scaler and the ``X`` / ``y`` arrays next to the ECoG CSV file."""
        if self.scaler is None or self.X is None or self.y is None:
            raise RuntimeError("process() must be run before save()")
        # str.strip() removes a set of characters, not a suffix, so derive the folder explicitly.
        output_dir = os.path.dirname(self.ecog_file_path)
        joblib.dump(self.scaler, os.path.join(output_dir, "scaler_ecog.pkl"))
        np.save(os.path.join(output_dir, "X.npy"), self.X)
        np.save(os.path.join(output_dir, "y.npy"), self.y)


In [23]:
def train_model(model, train_loader, val_loader, device, epochs=20, model_name="model", example_input=torch.rand(1,20,64), checkpoint_dir="models/best_ecog_model.pth"):
    """Train ``model`` with AdamW + MSE, logging to TensorBoard and checkpointing the best epoch.

    Parameters
    ----------
    model : torch.nn.Module
        Model to train; moved to ``device``.
    train_loader, val_loader : DataLoader
        Yield ``(X_batch, y_batch)`` tensors.
    device : torch.device
        Device used for training and validation.
    epochs : int, optional
        Maximum number of epochs, by default 20
    model_name : str, optional
        Used for the TensorBoard run directory (``runs/<model_name>``) and log lines.
    example_input : torch.Tensor or None, optional
        Sample input used to record the model graph; pass None to skip it.
    checkpoint_dir : str, optional
        File path (despite the name) the best ``state_dict`` is written to.

    Returns
    -------
    tuple of list
        ``(train_losses, val_losses, r2_scores)``, one entry per completed epoch.
    """
    model.to(device)
    criterion = nn.MSELoss()
    optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-5)
    scheduler = ReduceLROnPlateau(optimizer, mode="min", factor=0.5, patience=5)
    best_val_loss = float('inf')
    early_stop_counter = 0
    patience = 10  # epochs without improvement before stopping early

    # torch.save does not create missing folders, so make sure the target exists.
    checkpoint_parent = os.path.dirname(checkpoint_dir)
    if checkpoint_parent:
        os.makedirs(checkpoint_parent, exist_ok=True)

    train_losses = []
    val_losses = []
    r2_scores = []

    writer = SummaryWriter(log_dir='runs/' + model_name)
    try:
        # Add the model graph to TensorBoard using example_input
        if example_input is not None:
            writer.add_graph(model, example_input.to(device))

        for epoch in range(epochs):
            # Train
            model.train()
            running_train_loss = 0.0
            for X_batch, y_batch in train_loader:
                X_batch = X_batch.to(device)
                y_batch = y_batch.to(device)
                optimizer.zero_grad()
                preds = model(X_batch)
                loss = criterion(preds, y_batch)
                loss.backward()
                optimizer.step()
                # Weight by batch size so the epoch figure is a per-sample mean.
                running_train_loss += loss.item() * X_batch.size(0)
            avg_train_loss = running_train_loss / len(train_loader.dataset)
            train_losses.append(avg_train_loss)

            # Validate
            model.eval()
            running_val_loss = 0.0
            all_preds = []
            all_targets = []
            with torch.no_grad():
                for X_batch, y_batch in val_loader:
                    X_batch = X_batch.to(device)
                    y_batch = y_batch.to(device)
                    preds = model(X_batch)
                    loss = criterion(preds, y_batch)
                    running_val_loss += loss.item() * X_batch.size(0)
                    all_preds.append(preds.cpu())
                    all_targets.append(y_batch.cpu())
            all_preds = torch.cat(all_preds).numpy()
            all_targets = torch.cat(all_targets).numpy()
            r2 = r2_score(all_targets, all_preds)
            r2_scores.append(r2)
            avg_val_loss = running_val_loss / len(val_loader.dataset)
            val_losses.append(avg_val_loss)

            # Log to TensorBoard
            writer.add_scalar("Loss/Train", avg_train_loss, epoch)
            writer.add_scalar("Loss/Validation", avg_val_loss, epoch)
            writer.add_scalar("R2/Validation", r2, epoch)
            writer.add_scalar("Learning Rate", optimizer.param_groups[0]['lr'], epoch)

            print(f"{model_name} Epoch {epoch+1}/{epochs} | Train Loss: {avg_train_loss:.6f} | Val Loss: {avg_val_loss:.6f} | R2: {r2:.6f}")

            scheduler.step(avg_val_loss)

            # Save best model checkpoint (improvement must exceed 1e-5)
            if avg_val_loss < best_val_loss - 1e-5:
                best_val_loss = avg_val_loss
                early_stop_counter = 0
                print(f"Model Checkpoint | epoch: {epoch} | best_val_loss: {best_val_loss}")
                torch.save(model.state_dict(), checkpoint_dir)
            else:
                early_stop_counter += 1
                if early_stop_counter >= patience:
                    print(f"Early stopping at epoch {epoch+1}")
                    break
    finally:
        # Flush and release the TensorBoard writer even if training raises.
        writer.close()

    return train_losses, val_losses, r2_scores


In [62]:
def create_six_motion_outputs_for_df(df, wrist: Literal["LEFT", "RIGHT"] = "RIGHT"):
    """Pad a single-wrist motion frame with zero columns for the other wrist.

    Parameters
    ----------
    df : pd.DataFrame
        Motion data holding ``Fsm``, ``Motion_time`` and the X/Y/Z columns of
        the recorded wrist.
    wrist : {"LEFT", "RIGHT"}, optional
        The wrist that IS present in ``df``; zero columns are added for the
        other one. By default "RIGHT"

    Returns
    -------
    pd.DataFrame
        New frame with columns ordered ``Fsm``, ``Left_Wrist_X/Y/Z``,
        ``Motion_time``, ``Right_Wrist_X/Y/Z`` and the same index as ``df``.

    Raises
    ------
    ValueError
        If ``wrist`` is not "LEFT" or "RIGHT", or if ``df`` already contains
        the columns that would be added.
    """
    if wrist == "RIGHT":
        missing_side = "Left"
    elif wrist == "LEFT":
        missing_side = "Right"
    else:
        raise ValueError("wrist must be either LEFT or RIGHT")

    new_columns = [f"{missing_side}_Wrist_{axis}" for axis in ("X", "Y", "Z")]
    already_present = [col for col in new_columns if col in df.columns]
    if already_present:
        raise ValueError(f"df already contains columns: {already_present}")

    # Build the zero block on df's own index: concat(axis=1) aligns on index, so a
    # default RangeIndex here would produce NaN rows for any filtered/re-indexed df.
    df_zeros = pd.DataFrame(0.0, index=df.index, columns=new_columns)
    df_combined = pd.concat([df, df_zeros], axis=1)

    desired_order = [
        'Fsm', 'Left_Wrist_X', 'Left_Wrist_Y', 'Left_Wrist_Z',
        'Motion_time', 'Right_Wrist_X', 'Right_Wrist_Y', 'Right_Wrist_Z'
    ]
    return df_combined[desired_order]

In [24]:
def plot_losses(losses_dict):
    """Plot training and validation loss curves for several models on one figure.

    Parameters
    ----------
    losses_dict : dict
        Maps a model name to a ``(train_losses, val_losses)`` pair of sequences.
    """
    plt.figure(figsize=(10,6))
    for name in losses_dict:
        train_curve, val_curve = losses_dict[name]
        plt.plot(train_curve, label=f"{name} Train")
        plt.plot(val_curve, label=f"{name} Val")
    # Axis labels, title and legend so the figure stands on its own.
    plt.xlabel("Epoch")
    plt.ylabel("Loss (MSE)")
    plt.title("Training and Validation Loss Curves")
    plt.legend()
    plt.grid(True)
    plt.show()


In [25]:
os.path.join(os.getcwd(), "src/", "motor_cortex/data/")

'/home/linux-pc/gh/CRCNS/src/motor_cortex/data/'

In [27]:
# Collect every motion / ECoG CSV below the data folder (recursive glob from the
# current working directory).
# NOTE(review): glob() does not guarantee an ordering and the two lists come from
# separate calls; later cells pair them by index (e.g. [8]) — confirm the orders match.
motion_data_file_l = glob(os.path.join(os.getcwd(), "src/", "motor_cortex/data/data/", "**", "motion*.csv"), recursive=True)
ecog_data_file_l = glob(os.path.join(os.getcwd(), "src/", "motor_cortex/data/data/", "**", "ecog*.csv"), recursive=True)

# Reading only the right-wrist data and training the models to predict right-wrist motion

## Bilateral Data

In [30]:
motion_data_file_l[0]

'/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Bilateral/2018-07-12_(S1)/motion_data.csv'

In [31]:
motion_data_file = motion_data_file_l[0]

In [32]:
ecog_data_file = ecog_data_file_l[0]

In [33]:
motion_data_file

'/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Bilateral/2018-07-12_(S1)/motion_data.csv'

In [34]:
ecog_data_file

'/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Bilateral/2018-07-12_(S1)/ecog_data.csv'

In [35]:
motion_data_bilateral_2018_07_12_S1 = pd.read_csv(motion_data_file)

In [None]:
motion_data_bilateral_2018_07_12_S1["Motion_time"]

0           0.000
1           0.024
2           0.044
3           0.062
4           0.082
           ...   
51288    1026.151
51289    1026.170
51290    1026.191
51291    1026.211
51292    1026.241
Name: Motion_time, Length: 51293, dtype: float64

In [37]:
ecog_data_bilateral_2018_07_12_S1 = pd.read_csv(ecog_data_file)
channel_data = ecog_data_bilateral_2018_07_12_S1.columns[1:-1].values
ecog_data_bilateral_2018_07_12_S1[channel_data].values

array([[ 548.44,  445.42,  398.44, ...,  406.98,  382.4 ,  754.9 ],
       [ 469.38,  417.6 ,  299.79, ...,  245.42,  405.31,  654.38],
       [ 321.88,  202.92,  220.83, ...,   97.4 ,  346.46,  533.23],
       ...,
       [-298.85,  -66.67,  116.15, ..., -176.35,  -57.81,  -28.02],
       [-389.06,  -96.04,  127.29, ..., -180.  ,  -88.54,  -81.25],
       [-288.85,   32.4 ,  161.15, ..., -138.65, -104.17, -129.58]],
      shape=(1024398, 64))

In [51]:
print(motion_data_bilateral_2018_07_12_S1)

       Fsm  Left_Wrist_X  Left_Wrist_Y  Left_Wrist_Z  Motion_time  \
0       50     -0.589976     -0.590399     -0.228992        0.000   
1       50     -0.587234     -0.570032     -0.211194        0.024   
2       50     -0.577829     -0.577735     -0.216256        0.044   
3       50     -0.525398     -0.500043     -0.190533        0.062   
4       50     -0.567822     -0.568262     -0.240272        0.082   
...    ...           ...           ...           ...          ...   
51288   50      0.983919      1.375122     -0.250602     1026.151   
51289   50      0.996104      1.382786     -0.243810     1026.170   
51290   50      0.987838      1.387219     -0.243038     1026.191   
51291   50      0.978803      1.392886     -0.242469     1026.211   
51292   50      1.059255      1.383485     -0.169499     1026.241   

       Right_Wrist_X  Right_Wrist_Y  Right_Wrist_Z  
0          -2.308375       0.156087       1.136016  
1          -2.320215       0.178141       1.007657  
2          -

In [41]:
current_motion_data_file = motion_data_file_l[8]

In [49]:
current_motion_data_file_df = pd.read_csv(current_motion_data_file)

In [50]:
print(current_motion_data_file_df)

       Fsm  Motion_time  Right_Wrist_X  Right_Wrist_Y  Right_Wrist_Z
0       50        0.000      -0.400157       0.707055      -0.247075
1       50        0.019      -0.398422       0.706933      -0.246666
2       50        0.040      -0.393779       0.700217      -0.246530
3       50        0.059      -0.402774       0.705328      -0.245737
4       50        0.082      -0.394572       0.701167      -0.245586
...    ...          ...            ...            ...            ...
46222   50      924.813      -0.683175       0.404797      -0.382371
46223   50      924.833      -0.679442       0.397681      -0.381935
46224   50      924.852      -0.675566       0.394216      -0.382246
46225   50      924.873      -0.677858       0.397714      -0.381256
46226   50      924.892      -0.671611       0.391159      -0.381112

[46227 rows x 5 columns]


In [60]:
current_motion_data_file_df_update = create_six_motion_outputs_for_df(current_motion_data_file_df, wrist="RIGHT")

In [61]:
current_motion_data_file_df_update

Unnamed: 0,Fsm,Left_Wrist_X,Left_Wrist_Y,Left_Wrist_Z,Motion_time,Right_Wrist_X,Right_Wrist_Y,Right_Wrist_Z
0,50,0.0,0.0,0.0,0.000,-0.400157,0.707055,-0.247075
1,50,0.0,0.0,0.0,0.019,-0.398422,0.706933,-0.246666
2,50,0.0,0.0,0.0,0.040,-0.393779,0.700217,-0.246530
3,50,0.0,0.0,0.0,0.059,-0.402774,0.705328,-0.245737
4,50,0.0,0.0,0.0,0.082,-0.394572,0.701167,-0.245586
...,...,...,...,...,...,...,...,...
46222,50,0.0,0.0,0.0,924.813,-0.683175,0.404797,-0.382371
46223,50,0.0,0.0,0.0,924.833,-0.679442,0.397681,-0.381935
46224,50,0.0,0.0,0.0,924.852,-0.675566,0.394216,-0.382246
46225,50,0.0,0.0,0.0,924.873,-0.677858,0.397714,-0.381256


In [None]:
ecog_data_bilateral_2018_07_12_S1["Time"]

In [None]:
plt.plot(ecog_data_bilateral_2018_07_12_S1["Time"], ecog_data_bilateral_2018_07_12_S1[channel_data].values)

In [None]:
ecog_channels = ecog_data_bilateral_2018_07_12_S1.columns[1:-1]

In [None]:
ecog_data_bilateral_2018_07_12_S1[ecog_channels].values

In [None]:
motion_data_bilateral_2018_07_12_S1_left_wrist = motion_data_bilateral_2018_07_12_S1[motion_data_bilateral_2018_07_12_S1.columns[1:4]]

In [None]:
motion_data_bilateral_2018_07_12_S1_left_wrist

In [None]:
motion_data_bilateral_2018_07_12_S1_left_wrist

In [None]:
ecog_data_bilateral_2018_07_12_S1[channel_data]

In [None]:
del ecog_data_bilateral_2018_07_12_S1

## Ipsilateral Data (Right Wrist)

In [28]:
ecog_data_file_l[8]

'/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-04-29_(S1)/ecog_data.csv'

In [29]:
ecog_data_file_l[8]

'/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-04-29_(S1)/ecog_data.csv'

In [None]:
# Preprocess one session: index 8 of both file lists (the captured output above shows
# ecog_data_file_l[8] is Ipsilateral/2018-04-29_(S1)).
current_ecog_data_file = ecog_data_file_l[8]
current_motion_data_file = motion_data_file_l[8]
preprocessor = PreprocessData(current_ecog_data_file, current_motion_data_file)
# process() returns the windowed arrays; save() writes them and the scaler to disk.
X, y = preprocessor.process()
preprocessor.save()

In [None]:
# Preprocess Ipsilateral Data
# for index in range(6, 16):
#     preprocessor = PreprocessData(ecog_data_file_l[index], motion_data_file_l[index])
#     X, y = preprocessor.process()
#     preprocessor.save()

In [None]:
ecog_data_file_l[16:ecog_data_file_l.__len__()]

In [None]:
# # Preprocess Contralateral Data
# for index in range(16, ecog_data_file_l.__len__()):
#     preprocessor = PreprocessData(ecog_data_file_l[index], motion_data_file_l[index])
#     X, y = preprocessor.process()
#     preprocessor.save()

In [None]:
# Read in the processed arrays written by PreprocessData.save().
# The root is built from the working directory, like the raw-file globs, instead of a
# hard-coded absolute home path, so the notebook runs on other machines.
# NOTE(review): the X and y lists come from separate glob calls and are paired by index.
processed_data_l_X = glob(os.path.join(os.getcwd(), "src/", "motor_cortex/data/data/", '**', "**", "X.npy"))
processed_data_l_y = glob(os.path.join(os.getcwd(), "src/", "motor_cortex/data/data/", '**', "**", "y.npy"))

In [None]:
processed_data_l_X

In [None]:
processed_data_l_y

In [None]:
# # Perform K-Fold Cross Validation
# iterator = iter(processed_data_l)

# for X, y in zip(iterator, iterator):
#     print(X)
#     print(y)

# Create k-fold cross validation
# select the best model
# make a prediction
# visualize the predictions in matlab
# create a live demo
# deploy demo onto the web
# share results for testing with real people


In [None]:
# Define K-fold sets (leave-one-file-out): fold i holds out file i for testing and
# keeps every other file for training.
test_list_X = []
train_list_X = []
test_list_y = []
train_list_y = []

for i in range(len(processed_data_l_X)):
    # Held-out file for fold i.
    test_list_X.append(processed_data_l_X[i])
    test_list_y.append(processed_data_l_y[i])
    # All remaining files form the training list of fold i.
    train_X = [x for idx, x in enumerate(processed_data_l_X) if idx != i]
    train_y = [y for idx, y in enumerate(processed_data_l_y) if idx != i]
    train_list_X.append(train_X)
    train_list_y.append(train_y)

In [None]:
INDEX = 0

In [None]:
X = np.load(train_list_X[INDEX][0])
y = np.load(train_list_y[INDEX][0])

In [None]:
train_list_X

In [None]:
train_list_y[0]

In [None]:
X.shape

In [None]:
y.shape

In [None]:
# Creating Train and Validation Sets (80 % / 20 % random split of the windows).
# NOTE(review): random_split is unseeded, so the split differs between runs; the
# windows also overlap, so neighbouring windows can land on both sides of the split.
dataset = EcogMotionDataset(X, y)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_ds, val_ds = random_split(dataset, [train_size, val_size])

# Only the training loader is shuffled.
train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=64)

In [None]:
# Defining the model

# Train Linear Model: input size is taken from the window array X,
# indexed as (n_windows, sequence_length, input_channels).
input_channels = X.shape[2]
sequence_length = X.shape[1]
model = LinearEcogToMotionNet(input_channels, sequence_length)

# Alternative: CNN/Bi-LSTM hybrid
# model = EcogToMotionNet()

# Alternative: LSTM
# model = EcogLSTM(input_size=64, hidden_size=128, num_layers=1, output_size=3)


## Linear Model

In [None]:
# Linear Model

# Train Linear Model: input size is taken from the window array X.
input_channels = X.shape[2]
sequence_length = X.shape[1]
linear_model = LinearEcogToMotionNet(input_channels, sequence_length)

linear_model.to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(linear_model.parameters(), lr=1e-3)

# Training loop: 20 epochs, each followed by a validation pass.
for epoch in range(20):
    linear_model.train()
    train_loss = 0.0
    for X_batch, y_batch in train_loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)
        optimizer.zero_grad()
        preds = linear_model(X_batch)
        loss = criterion(preds, y_batch)
        loss.backward()
        optimizer.step()
        # Weight the batch loss by batch size so the division below gives a per-sample mean.
        train_loss += loss.item() * X_batch.size(0)
    train_loss /= len(train_loader.dataset)
    linear_model.eval()
    val_loss = 0.0
    # No gradients are needed for validation.
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)
            preds = linear_model(X_batch)
            loss = criterion(preds, y_batch)
            val_loss += loss.item() * X_batch.size(0)
        val_loss /= len(val_loader.dataset)
    print(f"Epoch {epoch+1} | Train Loss: {train_loss:.4f} | Val.Loss: {val_loss:.4f}")

## Convolutional Neural Network

In [None]:
# Train the CNN/Bi-LSTM hybrid (EcogToMotionNet) with a plain Adam loop.
cnn_model = EcogToMotionNet()

cnn_model.to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(cnn_model.parameters(), lr=1e-3)

# Training loop: 20 epochs, each followed by a validation pass.
for epoch in range(20):
    cnn_model.train()
    train_loss = 0.0
    for X_batch, y_batch in train_loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)
        optimizer.zero_grad()
        preds = cnn_model(X_batch)
        loss = criterion(preds, y_batch)
        loss.backward()
        optimizer.step()
        # Weight the batch loss by batch size so the division below gives a per-sample mean.
        train_loss += loss.item() * X_batch.size(0)
    train_loss /= len(train_loader.dataset)
    cnn_model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)
            preds = cnn_model(X_batch)
            loss = criterion(preds, y_batch)
            val_loss += loss.item() * X_batch.size(0)
        # Normalise by the validation-set size; dividing by the training-set size
        # under-reports the validation loss.
        val_loss /= len(val_loader.dataset)
    print(f"Epoch {epoch+1} | Train Loss: {train_loss:.4f} | Val.Loss: {val_loss:.4f}")

## Long Short-Term Memory Model

In [None]:
# Train the single-layer LSTM baseline with the same loop as the other models.
lstm_model = EcogLSTM(input_size=64, hidden_size=128, num_layers=1, output_size=3)
lstm_model.to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(lstm_model.parameters(), lr=1e-3)

# Training loop: 20 epochs, each followed by a validation pass.
for epoch in range(20):
    lstm_model.train()
    train_loss = 0.0
    for X_batch, y_batch in train_loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)
        optimizer.zero_grad()
        preds = lstm_model(X_batch)
        loss = criterion(preds, y_batch)
        loss.backward()
        optimizer.step()
        # Weight the batch loss by batch size so the division below gives a per-sample mean.
        train_loss += loss.item() * X_batch.size(0)
    train_loss /= len(train_loader.dataset)
    lstm_model.eval()
    val_loss = 0.0
    # No gradients are needed for validation.
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)
            preds = lstm_model(X_batch)
            loss = criterion(preds, y_batch)
            val_loss += loss.item() * X_batch.size(0)
        val_loss /= len(val_loader.dataset)
    print(f"Epoch {epoch+1} | Train Loss: {train_loss:.4f} | Val.Loss: {val_loss:.4f}")

# Refined Model Training

In [None]:
# Example usage: train the hybrid model on one session with train_model().
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define Training and Test data
# Read in the processed arrays.
# NOTE(review): absolute, machine-specific path; the X and y lists come from separate
# glob calls and are paired by index — confirm the orders match.
processed_data_l_X = glob(os.path.join('/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/', '**', "**", "X.npy"))
processed_data_l_y = glob(os.path.join('/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/', '**', "**", "y.npy"))
# Define K-fold sets (leave-one-file-out): fold i holds out file i for testing.
test_list_X = []
train_list_X = []
test_list_y = []
train_list_y = []

for i in range(len(processed_data_l_X)):
    test_list_X.append(processed_data_l_X[i])
    test_list_y.append(processed_data_l_y[i])
    train_X = [x for idx, x in enumerate(processed_data_l_X) if idx != i]
    train_y = [y for idx, y in enumerate(processed_data_l_y) if idx != i]
    train_list_X.append(train_X)
    train_list_y.append(train_y)

# Load a single specific dataset
# K-Fold 0 uses session 7 as the test set:
KFOLD = 0
# Index into the fold's training list; entry 6 in the listing below is 2018-05-10_(S8).
SESSION_SET = 6

"""
['/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-24_(S10)/X.npy', 0
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-04-29_(S1)/X.npy', 1
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-06_(S6)/X.npy', 2
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-03_(S5)/X.npy', 3
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-03_(S3)/X.npy', 4
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-03_(S4)/X.npy', 5
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-10_(S8)/X.npy', 6 
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-05-17_(S9)/X.npy', 7
 '/home/linux-pc/gh/CRCNS/src/motor_cortex/data/data/Ipsilateral/2018-04-29_(S2)/X.npy'] 8
"""

X = np.load(train_list_X[KFOLD][SESSION_SET])
y = np.load(train_list_y[KFOLD][SESSION_SET])

# X = np.load(test_list_X[0]) # Identify the test set
# y = np.load(test_list_y[0]) # Identify the test set

# Creating Train and Validation Sets (80 % / 20 % random split of the windows).
# NOTE(review): random_split is unseeded, so the split differs between runs.
dataset = EcogMotionDataset(X, y)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_ds, val_ds = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=64)

# train_loader and val_loader are defined above.

# 2. CNN_LSTM Hybrid Model
hybrid_model = EcogToMotionNet()
# NOTE(review): train_model() builds its own MSELoss; this criterion is not passed to it.
criterion = nn.MSELoss()
hybrid_train_losses, hybrid_val_losses, hybrid_r2 = train_model(hybrid_model, train_loader, val_loader,device, epochs=100, model_name="Hybrid_CNN_LSTM")



CNN Epoch 1/100 | Train Loss: 0.792474 | Val Loss: 0.774172 | R2: 0.253328
Model Checkpoint | epoch: 0 | best_val_loss: 0.7741717590975992
CNN Epoch 2/100 | Train Loss: 0.726997 | Val Loss: 0.736752 | R2: 0.289478
Model Checkpoint | epoch: 1 | best_val_loss: 0.7367517074150091
CNN Epoch 3/100 | Train Loss: 0.705741 | Val Loss: 0.796277 | R2: 0.232063
CNN Epoch 4/100 | Train Loss: 0.683208 | Val Loss: 0.721690 | R2: 0.304022
Model Checkpoint | epoch: 3 | best_val_loss: 0.7216902985475052
CNN Epoch 5/100 | Train Loss: 0.675859 | Val Loss: 0.723419 | R2: 0.302353
CNN Epoch 6/100 | Train Loss: 0.670785 | Val Loss: 0.673190 | R2: 0.350776
Model Checkpoint | epoch: 5 | best_val_loss: 0.6731901810306723
CNN Epoch 7/100 | Train Loss: 0.658786 | Val Loss: 0.673207 | R2: 0.350736
CNN Epoch 8/100 | Train Loss: 0.651000 | Val Loss: 0.670014 | R2: 0.353841
Model Checkpoint | epoch: 7 | best_val_loss: 0.6700141186885853
CNN Epoch 9/100 | Train Loss: 0.638856 | Val Loss: 0.650128 | R2: 0.373013
Model

NameError: name 'cnn_train_losses' is not defined

In [None]:
# 1. LSTM
# lstm_model = EcogLSTM(input_size=64, hidden_size=128, num_layers=1, output_size=3)
# lstm_optimizer = torch.optim.Adam(lstm_model.parameters(), lr=1e-3)
# lstm_train_losses, lstm_val_losses = train_model(lstm_model, train_loader, val_loader, criterion, lstm_optimizer, device, epochs=20, model_name="LSTM")

# 3. Linear
# input_channels = X.shape[2]
# sequence_length = X.shape[1]
# linear_model = LinearEcogToMotionNet(input_channels, sequence_length)
# linear_optimizer = torch.optim.Adam(linear_model.parameters(), lr=1e-3)
# linear_train_losses, linear_val_losses = train_model(linear_model, train_loader, val_loader, criterion, linear_optimizer, device, epochs=20, model_name="Linear")


In [None]:

# # Plot all losses together
# plot_losses({
#     # "LSTM": (lstm_train_losses, lstm_val_losses),
#     "CNN": (cnn_train_losses, cnn_val_losses),
#     # "Linear": (linear_train_losses, linear_val_losses)
# })



## Loading the model and making predictions

In [None]:
# Recreate the model structure and load the trained weights.
cnn_model = EcogToMotionNet()
# map_location lets a checkpoint saved on a GPU load when `device` has fallen back to CPU.
cnn_model.load_state_dict(torch.load("models/Hybrid_best_model_session_8_ipsilateral_2018_05_10.pth", map_location=device))
cnn_model.to(device)
cnn_model.eval()

In [None]:
# Define K-fold sets (leave-one-file-out): fold i holds out file i for testing and
# keeps every other file for training.
# NOTE(review): this repeats the fold construction done in earlier cells.
test_list_X = []
train_list_X = []
test_list_y = []
train_list_y = []

for i in range(len(processed_data_l_X)):
    # Held-out file for fold i.
    test_list_X.append(processed_data_l_X[i])
    test_list_y.append(processed_data_l_y[i])
    # All remaining files form the training list of fold i.
    train_X = [x for idx, x in enumerate(processed_data_l_X) if idx != i]
    train_y = [y for idx, y in enumerate(processed_data_l_y) if idx != i]
    train_list_X.append(train_X)
    train_list_y.append(train_y)

In [None]:
test_list_X[1]

In [None]:
test_list_y[1]

In [None]:
train_list_X[0][1]

In [None]:
# Trained on the 8th session and predicting on the first session

In [None]:
# Re-run preprocessing for index 8 of both file lists and write X.npy, y.npy and the
# scaler next to the ECoG CSV.
# NOTE(review): this repeats an earlier preprocessing cell.
current_ecog_data_file = ecog_data_file_l[8]
current_motion_data_file = motion_data_file_l[8]
preprocessor = PreprocessData(current_ecog_data_file, current_motion_data_file)
X, y = preprocessor.process()
preprocessor.save()

In [None]:
# Trained using data from session 8; Testing using data from session 1
X = np.load(train_list_X[0][1])
y = np.load(train_list_y[0][1])
# str.strip("X.npy") strips a character set, not a suffix; take the folder explicitly.
scaler = joblib.load(os.path.join(os.path.dirname(train_list_X[0][1]), "scaler_ecog.pkl"))

In [None]:
# Build the evaluation loader over the whole test session.
# Shuffling is off so exported predictions keep the temporal order of the windows.
dataset = EcogMotionDataset(X, y)
test_loader = DataLoader(dataset, batch_size=64, shuffle=False)

In [None]:
# Run the trained model over the test session and export predictions next to its X.npy.
cnn_model.to(device)
# str.strip("X.npy") strips a character set, not a suffix; take the folder explicitly.
output_file_path = os.path.join(os.path.dirname(train_list_X[0][1]), "ecog_predictions.mat")
predictions, targets = predict_and_export(cnn_model, test_loader, device, output_file_path)