In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!cd /content/drive/MyDrive/be_lab

## Dependencies

In [None]:
!pip install torch
!pip install torchsummary
!pip install torchvision
!pip install scipy
!pip install einops
!pip install transformers
!pip install transformers[torch]

Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)
Collecting nvidia-curand-cu12==10.3.2.106 (from torch)
  Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)
Collectin

In [None]:

import argparse
import os
import numpy as np
import math
import scipy.io
import random

import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision.transforms as transforms
from torchvision.utils import save_image, make_grid

from torch.utils.data import DataLoader
from torch.autograd import Variable
import torch.autograd as autograd

from torch.nn.utils import weight_norm
from torch.autograd import Variable

from transformers import Trainer, TrainingArguments
import torch.nn.init as init
from torch.utils.data import Dataset


import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt

from torch.backends import cudnn
# Turn off cuDNN benchmark mode and request deterministic cuDNN behaviour.
# NOTE(review): no random seed is set anywhere in the visible cells, so runs are
# presumably still not reproducible end to end — confirm whether that is intended.
cudnn.benchmark = False
cudnn.deterministic = True

from torch.nn import MSELoss




**Model**

In [None]:
class Chomp1d(nn.Module):
    """Drop the last ``chomp_size`` entries along dimension 2 of the input.

    Args:
        chomp_size: number of trailing positions to remove. ``0`` leaves the
            input untouched.
    """

    def __init__(self, chomp_size):
        super(Chomp1d, self).__init__()
        self.chomp_size = chomp_size

    def forward(self, x):
        """Return a contiguous copy of ``x`` without its last ``chomp_size`` steps."""
        # `x[:, :, :-0]` is `x[:, :, :0]`, i.e. an empty tensor, so a zero chomp
        # must be handled explicitly.
        if self.chomp_size == 0:
            return x.contiguous()
        return x[:, :, :-self.chomp_size].contiguous()


class Chomp2d(nn.Module):
    """Drop the last ``chomp_size`` entries along dimension 3 of a 4-D input.

    Args:
        chomp_size: number of trailing positions to remove. ``0`` leaves the
            input untouched.
    """

    def __init__(self, chomp_size):
        super(Chomp2d, self).__init__()
        self.chomp_size = chomp_size

    def forward(self, x):
        """Return a contiguous copy of ``x`` without its last ``chomp_size`` columns."""
        # `x[..., :-0]` would select nothing, so a zero chomp (e.g. a block
        # built with padding=0) has to pass the input through unchanged.
        if self.chomp_size == 0:
            return x.contiguous()
        return x[:, :, :, :-self.chomp_size].contiguous()


class TemporalBlockPro(nn.Module):
    """Residual block of two weight-normalised (1, kernel_size) convolutions.

    Each convolution is followed by Chomp2d(padding), PReLU and Dropout. The
    block output is ``PReLU(net(x) + residual)``, where the residual is ``x``
    itself or, when the channel counts differ, a weight-normalised 1x1
    convolution of ``x``.

    Args:
        n_inputs: input channel count.
        n_outputs: output channel count.
        kernel_size: kernel width along the last axis.
        stride: stride handed to both convolutions.
        dilation: dilation along the last axis.
        padding: padding along the last axis; the same amount is chomped off
            the end afterwards.
        dropout: dropout probability after each activation.
    """

    def __init__(self, n_inputs, n_outputs, kernel_size, stride, dilation, padding, dropout=0.2):
        super().__init__()
        conv_options = dict(stride=stride, padding=(0, padding), dilation=(1, dilation))
        # Both convolutions are created before anything is wrapped in weight_norm.
        first_conv = nn.Conv2d(n_inputs, n_outputs, (1, kernel_size), **conv_options)
        second_conv = nn.Conv2d(n_outputs, n_outputs, (1, kernel_size), **conv_options)

        stages = [
            weight_norm(first_conv), Chomp2d(padding), nn.PReLU(), nn.Dropout(dropout),
            weight_norm(second_conv), Chomp2d(padding), nn.PReLU(), nn.Dropout(dropout),
        ]
        self.net = nn.Sequential(*stages)

        # A 1x1 projection is only needed when the residual has a different width.
        if n_inputs != n_outputs:
            self.downsample = weight_norm(nn.Conv2d(n_inputs, n_outputs, 1))
        else:
            self.downsample = None
        self.relu = nn.PReLU()
        self.init_weights()

    def init_weights(self):
        """Placeholder hook: no custom initialisation is applied to this block."""
        # The weight-normalised convolutions sit at indices 0 and 4 of self.net
        # should custom initialisation ever be added here.
        return None

    def forward(self, x):
        """Apply the two-convolution stack and add the residual branch."""
        out = self.net(x)
        if self.downsample is None:
            res = x
        else:
            res = self.downsample(x)
        return self.relu(out + res)


class TemporalConvNetPro(nn.Module):
    """Single-kernel network: a front-end convolution, optional fusion, then TemporalBlockPro layers.

    The front-end is a weight-normalised Conv2d with kernel (freq, kernel_size)
    and stride (freq, 1), followed by Chomp2d, PReLU, Dropout and the fusion
    layer. With ``early_fusion`` the fusion layer is a weight-normalised Conv2d
    whose kernel is (num_eeg_chan, 1); otherwise it is an identity.

    Args:
        num_channels: channel widths; entry 0 is the front-end width and each
            consecutive pair defines one TemporalBlockPro.
        num_eeg_chan: kernel height of the fusion convolution.
        freq: kernel height and vertical stride of the front-end convolution.
        kernel_size: kernel width used by every convolution along the last axis.
        dropout: dropout probability.
        early_fusion: whether to apply the fusion convolution.
    """

    def __init__(self, num_channels, num_eeg_chan=32, freq=6, kernel_size=2, dropout=0.2, early_fusion=True):
        super().__init__()
        self.early_fusion = early_fusion
        front_width = num_channels[0]

        if early_fusion:
            self.fusion_layer = weight_norm(nn.Conv2d(
                in_channels=front_width, out_channels=front_width,
                kernel_size=(num_eeg_chan, 1), stride=(1, 1)
            ))
        else:
            self.fusion_layer = nn.Identity()

        front_padding = (kernel_size - 1) * 2
        front_conv = weight_norm(nn.Conv2d(
            in_channels=1, out_channels=front_width,
            kernel_size=(freq, kernel_size), stride=(freq, 1),
            dilation=(1, 2), padding=(0, front_padding)))
        # The same fusion module is registered both as an attribute and as the
        # last stage of the sequential front-end.
        self.space_aware_temporal_layer = nn.Sequential(
            front_conv,
            Chomp2d(front_padding),
            nn.PReLU(),
            nn.Dropout(dropout),
            self.fusion_layer
        )

        blocks = []
        width_pairs = zip(num_channels[:-1], num_channels[1:])
        for level, (width_in, width_out) in enumerate(width_pairs):
            # Dilation doubles per level, starting at 4.
            dilation_size = 2 ** (level + 2)
            blocks.append(TemporalBlockPro(
                width_in, width_out, kernel_size, stride=1, dilation=dilation_size,
                padding=int((kernel_size - 1) * dilation_size), dropout=dropout))

        self.network = nn.Sequential(*blocks)
        self.init_weights()

    def init_weights(self):
        """Draw N(0, 0.01) values into the front-end (and fusion) ``weight`` tensors."""
        # NOTE(review): with torch.nn.utils.weight_norm the effective `weight` is
        # presumably recomputed from weight_g / weight_v before every forward, so
        # this in-place init may have no lasting effect — confirm.
        targets = [self.space_aware_temporal_layer[0]]
        if self.early_fusion:
            targets.append(self.fusion_layer)
        for layer in targets:
            layer.weight.data.normal_(0, 0.01)

    def forward(self, x):
        """Run the front-end, then the stack of temporal blocks."""
        fused = self.space_aware_temporal_layer(x)
        return self.network(fused)


class SpaceAwareTemporalBlock(nn.Module):
    """Front-end convolution with optional fusion, used once per kernel size.

    The sequential path is: weight-normalised Conv2d with kernel
    (freq, kernel_size) and stride (freq, 1), Chomp2d, PReLU, Dropout, then a
    fusion stage. With ``early_fusion`` the fusion stage is a weight-normalised
    Conv2d with kernel (num_eeg_chan, 1); otherwise it is an identity.

    Args:
        in_channels: input channel count of the front-end convolution.
        out_channels: channel count produced by the block.
        num_eeg_chan: kernel height of the fusion convolution.
        freq: kernel height and vertical stride of the front-end convolution.
        kernel_size: kernel width along the last axis.
        dropout: dropout probability.
        early_fusion: whether the fusion convolution is applied.
    """

    def __init__(self, in_channels=1, out_channels=32, num_eeg_chan=32, freq=6, kernel_size=2, dropout=0.2, early_fusion=True):
        super().__init__()
        self.early_fusion = early_fusion

        def build_fusion_conv():
            # One fresh weight-normalised fusion convolution per call.
            return weight_norm(nn.Conv2d(
                in_channels=out_channels, out_channels=out_channels,
                kernel_size=(num_eeg_chan, 1), stride=(1, 1)
            ))

        if early_fusion:
            # NOTE(review): forward() only runs the sequential path, so this
            # attribute-level instance is not used by forward in the visible
            # code. It still contributes parameters and state_dict keys, so
            # removing it would change what checkpoints contain.
            self.fusion_layer = build_fusion_conv()
            fusion_stage = build_fusion_conv()
        else:
            self.fusion_layer = nn.Identity()
            fusion_stage = self.fusion_layer

        temporal_padding = (kernel_size - 1) * 2
        front_conv = weight_norm(nn.Conv2d(
            in_channels=in_channels, out_channels=out_channels,
            kernel_size=(freq, kernel_size), stride=(freq, 1),
            dilation=(1, 2), padding=(0, temporal_padding)))
        self.space_aware_temporal_layer = nn.Sequential(
            front_conv,
            Chomp2d(temporal_padding),
            nn.PReLU(),
            nn.Dropout(dropout),
            fusion_stage
        )
        self.init_weights()

    def forward(self, x):
        """Apply the sequential front-end (convolution, chomp, activation, dropout, fusion)."""
        return self.space_aware_temporal_layer(x)

    def init_weights(self):
        """Draw N(0, 0.01) values into the ``weight`` tensors of the convolutions."""
        # NOTE(review): under torch.nn.utils.weight_norm the effective `weight`
        # is presumably rebuilt from weight_g / weight_v on each forward, so this
        # in-place init may not persist — confirm.
        front_end = self.space_aware_temporal_layer
        targets = [front_end[0]]
        if self.early_fusion:
            # Sequential-path fusion conv first, then the attribute-level one.
            targets.extend([front_end[4], self.fusion_layer])
        for layer in targets:
            layer.weight.data.normal_(0, 0.01)


class TemporalConvNetProM(nn.Module):
    """Multi-kernel network: three SpaceAwareTemporalBlocks, a 1x1 merge, then TemporalBlockPro layers.

    The three front-end branches use ``kernel_size[0]``, ``kernel_size[1]`` and
    ``kernel_size[2]``. Their outputs are concatenated on the channel axis and
    reduced back to ``num_channels[0]`` channels by a weight-normalised 1x1
    convolution. The following TemporalBlockPro layers all use
    ``kernel_size[1]``.

    Args:
        num_channels: channel widths; entry 0 is the branch width and each
            consecutive pair defines one TemporalBlockPro.
        num_eeg_chan: forwarded to every SpaceAwareTemporalBlock.
        freq: forwarded to every SpaceAwareTemporalBlock.
        kernel_size: three kernel widths, one per branch.
        dropout: dropout probability.
        early_fusion: forwarded to every SpaceAwareTemporalBlock.
    """

    def __init__(self, num_channels, num_eeg_chan=32, freq=6, kernel_size=[2, 4, 6], dropout=0.2, early_fusion=True):
        super().__init__()
        self.early_fusion = early_fusion
        branch_width = num_channels[0]
        branch_options = dict(
            out_channels=branch_width, num_eeg_chan=num_eeg_chan,
            freq=freq, dropout=dropout, early_fusion=early_fusion)

        self.sa_tcn_1 = SpaceAwareTemporalBlock(kernel_size=kernel_size[0], **branch_options)
        self.sa_tcn_2 = SpaceAwareTemporalBlock(kernel_size=kernel_size[1], **branch_options)
        self.sa_tcn_3 = SpaceAwareTemporalBlock(kernel_size=kernel_size[2], **branch_options)

        # Every temporal block reuses the middle kernel width.
        shared_kernel = kernel_size[1]
        blocks = []
        for level, (width_in, width_out) in enumerate(zip(num_channels[:-1], num_channels[1:])):
            dilation_size = 2 ** (level + 2)
            blocks.append(TemporalBlockPro(
                width_in, width_out, shared_kernel, stride=1, dilation=dilation_size,
                padding=int((shared_kernel - 1) * dilation_size), dropout=dropout))

        self.OneByOneConv = weight_norm(nn.Conv2d(
            in_channels=3 * branch_width, out_channels=branch_width,
            kernel_size=(1, 1), stride=(1, 1)
        ))
        # NOTE(review): under torch.nn.utils.weight_norm this in-place init of
        # `weight` may be overwritten on the first forward — confirm.
        self.OneByOneConv.weight.data.normal_(0, 0.01)
        self.pure_temporal_layers = nn.Sequential(*blocks)

    def forward(self, x):
        """Run the three branches, merge them on the channel axis, then apply the temporal stack."""
        branch_outputs = (self.sa_tcn_1(x), self.sa_tcn_2(x), self.sa_tcn_3(x))
        merged = self.OneByOneConv(torch.cat(branch_outputs, dim=1))
        return self.pure_temporal_layers(merged)

In [None]:

class Flatten(nn.Module):
    """Collapse every dimension after the first into a single one."""

    def forward(self, input):
        """Return ``input`` viewed as (input.size(0), -1)."""
        leading = input.size(0)
        return input.view(leading, -1)


class SA_TCN(nn.Module):
    """TemporalConvNetPro backbone followed by a linear head.

    Args:
        cnn1d_channels: channel widths passed to TemporalConvNetPro; the last
            entry is the input width of the linear head.
        cnn1d_kernel_size: kernel width passed to TemporalConvNetPro.
        cnn1d_dropout_rate: dropout probability passed to TemporalConvNetPro.
        num_eeg_chan: passed to TemporalConvNetPro.
        freq: passed to TemporalConvNetPro.
        output_dim: output width of the linear head.
        early_fusion: passed to TemporalConvNetPro.
        model_type: ``'cls'`` averages the head output over dimension 1 and
            requires ``output_dim > 1``; any other value returns the head
            output as is.
    """

    def __init__(self, cnn1d_channels=[128, 128, 128], cnn1d_kernel_size=5,
                 cnn1d_dropout_rate=0.1, num_eeg_chan=32, freq=6, output_dim=1, early_fusion=True, model_type='reg'):
        super().__init__()
        self.output_dim = output_dim
        self.mode = model_type
        if model_type == 'cls':
            assert output_dim > 1, "This model support at least binary classification. output_dim should > 1."
        backbone_options = dict(
            num_channels=cnn1d_channels, num_eeg_chan=num_eeg_chan, freq=freq,
            kernel_size=cnn1d_kernel_size, dropout=cnn1d_dropout_rate,
            early_fusion=early_fusion)
        self.temporal = TemporalConvNetPro(**backbone_options)
        self.regressor = nn.Linear(cnn1d_channels[-1], output_dim)

    def forward(self, x):
        """Run the backbone, move channels last, apply the head, optionally average."""
        # x: batch, 1, hidden, seq
        features = self.temporal(x)
        # Swap dims 1 and 3 so the channel axis is last, then drop dim -2 if it is 1.
        features = features.transpose(1, 3).contiguous().squeeze(-2)
        predictions = self.regressor(features).contiguous()
        if self.mode != 'cls':
            return predictions
        return torch.mean(predictions, dim=1)


class MASA_TCN(nn.Module):
    """TemporalConvNetProM (multi-kernel) backbone followed by a linear head.

    Args:
        cnn1d_channels: channel widths passed to TemporalConvNetProM; the last
            entry is the input width of the linear head.
        cnn1d_kernel_size: three kernel widths passed to TemporalConvNetProM.
        cnn1d_dropout_rate: dropout probability passed to TemporalConvNetProM.
        num_eeg_chan: passed to TemporalConvNetProM.
        freq: passed to TemporalConvNetProM.
        output_dim: output width of the linear head.
        early_fusion: passed to TemporalConvNetProM.
        model_type: ``'cls'`` averages the head output over dimension 1 and
            requires ``output_dim > 1``; any other value returns the head
            output as is.
    """

    def __init__(self, cnn1d_channels=[128, 128, 128], cnn1d_kernel_size=[3, 5, 15],
                 cnn1d_dropout_rate=0.1, num_eeg_chan=32, freq=6,
                 output_dim=1, early_fusion=True, model_type='reg'):
        super().__init__()
        self.output_dim = output_dim
        self.mode = model_type
        if model_type == 'cls':
            assert output_dim > 1, "This model support at least binary classification. output_dim should > 1."
        backbone_options = dict(
            num_channels=cnn1d_channels, num_eeg_chan=num_eeg_chan, freq=freq,
            kernel_size=cnn1d_kernel_size, dropout=cnn1d_dropout_rate,
            early_fusion=early_fusion)
        self.temporal = TemporalConvNetProM(**backbone_options)
        self.regressor = nn.Linear(cnn1d_channels[-1], output_dim)

    def forward(self, x):
        """Run the backbone, move channels last, apply the head, optionally average."""
        # x: batch, 1, hidden, seq
        features = self.temporal(x)
        # Swap dims 1 and 3 so the channel axis is last, then drop dim -2 if it is 1.
        features = features.transpose(1, 3).contiguous().squeeze(-2)
        predictions = self.regressor(features).contiguous()
        if self.mode != 'cls':
            return predictions
        return torch.mean(predictions, dim=1)

In [None]:
class MASATCNforTraining(nn.Module):
    """Trainer-facing wrapper that reshapes raw batches and runs MASA_TCN in regression mode.

    Keyword arguments passed to the constructor are accepted and ignored.
    """

    def __init__(self, **kwargs) -> None:
        super(MASATCNforTraining, self).__init__()
        # NOTE(review): the fusion kernel spans 5000 rows, so each flattened
        # sample presumably has to contain exactly 5000 values — confirm.
        self.network = MASA_TCN(cnn1d_channels=[128, 128, 128],
                                cnn1d_kernel_size=[3, 5, 15],
                                cnn1d_dropout_rate=0.1,
                                num_eeg_chan=5000,
                                freq=1,
                                output_dim=1,
                                early_fusion=True,
                                model_type='reg')

    def forward(self, input, label = None):
        """Flatten every sample into one column and return the network output.

        Args:
            input: batch tensor; assumed to be (batch, channels, time) — TODO confirm.
            label: accepted so the Trainer can call ``model(**inputs)``; unused.

        Returns:
            The network output with its trailing singleton dimension removed
            and its first two dimensions swapped.
        """
        batch = input.size(0)
        input = input.unsqueeze(1)
        input = torch.transpose(input, 2, 3)
        # Use the real batch size: a hard-coded 4 would fold samples into each
        # other (or fail) for any other batch size.
        input = input.reshape(batch, 1, -1)
        input = input.unsqueeze(-1)
        outputs = self.network(input)
        outputs = outputs.squeeze(-1)
        outputs = outputs.transpose(0, 1)
        return outputs

In [None]:

from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader

batch_size = 4

def pad_collate(batch):
    """Collate dataset items into a batch whose size is a multiple of ``batch_size``.

    Args:
        batch: list of dicts, each with an ``'input'`` tensor and a ``'label'``
            tensor. Inputs may differ in length along their last axis.

    Returns:
        dict with ``'input'`` (inputs zero-padded along the last axis and
        stacked on a new leading axis) and ``'label'`` (stacked labels).
    """
    # Extract inputs and labels from the batch
    inputs = [item['input'] for item in batch]
    labels = [item['label'] for item in batch]

    # Top up an incomplete batch with randomly re-drawn samples so the batch
    # size stays a multiple of `batch_size`.
    shortfall = len(batch) % batch_size
    if shortfall > 0:
        for _ in range(batch_size - shortfall):
            extra = random.choice(batch)
            inputs.append(extra['input'])
            labels.append(extra['label'])

    # pad_sequence pads along dim 0, but the ragged axis of the inputs is the
    # last one, so move it to the front, pad, and move it back.
    last_axis_first = [sample.transpose(0, -1) for sample in inputs]
    padded = pad_sequence(last_axis_first, batch_first=True, padding_value=0)
    inputs_padded = padded.transpose(1, -1).contiguous()

    # Stack labels into a single tensor
    labels = torch.stack(labels)

    return {'input': inputs_padded, 'label': labels}



class EEGDataset(Dataset):
    """Dataset of fixed-length portions cut from ``.mat`` recordings.

    ``root_dir`` is scanned for sub-directories whose name contains an integer
    in parentheses (for example ``name(4)``); that integer is the label of
    every ``.mat`` file inside, except ``FFT.mat``. The ``'data'`` array of
    each file is split along axis 1 into portions of at most ``max_timesteps``
    columns, and every portion is one dataset item.

    Args:
        root_dir: directory containing the labelled sub-directories.
        max_timesteps: maximum number of columns per portion; must be positive.

    Raises:
        ValueError: if ``max_timesteps`` is not positive.
    """

    def __init__(self, root_dir, max_timesteps=1000):
        if max_timesteps <= 0:
            raise ValueError("max_timesteps must be a positive integer")
        self.max_timesteps = max_timesteps
        self.data_files = []
        self.portion_counts = []  # Store the number of portions per file
        self.labels = []
        # os.listdir order is arbitrary; sorting keeps the index -> sample
        # mapping stable between runs.
        for folder_name in sorted(os.listdir(root_dir)):
            if '(' not in folder_name or ')' not in folder_name:
                continue
            folder_path = os.path.join(root_dir, folder_name)
            # A plain file such as "notes(1).txt" must not be listed as a directory.
            if not os.path.isdir(folder_path):
                continue
            label = int(folder_name.split('(')[-1].split(')')[0])
            for file_name in sorted(os.listdir(folder_path)):
                if not file_name.endswith('.mat') or file_name == 'FFT.mat':
                    continue
                file_path = os.path.join(folder_path, file_name)
                # Determine how many portions this file will be split into
                data = scipy.io.loadmat(file_path)['data']
                self.data_files.append(file_path)
                self.labels.append(label)
                self.portion_counts.append(math.ceil(data.shape[1] / max_timesteps))

    def __len__(self):
        """Total number of portions across all files."""
        return sum(self.portion_counts)

    def __getitem__(self, idx):
        """Return portion ``idx`` as a dict with ``'input'``, ``'label_ids'`` and ``'label'``.

        Negative indices count from the end.

        Raises:
            IndexError: if ``idx`` is out of range.
        """
        total = len(self)
        if idx < 0:
            idx += total
        if idx < 0 or idx >= total:
            raise IndexError("EEGDataset index out of range")

        # Find which file and which portion this index corresponds to
        file_idx = 0
        while idx >= self.portion_counts[file_idx]:
            idx -= self.portion_counts[file_idx]
            file_idx += 1
        portion_idx = idx
        label = self.labels[file_idx]
        # The file is re-read from disk on every access.
        data = scipy.io.loadmat(self.data_files[file_idx])['data']
        # Calculate the start and end indices for this portion
        start_idx = portion_idx * self.max_timesteps
        end_idx = min(start_idx + self.max_timesteps, data.shape[1])
        # Slice the data for this portion
        data_portion = torch.tensor(data[:, start_idx:end_idx], dtype=torch.float32)
        return {'input': data_portion, 'label_ids': label, 'label': torch.tensor(label, dtype=torch.long)}

def model_init():
    """Build and return a fresh MASATCNforTraining instance."""
    fresh_model = MASATCNforTraining()
    return fresh_model



# The collate function is exposed under the name the trainer setup refers to.
data_collator = pad_collate


## Training Configs

In [None]:
class CustomTrainer(Trainer):
    """Trainer that optimises a mean-squared-error loss against ``inputs["label"]``."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.loss_fn = MSELoss()

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Compute the MSE between the model output and the batch labels.

        Args:
            model: the model being trained; called as ``model(**inputs)``.
            inputs: batch dict that contains at least a ``"label"`` tensor.
            return_outputs: when true, return ``(loss, outputs)`` instead of
                just the loss.
            num_items_in_batch: accepted because newer transformers releases
                pass this keyword to ``compute_loss``; it is not used here.

        Returns:
            The loss tensor, or ``(loss, outputs)`` when ``return_outputs`` is true.
        """
        labels = inputs["label"].float()  # Ensure labels are float for MSE Loss
        outputs = model(**inputs)
        # Reshape labels to a single row, (1, batch), before comparing with the output.
        labels = labels.view(-1, 1)
        labels = labels.transpose(0, 1)
        loss = self.loss_fn(outputs, labels)

        return (loss, outputs) if return_outputs else loss



# Hyper-parameters for the Hugging Face Trainer run.
training_args = TrainingArguments(
    output_dir='/content/drive/MyDrive/be_lab/results',
    num_train_epochs=2000,
    per_device_train_batch_size=batch_size,
    logging_dir='/content/drive/MyDrive/be_lab/logs',
    logging_steps=10,
    learning_rate=2e-4,
    lr_scheduler_type='cosine',
    save_steps=2000,
    # Only warmup_steps is set: when both warmup_steps and warmup_ratio are
    # given, warmup_steps takes precedence, so the ratio had no effect.
    warmup_steps=100,
)


trainer = CustomTrainer(
    model_init=model_init,
    args=training_args,
    train_dataset=EEGDataset(root_dir='/content/drive/MyDrive/be_lab/data/G01_data_cut'), # train dataset path
    data_collator=data_collator,
)





tensor(0.0411, device='cuda:0', grad_fn=<MseLossBackward0>)


Step,Training Loss
22010,0.0312
22020,0.035
22030,0.0201
22040,0.051
22050,0.0356
22060,0.0663
22070,0.0793
22080,0.0266
22090,0.037
22100,0.0783


[1;30;43m串流輸出內容已截斷至最後 5000 行。[0m
tensor(0.1290, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0354, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0628, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1405, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0105, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1136, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0096, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1165, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0403, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2416, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0329, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1070, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1682, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0465, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0187, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0116, device='cuda:0', grad_fn=<MseLossBackward0>)
tenso

KeyboardInterrupt: 

## Train !

In [None]:
# To train from scratch instead of resuming, call trainer.train() with no arguments.
resume_checkpoint_dir = "/content/drive/MyDrive/be_lab/results/checkpoint-22000"  # checkpoint path
trainer.train(resume_from_checkpoint=resume_checkpoint_dir)

## Inference !

In [None]:
!set PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
from safetensors.torch import load_model, save_model
#PYTORCH_CUDA_ALLOC_CONF=expandable_segments
def perform_inference():
    """Run the trained model over the inference dataset and print the mean absolute error.

    Uses ``trainer.model`` in eval mode, batches the dataset with
    ``pad_collate`` and prints every batch's predictions and labels followed
    by summary statistics. Returns nothing.
    """
    # Initialize the model
    model = trainer.model
    model.eval()  # Set the model to evaluation mode
    # Follow the model's own device instead of assuming cuda:0 is available.
    device = next(model.parameters()).device

    # DataLoader for inference dataset
    inference_dataset = EEGDataset(root_dir='/content/drive/MyDrive/be_lab/data/test_0519')  # Inference data path

    # Check the size of the dataset
    print(f"Dataset size: {len(inference_dataset)}")

    if len(inference_dataset) == 0:
        # Nothing to evaluate; stopping here also avoids dividing by zero below.
        print("Dataset is empty. Check the dataset path and contents.")
        return

    first_item = inference_dataset[0]
    print(f"First item keys: {first_item.keys()}")

    inference_loader = DataLoader(inference_dataset, batch_size=4, collate_fn=pad_collate)

    total_error = 0
    batches = 0
    num_predictions = 0

    # Perform inference
    with torch.no_grad():  # No need to track gradients
        for batch in inference_loader:
            batches += 1
            inputs = batch['input'].to(device)
            true_labels = batch['label'].to(device)
            outputs = model(inputs)
            print(outputs)
            print(true_labels)
            total_error += (outputs - true_labels).abs().sum()
            num_predictions += outputs.numel()

    # Average over the predictions actually made rather than a hard-coded
    # batch size of 4.
    # NOTE(review): pad_collate appears to top up an incomplete final batch
    # with re-drawn samples, so those duplicates are counted too — confirm.
    print("average_error = ", total_error / num_predictions)
    print("batches = ", batches)
    print("total_error = ", total_error)
    print("len of dataset = ", len(inference_dataset))


perform_inference()



Dataset size: 203
First item keys: dict_keys(['input', 'label_ids', 'label'])
tensor([[2.8229, 2.5100, 3.7429, 4.2514]], device='cuda:0')
tensor([4, 4, 4, 4], device='cuda:0')
tensor([[3.6222, 3.8791, 4.2922, 5.7457]], device='cuda:0')
tensor([4, 4, 4, 4], device='cuda:0')
tensor([[4.3512, 2.8890, 3.6209, 2.2324]], device='cuda:0')
tensor([4, 4, 4, 4], device='cuda:0')
tensor([[3.8879, 3.6747, 2.1747, 5.0577]], device='cuda:0')
tensor([4, 4, 4, 4], device='cuda:0')
tensor([[ 3.9280, 12.4104,  3.1175,  2.2727]], device='cuda:0')
tensor([6, 6, 6, 6], device='cuda:0')
tensor([[ 3.8449,  1.4131, 10.9793,  2.0999]], device='cuda:0')
tensor([6, 6, 6, 6], device='cuda:0')
tensor([[ 2.6333,  1.8082, 13.3172,  3.1872]], device='cuda:0')
tensor([6, 6, 6, 6], device='cuda:0')
tensor([[3.3701, 2.5394, 8.9142, 1.9101]], device='cuda:0')
tensor([6, 6, 6, 6], device='cuda:0')
tensor([[ 5.3368, 13.9574,  4.9442,  2.1773]], device='cuda:0')
tensor([6, 6, 6, 6], device='cuda:0')
tensor([[ 9.3008, 32.886