In [46]:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F

In [1]:
# get tremor data (.npz file) from drive
# link: https://drive.google.com/file/d/1KJx9-cy2u6Dv3KyotrhhXGRQ0vCWjo5Q/view?usp=drive_link
!pip install gdown
import gdown

file_id = "1KJx9-cy2u6Dv3KyotrhhXGRQ0vCWjo5Q"
gdown.download(f"https://drive.google.com/uc?id={file_id}", "tremor_data.npz", quiet=False)

data = np.load('tremor_data.npz')
print(data.files)



Downloading...
From: https://drive.google.com/uc?id=1KJx9-cy2u6Dv3KyotrhhXGRQ0vCWjo5Q
To: /content/tremor_data.npz
100%|██████████| 21.3M/21.3M [00:00<00:00, 119MB/s] 

['features', 'labels']





The data consists of **feature arrays** and **label arrays** which have already been processed. Each feature is a 3-second segment sampled at 100 Hz (300 data points) across three channels: x, y and z.

In [23]:
# Report the dimensions of the loaded feature and label arrays.
feature_shape = data['features'].shape
label_shape = data['labels'].shape
print(f"Feature array shape: {feature_shape}")
print(f"Label array shape: {label_shape}")

Feature array shape: (3091, 300, 3)
Label array shape: (3091,)


In [14]:
# Count how many samples carry each label and report the counts by class name.
counts = np.unique(data['labels'], return_counts=True)
class_name = {0: 'non-tremor', 1: 'pre-tremor', 2: 'tremor'}

# `counts` is a (unique label values, occurrences) pair; map each label value
# to its readable name and cast the NumPy integers to plain Python ints.
label_values, label_freqs = counts
label_counts = dict(zip([class_name[int(value)] for value in label_values], map(int, label_freqs)))
print(f"Label Distribution: {label_counts}")

Label Distribution: {'non-tremor': 2997, 'pre-tremor': 28, 'tremor': 66}


There is, unexpectedly, a serious **class imbalance issue**: non-tremor (label 0) is far more common than pre-tremor (label 1) and tremor (label 2). One thing we can do before even training is **downsample the dominant class**, 'non-tremor'.

In [41]:
def downsample(signal, labels, percent_tremor=10, seed=None):
    '''
    Randomly drop 'non-tremor' (label 0) samples so that the remaining samples
    contain roughly `percent_tremor` percent tremor-type labels.

    Every sample whose label is not 0 is kept. Label-0 samples are subsampled
    without replacement, and the kept samples are returned in shuffled order.

    signal (ndarray): array whose first axis indexes samples.
    labels (ndarray): 1D array of shape (num_samples,); 0 is 'non-tremor', any other value counts as a tremor label.
    percent_tremor: Percent of samples that are labeled either 'pre-tremor' (1) or 'tremor' (2) in the downsampled dataset. Must satisfy 0 < percent_tremor <= 100.
    seed: Optional seed for a reproducible selection. When None, NumPy's global random state is used.

    Returns a (signal, labels) pair. If there are not enough label-0 samples to
    reach the requested percentage, the inputs are returned without resampling.

    Raises ValueError if the inputs have different lengths, if percent_tremor is
    out of range, or if there are no tremor-type labels at all.
    '''
    signal = np.asarray(signal)
    labels = np.asarray(labels)
    if len(signal) != len(labels):
        raise ValueError(f"signal has {len(signal)} samples but labels has {len(labels)}.")
    if not 0 < percent_tremor <= 100:
        raise ValueError(f"percent_tremor must be in (0, 100], got {percent_tremor}.")

    # Derive both counts from the same masks that drive the selection below, so
    # a missing or unexpected class value cannot make them disagree.
    zero_idx = np.flatnonzero(labels == 0)
    tremor_idx = np.flatnonzero(labels != 0)
    num_tremor = len(tremor_idx)
    if num_tremor == 0:
        raise ValueError("labels contains no tremor samples to balance against.")

    # Multiply before dividing: (29 / 100) * 100 evaluates to 28.999... and
    # would truncate one short, whereas 29 * 100 / 100 is exactly 29.0.
    num_non_tremor = int(num_tremor * 100 / percent_tremor) - num_tremor

    if len(zero_idx) < num_non_tremor:
        print(f"Chosen percent_tremor: {percent_tremor} is upsampling 'non_tremor' cases.")
        return signal, labels

    print(f'Downsampling to {num_non_tremor} of non-tremor samples from {len(zero_idx)}')
    print(f'Downsampled set has {num_tremor} amount of positive tremor labels.')

    # Without a seed, keep using the global NumPy random state so callers that
    # rely on np.random.seed(...) see the same behaviour as before.
    rng = np.random if seed is None else np.random.default_rng(seed)

    # Randomly select the 'non_tremor' samples to keep, then shuffle them
    # together with all tremor samples.
    kept_zeroes = rng.choice(zero_idx, num_non_tremor, replace=False)
    kept_sample_idx = np.concatenate((kept_zeroes, tremor_idx))
    shuffled_idx = rng.permutation(kept_sample_idx)

    return signal[shuffled_idx], labels[shuffled_idx]

In [42]:
# Downsample the non-tremor class, requesting 10 percent tremor-type labels.
downsampled_signal, downsampled_labels = downsample(
    data['features'],
    data['labels'],
    percent_tremor=10,
)
for title, array in (("Feature", downsampled_signal), ("Label", downsampled_labels)):
    print(f"Downsampled {title} array shape: {array.shape}")

Downsampling to 846 of non-tremor samples from 2997
Downsampled set has 94 amount of positive tremor labels.
Downsampled Feature array shape: (940, 300, 3)
Downsampled Label array shape: (940,)


Create PyTorch dataset for tremor data.

In [44]:
class TremorDataset(Dataset):
    '''
    Map-style dataset that pairs each signal window with its class label.

    Signals are stored channel-first as float32, labels as int64 (long).
    '''
    def __init__(self, signal, labels):
        '''
        signal (ndarray): 3D array of shape (num_samples, num_points, num_channels)
        labels (ndarray): 1D array of shape (num_samples,)

        Raises ValueError if the shapes do not match the description above.
        '''
        signal = torch.tensor(signal, dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.long)

        # Fail early: a mismatch here would otherwise only surface later as an
        # index error or as silently ignored samples.
        if signal.ndim != 3:
            raise ValueError(f"signal must be 3D (num_samples, num_points, num_channels), got shape {tuple(signal.shape)}.")
        if labels.ndim != 1:
            raise ValueError(f"labels must be 1D (num_samples,), got shape {tuple(labels.shape)}.")
        if signal.shape[0] != labels.shape[0]:
            raise ValueError(f"signal has {signal.shape[0]} samples but labels has {labels.shape[0]}.")

        # change signal dimension to (num_samples, num_channels, num_points) since 1D Conv passes over lowest dimension;
        # .contiguous() materialises the new layout once so each per-sample slice is contiguous
        self.signal = signal.permute(0, 2, 1).contiguous()
        self.labels = labels

    def __len__(self):
        '''Number of samples in the dataset.'''
        return len(self.labels)

    def __getitem__(self, idx):
        '''Return the (signal, label) pair at position idx; signal is (num_channels, num_points).'''
        return self.signal[idx], self.labels[idx]

In [45]:
# Wrap the downsampled arrays in a dataset and serve them in shuffled batches of 64.
dataset = TremorDataset(downsampled_signal, downsampled_labels)
loader = DataLoader(dataset=dataset, batch_size=64, shuffle=True)

# Pull a single batch to sanity-check the tensor shapes.
batch_iterator = iter(loader)
signal_batch, label_batch = next(batch_iterator)
print(signal_batch.size()) # expecting (B, channels, sequence) -> (B, 3, 300)
print(label_batch.size()) # expecting (B)

torch.Size([64, 3, 300])
torch.Size([64])


# **Model: 1D CNN**

In [None]:
'''GPT generated code'''
class TremorCNN(nn.Module):
    '''
    1D CNN classifier for multi-channel signal windows.

    Two Conv1d -> BatchNorm1d -> ReLU stages (16, then 32 filters, kernel size 5)
    are followed by adaptive max pooling to a fixed length and two fully
    connected layers. The output is one raw score (logit) per class; no softmax
    is applied.

    num_channels: number of input channels (3 for x, y, z).
    num_classes: number of output classes.
    pooled_length: sequence length produced by the adaptive pooling layer.
    hidden_dim: width of the hidden fully connected layer.
    '''
    def __init__(self, num_channels=3, num_classes=3, pooled_length=50, hidden_dim=64):
        super().__init__()

        # Conv1: in_channels=num_channels (x,y,z), out_channels=16, kernel_size=5
        self.conv1 = nn.Conv1d(in_channels=num_channels, out_channels=16, kernel_size=5)
        self.bn1 = nn.BatchNorm1d(16)

        # Conv2: 16 -> 32
        self.conv2 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=5)
        self.bn2 = nn.BatchNorm1d(32)

        # Adaptive pooling fixes the output length regardless of the input length
        self.pool = nn.AdaptiveMaxPool1d(pooled_length)

        # Fully connected layers; the input width of fc1 is derived from the same
        # pooled_length as the pooling layer so the two cannot get out of sync
        self.fc1 = nn.Linear(32 * pooled_length, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        '''
        x: tensor of shape (batch_size, num_channels, sequence_length)
        returns: tensor of shape (batch_size, num_classes) holding class logits
        '''
        x = F.relu(self.bn1(self.conv1(x)))   # -> (batch, 16, L - 4)
        x = F.relu(self.bn2(self.conv2(x)))   # -> (batch, 32, L - 8)
        x = self.pool(x)                      # -> (batch, 32, pooled_length)
        x = x.flatten(start_dim=1)            # -> (batch, 32 * pooled_length)
        x = F.relu(self.fc1(x))               # -> (batch, hidden_dim)
        x = self.fc2(x)                       # -> (batch, num_classes)
        return x