In [173]:
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import Dataset, DataLoader, random_split
from torch.utils.tensorboard import SummaryWriter

from scipy.ndimage import gaussian_filter1d
from scipy.signal import iirnotch, filtfilt, butter

# Reading data

In [None]:
# Load raw data from the file
file_path = "MW.txt"  # Replace with actual file path
with open(file_path, "r") as file:
    lines = file.readlines()

# Extract codes (4th column) and brainwaves (7th column onward)
codes = []
brainwaves = []

for line in lines[1:]:  # Skip header
    parts = line.strip().split("\t")  # Assuming tab-separated values
    code = parts[4]  # 4th column (code)
    brainwave_values = list(map(float, parts[6:][0].split(',')))  # 7th column to end (brainwave signals)
    
    codes.append(int(code))
    brainwaves.append(brainwave_values)

In [None]:
target_length = max(len(ts) for ts in brainwaves)  # Choose max length

# Function to interpolate a time series to the target length
def interpolate_timeseries(series, target_length):
    x_old = np.linspace(0, 1, len(series))  # Original time points
    x_new = np.linspace(0, 1, target_length)  # New time points
    return np.interp(x_new, x_old, series).tolist()

# Apply interpolation to each series
resampled_brainwaves = [interpolate_timeseries(ts, target_length) for ts in brainwaves]

print(resampled_brainwaves)

In [None]:
# Convert to NumPy arrays
codes = np.array(codes)  # String values
brainwave_array = np.array(resampled_brainwaves)  # Numerical brainwave data

# Save separately
np.save("codes.npy", codes)  # Save codes
np.save("brainwaves.npy", brainwave_array)  # Save brainwave signals

print("Brainwave data and codes saved successfully!")

# Loading data


In [203]:
def bandpass_filter(data, lowcut, highcut, fs, order=5):
    nyquist = 0.5 * fs
    low = lowcut / nyquist
    high = highcut / nyquist
    b, a = butter(order, [low, high], btype='band')
    return filtfilt(b, a, data)

def notch_filter(data, fs, notch_freq=50.0, quality_factor=30.0):
    b, a = iirnotch(notch_freq, quality_factor, fs)
    return filtfilt(b, a, data)

def filter_digit_set(waves, fs=256):
    data = notch_filter(waves, fs)
    data = bandpass_filter(waves, lowcut=1.0, highcut=40.0, fs=fs)
    return data

def smooth(waves, std):
    smooth_waves = gaussian_filter1d(waves, sigma=std, radius=1024)
    return smooth_waves, std

def preprocess_digit_class(brainwaves, codes, digit_code):
    digit_idx = np.where(codes == digit_code)
    digit_signals = brainwaves[digit_idx]

    train_size = int(0.8 * len(digit_signals))  
    val_size = len(digit_signals) - train_size
    train_waves, val_waves = random_split(digit_signals, [train_size, val_size])
    
    train_waves_filtered = filter_digit_set(train_waves)
    val_waves_filtered = filter_digit_set(val_waves)

    std = np.std(train_waves_filtered)

    train_waves_filtered_smooth, std = smooth(train_waves_filtered, std)
    val_waves_filtered_smooth, _ = smooth(val_waves_filtered, std)

    train_codes = [digit_code] * len(train_waves_filtered_smooth)
    val_codes = [digit_code] * len(val_waves_filtered_smooth)

    return (train_waves_filtered_smooth, 
            val_waves_filtered_smooth,
            train_codes,
            val_codes)

def preprocess_all_digits(brainwaves, codes):
    digit_data = {}
    for digit_code in range(10):  # Assuming digits 0-9
        train_waves, val_waves, train_codes, val_codes = preprocess_digit_class(brainwaves, codes, digit_code)
        digit_data[digit_code] = {
            'train': (train_waves, train_codes),
            'val': (val_waves, val_codes)
        }
    return digit_data

In [206]:
# Load NumPy arrays
brainwave_array = np.load("brainwaves.npy")  # Shape (N, 1024)
codes = np.load("codes.npy") # Shape (N, 1)

digit_code_index = np.where(codes != 0)
non_zero_codes = codes[digit_code_index]
non_zero_codes -= 1

non_zero_brainwaves_array = brainwave_array[digit_code_index]

digit_data = preprocess_all_digits(non_zero_brainwaves_array, non_zero_codes)

In [229]:
# Combine all train and val sets in digit_data
all_train_waves = []
all_train_codes = []
all_val_waves = []
all_val_codes = []

for digit_code, data in digit_data.items():
    train_waves, train_codes = data['train']
    val_waves, val_codes = data['val']
    
    all_train_waves.extend(train_waves)
    all_train_codes.extend(train_codes)
    all_val_waves.extend(val_waves)
    all_val_codes.extend(val_codes)

# Convert to NumPy arrays
train_waves = np.array(all_train_waves)
train_codes = np.array(all_train_codes)
val_waves = np.array(all_val_waves)
val_codes = np.array(all_val_codes)

print("All train and validation sets combined successfully!")

All train and validation sets combined successfully!


In [230]:
class EEGDataset(Dataset):
    def __init__(self, brainwave_array, codes):
        """
        Custom PyTorch Dataset for EEG data.
        
        Parameters:
        - brainwave_array: NumPy array of shape (N, 1024), EEG signals
        - codes: NumPy array of shape (N, 1), corresponding labels
        """
        self.X = torch.tensor(brainwave_array, dtype=torch.float32)  # Convert to tensor
        self.y = torch.tensor(codes, dtype=torch.long).squeeze()  # Convert to tensor, remove extra dim
        
    def __len__(self):
        return len(self.X)  # Number of samples
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]  # Returns (EEG signal, code)

# def normalize_data(data):
#     return (data - data.mean(axis=1, keepdims=True)) / data.std(axis=1, keepdims=True)

# def preprocess_eeg(data, fs):
#     # Step 1: Apply Notch filter to remove 50/60Hz line noise
#     data = notch_filter(data, fs)
    
#     # Step 2: Apply Bandpass filter to focus on desired frequency range (e.g., 1-40Hz)
#     data = bandpass_filter(data, lowcut=1.0, highcut=40.0, fs=fs)
    
#     # Step 4: Z-score normalize data if necessary to reduce baseline drift
#     smoothed_signal = gaussian_filter1d(data, sigma=1, radius=512)
    
#     return smoothed_signal

# # Load NumPy arrays
# brainwave_array = np.load("brainwaves.npy")  # Shape (N, 1024)
# codes = np.load("codes.npy") # Shape (N, 1)

# digit_code_index = np.where(codes != 0)
# non_zero_codes = codes[digit_code_index]
# non_zero_codes -= 1

# normalized_brainwave_array = preprocess_eeg(brainwave_array, fs=256)  # Assuming 256Hz sampling rate
# non_zero_brainwaves_array = normalized_brainwave_array[digit_code_index]

In [231]:
# Create Dataset
train_dataset = EEGDataset(train_waves, train_codes)
val_dataset = EEGDataset(val_waves, val_codes)

# Separating classes

In [232]:
# # Split the dataset into training (80%) and validation (20%) sets
# train_size = int(0.8 * len(dataset))  # 80% for training
# val_size = len(dataset) - train_size  # Remaining 20% for validation
# train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Create DataLoader for both training and validation datasets
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [233]:
# class FFTTransform(nn.Module):
#     def __init__(self):
#         super(FFTTransform, self).__init__()

#     def forward(self, x):
#         x = torch.fft.rfft(x, dim=1)
#         return torch.abs(x)

# class EEGClassifier(nn.Module):
#     def __init__(self, num_classes=11):
#         super(EEGClassifier, self).__init__()

#         self.conv1 = nn.Conv1d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
#         self.pool = nn.MaxPool1d(2)
#         self.conv2 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
#         self.conv3 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)

#         self.dropout = nn.Dropout(0.5)
#         self.fc1 = nn.Linear(128 * 64, 256)  # Adjust based on final conv shape
#         self.fc2 = nn.Linear(256, num_classes)

#     def forward(self, x):
#         x = x.unsqueeze(1)  # (N, 513) -> (N, 1, 513)

#         x = self.pool(torch.relu(self.conv1(x)))  # (N, 1, 513) -> (N, 32, 256)
#         x = self.pool(torch.relu(self.conv2(x)))  # (N, 32, 256) -> (N, 64, 128)
#         x = self.pool(torch.relu(self.conv3(x)))  # (N, 64, 128) -> (N, 128, 64)

#         x = x.view(x.size(0), -1)  # Flatten
#         x = self.dropout(torch.relu(self.fc1(x)))
#         x = self.fc2(x)
#         return x

# model = nn.Sequential(
#     FFTTransform(),
#     EEGClassifier(num_classes=len(np.unique(codes)))
#     )

# class EEGClassifier(nn.Module):
#     def __init__(self, num_classes=11):  # You can change the number of classes
#         super(EEGClassifier, self).__init__()
        
#         self.conv1 = nn.Conv1d(in_channels=1, out_channels=8, kernel_size=64, stride=1, padding=1)  # (1, 1024) -> (32, 1024)
#         self.pool = nn.MaxPool1d(2)  # Downsample by a factor of 2
#         self.conv2 = nn.Conv1d(in_channels=8, out_channels=16, kernel_size=32, stride=1, padding=1)  # (32, 1024) -> (64, 1024)
#         self.conv3 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=16, stride=1, padding=1)  # (64, 1024) -> (128, 1024)
        
#         self.fc1 = nn.Linear(32 * 128, 128)  # Flattened features (128 channels, downsampled size)
#         self.fc2 = nn.Linear(128, num_classes)  # Output layer for classification

#         self.dropout = nn.Dropout(p=0.5)  # Dropout for regularization

#     def forward(self, x):
#         x = x.unsqueeze(1)  # Add channel dimension (N, 1024) -> (N, 1, 1024)
        
#         x = self.pool(torch.relu(self.conv1(x)))  # Conv1 + ReLU + MaxPool
#         x = self.pool(torch.relu(self.conv2(x)))  # Conv2 + ReLU + MaxPool
#         x = self.pool(torch.relu(self.conv3(x)))  # Conv3 + ReLU + MaxPool
        
#         x = x.view(x.size(0), -1)  # Flatten (N, 128, 128) -> (N, 128*128)
        
#         x = self.dropout(torch.relu(self.fc1(x)))  # FC1 + ReLU + Dropout
#         x = self.fc2(x)  # Output layer
#         return x
import torch
import torch.nn as nn
import numpy as np

class EEGClassifier(nn.Module):
    def __init__(self, num_classes=11):  # You can change the number of classes
        super(EEGClassifier, self).__init__()
        
        # First convolution layer: (1, 1024) -> (8, 1024)
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=8, kernel_size=3, stride=2, padding=1)
        # Second convolution layer: (8, 1024) -> (16, 1024)
        self.conv2 = nn.Conv1d(in_channels=8, out_channels=16, kernel_size=3, stride=2, padding=1)
        # Third convolution layer: (16, 1024) -> (32, 1024)
        self.conv3 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=3, stride=2, padding=1)
        # Fourth convolution layer: (32, 1024) -> (64, 1024)
        self.conv4 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3, stride=2, padding=1)
        
        # Max pooling operation with a larger downsampling factor
        self.pool = nn.MaxPool1d(2)  # Pool by factor of 2

        # FC layers for classification (adjusted after the additional convolutions and pooling)
        self.fc1 = nn.Linear(256, 32)  # After convolutions and pooling
        self.fc2 = nn.Linear(32, num_classes)  # Output layer for classification
        
        self.dropout = nn.Dropout(p=0.3)  # Dropout for regularization

    def forward(self, x):
        x = x.unsqueeze(1)  # Add channel dimension (N, 1024) -> (N, 1, 1024)

        # Apply convolutions + ReLU + Pooling
        x = self.pool(torch.relu(self.conv1(x)))  # (N, 8, 1024) -> (N, 8, 512)
        x = self.pool(torch.relu(self.conv2(x)))  # (N, 16, 512) -> (N, 16, 256)
        x = self.pool(torch.relu(self.conv3(x)))  # (N, 32, 256) -> (N, 32, 128)
        x = self.pool(torch.relu(self.conv4(x)))  # (N, 64, 128) -> (N, 64, 64)
        
        # Flatten the tensor before feeding it to the fully connected layer
        x = x.view(x.size(0), -1)  # Flatten (N, 64, 64) -> (N, 64 * 64)
        
        # Apply the fully connected layers
        x = self.dropout(torch.relu(self.fc1(x)))  # FC1 + ReLU + Dropout
        x = self.fc2(x)  # Output layer
        return x

model = EEGClassifier(num_classes=len(np.unique(codes)))  # Adjust num_classes based on your dataset

In [238]:
# Initialize TensorBoard
writer = SummaryWriter('runs/EEG_classification')

# Initialize the model, criterion, and optimizer

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [239]:
def train_model(model, train_loader, val_loader, epochs=10):
    model.train()
    global_step = 0  # To keep track of steps for TensorBoard logging
    
    for epoch in range(epochs):
        train_loss = 0
        train_correct = 0
        train_total = 0
        
        # Training phase
        for eeg_batch, code_batch in train_loader:
            eeg_batch, code_batch = eeg_batch.to(device), code_batch.to(device)
            
            optimizer.zero_grad()
            outputs = model(eeg_batch)  # Forward pass
            loss = criterion(outputs, code_batch)
            loss.backward()  # Backpropagation
            optimizer.step()  # Optimizer step
            
            train_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            train_correct += (predicted == code_batch).sum().item()
            train_total += code_batch.size(0)
            
            # TensorBoard: Log every 10 steps
            if global_step % 10 == 0:
                writer.add_scalar('Training Loss', loss.item(), global_step)
                writer.add_scalar('Training Accuracy', (predicted == code_batch).sum().item() / code_batch.size(0), global_step)
                
            global_step += 1
        
        # Calculate average loss and accuracy for training set
        avg_train_loss = train_loss / len(train_loader)
        train_accuracy = train_correct / train_total
        
        # Validation phase
        model.eval()
        val_loss = 0
        val_correct = 0
        val_total = 0
        
        with torch.no_grad():
            for eeg_batch, code_batch in val_loader:
                eeg_batch, code_batch = eeg_batch.to(device), code_batch.to(device)
                outputs = model(eeg_batch)
                loss = criterion(outputs, code_batch)
                
                val_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                val_correct += (predicted == code_batch).sum().item()
                val_total += code_batch.size(0)
                
        # Calculate average loss and accuracy for validation set
        avg_val_loss = val_loss / len(val_loader)
        val_accuracy = val_correct / val_total
        
        # Log to TensorBoard
        writer.add_scalar('Validation Loss', avg_val_loss, epoch)
        writer.add_scalar('Validation Accuracy', val_accuracy, epoch)
        
        # Print results
        print(f"Epoch [{epoch+1}/{epochs}]")
        print(f"Train Loss: {avg_train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}")
        print(f"Validation Loss: {avg_val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")
    
    writer.close()  # Close the TensorBoard writer

In [None]:
train_model(model, train_loader, val_loader, epochs=10)

In [21]:
noise_input = torch.randn((1, 1024))  # or shape (1, 513) after FFT
model.eval()
output = model(noise_input.to(device))
print(torch.softmax(output, dim=1))

tensor([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],
       grad_fn=<SoftmaxBackward0>)
