In [4]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from scipy.signal import butter, filtfilt, welch
from scipy.stats import entropy
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score
from tqdm import tqdm
import math

# Configuration
class Config:
    base_path = '/kaggle/input/mtcaic3'
    task = 'MI'
    channels = ['FZ', 'C3', 'CZ', 'C4', 'PZ', 'PO7', 'OZ', 'PO8']
    sample_rate = 250
    mi_trial_length = 2250
    batch_size = 32
    lr = 0.0005  # Reduced learning rate
    epochs = 200
    patience = 20  # Increased patience
    noise_std = 0.15  # Adjusted noise
    max_shift = 25    # Increased time shift
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    checkpoint_path = '/kaggle/working/best_model.pth'
    history_path = '/kaggle/working/training_history.csv'
    max_grad_norm = 1.0  # Gradient clipping
    dropout_rate = 0.5  # Increased dropout

config = Config()

# Bandpass Filter Setup
def butter_bandpass(lowcut, highcut, fs, order=5):
    nyq = 0.5 * fs
    low = lowcut / nyq
    high = highcut / nyq
    b, a = butter(order, [low, high], btype='band')
    return b, a

mu_b, mu_a = butter_bandpass(8, 13, config.sample_rate)
beta_b, beta_a = butter_bandpass(13, 30, config.sample_rate)

# Feature Extraction Functions
def compute_band_power(data, b, a):
    filtered = filtfilt(b, a, data)
    return np.mean(filtered ** 2)

def compute_spectral_entropy(data, fs=250):
    f, psd = welch(data, fs=fs, nperseg=min(256, len(data)))
    psd_norm = psd / psd.sum()
    return entropy(psd_norm)

def hjorth_parameters(data):
    first_deriv = np.diff(data)
    second_deriv = np.diff(first_deriv)
    
    activity = np.var(data)
    mobility = np.sqrt(np.var(first_deriv) / activity)
    complexity = np.sqrt(np.var(second_deriv) / np.var(first_deriv)) / mobility
    
    return activity, mobility, complexity

def extract_features(eeg_data):
    """Extract enhanced features from 8-channel EEG data"""
    features = []
    n_channels = eeg_data.shape[0]
    
    # Band powers (Mu and Beta)
    mu_powers = []
    beta_powers = []
    for i in range(n_channels):
        channel_data = eeg_data[i]
        mu_power = compute_band_power(channel_data, mu_b, mu_a)
        beta_power = compute_band_power(channel_data, beta_b, beta_a)
        mu_powers.append(mu_power)
        beta_powers.append(beta_power)
    
    # Power differences (C3-C4)
    c3_idx, c4_idx = config.channels.index('C3'), config.channels.index('C4')
    mu_diff = mu_powers[c3_idx] - mu_powers[c4_idx]
    beta_diff = beta_powers[c3_idx] - beta_powers[c4_idx]
    
    # Asymmetry indices
    mu_asym = (mu_powers[c3_idx] - mu_powers[c4_idx]) / (mu_powers[c3_idx] + mu_powers[c4_idx] + 1e-10)
    beta_asym = (beta_powers[c3_idx] - beta_powers[c4_idx]) / (beta_powers[c3_idx] + beta_powers[c4_idx] + 1e-10)
    
    # Band power ratios
    mu_ratios = [mu / (mu + beta + 1e-10) for mu, beta in zip(mu_powers, beta_powers)]
    
    # Time-domain features
    means = np.mean(eeg_data, axis=1)
    variances = np.var(eeg_data, axis=1)
    rms = np.sqrt(np.mean(eeg_data ** 2, axis=1))
    
    # Hjorth parameters
    hjorth_features = []
    for i in range(n_channels):
        activity, mobility, complexity = hjorth_parameters(eeg_data[i])
        hjorth_features.extend([activity, mobility, complexity])
    
    # Spectral entropy
    spectral_entropies = [compute_spectral_entropy(eeg_data[i]) for i in range(n_channels)]
    
    # Assemble feature vector (84 features total)
    features.extend(mu_powers)           # 8
    features.extend(beta_powers)         # 8
    features.extend([mu_diff, beta_diff, mu_asym, beta_asym])  # 4
    features.extend(mu_ratios)           # 8
    features.extend(means)               # 8
    features.extend(variances)           # 8
    features.extend(rms)                 # 8
    features.extend(hjorth_features)     # 24
    features.extend(spectral_entropies)  # 8
    
    return np.array(features)

# Dataset Class with Fixed Feature Size Handling
class MIDataset(Dataset):
    def __init__(self, df, base_path, eeg_scaler=None, feature_scaler=None, augment=False):
        self.df = df
        self.base_path = base_path
        self.augment = augment
        self.eeg_scaler = eeg_scaler
        self.feature_scaler = feature_scaler
        
        # Store raw EEGs and labels
        self.raw_eegs = []
        self.labels = []
        
        for _, row in tqdm(df.iterrows(), total=len(df)):
            eeg_data = self.load_eeg_data(row)
            label = self.map_label(row['label'])
            self.raw_eegs.append(eeg_data)
            self.labels.append(label)
            
        # Initialize EEG scaler (per-channel normalization)
        if eeg_scaler is None:
            # Create array of shape (n_trials * time_points, channels)
            all_eeg = np.vstack([eeg.T for eeg in self.raw_eegs])
            self.eeg_scaler = StandardScaler()
            self.eeg_scaler.fit(all_eeg)
        else:
            self.eeg_scaler = eeg_scaler
            
        # Precompute features for scaling
        scaled_eegs = [self.eeg_scaler.transform(eeg.T).T for eeg in self.raw_eegs]
        features_list = [extract_features(eeg) for eeg in scaled_eegs]
        
        # Initialize feature scaler
        if feature_scaler is None:
            self.feature_scaler = StandardScaler()
            self.feature_scaler.fit(np.array(features_list))
        else:
            self.feature_scaler = feature_scaler
            
        # Save feature dimension for model initialization
        self.feature_dim = len(features_list[0])
    
    def map_label(self, label_str):
        # Only Left and Right classes for MI
        mapping = {'Left': 0, 'Right': 1}
        return mapping[label_str]
    
    def load_eeg_data(self, row):
        # Determine dataset split
        id_num = row['id']
        if id_num <= 4800:
            split = 'train'
        elif id_num <= 4900:
            split = 'validation'
        else:
            split = 'test'
        
        # Build file path
        eeg_path = os.path.join(
            self.base_path, 
            row['task'], 
            split,
            row['subject_id'],
            str(row['trial_session']),
            'EEGdata.csv'
        )
        
        # Load and extract trial data
        full_data = pd.read_csv(eeg_path)
        start_idx = (row['trial'] - 1) * config.mi_trial_length
        end_idx = start_idx + config.mi_trial_length
        trial_data = full_data.iloc[start_idx:end_idx][config.channels].values.T
        return trial_data.astype(np.float32)
    
    def __len__(self):
        return len(self.raw_eegs)
    
    def __getitem__(self, idx):
        eeg = self.raw_eegs[idx].copy()
        label = self.labels[idx]
        
        # Data augmentation
        if self.augment:
            # Time shifting
            shift = np.random.randint(-config.max_shift, config.max_shift + 1)
            if shift != 0:
                eeg = np.roll(eeg, shift, axis=1)
            
            # Gaussian noise
            noise = np.random.normal(0, config.noise_std, eeg.shape)
            eeg += noise
            
            # Random frequency filtering
            if np.random.rand() > 0.5:
                cutoff = np.random.uniform(5, 20)
                b, a = butter_bandpass(cutoff, min(cutoff+15, 40), config.sample_rate, order=3)
                for i in range(eeg.shape[0]):
                    eeg[i] = filtfilt(b, a, eeg[i])
            
            # Random amplitude scaling
            if np.random.rand() > 0.7:
                scale = np.random.uniform(0.8, 1.2)
                eeg *= scale
        
        # Apply EEG scaling
        eeg_scaled = self.eeg_scaler.transform(eeg.T).T
        
        # Extract and scale features
        features = extract_features(eeg_scaled)
        features_scaled = self.feature_scaler.transform(features.reshape(1, -1)).flatten()
        
        return (
            torch.tensor(eeg_scaled, dtype=torch.float32),
            torch.tensor(features_scaled, dtype=torch.float32),
            torch.tensor(label, dtype=torch.long)
        )

# Enhanced Hybrid Model Architecture with ShallowConvNet
class ShallowConvNet(nn.Module):
    def __init__(self, num_channels, time_length):
        super().__init__()
        self.conv1 = nn.Conv1d(num_channels, 40, kernel_size=25, padding=12)
        self.conv2 = nn.Conv1d(40, 40, kernel_size=1)
        self.pool = nn.AvgPool1d(kernel_size=75, stride=15)
        self.dropout = nn.Dropout(config.dropout_rate)
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = torch.square(x)  # Square activation
        x = self.pool(x)
        x = torch.log(torch.clamp(x, min=1e-6))  # Safe log
        x = self.dropout(x)
        return x

class HybridModel(nn.Module):
    def __init__(self, num_channels, time_length, num_features, num_classes):
        super().__init__()
        
        # CNN Branch (ShallowConvNet style)
        self.cnn = ShallowConvNet(num_channels, time_length)
        
        # Calculate CNN output size
        with torch.no_grad():
            dummy = torch.randn(1, num_channels, time_length)
            cnn_out = self.cnn(dummy)
            cnn_out_size = cnn_out.view(1, -1).shape[1]
        
        # CNN classifier
        self.cnn_classifier = nn.Sequential(
            nn.Linear(cnn_out_size, 128),
            nn.ELU(),
            nn.Dropout(config.dropout_rate),
            nn.Linear(128, 64)
        )
        
        # MLP Branch for engineered features
        self.mlp = nn.Sequential(
            nn.Linear(num_features, 128),
            nn.ELU(),
            nn.Dropout(config.dropout_rate),
            nn.Linear(128, 64)
        )
        
        # Combined Classifier
        self.classifier = nn.Sequential(
            nn.Linear(64 + 64, 128),
            nn.ELU(),
            nn.Dropout(config.dropout_rate),
            nn.Linear(128, num_classes)
        )
    
    def forward(self, eeg, features):
        # CNN processing
        cnn_out = self.cnn(eeg)
        cnn_out = cnn_out.view(cnn_out.size(0), -1)
        cnn_out = self.cnn_classifier(cnn_out)
        
        # MLP processing
        mlp_out = self.mlp(features)
        
        # Combine and classify
        combined = torch.cat((cnn_out, mlp_out), dim=1)
        return self.classifier(combined)

# Enhanced Training Function with Focal Loss
class FocalLoss(nn.Module):
    def __init__(self, alpha=None, gamma=2, reduction='mean'):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, inputs, targets):
        ce_loss = nn.functional.cross_entropy(inputs, targets, reduction='none')
        pt = torch.exp(-ce_loss)
        focal_loss = (1 - pt) ** self.gamma * ce_loss
        
        if self.alpha is not None:
            focal_loss = self.alpha[targets] * focal_loss
            
        if self.reduction == 'mean':
            return focal_loss.mean()
        elif self.reduction == 'sum':
            return focal_loss.sum()
        return focal_loss

def train_model():
    # Load and prepare data
    train_df = pd.read_csv(os.path.join(config.base_path, 'train.csv'))
    val_df = pd.read_csv(os.path.join(config.base_path, 'validation.csv'))
    
    # Filter for MI task
    train_df = train_df[train_df['task'] == config.task]
    val_df = val_df[val_df['task'] == config.task]
    
    # Create datasets
    train_dataset = MIDataset(train_df, config.base_path, augment=True)
    val_dataset = MIDataset(
        val_df, config.base_path,
        eeg_scaler=train_dataset.eeg_scaler,
        feature_scaler=train_dataset.feature_scaler
    )
    
    # Create dataloaders
    train_loader = DataLoader(
        train_dataset, batch_size=config.batch_size, shuffle=True, num_workers=2
    )
    val_loader = DataLoader(
        val_dataset, batch_size=config.batch_size, shuffle=False, num_workers=2
    )
    
    # Only 2 classes for MI: Left and Right
    num_classes = 2
    
    # Initialize model with correct feature dimension
    model = HybridModel(
        num_channels=len(config.channels),
        time_length=config.mi_trial_length,
        num_features=train_dataset.feature_dim,
        num_classes=num_classes
    ).to(config.device)
    
    # Print model summary
    print(f"Model initialized with:")
    print(f"- CNN input: {len(config.channels)} channels × {config.mi_trial_length} timepoints")
    print(f"- MLP input: {train_dataset.feature_dim} features")
    print(f"- Output classes: {num_classes}")
    print(f"- Total parameters: {sum(p.numel() for p in model.parameters())/1e6:.2f}M")
    
    # Class weighting for imbalance
    label_counts = np.bincount(train_dataset.labels)
    class_weights = 1. / (label_counts + 1e-6)
    class_weights = torch.tensor(class_weights, dtype=torch.float32).to(config.device)
    
    # Use Focal Loss to handle class imbalance
    criterion = FocalLoss(alpha=class_weights, gamma=2.0)
    
    # Optimizer with weight decay
    optimizer = optim.AdamW(model.parameters(), lr=config.lr, weight_decay=1e-4)
    
    # Learning rate scheduler with longer patience
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='max', factor=0.5, patience=10, verbose=True
    )
    
    # Training state
    best_f1 = 0.0
    epochs_no_improve = 0
    history = []
    
    # Training loop
    for epoch in range(config.epochs):
        # Training phase
        model.train()
        train_loss, train_correct, train_total = 0, 0, 0
        all_preds, all_labels = [], []
        
        for eeg, features, labels in tqdm(train_loader, desc=f'Epoch {epoch+1}'):
            eeg, features, labels = (
                eeg.to(config.device),
                features.to(config.device),
                labels.to(config.device)
            )
            
            # Forward pass
            outputs = model(eeg, features)
            loss = criterion(outputs, labels)
            
            # Backward pass with gradient clipping
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), config.max_grad_norm)
            optimizer.step()
            
            # Track metrics
            train_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            train_total += labels.size(0)
            train_correct += (predicted == labels).sum().item()
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
        
        # Calculate training metrics
        train_loss /= len(train_loader)
        train_acc = train_correct / train_total
        train_f1 = f1_score(all_labels, all_preds, average='binary')  # Binary for 2 classes
        
        # Validation phase
        model.eval()
        val_loss, val_correct, val_total = 0, 0, 0
        all_preds, all_labels = [], []
        
        with torch.no_grad():
            for eeg, features, labels in val_loader:
                eeg, features, labels = (
                    eeg.to(config.device),
                    features.to(config.device),
                    labels.to(config.device)
                )
                
                outputs = model(eeg, features)
                loss = criterion(outputs, labels)
                
                val_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()
                all_preds.extend(predicted.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
        
        # Calculate validation metrics
        val_loss /= len(val_loader)
        val_acc = val_correct / val_total
        val_f1 = f1_score(all_labels, all_preds, average='binary')  # Binary for 2 classes
        
        # Update learning rate
        scheduler.step(val_f1)
        
        # Record history
        history.append({
            'epoch': epoch + 1,
            'train_loss': train_loss,
            'train_acc': train_acc,
            'train_f1': train_f1,
            'val_loss': val_loss,
            'val_acc': val_acc,
            'val_f1': val_f1,
            'lr': optimizer.param_groups[0]['lr']
        })
        
        # Print metrics
        print(f"Epoch {epoch+1}/{config.epochs}: "
              f"Train Loss: {train_loss:.4f}, Train F1: {train_f1:.4f} | "
              f"Val Loss: {val_loss:.4f}, Val F1: {val_f1:.4f} | "
              f"LR: {optimizer.param_groups[0]['lr']:.6f}")
        
        # Early stopping and checkpointing
        if val_f1 > best_f1:
            best_f1 = val_f1
            epochs_no_improve = 0
            torch.save({
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'val_f1': val_f1,
                'history': history
            }, config.checkpoint_path)
            print(f"Checkpoint saved with F1: {val_f1:.4f}")
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= config.patience:
                print(f"Early stopping at epoch {epoch+1}")
                break
    
    # Save training history
    history_df = pd.DataFrame(history)
    history_df.to_csv(config.history_path, index=False)
    return history_df

# Run training
if __name__ == "__main__":
    history = train_model()

100%|██████████| 2400/2400 [03:34<00:00, 11.18it/s]
100%|██████████| 50/50 [00:03<00:00, 14.30it/s]


Model initialized with:
- CNN input: 8 channels × 2250 timepoints
- MLP input: 84 features
- Output classes: 2
- Total parameters: 0.80M


Epoch 1: 100%|██████████| 75/75 [00:16<00:00,  4.54it/s]


Epoch 1/200: Train Loss: 0.0003, Train F1: 0.5169 | Val Loss: 0.0001, Val F1: 0.2308 | LR: 0.000500
Checkpoint saved with F1: 0.2308


Epoch 2: 100%|██████████| 75/75 [00:14<00:00,  5.05it/s]


Epoch 2/200: Train Loss: 0.0002, Train F1: 0.5175 | Val Loss: 0.0001, Val F1: 0.5714 | LR: 0.000500
Checkpoint saved with F1: 0.5714


Epoch 3: 100%|██████████| 75/75 [00:14<00:00,  5.18it/s]


Epoch 3/200: Train Loss: 0.0002, Train F1: 0.5195 | Val Loss: 0.0001, Val F1: 0.5424 | LR: 0.000500


Epoch 4: 100%|██████████| 75/75 [00:15<00:00,  4.96it/s]


Epoch 4/200: Train Loss: 0.0002, Train F1: 0.4907 | Val Loss: 0.0001, Val F1: 0.2222 | LR: 0.000500


Epoch 5: 100%|██████████| 75/75 [00:15<00:00,  4.92it/s]


Epoch 5/200: Train Loss: 0.0001, Train F1: 0.5196 | Val Loss: 0.0001, Val F1: 0.3889 | LR: 0.000500


Epoch 6: 100%|██████████| 75/75 [00:15<00:00,  4.75it/s]


Epoch 6/200: Train Loss: 0.0001, Train F1: 0.5176 | Val Loss: 0.0001, Val F1: 0.3636 | LR: 0.000500


Epoch 7: 100%|██████████| 75/75 [00:14<00:00,  5.05it/s]


Epoch 7/200: Train Loss: 0.0001, Train F1: 0.5104 | Val Loss: 0.0001, Val F1: 0.5283 | LR: 0.000500


Epoch 8: 100%|██████████| 75/75 [00:14<00:00,  5.05it/s]


Epoch 8/200: Train Loss: 0.0001, Train F1: 0.5034 | Val Loss: 0.0001, Val F1: 0.5714 | LR: 0.000500


Epoch 9: 100%|██████████| 75/75 [00:14<00:00,  5.02it/s]


Epoch 9/200: Train Loss: 0.0001, Train F1: 0.5338 | Val Loss: 0.0001, Val F1: 0.5263 | LR: 0.000500


Epoch 10: 100%|██████████| 75/75 [00:14<00:00,  5.22it/s]


Epoch 10/200: Train Loss: 0.0001, Train F1: 0.5210 | Val Loss: 0.0001, Val F1: 0.1538 | LR: 0.000500


Epoch 11: 100%|██████████| 75/75 [00:14<00:00,  5.03it/s]


Epoch 11/200: Train Loss: 0.0001, Train F1: 0.5065 | Val Loss: 0.0001, Val F1: 0.6000 | LR: 0.000500
Checkpoint saved with F1: 0.6000


Epoch 12: 100%|██████████| 75/75 [00:14<00:00,  5.18it/s]


Epoch 12/200: Train Loss: 0.0001, Train F1: 0.5123 | Val Loss: 0.0001, Val F1: 0.0870 | LR: 0.000500


Epoch 13: 100%|██████████| 75/75 [00:14<00:00,  5.02it/s]


Epoch 13/200: Train Loss: 0.0001, Train F1: 0.5339 | Val Loss: 0.0001, Val F1: 0.3030 | LR: 0.000500


Epoch 14: 100%|██████████| 75/75 [00:14<00:00,  5.20it/s]


Epoch 14/200: Train Loss: 0.0001, Train F1: 0.5029 | Val Loss: 0.0001, Val F1: 0.5000 | LR: 0.000500


Epoch 15: 100%|██████████| 75/75 [00:15<00:00,  4.92it/s]


Epoch 15/200: Train Loss: 0.0001, Train F1: 0.5204 | Val Loss: 0.0001, Val F1: 0.4898 | LR: 0.000500


Epoch 16: 100%|██████████| 75/75 [00:14<00:00,  5.25it/s]


Epoch 16/200: Train Loss: 0.0001, Train F1: 0.4911 | Val Loss: 0.0001, Val F1: 0.5667 | LR: 0.000500


Epoch 17: 100%|██████████| 75/75 [00:15<00:00,  5.00it/s]


Epoch 17/200: Train Loss: 0.0001, Train F1: 0.5368 | Val Loss: 0.0001, Val F1: 0.3913 | LR: 0.000500


Epoch 18: 100%|██████████| 75/75 [00:14<00:00,  5.15it/s]


Epoch 18/200: Train Loss: 0.0001, Train F1: 0.5066 | Val Loss: 0.0001, Val F1: 0.5357 | LR: 0.000500


Epoch 19: 100%|██████████| 75/75 [00:15<00:00,  4.91it/s]


Epoch 19/200: Train Loss: 0.0001, Train F1: 0.5402 | Val Loss: 0.0001, Val F1: 0.4615 | LR: 0.000500


Epoch 20: 100%|██████████| 75/75 [00:14<00:00,  5.10it/s]


Epoch 20/200: Train Loss: 0.0001, Train F1: 0.5178 | Val Loss: 0.0001, Val F1: 0.5902 | LR: 0.000500


Epoch 21: 100%|██████████| 75/75 [00:14<00:00,  5.10it/s]


Epoch 21/200: Train Loss: 0.0001, Train F1: 0.5206 | Val Loss: 0.0001, Val F1: 0.5091 | LR: 0.000500


Epoch 22: 100%|██████████| 75/75 [00:14<00:00,  5.11it/s]


Epoch 22/200: Train Loss: 0.0001, Train F1: 0.5531 | Val Loss: 0.0001, Val F1: 0.4898 | LR: 0.000250


Epoch 23: 100%|██████████| 75/75 [00:15<00:00,  4.95it/s]


Epoch 23/200: Train Loss: 0.0001, Train F1: 0.5202 | Val Loss: 0.0001, Val F1: 0.4783 | LR: 0.000250


Epoch 24: 100%|██████████| 75/75 [00:14<00:00,  5.06it/s]


Epoch 24/200: Train Loss: 0.0001, Train F1: 0.5434 | Val Loss: 0.0001, Val F1: 0.4898 | LR: 0.000250


Epoch 25: 100%|██████████| 75/75 [00:14<00:00,  5.21it/s]


Epoch 25/200: Train Loss: 0.0001, Train F1: 0.5067 | Val Loss: 0.0001, Val F1: 0.5098 | LR: 0.000250


Epoch 26: 100%|██████████| 75/75 [00:14<00:00,  5.01it/s]


Epoch 26/200: Train Loss: 0.0001, Train F1: 0.5176 | Val Loss: 0.0001, Val F1: 0.5098 | LR: 0.000250


Epoch 27: 100%|██████████| 75/75 [00:14<00:00,  5.00it/s]


Epoch 27/200: Train Loss: 0.0001, Train F1: 0.5027 | Val Loss: 0.0001, Val F1: 0.5098 | LR: 0.000250


Epoch 28: 100%|██████████| 75/75 [00:15<00:00,  4.76it/s]


Epoch 28/200: Train Loss: 0.0001, Train F1: 0.5087 | Val Loss: 0.0001, Val F1: 0.5385 | LR: 0.000250


Epoch 29: 100%|██████████| 75/75 [00:14<00:00,  5.16it/s]


Epoch 29/200: Train Loss: 0.0001, Train F1: 0.5495 | Val Loss: 0.0001, Val F1: 0.4878 | LR: 0.000250


Epoch 30: 100%|██████████| 75/75 [00:15<00:00,  4.86it/s]


Epoch 30/200: Train Loss: 0.0001, Train F1: 0.5420 | Val Loss: 0.0001, Val F1: 0.4889 | LR: 0.000250


Epoch 31: 100%|██████████| 75/75 [00:14<00:00,  5.11it/s]


Epoch 31/200: Train Loss: 0.0001, Train F1: 0.4994 | Val Loss: 0.0001, Val F1: 0.5098 | LR: 0.000250
Early stopping at epoch 31
