In [1]:
import numpy as np 
import pandas as pd 
import torch
import torch.nn as nn
import sys

In [2]:
df = pd.read_csv('/kaggle/input/training-data-meta-model/training_data_meta.csv')
display(df)

Unnamed: 0,training_instance,model_id,seizure,lpd,gpd,lrda,grda,other
0,0.0,0.0,0.987942,0.001896,0.000311,0.001601,0.000568,0.007682
1,0.0,1.0,0.953424,0.010766,0.009874,0.009344,0.002544,0.014048
2,0.0,2.0,0.999028,0.000045,0.000100,0.000110,0.000053,0.000663
3,0.0,3.0,0.996094,0.000800,0.000262,0.001422,0.000122,0.001300
4,0.0,4.0,0.684275,0.012821,0.022669,0.070409,0.076193,0.133634
...,...,...,...,...,...,...,...,...
167065,11137.0,10.0,0.001482,0.028740,0.001911,0.620885,0.087431,0.259551
167066,11137.0,11.0,0.000743,0.000828,0.000018,0.950585,0.001804,0.046022
167067,11137.0,12.0,0.000651,0.003874,0.000048,0.919886,0.003284,0.072257
167068,11137.0,13.0,0.000248,0.000262,0.000013,0.988694,0.002034,0.008749


In [17]:
# Get additional information to feed to the meta model
labels = ['seizure', 'lpd', 'gpd', 'lrda', 'grda', 'other']
max_df = df.groupby(['training_instance'])[labels].agg('max').reset_index()
min_df = df.groupby(['training_instance'])[labels].agg('min').reset_index()
mean_df = df.groupby(['training_instance'])[labels].agg('mean').reset_index()
median_df = df.groupby(['training_instance'])[labels].agg('median').reset_index()
std_df = df.groupby(['training_instance'])[labels].agg('std').reset_index()
display(mean_df)

Unnamed: 0,training_instance,seizure,lpd,gpd,lrda,grda,other
0,0.0,0.925391,0.011758,0.003779,0.018339,0.007754,0.032979
1,1.0,0.013179,0.011548,0.430501,0.007358,0.128501,0.408913
2,2.0,0.005435,0.048531,0.001314,0.817980,0.057258,0.069482
3,3.0,0.019544,0.013389,0.001033,0.833887,0.028320,0.103826
4,4.0,0.816041,0.007256,0.159430,0.000640,0.005497,0.011136
...,...,...,...,...,...,...,...
11133,11133.0,0.001761,0.008051,0.001109,0.001894,0.004193,0.982992
11134,11134.0,0.003704,0.980797,0.001408,0.001236,0.000135,0.012720
11135,11135.0,0.035923,0.416452,0.009732,0.100241,0.004969,0.432683
11136,11136.0,0.005600,0.804470,0.003268,0.035733,0.004896,0.146032


In [18]:
def df2tensor(df):
    max_list = df[labels].values.tolist()
    max_tensor = torch.tensor(max_list)
    max_tensor.shape
    return max_tensor

max_tensor = df2tensor(max_df)
min_tensor = df2tensor(min_df)
mean_tensor = df2tensor(mean_df)
median_tensor = df2tensor(median_df)
std_tensor = df2tensor(std_df)
mean_tensor.shape

torch.Size([11138, 6])

In [21]:
def df2tensor_alldata(meta_df):
    labels = ['seizure', 'lpd', 'gpd', 'lrda', 'grda', 'other']
    data = []

    for i in range(int(meta_df['training_instance'].max()) + 1):
        instance_data = []
        instance_df = meta_df[meta_df['training_instance'] == i]

        for j in range(len(instance_df)):
            model_data = [instance_df.iloc[j][label] for label in labels]
            instance_data.append(model_data)

        data.append(instance_data)
    data_tensor = torch.tensor(data)
    return data_tensor

input_data = df2tensor_alldata(df)
input_data.shape

torch.Size([11138, 15, 6])

In [22]:
input_meta = torch.cat([max_tensor.unsqueeze(1), min_tensor.unsqueeze(1), 
                        mean_tensor.unsqueeze(1), std_tensor.unsqueeze(1), median_tensor.unsqueeze(1)], 1)
input_meta.shape

torch.Size([11138, 5, 6])

In [23]:
train_df = pd.read_csv("/kaggle/input/hms-harmful-brain-activity-classification/train.csv")
train_features = pd.DataFrame()

for label in labels:
    train_grouped_by_spectrogram_id = train_df[f'{label}_vote'].groupby(train_df['spectrogram_id']).sum()

    label_vote_sum = pd.DataFrame()
    label_vote_sum["spectrogram_id"] = train_grouped_by_spectrogram_id.index
    label_vote_sum[f"{label}_vote_sum"] = train_grouped_by_spectrogram_id.values

    if label == labels[0]:
        train_features = label_vote_sum
    else:
        train_features = train_features.merge(label_vote_sum, on='spectrogram_id', how='left')

# Add a column to sum all votes
train_features['total_vote'] = 0
for label in labels:
    train_features['total_vote'] += train_features[f'{label}_vote_sum']

# Calculate and store the normalized vote for each label
for label in labels:
    train_features[f'{label}_vote'] = train_features[f'{label}_vote_sum'] / train_features['total_vote']

# Select relevant columns for the training features
choose_cols = ['spectrogram_id']
for label in labels:
    choose_cols += [f'{label}_vote']
train_features = train_features[choose_cols]

# Add a column with the path to the spectrogram files
train_features['path'] = train_features['spectrogram_id'].apply(lambda x: "/kaggle/input/hms-harmful-brain-activity-classification/train_spectrograms/" + str(x) + ".parquet")

labels_meta = train_features[['seizure_vote', 'lpd_vote', 'gpd_vote', 'lrda_vote', 'grda_vote', 'other_vote']].values.tolist()
labels_meta_tensor = torch.tensor(labels_meta)
labels_meta_tensor.shape

torch.Size([11138, 6])

In [24]:
class MLP(nn.Module):
    def __init__(self, input_size):
        super(MLP, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_size*6, 600),
            nn.ReLU(),
            nn.Linear(600, 1200),
            nn.ReLU(),
            nn.Linear(1200, 1000),
            nn.ReLU(),
            nn.Linear(1000, 600),
            nn.ReLU(),
            nn.Linear(600, 6),
            nn.Softmax(1)
        )
        
    def forward(self, x):
        # convert tensor (64, 20, 6) --> (64, 20*6)
        x = x.view(x.size(0), -1)
        x = self.layers(x)
        return x

In [25]:
class KLDivergenceLoss(nn.Module):
    def __init__(self, epsilon=1e-15):
        super(KLDivergenceLoss, self).__init__()
        self.epsilon = epsilon

    def forward(self, p, q):
        # Clip probabilities to avoid log(0)
        p = torch.clamp(p, self.epsilon, 1 - self.epsilon)

        # Compute logarithms
        log_p = torch.log(p)
        log_q = nn.functional.log_softmax(q, dim=1)

        # Calculate element-wise KL divergence
        kl_divergence_per_point = p * (log_p - log_q)

        # Sum over classes to get KL divergence per sample
        kl_divergence_per_sample = torch.sum(kl_divergence_per_point, dim=1)

        # Compute mean over samples
        kl_loss = torch.mean(kl_divergence_per_sample)

        return kl_loss


In [26]:
import torch
import torch.nn as nn
import torch.optim as optim
import sys

def train_mlp(input_tensor, target_tensor, model, criterion_ce, criterion_kl, optimizer, batch_size=32, num_epochs=10, validation_size=0.1,lambda_=1):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    
    # Split data into training and validation sets
    total_size = len(input_tensor)
    val_size = int(total_size * validation_size)
    train_size = total_size - val_size
    
    train_indices = torch.randperm(train_size)
    val_indices = torch.arange(train_size, total_size)
    
    train_input = input_tensor[train_indices].to(device)
    train_target = target_tensor[train_indices].to(device)
    
    val_input = input_tensor[val_indices].to(device)
    val_target = target_tensor[val_indices].to(device)
    
    best_total_loss = sys.maxsize
    not_improved_counter = 0
    
    for epoch in range(num_epochs):
        model.train()
        running_ce_loss = 0.0
        running_kl_loss = 0.0
        tel = 0
        
        # Shuffle indices for training
        indices = torch.randperm(train_size)
        
        # Iterate over batches for training
        for i in range(0, len(indices), batch_size):
            batch_indices = indices[i:i+batch_size]
            
            inputs = train_input[batch_indices]
            targets = train_target[batch_indices]

            # Forward pass
            outputs = model(inputs)
            ce_loss = criterion_ce(outputs, targets)
            kl_loss = criterion_kl(targets, outputs)

            # Combined loss
            total_loss = ce_loss + lambda_ * kl_loss

            # Backward pass and optimization
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

            running_ce_loss += ce_loss.item()
            running_kl_loss += kl_loss.item()
            tel += 1

        average_ce_loss = running_ce_loss / tel
        average_kl_loss = running_kl_loss / tel
        print(f"Epoch [{epoch + 1}/{num_epochs}], CE Loss: {average_ce_loss:.4f}, KL Loss: {average_kl_loss:.4f}")
        
        # Validation
        model.eval()
        with torch.no_grad():
            val_outputs = model(val_input)
            val_ce_loss = criterion_ce(val_outputs, val_target)
            val_accuracy = accuracy(val_outputs, val_target)
            val_kl_loss = criterion_kl(val_outputs, val_target)
            val_total_loss = val_ce_loss + lambda_ * val_kl_loss
            
        if val_total_loss < best_total_loss:
            torch.save(model.state_dict(), 'meta_model_best.pth')
            best_total_loss = val_total_loss
            print(f"Model saved on epoch: {epoch} with total loss: {best_total_loss} and accuracy: {val_accuracy}")
            not_improved_counter = 0
        else:
            not_improved_counter += 1
        if not_improved_counter >= 20:
            break

def accuracy(outputs, targets):
    _, predicted = torch.max(outputs, 1)
    _, label = torch.max(targets, 1)
    correct = (predicted == label).sum().item()
    total = targets.size(0)
    return correct / total


In [27]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
input_tensor = input_meta.float().to(device)
print(input_tensor.shape)
labels_meta_tensor = labels_meta_tensor.float().to(device)

meta_model = MLP(input_tensor.shape[1]).to(device)

optimizer = torch.optim.Adam(meta_model.parameters(), lr=0.001)
loss_ce = nn.CrossEntropyLoss()
loss_bce = nn.BCELoss()
loss_kl = KLDivergenceLoss()

torch.Size([11138, 5, 6])


In [12]:
meta_model

MLP(
  (layers): Sequential(
    (0): Linear(in_features=30, out_features=600, bias=True)
    (1): ReLU()
    (2): Linear(in_features=600, out_features=1200, bias=True)
    (3): ReLU()
    (4): Linear(in_features=1200, out_features=1000, bias=True)
    (5): ReLU()
    (6): Linear(in_features=1000, out_features=600, bias=True)
    (7): ReLU()
    (8): Linear(in_features=600, out_features=6, bias=True)
    (9): Softmax(dim=1)
  )
)

In [19]:
mean_tensor.unsqueeze(1).shape

torch.Size([11138, 1, 6])

In [28]:
labels_meta_tensor.shape

torch.Size([11138, 6])

In [41]:
print(loss_kl(mean_tensor.to(device),labels_meta_tensor))

tensor(0.6482, device='cuda:0')


In [33]:
train_mlp(input_tensor, labels_meta_tensor, meta_model, loss_ce, loss_kl, optimizer, batch_size=64, num_epochs=80, validation_size=0.1, lambda_=2)
torch.save(meta_model.state_dict(), 'meta_model_final.pth')

Epoch [1/80], CE Loss: 1.2759, KL Loss: 0.9034
Model saved on epoch: 0 with total loss: 3.5981764793395996 and accuracy: 0.9299191374663073
Epoch [2/80], CE Loss: 1.2212, KL Loss: 0.8485
Epoch [3/80], CE Loss: 1.2202, KL Loss: 0.8474
Epoch [4/80], CE Loss: 1.2199, KL Loss: 0.8471
Model saved on epoch: 3 with total loss: 3.5908422470092773 and accuracy: 0.9281221922731356
Epoch [5/80], CE Loss: 1.2182, KL Loss: 0.8454
Model saved on epoch: 4 with total loss: 3.565169334411621 and accuracy: 0.9299191374663073
Epoch [6/80], CE Loss: 1.2180, KL Loss: 0.8455
Model saved on epoch: 5 with total loss: 3.549095630645752 and accuracy: 0.9353099730458221
Epoch [7/80], CE Loss: 1.2175, KL Loss: 0.8446
Epoch [8/80], CE Loss: 1.2170, KL Loss: 0.8441
Model saved on epoch: 7 with total loss: 3.5357446670532227 and accuracy: 0.9380053908355795
Epoch [9/80], CE Loss: 1.2167, KL Loss: 0.8441
Epoch [10/80], CE Loss: 1.2171, KL Loss: 0.8444
Model saved on epoch: 9 with total loss: 3.5114283561706543 and ac

In [38]:
with torch.no_grad():
    output = meta_model(input_tensor)

In [39]:
print(loss_kl(output,labels_meta_tensor))

tensor(1.1249, device='cuda:0')


1.1502 input, mean, std
1.1211 max, min, mean, std, median
1.1325 input, max, min, mean, std, median
Current version: max, min, mean, std, median