In [1]:
!pip install linformer

Collecting linformer
  Downloading linformer-0.2.3-py3-none-any.whl.metadata (602 bytes)
Downloading linformer-0.2.3-py3-none-any.whl (6.2 kB)
Installing collected packages: linformer
Successfully installed linformer-0.2.3


In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found below the Kaggle input directory.
input_file_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/pathfinder-medium-integer/merged_data.h5
/kaggle/input/pathfinder-hard-integer/merged_data.h5
/kaggle/input/pathfinder-easy-integer/merged_data.h5


In [3]:
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split
import pytorch_lightning as pl
from linformer import Linformer
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
from torchmetrics.functional import accuracy
from torchmetrics import Precision, Recall
import h5py

# Seed PyTorch's global random number generator.
torch.manual_seed(19)

<torch._C.Generator at 0x79550c4044d0>

In [4]:
# Dataset
class SequenceDataset(Dataset):
    """Pixel-sequence view over an HDF5 file holding ``images`` and ``labels``.

    Every item is one image flattened into a column of integer tokens together
    with its label as a float32 scalar.

    The HDF5 file is opened lazily on first item access and once per process.
    An h5py handle must not be shared between a parent process and forked
    ``DataLoader`` workers, and it cannot be pickled for spawned workers, so
    the constructor only reads the dataset length through a short-lived handle.

    Args:
        h5_file: Path of the HDF5 file to read.
    """

    def __init__(self, h5_file):
        self.h5_path = h5_file
        # Populated by _ensure_open() in whichever process reads items.
        self.h5_file = None
        self.images = None  # expected shape (N, 32, 32) per the original notes
        self.labels = None  # expected shape (N,) per the original notes
        self._owner_pid = None

        # Read the length once and close the file again, so no open handle is
        # inherited by worker processes.
        with h5py.File(h5_file, "r") as handle:
            self._length = len(handle["labels"])

    def _ensure_open(self):
        """Open the file in the current process if it is not open here yet."""
        pid = os.getpid()
        if self.h5_file is None or self._owner_pid != pid:
            self.h5_file = h5py.File(self.h5_path, "r")
            self.images = self.h5_file["images"]
            self.labels = self.h5_file["labels"]
            self._owner_pid = pid

    def close(self):
        """Release the HDF5 handle held by the current process, if any."""
        if self.h5_file is not None and self._owner_pid == os.getpid():
            self.h5_file.close()
        self.h5_file = None
        self.images = None
        self.labels = None
        self._owner_pid = None

    def __getstate__(self):
        # Drop the unpicklable handle; the receiving process reopens the file.
        state = self.__dict__.copy()
        state.update(h5_file=None, images=None, labels=None, _owner_pid=None)
        return state

    def __len__(self):
        return self._length

    def __getitem__(self, idx):
        """Return ``(image, label)`` for item ``idx``.

        ``image`` is an int tensor with a trailing singleton axis, i.e.
        ``(num_pixels, 1)``; ``label`` is a float32 scalar tensor.
        """
        self._ensure_open()

        # Flatten the image into a 1-D pixel sequence, then add a trailing axis.
        image = self.images[idx].flatten()
        image = torch.tensor(image, dtype=torch.int).unsqueeze(-1)

        label = torch.tensor(self.labels[idx], dtype=torch.float32)

        return image, label

In [5]:
class TemporalBlock(nn.Module):
    """Residual block made of two dilated, causal 1-D convolutions.

    Each convolution pads both ends of the sequence by ``padding`` and the
    block then trims ``padding`` steps from the right-hand end. With
    ``padding == (kernel_size - 1) * dilation`` and ``stride == 1`` the
    sequence length is preserved and an output step only sees inputs at the
    same or earlier positions.

    Args:
        input_dim: Number of input channels.
        output_dim: Number of output channels.
        kernel_size: Convolution kernel size.
        stride: Convolution stride. The residual sum only lines up for 1.
        dilation: Convolution dilation.
        padding: Padding given to both convolutions and trimmed from the right
            afterwards. ``None`` (default) selects
            ``(kernel_size - 1) * dilation``. The previous default of 1 made
            the residual sum fail with a length mismatch for the default
            ``kernel_size=3``.
        dropout: Dropout probability applied after each activation.

    Input and output are ``(batch, channels, length)`` tensors.
    """

    def __init__(self, input_dim, output_dim, kernel_size=3, stride=1, dilation=1, padding=None, dropout=0.2):
        super().__init__()

        if padding is None:
            # Length-preserving causal padding.
            padding = (kernel_size - 1) * dilation
        # None means "nothing to trim"; a slice ending at -0 would be empty.
        self.padding = padding if padding > 0 else None

        # First convolution
        self.conv1 = nn.Conv1d(input_dim, output_dim, kernel_size, stride=stride, padding=padding, dilation=dilation)
        self.norm1 = nn.BatchNorm1d(output_dim)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)

        # Second convolution
        self.conv2 = nn.Conv1d(output_dim, output_dim, kernel_size, stride=stride, padding=padding, dilation=dilation)
        self.norm2 = nn.BatchNorm1d(output_dim)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout)

        # Residual connection: a 1x1 convolution only when channel counts differ.
        self.downsample = nn.Conv1d(input_dim, output_dim, kernel_size=1) if input_dim != output_dim else None
        self.relu = nn.ReLU()

    def _chomp(self, out):
        """Trim the trailing ``padding`` steps added by the symmetric padding."""
        if self.padding is None:
            return out
        return out[:, :, :-self.padding]

    def forward(self, x):
        """Apply both convolution stages and add the residual branch."""
        # First convolution block
        out = self._chomp(self.conv1(x))
        out = self.dropout1(self.relu1(self.norm1(out)))

        # Second convolution block
        out = self._chomp(self.conv2(out))
        out = self.dropout2(self.relu2(self.norm2(out)))

        # Residual connection
        res = x if self.downsample is None else self.downsample(x)

        return self.relu(out + res)

class TCN(nn.Module):
    """Token-sequence classifier built from a stack of ``TemporalBlock`` layers.

    Tokens are embedded, passed through ``num_layers`` temporal blocks whose
    dilation doubles at every level (1, 2, 4, ...), averaged over the sequence
    axis and mapped to ``output_dim`` values by a small MLP head.

    Args:
        vocab_size: Number of distinct token ids.
        embedding_dim: Size of each token embedding.
        sequence_length: Accepted for interface compatibility; not used here.
        num_channels: Channel width of every temporal block.
        num_layers: Number of temporal blocks.
        kernel_size: Kernel size used by every block.
        output_dim: Number of values produced per sequence.
        dropout: Dropout probability inside the temporal blocks.
    """

    def __init__(self, vocab_size=256, embedding_dim=64, sequence_length=1024, num_channels=64, num_layers=9, kernel_size=3, output_dim=1, dropout=0.2):
        super().__init__()

        # Token embedding
        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim)

        # Temporal blocks: only the first one consumes the embedding width.
        blocks = [
            TemporalBlock(
                embedding_dim if level == 0 else num_channels,
                num_channels,
                kernel_size,
                stride=1,
                dilation=2 ** level,
                padding=(kernel_size - 1) * (2 ** level),
                dropout=dropout,
            )
            for level in range(num_layers)
        ]
        self.tcn = nn.Sequential(*blocks)

        # Collapses the sequence axis to a single averaged step.
        self.gap = nn.AdaptiveAvgPool1d(1)

        # Classification head
        self.fc = nn.Sequential(
            nn.Linear(num_channels, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, output_dim),
        )

    def forward(self, x):
        """Map token ids (expected ``[batch, seq_len, 1]``) to ``[batch, output_dim]``."""
        # Embedding adds a feature axis; drop the singleton token axis.
        embedded = self.embedding(x).squeeze(2)
        # Conv1d wants channels before length.
        features = self.tcn(embedded.permute(0, 2, 1))
        # Average over the sequence and drop the leftover length-1 axis.
        pooled = self.gap(features).squeeze(-1)
        return self.fc(pooled)


In [6]:
class LinformerWrapper(nn.Module):
    """Token-sequence classifier with a ``Linformer`` backbone.

    Tokens are embedded, run through the Linformer, averaged over the sequence
    axis and projected to ``output_dim`` values by one linear layer.

    Args:
        vocab_size: Number of distinct token ids.
        embedding_dim: Embedding size, also passed to Linformer as ``dim``.
        seq_len: Sequence length passed to Linformer.
        depth: Passed to Linformer as ``depth``.
        heads: Passed to Linformer as ``heads``.
        k: Passed to Linformer as ``k``.
        dropout: Passed to Linformer as ``dropout``.
        output_dim: Number of values produced per sequence.
    """

    def __init__(self, vocab_size=256, embedding_dim=64, seq_len=1024, depth=4, heads=2, k=128, dropout=0.2, output_dim=1):
        super().__init__()

        # Token embedding
        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim)

        # Backbone
        backbone_config = dict(
            dim=embedding_dim,
            seq_len=seq_len,
            depth=depth,
            heads=heads,
            k=k,
            dropout=dropout,
        )
        self.linformer = Linformer(**backbone_config)

        # Classification head
        self.fc = nn.Linear(embedding_dim, output_dim)

    def forward(self, x):
        """Map token ids (expected ``[batch, seq_len, 1]``) to ``[batch, output_dim]``."""
        # Embedding adds a feature axis; drop the singleton token axis.
        embedded = self.embedding(x).squeeze(2)
        encoded = self.linformer(embedded)
        # Average over axis 1 (expected to be the sequence axis).
        pooled = encoded.mean(dim=1)
        return self.fc(pooled)


In [7]:
class SequenceClassifier(pl.LightningModule):
    """Lightning module that trains a single-logit binary classifier.

    The wrapped ``model`` is called on the batch inputs and its output is
    squeezed on axis 1 before being fed to ``BCEWithLogitsLoss``.

    Args:
        model: Network returning one logit per sample, shaped ``[batch, 1]``.
        learning_rate: Initial learning rate for Adam.
    """

    def __init__(self, model, learning_rate=1e-3):
        super().__init__()
        self.model = model
        self.loss_fn = nn.BCEWithLogitsLoss()
        self.learning_rate = learning_rate

        # NOTE(review): confirm that the attribute name `precision` does not
        # clash with anything defined on `pl.LightningModule` in the installed
        # Lightning version.
        self.precision = Precision(task='binary')
        self.recall = Recall(task='binary')

    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_idx):
        """Compute the BCE loss for one batch and log loss and learning rate."""
        sequences, labels = batch
        outputs = self.model(sequences).squeeze(1)  # Shape: [batch_size]
        loss = self.loss_fn(outputs, labels)
        self.log("train_loss", loss, on_step=False, on_epoch=True, prog_bar=True)

        # Log learning rate (retrieved from the optimizer)
        optimizer = self.trainer.optimizers[0]
        current_lr = optimizer.param_groups[0]['lr']
        self.log("learning_rate", current_lr, on_step=False, on_epoch=True, prog_bar=True)

        return loss

    def validation_step(self, batch, batch_idx):
        """Compute validation loss, accuracy, precision and recall for one batch."""
        sequences, labels = batch
        outputs = self.model(sequences).squeeze(1)
        loss = self.loss_fn(outputs, labels)
        preds = torch.sigmoid(outputs) > 0.5
        targets = labels.int()
        acc = accuracy(preds, targets, task="binary")

        # Accumulate into the metric objects and log the objects themselves.
        # Lightning then computes the value over the whole validation epoch
        # and resets the state afterwards. Logging the per-batch tensors
        # instead would report a mean of batch-level precision/recall and
        # never reset the metrics.
        self.precision.update(preds, targets)
        self.recall.update(preds, targets)

        self.log("val_loss", loss, on_epoch=True, prog_bar=True)
        self.log("val_acc", acc, on_epoch=True, prog_bar=True)
        self.log("val_precision", self.precision, on_step=False, on_epoch=True, prog_bar=True)
        self.log("val_recall", self.recall, on_step=False, on_epoch=True, prog_bar=True)

        return loss

    def configure_optimizers(self):
        """Return Adam plus a ``ReduceLROnPlateau`` schedule driven by ``val_loss``."""
        # Define optimizer
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)

        # Define ReduceLROnPlateau scheduler. The deprecated `verbose` flag is
        # not passed; learning-rate changes remain visible through the
        # "learning_rate" value logged in training_step.
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            mode="min",            # Minimize the monitored metric
            patience=3,            # Number of epochs to wait before reducing LR
            factor=0.1,            # Factor by which to reduce LR
            threshold=0.0001,      # Minimum change to qualify as an improvement
            cooldown=1             # Number of epochs to wait before resuming LR reduction
        )

        # Return as a dictionary
        return {
            "optimizer": optimizer,
            "lr_scheduler": {
                "scheduler": scheduler,
                "interval": "epoch",      # Check metric every epoch
                "monitor": "val_loss",   # Metric to monitor
            },
        }


In [8]:
def train_and_evaluate(
    model,
    train_loader,
    val_loader,
    max_epochs=100,
    learning_rate=1e-3,
    early_stop_patience=5,
    log_dir="logs/",
    model_name="model",  # Pass the model name for logging
    dataset_stage="stage",  # Pass the dataset stage (easy, medium, hard, etc.)
):
    """Fit ``model`` with early stopping and TensorBoard logging.

    Args:
        model: Network to wrap in a ``SequenceClassifier``; trained in place.
        train_loader: DataLoader with the training batches.
        val_loader: DataLoader with the validation batches.
        max_epochs: Upper bound on the number of epochs.
        learning_rate: Initial learning rate passed to ``SequenceClassifier``.
        early_stop_patience: ``patience`` of the ``val_loss`` early stopping.
        log_dir: Root directory for TensorBoard logs.
        model_name: First part of the logger run name.
        dataset_stage: Second part of the logger run name.

    Returns:
        The ``pl.Trainer`` used for the fit, so callers can inspect it.
        Earlier versions returned nothing.
    """
    # Create a unique name for the logger based on the model and stage
    logger_name = f"{model_name}_{dataset_stage}"

    # TensorBoard logger
    tb_logger = TensorBoardLogger(save_dir=log_dir, name=logger_name)

    # Callbacks
    early_stopping = EarlyStopping(
        monitor="val_loss", patience=early_stop_patience, mode="min", verbose=True
    )
    lr_monitor = LearningRateMonitor(logging_interval='epoch')

    # PyTorch Lightning Trainer. One device is requested on GPU and on CPU:
    # `devices=None` is not accepted by newer Lightning releases, while
    # `devices=1` is valid for both accelerators.
    trainer = pl.Trainer(
        max_epochs=max_epochs,
        accelerator="gpu" if torch.cuda.is_available() else "cpu",
        devices=1,
        log_every_n_steps=1,
        callbacks=[early_stopping, lr_monitor],
        logger=[tb_logger],  # Use the specific logger for this model and stage
    )

    # Initialize the SequenceClassifier with the provided model
    pl_module = SequenceClassifier(model, learning_rate)

    # Train and validate the model
    trainer.fit(pl_module, train_loader, val_loader)

    return trainer

In [9]:
# Paths for the datasets: one HDF5 file under the Kaggle input directory per
# difficulty level (easy, medium, hard).
easy_h5 = "/kaggle/input/pathfinder-easy-integer/merged_data.h5"
medium_h5 = "/kaggle/input/pathfinder-medium-integer/merged_data.h5"
hard_h5 = "/kaggle/input/pathfinder-hard-integer/merged_data.h5"

# Function to create train and validation datasets
def create_dataloaders(h5_file, batch_size=256, train_ratio=0.8, num_workers=3, seed=None):
    """Split one HDF5 dataset into shuffled-train and validation loaders.

    Args:
        h5_file: Path handed to ``SequenceDataset``.
        batch_size: Batch size of both loaders.
        train_ratio: Fraction of the samples assigned to the training split;
            the remainder forms the validation split.
        num_workers: Worker processes per loader (previously fixed at 3).
        seed: Optional integer seed for the split. With ``None`` (default) the
            split draws from PyTorch's global RNG exactly as before, so it
            depends on how much randomness was consumed earlier. Pass a value
            to get the same split on every call.

    Returns:
        Tuple ``(train_loader, val_loader)``.
    """
    dataset = SequenceDataset(h5_file)
    total_size = len(dataset)
    train_size = int(total_size * train_ratio)
    val_size = total_size - train_size

    # Only pass a generator when a seed was requested, so the default call is
    # unchanged.
    split_kwargs = {}
    if seed is not None:
        split_kwargs["generator"] = torch.Generator().manual_seed(seed)

    train_dataset, val_dataset = random_split(dataset, [train_size, val_size], **split_kwargs)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=num_workers)

    return train_loader, val_loader
    

# Training models with progressive steps
def train_progressive(model_classes, dataset_paths, log_dir="logs/", models_dir="./models/"):
    """Train every model type on the datasets in order, carrying weights forward.

    For each model type, a fresh model is built per stage. From the second
    stage on it is initialised with the weights saved at the end of the
    previous stage, then trained and saved as
    ``<models_dir>/<model_type>_<stage>.pth``.

    Args:
        model_classes: List of tuples (model_type, model_class) to train.
        dataset_paths: List of tuples (dataset_path, stage) for progressive training.
        log_dir: Directory to save logs.
        models_dir: Directory to save models.
    """
    os.makedirs(models_dir, exist_ok=True)

    for model_type, model_class in model_classes:
        print(f"\nStarting training for model type: {model_type.upper()}")

        # None until the first stage of this model type has been saved.
        previous_model_path = None
        for dataset_path, stage in dataset_paths:
            print(f"\nTraining on {stage.upper()} dataset...")

            # Create data loaders
            train_loader, val_loader = create_dataloaders(dataset_path)

            # Initialize model
            model = model_class()
            if previous_model_path:  # Fine-tuning
                # weights_only=True restricts unpickling to tensors and plain
                # containers, which is all a state_dict needs.
                # map_location="cpu" makes the load independent of the device
                # the checkpoint was saved from.
                state_dict = torch.load(previous_model_path, map_location="cpu", weights_only=True)
                model.load_state_dict(state_dict)
                print(f"Loaded weights from {previous_model_path} for fine-tuning.")

            # Train and evaluate
            train_and_evaluate(
                model=model,
                train_loader=train_loader,
                val_loader=val_loader,
                max_epochs=15,
                learning_rate=1e-3,
                early_stop_patience=5,
                log_dir=log_dir,
                model_name=model_type,
                dataset_stage=stage,
            )

            # Save the model
            save_path = os.path.join(models_dir, f"{model_type}_{stage}.pth")
            torch.save(model.state_dict(), save_path)
            print(f"Model saved at: {save_path}")

            # Update for the next stage
            previous_model_path = save_path

            del model  # Remove the current model
            torch.cuda.empty_cache()  # Free up all cached memory

In [10]:
# Define model types and datasets
# (name used in logs and file names, class used to build the model)
model_classes = [("Linformer", LinformerWrapper), ("TCN", TCN)]

# (HDF5 path, stage label) pairs, in the order the stages are trained.
datasets = list(zip(
    (easy_h5, medium_h5, hard_h5),
    ("easy", "medium", "hard"),
))

# Call the training function for all models
train_progressive(model_classes=model_classes, dataset_paths=datasets)



Starting training for model type: LINFORMER

Training on EASY dataset...


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

  self.pid = os.fork()


Training: |          | 0/? [00:00<?, ?it/s]

  self.pid = os.fork()


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Model saved at: ./models/Linformer_easy.pth

Training on MEDIUM dataset...
Loaded weights from ./models/Linformer_easy.pth for fine-tuning.


  model.load_state_dict(torch.load(previous_model_path))


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Model saved at: ./models/Linformer_medium.pth

Training on HARD dataset...
Loaded weights from ./models/Linformer_medium.pth for fine-tuning.


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Model saved at: ./models/Linformer_hard.pth

Starting training for model type: TCN

Training on EASY dataset...


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Model saved at: ./models/TCN_easy.pth

Training on MEDIUM dataset...
Loaded weights from ./models/TCN_easy.pth for fine-tuning.


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Model saved at: ./models/TCN_medium.pth

Training on HARD dataset...
Loaded weights from ./models/TCN_medium.pth for fine-tuning.


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Model saved at: ./models/TCN_hard.pth
