<a href="https://www.kaggle.com/code/shokhjahonisroilov/radar-best-score?scriptVersionId=295650978" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
import os
# Debugging aid: ask the CUDA runtime to launch kernels synchronously so that
# a device-side error is reported at the call that triggered it. This must be
# set before CUDA is initialised, and it slows GPU execution down.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [None]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import random
from sklearn.model_selection import train_test_split
import torch.nn.functional as F
from collections import Counter
# Root directory of the competition data (a Kaggle input path).
DATASET = '/kaggle/input/radar-ioai-2025'
# Directories that are scanned for `*.mat.pt` sample files further down.
DATA_PATH = f'{DATASET}/training_set/training_set'
TEST_DATA_PATH = f'{DATASET}/test_set/test_set'
# Device string used for the model, the batches, autocast and the grad scaler.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
# Seed every random number generator the notebook touches (Python, NumPy,
# PyTorch CPU and CUDA) with the same value.
seed = 42
os.environ["PYTHONHASHSEED"] = str(seed)
for _seed_fn in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed,
    torch.cuda.manual_seed_all,
):
    _seed_fn(seed)

# Ask cuDNN for deterministic kernels and disable its autotuner.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [None]:
class CustomDataset(torch.utils.data.Dataset):
    """Dataset of saved tensors: rows 0-5 are input channels, row 6 the label map.

    Each item is loaded with ``torch.load`` from one of ``file_paths``. The six
    input channels are regrouped from their stored order into
    ``[0, 2, 4, 1, 3, 5]``, i.e. the "static" indices first and the "dynamic"
    indices last, so consumers should split the channel axis into its first
    and last three channels rather than de-interleave it again.
    """

    def __init__(self, file_paths):
        self.file_paths = file_paths
        # Channel positions of the two groups in the stored tensor.
        self.static_idx = torch.tensor([0, 2, 4])
        self.dynamic_idx = torch.tensor([1, 3, 5])

    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, idx):
        """Return ``(images, labels)``: float32 ``[6, ...]`` and int64 labels."""
        sample = torch.load(self.file_paths[idx])

        channels = sample[:6].float()
        regrouped = torch.cat(
            [
                channels.index_select(0, self.static_idx),
                channels.index_select(0, self.dynamic_idx),
            ],
            dim=0,
        )

        return regrouped, sample[6].long()

class TrainDataset(CustomDataset):
    """Training variant of ``CustomDataset`` with a random horizontal flip."""

    def __getitem__(self, idx):
        """Return ``(images, labels)``, mirrored along the last axis half the time.

        NOTE(review): only channels 0 and 1 of the (already regrouped) image
        tensor are mirrored, whereas the whole label map is. Confirm that the
        remaining four channels are meant to stay unflipped.
        """
        images, labels = super().__getitem__(idx)

        if random.random() >= 0.5:
            return images, labels

        for channel in (0, 1):
            images[channel] = images[channel].flip(-1)

        return images, labels.flip(-1)

# os.listdir() returns entries in arbitrary order. Sort the paths so the
# seeded, shuffled DataLoader sees the same sequence of samples on every run.
file_paths = sorted(
    f'{DATA_PATH}/{name}'
    for name in os.listdir(DATA_PATH)
    if name.endswith('.mat.pt')
)

train_dataset = TrainDataset(file_paths=file_paths)

# Shuffled mini-batches of 8; the last incomplete batch is dropped.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    shuffle=True,
    batch_size=8,
    num_workers=2,
    drop_last=True,
    pin_memory=True,
)

In [None]:
# Notebook cell output: display the collected training file paths.
file_paths

In [None]:
class DoubleConv(nn.Module):
    """Two 3x3 'same'-padded conv -> BatchNorm -> GELU stages.

    A ``Dropout2d(0.3)`` sits between the two stages. Spatial size is kept;
    the channel count goes from ``in_channels`` to ``out_channels``.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        first_stage = [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding='same'),
            nn.BatchNorm2d(out_channels),
            nn.GELU(),
        ]
        second_stage = [
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding='same'),
            nn.BatchNorm2d(out_channels),
            nn.GELU(),
        ]
        self.s = nn.Sequential(*first_stage, nn.Dropout2d(0.3), *second_stage)

    def forward(self, x):
        """Apply both stages to ``x`` and return the result."""
        return self.s(x)

In [None]:
class ResBlock2d(nn.Module):
    """Residual block: ``relu(proj(x) + res_scale * DoubleConv(x))``.

    ``proj`` is a bias-free 1x1 convolution that brings the shortcut to
    ``out_channels`` so it can be added to the convolutional branch.
    """

    def __init__(self, in_channels, out_channels, res_scale=1.0):
        super().__init__()
        self.proj = nn.Conv2d(in_channels, out_channels, 1, bias=False)
        self.res_scale = res_scale

        self.block = DoubleConv(in_channels, out_channels)

    def forward(self, x):
        """Return the activated sum of the shortcut and the scaled branch."""
        branch = self.block(x)
        shortcut = self.proj(x)
        combined = shortcut + self.res_scale * branch
        return F.relu(combined, inplace=True)

In [None]:
class Encode(nn.Module):
    """Encoder stage: residual block, then 2x2 max-pool and channel dropout."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = ResBlock2d(in_channels, out_channels)
        self.pool = nn.MaxPool2d(2)
        self.dropout = nn.Dropout2d(0.3)

    def forward(self, x):
        """Return ``(skip, down)``.

        ``skip`` is the residual block output at the input resolution;
        ``down`` is its pooled, dropout-regularised version.
        """
        skip = self.conv(x)
        down = self.dropout(self.pool(skip))
        return skip, down

In [None]:
class AttentionGate(nn.Module):
    """Additive attention gate that rescales a skip feature map.

    Args:
        F_g: channels of the gating (decoder) feature ``g``.
        F_l: channels of the skip (encoder) feature ``x``.
        F_int: channels of the intermediate projection.
    """

    def __init__(self, F_g, F_l, F_int):
        super().__init__()

        def conv_bn(c_in, c_out):
            # 1x1 convolution followed by batch normalisation.
            return [
                nn.Conv2d(c_in, c_out, kernel_size=1, stride=1, padding=0, bias=True),
                nn.BatchNorm2d(c_out),
            ]

        self.W_g = nn.Sequential(*conv_bn(F_g, F_int))
        self.W_x = nn.Sequential(*conv_bn(F_l, F_int))
        self.psi = nn.Sequential(*conv_bn(F_int, 1), nn.Sigmoid())
        self.relu = nn.ReLU(inplace=True)

    def forward(self, g, x):
        """Return ``x`` multiplied by a single-channel sigmoid mask.

        The mask is computed from the sum of the projected ``g`` (decoder
        feature) and projected ``x`` (encoder skip feature), so both must
        have the same spatial size.
        """
        joint = self.relu(self.W_g(g) + self.W_x(x))
        mask = self.psi(joint)
        return x * mask


In [None]:
class Decode(nn.Module):
    """Decoder stage: upsample, gate the skip, concatenate, residual block.

    Args:
        in_channels: channels of the incoming (lower-resolution) feature.
        skip: channels of the encoder skip feature.
        out_channels: channels after upsampling and after the residual block.
        pad: ``output_padding`` forwarded to the transposed convolution, used
            to reach the skip's exact spatial size.
    """

    def __init__(self, in_channels, skip, out_channels, pad):
        super().__init__()
        # The gate sees the *upsampled* feature, hence F_g = out_channels.
        self.attn = AttentionGate(F_g=out_channels, F_l=skip, F_int=skip // 2)

        self.unpool = nn.ConvTranspose2d(
            in_channels, out_channels, kernel_size=2, stride=2, output_padding=pad
        )
        self.dropout = nn.Dropout2d(0.3)
        self.conv = ResBlock2d(out_channels + skip, out_channels)

    def forward(self, x, skip):
        """Upsample ``x``, fuse it with the attention-gated ``skip``."""
        upsampled = self.unpool(x)
        gated_skip = self.attn(upsampled, skip)
        merged = self.dropout(torch.cat([upsampled, gated_skip], dim=1))
        return self.conv(merged)

In [None]:
class DuoUNet(nn.Module):
    """Attention U-Net with separate static and dynamic encoder branches.

    The 6-channel input is split into a static half (channels 0-2) and a
    dynamic half (channels 3-5), matching the ``[static x 3, dynamic x 3]``
    layout produced by the datasets in this file. Each half runs through its
    own four-level encoder; at every level the two skip maps are concatenated
    and reduced by a 1x1 convolution, and the same is done for the final
    downsampled maps before the bridge. A single decoder with attention gates
    consumes the fused skips.

    The ``output_padding`` pairs given to the decoder stages are fixed, so the
    upsampled maps only line up with the skips for compatible input sizes
    (working through the arithmetic, e.g. 50 x 181 inputs fit).

    Args:
        in_channels: accepted for signature compatibility; not used, the
            network always expects 6 input channels.
        num_filters: channel width of the first level (doubled per level).
        num_classes: number of output logit channels.
    """

    def __init__(self, in_channels=6, num_filters=32, num_classes=5):
        super().__init__()

        def fuse_block(ch):
            # 1x1 conv merging concatenated static/dynamic maps (2*ch -> ch).
            return nn.Conv2d(ch * 2, ch, kernel_size=1, bias=False)

        # --- Encoders (static & dynamic); Encode returns (skip, downsampled) ---
        # Level 1: 3 -> num_filters
        self.enc_s1 = Encode(3, num_filters)
        self.enc_d1 = Encode(3, num_filters)
        self.fuse1 = fuse_block(num_filters)

        # Level 2: num_filters -> 2x
        self.enc_s2 = Encode(num_filters, num_filters * 2)
        self.enc_d2 = Encode(num_filters, num_filters * 2)
        self.fuse2 = fuse_block(num_filters * 2)

        # Level 3: 2x -> 4x
        self.enc_s3 = Encode(num_filters * 2, num_filters * 4)
        self.enc_d3 = Encode(num_filters * 2, num_filters * 4)
        self.fuse3 = fuse_block(num_filters * 4)

        # Level 4: 4x -> 8x
        self.enc_s4 = Encode(num_filters * 4, num_filters * 8)
        self.enc_d4 = Encode(num_filters * 4, num_filters * 8)
        self.fuse4 = fuse_block(num_filters * 8)

        # Fuses the final *downsampled* branch outputs, not the skips.
        self.fuse_bottleneck = fuse_block(num_filters * 8)

        # --- Bridge: 8x -> 16x ---
        self.bridge = DoubleConv(num_filters * 8, num_filters * 16)

        # --- Decoder: Decode(in, skip, out, output_padding) ---
        # Up 1: bridge (16x) + fused skip 4 (8x) -> 8x
        self.up1 = Decode(num_filters * 16, num_filters * 8, num_filters * 8, (0, 0))

        # Up 2: 8x + fused skip 3 (4x) -> 4x
        self.up2 = Decode(num_filters * 8, num_filters * 4, num_filters * 4, (0, 1))

        # Up 3: 4x + fused skip 2 (2x) -> 2x
        self.up3 = Decode(num_filters * 4, num_filters * 2, num_filters * 2, (1, 0))

        # Up 4: 2x + fused skip 1 (1x) -> num_filters
        self.up4 = Decode(num_filters * 2, num_filters, num_filters, (0, 1))

        self.out_conv = nn.Conv2d(num_filters, num_classes, kernel_size=1)

    def forward(self, x):
        """Return per-pixel class logits of shape ``(B, num_classes, H, W)``.

        ``x`` must be ``(B, 6, H, W)`` with the three static channels first
        and the three dynamic channels last.
        """
        # 1. Split input. The datasets already regroup the stored interleaved
        #    channels into [static x 3, dynamic x 3], so split at channel 3;
        #    de-interleaving again here would mix the two modalities.
        xs_in = x[:, :3]
        xd_in = x[:, 3:6]

        # 2. Encoders, fusing the two skips at every level.
        x1s, xs = self.enc_s1(xs_in)
        x1d, xd = self.enc_d1(xd_in)
        x1 = self.fuse1(torch.cat([x1s, x1d], dim=1))

        x2s, xs = self.enc_s2(xs)
        x2d, xd = self.enc_d2(xd)
        x2 = self.fuse2(torch.cat([x2s, x2d], dim=1))

        x3s, xs = self.enc_s3(xs)
        x3d, xd = self.enc_d3(xd)
        x3 = self.fuse3(torch.cat([x3s, x3d], dim=1))

        x4s, xs = self.enc_s4(xs)
        x4d, xd = self.enc_d4(xd)
        x4 = self.fuse4(torch.cat([x4s, x4d], dim=1))

        # 3. Bottleneck: fuse the downsampled outputs of both branches.
        x_bottleneck = self.fuse_bottleneck(torch.cat([xs, xd], dim=1))

        # 4. Bridge
        x = self.bridge(x_bottleneck)

        # 5. Decoder, deepest fused skip first.
        x = self.up1(x, x4)
        x = self.up2(x, x3)
        x = self.up3(x, x2)
        x = self.up4(x, x1)

        return self.out_conv(x)

In [None]:
class GroupedStem(nn.Module):
    """Input stem: a grouped 3x3 conv followed by a dense 3x3 conv.

    The first convolution uses ``groups`` groups, so each group only sees a
    run of ``in_channels // groups`` consecutive input channels and produces
    ``out_channels`` maps. The second convolution mixes the resulting
    ``out_channels * groups`` maps down to ``out_channels``. Both are
    bias-free and followed by BatchNorm and ReLU.
    """

    def __init__(self, in_channels=6, out_channels=32, groups=3):
        super().__init__()
        mid = out_channels * groups
        grouped = nn.Conv2d(in_channels, mid, 3, padding=1, groups=groups, bias=False)
        mixing = nn.Conv2d(mid, out_channels, 3, padding=1, bias=False)
        self.net = nn.Sequential(
            grouped,
            nn.BatchNorm2d(mid),
            nn.ReLU(inplace=True),
            mixing,
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        """Return the stem features for ``x`` (spatial size unchanged)."""
        return self.net(x)

In [None]:
class TriNet(nn.Module):
    """Three-level attention U-Net behind a grouped convolutional stem.

    The stem processes the input in three channel groups (consecutive channel
    pairs for a 6-channel input) before a shared encoder/decoder.

    NOTE(review): whether those consecutive pairs are meaningful depends on
    the channel order the dataset emits - confirm against the data pipeline.

    The decoder ``output_padding`` pairs are fixed, so only compatible input
    sizes work (working through the arithmetic, e.g. 50 x 181 inputs fit).

    Args:
        in_channels: number of input channels given to the stem.
        num_filters: stem width; encoder levels use 2x, 4x and 8x of it.
        num_classes: number of output logit channels.
    """

    def __init__(self, in_channels=6, num_filters=32, num_classes=5):
        super().__init__()
        nf = num_filters
        self.stem = GroupedStem(in_channels, nf, groups=3)

        # --- Encoder: nf -> 2nf -> 4nf -> 8nf ---
        self.down1 = Encode(nf, nf * 2)
        self.down2 = Encode(nf * 2, nf * 4)
        self.down3 = Encode(nf * 4, nf * 8)

        # --- Bridge: 8nf -> 16nf ---
        self.bridge = DoubleConv(nf * 8, nf * 16)

        # --- Decoder: Decode(in, skip, out, output_padding) ---
        self.up1 = Decode(nf * 16, nf * 8, nf * 8, (0, 1))  # + skip 3 -> 8nf
        self.up2 = Decode(nf * 8, nf * 4, nf * 4, (1, 0))   # + skip 2 -> 4nf
        self.up3 = Decode(nf * 4, nf * 2, nf, (0, 1))       # + skip 1 -> nf

        # --- Output: nf -> num_classes logits ---
        self.out_conv = nn.Conv2d(nf, num_classes, 1)

    def forward(self, x):
        """Return per-pixel class logits of shape ``(B, num_classes, H, W)``."""
        feats = self.stem(x)                  # full resolution
        skip1, feats = self.down1(feats)      # skip1: H,   feats: H/2
        skip2, feats = self.down2(feats)      # skip2: H/2, feats: H/4
        skip3, feats = self.down3(feats)      # skip3: H/4, feats: H/8

        feats = self.bridge(feats)            # H/8

        # Walk back up, pairing each decoder stage with its encoder skip.
        stages = ((self.up1, skip3), (self.up2, skip2), (self.up3, skip1))
        for decoder, skip in stages:
            feats = decoder(feats, skip)

        return self.out_conv(feats)

In [None]:
class MegaEnsemble(nn.Module):
    """Average of the logits of one ``TriNet`` and one ``DuoUNet``."""

    def __init__(self, num_filters=32, num_classes=5):
        super().__init__()
        self.tri = TriNet(num_filters=num_filters, num_classes=num_classes)
        self.double_net = DuoUNet(num_filters=num_filters, num_classes=num_classes)

    def forward(self, x):
        """Run both sub-networks on ``x`` and return the mean of their logits."""
        logits = self.tri(x)
        logits = logits + self.double_net(x)
        return logits / 2

In [None]:
# Per-class weights indexed by the shifted labels used in training (labels are
# incremented by one there): class 0 gets weight 1, classes 1-4 get weight 50.
class_weights = torch.tensor([1.0, 50.0, 50.0, 50.0, 50.0], device=device)

def train(model):
    """Train ``model`` on the module-level ``train_loader`` and return it.

    Uses class-weighted cross-entropy (``class_weights``), Adam at a peak
    learning rate of 1e-3, and mixed precision via autocast plus a gradient
    scaler. The learning rate is stepped once per batch: a linear warm-up
    from 1% to 100% during the first epoch, then a linear decay to zero over
    the remaining 19 epochs. Labels are shifted by +1 before the loss.

    Returns:
        ``(model, running_loss)`` where ``running_loss`` is the mean batch
        loss of the last epoch.
    """
    model.train()
    model.to(device)

    criterion = nn.CrossEntropyLoss(weight=class_weights, reduction='mean')
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    warmup = 1
    epochs = 19
    steps_per_epoch = len(train_loader)
    warmup_steps = warmup * steps_per_epoch

    # Construction order is kept as-is: each LinearLR rescales the optimizer's
    # learning rate when it is created.
    main_sch = torch.optim.lr_scheduler.LinearLR(
        optimizer,
        start_factor=1.0,
        end_factor=0.0,
        total_iters=epochs * steps_per_epoch,
    )
    warmup_sch = torch.optim.lr_scheduler.LinearLR(
        optimizer,
        start_factor=0.01,
        end_factor=1.0,
        total_iters=warmup_steps,
    )
    scheduler = torch.optim.lr_scheduler.SequentialLR(
        optimizer,
        schedulers=[warmup_sch, main_sch],
        milestones=[warmup_steps],
    )

    scaler = torch.amp.GradScaler(device)

    for epoch in range(warmup + epochs):
        model.train()

        running_loss = 0.0
        weighted_hits = 0.0
        weight_total = 0.0

        for x, y in train_loader:
            x = x.to(device)
            y = y.to(device)
            # Shift labels so the lowest class id becomes 0 for the loss.
            y += 1

            optimizer.zero_grad(set_to_none=True)

            with torch.amp.autocast(device):
                outputs = model(x)
                loss = criterion(outputs, y)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            scheduler.step()

            running_loss += loss.item()

            # Class-weighted pixel accuracy: every pixel counts with the
            # weight of its true class.
            predicted = outputs.argmax(dim=1)
            pixel_weights = class_weights[y]
            hits = (predicted == y).float()

            weighted_hits += (hits * pixel_weights).sum().item()
            weight_total += pixel_weights.sum().item()

        running_loss /= steps_per_epoch
        train_acc = 100.0 * weighted_hits / weight_total

        print(f"Epoch {epoch+1}")
        print(f"Train Loss: {running_loss:.4f}")
        print(f"Train Accuracy (weighted): {train_acc:.2f}")

    return model, running_loss

In [None]:
class MyModel(nn.Module):
    """Five ``MegaEnsemble`` members whose logits are averaged."""

    def __init__(self):
        super().__init__()
        self.models = nn.ModuleList([MegaEnsemble() for _ in range(5)])

    def forward(self, x):
        """Return the mean of all members' logits for ``x``."""
        total = 0
        for member in self.models:
            total = total + member(x)
        return total / len(self.models)

In [None]:
# Build the averaged ensemble on the selected device and train it; `loss` is
# the mean batch loss of the final epoch as returned by train().
model = MyModel().to(device)
model, loss = train(model)

In [None]:
class TestDataset(torch.utils.data.Dataset):
    """Unlabelled counterpart of the training dataset.

    Each item is the first six rows of the saved tensor, regrouped into the
    order ``[0, 2, 4, 1, 3, 5]`` (static indices first, dynamic indices last),
    together with the file's base name.
    """

    def __init__(self, file_paths):
        self.file_paths = file_paths
        self.static_idx = torch.tensor([0, 2, 4])
        self.dynamic_idx = torch.tensor([1, 3, 5])

    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, idx):
        """Return ``(images, filename)`` with ``images`` as float32."""
        path = self.file_paths[idx]
        channels = torch.load(path)[:6].float()

        # One gather with the concatenated index list: static, then dynamic.
        order = torch.cat([self.static_idx, self.dynamic_idx])
        images = channels.index_select(0, order)

        return images, os.path.basename(path)



# os.listdir() returns entries in arbitrary order; sort so the prediction rows
# come out in a deterministic order.
test_file_paths = sorted(
    f"{TEST_DATA_PATH}/{name}"
    for name in os.listdir(TEST_DATA_PATH)
    if name.endswith(".mat.pt")
)

test_dataset = TestDataset(file_paths=test_file_paths)

# Reuse the dataset built above instead of constructing a second, identical one.
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=8,
    shuffle=False,
    num_workers=2,
    pin_memory=True,
)

In [None]:
def run_inference(model, data_loader):
    """Predict a label map for every sample yielded by ``data_loader``.

    Args:
        model: network returning logits of shape ``(B, C, H, W)``.
        data_loader: iterable of ``(images, file_names)`` batches.

    Returns:
        ``(predictions, filenames)``: two parallel lists holding one NumPy
        label array and one file name per sample, in loader order.
    """
    model.eval()
    model.to(device)

    predictions = []
    filenames = []

    with torch.no_grad():
        for images, file_names in data_loader:
            outputs = model(images.to(device))

            # Shift back: class indices [0..4] -> labels [-1..3].
            # Copy the whole batch to the host once instead of per sample.
            batch_preds = (outputs.argmax(dim=1) - 1).cpu().numpy()

            predictions.extend(batch_preds)
            filenames.extend(file_names)

    return predictions, filenames

In [None]:
test_predictions, test_filenames = run_inference(model, test_loader)

# One row per test file: the file name followed by one column per pixel of
# the flattened prediction map.
rows = [
    {"filename": fname, **{f"pixel_{i}": p for i, p in enumerate(pred.flatten())}}
    for fname, pred in zip(test_filenames, test_predictions)
]

submission = pd.DataFrame(rows)
submission.to_csv("final.csv", index=False)
