In [3]:
!pip install --upgrade pip

Collecting pip
  Downloading pip-25.0.1-py3-none-any.whl.metadata (3.7 kB)
Downloading pip-25.0.1-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m57.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.1.2
    Uninstalling pip-24.1.2:
      Successfully uninstalled pip-24.1.2
Successfully installed pip-25.0.1


In [4]:
!pip cache purge

Files removed: 84 (1695.3 MB)


In [1]:
!wget 'https://cernbox.cern.ch/remote.php/dav/public-files/e3pqxcIznqdYyRv/Dataset_Specific_Unlabelled.h5'

--2025-03-30 02:38:39--  https://cernbox.cern.ch/remote.php/dav/public-files/e3pqxcIznqdYyRv/Dataset_Specific_Unlabelled.h5
Resolving cernbox.cern.ch (cernbox.cern.ch)... 137.138.120.151, 128.142.53.28, 128.142.53.35, ...
Connecting to cernbox.cern.ch (cernbox.cern.ch)|137.138.120.151|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 30000002048 (28G) [application/octet-stream]
Saving to: ‘Dataset_Specific_Unlabelled.h5’


2025-03-30 03:05:18 (17.9 MB/s) - ‘Dataset_Specific_Unlabelled.h5’ saved [30000002048/30000002048]



In [19]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from torch.optim import SGD,Adam
from torch.optim.lr_scheduler import CosineAnnealingLR
import h5py
from torch.utils.data import DataLoader
import numpy as np

In [4]:
def explore_h5_file(file_path):
    with h5py.File(file_path, 'r') as f:
        print(f"Top-level keys: {list(f.keys())}")

        def explore_group(group, prefix=""):
            for key in group.keys():
                item = group[key]
                path = f"{prefix}/{key}" if prefix else key

                if isinstance(item, h5py.Group):
                    print(f"GROUP: {path}")
                    explore_group(item, path)
                elif isinstance(item, h5py.Dataset):
                    shape = item.shape
                    dtype = item.dtype

                    print(f"DATASET: {path}")
                    print(f"  Shape: {shape}")
                    print(f"  Data type: {dtype}")


                    try:
                        sample = item[0]
                        if isinstance(sample, np.ndarray):
                            print(f"  Sample shape: {sample.shape}")
                            print(f"  Sample min/max: {sample.min()}/{sample.max()}")
                        else:
                            print(f"  Sample value: {sample}")
                    except Exception as e:
                        print(f"  Error sampling data: {e}")

                    if len(item.attrs) > 0:
                        print(f"  Attributes: {list(item.attrs.keys())}")

                    print("-" * 40)

        explore_group(f)


explore_h5_file('Dataset_Specific_labelled.h5')

Top-level keys: ['Y', 'jet']
DATASET: Y
  Shape: (10000, 1)
  Data type: float32
  Sample shape: (1,)
  Sample min/max: 1.0/1.0
----------------------------------------
DATASET: jet
  Shape: (10000, 125, 125, 8)
  Data type: float32
  Sample shape: (125, 125, 8)
  Sample min/max: 0.0/255.0
----------------------------------------


In [4]:
import torch.multiprocessing as mp
mp.set_start_method('fork', force=True)

In [5]:
class H5Dataset(torch.utils.data.Dataset):
    def __init__(self, file_path, transform=None):
        self.file_path = file_path
        self.transform = transform

        with h5py.File(file_path, 'r') as f:
            self.data_key = 'jet'
            self.length = len(f[self.data_key])

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        with h5py.File(self.file_path, 'r') as f:
            data = f[self.data_key][index]

        if data.shape[2] == 8:
            rgb_data = data[:, :, :3]
        else:
            rgb_data = data

        if rgb_data.max() > 1.0:
            rgb_data = rgb_data / 255.0

        if self.transform:
            if isinstance(self.transform, SimCLRDataTransform):
                view1 = self.transform(rgb_data)
                view2 = self.transform(rgb_data)
                return (view1, view2), -1
            else:
                transformed_data = self.transform(rgb_data)
                return transformed_data, -1

        tensor_data = torch.tensor(rgb_data, dtype=torch.float32).permute(2, 0, 1)
        return tensor_data, -1

In [25]:
class Resnet18Backbone(nn.Module):
    def __init__(self, num_classes=1000, backbone_only=False):
        super(Resnet18Backbone, self).__init__()
        self.backbone_only = backbone_only

        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(64, 64, 2)
        self.layer2 = self._make_layer(64, 128, 2, stride=2)
        self.layer3 = self._make_layer(128, 256, 2, stride=2)
        self.layer4 = self._make_layer(256, 512, 2, stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

        if not backbone_only:
            self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, in_channels, out_channels, blocks, stride=1):
        layers = []
        layers.append(self._block(in_channels, out_channels, stride))
        for _ in range(1, blocks):
            layers.append(self._block(out_channels, out_channels))
        return nn.Sequential(*layers)

    def _block(self, in_channels, out_channels, stride=1):
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)

        if not self.backbone_only:
            x = self.fc(x)

        return x


In [21]:
class SimCLR(nn.Module):
  def __init__(self, feature_dim=128):
    super(SimCLR, self).__init__()

    self.encoder = Resnet18Backbone(backbone_only=True)
    self.projection_head = nn.Sequential(
    nn.Linear(512, 1024),
    nn.ReLU(inplace=True),
    nn.Linear(1024, feature_dim)
  )

  def forward(self,x):
    h = self.encoder(x)
    z = self.projection_head(h)
    return F.normalize(z, dim=1)

  def get_encoder(self):
    return self.encoder

In [9]:

# class SimCLRDataTransform:
#     def __init__(self, size=224):
#         # self.transform = transforms.Compose([
#         #     transforms.RandomResizedCrop(size=size),
#         #     transforms.RandomHorizontalFlip(),
#         #     transforms.RandomApply([
#         #         transforms.ColorJitter(0.8, 0.8, 0.8, 0.2)
#         #     ], p=0.8),
#         #     transforms.RandomGrayscale(p=0.2),
#         #     transforms.GaussianBlur(kernel_size=int(0.1 * size)),
#         #     transforms.ToTensor(),
#         #     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
#         # ])
#         self.transform = transforms.Compose([
#             transforms.RandomResizedCrop(size=224, scale=(0.2, 1.0)),
#             transforms.RandomHorizontalFlip(p=0.5),
#             transforms.RandomApply([transforms.ColorJitter(0.8, 0.8, 0.8, 0.2)], p=0.8),
#             transforms.RandomGrayscale(p=0.2),
#             transforms.GaussianBlur(kernel_size=23, sigma=(0.1, 2.0)),
#             transforms.ToTensor(),
#             transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
#         ])
#     def __call__(self, x):
#       return self.transform(x), self.transform(x)

from PIL import Image

class SimCLRDataTransform:
    def __init__(self, size=224):
        self.transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.RandomResizedCrop(size=size),
            transforms.RandomHorizontalFlip(),
            transforms.RandomApply([
                transforms.ColorJitter(0.8, 0.8, 0.8, 0.2)
            ], p=0.8),
            transforms.RandomGrayscale(p=0.2),
            transforms.GaussianBlur(kernel_size=23, sigma=(0.1, 2.0)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

    def __call__(self, x):
        return self.transform(x), self.transform(x)

In [28]:
class NTXentLoss(nn.Module):
    def __init__(self, temperature=0.15):
        super(NTXentLoss, self).__init__()
        self.temperature = temperature
        self.criterion = nn.CrossEntropyLoss(reduction='sum')
        self.similarity_f = nn.CosineSimilarity(dim=2)

    def _get_mask(self, batch_size):
        mask = torch.zeros((2 * batch_size, 2 * batch_size))
        for i in range(batch_size):
            mask[i, batch_size + i] = 1
            mask[batch_size + i, i] = 1
        mask = mask.fill_diagonal_(0)
        return mask

    def forward(self, z_i, z_j):
        batch_size = z_i.shape[0]

        mask = self._get_mask(batch_size).to(z_i.device)

        representations = torch.cat([z_i, z_j], dim=0)

        similarity_matrix = torch.matmul(representations, representations.T) / self.temperature
        sim_i_j = torch.diag(similarity_matrix, batch_size)
        sim_j_i = torch.diag(similarity_matrix, -batch_size)

        positive_pairs = torch.cat([sim_i_j, sim_j_i], dim=0)

        labels = torch.zeros(2 * batch_size).to(positive_pairs.device).long()

        similarity_matrix = similarity_matrix * mask

        logits_max, _ = torch.max(similarity_matrix, dim=1, keepdim=True)
        logits = similarity_matrix - logits_max.detach()

        exp_logits = torch.exp(logits)
        log_prob = logits - torch.log(exp_logits.sum(1, keepdim=True))

        mean_log_prob_pos = (mask * log_prob).sum(1) / mask.sum(1)

        loss = -mean_log_prob_pos.mean()
        return loss

In [41]:
import math
# def get_lr(epoch, warmup_epochs=7, max_epochs=35, initial_lr=1e-4, base_lr=1e-3):
#     if epoch < warmup_epochs:
#         return initial_lr + (base_lr - initial_lr) * epoch / warmup_epochs
#     else:
#         return base_lr * 0.5 * (1 + math.cos(math.pi * (epoch - warmup_epochs) / (max_epochs - warmup_epochs)))
def get_lr(epoch, warmup_epochs=5, max_epochs=35, initial_lr=1e-5, base_lr=5e-3, min_lr=1e-4):
    if epoch < warmup_epochs:

        return initial_lr + (base_lr - initial_lr) * epoch / warmup_epochs
    else:

        cosine_factor = 0.5 * (1 + math.cos(math.pi * (epoch - warmup_epochs) / (max_epochs - warmup_epochs)))
        return min_lr + (base_lr - min_lr) * cosine_factor


In [42]:
from torch.cuda.amp import autocast, GradScaler
def train_simclr(model, data_loader, optimizer, epochs=100, device='cuda'):
    criterion = NTXentLoss(temperature=0.20)
    model = model.to(device)
    scheduler = CosineAnnealingLR(optimizer, T_max=epochs)

    scaler = GradScaler()

    for epoch in range(epochs):
        model.train()
        total_loss = 0.0

        current_lr = get_lr(epoch, warmup_epochs=5, max_epochs=epochs, initial_lr=1e-5, base_lr=5e-3)
        for param_group in optimizer.param_groups:
          param_group['lr'] = current_lr

        print(f"Epoch [{epoch+1}/{epochs}], Learning Rate: {current_lr:.6f}")

        for batch in data_loader:
            views, _ = batch


            # x_i = torch.stack([view[0] for view in views]).to(device)
            # x_j = torch.stack([view[1] for view in views]).to(device)

            # z_i = model(x_i)
            # z_j = model(x_j)
            x_i = torch.stack([view[0] for view in views])
            x_j = torch.stack([view[1] for view in views])

            x_i = x_i.view(-1, 3, 224, 224).to(device)
            x_j = x_j.view(-1, 3, 224, 224).to(device)


            with autocast():
                z_i = model(x_i)
                z_j = model(x_j)
                loss = criterion(z_i, z_j)


            # loss = criterion(z_i, z_j)
            optimizer.zero_grad()
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            total_loss += loss.item()


        scheduler.step()



        #     optimizer.zero_grad()
        #     loss.backward()
        #     optimizer.step()

        #     total_loss += loss.item()


        # scheduler.step()


        avg_loss = total_loss / len(data_loader)
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}")


        if (epoch + 1) % 10 == 0:
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': avg_loss,
            }, f'simclr_resnet15_epoch_{epoch+1}.pt')


    torch.save(model.state_dict(), 'simclr_resnet15_final.pt')
    return model


In [57]:
class ClassificationModel(nn.Module):
    def __init__(self, encoder, num_classes, feature_dim=1000):
        super(ClassificationModel, self).__init__()
        self.encoder = encoder

        self.classifier = nn.Sequential(
            nn.Linear(1000, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):

        features = self.encoder(x)
        return self.classifier(features)

In [64]:
class RegressionModel(nn.Module):
    def __init__(self, encoder, output_dim=1, feature_dim=1000):
        super(RegressionModel, self).__init__()
        self.encoder = encoder

        for param in self.encoder.parameters():
            param.requires_grad = False

        self.regressor = nn.Sequential(
            nn.Linear(feature_dim, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Linear(128, output_dim)
        )

    def forward(self, x):
        features = self.encoder(x)
        return self.regressor(features)

In [None]:
# class SimCLRDataTransform:
#     def __init__(self, size=224):
#         self.transform = transforms.Compose([
#             transforms.ToPILImage(),  # Convert numpy array to PIL Image
#             transforms.Resize(size),  # Resize to desired size
#             transforms.RandomResizedCrop(size=size),
#             transforms.RandomHorizontalFlip(),
#             transforms.RandomApply([
#                 transforms.ColorJitter(0.8, 0.8, 0.8, 0.2)
#             ], p=0.8),
#             transforms.RandomGrayscale(p=0.2),
#             transforms.GaussianBlur(kernel_size=23, sigma=(0.1, 2.0)),
#             transforms.ToTensor(),
#             transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
#         ])

#     def __call__(self, x):
#         return self.transform(x)

In [None]:
# def main():
#     # Initialize model
#     model = SimCLR(feature_dim=128)

#     # Setup data loading
#     transform = SimCLRDataTransform(size=224)
#     dataset = H5Dataset('Dataset_Specific_Unlabelled.h5', transform=transform)

#     # Start with num_workers=0 for debugging
#     data_loader = DataLoader(
#         dataset,
#         batch_size=32,  # Smaller batch size for testing
#         shuffle=True,
#         num_workers=0,  # No multiprocessing for initial testing
#         pin_memory=True
#     )

#     # Try loading a single batch to test
#     try:
#         sample_batch = next(iter(data_loader))
#         print(f"Successfully loaded a batch: {type(sample_batch)}")

#         if isinstance(sample_batch, list):
#             print("Batch is a list. Inspecting its contents:")
#             for i, item in enumerate(sample_batch):
#                 if isinstance(item, list):
#                     print(f"Item {i} is a list with {len(item)} elements")
#                     # Inspect all elements of this nested list
#                     for j, nested_item in enumerate(item):
#                         print(f"  Element {j} type: {type(nested_item)}")
#                         if hasattr(nested_item, 'shape'):
#                             print(f"  Element {j} shape: {nested_item.shape}")
#                         else:
#                             print(f"  Element {j} has no shape attribute")
#                 elif hasattr(item, 'shape'):
#                     print(f"Item {i}: Type: {type(item)}, Shape: {item.shape}")
#                 else:
#                     print(f"Item {i}: Type: {type(item)}, No shape attribute")
#         else:
#             print("Batch is not a list. Unexpected format.")
#     except Exception as e:
#         print(f"Error inspecting batch: {e}")
#         import traceback
#         traceback.print_exc()




In [43]:
def main():

    torch.manual_seed(42)
    np.random.seed(42)


    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")


    model = SimCLR(feature_dim=128)
    model = model.to(device)

    transform = SimCLRDataTransform(size=224)
    unlabeled_dataset = H5Dataset('Dataset_Specific_Unlabelled.h5', transform=transform)

    batch_size = 128
    unlabeled_loader = DataLoader(
        unlabeled_dataset,
        batch_size=batch_size,
        shuffle=True,
        pin_memory=True,
        drop_last=True,
        num_workers=6,
        prefetch_factor=4
    )

    print(f"Unlabeled dataset size: {len(unlabeled_dataset)}")
    print(f"Number of batches: {len(unlabeled_loader)}")


    try:
        sample_batch = next(iter(unlabeled_loader))
        print(f"Batch structure verification:")
        print(f"  Batch type: {type(sample_batch)}")

        views, labels = sample_batch
        print(f"  Views type: {type(views)}, Labels shape: {labels.shape}")

        if isinstance(views, list):

            if isinstance(views[0], torch.Tensor):
                print(f"  First view shape: {views[0].shape}")
                if len(views) > 1:
                    print(f"  Second view shape: {views[1].shape}")

            elif isinstance(views[0], list):
                print(f"  Views[0] is a list with {len(views[0])} elements")
                if isinstance(views[0][0], torch.Tensor):
                    print(f"  First view shape: {views[0][0].shape}")
                    if len(views[0]) > 1:
                        print(f"  Second view shape: {views[0][1].shape}")

        print("Batch structure looks correct for SimCLR training")
    except Exception as e:
        print(f"Error verifying batch structure: {e}")
        import traceback
        traceback.print_exc()

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-5, weight_decay=1e-6)
    # optimizer = torch.optim.SGD(model.parameters(), lr=0.06, momentum=0.9, weight_decay=5e-4)
    # from torchlars import LARS
    # optimizer = LARS(torch.optim.SGD(model.parameters(), lr=1e-5, momentum=0.9))

    print("Starting SimCLR pretraining...")
    pretrained_model = train_simclr(
        model=model,
        data_loader=unlabeled_loader,
        optimizer=optimizer,
        epochs=35,
        device=device
    )

    torch.save(pretrained_model.state_dict(), 'simclr_resnet15_pretrained.pt')
    print("SimCLR pretraining completed and model saved")
    encoder = pretrained_model.get_encoder()

if __name__ == "__main__":
  main()


Using device: cuda
Model initialized
Unlabeled dataset size: 60000
Number of batches: 468
Batch structure verification:
  Batch type: <class 'list'>
  Views type: <class 'list'>, Labels shape: torch.Size([128])
  Views[0] is a list with 2 elements
  First view shape: torch.Size([128, 3, 224, 224])
  Second view shape: torch.Size([128, 3, 224, 224])
Batch structure looks correct for SimCLR training
Starting SimCLR pretraining...
Epoch [1/35], Learning Rate: 0.000010


  scaler = GradScaler()
  with autocast():


Epoch [1/35], Loss: 1.4944
Epoch [2/35], Learning Rate: 0.001008
Epoch [2/35], Loss: 1.4914
Epoch [3/35], Learning Rate: 0.002006
Epoch [3/35], Loss: 1.4914
Epoch [4/35], Learning Rate: 0.003004
Epoch [4/35], Loss: 1.4914
Epoch [5/35], Learning Rate: 0.004002
Epoch [5/35], Loss: 1.4914
Epoch [6/35], Learning Rate: 0.005000
Epoch [6/35], Loss: 1.4917
Epoch [7/35], Learning Rate: 0.004987
Epoch [7/35], Loss: 1.4919
Epoch [8/35], Learning Rate: 0.004946
Epoch [8/35], Loss: 1.4921
Epoch [9/35], Learning Rate: 0.004880
Epoch [9/35], Loss: 1.4920
Epoch [10/35], Learning Rate: 0.004788
Epoch [10/35], Loss: 1.4920
Epoch [11/35], Learning Rate: 0.004672
Epoch [11/35], Loss: 1.4920
Epoch [12/35], Learning Rate: 0.004532
Epoch [12/35], Loss: 1.4921
Epoch [13/35], Learning Rate: 0.004371
Epoch [13/35], Loss: 1.4917
Epoch [14/35], Learning Rate: 0.004189
Epoch [14/35], Loss: 1.4917
Epoch [15/35], Learning Rate: 0.003990
Epoch [15/35], Loss: 1.4916
Epoch [16/35], Learning Rate: 0.003775
Epoch [16/35

In [37]:
torch.cuda.empty_cache()


In [1]:
!wget 'https://cernbox.cern.ch/remote.php/dav/public-files/e3pqxcIznqdYyRv/Dataset_Specific_labelled.h5'

--2025-03-30 21:02:19--  https://cernbox.cern.ch/remote.php/dav/public-files/e3pqxcIznqdYyRv/Dataset_Specific_labelled.h5
Resolving cernbox.cern.ch (cernbox.cern.ch)... 137.138.120.151, 128.142.170.17, 128.142.53.28, ...
Connecting to cernbox.cern.ch (cernbox.cern.ch)|137.138.120.151|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5000042048 (4.7G) [application/octet-stream]
Saving to: ‘Dataset_Specific_labelled.h5’


2025-03-30 21:07:16 (16.2 MB/s) - ‘Dataset_Specific_labelled.h5’ saved [5000042048/5000042048]



In [5]:
encoder = torch.load('/content/simclr_resnet15_pretrained.pt')

In [8]:
import torch
import torch.nn as nn
from torch.optim import Adam

def load_pretrained_encoder(path='simclr_resnet15_pretrained.pt', device='cuda'):

    encoder = Resnet18Backbone()
    state_dict = torch.load(path, map_location=device)
    encoder.load_state_dict(state_dict)
    return encoder

In [7]:
def train_model(model, train_loader, optimizer, criterion_cls, criterion_reg, num_epochs=35, device='cuda'):
    model.to(device)
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0.0

        for x, y_cls, y_reg in train_loader:
            x, y_cls, y_reg = x.to(device), y_cls.to(device), y_reg.to(device)

            optimizer.zero_grad()
            cls_pred, reg_pred = model(x)

            loss_cls = criterion_cls(cls_pred, y_cls)
            loss_reg = criterion_reg(reg_pred.squeeze(), y_reg)

            loss = loss_cls + loss_reg
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss / len(train_loader):.4f}")

In [26]:
from torch.utils.data import Dataset
class JetDataset(Dataset):
    def __init__(self, file_path, transform=None):
        self.file_path = file_path
        self.transform = transform

        with h5py.File(file_path, 'r') as f:
            self.length = len(f['jet'])

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        with h5py.File(self.file_path, 'r') as f:

            jet_img = f['jet'][idx]
            label = f['Y'][idx][0]

        if jet_img.max() > 1.0:
            jet_img = jet_img / 255.0

        jet_img = torch.tensor(jet_img, dtype=torch.float32).permute(2, 0, 1)

        if self.transform:
            jet_img = self.transform(jet_img)

        cls_label = torch.tensor(int(label), dtype=torch.long)
        reg_label = torch.tensor(label, dtype=torch.float32)

        return jet_img, cls_label, reg_label


In [89]:
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts

def train_classification(model, train_loader, val_loader, num_epochs=35, device='cuda'):

    optimizer = optim.Adam([
        {'params': model.encoder.parameters(), 'lr': 1e-4},
        {'params': model.classifier.parameters(), 'lr': 1e-3}
    ])

    criterion = nn.CrossEntropyLoss()

    print("=== Phase 1: Training Classification Head Only ===")
    for param in model.encoder.parameters():
        param.requires_grad = False

    optimizer = optim.Adam(model.classifier.parameters(), lr=1e-3)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20)

    for epoch in range(20):
        model.train()
        total_loss = 0.0

        for x, cls_label, _ in train_loader:
            x = x.to(device)
            cls_label = cls_label.to(device)

            optimizer.zero_grad()
            logits = model(x)
            loss = criterion(logits, cls_label)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        scheduler.step()
        avg_loss = total_loss / len(train_loader)
        print(f"Phase 1 - Epoch [{epoch+1}/20], Loss: {avg_loss:.4f}")

        if (epoch + 1) % 5 == 0:
            evaluate_classification(model, val_loader, device)

    print("\n=== Phase 2: Fine-tuning Entire Model ===")
    for param in model.encoder.parameters():
        param.requires_grad = True

    optimizer = optim.Adam([
        {'params': model.encoder.parameters(), 'lr': 5e-5},
        {'params': model.classifier.parameters(), 'lr': 1e-3}
    ])

    scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2)

    for epoch in range(30):
        model.train()
        total_loss = 0.0

        for x, cls_label, _ in train_loader:
            x = x.to(device)
            cls_label = cls_label.to(device)

            optimizer.zero_grad()
            logits = model(x)
            loss = criterion(logits, cls_label)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        scheduler.step()
        avg_loss = total_loss / len(train_loader)
        print(f"Phase 2 - Epoch [{epoch+1}/30], Loss: {avg_loss:.4f}")

        if (epoch + 1) % 5 == 0:
            evaluate_classification(model, val_loader, device)

    return model


def train_regression(model, train_loader, val_loader, num_epochs=35, device='cuda'):

    optimizer = optim.Adam([
        {'params': model.encoder.parameters(), 'lr': 1e-4},
        {'params': model.regressor.parameters(), 'lr': 1e-3}
    ])

    scheduler = CosineAnnealingLR(optimizer, T_max=num_epochs)
    criterion = nn.MSELoss()
    best_val_loss = float('inf')

    for param in model.encoder.parameters():
        param.requires_grad = False

    head_optimizer = optim.Adam(model.regressor.parameters(), lr=1e-3)
    head_scheduler = CosineAnnealingLR(head_optimizer, T_max=15)

    for epoch in range(15):
        model.train()
        total_loss = 0.0

        for x, _, reg_label in train_loader:
            x = x.to(device)
            reg_label = reg_label.to(device)

            pred = model(x)
            loss = criterion(pred.squeeze(), reg_label)

            head_optimizer.zero_grad()
            loss.backward()
            head_optimizer.step()

            total_loss += loss.item()

        head_scheduler.step()
        avg_loss = total_loss / len(train_loader)
        print(f"Phase 1 - Epoch [{epoch+1}/15], Loss: {avg_loss:.4f}")

        if (epoch + 1) % 5 == 0:
            val_loss = evaluate_regression(model, val_loader, device)
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                torch.save(model.state_dict(), 'best_regression_model_phase1.pt')


    print("\nPhase 2: Fine-tuning Entire Model")
    for param in model.encoder.parameters():
        param.requires_grad = True

    full_optimizer = optim.Adam([
        {'params': model.encoder.parameters(), 'lr': 5e-5},
        {'params': model.regressor.parameters(), 'lr': 1e-3}
    ], weight_decay=1e-5)

    full_scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
        full_optimizer, T_0=10, T_mult=2)

    for epoch in range(35):
        model.train()
        total_loss = 0.0

        for x, _, reg_label in train_loader:
            x = x.to(device)
            reg_label = reg_label.to(device)

            pred = model(x)
            loss = criterion(pred.squeeze(), reg_label)

            full_optimizer.zero_grad()
            loss.backward()

            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            full_optimizer.step()

            total_loss += loss.item()

        full_scheduler.step()
        avg_loss = total_loss / len(train_loader)
        print(f"Phase 2 - Epoch [{epoch+1}/35], Loss: {avg_loss:.4f}")

        if (epoch + 1) % 5 == 0:
            val_loss = evaluate_regression(model, val_loader, device)
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                torch.save(model.state_dict(), 'best_regression_model.pt')

    model.load_state_dict(torch.load('best_regression_model.pt'))
    return model


def evaluate_classification(model, test_loader, device='cuda'):
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for x, cls_label, _ in test_loader:
            x = x.to(device)
            cls_label = cls_label.to(device)

            outputs = model(x)
            if isinstance(outputs, tuple) or isinstance(outputs, list):
                cls_outputs = outputs[0]  # Assume first output is classification
            else:
                cls_outputs = outputs  # Model returns only classification

            _, predicted = torch.max(cls_outputs.data, 1)
            total += cls_label.size(0)
            correct += (predicted == cls_label).sum().item()

    accuracy = 100 * correct / total
    print(f"Classification Accuracy: {accuracy:.2f}%")
    return accuracy


def evaluate_regression(model, test_loader, device='cuda'):
    model.eval()
    criterion = nn.MSELoss()
    total_loss = 0.0

    with torch.no_grad():
        for x, _, reg_label in test_loader:
            x = x.to(device)
            reg_label = reg_label.to(device)

            pred = model(x)
            loss = criterion(pred.squeeze(), reg_label)
            total_loss += loss.item()

    avg_loss = total_loss / len(test_loader)
    print(f"Regression MSE: {avg_loss:.4f}")
    return avg_loss

In [35]:
def load_pretrained_encoder(path='simclr_resnet15_pretrained.pt', device='cuda'):
    encoder = Resnet18Backbone()
    state_dict = torch.load(path, map_location=device)

    new_state_dict = {}
    for key, value in state_dict.items():
        if key.startswith("encoder."):
            new_key = key[8:]
            new_state_dict[new_key] = value
        else:
            new_state_dict[key] = value

    encoder.load_state_dict(new_state_dict, strict=False)

    print("Pretrained encoder loaded successfully")
    return encoder

    return encoder

In [None]:
import matplotlib.pyplot as plt

def plot_metrics(train_metrics, val_metrics, title="Metrics", ylabel="Value", xlabel="Epochs"):
    plt.figure(figsize=(10, 5))
    plt.plot(train_metrics, label="Train")
    plt.plot(val_metrics, label="Validation")

    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.legend()

    plt.show()

plot_metrics(train_acc_cls, val_acc_cls, title="Classification Accuracy", ylabel="Accuracy (%)")

# For regression MSE
plot_metrics(train_mse_reg, val_mse_reg, title="Regression MSE", ylabel="MSE")

In [90]:
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using {device}")


    dataset = JetDataset('Dataset_Specific_labelled.h5')

    train_size = int(0.8 * len(dataset))
    val_size = len(dataset) - train_size
    train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)
    val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=4)


    pretrained_encoder = load_pretrained_encoder()
    pretrained_encoder = adapt_encoder_for_8_channels(pretrained_encoder)

    pretrained_encoder=pretrained_encoder.to(device)

    with torch.no_grad():
      dummy_input = torch.randn(1, 8, 125, 125).to(device)
      dummy_output = pretrained_encoder(dummy_input)
      feature_dim = dummy_output.shape[1]
      print(f"Encoder output feature dimension: {feature_dim}")

    enhanced_cls_model = ClassificationModel(
        pretrained_encoder,
        num_classes=2,
        feature_dim=128
    ).to(device)



    # cls_model = train_classification(cls_model, train_loader, val_loader, num_epochs=150, device=device)

    enhanced_cls_model = train_classification(
        enhanced_cls_model,
        train_loader,
        val_loader,
        device=device
    )
    torch.save(enhanced_cls_model.state_dict(), 'classification_model.pt')

    # reg_model = RegressionModel(
    #     encoder=pretrained_encoder,
    #     output_dim=1,
    #     feature_dim=feature_dim
    # ).to(device)

    # reg_model = train_regression(
    #     reg_model,
    #     train_loader,
    #     val_loader,
    #     num_epochs=50,
    #     device=device)
    # torch.save(reg_model.state_dict(), 'regression_model.pt')


    # reg_model = train_regression(reg_model, train_loader, val_loader, num_epochs=150, device=device)

    # print("Pretrained Models:")
    # cls_acc = evaluate_classification(cls_model, val_loader, device)
    # reg_loss = evaluate_regression(reg_model, val_loader, device)

if __name__ == "__main__":
    main()

Using cuda
Pretrained encoder loaded successfully
Encoder output feature dimension: 1000
=== Phase 1: Training Classification Head Only ===
Phase 1 - Epoch [1/20], Loss: 0.7113
Phase 1 - Epoch [2/20], Loss: 0.6980
Phase 1 - Epoch [3/20], Loss: 0.6968
Phase 1 - Epoch [4/20], Loss: 0.6927
Phase 1 - Epoch [5/20], Loss: 0.6911
Classification Accuracy: 52.10%
Phase 1 - Epoch [6/20], Loss: 0.6895
Phase 1 - Epoch [7/20], Loss: 0.6903
Phase 1 - Epoch [8/20], Loss: 0.6900
Phase 1 - Epoch [9/20], Loss: 0.6888
Phase 1 - Epoch [10/20], Loss: 0.6865
Classification Accuracy: 53.20%
Phase 1 - Epoch [11/20], Loss: 0.6862
Phase 1 - Epoch [12/20], Loss: 0.6853
Phase 1 - Epoch [13/20], Loss: 0.6848
Phase 1 - Epoch [14/20], Loss: 0.6845
Phase 1 - Epoch [15/20], Loss: 0.6836
Classification Accuracy: 55.80%
Phase 1 - Epoch [16/20], Loss: 0.6840
Phase 1 - Epoch [17/20], Loss: 0.6833
Phase 1 - Epoch [18/20], Loss: 0.6818
Phase 1 - Epoch [19/20], Loss: 0.6816
Phase 1 - Epoch [20/20], Loss: 0.6825
Classificatio

In [92]:
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using {device}")


    dataset = JetDataset('Dataset_Specific_labelled.h5')

    train_size = int(0.8 * len(dataset))
    val_size = len(dataset) - train_size
    train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)
    val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=4)


    pretrained_encoder = load_pretrained_encoder()
    pretrained_encoder = adapt_encoder_for_8_channels(pretrained_encoder)

    pretrained_encoder=pretrained_encoder.to(device)

    with torch.no_grad():
      dummy_input = torch.randn(1, 8, 125, 125).to(device)
      dummy_output = pretrained_encoder(dummy_input)
      feature_dim = dummy_output.shape[1]
      print(f"Encoder output feature dimension: {feature_dim}")

    reg_model = RegressionModel(
        encoder=pretrained_encoder,
        output_dim=1,
        feature_dim=feature_dim
    ).to(device)

    reg_model = train_regression(
        reg_model,
        train_loader,
        val_loader,
        num_epochs=50,
        device=device)
    torch.save(reg_model.state_dict(), 'regression_model.pt')


if __name__ == "__main__":
    main()



Using cuda
Pretrained encoder loaded successfully
Encoder output feature dimension: 1000
Phase 1 - Epoch [1/15], Loss: 0.2925
Phase 1 - Epoch [2/15], Loss: 0.2604
Phase 1 - Epoch [3/15], Loss: 0.2572
Phase 1 - Epoch [4/15], Loss: 0.2579
Phase 1 - Epoch [5/15], Loss: 0.2535
Regression MSE: 0.2538
Phase 1 - Epoch [6/15], Loss: 0.2513
Phase 1 - Epoch [7/15], Loss: 0.2494
Phase 1 - Epoch [8/15], Loss: 0.2522
Phase 1 - Epoch [9/15], Loss: 0.2497
Phase 1 - Epoch [10/15], Loss: 0.2490
Regression MSE: 0.2494
Phase 1 - Epoch [11/15], Loss: 0.2482
Phase 1 - Epoch [12/15], Loss: 0.2479
Phase 1 - Epoch [13/15], Loss: 0.2472
Phase 1 - Epoch [14/15], Loss: 0.2465
Phase 1 - Epoch [15/15], Loss: 0.2464
Regression MSE: 0.2465

Phase 2: Fine-tuning Entire Model
Phase 2 - Epoch [1/35], Loss: 0.2510
Phase 2 - Epoch [2/35], Loss: 0.1867
Phase 2 - Epoch [3/35], Loss: 0.1197
Phase 2 - Epoch [4/35], Loss: 0.1030
Phase 2 - Epoch [5/35], Loss: 0.0959
Regression MSE: 0.1192
Phase 2 - Epoch [6/35], Loss: 0.0932
P

In [37]:
def adapt_encoder_for_8_channels(encoder, device='cuda'):

    first_conv = encoder.conv1

    original_weights = first_conv.weight.data

    new_conv = nn.Conv2d(
        in_channels=8,
        out_channels=first_conv.out_channels,
        kernel_size=first_conv.kernel_size,
        stride=first_conv.stride,
        padding=first_conv.padding,
        bias=first_conv.bias is not None
    ).to(device)

    new_conv.weight.data[:, :3, :, :] = original_weights
    new_conv.weight.data[:, 3:, :, :].normal_(0, 0.01)


    if first_conv.bias is not None:
        new_conv.bias.data = first_conv.bias.data.clone()


    encoder.conv1 = new_conv

    return encoder


In [13]:
def evaluate_model(model, test_loader, device='cuda'):
    model.eval()
    cls_criterion = nn.CrossEntropyLoss()
    reg_criterion = nn.MSELoss()

    correct = 0
    total = 0
    cls_loss_sum = 0.0
    reg_loss_sum = 0.0

    with torch.no_grad():
        for images, cls_labels, reg_labels in test_loader:
            images = images.to(device)
            cls_labels = cls_labels.to(device)
            reg_labels = reg_labels.to(device)

            cls_outputs, reg_outputs = model(images)

            _, predicted = torch.max(cls_outputs.data, 1)
            total += cls_labels.size(0)
            correct += (predicted == cls_labels).sum().item()

            cls_loss = cls_criterion(cls_outputs, cls_labels)
            reg_loss = reg_criterion(reg_outputs.squeeze(), reg_labels)

            cls_loss_sum += cls_loss.item()
            reg_loss_sum += reg_loss.item()

    accuracy = 100 * correct / total
    avg_cls_loss = cls_loss_sum / len(test_loader)
    avg_reg_loss = reg_loss_sum / len(test_loader)

    print(f"Test Accuracy: {accuracy:.2f}%, Cls Loss: {avg_cls_loss:.4f}, Reg Loss: {avg_reg_loss:.4f}")
    return accuracy, avg_cls_loss, avg_reg_loss


In [70]:
class ScratchModel(nn.Module):
    def __init__(self, input_channels=8, num_classes=2):
        super(ScratchModel, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(input_channels, 64, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten()
        )

        self.classifier = nn.Linear(512, num_classes)


        self.regressor = nn.Linear(512, 1)

    def forward(self, x):
        features = self.features(x)
        cls_output = self.classifier(features)
        reg_output = self.regressor(features)
        return cls_output, reg_output


In [80]:
def train_scratch_model(model, train_loader, val_loader, task_type, num_epochs=50, device='cuda'):
    if task_type == 'classification':
        criterion = nn.CrossEntropyLoss()
    else:
        criterion = nn.MSELoss()

    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    scheduler = CosineAnnealingLR(optimizer, T_max=num_epochs)


    for epoch in range(num_epochs):
        model.train()
        total_loss = 0.0

        for x, cls_label, reg_label in train_loader:
            x = x.to(device)

            if task_type == 'classification':
                target = cls_label.to(device)
                output = model(x)[0]
            else:
                target = reg_label.to(device)
                output = model(x)[1]

            optimizer.zero_grad()
            loss = criterion(output.squeeze(), target)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        scheduler.step()
        avg_loss = total_loss / len(train_loader)
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

        if (epoch + 1) % 5 == 0:
            if task_type == 'classification':
                evaluate_classification(model, val_loader, device)
            else:
                evaluate_regression(model, val_loader, device)

    return model


In [81]:
def evaluate_classification(model, test_loader, device='cuda'):
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for x, cls_label, _ in test_loader:
            x = x.to(device)
            cls_label = cls_label.to(device)

            outputs, _ = model(x)
            _, predicted = torch.max(outputs.data, 1)
            total += cls_label.size(0)
            correct += (predicted == cls_label).sum().item()

    accuracy = 100 * correct / total
    print(f"Classification Accuracy: {accuracy:.2f}%")
    return accuracy

def evaluate_regression(model, test_loader, device='cuda'):

    model.eval()
    criterion = nn.MSELoss()
    total_loss = 0.0

    with torch.no_grad():
        for x, _, reg_label in test_loader:
            x = x.to(device)
            reg_label = reg_label.to(device)

            _, pred = model(x)
            loss = criterion(pred.squeeze(), reg_label)
            total_loss += loss.item()

    avg_loss = total_loss / len(test_loader)
    print(f"Regression MSE: {avg_loss:.4f}")
    return avg_loss


In [82]:
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using {device}")

    dataset = JetDataset('Dataset_Specific_labelled.h5')
    train_size = int(0.8 * len(dataset))
    val_size = len(dataset) - train_size
    train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)
    val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=4)


    scratch_cls_model = ScratchModel(input_channels=8, num_classes=2).to(device)
    scratch_cls_model = train_scratch_model(
        scratch_cls_model,
        train_loader,
        val_loader,
        task_type='classification',
        num_epochs=50,
        device=device
    )

    scratch_reg_model = ScratchModel(input_channels=8, num_classes=2).to(device)
    scratch_reg_model = train_scratch_model(
        scratch_reg_model,
        train_loader,
        val_loader,
        task_type='regression',
        num_epochs=50,
        device=device
    )

    torch.save(scratch_cls_model.state_dict(), 'scratch_classification_model.pt')
    torch.save(scratch_reg_model.state_dict(), 'scratch_regression_model.pt')
    print("Classification model from scratch:")
    cls_acc = evaluate_classification(scratch_cls_model, val_loader, device)

    print("Regression model from scratch:")
    reg_mse = evaluate_regression(scratch_reg_model, val_loader, device)

    print(f"Classification Accuracy: {cls_acc:.2f}%")
    print(f"Regression MSE: {reg_mse:.4f}")

if __name__ == '__main__':
  main()

Using cuda
Epoch [1/50], Loss: 0.4499
Epoch [2/50], Loss: 0.3050
Epoch [3/50], Loss: 0.2519
Epoch [4/50], Loss: 0.1960
Epoch [5/50], Loss: 0.1399
Classification Accuracy: 86.20%
Epoch [6/50], Loss: 0.0951
Epoch [7/50], Loss: 0.0674
Epoch [8/50], Loss: 0.0435
Epoch [9/50], Loss: 0.0404
Epoch [10/50], Loss: 0.0303
Classification Accuracy: 77.75%
Epoch [11/50], Loss: 0.0132
Epoch [12/50], Loss: 0.0099
Epoch [13/50], Loss: 0.0085
Epoch [14/50], Loss: 0.0173
Epoch [15/50], Loss: 0.0226
Classification Accuracy: 83.45%
Epoch [16/50], Loss: 0.0266
Epoch [17/50], Loss: 0.0143
Epoch [18/50], Loss: 0.0060
Epoch [19/50], Loss: 0.0046
Epoch [20/50], Loss: 0.0031
Classification Accuracy: 84.65%
Epoch [21/50], Loss: 0.0027
Epoch [22/50], Loss: 0.0019
Epoch [23/50], Loss: 0.0011
Epoch [24/50], Loss: 0.0016
Epoch [25/50], Loss: 0.0004
Classification Accuracy: 85.00%
Epoch [26/50], Loss: 0.0004
Epoch [27/50], Loss: 0.0003
Epoch [28/50], Loss: 0.0002
Epoch [29/50], Loss: 0.0001
Epoch [30/50], Loss: 0.000