## Imports

In [1]:
import os
import h5py
import pandas as pd
import torch
import torch.nn as nn
from torchvision import transforms
from tqdm import tqdm
from PIL import Image
import io
import numpy as np

# Set random seed for reproducibility

In [None]:
SEED = 42
torch.manual_seed(SEED)


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}")

Device: cuda


## Model Definition

In [None]:

class ConformerTinyBinary(nn.Module):
    def __init__(self, img_size=128, num_classes=2, embed_dim=32, num_heads=4, num_transformer_layers=1):
        super(ConformerTinyBinary, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(3, embed_dim, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(embed_dim),
            nn.ReLU(),
            nn.Conv2d(embed_dim, embed_dim, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(embed_dim),
            nn.ReLU(),
            nn.Conv2d(embed_dim, embed_dim, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(embed_dim),
            nn.ReLU()
        )
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=embed_dim, nhead=num_heads),
            num_layers=num_transformer_layers
        )
        self.fc = nn.Linear(embed_dim, num_classes)

    def forward(self, x):
        x = self.conv(x)
        b, c, h, w = x.shape
        x = x.view(b, c, -1).permute(2, 0, 1)  
        x = self.transformer(x)
        x = x.mean(dim=0)  
        x = self.fc(x)
        return x


## Load Model

In [None]:

model_path = "./scratch/saved_models_2/epoch17.pth"
print(f"Loading model from: {model_path}")
model = ConformerTinyBinary(
    img_size=128,         
    num_classes=2,        
    embed_dim=128,        
    num_heads=4,          
    num_transformer_layers=4  
).to(device)
checkpoint = torch.load(model_path, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()
print("Model loaded successfully.")


Loading model from: ./scratch/saved_models_2/epoch17.pth
Model loaded successfully.


  checkpoint = torch.load(model_path, map_location=device)


## Load Test Data

In [None]:

test_metadata_path = "../data/test-metadata.csv"
test_metadata = pd.read_csv(test_metadata_path)
print(f"Test metadata loaded. {len(test_metadata)} samples found.")


test_image_path = "../data/test-image.hdf5"
print(f"Loading test images from: {test_image_path}")


def decode_image(byte_string):
    
    byte_stream = io.BytesIO(byte_string)
    
    image = Image.open(byte_stream).convert("RGB")  
    return np.array(image)  


with h5py.File(test_image_path, "r") as hdf5_file:
    test_images = {}
    for key in hdf5_file.keys():
        byte_string = hdf5_file[key][()]  
        if len(byte_string) > 0:
            try:
                test_images[key] = decode_image(byte_string)
            except Exception as e:
                print(f"Error decoding image {key}: {e}")
        else:
            print(f"Skipping empty data for key: {key}")

print(f"{len(test_images)} test images loaded.")

Test metadata loaded. 3 samples found.
Loading test images from: ../data/test-image.hdf5
3 test images loaded.


## Image Processing

In [None]:

transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])


## Inference

In [None]:

results = []
print("Starting inference...")
for isic_id, image in tqdm(test_images.items()):
    
    
    input_image = transform(image).unsqueeze(0).to(device)  

    
    with torch.no_grad():
        outputs = model(input_image)
        probabilities = torch.softmax(outputs, dim=1)[:, 1].item()  
    
    
    results.append({"isic_id": isic_id, "target": probabilities})


submission = pd.DataFrame(results)


submission = submission[["isic_id", "target"]]


submission_file = "submission.csv"
submission.to_csv(submission_file, index=False)
print(f"Submission file saved: {submission_file}")


print(submission.head())

Starting inference...


  attn_output = scaled_dot_product_attention(q, k, v, attn_mask, dropout_p, is_causal)
100%|██████████| 3/3 [00:01<00:00,  2.86it/s]

Submission file saved: submission.csv
        isic_id    target
0  ISIC_0015657  0.009598
1  ISIC_0015729  0.134887
2  ISIC_0015740  0.018930



