In [1]:
import os
import glob
import json
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, ConcatDataset

import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.transforms.functional as TF
from torchvision.transforms.functional import to_pil_image
from torchvision.datasets import ImageFolder
from PIL import Image

# from google.colab import drive

from collections import Counter

In [2]:
class SemiSupervisedDataset(Dataset):
    def __init__(self, image_dir, mask_dir, label_json, transform=None, mask_transform=None):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.mask_transform = mask_transform

        with open(label_json, 'r') as f:
            self.labels = json.load(f)

        self.image_files = [entry["image"] for entry in self.labels]
        self.label_dict = {entry["image"]: entry["class_id"] for entry in self.labels}

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        image_name = self.image_files[idx]
        image_path = os.path.join(self.image_dir, image_name)
        basename, _ = os.path.splitext(image_name)
        mask_path = os.path.join(self.mask_dir, basename + '.png')

        image = Image.open(image_path).convert("RGB")
        mask = Image.open(mask_path)

        label = self.label_dict[image_name]

        if self.transform:
            image = self.transform(image)
        if self.mask_transform:
            mask = self.mask_transform(mask)

        return image, torch.tensor(label), mask

image_transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to ResNet-18 input
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

mask_transform = transforms.Compose([
    transforms.Resize((224, 224), interpolation=Image.NEAREST),
    transforms.PILToTensor()
])

dataset = SemiSupervisedDataset(
    image_dir="train-semi",
    mask_dir="train-semi-segmentation",
    label_json="train_semi_annotations_with_seg_ids.json",
    transform=image_transform,
    mask_transform=mask_transform
)

dataloader = DataLoader(dataset, batch_size=8, shuffle=True, num_workers=1)

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)

        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        self.downsample = downsample

    def forward(self, x):
        identity = x

        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        return F.relu(out)

class ResNet18(nn.Module):
    def __init__(self, num_classes=50):
        super().__init__()
        self.in_channels = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(64, 2)
        self.layer2 = self._make_layer(128, 2, stride=2)
        self.layer3 = self._make_layer(256, 2, stride=2)
        self.layer4 = self._make_layer(512, 2, stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, out_channels, blocks, stride=1):
        downsample = None

        if stride != 1 or self.in_channels != out_channels:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

        layers = [BasicBlock(self.in_channels, out_channels, stride, downsample)]
        self.in_channels = out_channels

        for _ in range(1, blocks):
            layers.append(BasicBlock(out_channels, out_channels))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        return self.fc(x)


In [4]:
model = ResNet18(num_classes=50)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

ResNet18(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  

In [5]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

num_epochs = 25

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels, _ in dataloader:  # ignore masks
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss:.4f}, Accuracy: {100 * correct / total:.2f}%")


Epoch 1/25, Loss: 279.6385, Accuracy: 1.80%
Epoch 2/25, Loss: 249.8083, Accuracy: 3.40%
Epoch 3/25, Loss: 244.5553, Accuracy: 4.00%
Epoch 4/25, Loss: 236.8269, Accuracy: 5.40%
Epoch 5/25, Loss: 232.9206, Accuracy: 4.40%
Epoch 6/25, Loss: 235.1426, Accuracy: 5.20%
Epoch 7/25, Loss: 231.5453, Accuracy: 6.20%
Epoch 8/25, Loss: 229.1608, Accuracy: 7.00%
Epoch 9/25, Loss: 230.7529, Accuracy: 6.80%
Epoch 10/25, Loss: 223.4472, Accuracy: 9.80%
Epoch 11/25, Loss: 222.1544, Accuracy: 6.60%
Epoch 12/25, Loss: 218.1771, Accuracy: 12.00%
Epoch 13/25, Loss: 213.9266, Accuracy: 9.80%
Epoch 14/25, Loss: 210.6674, Accuracy: 11.20%
Epoch 15/25, Loss: 209.9971, Accuracy: 13.60%
Epoch 16/25, Loss: 204.7737, Accuracy: 13.80%
Epoch 17/25, Loss: 201.2036, Accuracy: 13.80%
Epoch 18/25, Loss: 198.3831, Accuracy: 17.40%
Epoch 19/25, Loss: 194.9945, Accuracy: 16.80%
Epoch 20/25, Loss: 189.3805, Accuracy: 19.60%
Epoch 21/25, Loss: 182.7401, Accuracy: 18.40%
Epoch 22/25, Loss: 173.8675, Accuracy: 23.20%
Epoch 23/

In [6]:
class UnlabeledDataset(Dataset):
    def __init__(self, image_dir, transform=None):
        self.image_dir = image_dir
        self.image_files = sorted(os.listdir(image_dir))
        self.transform = transform

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        image_name = self.image_files[idx]
        image_path = os.path.join(self.image_dir, image_name)
        image = Image.open(image_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, image_name


In [14]:
unlabeled_dataset = UnlabeledDataset("train-unlabeled/", transform=image_transform)
unlabeled_loader = DataLoader(unlabeled_dataset, batch_size=32, shuffle=False)

from torch.nn.functional import softmax

pseudo_labels = []
pseudo_images = []
pseudo_names = []

confidence_threshold = 0.9  # You can tune this value (typical: 0.8 - 0.95)

model.eval()
with torch.no_grad():
    for images, names in unlabeled_loader:
        images = images.to(device)
        outputs = model(images)
        probs = softmax(outputs, dim=1)
        max_probs, predicted = probs.max(1)
        
        # Filter by confidence
        for i in range(len(images)):
            if max_probs[i].item() >= confidence_threshold:
                pseudo_labels.append(predicted[i].cpu().item())
                pseudo_images.append(images[i].cpu())  # OR save the file name and reload later if memory is tight
                pseudo_names.append(names[i])  # Track file names if helpful for later visualization



In [15]:
class PseudoLabeledDataset(Dataset):
    def __init__(self, images, labels):
        self.images = images
        self.labels = labels

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]
        label = torch.tensor(self.labels[idx], dtype=torch.long)
        placeholder_mask = torch.zeros((3, 224, 224), dtype=torch.uint8)  # or same dtype as your real masks
        placeholder_mask = placeholder_mask.contiguous()  # ensure contiguity
        return image, label, placeholder_mask



pseudo_dataset = PseudoLabeledDataset(pseudo_images, pseudo_labels)
combined_dataset = torch.utils.data.ConcatDataset([dataset, pseudo_dataset])

combined_loader = DataLoader(combined_dataset, batch_size=8, shuffle=True, num_workers=4)

In [16]:
num_epochs = 5

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels, _ in combined_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(combined_loader)
    epoch_accuracy = 100 * correct / total
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%")

# Final results after training
print(f"Training complete! Final average loss: {epoch_loss:.4f}, Final accuracy: {epoch_accuracy:.2f}%")


Epoch 1/5, Loss: 1.6242, Accuracy: 55.95%
Epoch 2/5, Loss: 1.6605, Accuracy: 52.21%
Epoch 3/5, Loss: 1.4719, Accuracy: 55.95%
Epoch 4/5, Loss: 1.2455, Accuracy: 65.48%
Epoch 5/5, Loss: 1.1661, Accuracy: 69.05%
Training complete! Final average loss: 1.1661, Final accuracy: 69.05%


In [10]:
# import os
# import csv
# from PIL import Image
# import torch
# from torchvision import transforms

# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model.eval()  # Make sure your model is in eval mode

# test_dir = "test"
# test_files = sorted([f for f in os.listdir(test_dir) if f.endswith(".JPEG")])

# transform = transforms.Compose([
#     transforms.Resize((224, 224)),
#     transforms.ToTensor(),
#     transforms.Normalize(mean=[0.485, 0.456, 0.406],
#                          std=[0.229, 0.224, 0.225]),
# ])

# rows = [("ID", "Label", "MASK_RLE")]

# for fname in test_files:
#     img_path = os.path.join(test_dir, fname)
#     image = Image.open(img_path).convert("RGB")
#     input_tensor = transform(image).unsqueeze(0).to(device)

#     with torch.no_grad():
#         class_logits = model(input_tensor)  # assuming model outputs class logits directly
#         pred_class = torch.argmax(class_logits, dim=1).item()

#     # Use dummy RLE for all-zero mask (224 * 224 = 50176 pixels)
#     dummy_rle = "50176 0"

#     rows.append((fname, pred_class, dummy_rle))

# # Save to CSV
# with open("submission.csv", "w", newline="") as f:
#     writer = csv.writer(f)
#     writer.writerows(rows)


In [17]:
from datetime import datetime
def save_model_with_timestamp(model, save_dir="models", model_name="my_model"):
    """
    Saves a PyTorch model to a specified directory with a timestamp in its filename.

    Args:
        model (torch.nn.Module): The PyTorch model to save.
        save_dir (str): The directory where the model should be saved.
                        Defaults to "models".
        model_name (str): The base name for the model file.
                          Defaults to "my_model".
    """
    # 1. Ensure the save directory exists
    os.makedirs(save_dir, exist_ok=True)

    # 2. Generate a timestamp
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") # Format: YYYYMMDD_HHMMSS

    # 3. Create the full filename with path
    filename = f"{model_name}.pth" # Using .pth or .pt extension
    filepath = os.path.join(save_dir, filename)

    # 4. Save the model
    torch.save(model, filepath)
    print(f"Model saved successfully to: {filepath}")

save_model_with_timestamp(model, save_dir="saved_models", model_name="resnet18v4")

Model saved successfully to: saved_models/resnet18v4.pth
