Training Model

In [None]:
import os
import numpy as np
import pandas as pd
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, hamming_loss, accuracy_score

# Define constants
IMG_HEIGHT = 224
IMG_WIDTH = 224
NUM_CLASSES = 49
BATCH_SIZE = 32
EPOCHS = 50
SEED = 42  # one seed shared by the train/val split and the RNGs seeded below
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed the NumPy and PyTorch generators so weight initialisation, dropout masks
# and DataLoader shuffling repeat across "Restart & Run All".
# NOTE(review): some CUDA kernels may still be non-deterministic — confirm if
# bit-exact reproducibility is required.
np.random.seed(SEED)
torch.manual_seed(SEED)

# Define paths
drive_base_path = '/content/drive/MyDrive/ANSYS/VRL_challenge_PAR1/VRL_challenge_PAR/'
train_path = os.path.join(drive_base_path, 'path_to_your_output_file.txt')
images_folder = os.path.join(drive_base_path, 'augmented_images')

# Load dataset: a space-separated file without a header row.
# Column 0 holds the image name; every remaining column is an integer label.
train_df = pd.read_csv(train_path, sep=' ', header=None)
image_names = train_df.iloc[:, 0].astype(str).values
labels = train_df.iloc[:, 1:].values.astype(int)

# Fail early with a clear message if the annotation file does not match the
# width the model head is built with (otherwise the loss raises a shape error
# only once training starts).
if labels.shape[1] != NUM_CLASSES:
    raise ValueError(
        f"Expected {NUM_CLASSES} label columns in {train_path}, found {labels.shape[1]}"
    )

# Split dataset: 80% train / 20% validation, reproducible through SEED
image_names_train, image_names_val, labels_train, labels_val = train_test_split(
    image_names, labels, test_size=0.2, random_state=SEED
)

# Custom Dataset
class CustomDataset(Dataset):
    """Map-style dataset pairing image files with their label rows.

    Args:
        image_names: indexable collection of image names (without extension).
        labels: indexable collection of labels, aligned with ``image_names``.
        images_folder: directory that contains ``<image name>.jpg`` files.
        transform: optional callable applied to the loaded RGB PIL image.
    """

    def __init__(self, image_names, labels, images_folder, transform=None):
        self.image_names, self.labels = image_names, labels
        self.images_folder, self.transform = images_folder, transform

    def __len__(self):
        """Return the number of samples (one per image name)."""
        return len(self.image_names)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``."""
        # File name on disk is the stored image name plus a ".jpg" suffix.
        file_path = os.path.join(self.images_folder, f"{self.image_names[idx]}.jpg")
        picture = Image.open(file_path).convert("RGB")
        if self.transform:
            picture = self.transform(picture)
        return picture, self.labels[idx]

# Preprocessing shared by the training and validation sets: resize to the
# network input size, convert to a tensor, then normalise each channel with the
# mean/std values conventionally used for torchvision's ImageNet-pretrained models.
transform = transforms.Compose(
    [
        transforms.Resize((IMG_HEIGHT, IMG_WIDTH)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Wrap each split in a dataset ...
train_dataset = CustomDataset(
    image_names_train, labels_train, images_folder, transform=transform
)
val_dataset = CustomDataset(
    image_names_val, labels_val, images_folder, transform=transform
)

# ... and batch it. Only the training data is shuffled.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=2,
)

# Define the ResNet model
class ResNetModel(nn.Module):
    """ResNet-50 backbone whose final layer is replaced by a small sigmoid head.

    Args:
        num_classes: number of outputs produced per image.
    """

    def __init__(self, num_classes):
        super().__init__()
        backbone = models.resnet50(pretrained=True)
        feature_dim = backbone.fc.in_features
        # Replacement head: hidden layer -> ReLU -> dropout -> output layer,
        # followed by a sigmoid so each output lies in (0, 1).
        head_layers = [
            nn.Linear(feature_dim, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes),
            nn.Sigmoid(),
        ]
        backbone.fc = nn.Sequential(*head_layers)
        self.resnet = backbone

    def forward(self, x):
        """Run ``x`` through the backbone and head; returns the sigmoid outputs."""
        scores = self.resnet(x)
        return scores

# Build the network, its loss and its optimiser.
# BCELoss is used because the model already ends in a Sigmoid layer.
model = ResNetModel(NUM_CLASSES).to(DEVICE)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Optimise for a fixed number of passes over the training set
for epoch in range(EPOCHS):
    model.train()  # enable dropout / batch-norm updates
    running_loss = 0.0

    for inputs, targets in train_loader:
        inputs = inputs.to(DEVICE)
        targets = targets.to(DEVICE).float()  # BCELoss needs float targets

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; scale by the batch size so that the
        # epoch figure below is a true per-sample average.
        running_loss += inputs.size(0) * loss.item()

    epoch_loss = running_loss / len(train_loader.dataset)
    print(f'Epoch {epoch+1}/{EPOCHS}, Loss: {epoch_loss:.4f}')

# Persist only the learned parameters (state dict), not the whole module
model_path = "resnet_model.pth"
torch.save(model.state_dict(), model_path)
print(f"Model saved to {model_path}")

# Validation with metrics calculation
model.eval()  # disable dropout, use batch-norm running statistics
val_loss = 0.0
val_corrects = 0
total = 0
total_mae = 0.0
label_mae = np.zeros(NUM_CLASSES)
num_samples = 0
all_preds = []    # thresholded 0/1 decisions, one row per sample
all_probs = []    # raw sigmoid outputs, needed for a threshold-free ROC-AUC
all_targets = []

with torch.no_grad():
    for inputs, targets in val_loader:
        inputs, targets = inputs.to(DEVICE), targets.to(DEVICE).float()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        # criterion returns the batch mean; weight by batch size for a
        # per-sample average over the whole validation set.
        val_loss += loss.item() * inputs.size(0)

        preds = (outputs > 0.5).float()
        # .item() keeps the counter a plain Python number instead of a tensor
        # living on DEVICE.
        val_corrects += (preds == targets).float().sum().item()
        total += targets.size(0) * NUM_CLASSES

        mae = torch.abs(preds - targets).sum(dim=0)  # absolute error summed per label
        label_mae += mae.cpu().numpy()
        total_mae += mae.sum().item()  # absolute error summed over all labels
        num_samples += targets.size(0)

        all_preds.append(preds.cpu().numpy())
        all_probs.append(outputs.cpu().numpy())
        all_targets.append(targets.cpu().numpy())

all_preds = np.vstack(all_preds)
all_probs = np.vstack(all_probs)
all_targets = np.vstack(all_targets)

val_loss = val_loss / len(val_loader.dataset)
val_accuracy = val_corrects / total      # fraction of individual label decisions that match
overall_mae = total_mae / total
label_mae = label_mae / num_samples  # Average MAE per label

# Calculate additional metrics
precision = precision_score(all_targets, all_preds, average='micro')
recall = recall_score(all_targets, all_preds, average='micro')
f1 = f1_score(all_targets, all_preds, average='micro')
# ROC-AUC must be computed from scores, not from hard 0/1 decisions: with
# thresholded inputs the curve collapses to a single operating point.
roc_auc = roc_auc_score(all_targets, all_probs, average='micro')
hamming = hamming_loss(all_targets, all_preds)
subset_acc = accuracy_score(all_targets, all_preds)  # exact match of the full label vector

print(f'Validation Loss: {val_loss:.4f}')
print(f'Validation Accuracy: {val_accuracy:.4f}')
print(f'Overall MAE: {overall_mae:.4f}')
print(f'Label-wise MAE: {label_mae}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')
print(f'ROC-AUC: {roc_auc:.4f}')
print(f'Hamming Loss: {hamming:.4f}')
print(f'Subset Accuracy: {subset_acc:.4f}')


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:01<00:00, 102MB/s]


Epoch 1/50, Loss: 0.3398
Epoch 2/50, Loss: 0.2793
Epoch 3/50, Loss: 0.2422
Epoch 4/50, Loss: 0.2144
Epoch 5/50, Loss: 0.1878
Epoch 6/50, Loss: 0.1718
Epoch 7/50, Loss: 0.1554
Epoch 8/50, Loss: 0.1400
Epoch 9/50, Loss: 0.1296
Epoch 10/50, Loss: 0.1180
Epoch 11/50, Loss: 0.1082
Epoch 12/50, Loss: 0.0994
Epoch 13/50, Loss: 0.0911
Epoch 14/50, Loss: 0.0851
Epoch 15/50, Loss: 0.0790
Epoch 16/50, Loss: 0.0749
Epoch 17/50, Loss: 0.0689
Epoch 18/50, Loss: 0.0660
Epoch 19/50, Loss: 0.0607
Epoch 20/50, Loss: 0.0573
Epoch 21/50, Loss: 0.0557
Epoch 22/50, Loss: 0.0516
Epoch 23/50, Loss: 0.0486
Epoch 24/50, Loss: 0.0479
Epoch 25/50, Loss: 0.0443
Epoch 26/50, Loss: 0.0423
Epoch 27/50, Loss: 0.0435
Epoch 28/50, Loss: 0.0406
Epoch 29/50, Loss: 0.0364
Epoch 30/50, Loss: 0.0373
Epoch 31/50, Loss: 0.0362
Epoch 32/50, Loss: 0.0338
Epoch 33/50, Loss: 0.0366
Epoch 34/50, Loss: 0.0333
Epoch 35/50, Loss: 0.0320
Epoch 36/50, Loss: 0.0285
Epoch 37/50, Loss: 0.0284
Epoch 38/50, Loss: 0.0277
Epoch 39/50, Loss: 0.

PREDICTING THE LABELS

In [None]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
import os
import glob

# Inference configuration
NUM_CLASSES = 49  # width of the model's output layer
TEST_BATCH_SIZE = 1  # Batch size for test predictions
# Prefer the GPU when one is visible to PyTorch, otherwise run on the CPU
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Location of the test images below the mounted Drive folder
drive_base_path = '/content/drive/MyDrive/Colab Notebooks/VRL_challenge_PAR/VRL_challenge_PAR/'
test_folder = os.path.join(drive_base_path, 'test_images')

# Custom Dataset class for the test set
class TestDataset(Dataset):
    """Unlabelled map-style dataset over a list of image file paths.

    Args:
        image_paths: indexable collection of paths to image files.
        transform: optional callable applied to the loaded RGB PIL image.
    """

    def __init__(self, image_paths, transform=None):
        self.image_paths, self.transform = image_paths, transform

    def __len__(self):
        """Return the number of image paths."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load image ``idx`` and return ``(image, file name)``."""
        file_path = self.image_paths[idx]
        picture = Image.open(file_path).convert("RGB")
        if self.transform:
            picture = self.transform(picture)
        # Only the base name is returned so callers can identify the image.
        file_name = os.path.basename(file_path)
        return picture, file_name

# Get all image paths in the test folder.
# glob yields matches in arbitrary, filesystem-dependent order; sorting makes
# the order of the prediction file deterministic between runs.
test_image_paths = sorted(glob.glob(os.path.join(test_folder, '*.jpg')))

# Stop here if nothing matched (wrong path, Drive not mounted, other file
# extension) instead of silently producing an empty prediction file later.
if not test_image_paths:
    raise FileNotFoundError(f"No .jpg images found in {test_folder}")

# Define transforms for the test images: resize, convert to a tensor, then
# normalise each channel with the given mean/std.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Create DataLoader for test dataset (no shuffling, so batches follow the
# sorted path order)
test_dataset = TestDataset(test_image_paths, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=TEST_BATCH_SIZE, shuffle=False, num_workers=2)

# Define the ResNet model
class ResNetModel(nn.Module):
    """ResNet-50 backbone whose final layer is replaced by a small sigmoid head.

    Args:
        num_classes: number of outputs produced per image.
        pretrained: forwarded to ``models.resnet50``. Defaults to ``True`` to
            keep the previous behaviour; pass ``False`` when the weights are
            about to be overwritten by ``load_state_dict`` to skip the download.
    """

    def __init__(self, num_classes, pretrained=True):
        super().__init__()
        self.resnet = models.resnet50(pretrained=pretrained)
        # Replacement head: hidden layer -> ReLU -> dropout -> output layer,
        # followed by a sigmoid so each output lies in (0, 1).
        self.resnet.fc = nn.Sequential(
            nn.Linear(self.resnet.fc.in_features, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Run ``x`` through the backbone and head; returns the sigmoid outputs."""
        return self.resnet(x)

# Instantiate the model and load the saved weights.
# pretrained=False: every parameter and buffer is replaced by the checkpoint
# right below, so fetching the ImageNet weights first would be wasted work
# (and would fail without network access).
model = ResNetModel(NUM_CLASSES, pretrained=False).to(DEVICE)
model_path = "/content/resnet_model.pth"
# NOTE(review): torch.load unpickles the file — only load checkpoints from a
# trusted source.
model.load_state_dict(torch.load(model_path, map_location=DEVICE))
model.eval()

# Function to predict labels for the test dataset
def predict_test_dataset(test_loader, model, device, threshold=0.5):
    """Run ``model`` over ``test_loader`` and collect thresholded predictions.

    Args:
        test_loader: iterable yielding ``(images, image_names)`` batches.
        model: module called as ``model(images)``; its outputs are compared
            against ``threshold``.
        device: device the image batches are moved to before the forward pass.
        threshold: outputs strictly greater than this become 1.0, others 0.0.

    Returns:
        ``(predictions, image_names)``: two lists of equal length. Entry ``i``
        of ``predictions`` is a NumPy array holding the 0/1 values for the
        image named ``image_names[i]``.
    """
    model.eval()
    predictions = []
    image_names = []

    with torch.no_grad():
        for images, img_names in test_loader:
            images = images.to(device)
            outputs = model(images)
            batch_preds = (outputs > threshold).float().cpu().numpy()
            # Add one entry per sample. Extending row by row (instead of
            # squeezing and appending the whole batch) keeps predictions
            # aligned with image_names for any batch size.
            predictions.extend(batch_preds)
            image_names.extend(img_names)

    return predictions, image_names

# Run inference over the whole test set
test_predictions, test_image_names = predict_test_dataset(test_loader, model, DEVICE)

# Write one line per image: the file name, then its comma-separated 0/1 values
output_file = "test_predictions.txt"
with open(output_file, 'w') as f:
    for img_name, preds in zip(test_image_names, test_predictions):
        # Cast each value to int first so it is written as "0"/"1", not "0.0"/"1.0"
        as_text = map(str, map(int, preds))
        pred_str = ",".join(as_text)
        f.write(f"{img_name}, {pred_str}\n")

print(f"Predictions saved to {output_file}")


Predictions saved to test_predictions.txt
