In [6]:
import os
import numpy as np
import pandas as pd
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from sklearn.model_selection import train_test_split

# Define constants
IMG_HEIGHT = 224
IMG_WIDTH = 224
NUM_CLASSES = 49
BATCH_SIZE = 32
EPOCHS = 100
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define paths
drive_base_path = '/content/drive/MyDrive/ANSYS/VRL_challenge_PAR1/VRL_challenge_PAR/'
train_path = os.path.join(drive_base_path, 'train.txt')
images_folder = os.path.join(drive_base_path, 'images')

# Load dataset
train_df = pd.read_csv(train_path, sep=' ', header=None)
image_names = train_df.iloc[:, 0].astype(str).values
labels = train_df.iloc[:, 1:].values.astype(int)

# Split dataset
image_names_train, image_names_val, labels_train, labels_val = train_test_split(image_names, labels, test_size=0.2, random_state=42)

# Custom Dataset
class CustomDataset(Dataset):
    def __init__(self, image_names, labels, images_folder, transform=None):
        self.image_names = image_names
        self.labels = labels
        self.images_folder = images_folder
        self.transform = transform

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):
        img_name = self.image_names[idx]
        img_path = os.path.join(self.images_folder, f"{img_name}.jpg")
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        label = self.labels[idx]
        return image, label

# Define transforms
transform = transforms.Compose([
    transforms.Resize((IMG_HEIGHT, IMG_WIDTH)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Create DataLoader
train_dataset = CustomDataset(image_names_train, labels_train, images_folder, transform=transform)
val_dataset = CustomDataset(image_names_val, labels_val, images_folder, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

# Define the model
class SimpleCNN(nn.Module):
    def __init__(self, num_classes):
        super(SimpleCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 28 * 28, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

# Instantiate and compile the model
model = SimpleCNN(NUM_CLASSES).to(DEVICE)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(DEVICE), targets.to(DEVICE).float()
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)

    epoch_loss = running_loss / len(train_loader.dataset)
    print(f'Epoch {epoch+1}/{EPOCHS}, Loss: {epoch_loss:.4f}')

# Save the trained model
model_path = "simple_cnn_model.pth"
torch.save(model.state_dict(), model_path)
print(f"Model saved to {model_path}")

# Validation
model.eval()
val_loss = 0.0
val_corrects = 0
with torch.no_grad():
    for inputs, targets in val_loader:
        inputs, targets = inputs.to(DEVICE), targets.to(DEVICE).float()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        val_loss += loss.item() * inputs.size(0)
        preds = (outputs > 0.5).float()
        val_corrects += (preds == targets).float().sum()

val_loss = val_loss / len(val_loader.dataset)
val_accuracy = val_corrects / (len(val_loader.dataset) * NUM_CLASSES)
print(f'Validation Loss: {val_loss:.4f}')
print(f'Validation Accuracy: {val_accuracy:.4f}')

# # Load the model for inference
# loaded_model = SimpleCNN(NUM_CLASSES).to(DEVICE)
# loaded_model.load_state_dict(torch.load(model_path))
# loaded_model.eval()

# Function to make predictions on test data
# def predict(model, image_path):
#     transform = transforms.Compose([
#         transforms.Resize((IMG_HEIGHT, IMG_WIDTH)),
#         transforms.ToTensor(),
#         transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
#     ])
#     image = Image.open(image_path).convert("RGB")
#     image = transform(image).unsqueeze(0).to(DEVICE)
#     with torch.no_grad():
#         output = model(image)
#     return output.cpu().numpy()

# # Example usage
# test_image_path = os.path.join(images_folder, "test_image.jpg")
# prediction = predict(loaded_model, test_image_path)
# print(f"Prediction: {prediction}")


Epoch 1/100, Loss: 0.4793
Epoch 2/100, Loss: 0.3818
Epoch 3/100, Loss: 0.3519
Epoch 4/100, Loss: 0.3300
Epoch 5/100, Loss: 0.3070
Epoch 6/100, Loss: 0.2766
Epoch 7/100, Loss: 0.2418
Epoch 8/100, Loss: 0.2076
Epoch 9/100, Loss: 0.1767
Epoch 10/100, Loss: 0.1501
Epoch 11/100, Loss: 0.1287
Epoch 12/100, Loss: 0.1149
Epoch 13/100, Loss: 0.0974
Epoch 14/100, Loss: 0.0908
Epoch 15/100, Loss: 0.0832
Epoch 16/100, Loss: 0.0791
Epoch 17/100, Loss: 0.0817
Epoch 18/100, Loss: 0.0696
Epoch 19/100, Loss: 0.0662
Epoch 20/100, Loss: 0.0621
Epoch 21/100, Loss: 0.0633
Epoch 22/100, Loss: 0.0557
Epoch 23/100, Loss: 0.0529
Epoch 24/100, Loss: 0.0550
Epoch 25/100, Loss: 0.0513
Epoch 26/100, Loss: 0.0488
Epoch 27/100, Loss: 0.0484
Epoch 28/100, Loss: 0.0510
Epoch 29/100, Loss: 0.0465
Epoch 30/100, Loss: 0.0447
Epoch 31/100, Loss: 0.0468
Epoch 32/100, Loss: 0.0429
Epoch 33/100, Loss: 0.0400
Epoch 34/100, Loss: 0.0445
Epoch 35/100, Loss: 0.0414
Epoch 36/100, Loss: 0.0404
Epoch 37/100, Loss: 0.0378
Epoch 38/1

In [7]:
import os
import numpy as np
import pandas as pd
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

# Define constants
IMG_HEIGHT = 224
IMG_WIDTH = 224
NUM_CLASSES = 49
BATCH_SIZE = 32
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define paths
drive_base_path = '/content/drive/MyDrive/ANSYS/VRL_challenge_PAR1/VRL_challenge_PAR/'
test_images_folder = os.path.join(drive_base_path, 'test_images')  # Folder containing test images

# List all test image files
test_image_names = [f.split('.')[0] for f in os.listdir(test_images_folder) if f.endswith('.jpg')]

# Custom Dataset for Test Data
class TestDataset(Dataset):
    def __init__(self, image_names, images_folder, transform=None):
        self.image_names = image_names
        self.images_folder = images_folder
        self.transform = transform

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):
        img_name = self.image_names[idx]
        img_path = os.path.join(self.images_folder, f"{img_name}.jpg")
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, img_name

# Define transforms
transform = transforms.Compose([
    transforms.Resize((IMG_HEIGHT, IMG_WIDTH)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Create DataLoader for test data
test_dataset = TestDataset(test_image_names, test_images_folder, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

# Define the model (same as before)
class SimpleCNN(nn.Module):
    def __init__(self, num_classes):
        super(SimpleCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 28 * 28, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

# Load the model
model_path = "simple_cnn_model.pth"
model = SimpleCNN(NUM_CLASSES).to(DEVICE)
model.load_state_dict(torch.load(model_path))
model.eval()

# Function to make predictions on test data
def predict(model, dataloader):
    model.eval()
    predictions = {}
    with torch.no_grad():
        for inputs, img_names in dataloader:
            inputs = inputs.to(DEVICE)
            outputs = model(inputs)
            outputs = outputs.cpu().numpy()
            for img_name, output in zip(img_names, outputs):
                predictions[img_name] = output
    return predictions

# Make predictions on the test dataset
predictions = predict(model, test_loader)

# Example: Print the predictions for the first few test images
for img_name, output in list(predictions.items())[:5]:
    print(f"Image: {img_name}, Prediction: {output}")

# Save predictions to a CSV file
predictions_df = pd.DataFrame.from_dict(predictions, orient='index')
predictions_df.to_csv('predictions.csv', header=False)


Image: 144, Prediction: [1.3969039e-09 5.6472782e-09 1.3411777e-02 3.5671253e-06 1.5681745e-06
 1.0906715e-02 3.0135669e-05 8.3145094e-01 3.9098426e-04 5.9748261e-04
 2.5925738e-06 7.5860029e-01 6.5264367e-06 2.1310354e-05 7.8810444e-05
 2.5033567e-04 1.3196809e-04 2.8477572e-03 2.2091040e-08 3.7270127e-04
 5.3147909e-05 6.8726023e-03 1.4687663e-02 4.5408669e-04 8.7678543e-04
 2.6945725e-06 7.6059740e-07 2.3910549e-02 4.2543764e-08 6.5023892e-02
 2.1795513e-05 3.7720434e-02 6.3160801e-06 4.4323850e-02 4.3876025e-01
 6.8218127e-04 1.4952344e-03 4.1276618e-07 8.8219261e-01 1.0212062e-02
 3.5972789e-05 9.1548462e-04 5.1822895e-01 1.5949301e-04 4.4522469e-05
 9.9966395e-01 7.2351009e-05 9.9943620e-01 1.8247033e-03]
Image: 24, Prediction: [1.2388267e-04 5.0449162e-06 6.8719138e-04 3.4082568e-01 8.3987676e-02
 1.5452424e-05 2.1961877e-02 8.2681058e-03 2.3885326e-04 3.4537405e-02
 3.1676758e-05 4.2362455e-03 7.4313857e-02 4.3675154e-03 1.5410262e-05
 2.1310790e-01 1.6526175e-03 1.1134360e-03 

In [8]:
import os
import numpy as np
import pandas as pd
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

# Define constants
IMG_HEIGHT = 224
IMG_WIDTH = 224
NUM_CLASSES = 49
BATCH_SIZE = 32
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define paths
drive_base_path = '/content/drive/MyDrive/ANSYS/VRL_challenge_PAR1/VRL_challenge_PAR/'
test_images_folder = os.path.join(drive_base_path, 'test_images')  # Folder containing test images

# List all test image files
test_image_names = [f.split('.')[0] for f in os.listdir(test_images_folder) if f.endswith('.jpg')]

# Custom Dataset for Test Data
class TestDataset(Dataset):
    def __init__(self, image_names, images_folder, transform=None):
        self.image_names = image_names
        self.images_folder = images_folder
        self.transform = transform

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):
        img_name = self.image_names[idx]
        img_path = os.path.join(self.images_folder, f"{img_name}.jpg")
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, img_name

# Define transforms
transform = transforms.Compose([
    transforms.Resize((IMG_HEIGHT, IMG_WIDTH)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Create DataLoader for test data
test_dataset = TestDataset(test_image_names, test_images_folder, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

# Define the model (same as before)
class SimpleCNN(nn.Module):
    def __init__(self, num_classes):
        super(SimpleCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 28 * 28, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

# Load the model
model_path = "simple_cnn_model.pth"
model = SimpleCNN(NUM_CLASSES).to(DEVICE)
model.load_state_dict(torch.load(model_path))
model.eval()

# Function to make predictions on test data
def predict(model, dataloader, threshold=0.5):
    model.eval()
    predictions = {}
    with torch.no_grad():
        for inputs, img_names in dataloader:
            inputs = inputs.to(DEVICE)
            outputs = model(inputs)
            outputs = outputs.cpu().numpy()
            binary_outputs = (outputs >= threshold).astype(int)
            for img_name, output in zip(img_names, binary_outputs):
                predictions[img_name] = output
    return predictions

# Make predictions on the test dataset
predictions = predict(model, test_loader)

# Example: Print the predictions for the first few test images
for img_name, output in list(predictions.items())[:5]:
    print(f"Image: {img_name}, Prediction: {output}")

# Save predictions to a CSV file
predictions_df = pd.DataFrame.from_dict(predictions, orient='index')
predictions_df.to_csv('binary_predictions.csv', header=False)


Image: 144, Prediction: [0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 1 0 0 0 1 0 0 1 0 1 0]
Image: 24, Prediction: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 1 0 0 1 1 0 0]
Image: 124, Prediction: [0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 1 0 0 0]
Image: 66, Prediction: [0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
 0 0 0 0 0 0 0 0 1 1 0 0]
Image: 107, Prediction: [0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 1 0 0 0
 0 0 1 0 0 0 1 0 0 0 1 0]


In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive
