In [1]:
import zipfile
import os

# Replace 'your_file.zip' with the name of your uploaded file
zip_file_name = '/content/drive/MyDrive/ANSYS/VRL_challenge_PAR.zip'

# Specify the directory to extract to
extract_to = '/content/'

# Open and extract the zip file
with zipfile.ZipFile(zip_file_name, 'r') as zip_ref:
    zip_ref.extractall(extract_to)

# List the contents to verify
print("Files extracted to:", extract_to)
os.listdir(extract_to)


Files extracted to: /content/


['.config', 'drive', 'VRL_challenge_PAR', 'sample_data']

In [2]:
!pip install torch torchvision numpy


Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)
Collecting nvidia-curand-cu12==10.3.2.106 (from torch)
  Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)
Collectin

In [None]:
import os
import torch
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

class PAR_Dataset(Dataset):
    def __init__(self, image_dir, annotation_file, label_file, transform=None):
        self.image_dir = image_dir
        self.transform = transform
        self.annotations = self.load_annotations(annotation_file)
        self.labels = self.load_labels(label_file)

    def load_annotations(self, annotation_file):
        with open(annotation_file, 'r') as f:
            lines = f.readlines()
        annotations = []
        for line in lines:
            parts = line.strip().split()
            image_name = parts[0]
            attributes = list(map(int, parts[1:]))
            annotations.append((image_name, attributes))
        return annotations

    def load_labels(self, label_file):
        with open(label_file, 'r') as f:
            labels = [line.strip() for line in f.readlines()]
        return labels

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        image_name, attributes = self.annotations[idx]
        img_path = os.path.join(self.image_dir, image_name)
        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        attributes = torch.tensor(attributes, dtype=torch.float32)
        return image, attributes

# Define transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # ResNet50 standard input size
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # ResNet50 normalization
])
import torch.nn as nn
import torchvision.models as models

class PAR_Model(nn.Module):
    def __init__(self, num_attributes):
        super(PAR_Model, self).__init__()
        self.resnet = models.resnet50(pretrained=True)
        self.resnet.fc = nn.Linear(self.resnet.fc.in_features, num_attributes)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.resnet(x)
        x = self.sigmoid(x)
        return x
import os
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from torch.optim import Adam

class PAR_Dataset(Dataset):
    def __init__(self, image_dir, annotation_file, label_file, transform=None):
        self.image_dir = image_dir
        self.transform = transform
        self.annotations = self.load_annotations(annotation_file)
        self.labels = self.load_labels(label_file)

    def load_annotations(self, annotation_file):
        with open(annotation_file, 'r') as f:
            lines = f.readlines()
        annotations = []
        for line in lines:
            parts = line.strip().split()
            image_name = parts[0] + '.jpg'  # Ensure the .jpg extension
            attributes = list(map(int, parts[1:]))
            annotations.append((image_name, attributes))
        return annotations

    def load_labels(self, label_file):
        with open(label_file, 'r') as f:
            labels = [line.strip() for line in f.readlines()]
        return labels

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        image_name, attributes = self.annotations[idx]
        img_path = os.path.join(self.image_dir, image_name)

        if not os.path.isfile(img_path):
            raise FileNotFoundError(f"File {img_path} not found.")

        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        attributes = torch.tensor(attributes, dtype=torch.float32)
        return image, attributes

# Define transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

class PAR_Model(nn.Module):
    def __init__(self, num_attributes):
        super(PAR_Model, self).__init__()
        self.resnet = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
        self.resnet.fc = nn.Linear(self.resnet.fc.in_features, num_attributes)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.resnet(x)
        x = self.sigmoid(x)
        return x

# Parameters
num_epochs = 10
learning_rate = 0.001
batch_size = 16

# Load dataset
train_dataset = PAR_Dataset('/content/VRL_challenge_PAR/VRL_challenge_PAR/images', '/content/VRL_challenge_PAR/VRL_challenge_PAR/train.txt', '/content/VRL_challenge_PAR/VRL_challenge_PAR/label.txt', transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Model, loss function, and optimizer
num_attributes = len(train_dataset.labels)
model = PAR_Model(num_attributes)
criterion = nn.BCELoss()
optimizer = Adam(model.parameters(), lr=learning_rate)

# Training loop
model.train()
for epoch in range(num_epochs):
    epoch_loss = 0.0
    for images, labels in train_loader:
        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss/len(train_loader):.4f}')

# Save the model
torch.save(model.state_dict(), 'par_model_resnet50.pth')


In [None]:
import os
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from torch.optim import Adam

class PAR_Dataset(Dataset):
    def __init__(self, image_dir, annotation_file, label_file, transform=None):
        self.image_dir = image_dir
        self.transform = transform
        self.annotations = self.load_annotations(annotation_file)
        self.labels = self.load_labels(label_file)

    def load_annotations(self, annotation_file):
        with open(annotation_file, 'r') as f:
            lines = f.readlines()
        annotations = []
        for line in lines:
            parts = line.strip().split()
            image_name = parts[0] + '.jpg'  # Ensure the .jpg extension
            attributes = list(map(int, parts[1:]))
            annotations.append((image_name, attributes))
        return annotations

    def load_labels(self, label_file):
        with open(label_file, 'r') as f:
            labels = [line.strip() for line in f.readlines()]
        return labels

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        image_name, attributes = self.annotations[idx]
        img_path = os.path.join(self.image_dir, image_name)

        if not os.path.isfile(img_path):
            raise FileNotFoundError(f"File {img_path} not found.")

        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        attributes = torch.tensor(attributes, dtype=torch.float32)
        return image, attributes

# Define transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # ResNet50 standard input size
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # ResNet50 normalization
])

class PAR_Model(nn.Module):
    def __init__(self, num_attributes):
        super(PAR_Model, self).__init__()
        self.resnet = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
        self.resnet.fc = nn.Linear(self.resnet.fc.in_features, num_attributes)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.resnet(x)
        x = self.sigmoid(x)
        return x

# Parameters
num_epochs = 10
learning_rate = 0.001
batch_size = 16

# Load dataset
train_dataset = PAR_Dataset('/content/VRL_challenge_PAR/VRL_challenge_PAR/images', '/content/VRL_challenge_PAR/VRL_challenge_PAR/train.txt', '/content/VRL_challenge_PAR/VRL_challenge_PAR/label.txt', transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Model, loss function, and optimizer
num_attributes = len(train_dataset.labels)
model = PAR_Model(num_attributes)
criterion = nn.BCELoss()
optimizer = Adam(model.parameters(), lr=learning_rate)

# Training loop
model.train()
for epoch in range(num_epochs):
    epoch_loss = 0.0
    for images, labels in train_loader:
        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss/len(train_loader):.4f}')

# Save the model
torch.save(model.state_dict(), 'par_model_resnet50.pth')


Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 131MB/s]


Epoch [1/10], Loss: 0.3767
Epoch [2/10], Loss: 0.2775
Epoch [3/10], Loss: 0.2293
Epoch [4/10], Loss: 0.1895
Epoch [5/10], Loss: 0.1601
Epoch [6/10], Loss: 0.1290
Epoch [7/10], Loss: 0.0984
Epoch [8/10], Loss: 0.0715
Epoch [9/10], Loss: 0.0517
Epoch [10/10], Loss: 0.0406


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from torch.optim import Adam
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
import numpy as np
from tqdm import tqdm

# Dataset class for loading images and annotations
class PAR_Dataset(Dataset):
    def __init__(self, image_dir, annotation_file, label_file, transform=None):
        self.image_dir = image_dir
        self.transform = transform
        self.annotations = self.load_annotations(annotation_file)
        self.labels = self.load_labels(label_file)

    def load_annotations(self, annotation_file):
        with open(annotation_file, 'r') as f:
            lines = f.readlines()
        annotations = []
        for line in lines:
            parts = line.strip().split()
            image_name = parts[0] + '.jpg'  # Ensure the .jpg extension
            attributes = list(map(int, parts[1:]))
            annotations.append((image_name, attributes))
        return annotations

    def load_labels(self, label_file):
        with open(label_file, 'r') as f:
            labels = [line.strip() for line in f.readlines()]
        return labels

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        image_name, attributes = self.annotations[idx]
        img_path = os.path.join(self.image_dir, image_name)

        if not os.path.isfile(img_path):
            raise FileNotFoundError(f"File {img_path} not found.")

        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        attributes = torch.tensor(attributes, dtype=torch.float32)
        return image, attributes

# Define transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # ResNet50 standard input size
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # ResNet50 normalization
])

# Model class for Person Attribute Recognition
class PAR_Model(nn.Module):
    def __init__(self, num_attributes):
        super(PAR_Model, self).__init__()
        self.resnet = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
        self.resnet.fc = nn.Linear(self.resnet.fc.in_features, num_attributes)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.resnet(x)
        x = self.sigmoid(x)
        return x

# Load labels
def load_labels(label_file):
    with open(label_file, 'r') as f:
        labels = [line.strip() for line in f.readlines()]
    return labels

# Parameters
label_file = '/content/VRL_challenge_PAR/VRL_challenge_PAR/label.txt'
num_attributes = len(load_labels(label_file))
model_path = '/content/drive/MyDrive/ANSYS/par_model_resnet50.pth'

# Load the model
model = PAR_Model(num_attributes)
model.load_state_dict(torch.load(model_path))
model.eval()

# Load dataset
train_dataset = PAR_Dataset(
    '/content/VRL_challenge_PAR/VRL_challenge_PAR/images',
    '/content/VRL_challenge_PAR/VRL_challenge_PAR/train.txt',
    '/content/VRL_challenge_PAR/VRL_challenge_PAR/label.txt',
    transform=transform
)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=False)

# Initialize lists to store ground truth and predictions
all_labels = []
all_predictions = []

# Iterate over the training dataset
with torch.no_grad():
    for images, labels in tqdm(train_loader):
        outputs = model(images)
        all_labels.append(labels.numpy())
        all_predictions.append(outputs.numpy())

# Convert lists to numpy arrays
all_labels = np.concatenate(all_labels)
all_predictions = np.concatenate(all_predictions)

# Compute metrics
threshold = 0.5
all_predictions_binary = (all_predictions >= threshold).astype(np.float32)

accuracy = accuracy_score(all_labels, all_predictions_binary)
f1 = f1_score(all_labels, all_predictions_binary, average='macro')
roc_auc = roc_auc_score(all_labels, all_predictions, average='macro')

print(f'Accuracy: {accuracy:.4f}')
print(f'F1 Score: {f1:.4f}')
print(f'ROC AUC: {roc_auc:.4f}')


100%|██████████| 38/38 [02:56<00:00,  4.64s/it]

Accuracy: 0.7967
F1 Score: 0.9784
ROC AUC: 0.9997





In [3]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.models as models
from PIL import Image

class PAR_Model(nn.Module):
    def __init__(self, num_attributes):
        super(PAR_Model, self).__init__()
        self.resnet = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
        self.resnet.fc = nn.Linear(self.resnet.fc.in_features, num_attributes)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.resnet(x)
        x = self.sigmoid(x)
        return x

# Load the model
num_attributes = 49  # Adjust this based on your labels.txt
model = PAR_Model(num_attributes)
model.load_state_dict(torch.load('/content/drive/MyDrive/ANSYS/par_model_resnet50.pth'))
model.eval()  # Set the model to evaluation mode


Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 112MB/s]


PAR_Model(
  (resnet): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
         

In [4]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # ResNet50 standard input size
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # ResNet50 normalization
])


In [5]:
import os

def load_and_preprocess_image(image_path):
    image = Image.open(image_path).convert('RGB')
    image = transform(image)
    return image


In [6]:
import os
import torch

def predict_on_images(model, image_folder):
    # Make sure the model is in evaluation mode
    model.eval()

    image_paths = [os.path.join(image_folder, img) for img in os.listdir(image_folder) if img.endswith(('.jpg', '.jpeg', '.png'))]
    image_paths.sort(key=lambda x: int(os.path.splitext(os.path.basename(x))[0]))  # Sort by image name assuming names are like 1.jpg, 2.jpg, etc.

    for image_path in image_paths:
        image = load_and_preprocess_image(image_path)
        image = image.unsqueeze(0)  # Add batch dimension

        with torch.no_grad():
            outputs = model(image)

        # Apply threshold to get binary outputs (if necessary)
        outputs = outputs > 0.5

        # Convert tensor to list
        outputs = outputs.squeeze().tolist()

        print(f"Predictions for {os.path.basename(image_path)}: {outputs}")

# Path to the folder containing the images
image_folder = '/content/drive/MyDrive/ANSYS/SCSPAR24_Testdata'

# Run predictions
predict_on_images(model, image_folder)


Predictions for 1.jpg: [False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, True, True, False, False]
Predictions for 2.jpg: [True, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, True, False, False, True, False, False, False, True, False, False, True, False, False, False, False, True, False, False, False, True, True, False, False]
Predictions for 3.jpg: [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, True, False, False, False,

In [8]:
import os
import torch
import csv
from PIL import Image
from torchvision import transforms
import torch.nn as nn
import torchvision.models as models

# Define the PAR_Model class
class PAR_Model(nn.Module):
    def __init__(self, num_attributes):
        super(PAR_Model, self).__init__()
        self.resnet = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
        self.resnet.fc = nn.Linear(self.resnet.fc.in_features, num_attributes)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.resnet(x)
        x = self.sigmoid(x)
        return x

# Load the model
num_attributes = 49  # Adjust this based on your labels.txt
model = PAR_Model(num_attributes)
model.load_state_dict(torch.load('/content/drive/MyDrive/ANSYS/par_model_resnet50.pth', map_location=torch.device('cpu')))
model.eval()  # Set the model to evaluation mode

# Define transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # ResNet50 standard input size
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # ResNet50 normalization
])

# Function to load and preprocess an image
def load_and_preprocess_image(image_path):
    image = Image.open(image_path).convert('RGB')
    image = transform(image)
    return image

# Function to predict on images and save results to CSV
def predict_on_images(model, image_folder, output_csv):
    # Ensure the model is in evaluation mode
    model.eval()

    # List image paths
    image_paths = [os.path.join(image_folder, img) for img in os.listdir(image_folder) if img.endswith(('.jpg', '.jpeg', '.png'))]
    image_paths.sort(key=lambda x: int(os.path.splitext(os.path.basename(x))[0]))  # Sort by image name assuming names are like 1.jpg, 2.jpg, etc.

    # Open a CSV file to write the results
    with open(output_csv, mode='w', newline='') as file:
        writer = csv.writer(file)

        # Write the header
        header = ['image_name'] + [f'attribute_{i}' for i in range(1, num_attributes + 1)]
        writer.writerow(header)

        for image_path in image_paths:
            image = load_and_preprocess_image(image_path)
            image = image.unsqueeze(0)  # Add batch dimension

            with torch.no_grad():
                outputs = model(image)

            # Apply threshold to get binary outputs (if necessary)
            outputs = (outputs > 0.5).int()

            # Convert tensor to list
            outputs = outputs.squeeze().tolist()

            # Get the image name
            image_name = os.path.basename(image_path)

            # Write the results to the CSV file
            writer.writerow([image_name] + outputs)

    print(f"Predictions saved to {output_csv}")

# Path to the folder containing the images
image_folder = '/content/drive/MyDrive/ANSYS/SCSPAR24_Testdata'  # Replace with your actual image folder path
# Path to the output CSV file
output_csv = '/content/predictions.csv'

# Run predictions and save to CSV
predict_on_images(model, image_folder, output_csv)


Predictions saved to /content/predictions.csv
