# Arnas Variakojis, LSP: 2213811, Variantas: resnet50 [Goose, Jellyfish, Snail] 

## Ensure all dependencies are installed

In [1]:
!pip install torch torchvision openimages torchmetrics



## Download ~1000 images (334 per class) from Open Images

In [4]:
import os
from openimages.download import download_dataset
import shutil

# initialize global variables
data_dir = "./images"
number_for_samples = 334  # images requested per class
classes = ["Goose", "Jellyfish", "Snail"]

# start from an empty download directory so images left over from an
# earlier run are never mixed into the new download
if os.path.exists(data_dir):
    shutil.rmtree(data_dir)
os.makedirs(data_dir)

# download dataset from openImages
download_dataset(data_dir, classes, limit=number_for_samples)

# list the downloaded files of every class; the paths are derived from
# data_dir / classes instead of being repeated by hand
# (each class is stored in a lower-cased folder, e.g. ./images/goose/images)
for position, class_name in enumerate(classes):
    leading_newline = "\n" if position else ""
    print(f"{leading_newline}{class_name} images:\n\n")
    print(os.listdir(os.path.join(data_dir, class_name.lower(), "images")))


2025-02-18  21:45:31 INFO Downloading 334 train images for class 'goose'
100%|████████████████████████████████████████████████████████████████████████████████| 334/334 [00:18<00:00, 17.85it/s]
2025-02-18  21:45:50 INFO Downloading 334 train images for class 'jellyfish'
100%|████████████████████████████████████████████████████████████████████████████████| 334/334 [00:15<00:00, 21.93it/s]
2025-02-18  21:46:05 INFO Downloading 334 train images for class 'snail'
100%|████████████████████████████████████████████████████████████████████████████████| 334/334 [00:17<00:00, 19.00it/s]


Goose images:


['003c701082c87db8.jpg', '003ce6309d13dde0.jpg', '00626fcb7b2a8645.jpg', '0064360fc5b84cd6.jpg', '008af93d188c0829.jpg', '008cfd3de74748a7.jpg', '00943c2ff0f74b08.jpg', '00ae8d969149f745.jpg', '011a3e3913f893af.jpg', '0149297f91b4d21d.jpg', '02651ed3aa467ae7.jpg', '02889b232f588386.jpg', '02f89bcd6036d1a1.jpg', '0330e1f32fbe09aa.jpg', '039ac59de8424fbc.jpg', '03c75f58f0f08e81.jpg', '03d0efd66087d746.jpg', '03ee33991b036733.jpg', '03f90cc8906fbb1e.jpg', '04135b72c6b66640.jpg', '04173148b7e6b30b.jpg', '045ae91a87968b94.jpg', '046eb0828521bc79.jpg', '04dc12d841f38ece.jpg', '050ddb50a117d98b.jpg', '056bf7447657441c.jpg', '056fbf1e0ccc849f.jpg', '05f27e8026b9d461.jpg', '060deaaadf577697.jpg', '060fd84b670fd5a0.jpg', '062e9d57fd88c532.jpg', '06960aeca9491287.jpg', '06e6691fbc29c705.jpg', '06ec4e3da36be589.jpg', '06f93e5471138de7.jpg', '0794002e95c8baa8.jpg', '0810832fc9919151.jpg', '08973600da11230f.jpg', '08cc17d82c4ff34a.jpg', '0925d869a29d6931.jpg', '09357042ba52af35.jpg',

## Evaluate the model by performing inference and metrics calculations

In [16]:
# Arnas Variakojis
# LSP: 2213811
# Variantas: resnet50 [Goose, Jellyfish, Snail] 


import torch
import torchvision.models as models
from PIL import Image
import os
from torchmetrics import Accuracy, Precision, Recall, F1Score
from torch.utils.data import Dataset, DataLoader
from functools import lru_cache

# --- evaluation configuration ---
dataset_path = "./images"
batch_size = 32
num_workers = 2
prefetch_factor = 2
# one threshold per class, in the order in which images folder is set, i.e. : [Goose, Jellyfish, Snail]
thresholds = [0.5, 0.5, 0.5]
task = "multilabel"
num_labels = len(thresholds)

# --- metrics: all four are built from the same options ---
_metric_options = {"task": task, "num_labels": num_labels, "average": "macro"}
accuracy_metric = Accuracy(**_metric_options)
precision_metric = Precision(**_metric_options)
recall_metric = Recall(**_metric_options)
f1_metric = F1Score(**_metric_options)

# load the model and move it to the GPU (CUDA) if one is available, otherwise keep it on the CPU
def load_resnet():
    """Load a pretrained ResNet-50 in evaluation mode on the available device.

    Returns:
        tuple: ``(model, device, weights)`` where ``device`` is CUDA when
        available and CPU otherwise, and ``weights`` is
        ``ResNet50_Weights.DEFAULT``.
    """
    weights = models.ResNet50_Weights.DEFAULT

    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    print(f"Using device: {device}")

    network = models.resnet50(weights=weights)
    # evaluation mode: this notebook only runs inference
    network.eval()
    return network.to(device), device, weights

# custom dataset class for dataloader
class ImagesDataset(Dataset):
    """Dataset of image file paths paired with integer class labels.

    Indexing returns ``(transformed_image, one_hot_label)`` where the label
    is a float tensor of length ``num_classes`` with a single 1.
    """

    def __init__(self, images, labels, num_classes, transform):
        """Store the dataset description.

        Args:
            images: sequence of image file paths.
            labels: sequence of integer class ids, parallel to ``images``.
            num_classes: length of the one-hot label vector.
            transform: callable applied to every loaded RGB image.
        """
        self.images = images
        self.labels = labels
        self.num_classes = num_classes
        self.transform = transform

    def __len__(self):
        """Return the number of images; used by the dataloader."""
        return len(self.images)

    def load_and_transform(self, image_path):
        """Open the image at ``image_path``, convert it to RGB and transform it."""
        # the context manager closes the underlying file as soon as the pixels
        # have been decoded, instead of leaving the handle to the garbage collector
        with Image.open(image_path) as image:
            rgb_image = image.convert("RGB")
        return self.transform(rgb_image)

    def __getitem__(self, index):
        """Return ``(transformed_image, one_hot_label)`` for position ``index``."""
        image_transformed = self.load_and_transform(self.images[index])

        # turn the scalar label into a vector of zeros with a 1 at the label's index
        label_one_hot = torch.zeros(self.num_classes)
        label_one_hot[self.labels[index]] = 1

        return image_transformed, label_one_hot


# loads the openImage images from a folder and creates labels for the different classes
def load_images(image_folder):
    """Collect image paths and integer labels from a per-class folder layout.

    Expected layout: ``<image_folder>/<class_name>/images/<file>``.

    Class names are sorted so that label ids are deterministic across runs
    and platforms (``os.listdir`` returns entries in arbitrary order).
    Entries without an ``images`` sub-directory (for example
    ``.ipynb_checkpoints`` or stray files) are ignored.

    Args:
        image_folder: root directory holding one folder per class.

    Returns:
        tuple: ``(image_paths, labels, class_names)`` where ``labels[i]`` is
        the index into ``class_names`` of the class of ``image_paths[i]``.
    """
    class_names = sorted(
        entry
        for entry in os.listdir(image_folder)
        if os.path.isdir(os.path.join(image_folder, entry, "images"))
    )

    image_paths = []
    labels = []

    # build every picture's path and the label (class index) that goes with it
    for class_id, class_name in enumerate(class_names):
        class_path = os.path.join(image_folder, class_name, "images")
        for img_name in sorted(os.listdir(class_path)):
            image_paths.append(os.path.join(class_path, img_name))
            labels.append(class_id)

    return image_paths, labels, class_names


# feeding the images to the model, getting logits, transforming to probabilities and applying the thresholds
def predict_image(data_loader, model, device, indices):
    """Run batched inference and feed the thresholded predictions to the metrics.

    For every batch the model's logits are passed through a sigmoid, the
    columns listed in ``indices`` are selected, the per-class thresholds are
    applied, and the module-level accuracy / precision / recall / F1 metric
    objects are updated.

    Args:
        data_loader: iterable yielding ``(images_batch, labels)`` pairs.
        model: network called on each image batch to produce logits.
        device: device the image batches are moved to before inference.
        indices: column indices of the model output to evaluate.
    """
    metrics = (accuracy_metric, precision_metric, recall_metric, f1_metric)

    # inference_mode context manager: no gradient tracking during the forward pass
    with torch.inference_mode():

        # taking batches instead of single images from data_loader
        for images_batch, labels in data_loader:

            # only the images are needed on the model's device
            images_batch = images_batch.to(device)

            # retrieve a batch of logits and convert them to probabilities
            logits = model(images_batch)
            probabilities = torch.sigmoid(logits)

            # keep only the probabilities of the chosen classes
            selected_probs = probabilities[:, indices]

            # apply thresholds to the batch; moved to the CPU once for all metrics
            # (named `predictions` so the module-level `thresholds` is not shadowed)
            predictions = evaluate_threshold(selected_probs).cpu()
            labels = labels.cpu()

            # the metrics accumulate TP, TN, FP, FN from predictions and labels
            for metric in metrics:
                metric.update(predictions, labels)


# apply the thresholds to a batch of probabilities
def evaluate_threshold(output_probs, threshold_values=None):
    """Binarise a batch of probabilities with per-class thresholds.

    Args:
        output_probs: tensor of probabilities, one column per class.
        threshold_values: optional per-class thresholds; when omitted the
            module-level ``thresholds`` configuration is used.

    Returns:
        Integer tensor shaped like ``output_probs`` holding 1 where the
        probability is strictly greater than its threshold and 0 elsewhere.
    """
    if threshold_values is None:
        threshold_values = thresholds

    # shape (1, n) so the thresholds broadcast over the batch dimension;
    # same dtype and device as the probabilities they are compared with
    thresholds_tensor = torch.as_tensor(
        threshold_values, dtype=output_probs.dtype, device=output_probs.device
    ).view(1, -1)
    return (output_probs > thresholds_tensor).int()


# calculate metrics
def compute_metrics():
    """Return ``(accuracy, precision, recall, f1)`` as plain Python numbers.

    The values are computed from the module-level metric objects.
    """
    ordered_metrics = (accuracy_metric, precision_metric, recall_metric, f1_metric)
    return tuple(metric.compute().item() for metric in ordered_metrics)


def main():
    """Evaluate the pretrained ResNet-50 on the downloaded images and print the metrics.

    Raises:
        ValueError: if the number of class folders differs from the number of
            configured thresholds, or if a class folder name is not one of
            the model's categories.
    """
    model, device, weights = load_resnet()

    images, labels, class_names = load_images(dataset_path)

    # labels, thresholds and metrics must all describe the same set of classes
    if len(class_names) != len(thresholds):
        raise ValueError(
            f"Found {len(class_names)} classes {class_names} but "
            f"{len(thresholds)} thresholds are configured"
        )

    # look up the model output index of every chosen class; fail loudly rather
    # than dropping an unknown class, which would misalign outputs and labels
    resnet_classes = weights.meta["categories"]
    missing = [name for name in class_names if name not in resnet_classes]
    if missing:
        raise ValueError(f"Classes not found among the model categories: {missing}")
    indices = [resnet_classes.index(name) for name in class_names]

    # initialize the custom dataset with the preprocessing transform of the weights;
    # the one-hot size follows the class list so an empty class folder cannot shrink it
    dataset = ImagesDataset(images, labels, len(class_names), weights.transforms())

    # workers and prefetching are not used: the jupyter environment doesn't handle
    # multiprocessing well and the data loader crashes
    # (a python script run in vs code does work with workers and prefetching)
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    predict_image(data_loader, model, device, indices)

    accuracy, precision, recall, f1 = compute_metrics()

    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")


# run the evaluation when this cell / script is executed directly
if __name__ == "__main__":
    main()

Using device: cuda
Accuracy: 0.7369
Precision: 0.5727
Recall: 0.9960
F1 Score: 0.7230
