Žygimantas Augustas Nemura 2110605 VGG19 [Strawberry, Bee, Goldfish]

Gauname nuotraukų

In [75]:
!pip install openimages



In [76]:
import os
from openimages.download import download_dataset

In [77]:
# Classes selected from OpenImages
classes = ["Strawberry", "Bee", "Goldfish"]
# Sample limit handed to the downloader
number_for_samples = 333
# Directory the images are downloaded into
data_dir = "OpenImages"

In [78]:
# Create the download directory. `exist_ok=True` removes the check-then-create
# race and keeps the cell safe to re-run.
os.makedirs(data_dir, exist_ok=True)

In [79]:
# Announce the start, then download the images of the selected classes.
print("Downloading is starting...")
download_summary = download_dataset(data_dir, classes, limit=number_for_samples)
download_summary  # last expression: the notebook displays the returned mapping

Downloading is starting...


100%|██████████| 333/333 [00:06<00:00, 48.85it/s]
100%|██████████| 333/333 [00:05<00:00, 55.75it/s]
100%|██████████| 333/333 [00:06<00:00, 54.67it/s]


{'strawberry': {'images_dir': 'OpenImages/strawberry/images'},
 'bee': {'images_dir': 'OpenImages/bee/images'},
 'goldfish': {'images_dir': 'OpenImages/goldfish/images'}}

Paruošiam transformacijos parametrus

In [80]:
from torchvision import transforms

# The transformations are needed so that the images fit the model.
_resize_step = transforms.Resize((224, 224))
_tensor_step = transforms.ToTensor()
_normalize_step = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)
transform = transforms.Compose([_resize_step, _tensor_step, _normalize_step])

Custom Dataset Class

In [81]:
from torch.utils.data import Dataset
from PIL import Image

class GMMDataset(Dataset):
    """Custom dataset over a ``<data_dir>/<class_name>/images`` folder layout.

    Every image file found in a class folder becomes one sample whose label is
    the integer assigned to that class in ``class_to_idx``.

    Args:
        data_dir: Root directory that contains one sub-folder per class.
        transform: Optional callable applied to the RGB PIL image in
            ``__getitem__``.
        class_to_idx: Optional mapping from class folder name to integer label.
            Defaults to ``{"strawberry": 0, "bee": 1, "goldfish": 2}``.
    """

    # Only files with these extensions are treated as images; other entries
    # (hidden files, sub-folders, metadata) are skipped instead of crashing
    # later in Image.open.
    IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp")

    def __init__(self, data_dir, transform=None, class_to_idx=None):
        self.data_dir = data_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []

        # Give each class a numeric value so the labels can be compared with model output
        if class_to_idx is None:
            class_to_idx = {"strawberry": 0, "bee": 1, "goldfish": 2}
        self.class_to_idx = dict(class_to_idx)

        for class_name, class_idx in self.class_to_idx.items():
            class_path = os.path.join(data_dir, class_name, "images")
            # A class whose folder is missing contributes no samples.
            if not os.path.isdir(class_path):
                continue
            # Sorted so that sample indices are reproducible between runs;
            # os.listdir returns entries in arbitrary order.
            for img_name in sorted(os.listdir(class_path)):
                img_path = os.path.join(class_path, img_name)
                if not os.path.isfile(img_path):
                    continue
                if not img_name.lower().endswith(self.IMAGE_EXTENSIONS):
                    continue
                self.image_paths.append(img_path)
                self.labels.append(class_idx)

        print(f"Loaded {len(self.image_paths)} images from {data_dir}")

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        The image is opened, converted to RGB and, if a transform was given,
        passed through it.
        """
        img_path = self.image_paths[idx]
        label = self.labels[idx]

        # The context manager closes the file handle; convert() returns an
        # independent image, so it stays usable afterwards.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("RGB")

        if self.transform is not None:
            image = self.transform(image)

        return image, label



Create a dataset and put it in a dataloader



In [82]:
from torch.utils.data import DataLoader
# Build the dataset over the downloaded images
dataset = GMMDataset(transform=transform, data_dir="OpenImages")

# Create the dataloader, load it with our dataset, and keep an iterator over it
dataloader = DataLoader(
    dataset=dataset,
    batch_size=32,
    shuffle=True,
    num_workers=2,
)
iterator = iter(dataloader)

Loaded 999 images from OpenImages


Paruošiam pre-trained modelį darbui

In [83]:
import torch
from torchvision import models

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the pre-trained VGG19. Newer torchvision releases deprecate
# `pretrained=True` in favour of the `weights=` enum; older releases do not
# have the enum, so fall back to the legacy argument there.
if hasattr(models, "VGG19_Weights"):
    model = models.vgg19(weights=models.VGG19_Weights.IMAGENET1K_V1)
else:
    model = models.vgg19(pretrained=True)

# Switch to evaluation mode and move the model to the selected device
model.eval().to(device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padd

Skaičiavimai

In [84]:
import numpy as np

# ImageNet class IDs (the classes the model was trained on) for our three categories
STRAWBERRY_IDX = 949  # Strawberry
BEE_IDX = 309  # Bee
GOLDFISH_IDX = 1  # Goldfish

# Per-batch buffers; concatenated once after the loop instead of calling
# np.append per sample (which re-copies the whole array every time).
_label_batches = []
_score_batches = []

# Feed every image to the model and collect the scores.
# - Iterating `dataloader` directly creates a fresh iterator, so re-running this
#   cell does not depend on an already-exhausted iterator from another cell.
# - torch.no_grad() avoids building autograd graphs during inference.
with torch.no_grad():
    for features, labels in dataloader:
        output = model(features.to(device))
        # NOTE(review): sigmoid is applied to each logit independently, as in the
        # original method; ImageNet classifiers are usually read through softmax —
        # confirm that per-class sigmoid scores are what is intended here.
        scores = torch.sigmoid(output)[:, [STRAWBERRY_IDX, BEE_IDX, GOLDFISH_IDX]]
        _score_batches.append(scores.cpu().numpy())
        _label_batches.append(np.asarray(labels))

if _score_batches:
    _scores = np.concatenate(_score_batches).astype(np.float64)
    ground_truth = np.concatenate(_label_batches).astype(np.float64)  # image labels
else:
    # Empty dataset: keep well-formed empty arrays so later cells still run
    _scores = np.empty((0, 3), dtype=np.float64)
    ground_truth = np.empty(0, dtype=np.float64)

# Scores that an image belongs to each class, aligned with ground_truth
predictions_strawberry = _scores[:, 0]
predictions_bee = _scores[:, 1]
predictions_goldfish = _scores[:, 2]

# Confusion matrix after applying the threshold
def calculate_confusion_matrix(ground_truth, predictions, class_idx, threshold=0.5):
    """Count TP/TN/FP/FN for one class treated as a binary problem.

    A sample is a true positive when its label equals ``class_idx`` and a
    predicted positive when its score is ``>= threshold``.

    Args:
        ground_truth: Sequence or array of labels, one per sample.
        predictions: Sequence or array of scores, same shape as ``ground_truth``.
        class_idx: Label value that counts as the positive class.
        threshold: Minimum score for a positive prediction.

    Returns:
        dict with the keys ``'TP'``, ``'TN'``, ``'FP'`` and ``'FN'``.

    Raises:
        ValueError: If ``ground_truth`` and ``predictions`` differ in shape.
    """
    # Convert both inputs: comparing a plain list with `== class_idx` yields a
    # single bool instead of an element-wise mask and silently zeroes the counts.
    ground_truth = np.asarray(ground_truth)
    predictions = np.asarray(predictions)
    if ground_truth.shape != predictions.shape:
        raise ValueError(
            f"ground_truth shape {ground_truth.shape} does not match "
            f"predictions shape {predictions.shape}"
        )

    is_positive = ground_truth == class_idx
    predicted_positive = predictions >= threshold

    matrix = {
        'TP': np.sum(is_positive & predicted_positive),
        'TN': np.sum(~is_positive & ~predicted_positive),
        'FP': np.sum(~is_positive & predicted_positive),
        'FN': np.sum(is_positive & ~predicted_positive),
    }
    return matrix


# Metrics
def calculate_metrics(TP, TN, FP, FN):
    """Derive accuracy, recall, precision and F1 from confusion-matrix counts.

    Any metric whose denominator is zero is reported as 0.0 instead of raising
    or producing NaN.

    Args:
        TP: Number of true positives.
        TN: Number of true negatives.
        FP: Number of false positives.
        FN: Number of false negatives.

    Returns:
        dict with the keys ``'accuracy'``, ``'recall'``, ``'precision'`` and ``'f1'``.
    """
    total = TP + FP + TN + FN
    actual_positives = TP + FN
    predicted_positives = TP + FP

    metrics = {}
    # Guarded like the other metrics: an empty evaluation set has no accuracy
    metrics['accuracy'] = (TP + TN) / total if total > 0 else 0.0
    metrics['recall'] = TP / actual_positives if actual_positives > 0 else 0.0
    metrics['precision'] = TP / predicted_positives if predicted_positives > 0 else 0.0

    pr_sum = metrics['recall'] + metrics['precision']
    metrics['f1'] = 2 * (metrics['recall'] * metrics['precision']) / pr_sum if pr_sum > 0 else 0.0
    return metrics

# Evaluate every class with threshold T=0.5
threshold = 0.5

# Confusion matrices: dataset labels are 0 = strawberry, 1 = bee, 2 = goldfish
matrix_strawberry = calculate_confusion_matrix(
    ground_truth, predictions_strawberry, class_idx=0, threshold=threshold
)
matrix_bee = calculate_confusion_matrix(
    ground_truth, predictions_bee, class_idx=1, threshold=threshold
)
matrix_goldfish = calculate_confusion_matrix(
    ground_truth, predictions_goldfish, class_idx=2, threshold=threshold
)

# The matrix keys (TP, TN, FP, FN) match the parameter names of calculate_metrics
metrics_strawberry = calculate_metrics(**matrix_strawberry)
metrics_bee = calculate_metrics(**matrix_bee)
metrics_goldfish = calculate_metrics(**matrix_goldfish)

# Results
def print_metrics(metrics, class_name):
    """Print the accuracy, recall, precision and F1-score of one class.

    Args:
        metrics: Mapping with the keys "accuracy", "recall", "precision" and "f1".
        class_name: Name shown in the report header.
    """
    print(f'Class {class_name} Metrics:')
    # (padded label, metrics key, trailing text); the last row adds a blank line
    rows = (
        ('Accuracy ', 'accuracy', ''),
        ('Recall   ', 'recall', ''),
        ('Precision', 'precision', ''),
        ('F1-score ', 'f1', '\n'),
    )
    for label, key, tail in rows:
        print(f'  {label}: {metrics[key]:.2f}{tail}')


# Report the metrics of each class in turn
for _class_metrics, _class_label in (
    (metrics_strawberry, "Strawberry"),
    (metrics_bee, "Bee"),
    (metrics_goldfish, "Goldfish"),
):
    print_metrics(_class_metrics, _class_label)

Class Strawberry Metrics:
  Accuracy : 0.40
  Recall   : 1.00
  Precision: 0.36
  F1-score : 0.52

Class Bee Metrics:
  Accuracy : 0.44
  Recall   : 1.00
  Precision: 0.37
  F1-score : 0.54

Class Goldfish Metrics:
  Accuracy : 0.41
  Recall   : 0.98
  Precision: 0.36
  F1-score : 0.53



Individuali nuotrauka

In [85]:
import json
import urllib
import urllib.request  # `import urllib` alone does not guarantee the `request` submodule is loaded

# ImageNet class names, one per line; the list is indexed with the predicted index below
url = "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt"
with urllib.request.urlopen(url) as response:
    # splitlines() avoids the empty trailing entry that split("\n") leaves behind
    imagenet_classes = response.read().decode("utf-8").splitlines()


# Image to be analysed
image_path = "dog.jpg"
# Convert to RGB so grayscale/RGBA files still produce the 3 channels that the
# normalisation step of `transform` is configured for, then add a batch dimension.
with Image.open(image_path) as pil_image:
    img = transform(pil_image.convert("RGB")).unsqueeze(0).to(device)

# Classify the image
with torch.no_grad():
    output = model(img)
    probabilities = torch.sigmoid(output).squeeze()
    predicted_idx = probabilities.argmax().item()

# Result
predicted_class = imagenet_classes[predicted_idx]
print(f"Predicted class: {predicted_class}")



Predicted class: Border collie
