# Paws and Prediction
## Overview of the project
0. Import libraries
1. Load and transform data
2. Visualize data
3. Import model
4. Perform feature analysis of model output

In [None]:
import torch
from torchvision.datasets import OxfordIIITPet
from torchvision import transforms, datasets, models
from torch.utils.data import DataLoader, random_split
from torchvision.transforms.functional import InterpolationMode
from torchvision.utils import make_grid
import matplotlib.pyplot as plt
import numpy as np
from torch import nn, optim
from torch.utils.data import Subset
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

import time

In [None]:
# Pick the compute device once: the first CUDA GPU when available, otherwise the CPU.
_cuda_available = torch.cuda.is_available()
device = torch.device("cuda:0" if _cuda_available else "cpu")

# Report whether a GPU was found so the reader can confirm the runtime setup.
print('Using gpu: %s ' % _cuda_available)

Using gpu: True 


## 1. Load and transform data

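We should decide how to transform the data and how to split it into training and validation sets.
The input size also matters: images are currently [3, 380, 380] (resized to 384, then center-cropped to 380), which will probably require a lot of compute to train. Look up what size seems reasonable.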

> "The inference transforms are available at EfficientNet_B4_Weights.IMAGENET1K_V1.transforms and perform the following preprocessing operations: Accepts PIL.Image, batched (B, C, H, W) and single (C, H, W) image torch.Tensor objects. The images are resized to resize_size=[384] using interpolation=InterpolationMode.BICUBIC, followed by a central crop of crop_size=[380]. Finally the values are first rescaled to [0.0, 1.0] and then normalized using mean=[0.485, 0.456, 0.406] and std=[0.229, 0.224, 0.225]."


#### It looks like random augmentations can be added :))

def get_train_transform(IMAGE_SIZE, pretrained):
    train_transform = transforms.Compose([
        transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)),
        transforms.RandomAdjustSharpness(sharpness_factor=2, p=0.5),
        transforms.ToTensor(),
        normalize_transform(pretrained)
    ])
    retur

In [None]:
#TODO - load data much faster

class CustomOxfordPets(OxfordIIITPet):
    """Oxford-IIIT Pet dataset whose targets can be remapped through a dict.

    Args:
        *args, **kwargs: forwarded unchanged to ``OxfordIIITPet``.
        label_map: optional dict mapping an original integer target to a new
            target. When ``None`` (the default) targets are returned unchanged.
    """

    def __init__(self, *args, label_map=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.label_map = label_map

    def __getitem__(self, index):
        """Return ``(image, target)`` with the target remapped when a mapping exists."""
        image, target = super().__getitem__(index)
        # Without a mapping there is nothing to translate; a membership test
        # against None would raise TypeError.
        if self.label_map is None:
            return image, target
        # Use the same integer key for the membership test and the lookup.
        key = int(target)
        if key in self.label_map:
            target = self.label_map[key]
        return image, target
class LoadData:
    """Load the Oxford-IIIT Pet dataset and build the DataLoaders used for training.

    Args:
        binary: when True the 37 original targets are remapped to two classes
            through ``initialize_label_map``; otherwise the original 37 targets
            are kept.

    Attributes set by ``__init__``: ``transform``, ``classes``,
    ``dataset_trainval``, ``test_dataset`` and, in binary mode, ``label_map``.
    ``data_loaders`` additionally sets ``train_loader``, ``val_loader``,
    ``test_loader``, ``trainPseudo_loader`` and ``guess_loader``.
    """

    def __init__(self, binary=True):
        self.transform = self.transform_images()

        if binary:
            self.classes = 2  # Might not be necessary
            self.label_map = self.initialize_label_map()
            self.bin_dataset()
        else:
            self.classes = 37
            self.mult_dataset()

    def initialize_label_map(self):
        """Return the dict mapping each of the 37 original targets to 0 or 1.

        NOTE(review): presumably 0 = cat breed and 1 = dog breed -- confirm
        against the dataset's class list.
        """
        return {
            0: 0, 1: 1, 2: 1, 3: 1, 4: 1, 5: 0,
            6: 0, 7: 0, 8: 1, 9: 0, 10: 1, 11: 0, 12: 1,
            13: 1, 14: 1, 15: 1, 16: 1, 17: 1, 18: 1,
            19: 1, 20: 0, 21: 1, 22: 1, 23: 0, 24: 1,
            25: 1, 26: 0, 27: 0, 28: 1, 29: 1, 30: 1,
            31: 1, 32: 0, 33: 0, 34: 1, 35: 1, 36: 1
        }

    def bin_dataset(self):
        """Load the trainval and test splits with targets remapped via ``self.label_map``."""
        self.dataset_trainval = CustomOxfordPets(
            root='./data', transform=self.transform, download=True, label_map=self.label_map, split='trainval'
        )
        self.test_dataset = CustomOxfordPets(
            root='./data', transform=self.transform, download=True, label_map=self.label_map, split='test'
        )

    def mult_dataset(self):
        """Load the trainval and test splits with their original targets."""
        self.dataset_trainval = OxfordIIITPet(
            root='./data', transform=self.transform, download=True, split='trainval'
        )
        self.test_dataset = OxfordIIITPet(
            root='./data', transform=self.transform, download=True, split='test'
        )

    def data_loaders(self, batch_size, split_ratio, labeled_percent=1, seed=None):
        """Split the trainval set and create all DataLoaders.

        Args:
            batch_size: batch size used by every loader.
            split_ratio: fraction of the trainval set used for training; the
                remainder becomes the validation set.
            labeled_percent: when < 1, the training subset is split again into
                a "labelled" part (``trainPseudo_loader``) holding this fraction
                and an "unlabelled" remainder (``guess_loader``). Otherwise both
                attributes are set to empty lists. ``train_loader`` always
                covers the whole training subset.
            seed: optional integer seeding the random splits so that they are
                reproducible. ``None`` keeps the global RNG (unseeded) behaviour.
        """
        # Only hand a generator to random_split when a seed was requested, so
        # the default call path is unchanged.
        split_kwargs = {}
        if seed is not None:
            split_kwargs["generator"] = torch.Generator().manual_seed(seed)

        train_size = int(split_ratio * len(self.dataset_trainval))
        val_size = len(self.dataset_trainval) - train_size

        train_dataset, val_dataset = random_split(
            self.dataset_trainval, [train_size, val_size], **split_kwargs
        )

        self.train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

        if labeled_percent < 1:
            print("true size = " + str(train_size))
            pseudo_train_size = int(labeled_percent * train_size)
            print("labeled train size = " + str(pseudo_train_size))

            pseudo_guess_size = train_size - pseudo_train_size
            print("unlabeled size = " + str(pseudo_guess_size))

            train_pseudo, guess_pseudo = random_split(
                train_dataset, [pseudo_train_size, pseudo_guess_size], **split_kwargs
            )

            self.trainPseudo_loader = DataLoader(train_pseudo, batch_size=batch_size, shuffle=True)
            self.guess_loader = DataLoader(guess_pseudo, batch_size=batch_size, shuffle=True)
        else:
            self.trainPseudo_loader = []
            self.guess_loader = []

        # Evaluation loaders do not need shuffling: the metrics computed from
        # them do not depend on sample order.
        self.val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
        self.test_loader = DataLoader(self.test_dataset, batch_size=batch_size, shuffle=False)

    def transform_images(self):
        """Return the preprocessing pipeline applied to every image.

        Resize (shorter side 384, bicubic), center-crop to 380, convert to a
        tensor and normalize with the mean/std listed below.
        """
        return transforms.Compose([
            transforms.Resize(384, interpolation=InterpolationMode.BICUBIC),
            transforms.CenterCrop(380),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])





## 2. Train Class to initialize all training

In [None]:

class Train:
    """Train and evaluate a single-logit classifier head on a frozen EfficientNet-B4.

    Only binary classification is implemented: the head outputs one logit and
    is trained with ``BCEWithLogitsLoss``. The ``binary`` flag is stored but
    does not change the model, the loss or the evaluation.

    Args:
        lr: learning rate for the Adam optimizer (classifier parameters only).
        bs: batch size used for every DataLoader.
        epochs: number of training epochs.
        split_ratio: fraction of the trainval set used for training.
        binary: stored on the instance; see the note above.
        model: optional path to a checkpoint. A ``state_dict`` (as written by
            ``train``) is loaded into the freshly built network; a pickled
            ``nn.Module`` is used as-is.
        _LoadData: a ``LoadData`` instance. When ``None`` a new ``LoadData()``
            is created.
        labeled_percent: forwarded to ``LoadData.data_loaders``.
    """

    def __init__(self, lr: float, bs: int, epochs: int, split_ratio: float, binary: bool, model,
                 _LoadData=None, labeled_percent = 1):
        self.lr = lr
        self.batch_size = bs
        self.split_ratio = split_ratio
        self.epochs = epochs
        self.binary = binary

        self.labeled_percent = labeled_percent
        # Create the default lazily: a LoadData() default argument would be
        # evaluated once when the class is defined (loading the dataset at that
        # moment) and then shared by every Train instance.
        self.data = LoadData() if _LoadData is None else _LoadData
        self.data.data_loaders(self.batch_size, self.split_ratio, labeled_percent=self.labeled_percent)

        self.train_loader = self.data.train_loader
        self.val_loader = self.data.val_loader
        self.test_loader = self.data.test_loader

        self.train_pseudo_loader = self.data.trainPseudo_loader

        self.guess_loader = self.data.guess_loader

        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

        self.model = self.load_model()
        if model is not None:
            # train() saves a state_dict, so load it into the freshly built
            # network; a fully pickled module is still accepted.
            checkpoint = torch.load(model, map_location=self.device)
            if isinstance(checkpoint, nn.Module):
                self.model = checkpoint
            else:
                self.model.load_state_dict(checkpoint)

        print('Using gpu: %s ' % torch.cuda.is_available())

        self.validation_losses = []
        self.training_losses = []

        self.loss_func = nn.BCEWithLogitsLoss()  # binary classification only
        self.optimizer = optim.Adam(self.model.classifier.parameters(), lr=self.lr)

    def load_model(self):
        """Build an ImageNet-pretrained EfficientNet-B4 with a frozen backbone.

        All pretrained parameters get ``requires_grad = False``; the final
        classifier layer is replaced by a new trainable ``nn.Linear`` with a
        single output.
        """
        model_b4 = models.efficientnet_b4(weights = models.EfficientNet_B4_Weights.IMAGENET1K_V1)

        # TODO - decide whether every layer should be frozen
        for param in model_b4.parameters():
            param.requires_grad = False

        num_features = model_b4.classifier[1].in_features

        # Replace the classifier layer (new layers are trainable by default)
        model_b4.classifier[1] = nn.Linear(num_features, 1)

        return model_b4

    def _set_train_mode(self):
        """Put only the classifier head in training mode.

        Freezing parameters does not freeze BatchNorm running statistics (or
        disable stochastic depth): both follow the module's train/eval mode.
        The backbone is therefore kept in eval mode so it stays fixed.
        """
        self.model.eval()
        self.model.classifier.train()

    def pseudo_labels(self):
        """Replace ``self.train_loader`` with K-means pseudo-labelled samples.

        Features of every training image are extracted with a fresh pretrained
        backbone (classifier replaced by identity) and clustered into two
        groups; the true targets of ``self.train_loader`` are never read. The
        labelled subset in ``self.train_pseudo_loader`` decides which cluster
        id corresponds to which class: if fewer than half of its samples agree
        with their cluster id, the cluster ids are swapped.
        """
        extractor = self.load_model()
        extractor.classifier = torch.nn.Identity()
        extractor.eval()
        extractor.to(self.device)

        # One pass collects both the images and their features, so each
        # image stays aligned with the cluster id assigned to it below.
        image_batches = []
        feature_batches = []
        with torch.no_grad():
            for images, _ in self.train_loader:  # NOTE: targets are deliberately ignored
                feature_batches.append(extractor(images.to(self.device)).cpu())
                image_batches.append(images.to("cpu"))

        features = torch.cat(feature_batches, dim=0).numpy()
        kmeans = KMeans(n_clusters=2, random_state=0)
        cluster_ids = kmeans.fit_predict(features)

        # Use the small labelled subset to align cluster ids with class labels.
        matches = 0
        total = 0
        with torch.no_grad():
            for images, labels in self.train_pseudo_loader:
                subset_features = extractor(images.to(self.device)).cpu().numpy()
                predicted = kmeans.predict(subset_features)
                matches += int((predicted == labels.numpy()).sum())
                total += len(labels)
        flip = total > 0 and matches < total / 2

        modified_dataset = []
        position = 0
        for batch in image_batches:
            for image in batch:
                cluster = int(cluster_ids[position])
                modified_dataset.append((image, 1 - cluster if flip else cluster))
                position += 1

        self.train_loader = DataLoader(modified_dataset, batch_size=self.batch_size, shuffle=True)

    def train(self):
        """Train the classifier head, evaluate on the test set and save the weights.

        Appends per-epoch losses to ``self.training_losses`` and
        ``self.validation_losses`` and writes the model's ``state_dict`` to a
        file named after batch size, learning rate and test accuracy.

        Returns:
            The test accuracy returned by ``evaluate``.
        """
        self.model.to(self.device)
        self._set_train_mode()

        start_time = time.time()

        for epoch in range(self.epochs):
            epoch_loss = 0.0
            for images, labels in self.train_loader:
                # BCEWithLogitsLoss needs float targets shaped like the logits.
                labels = labels.float()

                images, labels = images.to(self.device), labels.to(self.device)
                self.optimizer.zero_grad()
                outputs = self.model(images)
                loss = self.loss_func(outputs, labels.unsqueeze(1))
                loss.backward()
                self.optimizer.step()

                epoch_loss += loss.item()

            # Training loss is the mean over batches.
            avg_epoch_loss = epoch_loss / len(self.train_loader)
            val_loss = self.compute_validation_loss()
            print(f'Epoch {epoch+1}, Training Loss: {avg_epoch_loss} \t Validation Loss: {val_loss}')

            self.training_losses.append(avg_epoch_loss)
            self.validation_losses.append(val_loss)

        elapsed_time = time.time() - start_time
        print(f'Training finished in {elapsed_time:.2f} seconds')

        accuracy = self.evaluate()

        save_path = f'model_bs{self.batch_size}_lr{self.lr}_acc{np.round(accuracy, 4)}.pt'
        torch.save(self.model.state_dict(), save_path)
        print(f"Model saved at '{save_path}'")

        return accuracy

    def compute_validation_loss(self):
        """Return the per-sample mean loss on the validation set.

        The model is evaluated in eval mode and switched back to training mode
        (classifier head only) before returning.
        """
        self.model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for images, labels in self.val_loader:
                labels = labels.float()
                images, labels = images.to(self.device), labels.to(self.device)
                outputs = self.model(images)
                loss = self.loss_func(outputs, labels.unsqueeze(1))
                # Weight by batch size so a smaller final batch is not over-counted.
                val_loss += loss.item() * images.size(0)
        val_loss /= len(self.val_loader.dataset)
        self._set_train_mode()
        return val_loss

    # Works only for the binary dataset.
    def evaluate(self):
        """Return the test-set accuracy, rounded to four decimals.

        A sample counts as correct when ``sigmoid(logit) > 0.5`` agrees with
        ``label > 0.5``. Returns 0.0 when the test loader yields no samples.
        """
        self.model.to(self.device)
        self.model.eval()
        correct = 0
        total = 0
        # No gradients are needed for evaluation; tracking them only costs
        # time and memory.
        with torch.no_grad():
            for images, labels in self.test_loader:
                images, labels = images.to(self.device), labels.to(self.device)
                logits = self.model(images).reshape(-1)
                predictions = torch.sigmoid(logits) > 0.5
                targets = labels.reshape(-1) > 0.5
                correct += int((predictions == targets).sum().item())
                total += int(targets.numel())

        # Divide by the real sample count: len(loader) * batch_size
        # over-counts whenever the last batch is not full.
        accuracy = correct / total if total else 0.0

        print("Accuracy on test set: ", np.round(accuracy, 4))

        return np.round(accuracy, 4)



Downloading https://thor.robots.ox.ac.uk/datasets/pets/images.tar.gz to data/oxford-iiit-pet/images.tar.gz


100%|██████████| 791918971/791918971 [00:27<00:00, 29220584.84it/s]


Extracting data/oxford-iiit-pet/images.tar.gz to data/oxford-iiit-pet
Downloading https://thor.robots.ox.ac.uk/datasets/pets/annotations.tar.gz to data/oxford-iiit-pet/annotations.tar.gz


100%|██████████| 19173078/19173078 [00:01<00:00, 13980404.23it/s]


Extracting data/oxford-iiit-pet/annotations.tar.gz to data/oxford-iiit-pet


# K-means run

In [None]:
# Hyperparameters for the K-means pseudo-labelling experiment.
bs = 8  # batch size used by every DataLoader
lr = 0.0005  # learning rate passed to Train (used for its Adam optimizer)
labeled_percent = 0.01  # fraction of the training split kept as the labelled subset
_LoadData = LoadData(binary=True)
train = Train(lr=lr, bs=bs, epochs=16, split_ratio=0.8, binary=True, model=None, _LoadData=_LoadData, labeled_percent=labeled_percent)
# Replace the training loader with K-means pseudo-labelled samples, then train on them.
train.pseudo_labels()
train.train()

true size = 2944
labeled train size = 29
unlabeled size = 2915
Using gpu: True 




29
29
Epoch 1, Training Loss: 0.42390454981638037 	 Validation Loss: 0.30980203867606493
Epoch 2, Training Loss: 0.22318633736880578 	 Validation Loss: 0.20029697194695473
Epoch 3, Training Loss: 0.16273100171035723 	 Validation Loss: 0.13997929000660128
Epoch 4, Training Loss: 0.13644226754853583 	 Validation Loss: 0.11775481077316015
Epoch 5, Training Loss: 0.12114921688486863 	 Validation Loss: 0.09214107356155696
Epoch 6, Training Loss: 0.10349188435255834 	 Validation Loss: 0.08092344240487917
Epoch 7, Training Loss: 0.09826500561726077 	 Validation Loss: 0.0679983485530576
Epoch 8, Training Loss: 0.09247445941993805 	 Validation Loss: 0.06645629010365708
Epoch 9, Training Loss: 0.08447076550953131 	 Validation Loss: 0.05392550795501017
Epoch 10, Training Loss: 0.07828910330072572 	 Validation Loss: 0.04760298908323697
Epoch 11, Training Loss: 0.08083131039798584 	 Validation Loss: 0.04917869884925692
Epoch 12, Training Loss: 0.07719514288043644 	 Validation Loss: 0.03856950100389

0.9869