# 1. Extract features of training and test images using a pre-trained ResNet50 model (50 points)
Please print the size of extracted features, e.g., training features: 1400 * d, test features: 400 *d.

### Dataset Preparation:

Define labeled (for train) and unlabeled (for test) dataset classes:

In [1]:
import os
from typing import List, Tuple

from PIL import Image

import torch
import torch.utils
import torchvision
from torch.utils.data import Dataset
from torchvision.datasets import ImageFolder

In [2]:
class DogHeartLabeledDataset(ImageFolder):

    #extend
    def __init__(self, data_root: str) -> None:
        self.transformation = torchvision.transforms.Compose([
            torchvision.transforms.Resize((224, 224)),
            torchvision.transforms.ToTensor(),
        ])
        super().__init__(root=data_root, transform=self.transformation)
        self.data_root: str = data_root

        self.filepaths: List[str] = [path for path, _ in self.samples]
        self.filenames: List[str] = [path.split('/')[-1] for path in self.filepaths]
        self.labels: List[int] = [label for _, label in self.samples]

    #extend
    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, int, str]:
        tensor: torch.Tensor; label: int
        tensor, label = super().__getitem__(idx)
        tensor = tensor.half()
        filename: str = self.filenames[idx]
        return tensor, label, filename

In [3]:
class DogHearUnlabeledDataset(Dataset):

    def __init__(self, data_root: str) -> None:
        self.data_root: str = data_root
        self.transformation = torchvision.transforms.Compose([
            torchvision.transforms.Resize((224, 224)),
            torchvision.transforms.ToTensor(),
        ])
        self.filenames: List[str] = os.listdir(self.data_root)
    
    def __len__(self) -> int:
        return len(self.filenames)
    
    def __getitem__(self, idx) -> Tuple[torch.Tensor, str]:
        filename: str = self.filenames[idx]
        image: Image = Image.open(os.path.join(self.data_root, filename))
        if image.mode == 'RGBA':
            image = image.convert('RGB')
        tensor: torch.Tensor = self.transformation(image).half()
        return tensor, filename

### Feature Extraction

Define `FeatureExtractor` class:

In [4]:
from typing import List

import numpy as np
import pandas as pd

from sklearn.base import BaseEstimator
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision.models import resnet50, ResNet50_Weights

In [5]:
class FeatureExtractor(nn.Module):

    def __init__(self) -> None:
        super().__init__()
        resnet = resnet50(weights=ResNet50_Weights.DEFAULT)
        # get the feature extractor blocks, drop last FC layer
        self.__extractor = nn.Sequential(
            *list(resnet.children())[:-1], 
            nn.Flatten(start_dim=1, end_dim=-1)
        ).half()

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        assert input.ndim == 4
        assert input.shape[1:] == (3, 224, 224)
        return self.__extractor(input)

Call the `FeatureExtractor` on train data:

In [6]:
# Choose device
device = torch.device('cuda')

# Instantiate train data
train_dataset = DogHeartLabeledDataset(data_root='Dog_heart/Train')
train_dataloader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=False)

# Instantiate a feature extractor
feature_extractor = FeatureExtractor().to(device)

# Apply the feature extractor on train data
train_dataloader = DataLoader(
    dataset=train_dataset, 
    batch_size=32, 
    shuffle=True,
)
input_tensors: List[torch.Tensor] = []
label_tensors: List[torch.Tensor] = []
for input_tensor, label_tensor, _ in train_dataloader:
    input_tensors.append(input_tensor.to(device=device, dtype=torch.half))
    label_tensors.append(label_tensor.to(device=device))

input_tensor: torch.Tensor = torch.cat(input_tensors)
label_tensor: torch.Tensor = torch.cat(label_tensors)

with torch.no_grad():
    feature_extractor.eval()
    train_features: torch.Tensor = feature_extractor(input=input_tensor)

Print the shape of output features:

In [7]:
print(train_features.shape)

torch.Size([1400, 2048])


# 2. Call SVM and kNN from scikit-learn and train the extracted deep features, respectively (30 points)

Define the class `Predict` that accepts one feature extractor and one classifier:

In [8]:
class Predictor:

    def __init__(
        self, 
        feature_extractor: nn.Module,
        classifier: BaseEstimator,
        device: torch.device = torch.device('cuda')
    ):
        self.feature_extractor = feature_extractor.to(device)
        self.classifier = classifier
        self.device = device

    def fit(self, train_dataset: Dataset) -> None:
        train_dataloader = DataLoader(
            dataset=train_dataset, 
            batch_size=32, 
            shuffle=True,
        )
        input_tensors: List[torch.Tensor] = []
        label_tensors: List[torch.Tensor] = []
        for input_tensor, label_tensor, _ in train_dataloader:
            input_tensors.append(input_tensor.to(device=self.device, dtype=torch.half))
            label_tensors.append(label_tensor.to(device=self.device))

        input_tensor: torch.Tensor = torch.cat(input_tensors)
        label_tensor: torch.Tensor = torch.cat(label_tensors)

        with torch.no_grad():
            self.feature_extractor.eval()
            train_features: torch.Tensor = self.feature_extractor(input=input_tensor)

        self.classifier.fit(
            X=train_features.detach().cpu().numpy(), 
            y=label_tensor.cpu().numpy()
        )

    def predict(self, test_dataset: Dataset) -> pd.DataFrame:
        test_dataloader = DataLoader(
            dataset=test_dataset, 
            batch_size=32, 
            shuffle=False
        )
        input_tensors: List[torch.Tensor] = []
        filenames: List[str] = []
        for input_tensor, fnames in test_dataloader:
            input_tensors.append(input_tensor.to(device=self.device, dtype=torch.half))
            filenames.extend(fnames)

        input_tensor: torch.Tensor = torch.cat(input_tensors)

        with torch.no_grad():
            self.feature_extractor.eval()
            test_features: torch.Tensor = self.feature_extractor(input=input_tensor)

        predicted_labels: np.ndarray = self.classifier.predict(
            X=test_features.detach().cpu().numpy(),
        )
        prediction_table = pd.DataFrame(
            data={'image': filenames, 'label': predicted_labels}
        )
        prediction_table.to_csv(
            f'{self.classifier.__class__.__name__}_'
            f'{self.feature_extractor.__class__.__name__}.csv', 
            header=False, 
            index=False,
        )
        return prediction_table

Extract the features and use them to train 2 models: `SVM` and `kNN`, the predictions on test data are output as `.csv` files in the working directory:

In [9]:
device = torch.device('cuda')

train_dataset = DogHeartLabeledDataset(data_root='Dog_heart/Train')
test_dataset = DogHearUnlabeledDataset(data_root='Dog_heart/Test')

In [10]:
# SVM
resnet_svm = Predictor(
    feature_extractor=FeatureExtractor(), 
    classifier=SVC(), 
    device=device, 
)
resnet_svm.fit(train_dataset)
resnet_svm.predict(test_dataset)

Unnamed: 0,image,label
0,1967.png,0
1,1804.png,2
2,1988.png,1
3,1657.png,0
4,1810.png,0
...,...,...
395,1955.png,1
396,1969.png,0
397,1835.png,1
398,1821.png,0


In [11]:
# kNN with k=5
resnet_5nn = Predictor(
    feature_extractor=FeatureExtractor(), 
    classifier=KNeighborsClassifier(n_neighbors=5),
    device=device, 
)
resnet_5nn.fit(train_dataset)
resnet_5nn.predict(test_dataset)

Unnamed: 0,image,label
0,1967.png,1
1,1804.png,1
2,1988.png,1
3,1657.png,1
4,1810.png,0
...,...,...
395,1955.png,0
396,1969.png,0
397,1835.png,0
398,1821.png,0


# 3. Report the accuracy using Dog_X_ray_classfication_accuracy software, please attach the results image here (20 points)

SVM Classifier:

<img src="https://raw.githubusercontent.com/hiepdang-ml/dnn_assignment5/master/SVC_FeatureExtractor.png" alt="SVMPredictionImage" style="width:40%;"/>

5-NN Classifier:

<img src="https://raw.githubusercontent.com/hiepdang-ml/dnn_assignment5/master/KNeighborsClassifier_FeatureExtractor.png" alt="KNNPredictionImage" style="width:40%;"/>

---