# 1. Build your own neural network with 3 hidden layers using pytorch (60 points)

In [1]:
import os
from typing import List, Dict, Tuple
from abc import ABC, abstractmethod

from PIL import Image
import numpy as np
import skimage.feature

import torch
import torch.utils
import torchvision
from torch.utils.data import Dataset

Redefine dataset classes and feature extractors:

In [2]:
class DogHeartLabeledDataset(Dataset):

    def __init__(self, data_root: str) -> None:
        self.data_root: str = data_root
        self.classes: List[str] = os.listdir(data_root)
        self.class_to_idx: Dict[str, int] = {cls_name: i for i, cls_name in enumerate(self.classes)}

        self.transformation = torchvision.transforms.Compose([
            torchvision.transforms.Grayscale(),
            torchvision.transforms.Resize((224, 224)),
            torchvision.transforms.ToTensor(),
        ])

        self.filenames: List[str] = []
        self.filepaths: List[str] = []
        self.labels: List[int] = []

        for class_name in self.classes:
            path: str = os.path.join(data_root, class_name)
            for filename in os.listdir(path):
                self.filenames.append(filename)
                self.filepaths.append(os.path.join(path, filename))
                self.labels.append(self.class_to_idx[class_name])

    def __len__(self) -> int:
        return len(self.filenames)

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor, str]:
        filename: str = self.filenames[idx]
        filepath: str = self.filepaths[idx]
        image: Image = Image.open(filepath)
        label: torch.Tensor = torch.tensor(self.labels[idx])
        tensor: torch.Tensor = self.transformation(image)
        tensor = tensor.squeeze(0)
        return tensor, label, filename
    

class DogHearUnlabeledDataset(Dataset):

    def __init__(self, data_root: str) -> None:
        self.data_root: str = data_root
        self.transformation = torchvision.transforms.Compose([
            torchvision.transforms.Grayscale(),
            torchvision.transforms.Resize((224, 224)),
            torchvision.transforms.ToTensor(),
        ])
        self.filenames: List[str] = os.listdir(self.data_root)
    
    def __len__(self) -> int:
        return len(self.filenames)
    
    def __getitem__(self, idx) -> Tuple[torch.Tensor, str]:
        filename: str = self.filenames[idx]
        image: Image = Image.open(os.path.join(self.data_root, filename))
        tensor: torch.Tensor = self.transformation(image)
        tensor = tensor.squeeze(0)
        return tensor, filename

In [3]:
class FeatureExtractor(ABC):

    @abstractmethod
    def __call__(self, image_array: np.ndarray) -> np.ndarray:
        pass
    

class HOG(FeatureExtractor):

    def __init__(self, channel_axis: int = None) -> None:
        self.channel_axis: int = channel_axis

    def __call__(self, image_array: np.ndarray) -> np.ndarray:
        return skimage.feature.hog(image=image_array)

Build model architecture:

In [4]:
from typing import List

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

In [5]:
class NeuralNet(nn.Module):

    def __init__(self, n_hiddens: int, n_classes: int, feature_extractor: FeatureExtractor):
        super().__init__()
        self.n_hiddens: int = n_hiddens
        self.n_classes: int = n_classes
        self.feature_extractor: FeatureExtractor = feature_extractor
        self.fc1 = nn.LazyLinear(out_features=n_hiddens)
        self.fc2 = nn.LazyLinear(out_features=n_hiddens)
        self.fc3 = nn.LazyLinear(out_features=n_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x: np.ndarray = x.numpy()
        
        features: List[np.ndarray] = []
        for image in x:
            feature = self.feature_extractor(image_array=image)
            features.append(feature)
        
        features = torch.tensor(data=np.array(features), dtype=torch.float)
        y = torch.relu(self.fc1(features))
        y = torch.relu(self.fc2(y))
        y = torch.softmax(self.fc3(y), dim=1)
        return y

    def predict(self, test_dataloader: DataLoader) -> None:
        self.eval()

        filenames = []
        predictions = []
        with torch.no_grad():
            for images, fnames in test_dataloader:
                outputs = self(images)
                _, predicted = torch.max(outputs, 1)
                filenames.extend(fnames)
                predictions.extend(list(predicted.numpy()))

        prediction_table = pd.DataFrame(
            data={'image': filenames, 'label': predictions}
        )
        prediction_table.to_csv('neural_net.csv', header=False, index=False)
        return prediction_table

# 2. Train your model using dog heart datasets (30 points)

Load datasets and dataloaders:

In [6]:
train_dataset = DogHeartLabeledDataset(data_root='Dog_heart/Train')
valid_dataset = DogHeartLabeledDataset(data_root='Dog_heart/Valid')
test_dataset = DogHearUnlabeledDataset(data_root='Test')

train_dataloader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
valid_dataloader = DataLoader(dataset=valid_dataset, batch_size=32, shuffle=False)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=len(test_dataset), shuffle=False)

Train the model:

In [7]:
net = NeuralNet(n_hiddens=64, n_classes=3, feature_extractor=HOG())

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

# Training loop
num_epochs = 5
for epoch in range(num_epochs):
    net.train()
    running_loss = 0.0
    for images, labels, filenames in train_dataloader:
        optimizer.zero_grad()
        outputs = net(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_dataloader):.4f}')
    
test_dataset = DogHearUnlabeledDataset(data_root='Test')
test_dataloader = DataLoader(dataset=test_dataset, batch_size=len(test_dataset), shuffle=False)

net.predict(test_dataloader=test_dataloader)



Epoch [1/5], Loss: 1.0503
Epoch [2/5], Loss: 0.9989
Epoch [3/5], Loss: 0.9158
Epoch [4/5], Loss: 0.8659
Epoch [5/5], Loss: 0.7836


Unnamed: 0,image,label
0,1922.png,1
1,1709.png,1
2,1919.png,0
3,1639.png,0
4,1804.png,2
...,...,...
395,1685.png,2
396,1833.png,2
397,1900.png,0
398,1824.png,2


# 3. Evaluate your model using the developed software (10 points)

`Neural Network + HOG`:
<br>
<img src="https://raw.githubusercontent.com/hiepdang-ml/DLAssignment3/master/NeuralNet.png" style="width:50%;">

---