In [73]:
try: 
    import cv2
    import torch
    import torchvision
    import sklearn.svm
except:
    %pip install opencv-python-headless==4.9.0.80
    %pip install torch
    %pip install torchvision
    %pip install torchsummary 
    %pip install sklearn

from torch.utils.tensorboard import SummaryWriter
import torch
from torch.utils.data import Dataset
from torch import cuda
from torchvision import transforms, datasets, models
import torch.optim as optim
import torch.nn as nn
from torch.optim import lr_scheduler

from pathlib import Path
from timeit import default_timer as timer
from tqdm import tqdm
import matplotlib.pyplot as plt
from collections import Counter

from skimage.feature import hog
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, KFold, train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

import random
import numpy as np
import time
import copy

import warnings
warnings.filterwarnings('ignore', category=FutureWarning)


from torchsummary import summary
from PIL import Image

np.set_printoptions(threshold=np.inf)

print('import successful')

import successful
gpu accessible


True

In [93]:
# Data paths
EMOREACT = Path('EmoReact')
FER = Path('FER-2013')
KDEF = Path('KDEF-AKDEF')
NIMH = Path('NIMH-CHEFS')

# General paths
BASE_PATH = Path('/home/jovyan/work/data/out')
MODEL_PATH = Path('/home/jovyan/work/models')

# Set dataset here
DATA = EMOREACT

# Dataset-specific paths
CURRENT_PATH = BASE_PATH / DATA
LABELS = [f.name for f in CURRENT_PATH.iterdir() if f.is_dir()]
IMAGE_PATHS = list(CURRENT_PATH.rglob('*.jpg'))

# Constants for splitting dataset
TRAIN = 'train'
TEST = 'test'
VAL = 'val'

FEATURES = 'feature-extraction'
TRANSFER = 'transfer-learning'
FINETUNE = 'fine-tuning'

# batch size
BATCH_SIZE = 10

# CUDA
train_on_gpu = cuda.is_available()
print(f'[INFO] Train on gpu ...{train_on_gpu}')
if train_on_gpu:
    gpu_count = cuda.device_count()
    print(f'[INFO] {gpu_count} gpus detected.')
    if gpu_count > 1:
        multi_gpu = True
    else:
        multi_gpu = False
        
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(DEVICE)

torch.cuda.empty_cache()

[INFO] Train on gpu ...True
[INFO] 2 gpus detected.
cuda


In [94]:
class Dataset(Dataset):
    def __init__(self, data_path, img_size, transforms=None, phase='train'):
        self.data_path = Path(data_path) / phase
        self.img_size = img_size
        self.transform = transforms[phase]
        self.phase = phase

        self.classes = self._get_classes()
        self.image_paths = self._get_image_paths()
        self.num_classes = len(self.classes)

        self.class_to_int = {class_name: idx for idx, class_name in enumerate(self.classes)}
        self.int_to_class = {idx: class_name for class_name, idx in self.class_to_int.items()}

    def _get_classes(self):
        return [f.name for f in self.data_path.iterdir() if f.is_dir()]

    def _get_image_paths(self):
        paths = list(self.data_path.rglob('*.jpg'))
        random.shuffle(paths)
        return paths

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img = Image.open(self.image_paths[idx])
        img_path = self.image_paths[idx]

        if self.transform:
            img = self.transform(img)

        label = Path(img_path).parent.name
        label = self.class_to_int[label]  # Convert label to integer

        return img, label

    def show_samples(self):
        fig = plt.figure(figsize=(20, 20))

        for i in range(10):
            ax = fig.add_subplot(1, 10, i + 1)
            _, label = self.__getitem__(i)
            img_cv2 = self.get_cv2_img(i)

            ax.imshow(img_cv2, cmap='gray')
            ax.set_title(self.int_to_class[label])  # Show class name instead of index
            ax.axis('off')
        plt.show()

    def show_distribution(self):
        labels_count = Counter([self.__getitem__(i)[1] for i in range(len(self.image_paths))])
        sorted_counts = sorted(labels_count.items())
        labels, counts = zip(*sorted_counts)

        plt.figure(figsize=(10, 3))
        bars = plt.bar(labels, counts, color='skyblue')
        plt.xlabel('Class')
        plt.ylabel('Count')
        plt.title('Counts per Class')
        plt.xticks(rotation=45, ha='right')
        plt.grid(axis='y', linestyle='--', alpha=0.7)

        for bar, count in zip(bars, counts):
            plt.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.5, count,
                    ha='center', va='bottom', color='black', fontsize=8)

        plt.tight_layout()
        plt.show()

    def get_cv2_img(self, idx):
        img_path = self.image_paths[idx]
        return cv2.imread(str(img_path))

    def idx_to_class(self, idx_list):
        return [self.int_to_class[idx] for idx in idx_list]

    def class_to_idx(self, class_list):
        return [self.class_to_int[class_name] for class_name in class_list]

    def print_info(self):
        print(f"[INFO] Total number of images: {len(self)}")
        print("[INFO] Number of classes:", self.num_classes)
        print("[INFO] Classes:", self.classes)


In [95]:
data_transforms = {
    TRAIN: transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]),
    VAL: transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ]),
    TEST: transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ])
}

datasets = { x: Dataset(CURRENT_PATH, img_size=224, transforms=data_transforms, phase=x) for x in [TRAIN, VAL, TEST] }
dataloaders = { x: torch.utils.data.DataLoader(datasets[x], batch_size=BATCH_SIZE, shuffle=True, num_workers=4) for x in [TRAIN, VAL, TEST] }
dataset_sizes = { x : len(datasets[x]) for x in [TRAIN, VAL, TEST] }

N_CLASSES = datasets[TRAIN].num_classes

In [96]:
# init the VGG model
vgg16 = models.vgg16(weights='IMAGENET1K_V1')
vgg16 = vgg16.to(DEVICE)

# freeze parameters for feature extraction
for param in vgg16.features.parameters():
    param.require_grad = False

# number of input features for last layer
num_features = vgg16.classifier[6].in_features

vgg16.classifier = torch.nn.Identity()
vgg16.classifier = vgg16.classifier.to(DEVICE)

## Feature Extraction

In [97]:
def extract_features(loader, conv_base):
    conv_base.eval()
    features = []
    labels = []

    with torch.no_grad():
        for images, targets in loader:
            # move data to cuda
            images = images.to(device)
            targets = torch.as_tensor(targets).to(device)
            
            # Extract features using conv_base
            features_batch = conv_base(images)
            features.append(features_batch.cpu().numpy())  # Convert to numpy array
            labels.append(targets.cpu().numpy())

    features = np.concatenate(features, axis=0)
    labels = np.concatenate(labels, axis=0)
    return features, labels

In [98]:
train_features, train_labels = extract_features(loader=dataloaders[TRAIN], conv_base=vgg16)
val_features, val_labels = extract_features(loader=dataloaders[VAL], conv_base=vgg16)
test_features, test_labels = extract_features(loader=dataloaders[TEST], conv_base=vgg16)

In [99]:
print((train_features).shape)

(2400, 25088)


## SVM

In [100]:
%%time
clf = SVC()
clf.fit(train_features, train_labels)
predicted = clf.predict(test_features)
# get the accuracy
print(accuracy_score(test_labels, predicted))

0.19407894736842105


## Training

In [None]:
def train_model(vgg, criterion, optimizer, scheduler, num_epochs=10):
    since = time.time()
    
    # freeze feature layers
    for param in vgg.features.parameters():
        param.requires_grad = False
    
    # make classifier trainable
    for param in vgg.classifier.parameters():
        param.requires_grad = True
    """
    for param in vgg.features.parameters():
        param.requires_grad = True
    """
    vgg.classifier[-1] = torch.nn.Linear(in_features=vgg.classifier[-1].in_features, out_features=N_CLASSES)
    vgg = vgg.to(DEVICE)

    best_acc = 0.0
    avg_loss = 0
    avg_acc = 0
    avg_loss_val = 0
    avg_acc_val = 0

    train_batches = len(dataloaders[TRAIN])
    val_batches = len(dataloaders[VAL])

    for epoch in range(num_epochs):
        print(f"Epoch {epoch}/{num_epochs}")
        print('-' * 10)

        loss_train = 0
        loss_val = 0
        acc_train = 0
        acc_val = 0
        
        vgg.train(True)

        # iterate through batches of training set
        for i, data in enumerate(dataloaders[TRAIN]):
            if i % 100 == 0:
                print(f"\rTraining batch {i}/{train_batches}", flush=True)
            
            # set input and labels to the batch features and labels
            inputs = data[0]
            labels = data[1]

            # move to cuda if possible
            inputs = inputs.to(DEVICE)
            labels = labels.to(DEVICE)

            # do the learning stuff & update loss etc.
            optimizer_ft.zero_grad()
            outputs = vgg(inputs)
            _, preds = torch.max(outputs.data, 1)
            # backward & optimize
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer_ft.step()                
            
            loss_train += loss.item()
            acc_train += torch.sum(preds == labels.data)
            
            del inputs, labels, outputs, preds
            torch.cuda.empty_cache()
        
        print()
        avg_loss = loss_train / dataset_sizes[TRAIN]
        avg_acc = acc_train / dataset_sizes[TRAIN]

        # set to evaluation mode
        vgg.train(False)
        vgg.eval()

        # iterate through batches of validation set
        with torch.no_grad():
            for i, data in enumerate(dataloaders[VAL]):
                if i % 100 == 0:
                    print(f"\rValidation batch {i}/{val_batches}", flush=True)
                
                # set input and labels to the batch features and labels
                inputs = data[0]
                labels = data[1]
    
                # move to cuda if possible
                inputs = inputs.to(DEVICE)
                labels = labels.to(DEVICE)
    
                # do the learning stuff & update loss etc.
                # zero the gradients
                optimizer_ft.zero_grad()
                outputs = vgg(inputs)
                _, preds = torch.max(outputs.data, 1)
                loss = criterion(outputs, labels)
                loss_val += loss.item()
                acc_val += torch.sum(preds == labels.data)
                
            # delete batch from cuda 
            del inputs, labels, outputs, preds
            torch.cuda.empty_cache()
        
        avg_loss_val = loss_val / dataset_sizes[VAL]
        avg_acc_val = acc_val / dataset_sizes[VAL]

        # print results
        print()
        print(f"Epoch {epoch} result: ".format(epoch))
        print(f"Avg loss (train): {avg_loss:.4f}")
        print(f"Avg acc (train): {avg_acc:.4f}")
        print(f"Avg loss (val): {avg_loss_val:.4f}")
        print(f"Avg acc (val): {avg_acc_val:.4f}")
        print('-' * 10)
        print()

        # update best acc save best model weights
        if avg_acc_val > best_acc:
            best_acc = avg_acc_val
            best_model_wts = copy.deepcopy(vgg.state_dict())

    # compute training time
    elapsed_time = time.time() - since

    # print all the results
    print()
    print(f"Training completed in {elapsed_time // 60:.0f}m {elapsed_time % 60:.0f}s")
    print(f"Best acc: {best_acc:.4f}")

    return vgg.load_state_dict(best_model_wts)

In [None]:
# init the VGG model
vgg16 = models.vgg16(weights='IMAGENET1K_V1')
vgg16 = vgg16.to(DEVICE)

#summary(vgg16, input_size=(3, 224, 224), batch_size=BATCH_SIZE, device='cuda')

In [None]:
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(vgg16.parameters(), lr=0.001, momentum=0.9)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

In [None]:
vgg16 = train_model(vgg16, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=50)

#

In [None]:
torch.save(vgg16.state_dict(), 'VGG16_NIMH-CHEFS.pt')

In [None]:
def eval_model(vgg, criterion):
    since = time.time()
    avg_loss = 0
    avg_acc = 0
    loss_test = 0
    acc_test = 0
    
    test_batches = len(dataloaders[TEST])
    print("Evaluating model")
    print('-' * 10)
    
    for i, data in enumerate(dataloaders[TEST]):
        if i % 100 == 0:
            print(f"\rTest batch {i}/{test_batches}", flush=True)

        vgg.train(False)
        vgg.eval()
        inputs = data[0]
        labels = data[1]

        # move to cuda if possible
        inputs = inputs.to(DEVICE)
        labels = labels.to(DEVICE)

        # Ddsable gradient calculation for evaluation
        with torch.no_grad():  
            outputs = vgg(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

        loss_test += loss.item()
        acc_test += torch.sum(preds == labels.data)

        del inputs, labels, outputs, preds
        torch.cuda.empty_cache()
        
        
    avg_loss = loss_test / dataset_sizes[TEST]
    avg_acc = acc_test / dataset_sizes[TEST]
    
    elapsed_time = time.time() - since
    print()
    print(f"Evaluation completed in {elapsed_time // 60:.0f}m {elapsed_time % 60:.0f}s")
    print(f"Avg loss (test): {avg_loss:.4f}")
    print(f"Avg acc (test): {avg_acc:.4f}")
    print('-' * 10)

In [None]:
eval_model(vgg16, criterion)