In [35]:
try: 
    import cv2
    import torch
    import torchvision
    import sklearn.svm
except:
    %pip install opencv-python-headless==4.9.0.80
    %pip install torch
    %pip install torchvision
    %pip install torchsummary 

import torch
from torch.utils.data import Dataset
from torch import cuda
from torchvision import transforms, datasets, models
import torch.optim as optim
import torch.nn as nn
from torch.optim import lr_scheduler

from pathlib import Path
from timeit import default_timer as timer
from tqdm import tqdm
import matplotlib.pyplot as plt
from collections import Counter

from skimage.feature import hog
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, KFold, train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

import random
import numpy as np
import time
import copy

import warnings
warnings.filterwarnings('ignore', category=FutureWarning)


from torchsummary import summary
from PIL import Image

np.set_printoptions(threshold=np.inf)

print('import successful')

import successful


In [36]:
# Data paths
EMOREACT = Path('EmoReact')
FER = Path('FER-2013')
KDEF = Path('KDEF-AKDEF')
NIMH = Path('NIMH-CHEFS')

# General paths
BASE_PATH = Path('/home/jovyan/work/data/out')
MODEL_PATH = Path('/home/jovyan/work/models')

# Set dataset here
DATA = NIMH

# Dataset-specific paths
CURRENT_PATH = BASE_PATH / DATA
LABELS = [f.name for f in CURRENT_PATH.iterdir() if f.is_dir()]
IMAGE_PATHS = list(CURRENT_PATH.rglob('*.jpg'))

# Constants for splitting dataset
TRAIN = 'train'
TEST = 'test'
VAL = 'val'

FEATURES = 'feature-extraction'
TRANSFER = 'transfer-learning'
FINETUNE = 'fine-tuning'

batch_size = 2

# CUDA
train_on_gpu = cuda.is_available()
print(f'[INFO] Train on gpu ...{train_on_gpu}')
if train_on_gpu:
    gpu_count = cuda.device_count()
    print(f'[INFO] {gpu_count} gpus detected.')
    if gpu_count > 1:
        multi_gpu = True
    else:
        multi_gpu = False

[INFO] Train on gpu ...False


In [66]:
class Dataset(Dataset):
    def __init__(self, data_path, img_size, transforms=None, phase=TRAIN):
        self.data_path = Path(data_path / phase)
        self.img_size = img_size
        self.transform = transforms[phase]
        self.phase = phase

        self.classes = self._get_classes()
        self.image_paths = self._get_image_paths()
        self.num_classes = len(self.classes)

        self.class_idx = {class_name: idx for idx, class_name in enumerate(self.classes)}
        self.idx_class = {idx: class_name for class_name, idx in self.class_idx.items()}

        
    def _get_classes(self):
        return [f.name for f in (self.data_path).iterdir() if f.is_dir()]
    

    def _get_image_paths(self):
        paths = list(self.data_path.rglob('*.jpg'))
        random.shuffle( paths )
        return paths


    def __len__(self):
        return len(self.image_paths)
    

    def __getitem__(self, idx):
        img = Image.open(self.image_paths[idx])
        img_path = self.image_paths[idx]

        if self.transform:
            img = self.transform(img)

        label = Path(img_path).parent.name

        return img, label
    

    def show_samples(self):
        fig = plt.figure(figsize=(20,20))

        for i in range(10):
            ax = fig.add_subplot(1, 10, i + 1)
            _, label = self.__getitem__(i)
            img_cv2 = self.get_cv2_img(i)

            ax.imshow(img_cv2, cmap='gray')
            ax.set_title(label)
            ax.axis('off')
        plt.show()


    def show_distribution(self):
        labels_count = Counter([self.__getitem__(i)[1] for i in tqdm(range(len(self.image_paths)))])
        sorted_counts = sorted(labels_count.items())
        labels, counts = zip(*sorted_counts)

        plt.figure(figsize=(10, 3))
        bars = plt.bar(labels, counts, color='skyblue')
        plt.xlabel(f'{DATA}')
        plt.ylabel('Count')
        plt.title('Counts per Emotion Category')
        plt.xticks(rotation=45, ha='right')
        plt.grid(axis='y', linestyle='--', alpha=0.7)

        for bar, count in zip(bars, counts):
            plt.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.5, count,
                    ha='center', va='bottom', color='black', fontsize=8) 

        plt.tight_layout()
        plt.show()

    def get_cv2_img(self, idx):
        img_path = self.image_paths[idx]
        return cv2.imread(str(img_path))
    

    def idx_to_class(self, idx_list):
        return [self.idx_class[idx] for idx in idx_list]


    def class_to_idx(self, class_list):
        return [self.class_idx[class_name] for class_name in class_list]
    
    
    def print_info(self):
        print(f"[INFO] Total number of images ...{len(self)}")
        print("[INFO] Number of classes: ", self.num_classes)
        print("[INFO] Classes: ", self.classes)

In [83]:
data_transforms = {
    TRAIN: transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]),
    VAL: transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ]),
    TEST: transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ])
}

datasets = {
    x: Dataset(CURRENT_PATH, img_size=224, transforms=data_transforms, phase=x) for x in [TRAIN, VAL, TEST]
}

dataloaders = {
    x: torch.utils.data.DataLoader(datasets[x], batch_size=batch_size, shuffle=True, num_workers=4) for x in [TRAIN, VAL, TEST]
}

dataset_sizes = {
    x : len(datasets[x]) for x in [TRAIN, VAL, TEST] 
}

n_classes = datasets[TRAIN].num_classes

In [106]:
class VGG(torch.nn.Module):
    def __init__(self, n_classes, mode=FEATURES, pretrained=True, model=16):
        super(VGG, self).__init__()

        self.conv_base = None
        self.mode = mode
        self.n_classes = n_classes
        self.transfer_trained_model = None
        self.cuda = self._check_cuda()

        if model == 16:
            self.conv_base = models.vgg16(weights='IMAGENET1K_V1')
        elif model == 19:
            self.conv_base = models.vgg19(weights='IMAGENET1K_V1')
        else: 
            raise ValueError('Unsupported mode in VGG model')

        # this mode removes the classifier and returns extracted features.
        if self.mode == FEATURES:
            for param in self.conv_base.parameters():
                param.requires_grad = False
            self.conv_base.classifier = torch.nn.Identity()
        
        # this mode replaces the last layer of classifier-part of the vgg16 net with a custom classifier layer, which returns one of the class labels.
        elif self.mode == TRANSFER:
            for param in self.conv_base.parameters():
                param.requires_grad = False
            self.conv_base.classifier[-1] = torch.nn.Linear(in_features=self.conv_base.classifier[-1].in_features, out_features=n_classes)

        # this mode fine-tunes the classifier part and maybe also some other layers within the net??
        elif self.mode == FINETUNE:
            pass

        else: 
            raise ValueError('Unsupported mode in VGG16 / VGG19 init')
    
    def _check_cuda(self):
            return torch.cuda.is_available()
    

    def forward(self, x):
        x = self.conv_base(x)
        return x
    
    
    def summary(self):
        summary(self.conv_base, input_size=(3, 224, 224), batch_size=batch_size, device='cuda')


    def feature_extract(self, dataloaders):

        features = np.empty((0, 25088))
        labels = np.empty(0)

        for phase in ([TRAIN, TEST, VAL]):

            for inputs_batch, labels_batch in tqdm(dataloaders[phase]):
                with torch.no_grad():
                    features_batch = np.asarray(self.conv_base(inputs_batch))
                    features = np.append(features, features_batch, axis=0)
                    label = np.asarray((labels_batch)).flatten()

                labels = np.append(labels, label)
        
        return features, labels


    def transfer_learning(self, dataloaders, num_epochs=10):

        for param in self.conv_base.features.parameters():
            param.require_grad = True

        criterion = nn.CrossEntropyLoss()
        optimizer_ft = optim.SGD(self.conv_base.parameters(), lr=0.001, momentum=0.9)
        #exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

        since = time.time()
        best_model_wts = copy.deepcopy(self.conv_base.state_dict())

        best_acc = 0.0
        avg_loss = 0
        avg_acc = 0
        avg_loss_val = 0
        avg_acc_val = 0

        train_batches = len(dataloaders[TRAIN])
        val_batches = len(dataloaders[VAL])

        for epoch in range(num_epochs):
            print(f"Epoch {epoch}/{num_epochs}")
            print('-' * 10)

            loss_train = 0
            loss_val = 0
            acc_train = 0
            acc_val = 0
            
            self.conv_base.train(True)

            for i, data in enumerate(dataloaders[TRAIN]):
                if i % 100 == 0:
                    print(f"\rTraining batch {i}/{train_batches}", flush=True)

                if i >= train_batches / 2:
                    break
                
                inputs = data[0]
                labels = torch.tensor(np.asarray(dataloaders[TRAIN].dataset.class_to_idx(data[1])))

                if torch.cuda.is_available():
                    inputs, labels = inputs.cuda(), labels.cuda()
                else:
                    inputs, labels = inputs, labels

                optimizer_ft.zero_grad()
                outputs = self.conv_base(inputs)
                _, preds = torch.max(outputs.data, 1)
                loss = criterion(outputs, labels)
                loss.backward()
                return
                optimizer_ft.step()
                return
                
                
                loss_train += loss.data[0]
                acc_train += torch.sum(preds == labels.data)
                
                del inputs, labels, outputs, preds
                torch.cuda.empty_cache()
            
            
            print()
            # * 2 as we only used half of the dataset
            avg_loss = loss_train * 2 / dataset_sizes[TRAIN]
            avg_acc = acc_train * 2 / dataset_sizes[TRAIN]
            
            self.conv_base.train(False)
            self.conv_base.eval()

            for i, data in enumerate(dataloaders[VAL]):
                if i % 100 == 0:
                    print(f"\rValidation batch {i}/{val_batches}", flush=True)
                
                inputs, labels = data
                
                if torch.cuda.is_available():
                    inputs, labels = inputs.cuda(), labels.cuda()
                else:
                    inputs, labels = inputs, labels
                
                optimizer_ft.zero_grad()
                
                outputs = self.conv_base(inputs)
                
                _, preds = torch.max(outputs.data, 1)
                loss = criterion(outputs, labels)

                loss_val += loss.data[0]
                acc_val += torch.sum(preds == labels.data)
                
                del inputs, labels, outputs, preds
                torch.cuda.empty_cache()
            
            avg_loss_val = loss_val / dataset_sizes[VAL]
            avg_acc_val = acc_val / dataset_sizes[VAL]

            print()
            print(f"Epoch {epoch} result: ".format(epoch))
            print(f"Avg loss (train): {avg_loss:.4f}")
            print(f"Avg acc (train): {avg_acc:.4f}")
            print(f"Avg loss (val): {avg_loss_val:.4f}")
            print(f"Avg acc (val): {avg_acc_val:.4f}")
            print('-' * 10)
            print()

            if avg_acc_val > best_acc:
                best_acc = avg_acc_val
                best_model_wts = copy.deepcopy(self.conv_base.state_dict())

        elapsed_time = time.time() - since

        print()
        print(f"Training completed in {elapsed_time // 60:.0f}m {elapsed_time % 60:.0f}s")
        print(f"Best acc: {best_acc:.4f}")

        self.transfer_trained_model = self.conv_base.load_state_dict(best_model_wts)



vgg16 = VGG(n_classes=n_classes, mode=FEATURES, pretrained=True, model=16)  
vgg16.transfer_learning(dataloaders=dataloaders, num_epochs=10)          

Epoch 0/10
----------
Training batch 0/160


RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [29]:
def train_model(vgg, criterion, optimizer, scheduler, num_epochs=10):
    
        
        
    elapsed_time = time.time() - since
    print()
    print("Training completed in {:.0f}m {:.0f}s".format(elapsed_time // 60, elapsed_time % 60))
    print("Best acc: {:.4f}".format(best_acc))
    
    vgg.load_state_dict(best_model_wts)
    return vgg


In [None]:
vgg16.transfer_learning()

In [None]:
class Resnet(torch.nn.Module):
    def __init__(self, n_classes, mode=FEATURES, pretrained=True):
        super(Resnet, self).__init__()

        self.conv_base = models.resnet50(weights='IMAGENET1K_V2')
        self.mode = mode
        self.n_classes = n_classes

        if self.mode == FEATURES:
            for param in self.conv_base.parameters():
                param.requires_grad = False
            self.conv_base.fc = torch.nn.Identity()

        elif self.mode == TRANSFER:
            for param in self.conv_base.parameters():
                param.requires_grad = False
            self.conv_base.fc = torch.nn.Linear(in_features=self.conv_base.classifier[-1].in_features, out_features=n_classes)

        elif self.mode == FINETUNE:
            pass
        
        else: 
            raise ValueError('Unsupported mode in Resnet init')
    

    def forward(self, x):
        x = self.conv_base(x)
        return x
    

    def summary(self):
        summary(self.conv_base, input_size=(3, 224, 224), batch_size=batch_size, device='cuda')

    
    def feature_extract(self, dataloaders):

        features = np.empty((0, 2048))
        labels = np.empty(0)

        for phase in ([TRAIN, TEST, VAL]):

            for inputs_batch, labels_batch in tqdm(dataloaders[phase]):
                with torch.no_grad():
                    features_batch = np.asarray(self.conv_base(inputs_batch))
                    features = np.append(features, features_batch, axis=0)
                    label = np.asarray((labels_batch)).flatten()

                labels = np.append(labels, label)
    
        return features, labels
    
    def simple_classify(self):
        pass

In [None]:
resnet50 = Resnet(n_classes=n_classes, mode=FEATURES, pretrained=True)   

In [49]:
nimhchefs = Dataset(data_path=CURRENT_PATH, img_size=64, transforms=data_transforms)


AttributeError: 'Dataset' object has no attribute 'class_idx'

In [None]:
features, labels = resnet50.extract_features(dataloaders=dataloaders)

In [None]:
print(features.shape)
print(labels.shape)

In [None]:
resnet50.summary()

In [None]:
features, labels = vgg16.extract_features(dataloaders=dataloaders)

In [None]:
""" self.linear1 = torch.nn.Linear(in_features=25088, out_features=4096, bias=True)
self.relu1 = torch.nn.ReLU(inplace=True)
self.dropout1 = torch.nn.Dropout(p=0.5, inplace=False)
self.linear2 = torch.nn.Linear(in_features=4096, out_features=4096, bias=True)
self.relu2 = torch.nn.ReLU(inplace=True)
self.dropout2 = torch.nn.Dropout(p=0.5, inplace=False)
self.linear3 = torch.nn.Linear(in_features=4096, out_features=n_classes, bias=True) """

In [None]:
conv_base = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)
for param in conv_base.parameters():
    param.requires_grad = False

In [None]:
def extract_features(dataloaders):

    features = torch.zeros(0, 5)
    labels = torch.zeros(0, dtype=torch.long)
    i = 0

    for phase in ([TRAIN, TEST, VAL]):

        
        for inputs_batch, labels_batch in tqdm(dataloaders[phase]):
            i += 1
            with torch.no_grad():
                features_batch = conv_base(inputs_batch)
                features = torch.tensor((features, features_batch))

            labels = torch.tensor((labels, labels_batch))
            return features, labels

In [None]:
features, labels = extract_features(dataloaders=dataloaders)

In [None]:
class_to_idx = image_datasets[TRAIN].class_to_idx
idx_to_class = { idx: class_ for class_, idx in class_to_idx.items() }

print(class_to_idx)
print(idx_to_class)

In [None]:
print(features.shape)
print(labels.shape)

In [None]:
category_counts = {}
for value in labels:
    category_counts[class_names[value.item()]] = category_counts.get(class_names[value.item()], 0) + 1

sorted_counts = sorted(category_counts.items(), key=lambda x: x[0])
categories, counts = zip(*sorted_counts)

plt.figure(figsize=(10, 3))
bars = plt.bar(categories, counts, color='skyblue')
plt.xlabel('Emotion Category')
plt.ylabel('Count')
plt.title('Counts per Emotion Category')
plt.xticks(rotation=45, ha='right')
plt.grid(axis='y', linestyle='--', alpha=0.7)

for bar, count in zip(bars, counts):
    plt.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.5, count, ha='center', va='bottom')

plt.show()


### SVM Init

In [None]:
x_train, x_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, shuffle=True, stratify=labels, random_state=42)

In [None]:
print("[INFO] Number of images used in training ...", x_train.shape[0])
print("[INFO] Number of images used in testing ...", x_test.shape[0])

classifier = SVC()
parameters = {'gamma': [0.1, 0.01, 0.001], 'C': [1, 10, 100, 1000]}

In [None]:
grid_search = GridSearchCV(classifier, parameters, n_jobs=-1)
grid_search.fit(x_train, y_train)
best_estimator = grid_search.best_estimator_
print("[INFO] Best params ...", grid_search.best_params_)

In [None]:
def print_score(clf, x_train, y_train, x_test, y_test, train=True):
    if train:
        y_prediction = clf.predict(x_train)
        clf_report = classification_report(y_train, y_prediction)
        print("Train Result:\n================================================")
        print(f"Accuracy Score: {accuracy_score(y_train, y_prediction) * 100:.2f}%")
        print("_______________________________________________")
        print(f"CLASSIFICATION REPORT:\n{clf_report}")
        print("_______________________________________________")
        print(f"Confusion Matrix: \n {confusion_matrix(y_train, y_prediction)}\n")
        
    elif train==False:
        y_prediction = clf.predict(x_test)
        clf_report = classification_report(y_test, y_prediction)
        print("Test Result:\n================================================")        
        print(f"Accuracy Score: {accuracy_score(y_test, y_prediction) * 100:.2f}%")
        print("_______________________________________________")
        print(f"CLASSIFICATION REPORT:\n{clf_report}")
        print("_______________________________________________")
        print(f"Confusion Matrix: \n {confusion_matrix(y_test, y_prediction)}\n")

In [None]:
import pickle 

pickle.dump(best_estimator, open('/home/jovyan/work/model.p', 'wb'))

print_score(best_estimator, x_train, y_train, x_test, y_test, train=True)
print_score(best_estimator, x_train, y_train, x_test, y_test, train=False)

### K-fold cross-validation

In [None]:
n_splits_values = [3, 5, 10]

for n_splits in n_splits_values:
    cv = KFold(n_splits=n_splits, random_state=42, shuffle=True)
    scores = cross_val_score(best_estimator, features, labels, scoring='accuracy', cv=cv, n_jobs=-1)
    print(f"{n_splits}-Fold CV: {scores.mean():.2f} accuracy with a standard deviation of {scores.std():.2f}")