## Creating the ground-truth data

In [1]:
from collections import namedtuple
import os
import random
import time
import collections
import math
import pandas as pd
import numpy  as np

# visualization libraries
import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
import torch.utils.data as data

import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
from skimage.feature import local_binary_pattern

from sklearn import decomposition
from sklearn import manifold
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report

# for splitting the images into train/ test/val folder
import splitfolders

OSError: [WinError 1455] The paging file is too small for this operation to complete. Error loading "c:\Users\kings\miniconda3\lib\site-packages\torch\lib\cufft64_10.dll" or one of its dependencies.

In [3]:
# Fix the seed of every random number generator used in this notebook so that
# results are reproducible from run to run.
SEED = 1234

random.seed(SEED)                 # Python's built-in RNG
np.random.seed(SEED)              # NumPy's global RNG
torch.manual_seed(SEED)           # PyTorch RNG
torch.cuda.manual_seed(SEED)      # current CUDA device
torch.cuda.manual_seed_all(SEED)  # all CUDA devices, not only the current one

# cuDNN: request deterministic kernels and switch off the benchmark auto-tuner,
# which may otherwise select a different algorithm on different runs.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [4]:
# Mini-batch size used by the DataLoaders created further down.
batch_size = 32

# Per-channel mean / standard deviation used to standardise the images
# (the same value is repeated for each of the three channels).
_norm_mean = [0.4907, 0.4907, 0.4907]
_norm_std = [0.2099, 0.2099, 0.2099]

# Training pipeline: convert the image to a tensor, then standardise each
# channel. The augmentations that were experimented with here (resize,
# random resized crop, horizontal flip, colour jitter) are all disabled,
# so no resizing or cropping takes place.
transform_train = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
])

# Validation / test pipeline: currently the same two steps as for training
# (the resize and centre-crop steps are disabled as well).
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
])

# Load the FER datasets from each folder

In [5]:
# Root folder of the FER dataset on the local machine.
data_dir = r"C:\Users\kings\OneDrive - MMU\MSC DATA SCIENCE\MSC Project\datasets\fer"

# Training split, read from <data_dir>/fer_train_val_test/train with the
# training transform applied to every image.
fer_train_root = os.path.join(data_dir, 'fer_train_val_test', 'train')
train = datasets.ImageFolder(fer_train_root, transform=transform_train)

In [6]:
# Validation and test splits live next to the training split; both use the
# evaluation transform.
fer_split_root = os.path.join(data_dir, 'fer_train_val_test')
val = datasets.ImageFolder(os.path.join(fer_split_root, 'val'),
                           transform=transform_test)
test = datasets.ImageFolder(os.path.join(fer_split_root, 'test'),
                            transform=transform_test)

In [7]:
# Batch iterators for the three FER splits.
# Training batches are reshuffled and the last incomplete batch is dropped.
train_iter = data.DataLoader(train, batch_size=batch_size,
                             shuffle=True, drop_last=True)

# NOTE(review): drop_last=True also discards the final incomplete validation
# batch, so valid_iter may not cover every validation image - confirm intended.
valid_iter = data.DataLoader(val, batch_size=batch_size,
                             shuffle=False, drop_last=True)

# The test loader keeps every sample, in dataset order.
test_iter = data.DataLoader(test, batch_size=batch_size,
                            shuffle=False, drop_last=False)

In [8]:
# (Re)create GroundTruth.csv with a header row only: "id" followed by one
# column per class of the training set. Mode 'w' discards previous content.
with open(r"C:\Users\kings\OneDrive - MMU\MSC DATA SCIENCE\MSC Project\datasets\fer\fer_train_val_test\GroundTruth.csv", 'w') as gt_file:
    class_columns = ','.join(train.classes)
    gt_file.write('id,' + class_columns + '\n')

In [2]:
# Load the FER ground-truth table and preview its first rows.
fer_gt_csv = r"C:\Users\kings\OneDrive - MMU\MSC DATA SCIENCE\MSC Project\datasets\fer\fer_train_val_test\GroundTruth.csv"
gt = pd.read_csv(fer_gt_csv)
gt.head()


Unnamed: 0,id,negative,neutral,positive
0,ck_negative_0,1,0,0
1,ck_negative_1,1,0,0
2,ck_negative_10,1,0,0
3,ck_negative_100,1,0,0
4,ck_negative_101,1,0,0


In [3]:
# Use the alphabetically sorted file names of the FER test folder as ids.
fer_test_files = os.listdir(r"C:\Users\kings\OneDrive - MMU\MSC DATA SCIENCE\MSC Project\datasets\fer\fer_train_val_test\test\Unknown")
fer_test_files.sort()
gt['id'] = fer_test_files

In [4]:
# Turn each file name into an id by removing its extension.
# os.path.splitext strips only the final extension; splitting on the first
# '.' would also cut off part of a file name that itself contains a dot.
gt['id'] = gt['id'].apply(lambda file_name: os.path.splitext(file_name)[0])
gt.head()

Unnamed: 0,id,negative,neutral,positive
0,ck_negative_0,1,0,0
1,ck_negative_1,1,0,0
2,ck_negative_10,1,0,0
3,ck_negative_100,1,0,0
4,ck_negative_101,1,0,0


In [5]:
# Print the frequency of every distinct value, one column at a time.
for column_name in gt.columns:
    column_counts = gt[column_name].value_counts()
    print(column_counts)

ck_negative_0    1
neutral_843      1
neutral_861      1
neutral_860      1
neutral_86       1
                ..
negative_2503    1
negative_2502    1
negative_2501    1
negative_2500    1
positive_999     1
Name: id, Length: 5739, dtype: int64
1    2912
0    2827
Name: negative, dtype: int64
0    4724
1    1015
Name: neutral, dtype: int64
0    3927
1    1812
Name: positive, dtype: int64


In [14]:
# Create the one-hot label columns: a column is 1 when its class name occurs
# in the (lower-cased) id and 0 otherwise.
# NOTE(review): this is a substring test, so an id that contains more than one
# class name would be marked 1 in several columns.
for label in ('positive', 'neutral', 'negative'):
    gt[label] = gt['id'].apply(
        lambda file_id: 1 if label in str(file_id).lower() else 0
    )

In [6]:
# Number of rows flagged for each class (one printed line per class).
for label in ('negative', 'neutral', 'positive'):
    print(gt[label].sum())

2912
1015
1812


In [12]:
# Save the completed FER ground truth over the header-only file, without
# writing the DataFrame index.
fer_gt_out = r"C:\Users\kings\OneDrive - MMU\MSC DATA SCIENCE\MSC Project\datasets\fer\fer_train_val_test\GroundTruth.csv"
gt.to_csv(fer_gt_out, index=False)

# Load the CKPlus datasets from each folder

In [4]:
# Root folder of the CK+ dataset on the local machine.
data_dir = r"C:\Users\kings\OneDrive - MMU\MSC DATA SCIENCE\MSC Project\datasets\ckplus"

# Training split, read from <data_dir>/ckplus_train_val_test/train with the
# training transform applied to every image.
ckplus_train_root = os.path.join(data_dir, 'ckplus_train_val_test', 'train')
train = datasets.ImageFolder(ckplus_train_root, transform=transform_train)

In [5]:
# Validation and test splits live next to the training split; both use the
# evaluation transform.
ckplus_split_root = os.path.join(data_dir, 'ckplus_train_val_test')
val = datasets.ImageFolder(os.path.join(ckplus_split_root, 'val'),
                           transform=transform_test)
test = datasets.ImageFolder(os.path.join(ckplus_split_root, 'test'),
                            transform=transform_test)

In [6]:
# Batch iterators for the three CK+ splits.
# Training batches are reshuffled and the last incomplete batch is dropped.
train_iter = data.DataLoader(train, batch_size=batch_size,
                             shuffle=True, drop_last=True)

# NOTE(review): drop_last=True also discards the final incomplete validation
# batch, so valid_iter may not cover every validation image - confirm intended.
valid_iter = data.DataLoader(val, batch_size=batch_size,
                             shuffle=False, drop_last=True)

# The test loader keeps every sample, in dataset order.
test_iter = data.DataLoader(test, batch_size=batch_size,
                            shuffle=False, drop_last=False)

In [8]:
# (Re)create GroundTruth.csv with a header row only: "id" followed by one
# column per class of the training set. Mode 'w' discards previous content.
with open(r"C:\Users\kings\OneDrive - MMU\MSC DATA SCIENCE\MSC Project\datasets\ckplus\ckplus_train_val_test\GroundTruth.csv", 'w') as gt_file:
    class_columns = ','.join(train.classes)
    gt_file.write('id,' + class_columns + '\n')

In [9]:
# Load the CK+ ground-truth table and preview its first rows.
ckplus_gt_csv = r"C:\Users\kings\OneDrive - MMU\MSC DATA SCIENCE\MSC Project\datasets\ckplus\ckplus_train_val_test\GroundTruth.csv"
gt = pd.read_csv(ckplus_gt_csv)
gt.head()


Unnamed: 0,id,anger,contempt,disgust,fear,happy,sad


In [10]:
# Use the alphabetically sorted file names of the CK+ test folder as ids.
ckplus_test_files = os.listdir(r"C:\Users\kings\OneDrive - MMU\MSC DATA SCIENCE\MSC Project\datasets\ckplus\ckplus_train_val_test\test\Unknown")
ckplus_test_files.sort()
gt['id'] = ckplus_test_files

In [11]:
# Turn each file name into an id by removing its extension.
# os.path.splitext strips only the final extension; splitting on the first
# '.' would also cut off part of a file name that itself contains a dot.
gt['id'] = gt['id'].apply(lambda file_name: os.path.splitext(file_name)[0])
gt.head()

Unnamed: 0,id,anger,contempt,disgust,fear,happy,sad
0,anger_0,,,,,,
1,anger_1,,,,,,
2,anger_10,,,,,,
3,anger_11,,,,,,
4,anger_12,,,,,,


In [13]:
# Create the one-hot label columns: a column is 1 when its class name occurs
# in the (lower-cased) id and 0 otherwise. No 'neutral' column is generated
# for this dataset.
# NOTE(review): this is a substring test, so an id that contains more than one
# class name would be marked 1 in several columns.
for label in ('happy', 'sad', 'anger', 'contempt', 'disgust', 'fear'):
    gt[label] = gt['id'].apply(
        lambda file_id: 1 if label in str(file_id).lower() else 0
    )

In [15]:
# Save the completed CK+ ground truth over the header-only file, without
# writing the DataFrame index.
ckplus_gt_out = r"C:\Users\kings\OneDrive - MMU\MSC DATA SCIENCE\MSC Project\datasets\ckplus\ckplus_train_val_test\GroundTruth.csv"
gt.to_csv(ckplus_gt_out, index=False)

In [16]:
# Dimensions of the CK+ ground-truth table as (rows, columns).
gt.shape

(154, 7)

: 

# Load the KDEF datasets from each folder

In [8]:
# Root folder of the KDEF dataset on the local machine. The split folder
# joined below has the same name as this root folder.
data_dir = r"C:\Users\kings\OneDrive - MMU\MSC DATA SCIENCE\MSC Project\datasets\kdef_train_val_test"

# Training split, read from <data_dir>/kdef_train_val_test/train with the
# training transform applied to every image.
kdef_train_root = os.path.join(data_dir, 'kdef_train_val_test', 'train')
train = datasets.ImageFolder(kdef_train_root, transform=transform_train)

In [9]:
# Validation and test splits live next to the training split; both use the
# evaluation transform.
kdef_split_root = os.path.join(data_dir, 'kdef_train_val_test')
val = datasets.ImageFolder(os.path.join(kdef_split_root, 'val'),
                           transform=transform_test)
test = datasets.ImageFolder(os.path.join(kdef_split_root, 'test'),
                            transform=transform_test)

In [10]:
# Batch iterators for the three KDEF splits.
# Training batches are reshuffled and the last incomplete batch is dropped.
train_iter = data.DataLoader(train, batch_size=batch_size,
                             shuffle=True, drop_last=True)

# NOTE(review): drop_last=True also discards the final incomplete validation
# batch, so valid_iter may not cover every validation image - confirm intended.
valid_iter = data.DataLoader(val, batch_size=batch_size,
                             shuffle=False, drop_last=True)

# The test loader keeps every sample, in dataset order.
test_iter = data.DataLoader(test, batch_size=batch_size,
                            shuffle=False, drop_last=False)

In [11]:
# (Re)create GroundTruth.csv with a header row only: "id" followed by one
# column per class of the training set. Mode 'w' discards previous content.
with open(r"C:\Users\kings\OneDrive - MMU\MSC DATA SCIENCE\MSC Project\datasets\kdef_train_val_test\kdef_train_val_test\GroundTruth.csv", 'w') as gt_file:
    class_columns = ','.join(train.classes)
    gt_file.write('id,' + class_columns + '\n')

In [12]:
# Load the KDEF ground-truth table and preview its first rows.
kdef_gt_csv = r"C:\Users\kings\OneDrive - MMU\MSC DATA SCIENCE\MSC Project\datasets\kdef_train_val_test\kdef_train_val_test\GroundTruth.csv"
gt = pd.read_csv(kdef_gt_csv)
gt.head()


Unnamed: 0,id,anger,disgust,fear,happy,neutral,sad


In [13]:
# Use the alphabetically sorted file names of the KDEF test folder as ids.
kdef_test_files = os.listdir(r"C:\Users\kings\OneDrive - MMU\MSC DATA SCIENCE\MSC Project\datasets\kdef_train_val_test\kdef_train_val_test\test\Unknown")
kdef_test_files.sort()
gt['id'] = kdef_test_files

In [14]:
# Turn each file name into an id by removing its extension.
# os.path.splitext strips only the final extension; splitting on the first
# '.' would also cut off part of a file name that itself contains a dot.
gt['id'] = gt['id'].apply(lambda file_name: os.path.splitext(file_name)[0])
gt.head()

Unnamed: 0,id,anger,disgust,fear,happy,neutral,sad
0,anger_0,,,,,,
1,anger_1,,,,,,
2,anger_10,,,,,,
3,anger_11,,,,,,
4,anger_12,,,,,,


In [15]:
# Create the one-hot label columns: a column is 1 when its class name occurs
# in the (lower-cased) id and 0 otherwise. No 'contempt' column is generated
# for this dataset.
# NOTE(review): this is a substring test, so an id that contains more than one
# class name would be marked 1 in several columns.
for label in ('happy', 'neutral', 'sad', 'anger', 'disgust', 'fear'):
    gt[label] = gt['id'].apply(
        lambda file_id: 1 if label in str(file_id).lower() else 0
    )

In [16]:
# Save the completed KDEF ground truth over the header-only file, without
# writing the DataFrame index.
kdef_gt_out = r"C:\Users\kings\OneDrive - MMU\MSC DATA SCIENCE\MSC Project\datasets\kdef_train_val_test\kdef_train_val_test\GroundTruth.csv"
gt.to_csv(kdef_gt_out, index=False)

In [17]:
# Dimensions of the KDEF ground-truth table as (rows, columns).
gt.shape

(84, 7)