# Facial Expression Recognition Model

This model and its weights are from https://github.com/WuJie1010/Facial-Expression-Recognition.Pytorch. The GitHub project was cloned and modified to fit the needs of this project.

The `localized` folder contains manually cropped images showing only the face of the sarcastic speaker. All the images in this folder are passed through the model to generate embeddings. These embeddings are expected to capture the important features of the speaker's facial expression.

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import os
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
from torch.autograd import Variable
import transforms as transforms
from skimage import io
from skimage.transform import resize
from models import *
import pandas as pd
from mtcnn import MTCNN
import cv2
import os
from tqdm import tqdm
import pickle

# MTCNN instance created once at module level (not referenced by the rest of
# the visible code).
mtcnn = MTCNN()

# Side length passed to TenCrop for every crop.
cut_size = 44


def _stack_crops(crops):
    """Turn every crop into a tensor and stack them along a new first axis."""
    tensors = []
    for crop in crops:
        tensors.append(transforms.ToTensor()(crop))
    return torch.stack(tensors)


# Test-time preprocessing: crop with TenCrop, then stack the crops into a
# single tensor so the whole set can be fed to the network in one batch.
transform_test = transforms.Compose([
    transforms.TenCrop(cut_size),
    transforms.Lambda(_stack_crops),
])


def rgb2gray(rgb):
    """Convert an image array to grayscale using weights 0.299/0.587/0.114.

    Args:
        rgb: array-like image. The last axis is treated as the channel axis;
            only the first three channels are used, so a trailing alpha
            channel is ignored. Images that are already single-channel
            (a 2-D array, or a last axis of length 1 or 2) are accepted too.

    Returns:
        A float array without the channel axis. For colour input this is the
        weighted sum of the first three channels; for single-channel input it
        is the first (or only) channel cast to float64.
    """
    rgb = np.asarray(rgb)

    # A 2-D array whose last axis is not 3 or 4 long cannot be a list of
    # RGB(A) pixels, so treat it as an image that is already grayscale.
    # (2-D inputs of shape (N, 3) / (N, 4) keep the original per-row behaviour.)
    if rgb.ndim == 2 and rgb.shape[-1] not in (3, 4):
        # Cast to float so the result type matches the colour path below.
        return rgb.astype(np.float64)

    # Gray or gray+alpha stored with an explicit channel axis: keep channel 0.
    if rgb.ndim >= 3 and rgb.shape[-1] < 3:
        return rgb[..., 0].astype(np.float64)

    return np.dot(rgb[..., :3], [0.299, 0.587, 0.114])

class ModifiedVGG(nn.Module):
    """VGG-style convolutional stack followed by a linear embedding layer.

    Args:
        vgg_name: key into the module-level ``cfg`` table that selects the
            layer layout handed to ``_make_layers``.
        embedding_dim: number of output features of the final linear layer.
    """

    def __init__(self, vgg_name, embedding_dim=256):
        super().__init__()

        # Convolutional feature extractor described by the chosen layout.
        self.features = self._make_layers(cfg[vgg_name])

        # Linear projection of the flattened features (512 values per sample
        # are expected) to the embedding.
        self.embedding_layer = nn.Linear(512, embedding_dim)

    def forward(self, x):
        """Run ``x`` through the feature stack and return its embedding."""
        feature_maps = self.features(x)
        batch_size = feature_maps.size(0)
        flattened = feature_maps.view(batch_size, -1)
        return self.embedding_layer(flattened)

    def _make_layers(self, cfg):
        """Build the feature stack from a layout list.

        Every ``'M'`` entry adds a 2x2 max-pool with stride 2; any other entry
        is used as the output channel count of a 3x3 convolution followed by
        batch normalisation and an in-place ReLU. The first convolution takes
        3 input channels. A 1x1 average pool with stride 1 closes the stack.
        """
        modules = []
        channels_in = 3
        for entry in cfg:
            if entry != 'M':
                modules.append(nn.Conv2d(channels_in, entry, kernel_size=3, padding=1))
                modules.append(nn.BatchNorm2d(entry))
                modules.append(nn.ReLU(inplace=True))
                channels_in = entry
                continue
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
        modules.append(nn.AvgPool2d(kernel_size=1, stride=1))
        return nn.Sequential(*modules)

# Load pre-trained weights into modified VGG
import warnings

# Prefer the GPU, but fall back to the CPU so the model can still be loaded
# on a machine without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

transfer_net = ModifiedVGG('VGG19')

# map_location lets a checkpoint that was saved from GPU tensors be restored
# on whichever device was selected above.
checkpoint = torch.load(os.path.join('FER2013_VGG19', 'PrivateTest_model.t7'),
                        map_location=device)

# Remove the 'classifier' weights from the checkpoint as they don't exist in the ModifiedVGG
checkpoint['net'] = {k: v for k, v in checkpoint['net'].items() if 'classifier' not in k}

# strict=False is needed because the checkpoint has no 'embedding_layer'
# entries. It would also hide any other key mismatch, so inspect the result
# and report anything beyond the expected embedding_layer gap.
load_result = transfer_net.load_state_dict(checkpoint['net'], strict=False)
not_loaded = [k for k in load_result.missing_keys
              if not k.startswith('embedding_layer.')]
if not_loaded or load_result.unexpected_keys:
    warnings.warn(
        'Checkpoint does not fully match ModifiedVGG: '
        f'missing={not_loaded}, unexpected={list(load_result.unexpected_keys)}'
    )

# NOTE(review): embedding_layer receives no weights from the checkpoint, so it
# keeps its default (random) initialisation. The embeddings are therefore a
# random linear projection of the VGG features and will differ between runs
# unless the RNG is seeded before ModifiedVGG is constructed - confirm this is
# intended.

transfer_net.to(device)
transfer_net.eval()

# Prepare to collect embeddings and filenames
averaged_embeddings_first_half = []
image_names_first_half = []

image_names = os.listdir('localized/')

# Send inputs to whichever device the model's weights are on instead of
# assuming CUDA is available.
model_device = next(transfer_net.parameters()).device

for img_name in tqdm(image_names):
    img_path = os.path.join('localized/', img_name)
    raw_img = io.imread(img_path)

    # Grayscale -> 48x48 -> uint8, then replicate to three identical channels
    # because the network's first convolution takes 3 input channels.
    gray = rgb2gray(raw_img)
    gray = resize(gray, (48, 48), mode='symmetric').astype(np.uint8)
    img = gray[:, :, np.newaxis]
    img = np.concatenate((img, img, img), axis=2)
    img = Image.fromarray(img)

    # transform_test returns one stacked tensor holding every crop of the image.
    inputs = transform_test(img)
    ncrops, c, h, w = inputs.shape
    inputs = inputs.view(-1, c, h, w).to(model_device)

    with torch.no_grad():
        embed = transfer_net(inputs)

    # One embedding per image: the mean over its crops. An all-zero network
    # output averages to an all-zero vector of the right length by itself, so
    # no separate hard-coded placeholder is required.
    avg_embed = embed.view(ncrops, -1).mean(0).cpu().numpy()

    # Record the embedding together with the file name minus its extension.
    averaged_embeddings_first_half.append(avg_embed)
    image_names_first_half.append(os.path.splitext(img_name)[0])


# Create a DataFrame with averaged embeddings and related info for the first half
# (one row per image, the whole embedding vector stored in a single cell).
df_avg_embeddings_first_half = pd.DataFrame({
    'ImageName': image_names_first_half,
    'Embedding': averaged_embeddings_first_half
})

# Keep only the rows whose 'Embedding' is a numpy array. reset_index without
# inplace returns a new, independent frame, so nothing below mutates a slice
# of the original (which is what triggers pandas' SettingWithCopyWarning).
has_array = df_avg_embeddings_first_half['Embedding'].apply(
    lambda x: isinstance(x, np.ndarray)
).astype(bool)
df_cleaned = df_avg_embeddings_first_half[has_array].reset_index(drop=True)

# Reduce any multi-row embedding to one vector by averaging over axis 0;
# embeddings that are already 1-D are used unchanged.
vectors = [e.mean(axis=0) if e.ndim > 1 else e for e in df_cleaned['Embedding']]

# Expand the embeddings into individual columns. Stacking into one 2-D array
# keeps the array dtype and avoids building a pd.Series per row.
if vectors:
    embedding_matrix = np.vstack(vectors)
else:
    # No images: produce an empty frame instead of failing inside np.vstack.
    embedding_matrix = np.empty((0, 0))
embeddings_df = pd.DataFrame(embedding_matrix, index=df_cleaned.index)

# Rename columns
embeddings_df.columns = [f'embed_{i}' for i in range(embeddings_df.shape[1])]

# Drop the original 'Embedding' column and concatenate the expanded columns
df_avg_embeddings_first_half = pd.concat([df_cleaned.drop('Embedding', axis=1), embeddings_df], axis=1)


df_avg_embeddings_first_half.to_csv('dependant_image_embeddings.csv', index=False)

  img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
100%|█████████████████████████████████████████████████████████████████████████████| 1203/1203 [00:09<00:00, 123.56it/s]


In [4]:
df_avg_embeddings_first_half

Unnamed: 0,ImageName,embed_0,embed_1,embed_2,embed_3,embed_4,embed_5,embed_6,embed_7,embed_8,...,embed_246,embed_247,embed_248,embed_249,embed_250,embed_251,embed_252,embed_253,embed_254,embed_255
0,1_10004_u,0.225880,-0.158256,-0.166669,0.061559,0.061490,-0.127280,0.202243,-0.420007,0.017750,...,0.043996,0.237813,0.044273,-0.251303,0.176454,-0.067825,-0.255763,0.014547,0.411552,0.008522
1,1_10009_u,0.083695,-0.134326,-0.245697,0.065822,-0.059156,0.034589,-0.001629,0.015762,-0.117956,...,-0.158322,-0.008103,0.089971,0.054675,0.185983,-0.148866,-0.155370,0.067999,0.367675,0.082560
2,1_1001_u,0.105431,-0.061188,-0.176715,0.122682,0.072980,-0.042338,0.119878,-0.118731,-0.175453,...,0.121635,0.160505,0.091331,-0.093659,0.209373,0.014874,-0.181255,0.152046,0.253700,-0.038468
3,1_1003_u,0.118452,-0.078877,-0.113624,0.062875,0.110320,-0.154886,0.224045,-0.334346,-0.051198,...,0.183090,0.205164,0.064420,-0.214857,0.174033,-0.063176,-0.213527,0.078609,0.255371,-0.067467
4,1_10190_u,0.033148,-0.215622,-0.285165,0.128291,0.151158,-0.038314,0.059698,-0.374993,-0.019645,...,-0.042739,0.176863,0.065676,-0.181788,0.148470,-0.120253,-0.389759,-0.141423,0.539212,0.048756
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1198,3_S06E03_366_u,0.258991,-0.254466,-0.103983,-0.041723,-0.045361,-0.180650,0.273379,-0.505350,0.107875,...,-0.031126,0.243304,-0.031462,-0.103157,0.131190,-0.101826,-0.130989,-0.011995,0.393863,0.026977
1199,3_S06E05_355_u,0.332637,-0.135706,-0.076136,0.022215,0.003767,-0.159307,0.278680,-0.401238,-0.023301,...,0.068622,0.234057,0.057899,-0.219143,0.169299,-0.072238,-0.128961,0.131588,0.275824,0.022364
1200,3_S06E06_143_u,-0.017695,-0.118993,-0.109349,0.032382,0.041774,-0.024940,0.048582,-0.222080,-0.071703,...,0.134897,0.062996,0.060295,-0.037984,0.183025,-0.163765,-0.172335,0.008819,0.237152,-0.039465
1201,3_S06E07_272_u,0.107657,-0.187309,-0.065122,0.017533,0.048842,-0.065346,0.108859,-0.266657,-0.188629,...,0.196729,0.030840,0.067684,-0.122658,0.105194,-0.068256,-0.036879,0.113403,0.028594,-0.002684


Although the dataframe is named `df_avg_embeddings_first_half`, it contains the entire dataset. It is named this way only because this notebook was created after the Independent Image Embedding notebook.