# Facial Expression Recognition Model

This model and weights are from https://github.com/WuJie1010/Facial-Expression-Recognition.Pytorch The github project is cloned and modified to fit the required needs.

The images in final_image_folder folder, which contains faces of all the speakers through use of MTCNN. All the images in this folder are sent through the model to generate embeddings. For each face detected in the image, each of the faces generate embeddings through FER model. All the embeddings are then averaged unless there is null or 0 embeddings. The logic behind averaging is that, everyone in the image/room would have same or similar facial expression.

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import os
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
from torch.autograd import Variable
import transforms as transforms
from skimage import io
from skimage.transform import resize
from models import *
import pandas as pd
from mtcnn import MTCNN
import cv2
import os
from tqdm import tqdm
import pickle

mtcnn = MTCNN()

cut_size = 44

transform_test = transforms.Compose([
    transforms.TenCrop(cut_size),
    transforms.Lambda(lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops])),
])


def rgb2gray(rgb):
    return np.dot(rgb[...,:3], [0.299, 0.587, 0.114])

class ModifiedVGG(nn.Module):
    def __init__(self, vgg_name, embedding_dim=256):
        super(ModifiedVGG, self).__init__()
        
        # Original VGG features
        self.features = self._make_layers(cfg[vgg_name])
        
        # Embedding layer to get the embeddings from the model
        self.embedding_layer = nn.Linear(512, embedding_dim)
        
    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        
        # Pass through the embedding layer
        embedding = self.embedding_layer(out)
        
        return embedding

    def _make_layers(self, cfg):
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
        return nn.Sequential(*layers)

# Load pre-trained weights into modified VGG
transfer_net = ModifiedVGG('VGG19')
checkpoint = torch.load(os.path.join('FER2013_VGG19', 'PrivateTest_model.t7'))

# Remove the 'classifier' weights from the checkpoint as they don't exist in the ModifiedVGG
checkpoint['net'] = {k: v for k, v in checkpoint['net'].items() if 'classifier' not in k}
transfer_net.load_state_dict(checkpoint['net'], strict=False)  # Use strict=False since the model architectures differ slightly

transfer_net.cuda()
transfer_net.eval()

# Now you can pass your images through `transfer_net` to get the embeddings
# Prepare to collect embeddings and filenames
averaged_embeddings_first_half = []
image_names_first_half = []

image_names = os.listdir('final_images_faces/')

for img_name in tqdm(image_names[:645]):
    img_path = os.path.join('final_images_faces/', img_name)
    
    raw_img = io.imread(img_path)
    
    # Detect faces in the image
    faces = mtcnn.detect_faces(raw_img)
    
    img_embeddings = []  # To collect embeddings for this image
    
    for face in faces:
        cropped_face = face['box']  # This gives you the bounding box of the face
        
        gray = rgb2gray(raw_img)
        gray = resize(gray, (48, 48), mode='symmetric').astype(np.uint8)
    
        img = gray[:, :, np.newaxis]
        img = np.concatenate((img, img, img), axis=2)
        img = Image.fromarray(img)
        inputs = transform_test(img)
    
        ncrops, c, h, w = np.shape(inputs)
        inputs = inputs.view(-1, c, h, w)
        inputs = inputs.cuda()
    
        # Note: 'volatile' is deprecated. Instead, use 'with torch.no_grad():' for inference
        with torch.no_grad():
            embed = transfer_net(inputs)
        
        # Check validity of embedding
        if torch.any(embed):  # If any value in the embedding tensor is non-zero
            img_embeddings.append(embed.cpu().numpy())

        
    # Average the embeddings for this image
    if img_embeddings:  # If there's at least one valid embedding
        avg_embed = np.mean(img_embeddings, axis=0)
    else:
        avg_embed = np.zeros((256,))  # Placeholder for no embedding
    
    # Add averaged embedding and related info to lists
    averaged_embeddings_first_half.append(avg_embed)
    base_name = os.path.splitext(img_name)[0]
    image_names_first_half.append(base_name)


# Create a DataFrame with averaged embeddings and related info for the first half
df_avg_embeddings_first_half = pd.DataFrame({
    'ImageName': image_names_first_half,
    'Embedding': averaged_embeddings_first_half
})

import numpy as np

# Averaging each list of embeddings
averaged_embeddings = [np.mean(embedding, axis=0) for embedding in df_avg_embeddings_first_half['Embedding']]

# Filter the rows where the 'Embedding' column is not an instance of numpy.ndarray
df_cleaned = df_avg_embeddings_first_half[df_avg_embeddings_first_half['Embedding'].apply(lambda x: isinstance(x, np.ndarray))]

# If you want to reset the index after dropping:
df_cleaned.reset_index(drop=True, inplace=True)

# Convert the 'Embedding' column from a nested list to a numpy array
df_cleaned['Embedding'] = df_cleaned['Embedding'].apply(np.array)

# Average the embeddings along axis=1
df_cleaned['Embedding'] = df_cleaned['Embedding'].apply(lambda x: x.mean(axis=0) if len(x.shape) > 1 else x)

# Expand the averaged embeddings into individual columns
embeddings_df = df_cleaned['Embedding'].apply(pd.Series)

# Rename columns
embeddings_df.columns = [f'embed_{i}' for i in range(embeddings_df.shape[1])]

# Drop the original 'Embedding' column and concatenate the expanded columns
df_avg_embeddings_first_half = pd.concat([df_cleaned.drop('Embedding', axis=1), embeddings_df], axis=1)


df_avg_embeddings_first_half.to_csv('645_1.csv', index=False)

  img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
100%|████████████████████████████████████████████████████████████████████████████████| 645/645 [07:42<00:00,  1.39it/s]


In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import os
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
from torch.autograd import Variable
import transforms as transforms
from skimage import io
from skimage.transform import resize
from models import *
import pandas as pd
from mtcnn import MTCNN
import cv2
import os
from tqdm import tqdm
import pickle

mtcnn = MTCNN()

cut_size = 44

transform_test = transforms.Compose([
    transforms.TenCrop(cut_size),
    transforms.Lambda(lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops])),
])


def rgb2gray(rgb):
    return np.dot(rgb[...,:3], [0.299, 0.587, 0.114])

class ModifiedVGG(nn.Module):
    def __init__(self, vgg_name, embedding_dim=256):
        super(ModifiedVGG, self).__init__()
        
        # Original VGG features
        self.features = self._make_layers(cfg[vgg_name])
        
        # Embedding layer to get the embeddings from the model
        self.embedding_layer = nn.Linear(512, embedding_dim)
        
    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        
        # Pass through the embedding layer
        embedding = self.embedding_layer(out)
        
        return embedding

    def _make_layers(self, cfg):
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
        return nn.Sequential(*layers)

# Load pre-trained weights into modified VGG
transfer_net = ModifiedVGG('VGG19')
checkpoint = torch.load(os.path.join('FER2013_VGG19', 'PrivateTest_model.t7'))

# Remove the 'classifier' weights from the checkpoint as they don't exist in the ModifiedVGG
checkpoint['net'] = {k: v for k, v in checkpoint['net'].items() if 'classifier' not in k}
transfer_net.load_state_dict(checkpoint['net'], strict=False)  # Use strict=False since the model architectures differ slightly

transfer_net.cuda()
transfer_net.eval()

# Now you can pass your images through `transfer_net` to get the embeddings
# Prepare to collect embeddings and filenames
averaged_embeddings_first_half = []
image_names_first_half = []

image_names = os.listdir('final_images_faces/')

for img_name in tqdm(image_names[645:1290]):
    img_path = os.path.join('final_images_faces/', img_name)
    
    raw_img = io.imread(img_path)
    
    # Detect faces in the image
    faces = mtcnn.detect_faces(raw_img)
    
    img_embeddings = []  # To collect embeddings for this image
    
    for face in faces:
        cropped_face = face['box']  # This gives you the bounding box of the face
        
        gray = rgb2gray(raw_img)
        gray = resize(gray, (48, 48), mode='symmetric').astype(np.uint8)
    
        img = gray[:, :, np.newaxis]
        img = np.concatenate((img, img, img), axis=2)
        img = Image.fromarray(img)
        inputs = transform_test(img)
    
        ncrops, c, h, w = np.shape(inputs)
        inputs = inputs.view(-1, c, h, w)
        inputs = inputs.cuda()
    
        # Note: 'volatile' is deprecated. Instead, use 'with torch.no_grad():' for inference
        with torch.no_grad():
            embed = transfer_net(inputs)
        
        # Check validity of embedding
        if torch.any(embed):  # If any value in the embedding tensor is non-zero
            img_embeddings.append(embed.cpu().numpy())

        
    # Average the embeddings for this image
    if img_embeddings:  # If there's at least one valid embedding
        avg_embed = np.mean(img_embeddings, axis=0)
    else:
        avg_embed = np.zeros((256,))  # Placeholder for no embedding
    
    # Add averaged embedding and related info to lists
    averaged_embeddings_first_half.append(avg_embed)
    base_name = os.path.splitext(img_name)[0]
    image_names_first_half.append(base_name)


# Create a DataFrame with averaged embeddings and related info for the first half
df_avg_embeddings_first_half = pd.DataFrame({
    'ImageName': image_names_first_half,
    'Embedding': averaged_embeddings_first_half
})

import numpy as np

# Averaging each list of embeddings
averaged_embeddings = [np.mean(embedding, axis=0) for embedding in df_avg_embeddings_first_half['Embedding']]

# Filter the rows where the 'Embedding' column is not an instance of numpy.ndarray
df_cleaned = df_avg_embeddings_first_half[df_avg_embeddings_first_half['Embedding'].apply(lambda x: isinstance(x, np.ndarray))]

# If you want to reset the index after dropping:
df_cleaned.reset_index(drop=True, inplace=True)

# Convert the 'Embedding' column from a nested list to a numpy array
df_cleaned['Embedding'] = df_cleaned['Embedding'].apply(np.array)

# Average the embeddings along axis=1
df_cleaned['Embedding'] = df_cleaned['Embedding'].apply(lambda x: x.mean(axis=0) if len(x.shape) > 1 else x)

# Expand the averaged embeddings into individual columns
embeddings_df = df_cleaned['Embedding'].apply(pd.Series)

# Rename columns
embeddings_df.columns = [f'embed_{i}' for i in range(embeddings_df.shape[1])]

# Drop the original 'Embedding' column and concatenate the expanded columns
df_avg_embeddings_first_half = pd.concat([df_cleaned.drop('Embedding', axis=1), embeddings_df], axis=1)


df_avg_embeddings_first_half.to_csv('645_2.csv', index=False)

  img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
100%|████████████████████████████████████████████████████████████████████████████████| 645/645 [11:19<00:00,  1.05s/it]


In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import os
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
from torch.autograd import Variable
import transforms as transforms
from skimage import io
from skimage.transform import resize
from models import *
import pandas as pd
from mtcnn import MTCNN
import cv2
import os
from tqdm import tqdm
import pickle

mtcnn = MTCNN()

cut_size = 44

transform_test = transforms.Compose([
    transforms.TenCrop(cut_size),
    transforms.Lambda(lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops])),
])


def rgb2gray(rgb):
    return np.dot(rgb[...,:3], [0.299, 0.587, 0.114])

class ModifiedVGG(nn.Module):
    def __init__(self, vgg_name, embedding_dim=256):
        super(ModifiedVGG, self).__init__()
        
        # Original VGG features
        self.features = self._make_layers(cfg[vgg_name])
        
        # Embedding layer to get the embeddings from the model
        self.embedding_layer = nn.Linear(512, embedding_dim)
        
    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        
        # Pass through the embedding layer
        embedding = self.embedding_layer(out)
        
        return embedding

    def _make_layers(self, cfg):
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
        return nn.Sequential(*layers)

# Load pre-trained weights into modified VGG
transfer_net = ModifiedVGG('VGG19')
checkpoint = torch.load(os.path.join('FER2013_VGG19', 'PrivateTest_model.t7'))

# Remove the 'classifier' weights from the checkpoint as they don't exist in the ModifiedVGG
checkpoint['net'] = {k: v for k, v in checkpoint['net'].items() if 'classifier' not in k}
transfer_net.load_state_dict(checkpoint['net'], strict=False)  # Use strict=False since the model architectures differ slightly

transfer_net.cuda()
transfer_net.eval()

# Now you can pass your images through `transfer_net` to get the embeddings
# Prepare to collect embeddings and filenames
averaged_embeddings_first_half = []
image_names_first_half = []

image_names = os.listdir('final_images_faces/')

for img_name in tqdm(image_names[1290:1935]):
    img_path = os.path.join('final_images_faces/', img_name)
    
    raw_img = io.imread(img_path)
    
    # Detect faces in the image
    faces = mtcnn.detect_faces(raw_img)
    
    img_embeddings = []  # To collect embeddings for this image
    
    for face in faces:
        cropped_face = face['box']  # This gives you the bounding box of the face
        
        gray = rgb2gray(raw_img)
        gray = resize(gray, (48, 48), mode='symmetric').astype(np.uint8)
    
        img = gray[:, :, np.newaxis]
        img = np.concatenate((img, img, img), axis=2)
        img = Image.fromarray(img)
        inputs = transform_test(img)
    
        ncrops, c, h, w = np.shape(inputs)
        inputs = inputs.view(-1, c, h, w)
        inputs = inputs.cuda()
    
        # Note: 'volatile' is deprecated. Instead, use 'with torch.no_grad():' for inference
        with torch.no_grad():
            embed = transfer_net(inputs)
        
        # Check validity of embedding
        if torch.any(embed):  # If any value in the embedding tensor is non-zero
            img_embeddings.append(embed.cpu().numpy())

        
    # Average the embeddings for this image
    if img_embeddings:  # If there's at least one valid embedding
        avg_embed = np.mean(img_embeddings, axis=0)
    else:
        avg_embed = np.zeros((256,))  # Placeholder for no embedding
    
    # Add averaged embedding and related info to lists
    averaged_embeddings_first_half.append(avg_embed)
    base_name = os.path.splitext(img_name)[0]
    image_names_first_half.append(base_name)


# Create a DataFrame with averaged embeddings and related info for the first half
df_avg_embeddings_first_half = pd.DataFrame({
    'ImageName': image_names_first_half,
    'Embedding': averaged_embeddings_first_half
})

import numpy as np

# Averaging each list of embeddings
averaged_embeddings = [np.mean(embedding, axis=0) for embedding in df_avg_embeddings_first_half['Embedding']]

# Filter the rows where the 'Embedding' column is not an instance of numpy.ndarray
df_cleaned = df_avg_embeddings_first_half[df_avg_embeddings_first_half['Embedding'].apply(lambda x: isinstance(x, np.ndarray))]

# If you want to reset the index after dropping:
df_cleaned.reset_index(drop=True, inplace=True)

# Convert the 'Embedding' column from a nested list to a numpy array
df_cleaned['Embedding'] = df_cleaned['Embedding'].apply(np.array)

# Average the embeddings along axis=1
df_cleaned['Embedding'] = df_cleaned['Embedding'].apply(lambda x: x.mean(axis=0) if len(x.shape) > 1 else x)

# Expand the averaged embeddings into individual columns
embeddings_df = df_cleaned['Embedding'].apply(pd.Series)

# Rename columns
embeddings_df.columns = [f'embed_{i}' for i in range(embeddings_df.shape[1])]

# Drop the original 'Embedding' column and concatenate the expanded columns
df_avg_embeddings_first_half = pd.concat([df_cleaned.drop('Embedding', axis=1), embeddings_df], axis=1)


df_avg_embeddings_first_half.to_csv('645_3.csv', index=False)

  img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
100%|████████████████████████████████████████████████████████████████████████████████| 645/645 [11:33<00:00,  1.07s/it]


In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import os
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
from torch.autograd import Variable
import transforms as transforms
from skimage import io
from skimage.transform import resize
from models import *
import pandas as pd
from mtcnn import MTCNN
import cv2
import os
from tqdm import tqdm
import pickle

mtcnn = MTCNN()

cut_size = 44

transform_test = transforms.Compose([
    transforms.TenCrop(cut_size),
    transforms.Lambda(lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops])),
])


def rgb2gray(rgb):
    return np.dot(rgb[...,:3], [0.299, 0.587, 0.114])

class ModifiedVGG(nn.Module):
    def __init__(self, vgg_name, embedding_dim=256):
        super(ModifiedVGG, self).__init__()
        
        # Original VGG features
        self.features = self._make_layers(cfg[vgg_name])
        
        # Embedding layer to get the embeddings from the model
        self.embedding_layer = nn.Linear(512, embedding_dim)
        
    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        
        # Pass through the embedding layer
        embedding = self.embedding_layer(out)
        
        return embedding

    def _make_layers(self, cfg):
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
        return nn.Sequential(*layers)

# Load pre-trained weights into modified VGG
transfer_net = ModifiedVGG('VGG19')
checkpoint = torch.load(os.path.join('FER2013_VGG19', 'PrivateTest_model.t7'))

# Remove the 'classifier' weights from the checkpoint as they don't exist in the ModifiedVGG
checkpoint['net'] = {k: v for k, v in checkpoint['net'].items() if 'classifier' not in k}
transfer_net.load_state_dict(checkpoint['net'], strict=False)  # Use strict=False since the model architectures differ slightly

transfer_net.cuda()
transfer_net.eval()

# Now you can pass your images through `transfer_net` to get the embeddings
# Prepare to collect embeddings and filenames
averaged_embeddings_first_half = []
image_names_first_half = []

image_names = os.listdir('final_images_faces/')

for img_name in tqdm(image_names[1935:2580]):
    img_path = os.path.join('final_images_faces/', img_name)
    
    raw_img = io.imread(img_path)
    
    # Detect faces in the image
    faces = mtcnn.detect_faces(raw_img)
    
    img_embeddings = []  # To collect embeddings for this image
    
    for face in faces:
        cropped_face = face['box']  # This gives you the bounding box of the face
        
        gray = rgb2gray(raw_img)
        gray = resize(gray, (48, 48), mode='symmetric').astype(np.uint8)
    
        img = gray[:, :, np.newaxis]
        img = np.concatenate((img, img, img), axis=2)
        img = Image.fromarray(img)
        inputs = transform_test(img)
    
        ncrops, c, h, w = np.shape(inputs)
        inputs = inputs.view(-1, c, h, w)
        inputs = inputs.cuda()
    
        # Note: 'volatile' is deprecated. Instead, use 'with torch.no_grad():' for inference
        with torch.no_grad():
            embed = transfer_net(inputs)
        
        # Check validity of embedding
        if torch.any(embed):  # If any value in the embedding tensor is non-zero
            img_embeddings.append(embed.cpu().numpy())

        
    # Average the embeddings for this image
    if img_embeddings:  # If there's at least one valid embedding
        avg_embed = np.mean(img_embeddings, axis=0)
    else:
        avg_embed = np.zeros((256,))  # Placeholder for no embedding
    
    # Add averaged embedding and related info to lists
    averaged_embeddings_first_half.append(avg_embed)
    base_name = os.path.splitext(img_name)[0]
    image_names_first_half.append(base_name)


# Create a DataFrame with averaged embeddings and related info for the first half
df_avg_embeddings_first_half = pd.DataFrame({
    'ImageName': image_names_first_half,
    'Embedding': averaged_embeddings_first_half
})

import numpy as np

# Averaging each list of embeddings
averaged_embeddings = [np.mean(embedding, axis=0) for embedding in df_avg_embeddings_first_half['Embedding']]

# Filter the rows where the 'Embedding' column is not an instance of numpy.ndarray
df_cleaned = df_avg_embeddings_first_half[df_avg_embeddings_first_half['Embedding'].apply(lambda x: isinstance(x, np.ndarray))]

# If you want to reset the index after dropping:
df_cleaned.reset_index(drop=True, inplace=True)

# Convert the 'Embedding' column from a nested list to a numpy array
df_cleaned['Embedding'] = df_cleaned['Embedding'].apply(np.array)

# Average the embeddings along axis=1
df_cleaned['Embedding'] = df_cleaned['Embedding'].apply(lambda x: x.mean(axis=0) if len(x.shape) > 1 else x)

# Expand the averaged embeddings into individual columns
embeddings_df = df_cleaned['Embedding'].apply(pd.Series)

# Rename columns
embeddings_df.columns = [f'embed_{i}' for i in range(embeddings_df.shape[1])]

# Drop the original 'Embedding' column and concatenate the expanded columns
df_avg_embeddings_first_half = pd.concat([df_cleaned.drop('Embedding', axis=1), embeddings_df], axis=1)


df_avg_embeddings_first_half.to_csv('645_4.csv', index=False)

  img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
100%|████████████████████████████████████████████████████████████████████████████████| 645/645 [10:25<00:00,  1.03it/s]


In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import os
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
from torch.autograd import Variable
import transforms as transforms
from skimage import io
from skimage.transform import resize
from models import *
import pandas as pd
from mtcnn import MTCNN
import cv2
import os
from tqdm import tqdm
import pickle

mtcnn = MTCNN()

cut_size = 44

transform_test = transforms.Compose([
    transforms.TenCrop(cut_size),
    transforms.Lambda(lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops])),
])


def rgb2gray(rgb):
    return np.dot(rgb[...,:3], [0.299, 0.587, 0.114])

class ModifiedVGG(nn.Module):
    def __init__(self, vgg_name, embedding_dim=256):
        super(ModifiedVGG, self).__init__()
        
        # Original VGG features
        self.features = self._make_layers(cfg[vgg_name])
        
        # Embedding layer to get the embeddings from the model
        self.embedding_layer = nn.Linear(512, embedding_dim)
        
    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        
        # Pass through the embedding layer
        embedding = self.embedding_layer(out)
        
        return embedding

    def _make_layers(self, cfg):
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
        return nn.Sequential(*layers)

# Load pre-trained weights into modified VGG
transfer_net = ModifiedVGG('VGG19')
checkpoint = torch.load(os.path.join('FER2013_VGG19', 'PrivateTest_model.t7'))

# Remove the 'classifier' weights from the checkpoint as they don't exist in the ModifiedVGG
checkpoint['net'] = {k: v for k, v in checkpoint['net'].items() if 'classifier' not in k}
transfer_net.load_state_dict(checkpoint['net'], strict=False)  # Use strict=False since the model architectures differ slightly

transfer_net.cuda()
transfer_net.eval()

# Now you can pass your images through `transfer_net` to get the embeddings
# Prepare to collect embeddings and filenames
averaged_embeddings_first_half = []
image_names_first_half = []

image_names = os.listdir('final_images_faces/')

for img_name in tqdm(image_names[2580:]):
    img_path = os.path.join('final_images_faces/', img_name)
    
    raw_img = io.imread(img_path)
    
    # Detect faces in the image
    faces = mtcnn.detect_faces(raw_img)
    
    img_embeddings = []  # To collect embeddings for this image
    
    for face in faces:
        cropped_face = face['box']  # This gives you the bounding box of the face
        
        gray = rgb2gray(raw_img)
        gray = resize(gray, (48, 48), mode='symmetric').astype(np.uint8)
    
        img = gray[:, :, np.newaxis]
        img = np.concatenate((img, img, img), axis=2)
        img = Image.fromarray(img)
        inputs = transform_test(img)
    
        ncrops, c, h, w = np.shape(inputs)
        inputs = inputs.view(-1, c, h, w)
        inputs = inputs.cuda()
    
        # Note: 'volatile' is deprecated. Instead, use 'with torch.no_grad():' for inference
        with torch.no_grad():
            embed = transfer_net(inputs)
        
        # Check validity of embedding
        if torch.any(embed):  # If any value in the embedding tensor is non-zero
            img_embeddings.append(embed.cpu().numpy())

        
    # Average the embeddings for this image
    if img_embeddings:  # If there's at least one valid embedding
        avg_embed = np.mean(img_embeddings, axis=0)
    else:
        avg_embed = np.zeros((256,))  # Placeholder for no embedding
    
    # Add averaged embedding and related info to lists
    averaged_embeddings_first_half.append(avg_embed)
    base_name = os.path.splitext(img_name)[0]
    image_names_first_half.append(base_name)


# Create a DataFrame with averaged embeddings and related info for the first half
df_avg_embeddings_first_half = pd.DataFrame({
    'ImageName': image_names_first_half,
    'Embedding': averaged_embeddings_first_half
})

import numpy as np

# Averaging each list of embeddings
averaged_embeddings = [np.mean(embedding, axis=0) for embedding in df_avg_embeddings_first_half['Embedding']]

# Filter the rows where the 'Embedding' column is not an instance of numpy.ndarray
df_cleaned = df_avg_embeddings_first_half[df_avg_embeddings_first_half['Embedding'].apply(lambda x: isinstance(x, np.ndarray))]

# If you want to reset the index after dropping:
df_cleaned.reset_index(drop=True, inplace=True)

# Convert the 'Embedding' column from a nested list to a numpy array
df_cleaned['Embedding'] = df_cleaned['Embedding'].apply(np.array)

# Average the embeddings along axis=1
df_cleaned['Embedding'] = df_cleaned['Embedding'].apply(lambda x: x.mean(axis=0) if len(x.shape) > 1 else x)

# Expand the averaged embeddings into individual columns
embeddings_df = df_cleaned['Embedding'].apply(pd.Series)

# Rename columns
embeddings_df.columns = [f'embed_{i}' for i in range(embeddings_df.shape[1])]

# Drop the original 'Embedding' column and concatenate the expanded columns
df_avg_embeddings_first_half = pd.concat([df_cleaned.drop('Embedding', axis=1), embeddings_df], axis=1)


df_avg_embeddings_first_half.to_csv('645_5.csv', index=False)

  img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
100%|████████████████████████████████████████████████████████████████████████████████| 465/465 [07:23<00:00,  1.05it/s]


# Merging all dataframes

In [2]:
import pandas as pd

df_1 = pd.read_csv('645_1.csv')
df_2 = pd.read_csv('645_2.csv')
df_3 = pd.read_csv('645_3.csv')
df_4 = pd.read_csv('645_4.csv')
df_5 = pd.read_csv('645_5.csv')

In [18]:
dfs = [df_1, df_2, df_3, df_4, df_5]

# Vertically stack the dataframes
df_combined = pd.concat(dfs, axis=0, ignore_index=True)

In [11]:
labels = pd.read_csv('labels_final.csv', index_col='SCENE')
labels.head()

Unnamed: 0_level_0,Sarcasm
SCENE,Unnamed: 1_level_1
1_10004,0.0
1_10009,0.0
1_1001,0.0
1_1003,1.0
1_10190,0.0


# Merging with Label
This is for ease of use. The data can be loaded and split into train and test easily including labels without reading and cleaning  again

In [12]:
import pandas as pd

# Remove the suffix from 'ImageName'
df['ImageName'] = df['ImageName'].str.split('_u_').str[0]

# Merge the two DataFrames based on 'ImageName' and 'SCENE'
merged_df = df.merge(labels, left_on='ImageName', right_on='SCENE')

In [13]:
merged_df

Unnamed: 0,ImageName,embed_0,embed_1,embed_2,embed_3,embed_4,embed_5,embed_6,embed_7,embed_8,...,embed_247,embed_248,embed_249,embed_250,embed_251,embed_252,embed_253,embed_254,embed_255,Sarcasm
0,1_10004,0.257854,0.066377,0.161984,0.105779,0.237214,0.453038,-0.026357,0.004962,-0.227729,...,-0.046718,-0.101532,0.000772,0.237791,0.022939,-0.099684,0.293339,0.418022,0.179787,0.0
1,1_10004,0.250770,0.088195,0.178239,0.137102,0.245249,0.393889,-0.063021,0.039276,-0.238393,...,-0.049494,-0.143644,-0.037926,0.217017,0.007791,-0.135646,0.274866,0.379432,0.159771,0.0
2,1_10004,0.245529,0.054224,0.139849,0.151603,0.246439,0.417611,-0.054625,0.056534,-0.256085,...,-0.067398,-0.164051,-0.026499,0.230718,-0.007253,-0.108036,0.289110,0.395327,0.169015,0.0
3,1_10004,0.297613,0.131141,0.179681,0.152630,0.248578,0.505152,-0.043195,0.001284,-0.234365,...,-0.091981,-0.148397,-0.031532,0.207091,0.041971,-0.116478,0.295764,0.486895,0.189584,0.0
4,1_10009,0.290877,0.090882,0.200734,-0.031469,0.171222,0.474413,0.004307,-0.071997,-0.168710,...,-0.002605,0.031383,0.059292,0.231099,0.114619,-0.136372,0.261263,0.390234,0.186231,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3037,3_S06E05_355,-0.167053,0.115846,-0.262147,0.410062,0.134311,0.074633,-0.334752,0.083559,0.059221,...,0.147792,0.084459,-0.072884,0.174486,0.033017,-0.238315,-0.221664,-0.186336,-0.055229,1.0
3038,3_S06E05_355,-0.172933,0.141143,-0.168410,0.389916,0.139194,0.036480,-0.321985,0.083749,0.025253,...,0.132260,0.097110,-0.053377,0.101862,0.008746,-0.181637,-0.218261,-0.144200,-0.000148,1.0
3039,3_S06E06_143,-0.111265,-0.082199,-0.102560,0.181393,0.017090,0.045054,-0.135264,0.047553,-0.027247,...,0.073729,0.047142,-0.009305,0.036020,0.081214,-0.242498,-0.238130,-0.076463,-0.202183,1.0
3040,3_S06E06_143,-0.191432,0.055875,-0.214800,0.344588,0.159469,0.064737,-0.296399,0.076148,0.057796,...,0.153785,0.042271,-0.075402,0.187334,0.047505,-0.246841,-0.205726,-0.112969,-0.038097,1.0


# Saving dataframe with embeddings

In [17]:
merged_df.to_csv('final_image_embeddings.csv', index=False)