In [2]:
from facial_lm_model import FacialLM_Model
import torch
import numpy as np
import cv2
from utils import *
import os
from torch.utils.data import Dataset
from tqdm import tqdm
import random
import pandas as pd
from torch.utils.data import DataLoader


In [3]:
weights = './model_weights/facial_landmarks.pth'
model = FacialLM_Model()

model.load_state_dict(torch.load(weights))
model.eval()
img_path = './4.jpg'
img = cv2.imread(img_path).astype(np.float32)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = pad_image(img, desired_size=192)
print(img.shape)
landmarks = model.predict(img)


(192, 192, 3)


In [36]:
import torch.nn as nn

class EmotionDetector(nn.Module):
    def __init__(self, landmark_model_weights, num_inp_imgs=10, num_labels=8):
        super().__init__()
        self.num_landmarks = 1404
        self.feature_size = 32
        
        self.num_imgs = num_inp_imgs
        self.num_labels = num_labels
        
        self.landmark_model = FacialLM_Model()
        self.landmark_model.load_state_dict(torch.load(landmark_model_weights))
        
        self.reducer = nn.Sequential(
            nn.Linear(self.num_landmarks, 512),
            nn.ReLU(),
            nn.LayerNorm(512),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, self.feature_size),
            nn.LayerNorm(self.feature_size),
        )
        
        self.classifier = nn.Sequential(
            nn.Linear(self.feature_size * self.num_imgs, num_labels)
        )
        
    def forward(self, x):
        B, N, C, W, H = x.shape
        x = x.view(B*N, C, W, H)
        x, _ = self.landmark_model(x)
        x = x.view(B * N, self.num_landmarks)
        x = self.reducer(x)
        x = x.view(B, self.num_imgs * self.feature_size)
        x =  self.classifier(x)
        return x

In [38]:
video_model = EmotionDetector('./model_weights/facial_landmarks.pth')
video_model(torch.randn((8, 10, 3, 192, 192)))


tensor([[-0.1197,  0.6324,  0.3485, -0.4941, -0.0358, -0.2341,  0.4940,  0.1616],
        [-0.0140,  0.5639,  0.2450, -0.6044, -0.0431, -0.2267,  0.3871,  0.1949],
        [-0.0202,  0.5701,  0.2593, -0.5343, -0.0249, -0.2307,  0.5697,  0.1302],
        [-0.0023,  0.5904,  0.3055, -0.5262,  0.0047, -0.2352,  0.5406,  0.1575],
        [-0.0131,  0.5361,  0.2388, -0.5475, -0.0085, -0.2263,  0.5451,  0.1488],
        [-0.0242,  0.5695,  0.1122, -0.5907, -0.0551, -0.2630,  0.4002,  0.2077],
        [-0.0501,  0.6384,  0.2886, -0.6630,  0.0265, -0.2439,  0.4587,  0.2685],
        [ 0.0361,  0.5978,  0.2948, -0.4410,  0.0049, -0.2545,  0.5289,  0.1848]],
       grad_fn=<AddmmBackward0>)

In [8]:
import os

def process_video_all_frames(path: str, emotion: str, save_folder: str):
    '''

    '''
    if not os.path.exists(path):
        raise ValueError("Invalid Video Path")
    
    save_folder = os.path.join(save_folder, emotion)
    os.makedirs(save_folder, exist_ok=True)
    filename = path.split('/')[-1].split('.')[0]
    
    cap = cv2.VideoCapture(path)
            
    video = []
    while True:
        ret, frame = cap.read()
        if ret == False:
            break
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 
        frame = pad_image(frame, desired_size=192)       
        video.append(frame)
        
    save_path = os.path.join(save_folder, filename + '.npy') 
    np.save(save_path, np.array(video))
    return

test_video_path = "/media/cv/Extreme Pro/MERR/mer2023train/train/sample_00002844.avi"

In [10]:

videos_path = "/media/cv/Extreme Pro/MERR/mer2023train/train"
csv_path = "/media/cv/Extreme Pro/MERR/mer2023train/train-label.csv"

def make_vid_path(filename, base_path):
    return os.path.join(base_path, filename + '.avi')

def load_csv(path, base_path):
    df = pd.read_csv(path)
    df['path'] = df['name'].apply(lambda x: make_vid_path(x, base_path))
    return df

df = load_csv(csv_path, videos_path)
df.head()

Unnamed: 0,name,discrete,valence,path
0,sample_00002721,neutral,-0.25,/media/cv/Extreme Pro/MERR/mer2023train/train/...
1,sample_00005854,angry,-2.5,/media/cv/Extreme Pro/MERR/mer2023train/train/...
2,sample_00003358,sad,-2.5,/media/cv/Extreme Pro/MERR/mer2023train/train/...
3,sample_00005933,happy,2.0,/media/cv/Extreme Pro/MERR/mer2023train/train/...
4,sample_00000613,angry,-1.75,/media/cv/Extreme Pro/MERR/mer2023train/train/...


In [11]:
from tqdm import tqdm
save_folder = './MER_Dataset/'

failed_count = 0

for row in tqdm(range(df.shape[0])):
    if row < 1900:
        continue
    path = df.iloc[row]['path']
    emotion = df.iloc[row]['discrete']
    filename = df.iloc[row]['name']
    try:
        process_video_all_frames(path, emotion, save_folder)
    except:
        failed_count += 1
        print(filename, failed_count)
        
    

 56%|█████▋    | 1901/3373 [00:00<00:00, 2743.15it/s]

sample_00002946 1


100%|██████████| 3373/3373 [04:30<00:00, 12.45it/s]  


In [31]:
class LandmarksTemporal(Dataset):
    def __init__(self, dataset_path: str, mode: str, num_imgs: int=10, split: float=0.75):
        self._paths = self._load_paths(dataset_path, mode, split)
        self.num_imgs = num_imgs
        self.paths = []
        for path in tqdm(self._paths):
            try: 
                item = np.load(path)
                self.paths.append(path)
            except Exception as e:
                print("nothing saved", e)
        random.shuffle(self.paths)
        
          
        self.label2id = {'angry': 0, 'happy': 1, 'neutral': 2, 'sad': 3, 'surprise': 4, 'worried': 5}
        self.id2label = {v:k for k, v in self.label2id.items()}
           
    def __len__(self):
        return len(self.paths)
    
    def __getitem__(self, index):
        path = self.paths[index]
        item = torch.tensor(np.load(path), dtype=torch.float32)
        FRAMES, C, W, H = item.shape
        video = torch.zeros(self.num_imgs, C, W, H)
        for i in range(min(FRAMES, self.num_imgs)):
            video[i] = item[i]
        video = video.permute(0, 3, 1, 2)

        label = torch.tensor(self.label2id[path.split('/')[-2]])
        return (video, label)        
        
    def _load_paths(self, base_folder: str, mode: str, split: float) -> list:
        
        emotions = [os.path.join(base_folder, emo) for emo in os.listdir(base_folder)]
        paths = []
        for emo in emotions:
            files = os.listdir(emo)
            if mode == 'train':
                start = 0
                end = int(len(files) * split)
            elif mode == 'test':
                start = int(len(files) * split)
                end = len(files)
            else:
                raise ValueError("Sanity Check: No mode other than train and test is defined")
            files = files[start: end]
            for file in files:
                paths.append(os.path.join(emo, file))
        # paths = [os.path.join(emo, file) for emo in emotions for file in os.listdir(emo)]
        return paths
    
    

In [45]:
from sklearn.metrics import accuracy_score

test_acc = []
test_loss = [0]
train_loss = [0]
train_step = [0]


def train(weights_save_path):
    
    '''
    Updates the global variables need to check that
    '''
    
    eval_loss = 0.0
    eval_acc = 0.0
    max_eval_acc = -1
    
    for epoch in range(epochs):
        model.train()
        
        for i, batch in enumerate(trainloader):
            # zero gradients
            optimizer.zero_grad()
            
            landmarks, labels = batch
            landmarks: torch.Tensor =landmarks.to(device)
            labels: torch.Tensor = labels.to(device)
            
            logits = model(landmarks)
            loss: torch.Tensor = criterion(logits, labels)
            
            
            #back_prop
            loss.backward()
            optimizer.step()
            
            if i % 25 == 0:
                print(f"Epoch: {epoch} | Loss: {loss.item():.4f} | Last Eval Loss: {eval_loss:.4f}| Last Eval Accuracy: {eval_acc:.4f}")
            
            # track stats
            train_loss.append(loss.item())
            train_step.append(train_step[-1] + 1)
        
        # evaluate the model
        eval_loss, eval_acc = test()
        
        # track evaluation dataset stats
        test_acc.append(eval_acc)
        test_loss.append(eval_loss)
        
        # save the best model
        if eval_acc > max_eval_acc:
            torch.save(model.state_dict(), weights_save_path)
            max_eval_acc = eval_acc
        
    print("Evaluation Loss: ",  eval_loss, eval_acc)
    
    
@torch.no_grad()         
def test():
    preds_ = []
    labels_ = []
    eval_loss = []
    model.eval()
    for i, batch in enumerate(testloader):
        
        landmarks, labels = batch
        landmarks = landmarks.to(device)
        labels = labels.to(device)
        
        logits = model(landmarks)
        
        preds = torch.argmax(logits, dim=1)
        preds_.extend((preds.cpu().tolist()))
        labels_.extend(labels.cpu().tolist())
        
        loss = criterion(logits, labels)
        
        eval_loss.append(loss.item())
    
    return (sum(eval_loss)/len(eval_loss), accuracy_score(preds_, labels_))

In [48]:
device='cuda:1'
epochs = 200
lr = 1e-3


model = EmotionDetector('./model_weights/facial_landmarks.pth').to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

BATCH_SIZE = 32
# dataset = LandMarks(save_folder)
# train, test = split_dataset(dataset, 0.75, random_seed=42)
dataset_path = "./MER_Dataset"

train_data = LandmarksTemporal(dataset_path, mode="train")
test_data = LandmarksTemporal(dataset_path, mode="test")

trainloader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
testloader = DataLoader(test_data, batch_size=BATCH_SIZE)

  0%|          | 0/2527 [00:00<?, ?it/s]

 69%|██████▉   | 1739/2527 [00:03<00:01, 454.86it/s]

nothing saved cannot reshape array of size 11947904 into shape (183,192,192,3)


100%|██████████| 2527/2527 [00:05<00:00, 435.71it/s]
100%|██████████| 846/846 [00:02<00:00, 419.21it/s]


In [49]:
test_acc = []
test_loss = [0]
train_loss = [0]
train_step = [0]

train('./model_weights/temporal_landmark.pt')

Epoch: 0 | Loss: 2.4101 | Last Eval Loss: 0.0000| Last Eval Accuracy: 0.0000
Epoch: 1 | Loss: 1.7262 | Last Eval Loss: 1.6941| Last Eval Accuracy: 0.2577
Epoch: 2 | Loss: 1.6284 | Last Eval Loss: 1.6811| Last Eval Accuracy: 0.2577
Epoch: 3 | Loss: 1.7144 | Last Eval Loss: 1.6853| Last Eval Accuracy: 0.2435
Epoch: 4 | Loss: 1.7277 | Last Eval Loss: 1.6796| Last Eval Accuracy: 0.2577
Epoch: 5 | Loss: 1.7428 | Last Eval Loss: 1.6968| Last Eval Accuracy: 0.2435
Epoch: 6 | Loss: 1.6137 | Last Eval Loss: 1.6799| Last Eval Accuracy: 0.2435
Epoch: 7 | Loss: 1.6628 | Last Eval Loss: 1.6784| Last Eval Accuracy: 0.2435
Epoch: 8 | Loss: 1.5843 | Last Eval Loss: 1.6759| Last Eval Accuracy: 0.2435
Epoch: 9 | Loss: 1.6496 | Last Eval Loss: 1.6769| Last Eval Accuracy: 0.2435
Epoch: 10 | Loss: 1.6998 | Last Eval Loss: 1.6773| Last Eval Accuracy: 0.2577
Epoch: 11 | Loss: 1.7181 | Last Eval Loss: 1.6741| Last Eval Accuracy: 0.2577
Epoch: 12 | Loss: 1.6991 | Last Eval Loss: 1.6771| Last Eval Accuracy: 0.2