In [1]:
import os
import os.path
from PIL import Image
import cv2

import torch
import torch.optim as optim
import torch.utils.data as data
from torch.utils.data import Dataset, DataLoader
from torchvision.models import vgg13
import torch.nn as nn
import math

import torchvision
from torchvision import transforms

import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
%matplotlib inline

from models.vgg import VGG
from preprocess import preprocess

In [2]:
# Directory that holds the pretrained checkpoint (relative path).
MODEL_DIR = '../pretrained'
# File name of the checkpoint inside MODEL_DIR.
MODEL_NAME = 'refinedFERPlus_model.pth'

In [3]:
device = "cpu"  # inference runs on the CPU

print("Loading model...")

# Start from torchvision's VGG-13 and swap its classifier for a head that
# ends in 8 outputs (one per label used later in this notebook).
model = vgg13()
classifier_layers = [
    nn.Linear(7 * 7 * 512, 1024),
    nn.ReLU(),
    nn.Dropout(0.25),
    nn.Linear(1024, 1024),
    nn.ReLU(),
    nn.Dropout(0.25),
    nn.Linear(1024, 8),
]
model.classifier = nn.Sequential(*classifier_layers)
model.to(device)

# NOTE(review): torch.load unpickles the file; only load checkpoints that
# come from a trusted source.
checkpoint_path = f'{MODEL_DIR}/{MODEL_NAME}'
model.load_state_dict(torch.load(checkpoint_path, map_location=device))
print("model loaded...")

Loading model...
model loaded...


In [4]:
def array2tesnor(image):
    """Turn a single image into a normalized tensor with a leading batch axis.

    The image is passed through ``transforms.ToTensor`` and then
    ``transforms.Normalize`` with mean 0.5 and std 0.5 for each of three
    channels; ``unsqueeze(dim=0)`` then prepends a dimension of size 1.

    Args:
        image: input accepted by ``transforms.ToTensor`` (the callers in this
            notebook pass arrays returned by OpenCV).

    Returns:
        The normalized tensor with one extra leading dimension.
    """
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])

    normalized = normalize(to_tensor(image))
    return normalized.unsqueeze(dim=0)

In [5]:
def print_label(frame, output, top_class):
    """Draw secondary class scores onto ``frame`` as feedback text.

    For every entry of the module-level ``classes`` list whose score
    ``output[0][i]`` is above 0.5 and whose label differs from ``top_class``,
    a ``"<label>:<score>"`` line is drawn. Lines are stacked vertically,
    100 pixels apart, starting at y=200.

    Args:
        frame: image to draw on; it is modified in place.
        output: model output indexed as ``output[0][i]`` per class.
            NOTE(review): the callers in this notebook pass the raw model
            output rather than softmax probabilities -- confirm that the
            0.5 threshold is meant for that scale.
        top_class: label (string) of the winning class, which is skipped.

    Returns:
        The same ``frame`` object, with the text drawn on it.
    """
    # feedback
    count = 2  # vertical slot index; the first line lands at y = 200
    for i, label in enumerate(classes):
        if output[0][i] > 0.5 and label != top_class:
            y = 100 * count
            string = str(label) + ':' + str(output[0][i])
            # The module is imported as ``cv2``; the previous ``cv`` alias was
            # never defined and raised NameError on the first drawn label.
            cv2.putText(frame, string, (100, y), cv2.FONT_ITALIC, 3, (0,0,0), 5)
            count += 1
    return frame

In [6]:
# Single-image sanity check: classify one image from disk and print the label.
IMAGE_PATH = './preprocess/fer0032230.png'

image = cv2.imread(IMAGE_PATH)
if image is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # instead of raising, which would otherwise fail later inside the
    # tensor transform with an unrelated-looking error.
    raise FileNotFoundError(f"Could not read image: {IMAGE_PATH}")

# Index -> label mapping for the 8 model outputs.
class_dict = {
    0: 'neutral',
    1: 'happiness',
    2: 'surprise',
    3: 'sadness',
    4: 'anger',
    5: 'disgust',
    6: 'fear',
    7: 'contempt'
}

# Same labels as a list; print_label() reads this module-level name.
classes = ['neutral', 'happiness', 'surprise', 'sadness', 'anger', 'disgust', 'fear', 'contempt']

model.eval()  # disable dropout for inference

with torch.no_grad():
    tensor = array2tesnor(image).to(device)
    output = model(tensor)

    prob = torch.nn.functional.softmax(output, dim=1)
    top_p, top_class = prob.topk(1, dim=1) # extract top class index and probability
    # top_class has shape (1, 1); .item() turns it into a plain int index.
    result = classes[top_class.item()]
    print(result)

happiness


In [7]:
# Live demo: classify each webcam frame and overlay the predicted label.
# Press "q" in the preview window to stop.
webcam = cv2.VideoCapture(cv2.CAP_DSHOW+0)

if not webcam.isOpened():
    # Do not call exit() here: inside a notebook it shuts the kernel down.
    print("Could not open webcam")
else:
    model.eval()  # disable dropout even if the single-image cell was skipped
    try:
        # Inference only: no autograd graph is needed for the frames.
        with torch.no_grad():
            while webcam.isOpened():
                status, frame = webcam.read()
                if not status:
                    # No frame was delivered, so there is nothing to classify
                    # or display; stop instead of processing an invalid frame.
                    break

                preprocess_frame = preprocess.image_preprocessing(frame) # input for cnn

                if preprocess_frame is None:
                    result = 'Not Defined'
                    result_image = frame
                else:
                    # NOTE(review): preprocess_frame is described as the CNN
                    # input but the raw frame is what gets classified. The
                    # return type of image_preprocessing is not visible here,
                    # so the original input is kept -- confirm and switch.
                    tensor = array2tesnor(frame).to(device)
                    output = model(tensor)

                    prob = torch.nn.functional.softmax(output, dim=1)
                    top_p, top_class = prob.topk(1, dim=1) # extract top class index and probability
                    result = classes[top_class.item()]
                    result_image = print_label(frame, output, result) # feedback

                # put final class on the frame
                cv2.putText(result_image, result, (50, 100), cv2.FONT_ITALIC, 5, (0,0,255), 5)
                cv2.imshow("test", result_image)

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    finally:
        # Always free the camera and close the preview window, even when
        # the loop is interrupted or a frame raises.
        webcam.release()
        cv2.destroyAllWindows()

Could not open webcam
