In [3]:
"""
visualize results for test image
"""

import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
from torch.autograd import Variable

import transforms as transforms
from skimage import io
from skimage.transform import resize
from models import *

import pandas as pd

In [20]:
# Side length, in pixels, of every square crop handed to the network.
cut_size = 44


def _stack_crops(crops):
    """Turn each crop into a tensor and stack them along a new leading axis."""
    return torch.stack([transforms.ToTensor()(crop) for crop in crops])


# Test-time preprocessing: crop the image with TenCrop, then pack the crops
# into a single tensor so they can be evaluated as one batch.
transform_test = transforms.Compose([
    transforms.TenCrop(cut_size),
    transforms.Lambda(_stack_crops),
])

def rgb2gray(rgb):
    """Convert an image array to single-channel luminance.

    Colour input is weighted with the ITU-R BT.601 luma coefficients
    (0.299 R + 0.587 G + 0.114 B); any channels after the third (e.g. alpha)
    are ignored.

    Images that are already single-channel are passed through instead of
    being mangled: previously a 2-D grayscale image had its first three
    *columns* treated as R, G, B, which silently collapsed the width axis.

    Parameters
    ----------
    rgb : array_like
        One of:
        * ``(..., C)`` with ``C >= 3`` -- colour data, channels last;
        * ``(H, W)`` -- grayscale image, returned as float64;
        * ``(..., 1)`` or ``(..., 2)`` with at least 3 dimensions --
          grayscale (optionally with alpha); the first channel is returned.

        For backward compatibility a 2-D array whose last axis has length
        3 or 4 is still interpreted as a list of RGB(A) pixels.

    Returns
    -------
    numpy.ndarray
        float64 luminance with the channel axis removed.
    """
    rgb = np.asarray(rgb)

    # 2-D input that cannot be a list of RGB(A) pixels: already grayscale.
    if rgb.ndim == 2 and rgb.shape[-1] not in (3, 4):
        return rgb.astype(np.float64)

    # Channels-last image with fewer than three channels: gray or gray+alpha.
    if rgb.ndim >= 3 and rgb.shape[-1] < 3:
        return rgb[..., 0].astype(np.float64)

    return np.dot(rgb[..., :3], [0.299, 0.587, 0.114])

# Networks already loaded by fit_pred, keyed by checkpoint path, so that
# calling fit_pred in a loop does not rebuild VGG19 and re-read the
# checkpoint from disk for every single image.
_FIT_PRED_NET_CACHE = {}


def _load_emotion_net(checkpoint_path):
    """Return the VGG19 classifier in eval mode, loading it at most once per path."""
    net = _FIT_PRED_NET_CACHE.get(checkpoint_path)
    if net is None:
        net = VGG('VGG19')
        checkpoint = torch.load(checkpoint_path, map_location='cpu')
        net.load_state_dict(checkpoint['net'])
        # net.cuda()
        net.eval()
        _FIT_PRED_NET_CACHE[checkpoint_path] = net
    return net


def fit_pred(path='images/1.jpg'):
    """Predict the emotion label for the image stored at ``path``.

    The image is converted to grayscale, resized to 48x48, replicated to
    three channels and passed through ``transform_test``; the network output
    is averaged over the resulting crops and the highest-scoring class is
    returned.

    Parameters
    ----------
    path : str
        Location of the image file, read with ``skimage.io.imread``.

    Returns
    -------
    str
        One of 'Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise',
        'Neutral'.

    Notes
    -----
    The model is loaded from ``FER2013_VGG19/PrivateTest_model.t7`` on the
    CPU the first time the function is called and reused afterwards.
    Inference runs under ``torch.no_grad()``; the former
    ``Variable(..., volatile=True)`` no longer disables autograd on the
    PyTorch versions that provide ``torch.jit``.
    """
    raw_img = io.imread(path)
    gray = rgb2gray(raw_img)
    # 48x48 is presumably the FER2013 image size the checkpoint was trained on.
    gray = resize(gray, (48, 48), mode='symmetric').astype(np.uint8)

    # Replicate the single gray channel three times to get an H x W x 3 image.
    img = gray[:, :, np.newaxis]
    img = np.concatenate((img, img, img), axis=2)
    img = Image.fromarray(img)
    inputs = transform_test(img)

    # Index order must match the label order used when the model was trained.
    class_names = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']

    net = _load_emotion_net(os.path.join('FER2013_VGG19', 'PrivateTest_model.t7'))

    # transform_test yields one tensor per crop stacked along axis 0.
    ncrops, c, h, w = inputs.shape
    inputs = inputs.view(-1, c, h, w)
    # inputs = inputs.cuda()

    with torch.no_grad():
        outputs = net(inputs)

    outputs_avg = outputs.view(ncrops, -1).mean(0)  # avg over crops

    # Softmax is monotonic, so the arg-max of the averaged raw outputs is the
    # predicted class; no need to compute probabilities here.
    _, predicted = torch.max(outputs_avg, 0)

    predicted_emotion = class_names[int(predicted)]
    return predicted_emotion

In [35]:
# Load the CelebA attribute table, keeping only the image file name
# (`image_id`) and the `Smiling` flag used as ground truth below.
data = pd.read_csv('../dataset/list_attr_celeba.csv')[['image_id', 'Smiling']]

In [41]:
from tqdm import tqdm
from IPython.display import clear_output

In [42]:
# Number of images (taken from the top of the attribute table) to classify.
N = 1000

# Location of each image on disk; `{}` is filled with the image file name.
image_path_template = '../dataset/img_align_celeba/1000/{}'

results = []
for im in tqdm(data.image_id[:N]):
    predicted_emotion = fit_pred(path=image_path_template.format(im))
    results.append(predicted_emotion)
    # Wipe the cell output after every image so it does not accumulate.
    clear_output()



100%|██████████| 1000/1000 [08:32<00:00,  1.90it/s]

In [50]:
# Ground-truth label derived from the Smiling flag: 1 means 'Happy',
# every other value is treated as 'Sad'.
data['Emotion'] = ['Sad' if smiling_flag != 1 else 'Happy' for smiling_flag in data.Smiling]

In [65]:
# Distinct emotion labels that the model predicted over the processed images.
set(results)

{'Angry', 'Fear', 'Happy', 'Neutral', 'Sad', 'Surprise'}

Преобразуем результаты: если лицо распознано не как счастливое — считаем, что оно без улыбки.

In [71]:
# Collapse the predictions to two classes: anything that is not 'Happy'
# counts as 'Sad' (i.e. not smiling).
results_binary = [label if label == 'Happy' else 'Sad' for label in results]

In [114]:
# Positions, within the first N rows, whose ground-truth label is 'Happy'.
happy_idx = np.flatnonzero(np.array(data.Emotion[:N]) == 'Happy')

In [122]:
# Overall accuracy: share of the first N images whose binary prediction
# equals the ground-truth label.
prediction_matches = data.Emotion[:N] == np.array(results_binary)
prediction_matches.sum() / N

0.737

In [120]:
# Accuracy restricted to images whose ground truth is 'Happy'
# (fraction of smiling faces that the model also labels 'Happy').
happy_truth = data['Emotion'][happy_idx]
happy_predictions = np.array(results_binary)[happy_idx]
(happy_truth == happy_predictions).sum() / len(happy_truth)

0.5705882352941176

In [126]:
import torchvision

# Export the classifier to TorchScript.
#
# The model and the example input are built here explicitly: inside
# `fit_pred` both `net` and `inputs` are local variables, so referring to
# them from this cell fails with NameError on a freshly started kernel.
net = VGG('VGG19')
checkpoint = torch.load(os.path.join('FER2013_VGG19', 'PrivateTest_model.t7'), map_location='cpu')
net.load_state_dict(checkpoint['net'])
net.eval()

# Tracing only needs an input of the right shape and dtype. This mirrors the
# batch fit_pred feeds to the network: 3-channel crops of cut_size x cut_size.
# NOTE(review): a batch of 10 assumes TenCrop yields ten crops -- confirm.
example_inputs = torch.rand(10, 3, cut_size, cut_size)

traced_script_module = torch.jit.trace(net, example_inputs)

traced_script_module.save("emotion_recognition.pt")

Как видно, в определении эмоций есть куда расти: общая точность — 0.737, а среди улыбающихся лиц как «Happy» распознано лишь около 57 %.