In [1]:
import os
import json
import torch
import numpy as np
import matplotlib.pyplot as plt
from torch_itl import model, sampler, cost, kernel, estimator

### Set paths and load model config / ckpt

In [3]:
# Set trained model paths
base_experiment_path = './LS_Experiments/KDEF_single'
model_name = 'KDEF_NE_itl_model_split1_CF'

# Locate the model config (*.json containing 'config') and the checkpoint
# (*.pt containing 'ckpt') inside the model directory.
base_model_path = os.path.join(base_experiment_path, model_name, 'model/')
model_config_path = None
model_ckpt_path = None
# sorted() makes the choice deterministic: if several files match, the
# lexicographically last one wins.
for fname in sorted(os.listdir(base_model_path)):
    if ('config' in fname) and fname.endswith('.json'):
        model_config_path = os.path.join(base_model_path, fname)
    elif ('ckpt' in fname) and fname.endswith('.pt'):
        model_ckpt_path = os.path.join(base_model_path, fname)
    else:
        # The file exists, it just is not one of the two files we look for.
        print(fname, 'is neither a model config nor a checkpoint, skipping')

# Fail here with a clear message instead of a NameError further down.
if model_config_path is None or model_ckpt_path is None:
    raise FileNotFoundError(
        'Expected a *config*.json and a *ckpt*.pt file in ' + base_model_path
        + ' (config: ' + str(model_config_path) + ', ckpt: ' + str(model_ckpt_path) + ')')
print(model_config_path, model_ckpt_path)

# Load the JSON config; the checkpoint itself is loaded later with torch.load.
with open(model_config_path, 'r') as f:
    model_config = json.load(f)

./LS_Experiments/KDEF_single/KDEF_NE_itl_model_split1_CF/model/itl_model_config_20210130-101821.json ./LS_Experiments/KDEF_single/KDEF_NE_itl_model_split1_CF/model/itl_model_ckpt_20210130-101821.pt


### Read data

In [4]:
# ----------------------------------
# Reading input/output data
# ----------------------------------
data_config = model_config['Data']
dataset = data_config['dataset']
theta_type = data_config['theta_type']
# NOTE(review): the key is spelled 'inpu_emotion'; presumably this matches the
# saved config file -- confirm before "fixing" the spelling.
inp_emotion = data_config['inpu_emotion']
inc_neutral = data_config['include_emotion']
# Read once and reuse: the non-facealigner loader below needs this value too
# (it was previously referenced without ever being defined).
input_data_version = data_config['input_data_version']
use_facealigner = input_data_version == 'facealigner'

data_path = './datasets/' + dataset + '_Aligned/' + dataset + '_LANDMARKS'  # set data path
if dataset == 'Rafd':
    # dirty hack only used to get Rafd speaker ids, not continuously ordered
    # NOTE(review): machine-specific absolute path; adjust for your environment.
    data_csv_path = '/home/mlpboon/Downloads/Rafd/Rafd.csv'

print('Reading data')
if use_facealigner:
    if dataset == 'KDEF':
        from datasets.datasets import kdef_landmarks_facealigner
        x_train, y_train, x_test, y_test, train_list, test_list = \
            kdef_landmarks_facealigner(data_path, inp_emotion=inp_emotion, inc_emotion=inc_neutral)
    elif dataset == 'Rafd':
        from datasets.datasets import rafd_landmarks_facealigner
        x_train, y_train, x_test, y_test, train_list, test_list = \
            rafd_landmarks_facealigner(data_path, data_csv_path, inc_emotion=inc_neutral)
    else:
        # Without this, x_train would be undefined and fail obscurely below.
        raise ValueError('No facealigner landmark loader for dataset: ' + str(dataset))
else:
    # This loader returns no train/test file lists, so train_list and
    # test_list are not defined on this path.
    from datasets.datasets import import_kdef_landmark_synthesis
    x_train, y_train, x_test, y_test = import_kdef_landmark_synthesis(dtype=input_data_version)

n = x_train.shape[0]   # size of the first axis of the training inputs
m = y_train.shape[1]   # size of the second axis of the training outputs
nf = y_train.shape[2]  # size of the third axis of the training outputs
print('data dimensions', n, m, nf, inp_emotion)

Reading data
data dimensions 126 7 136 NE


In [5]:
# set ITL model
# Only a fixed (non-learnable) input kernel is handled by this notebook.
assert model_config['Kernels']['kernel_input_learnable'] == False
kernel_input = kernel.Gaussian(model_config['Kernels']['gamma_inp'])
kernel_output = kernel.Gaussian(model_config['Kernels']['gamma_out'])
kernel_freq = np.eye(nf) # can be added to ckpt or manually set as np.load(kernel_file)

# define emotion sampler - this can also be added to ckpt
if theta_type == 'aff':
    from datasets.datasets import import_affectnet_va_embedding
    affect_net_csv_path = ''  # to be set if theta_type == 'aff'
    aff_emo_dict = import_affectnet_va_embedding(affect_net_csv_path)

    # Map each dataset's emotion labels to the keys used in aff_emo_dict.
    if dataset == 'KDEF':
        aff_emo_match = {'NE': 'Neutral',
                         'HA': 'Happy',
                         'SA': 'Sad',
                         'SU': 'Surprise',
                         'AF': 'Fear',
                         'DI': 'Disgust',
                         'AN': 'Anger',
                         }
    elif dataset == 'Rafd':
        # NOTE(review): 'contemptous' is kept as-is; confirm it matches the
        # label produced by the Rafd loader.
        aff_emo_match = {'neutral': 'Neutral',
                         'happy': 'Happy',
                         'sad': 'Sad',
                         'surprised': 'Surprise',
                         'fearful': 'Fear',
                         'disgusted': 'Disgust',
                         'angry': 'Anger',
                         'contemptous': 'Contempt'
                         }
    else:
        raise ValueError('No emotion-label mapping for dataset: ' + str(dataset))

    # NOTE(review): this call passes inc_emotion=..., the one below passes
    # inc_neutral=...; confirm both keyword names against CircularSampler.
    sampler_ = sampler.CircularSampler(data=dataset+theta_type,
                                       inp_emotion=aff_emo_match[inp_emotion],
                                       inc_emotion=inc_neutral,
                                       sample_dict=aff_emo_dict)
elif theta_type == '':
    sampler_ = sampler.CircularSampler(data=dataset,
                                       inc_neutral=inc_neutral)
else:
    # Previously an unknown theta_type fell through and surfaced as a
    # NameError on sampler_ below.
    raise ValueError('Unsupported theta_type: ' + repr(theta_type))
sampler_.m = m

itl_model = model.SpeechSynthesisKernelModel(kernel_input, kernel_output,
                                             kernel_freq=torch.from_numpy(kernel_freq).float())

### Load model and predict

In [6]:
# Restore the trained coefficients and predict on the test inputs.
# map_location='cpu': the data tensors in this notebook are converted with
# .numpy() directly, so they live on the CPU; this keeps a checkpoint that was
# saved from a GPU loadable on any machine.
ckpt = torch.load(model_ckpt_path, map_location='cpu')
itl_model.test_mode(x_train=x_train, thetas=sampler_.sample(m), alpha=ckpt['itl_alpha'])
# Inference only: skip autograd tracking so the result can be converted with
# .numpy() without an explicit detach.
with torch.no_grad():
    pred_test = itl_model.forward(x_test, sampler_.sample(m))

In [None]:
# compute cost
cost_pred = cost.speech_synth_loss(y_test, pred_test, sampler_.sample(m))
print('cost test:', cost_pred)

# Spread per index i along the second axis (one per emotion): the variance
# over the sample axis, summed over all coordinates. This equals the mean
# squared Euclidean distance of the samples to their mean.
var_gt = 0
var_test = 0
for i in range(m):
    # detach() so .numpy() also works when the tensor is tracked by autograd,
    # matching how predictions are converted elsewhere in this notebook.
    var_gt_em = np.sum(np.var(y_test[:, i, :].detach().numpy(), axis=0))
    var_test_em = np.sum(np.var(pred_test[:, i, :].detach().numpy(), axis=0))
    var_gt += var_gt_em
    var_test += var_test_em
    print('{:d}, {:.3f}, {:.3f}'.format(i, var_gt_em, var_test_em))
# Averages over the m indices: ground truth first, prediction second.
print('{:.3f}, {:.3f}'.format(var_gt/m, var_test/m))

In [None]:
# NOTE(review): xv1 and xv2 are not defined in any visible cell; this looks
# like a leftover scratch cell that would raise NameError on a fresh
# Restart & Run All -- confirm and remove.
xv2.diagonal(), xv1

In [None]:
# Scale the predictions by 128 and view entry [0, 0] as 68 rows of 2 values.
check_output = pred_test*128
check_output[0,0].reshape(68,2)

In [None]:
%matplotlib inline
plt_x = x_test[0].numpy().reshape(68, 2)
plt_xt = pred_test[1, 4].detach().numpy().reshape(68, 2)
if use_facealigner:
    plt_x = plt_x * 128
    plt_xt = plt_xt * 128
plt_uv = plt_xt - plt_x
plt.quiver(plt_x[:, 0], plt_x[:, 1], plt_uv[:, 0], plt_uv[:, 1], angles='xy')
ax = plt.gca()
ax.invert_yaxis()
plt.show()

### Continuous generation

In [7]:
def circular_sampling(theta1, theta2, num_samples):
    """Sample points on the unit circle between the directions of two 2-D vectors.

    Each vector is reduced to its polar angle in [0, 2*pi). Angles are spaced
    evenly (both endpoints included) between the two, and the result is
    ordered from the direction of ``theta1`` to the direction of ``theta2``.
    The arc never crosses the 0 / 2*pi seam, so it is not necessarily the
    shorter of the two possible arcs.

    Args:
        theta1: indexable with x at ``[0]`` and y at ``[1]``; start direction.
        theta2: same layout as ``theta1``; end direction.
        num_samples: number of samples to generate.

    Returns:
        A tuple ``(sample_coords, sampled_angles)``: an array of shape
        ``(num_samples, 2)`` holding ``(cos, sin)`` of each angle, and the
        matching angles in radians. Both are returned in the same order for
        either direction of travel (the reverse case used to return only the
        coordinates, which broke callers that unpack two values).
    """
    def _polar_angle(theta):
        # arctan2 can return negative angles; shift those by a full turn.
        angle = np.arctan2(theta[1], theta[0])
        return angle if angle >= 0 else angle + (2 * np.pi)

    angle1 = _polar_angle(theta1)
    angle2 = _polar_angle(theta2)

    # Always sample in increasing angle, then flip if theta1 has the larger angle.
    reverse = angle1 > angle2
    start, end = (angle2, angle1) if reverse else (angle1, angle2)

    sampled_angles = np.linspace(start=start, stop=end, num=num_samples, endpoint=True)
    sample_coords = np.vstack((np.cos(sampled_angles), np.sin(sampled_angles))).T

    if reverse:
        # Copy into contiguous arrays so the reversed results have no
        # negative strides (torch.from_numpy rejects those).
        sample_coords = np.ascontiguousarray(np.flipud(sample_coords))
        sampled_angles = np.ascontiguousarray(sampled_angles[::-1])
    return sample_coords, sampled_angles

def radial_sampling(theta, num_samples):
    """Sample points along the ray from the origin in the direction of ``theta``.

    Args:
        theta: indexable with x at ``[0]`` and y at ``[1]``; only its
            direction is used.
        num_samples: number of radii, evenly spaced from 0 to 1 inclusive.

    Returns:
        A tuple ``(sample_coords, sampled_radii)``: an array of shape
        ``(num_samples, 2)`` with the sampled points, and the radii used.
    """
    direction = np.arctan2(theta[1], theta[0])
    sampled_radii = np.linspace(start=0, stop=1, num=num_samples, endpoint=True)
    xs = sampled_radii * np.cos(direction)
    ys = sampled_radii * np.sin(direction)
    sample_coords = np.vstack((xs, ys)).T
    return sample_coords, sampled_radii


class EdgeMap(object):
    """Rasterise a set of 2-D points into a line drawing.

    Points are connected in consecutive order within fixed index groups
    covering indices 0..67 (presumably the 68-point facial landmark layout --
    TODO confirm). The last four groups repeat their first index at the end,
    which closes the contour.

    ``cv2`` must be importable in the enclosing namespace when an instance is
    called; the class itself does not import it.
    """

    def __init__(self, out_res, num_parts=3):
        """Store the output size and build the index groups.

        Args:
            out_res: height and width of the square output image.
            num_parts: size of the last (channel) axis of the output image.
        """
        self.out_res = out_res
        self.num_parts = num_parts

        # (first index, one-past-last index) of each polyline.
        open_spans = [(0, 17), (17, 22), (22, 27), (27, 31), (31, 36)]
        closed_spans = [(36, 42), (42, 48), (48, 60), (60, 68)]

        # Every group is [point indices, line colour value].
        self.groups = [[np.arange(lo, hi, 1), 255] for lo, hi in open_spans]
        self.groups += [[list(np.arange(lo, hi, 1)) + [lo], 255]
                        for lo, hi in closed_spans]

    def __call__(self, shape):
        """Draw the grouped polylines for ``shape`` and return the image.

        Args:
            shape: indexable as ``shape[k][0]`` / ``shape[k][1]`` for every
                index k used by the groups; coordinates are truncated with
                ``int`` before drawing.

        Returns:
            float32 array of shape ``(out_res, out_res, num_parts)``, zero
            everywhere except where lines were drawn.
        """
        canvas = np.zeros((self.out_res, self.out_res, self.num_parts), dtype=np.float32)
        for indices, intensity in self.groups:
            # Connect each point to its successor within the group.
            for a, b in zip(indices[:-1], indices[1:]):
                start = int(shape[a][0]), int(shape[a][1])
                end = int(shape[b][0]), int(shape[b][1])
                cv2.line(canvas, start, end, intensity, 1)
        return canvas

In [10]:
import cv2
sampling_type = 'circular'
num_samples = 180
# map_location='cpu' keeps a GPU-saved checkpoint loadable on any machine; the
# tensors in this notebook are handled on the CPU.
ckpt = torch.load(model_ckpt_path, map_location='cpu')
itl_model.test_mode(x_train=x_train, thetas=sampler_.sample(m), alpha=ckpt['itl_alpha'])
# aff_emo_dict is only created when theta_type == 'aff', so this cell needs
# that configuration.
if sampling_type == 'circular':
    sampled_emotions, sampled_angles = circular_sampling(aff_emo_dict['Happy'], aff_emo_dict['Sad'], num_samples)
    print(sampled_angles)
elif sampling_type == 'radial':
    sampled_emotions, sampled_radii = radial_sampling(aff_emo_dict['Fear'], num_samples)
    print(sampled_radii)
else:
    # Otherwise sampled_emotions stays undefined and later cells fail obscurely.
    raise ValueError('Unsupported sampling_type: ' + repr(sampling_type))
# Single-channel 128x128 renderer for the predicted points.
EM = EdgeMap(out_res=128, num_parts=1)

[0.10528595 0.12439157 0.14349718 0.1626028  0.18170841 0.20081403
 0.21991964 0.23902526 0.25813087 0.27723649 0.2963421  0.31544771
 0.33455333 0.35365894 0.37276456 0.39187017 0.41097579 0.4300814
 0.44918702 0.46829263 0.48739825 0.50650386 0.52560948 0.54471509
 0.56382071 0.58292632 0.60203194 0.62113755 0.64024317 0.65934878
 0.6784544  0.69756001 0.71666563 0.73577124 0.75487685 0.77398247
 0.79308808 0.8121937  0.83129931 0.85040493 0.86951054 0.88861616
 0.90772177 0.92682739 0.945933   0.96503862 0.98414423 1.00324985
 1.02235546 1.04146108 1.06056669 1.07967231 1.09877792 1.11788354
 1.13698915 1.15609477 1.17520038 1.194306   1.21341161 1.23251722
 1.25162284 1.27072845 1.28983407 1.30893968 1.3280453  1.34715091
 1.36625653 1.38536214 1.40446776 1.42357337 1.44267899 1.4617846
 1.48089022 1.49999583 1.51910145 1.53820706 1.55731268 1.57641829
 1.59552391 1.61462952 1.63373514 1.65284075 1.67194636 1.69105198
 1.71015759 1.72926321 1.74836882 1.76747444 1.78658005 1.805685

In [13]:
# Render one edge-map frame per sampled emotion and write it to disk as a
# zero-padded, numbered JPEG.
output_path_cont_gen = './utils/plot_utils/visualizations/continuous_control/circ_video_kdef/happy_sad'
os.makedirs(output_path_cont_gen, exist_ok=True)
for i, sampled_emotion in enumerate(sampled_emotions):
    # NOTE: this rebinds pred_test, replacing the test-set predictions
    # computed earlier with the prediction for the current sampled emotion.
    pred_test = itl_model.forward(x_test, torch.from_numpy(sampled_emotion[np.newaxis]).float())
    im_em = EM(pred_test[0, 0].detach().numpy().reshape(68,2)*128)
    frame_path = os.path.join(output_path_cont_gen, str(i).zfill(3)+'.jpg')
    # cv2.imwrite signals failure through its return value, not an exception.
    if not cv2.imwrite(frame_path, im_em):
        raise IOError('Could not write frame: ' + frame_path)

### Behaviour of the classifier

In [None]:
%matplotlib inline
import torchvision.transforms as transforms
from torchvision.utils import make_grid, save_image
def show(img):
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1,2,0)), interpolation='nearest')
imlist = []
for i in range(len(sampled_emotions)):
    pred_test = itl_model.forward(x_test, torch.from_numpy(sampled_emotions[i][np.newaxis]).float())
    im_em = EM(pred_test[0, 0].detach().numpy().reshape(68,2)*128)
    imlist.append(transforms.ToTensor()(im_em.copy()))
    #imlist.append(transforms.ToTensor()(im_em.copy()))
#show(make_grid(imlist, nrow=10, padding=10, pad_value=1))
#save_image(imlist, 'radial_happy_to_surprise.jpg', nrow=10, padding=10, pad_value=1)

In [None]:
from torchvision import models
import torch.nn as nn
import torch.nn.functional as F

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if dataset == 'KDEF':
    # Seven labels are used for KDEF when plotting the classifier output.
    # TODO confirm this matches the KDEF classifier checkpoint.
    num_classes = 7
    MODEL_PATH = ''  # to be set to the KDEF classifier checkpoint
elif dataset == 'Rafd':
    num_classes = 8
    MODEL_PATH = './utils/landmark_utils/Classification/LndExperiments/Rafd_bs16_e10_20201118-055249'
else:
    raise ValueError('No emotion classifier configured for dataset: ' + str(dataset))
if not MODEL_PATH:
    raise ValueError('MODEL_PATH is not set for dataset: ' + str(dataset))


# model def
# Named build_emotion_classifier rather than `model` so that it does not
# overwrite the `model` module imported from torch_itl, which would break
# re-running the cell that builds the ITL model.
def build_emotion_classifier(model_name, num_classes):
    """Build the emotion classifier network.

    Args:
        model_name: architecture name; only 'resnet-18' is supported.
        num_classes: number of output units of the final linear layer.

    Returns:
        A ResNet-18 whose first convolution takes 1 input channel and whose
        final layer has ``num_classes`` outputs.

    Raises:
        ValueError: if ``model_name`` is not supported.
    """
    if model_name != 'resnet-18':
        raise ValueError('Unsupported model_name: ' + repr(model_name))
    model_ft = models.resnet18(pretrained=False)
    # Replace the stem so the network accepts single-channel images.
    model_ft.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Linear(num_ftrs, num_classes)
    return model_ft


# Get ResNet and load wts
emo_model_ft = build_emotion_classifier('resnet-18', num_classes)
emo_model_ft.load_state_dict(torch.load(MODEL_PATH, map_location=lambda storage, loc: storage))
emo_model_ft = emo_model_ft.to(device)
emo_model_ft.eval()

inputs = F.interpolate(torch.stack(imlist), size=224, mode='bilinear')
# The model lives on `device`: move the batch there for the forward pass and
# bring the result back to the CPU so the numpy conversions work everywhere.
with torch.no_grad():
    outputs = emo_model_ft((inputs/255.).to(device)).cpu()
sout = nn.functional.softmax(outputs, dim=1)
sout_np = sout.detach().numpy()

### Generate combined continuous generation and classifier behaviour

In [None]:
# Class names shown on the y axis of the probability strips.
# NOTE(review): presumably in the classifier's output order -- confirm.
if dataset == 'Rafd':
    plt_emo_labels = ['angry', 'contempt', 'disgusted', 'fearful', 'happy', 'neutral', 'sad', 'surprised']
elif dataset == 'KDEF':
    plt_emo_labels = ['AF', 'AN', 'DI', 'HA', 'NE', 'SA', 'SU']
rows = 2
columns = 8
fig=plt.figure(figsize=(32,8))
# Three grid rows: the image takes the top two, the probability strip the last.
gs = fig.add_gridspec(3, columns)
class_probs = sout.detach().cpu().numpy()

# Top row: the first `columns` generated edge maps.
for col in range(columns):
    fig.add_subplot(gs[0:2, col])
    plt.imshow(np.squeeze(imlist[col].numpy()), cmap='gray')
    plt.yticks([])
    plt.xticks([])

# Bottom row: the classifier probabilities for the same frames, one column each.
for col in range(columns):
    fig.add_subplot(gs[2:, col])
    plt.imshow(class_probs[col][:,np.newaxis], cmap='Reds', interpolation='nearest')
    if col == 0:
        # One tick per label; a fixed count of 8 did not match the 7 KDEF labels.
        plt.yticks(np.arange(len(plt_emo_labels)), plt_emo_labels, fontsize=20)
    else:
        plt.yticks([])
    plt.xticks([])
plt.subplots_adjust(hspace = -0.1, wspace=0.05)
plt.savefig('plot_cls_cont.png',bbox_inches='tight',pad_inches=0.1)

In [None]:
# Mean value of the last resized classifier input (before the division by 255).
(inputs[-1].mean())

In [None]:
# Entry [0, 0] of the most recently computed prediction, scaled by 128.
pred_test[0,0]*128

In [None]:
# NOTE: this rebinds F, which the classifier cell bound to torch.nn.functional.
import torchvision.transforms.functional as F

In [None]:
# Resize the stacked edge maps to size 224 (F must be
# torchvision.transforms.functional here, not torch.nn.functional).
inp = F.resize(torch.stack(imlist), size=224)

In [None]:
# Preprocess the last edge map through PIL: divide by 255, convert to a PIL
# image, resize to 224, convert back to a tensor and add a leading batch axis.
inp = transforms.ToPILImage()(imlist[-1]/255.)
inp = transforms.Resize(size=224)(inp)
inp = transforms.ToTensor()(inp)
inp = inp.unsqueeze(0)

In [None]:
# Classifier output for the single preprocessed image `inp`.
out_inp = emo_model_ft(inp)

In [None]:
# Display the classifier output for `inp`.
out_inp

In [None]:
# Display the raw (pre-softmax) classifier outputs for the generated frames.
outputs

In [None]:
# Open one saved image from the Rafd prediction folder (path is relative to
# the working directory).
from PIL import Image
im = Image.open("./utils/landmark_utils/Classification/LndPredRafd_itl_model_20201118-134437/angry/pred_Rafd090_25_Caucasian_male_angry_frontal.JPG")

In [None]:
# Maximum value in the last generated edge-map tensor.
imlist[-1].max()

In [None]:
# Preprocess the image opened from disk: convert to grayscale, resize to 224,
# convert to a tensor and add a leading batch axis.
inppil = transforms.Grayscale()(im)
inppil = transforms.Resize(size=224)(inppil)
inppil = transforms.ToTensor()(inppil)
inppil = inppil.unsqueeze(0)

In [None]:
# Run the classifier on the image loaded from disk, without gradient tracking.
with torch.no_grad():
    outpil = emo_model_ft(inppil)

In [None]:
# Display the classifier output for the image loaded from disk.
outpil

In [None]:
# List the files in the 'angry' prediction folder in sorted order.
sorted(os.listdir("./utils/landmark_utils/Classification/LndPredRafd_itl_model_20201118-134437/angry/"))

In [None]:
# Display test_list; it is only assigned by the facealigner data loaders.
test_list

In [None]:
# Value passed to kernel.Gaussian for the input kernel.
model_config['Kernels']['gamma_inp']

In [None]:
# Value passed to kernel.Gaussian for the output kernel.
model_config['Kernels']['gamma_out']