In [251]:
import pandas as pd
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForImageClassification
import torch
import os
import cv2
import torchvision.models as models
import torch.nn as nn
from torchvision import transforms
import urllib
import numpy as np
import matplotlib.pyplot as plt
import random

In [252]:
# Current working directory at the time this cell runs.
script_dir = os.getcwd()

In [253]:
import torch
import torch.nn as nn
from transformers import AutoImageProcessor, AutoModelForImageClassification

class EMOTION_NET(nn.Module):
    """Emotion classifier wrapping a pretrained Hugging Face image-classification model.

    The wrapped backbone's config declares the input it expects (channel count
    and spatial size). The FER2013 tensors produced in this notebook are
    single-channel 48x48 images, so ``forward`` adapts incoming batches to the
    backbone's declared input before running it. Batches that already match
    are passed through untouched.

    Args:
        pretrained_name: Hugging Face model id (or local path) to load.
        with_softmax: If True, ``forward`` returns probabilities; otherwise it
            returns the raw logits.

    Attributes set in ``__init__``: ``processor`` (the matching image
    processor), ``model`` (the classifier), ``n_classes`` (taken from
    ``model.config.num_labels``) and ``activation``.
    """

    def __init__(self, pretrained_name="trpakov/vit-face-expression", with_softmax=True):
        super().__init__()
        self.with_softmax = with_softmax

        # Load the pretrained Hugging Face processor and classifier.
        self.processor = AutoImageProcessor.from_pretrained(pretrained_name)
        self.model = AutoModelForImageClassification.from_pretrained(pretrained_name)

        # Number of emotion classes comes from the pretrained model's config.
        self.n_classes = self.model.config.num_labels

        # A single output unit gets a sigmoid; otherwise normalize over the class axis.
        if self.n_classes == 1:
            self.activation = nn.Sigmoid()
        else:
            self.activation = nn.Softmax(dim=1)

    def _fit_to_backbone(self, x):
        """Resize / channel-expand a (B, C, H, W) batch to the backbone's declared input.

        Only two adaptations are made, and only when the config exposes the
        relevant field: spatial size is bilinearly resized to
        ``config.image_size``, and a single-channel batch is broadcast to
        ``config.num_channels``. Anything else is returned unchanged so the
        backbone can report its own error.
        """
        if x.dim() != 4:
            return x

        cfg = self.model.config

        size = getattr(cfg, "image_size", None)
        if size is not None:
            # image_size may be a single int or an (H, W) pair.
            target_hw = (size, size) if isinstance(size, int) else tuple(size)
            if tuple(x.shape[-2:]) != target_hw:
                x = nn.functional.interpolate(
                    x, size=target_hw, mode="bilinear", align_corners=False
                )

        channels = getattr(cfg, "num_channels", None)
        if channels is not None and channels > 1 and x.shape[1] == 1:
            # Grayscale -> replicate the single channel (a view, no copy).
            x = x.expand(-1, channels, -1, -1)

        return x

    def forward(self, x):
        """Classify a batch of image tensors.

        Args:
            x: Float tensor of shape (B, C, H, W).

        Returns:
            Tensor of shape (B, n_classes): probabilities when
            ``with_softmax`` is True, raw logits otherwise.
        """
        x = self._fit_to_backbone(x)

        # The Hugging Face model returns an output object; the scores live in `.logits`.
        logits = self.model(x).logits

        if self.with_softmax:
            return self.activation(logits)
        return logits


# When training on raw images, run them through the bundled processor first, like this:
# inputs = emotion_net.processor(images=raw_images, return_tensors="pt")
# outputs = emotion_net(inputs["pixel_values"])

In [254]:
from torchvision import datasets, transforms

# Preprocessing applied to every FER2013 image, in order:
#   1. collapse to a single grayscale channel
#   2. resize to the native FER2013 resolution (48x48)
#   3. convert to a float tensor
#   4. shift/scale with mean 0.5 and std 0.5, i.e. map values into [-1, 1]
fer_transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize(size=(48, 48)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

# One ImageFolder per split; each subfolder of the root is one emotion class.
emotion_datasets = dict(
    train=datasets.ImageFolder("data/fer2013/train", fer_transform),
    test=datasets.ImageFolder("data/fer2013/test", fer_transform),
)

In [255]:
# Class names exposed by the test split (one per dataset subfolder); reused
# later when the DeepProbLog program header is generated.
emotion_classes = emotion_datasets["test"].classes
print(emotion_classes)

['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']


In [256]:
# Peek at the first test sample to check the tensor shape and label value.
img, label = emotion_datasets["test"][0]
print(img.shape, label)
# Each item of emotion_datasets[...] is an (img, label) pair, just like in the MNIST example.

torch.Size([1, 48, 48]) 0


# Wrap the datasets for DeepProbLog

In [257]:
import torch

class EMOTION_Images:
    """Index-based image lookup over one split of ``emotion_datasets``.

    ``subset`` is the key into the module-level ``emotion_datasets`` mapping
    (e.g. "train" or "test"). Indexing takes a sequence whose first element is
    convertible to ``int`` and returns only the image of that sample; the
    label is discarded.
    """

    def __init__(self, subset):
        self.subset = subset

    def __getitem__(self, item):
        # Only the first element of the key is used as the dataset index.
        sample_index = int(item[0])
        image, _label = emotion_datasets[self.subset][sample_index]
        # A plain Python list is converted to a tensor; anything else is returned as-is.
        return torch.tensor(image) if isinstance(image, list) else image


In [258]:
# One image source per dataset split; both are registered with the model further down.
EMOTION_train = EMOTION_Images("train")
EMOTION_test = EMOTION_Images("test")

In [259]:
# Sanity check: the source is indexed with a one-element list holding the dataset index.
img = EMOTION_train[[0]]
print(img)

tensor([[[-0.6078, -0.7490, -0.8824,  ...,  0.0431,  0.1843, -0.3255],
         [-0.5529, -0.7333, -0.8275,  ...,  0.0824,  0.1843, -0.3020],
         [-0.5216, -0.7647, -0.8118,  ...,  0.1137,  0.1686, -0.3020],
         ...,
         [-0.1922, -0.2157, -0.2157,  ...,  0.1686, -0.1843, -0.3333],
         [-0.1608, -0.1294, -0.1137,  ...,  0.1843, -0.0588, -0.3255],
         [-0.1843, -0.1843, -0.1216,  ...,  0.1216,  0.0667, -0.3490]]])


In [260]:

# Neural predicate declaration: binds the network named `emotion_net` to
# face(Image, Emotion), with the dataset's class names as the output domain.
pl_header = f"nn(emotion_net,[Image],Emotion,{emotion_classes}) :: face(Image, Emotion)."

# Logic rules layered on top of the neural predicate.
rules = """
happy(Image) :- face(Image, 'happy').
"""

# Program file layout: header line, a newline, then the rules.
with open('./emotion_model.pl', "w") as f:
    f.writelines((pl_header, "\n", rules))

In [261]:
from deepproblog.network import Network
# Instantiate the classifier with its defaults (this loads the pretrained weights).
emotion_net = EMOTION_NET()
# Wrap it for DeepProbLog under the name "emotion_net", the same name used in the
# nn(...) declaration written to emotion_model.pl.
emotion_network = Network(emotion_net, "emotion_net", batching=True)
# Adam over the wrapped network's parameters, learning rate 1e-3.
emotion_network.optimizer = torch.optim.Adam(emotion_network.parameters(), lr=1e-3)

Fetching 1 files: 100%|██████████| 1/1 [00:00<?, ?it/s]


In [262]:
from deepproblog.model import Model
from deepproblog.engines import ExactEngine, ApproximateEngine

# Build the DeepProbLog model from the program file written earlier and the wrapped network.
emotion_model = Model("emotion_model.pl", [emotion_network])

# Attach the exact engine as the solver (ApproximateEngine is imported but not used here).
emotion_model.set_engine(ExactEngine(emotion_model))

In [263]:
# Register the two image sources with the model under the names "train" and "test".
# NOTE(review): presumably queries refer to images through these names — confirm
# against the code that builds the queries.
emotion_model.add_tensor_source("train", EMOTION_train)
emotion_model.add_tensor_source("test", EMOTION_test)

We can use addition here: it gives us a social-cognition operator that takes multiple cues into account by computing their weighted sum.

In [None]:
# Confirm what the tensor source returns for index 0: its Python type and shape.
img = EMOTION_train[[0]]
print(type(img), img.shape)

# 

<class 'torch.Tensor'> torch.Size([1, 48, 48])
