In [274]:
import pandas as pd
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForImageClassification
import torch
import os
import cv2
import torchvision.models as models
import torch.nn as nn
from torchvision import transforms
import urllib
import numpy as np
import matplotlib.pyplot as plt
import random

In [275]:
# Current working directory of the kernel process (os.getcwd()), not the
# location of the notebook file itself; the relative "./data/..." paths used
# in later cells resolve against this directory.
script_dir = os.getcwd()

In [276]:
# Facial-expression classifier: the image processor and the classification
# model are both loaded from the same checkpoint identifier.
EMOTION_CHECKPOINT = "trpakov/vit-face-expression"
emotion_processor = AutoImageProcessor.from_pretrained(EMOTION_CHECKPOINT)
emotion_model = AutoModelForImageClassification.from_pretrained(EMOTION_CHECKPOINT)

Fetching 1 files: 100%|██████████| 1/1 [00:00<?, ?it/s]


In [277]:
# Scene classifier: a ResNet-18 with a 365-way output layer whose weights come
# from a downloaded checkpoint.
# NOTE(review): the checkpoint is fetched over plain HTTP and then deserialized
# by torch; only load it from a trusted network/cache or verify the file first.
scene_weights_url = "http://places2.csail.mit.edu/models_places365/resnet18_places365.pth.tar"
scene_checkpoint = torch.hub.load_state_dict_from_url(scene_weights_url, map_location="cpu")

# num_classes=365 already sizes the final `fc` layer, so it does not need to be
# replaced by hand before the checkpoint weights are loaded.
scene_model = models.resnet18(num_classes=365)
# Drop any "module." prefix from the checkpoint keys so they line up with the
# parameter names of the bare ResNet created above.
scene_state_dict = {
    key.replace("module.", ""): tensor
    for key, tensor in scene_checkpoint["state_dict"].items()
}
scene_model.load_state_dict(scene_state_dict)
scene_model.eval()

# Preprocessing, the standard torchvision normalization for ResNet, VGG, DenseNet...
scene_tf = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# Wrap pretrained models

In [278]:
from deepproblog.network import Network 
import torch.nn.functional as F

# Adapter around a Hugging Face classifier: forward() returns class
# probabilities (softmax over the model's logits), not the raw logits.
class HFWrapper(nn.Module):
    """Expose a Hugging Face image classifier as a module that outputs probabilities."""

    def __init__(self, hf_model):
        """Keep the wrapped model available as ``self.hf_model``."""
        super().__init__()
        self.hf_model = hf_model

    def forward(self, x):
        """Run the wrapped model on ``x`` and softmax its ``.logits`` over the last dimension."""
        return self.hf_model(x).logits.softmax(dim=-1)
    
class SceneWrapper(nn.Module):
    """Wrap a classifier so that calling it yields probabilities instead of logits."""

    def __init__(self, model):
        """Keep the wrapped classifier available as ``self.model``."""
        super().__init__()
        self.model = model

    def forward(self, x):
        """Apply the wrapped model to ``x`` and softmax its output over the last dimension."""
        return self.model(x).softmax(dim=-1)
# Rebind both names to their probability-emitting wrappers; from here on
# `emotion_model` and `scene_model` refer to the wrapped modules.
# NOTE(review): re-running this cell wraps an already-wrapped model again.
emotion_model = HFWrapper(emotion_model)

scene_model = SceneWrapper(scene_model)

In [279]:
# Register both wrapped classifiers as DeepProbLog networks under the names
# "emotion_model" and "scene_model".
emotion_net = Network(emotion_model, "emotion_model", batching=True)
scene_net = Network(scene_model, "scene_model", batching=True)

# Inference only: put both networks in evaluation mode.
emotion_net.eval()
scene_net.eval()

# Move to the GPU only when one is available, so the notebook also runs on
# CPU-only machines instead of failing in this cell.
if torch.cuda.is_available():
    emotion_net.cuda()
    scene_net.cuda()

Moving  emotion_model  to GPU
Moving  scene_model  to GPU


<deepproblog.network.Network at 0x25f338af370>

# Define domain

In [280]:
# Emotion labels, ordered by class index so that position i is the label for id i.
emotion_id2label = emotion_model.hf_model.config.id2label
emotion_net.domain = [emotion_id2label[i] for i in range(len(emotion_id2label))]
print(emotion_net.domain)

# Scene labels: keep the first space-separated field of each line and drop its
# first three characters (presumably a "/x/" style prefix — confirm against the file).
# The file is read inside a `with` block so the handle is always closed, and
# blank lines are skipped so they cannot add empty labels to the domain.
with open("./data/places365/categories_places365.txt") as categories_file:
    categories = [
        line.strip().split(' ')[0][3:]
        for line in categories_file
        if line.strip()
    ]
scene_net.domain = categories
print(scene_net.domain)  # prints the full label list


['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']
['airfield', 'airplane_cabin', 'airport_terminal', 'alcove', 'alley', 'amphitheater', 'amusement_arcade', 'amusement_park', 'apartment_building/outdoor', 'aquarium', 'aqueduct', 'arcade', 'arch', 'archaelogical_excavation', 'archive', 'arena/hockey', 'arena/performance', 'arena/rodeo', 'army_base', 'art_gallery', 'art_school', 'art_studio', 'artists_loft', 'assembly_line', 'athletic_field/outdoor', 'atrium/public', 'attic', 'auditorium', 'auto_factory', 'auto_showroom', 'badlands', 'bakery/shop', 'balcony/exterior', 'balcony/interior', 'ball_pit', 'ballroom', 'bamboo_forest', 'bank_vault', 'banquet_hall', 'bar', 'barn', 'barndoor', 'baseball_field', 'basement', 'basketball_court/indoor', 'bathroom', 'bazaar/indoor', 'bazaar/outdoor', 'beach', 'beach_house', 'beauty_salon', 'bedchamber', 'bedroom', 'beer_garden', 'beer_hall', 'berth', 'biology_laboratory', 'boardwalk', 'boat_deck', 'boathouse', 'bookstore', 'booth/indoo

In [281]:
def sanitize(label: str) -> str:
    """Normalise a class label.

    Surrounding whitespace is trimmed, the text is lowercased, and every
    space or forward slash becomes an underscore.
    """
    separators_to_underscore = str.maketrans(" /", "__")
    return label.strip().lower().translate(separators_to_underscore)

# Replace each network's label list with its sanitised form.
for _net in (emotion_net, scene_net):
    _net.domain = list(map(sanitize, _net.domain))

In [282]:
# Show both sanitised label lists, one per line.
print(emotion_net.domain, scene_net.domain, sep="\n")

['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']
['airfield', 'airplane_cabin', 'airport_terminal', 'alcove', 'alley', 'amphitheater', 'amusement_arcade', 'amusement_park', 'apartment_building_outdoor', 'aquarium', 'aqueduct', 'arcade', 'arch', 'archaelogical_excavation', 'archive', 'arena_hockey', 'arena_performance', 'arena_rodeo', 'army_base', 'art_gallery', 'art_school', 'art_studio', 'artists_loft', 'assembly_line', 'athletic_field_outdoor', 'atrium_public', 'attic', 'auditorium', 'auto_factory', 'auto_showroom', 'badlands', 'bakery_shop', 'balcony_exterior', 'balcony_interior', 'ball_pit', 'ballroom', 'bamboo_forest', 'bank_vault', 'banquet_hall', 'bar', 'barn', 'barndoor', 'baseball_field', 'basement', 'basketball_court_indoor', 'bathroom', 'bazaar_indoor', 'bazaar_outdoor', 'beach', 'beach_house', 'beauty_salon', 'bedchamber', 'bedroom', 'beer_garden', 'beer_hall', 'berth', 'biology_laboratory', 'boardwalk', 'boat_deck', 'boathouse', 'bookstore', 'booth_indoo

In [283]:
def preprocess_face(image_path):
    """Load a face image and run it through ``emotion_processor``.

    Args:
        image_path: Path of the image file to open.

    Returns:
        The ``"pixel_values"`` entry of the processor output, requested as
        PyTorch tensors (``return_tensors="pt"``).
        NOTE(review): presumably shaped (1, C, H, W); the networks are created
        with ``batching=True``, so confirm whether that leading dimension is expected.
    """
    # The context manager releases the file handle even if decoding fails;
    # convert() returns a separate RGB image that stays usable afterwards.
    with Image.open(image_path) as raw_img:
        face_img = raw_img.convert("RGB")
    inputs = emotion_processor(face_img, return_tensors="pt")
    return inputs["pixel_values"]

def preprocess_scene(image_path):
    """Load a scene image and apply the ``scene_tf`` transform pipeline.

    Args:
        image_path: Path of the image file to open.

    Returns:
        The transformed tensor with an extra leading dimension of size 1
        added by ``unsqueeze(0)``.
    """
    # The context manager releases the file handle even if decoding fails;
    # convert() returns a separate RGB image that stays usable afterwards.
    with Image.open(image_path) as raw_img:
        scene_img = raw_img.convert("RGB")
    return scene_tf(scene_img).unsqueeze(0)

# Register test images

In [284]:
from deepproblog.dataset import Dataset

# Preprocess one example image per network.
face_image_path = "./data/fer2013/test/happy/PrivateTest_10077120.jpg"
scene_image_path = "./data/places365/val/amusement_park/Places365_val_00000381.jpg"

face_tensor = preprocess_face(face_image_path)
scene_tensor = preprocess_scene(scene_image_path)

# Build the model

In [285]:
from deepproblog.model import Model
from problog.logic import Constant

from deepproblog.engines import ExactEngine

# Build the DeepProbLog model from the logic program and the two networks.
model = Model("social_cognition_model.pl", [scene_net, emotion_net])

# Both tensor sources map the one-element key (img,) to a preprocessed tensor.
img_key = (Constant("img"),)
face_source = {img_key: face_tensor}
scene_source = {img_key: scene_tensor}
model.add_tensor_source("emotion_model", face_source)
model.add_tensor_source("scene_model", scene_source)

# Use the exact inference engine and switch the model to evaluation mode.
model.set_engine(ExactEngine(model))
model.eval()

# Query the model

In [286]:
from problog.logic import Term, Constant
from deepproblog.query import Query

# Query one emotion label and one scene label for `img`; the labels are passed
# as Constant(...) arguments of the query term.
# NOTE(review): the recorded output is 0.0 for both queries — check that the
# program's face/2 and scene/2 predicates are linked to these networks and
# tensor sources.
q1 = Query(Term("face", Constant("img"), Constant("happy")))
q2 = Query(Term("scene", Constant("img"), Constant("amusement_park")))

# Solve each query on its own and print its result mapping.
for _query in (q1, q2):
    print(model.solve([_query])[0].result)


{face(img,happy): 0.0}
{scene(img,amusement_park): 0.0}
