In [27]:
import os
from transformers import AutoImageProcessor, AutoModelForImageClassification
import torch
import os
import torchvision.models as models
import torch.nn as nn
from deepproblog.dataset import Dataset, DataLoader
from torch.utils.data import Dataset as TorchDataset
from problog.logic import Term, Constant, Var

import torch.optim as optim
from torchvision import models, transforms
from PIL import Image
from tqdm import tqdm
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import numpy as np
from mtcnn import MTCNN
import random
from deepproblog.utils.stop_condition import EpochStop
from deepproblog.optimizer import SGD

from deepproblog.model import Model
from deepproblog.network import Network
from deepproblog.engines import ExactEngine
from deepproblog.query import Query
from deepproblog.train import train_model


In [28]:
# GLOBAL VARS
training_size_perc = 1
epochs = 2
lr_scene_reduction = 0.0001
lr_faces = 0.0001
lr_scenes = 0.0001
lr_model = 0.0001
batch_size = 16

In [None]:
# Directories
script_dir = os.getcwd()
print("script_dir:", script_dir)
base_dir = os.path.dirname(os.path.dirname(script_dir))
print("base_dir:", base_dir)
data_dir = os.path.join(base_dir, "data")
print("data_dir:", data_dir)
findingemo_dir = os.path.join(data_dir, "FindingEmo_Images")
print("findingemo_dir:", findingemo_dir)
prolog_dir = os.path.join(base_dir, "code", "prolog")
print("prolog_dir:", prolog_dir)
df = pd.read_pickle(os.path.join(data_dir, "dataframe_cleaned.pkl"))

script_dir: c:\Users\jarne\Documents\Code Masterthesis\nsai_social_cognition\code\notebooks
base_dir: c:\Users\jarne\Documents\Code Masterthesis\nsai_social_cognition
data_dir: c:\Users\jarne\Documents\Code Masterthesis\nsai_social_cognition\data
findingemo_dir: c:\Users\jarne\Documents\Code Masterthesis\nsai_social_cognition\data\FindingEmo_Images
prolog_dir: c:\Users\jarne\Documents\Code Masterthesis\nsai_social_cognition\code\prolog


In [30]:
emotion_categories_hf = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']

EMOTION_SETS = [["Serenity", "Joy", "Ecstasy"],
                    ["Acceptance", "Trust", "Admiration"],
                    ["Apprehension", "Fear", "Terror"],
                    ["Distraction", "Surprise", "Amazement"],
                    ["Pensiveness", "Sadness", "Grief"],
                    ["Boredom", "Disgust", "Loathing"],
                    ["Annoyance", "Anger", "Rage"],
                    ["Interest", "Anticipation", "Vigilance"]]

EMO8_LIST = [l[1] for l in EMOTION_SETS]

In [None]:
# Delete the columns: "user" and "index" and "datetime"
if "user" in df.columns and "index" in df.columns and "datetime" in df.columns:
    df = df.drop(columns=["user", "index", "datetime"])

# remove leading slash from image_path
if df["image_path"].iloc[0].startswith("/"):
    df["image_path"] = df["image_path"].str.lstrip("/")

In [32]:
# Apply the mapping to 8 emotions as defined in the FindingEmo code
EMO8_MAPPING = {}

for i, leaf in enumerate(EMOTION_SETS):
    for emo in leaf:
        EMO8_MAPPING[emo] = EMO8_LIST[i]

df['emotion_8'] = df['emotion'].map(EMO8_MAPPING)
df['emotion_8_idx'] = df['emotion_8'].apply(lambda x: EMO8_LIST.index(x))

In [33]:
# Delete all the rows that contain the emotion_8 as "trust" and "anticipation"
for emo in ["Trust", "Anticipation"]:
    df = df[df['emotion_8'] != emo]

df.head()

Unnamed: 0,image_path,tags,age,valence,arousal,emotion,dec_factors,ambiguity,emotion_8,emotion_8_idx
4,Run_2/Shame elderly sports/3364887-46.jpg,Undefined,Adults,0,3,Apprehension,ConflictCtxtPerson,0,Fear,2
7,Run_2/Scared adolescents prison/15-hampton-roa...,Undefined,"Youth,Adults",0,3,Anger,"BodyLanguage,FacialExpression",0,Anger,6
8,Run_2/Raging students party/Newly-elected-Nepa...,Undefined,"Young Adults,Adults",0,2,Joy,"BodyLanguage,FacialExpression",0,Joy,0
10,Run_1/Accepting children school/whitecoatcerem...,Undefined,Young Adults,0,0,Joy,FacialExpression,0,Joy,0
11,Run_1/Grateful youth protest/trees3.jpg,Undefined,Adults,-1,3,Fear,"BodyLanguage,Staging",3,Fear,2


In [8]:
# get the different emo8's  
basic_6_emotions = ["Anger", "Disgust", "Fear", "Joy", "Sadness", "Surprise"]
basic_6_emotions_idx = {emo: i for i, emo in enumerate(basic_6_emotions)}

In [9]:
basic_6_emotions_idx

{'Anger': 0, 'Disgust': 1, 'Fear': 2, 'Joy': 3, 'Sadness': 4, 'Surprise': 5}

In [10]:
# Load category names
scene_categories_path = os.path.join(data_dir, "places365/categories_places365.txt")
with open(scene_categories_path) as f:
    scene_categories = [line.strip().split(' ')[0][3:] for line in f]
    # If there is a / in the category name, replace it with _
    scene_categories = [cat.replace('/', '_') for cat in scene_categories]

In [11]:
places365_to_13categories = {
    # --- Transport infrastructure ---
    "airfield": "transport_infrastructure",
    "airplane_cabin": "transport_infrastructure",
    "airport_terminal": "transport_infrastructure",
    "railroad_track": "transport_infrastructure",
    "train_interior": "transport_infrastructure",
    "train_station_platform": "transport_infrastructure",
    "subway_station_platform": "transport_infrastructure",
    "bridge": "transport_infrastructure",
    "runway": "transport_infrastructure",
    "highway": "transport_infrastructure",
    "garage_indoor": "transport_infrastructure",
    "garage_outdoor": "transport_infrastructure",
    "bus_interior": "transport_infrastructure",
    "bus_station_indoor": "transport_infrastructure",
    "harbor": "transport_infrastructure",
    "viaduct": "transport_infrastructure",
    "heliport": "transport_infrastructure",
    "parking_garage_indoor": "transport_infrastructure",
    "parking_garage_outdoor": "transport_infrastructure",
    "car_interior": "transport_infrastructure",
    "cockpit": "transport_infrastructure",
    "berth": "transport_infrastructure",

    # --- Indoor residential ---
    "bedroom": "indoor_residential",
    "bathroom": "indoor_residential",
    "living_room": "indoor_residential",
    "kitchen": "indoor_residential",
    "attic": "indoor_residential",
    "childs_room": "indoor_residential",
    "closet": "indoor_residential",
    "dining_room": "indoor_residential",
    "dorm_room": "indoor_residential",
    "hotel_room": "indoor_residential",
    "mansion": "indoor_residential",
    "nursery": "indoor_residential",
    "television_room": "indoor_residential",
    "pantry": "indoor_residential",
    "playroom": "indoor_residential",
    "house": "indoor_residential",
    "apartment_building_outdoor": "indoor_residential",
    "balcony_interior": "indoor_residential",
    "banquet_hall": "indoor_residential",
    "porch": "indoor_residential",
    "jacuzzi_indoor": "indoor_residential",
    "shower": "indoor_residential",
    "alcove": "indoor_residential",
    "bedchamber": "indoor_residential",
    "ballroom": "indoor_residential",
    "basement": "indoor_residential",
    "dressing_room": "indoor_residential",
    "home_office": "indoor_residential",
    "home_theater": "indoor_residential",
    "manufactured_home": "indoor_residential",
    "utility_room": "indoor_residential",
    "wet_bar": "indoor_residential",
    "youth_hostel": "indoor_residential",

    # --- Indoor commercial ---
    "bakery_shop": "indoor_commercial",
    "bar": "indoor_commercial",
    "bookstore": "indoor_commercial",
    "butchers_shop": "indoor_commercial",
    "candy_store": "indoor_commercial",
    "clothing_store": "indoor_commercial",
    "department_store": "indoor_commercial",
    "fastfood_restaurant": "indoor_commercial",
    "florist_shop_indoor": "indoor_commercial",
    "gift_shop": "indoor_commercial",
    "jewelry_shop": "indoor_commercial",
    "market_indoor": "indoor_commercial",
    "pet_shop": "indoor_commercial",
    "pharmacy": "indoor_commercial",
    "shoe_shop": "indoor_commercial",
    "shopping_mall_indoor": "indoor_commercial",
    "supermarket": "indoor_commercial",
    "toyshop": "indoor_commercial",
    "restaurant": "indoor_commercial",
    "restaurant_kitchen": "indoor_commercial",
    "restaurant_patio": "indoor_commercial",
    "pizzeria": "indoor_commercial",
    "pub_indoor": "indoor_commercial",
    "sushi_bar": "indoor_commercial",
    "hotel_outdoor": "indoor_commercial",
    "amusement_arcade": "indoor_commercial",
    "auto_showroom": "indoor_commercial",
    "bakery_shop": "indoor_commercial",
    "bank_vault": "indoor_commercial",
    "bazaar_indoor": "indoor_commercial",
    "beauty_salon": "indoor_commercial",
    "cafeteria": "indoor_commercial",
    "coffee_shop": "indoor_commercial",
    "delicatessen": "indoor_commercial",
    "discotheque": "indoor_commercial",
    "drugstore": "indoor_commercial",
    "fabric_store": "indoor_commercial",
    "flea_market_indoor": "indoor_commercial",
    "food_court": "indoor_commercial",
    "hardware_store": "indoor_commercial",
    "laundromat": "indoor_commercial",
    "market_outdoor": "indoor_commercial",
    "motel": "indoor_commercial",
    "pub_indoor": "indoor_commercial",
    "sauna": "indoor_commercial",
    "server_room": "indoor_commercial",
    "shoe_shop": "indoor_commercial",
    "shopping_mall_indoor": "indoor_commercial",
    "supermarket": "indoor_commercial",
    "booth_indoor": "indoor_commercial",
    "ice_cream_parlor": "indoor_commercial",
    "general_store_indoor": "indoor_commercial",
    "general_store_outdoor": "indoor_commercial",
    "pavilion": "indoor_commercial",
    "ticket_booth": "indoor_commercial",

    # --- Indoor institutional ---
    "art_gallery": "indoor_institutional",
    "auditorium": "indoor_institutional",
    "church_indoor": "indoor_institutional",
    "classroom": "indoor_institutional",
    "conference_room": "indoor_institutional",
    "hospital": "indoor_institutional",
    "hospital_room": "indoor_institutional",
    "library_indoor": "indoor_institutional",
    "lecture_room": "indoor_institutional",
    "office": "indoor_institutional",
    "office_cubicles": "indoor_institutional",
    "schoolhouse": "indoor_institutional",
    "science_museum": "indoor_institutional",
    "nursing_home": "indoor_institutional",
    "reception": "indoor_institutional",
    "waiting_room": "indoor_institutional",
    "museum_indoor": "indoor_institutional",
    "biology_laboratory": "indoor_institutional",
    "chemistry_lab": "indoor_institutional",
    "clean_room": "indoor_institutional",
    "conference_center": "indoor_institutional",
    "courthouse": "indoor_institutional",
    "embassy": "indoor_institutional",
    "entrance_hall": "indoor_institutional",
    "elevator_lobby": "indoor_institutional",
    "elevator_shaft": "indoor_institutional",
    "elevator_door": "indoor_institutional",
    "escalator_indoor": "indoor_institutional",
    "hospital": "indoor_institutional",
    "kindergarden_classroom": "indoor_institutional",
    "legislative_chamber": "indoor_institutional",
    "library_outdoor": "indoor_institutional",
    "lobby": "indoor_institutional",
    "locker_room": "indoor_institutional",
    "operating_room": "indoor_institutional",
    "physics_laboratory": "indoor_institutional",
    "recreation_room": "indoor_institutional",
    "veterinarians_office": "indoor_institutional",
    "atrium_public": "indoor_institutional",
    "corridor": "indoor_institutional",
    "computer_room": "indoor_institutional",
    "mezzanine": "indoor_institutional",
    "office_building": "indoor_institutional",
    "jail_cell": "indoor_institutional",
    "storage_room": "indoor_institutional",

    # --- Industrial facilities ---
    "assembly_line": "industrial_facility",
    "auto_factory": "industrial_facility",
    "engine_room": "industrial_facility",
    "industrial_area": "industrial_facility",
    "oilrig": "industrial_facility",
    "construction_site": "industrial_facility",
    "army_base": "industrial_facility",
    "junkyard": "industrial_facility",
    "repair_shop": "industrial_facility",
    "loading_dock": "industrial_facility",
    "fire_station": "industrial_facility",
    "landing_deck": "industrial_facility",
    "construction_site": "industrial_facility",
    "gas_station": "industrial_facility",
    "hangar_indoor": "industrial_facility",
    "hangar_outdoor": "industrial_facility",
    "dam": "industrial_facility",
    "excavation": "industrial_facility",
    "fire_escape": "industrial_facility",
    "landfill": "industrial_facility",
    "lock_chamber": "industrial_facility",
    "trench": "industrial_facility",
    "shed": "industrial_facility",

    # --- Urban outdoor ---
    "alley": "urban_outdoor",
    "building_facade": "urban_outdoor",
    "courtyard": "urban_outdoor",
    "crosswalk": "urban_outdoor",
    "downtown": "urban_outdoor",
    "plaza": "urban_outdoor",
    "street": "urban_outdoor",
    "shopfront": "urban_outdoor",
    "parking_lot": "urban_outdoor",
    "residential_neighborhood": "urban_outdoor",
    "skyscraper": "urban_outdoor",
    "amusement_park": "urban_outdoor",
    "arcade": "urban_outdoor",
    "balcony_exterior": "urban_outdoor",
    "bazaar_outdoor": "urban_outdoor",
    "boardwalk": "urban_outdoor",
    "driveway": "urban_outdoor",
    "doorway_outdoor": "urban_outdoor",
    "gazebo_exterior": "urban_outdoor",
    "market_outdoor": "urban_outdoor",
    "patio": "urban_outdoor",
    "plaza": "urban_outdoor",
    "promenade": "urban_outdoor",
    "rope_bridge": "urban_outdoor",
    "street": "urban_outdoor",
    "tower": "urban_outdoor",
    "slum": "urban_outdoor",
    "sky": "urban_outdoor",
    "roof_garden": "urban_outdoor",
    "phone_booth": "urban_outdoor",

    # --- Natural landscapes ---
    "badlands": "natural_landscape",
    "canyon": "natural_landscape",
    "cliff": "natural_landscape",
    "desert_sand": "natural_landscape",
    "desert_vegetation": "natural_landscape",
    "forest_broadleaf": "natural_landscape",
    "forest_path": "natural_landscape",
    "mountain": "natural_landscape",
    "mountain_path": "natural_landscape",
    "mountain_snowy": "natural_landscape",
    "valley": "natural_landscape",
    "volcano": "natural_landscape",
    "rock_arch": "natural_landscape",
    "rainforest": "natural_landscape",
    "bamboo_forest": "natural_landscape",
    "butte": "natural_landscape",
    "canal_natural": "natural_landscape",
    "cliff": "natural_landscape",
    "coast": "natural_landscape",
    "creek": "natural_landscape",
    "crevasse": "natural_landscape",
    "desert_road": "natural_landscape",
    "field_road": "natural_landscape",
    "forest_road": "natural_landscape",
    "grotto": "natural_landscape",
    "islet": "natural_landscape",
    "lawn": "natural_landscape",
    "marsh": "natural_landscape",
    "swamp": "natural_landscape",
    "vegetable_garden": "natural_landscape",
    "wheat_field": "natural_landscape",
    "wind_farm": "natural_landscape",
    "windmill": "natural_landscape",
    "zen_garden": "natural_landscape",
    "canal_urban": "natural_landscape",
    "fountain": "natural_landscape",

    # --- Cold environments ---
    "ice_floe": "cold_environment",
    "ice_shelf": "cold_environment",
    "iceberg": "cold_environment",
    "igloo": "cold_environment",
    "ski_resort": "cold_environment",
    "ski_slope": "cold_environment",
    "snowfield": "cold_environment",
    "tundra": "cold_environment",
    "glacier": "cold_environment",

    # --- Aquatic environments ---
    "aquarium": "aquatic_environment",
    "beach": "aquatic_environment",
    "boat_deck": "aquatic_environment",
    "boathouse": "aquatic_environment",
    "harbor": "aquatic_environment",
    "lake_natural": "aquatic_environment",
    "lagoon": "aquatic_environment",
    "ocean": "aquatic_environment",
    "pond": "aquatic_environment",
    "river": "aquatic_environment",
    "waterfall": "aquatic_environment",
    "swimming_pool_indoor": "aquatic_environment",
    "swimming_pool_outdoor": "aquatic_environment",
    "swimming_hole": "aquatic_environment",
    "hot_spring": "aquatic_environment",
    "beach_house": "aquatic_environment",
    "fishpond": "aquatic_environment",
    "moat_water": "aquatic_environment",
    "pier": "aquatic_environment",
    "raft": "aquatic_environment",
    "watering_hole": "aquatic_environment",
    "wave": "aquatic_environment",
    "water_tower": "aquatic_environment",
    "underwater_ocean_deep": "aquatic_environment",
    "lighthouse": "aquatic_environment",


    # --- Religious or historical sites ---
    "church_outdoor": "religious_or_historical_site",
    "mosque_outdoor": "religious_or_historical_site",
    "synagogue_outdoor": "religious_or_historical_site",
    "temple_asia": "religious_or_historical_site",
    "catacomb": "religious_or_historical_site",
    "mausoleum": "religious_or_historical_site",
    "palace": "religious_or_historical_site",
    "castle": "religious_or_historical_site",
    "ruin": "religious_or_historical_site",
    "arch": "religious_or_historical_site",
    "amphitheater": "religious_or_historical_site",
    "archaelogical_excavation": "religious_or_historical_site",
    "burial_chamber": "religious_or_historical_site",
    "cemetery": "religious_or_historical_site",
    "chalet": "religious_or_historical_site",
    "kasbah": "religious_or_historical_site",
    "medina": "religious_or_historical_site",
    "pagoda": "religious_or_historical_site",
    "throne_room": "religious_or_historical_site",

    # --- Sports and entertainment ---
    "arena_hockey": "sports_and_entertainment",
    "arena_performance": "sports_and_entertainment",
    "arena_rodeo": "sports_and_entertainment",
    "baseball_field": "sports_and_entertainment",
    "basketball_court_indoor": "sports_and_entertainment",
    "boxing_ring": "sports_and_entertainment",
    "football_field": "sports_and_entertainment",
    "gymnasium_indoor": "sports_and_entertainment",
    "martial_arts_gym": "sports_and_entertainment",
    "racecourse": "sports_and_entertainment",
    "stadium_football": "sports_and_entertainment",
    "stadium_soccer": "sports_and_entertainment",
    "stage_indoor": "sports_and_entertainment",
    "stage_outdoor": "sports_and_entertainment",
    "bowling_alley": "sports_and_entertainment",
    "movie_theater_indoor": "sports_and_entertainment",
    "amphitheater": "sports_and_entertainment",
    "athletic_field_outdoor": "sports_and_entertainment",
    "ball_pit": "sports_and_entertainment",
    "baseball_field": "sports_and_entertainment",
    "basketball_court_indoor": "sports_and_entertainment",
    "bow_window_indoor": "sports_and_entertainment",
    "boxing_ring": "sports_and_entertainment",
    "bullring": "sports_and_entertainment",
    "football_field": "sports_and_entertainment",
    "raceway": "sports_and_entertainment",
    "soccer_field": "sports_and_entertainment",
    "stadium_baseball": "sports_and_entertainment",
    "staircase": "sports_and_entertainment",
    "topiary_garden": "sports_and_entertainment",
    "water_park": "sports_and_entertainment",
    "carrousel": "sports_and_entertainment",
    "dining_hall": "sports_and_entertainment",
    "diner_outdoor": "sports_and_entertainment",
    "ice_skating_rink_indoor": "sports_and_entertainment",
    "ice_skating_rink_outdoor": "sports_and_entertainment",


    # --- Cultural spaces ---
    "art_studio": "cultural_space",
    "art_school": "cultural_space",
    "music_studio": "cultural_space",
    "television_studio": "cultural_space",
    "museum_outdoor": "cultural_space",
    "artists_loft": "cultural_space",
    "archive": "cultural_space",
    "galley": "cultural_space",
    "japanese_garden": "cultural_space",
    "natural_history_museum": "cultural_space",
    "music_studio": "cultural_space",
    "orchestra_pit": "cultural_space",

    # --- Rural or recreational area ---
    "aqueduct": "rural_or_recreational_area",
    "barn": "rural_or_recreational_area",
    "barndoor": "rural_or_recreational_area",
    "beer_garden": "rural_or_recreational_area",
    "beer_hall": "rural_or_recreational_area",
    "botanical_garden": "rural_or_recreational_area",
    "cabin_outdoor": "rural_or_recreational_area",
    "campsite": "rural_or_recreational_area",
    "campus": "rural_or_recreational_area",
    "farm": "rural_or_recreational_area",
    "field_cultivated": "rural_or_recreational_area",
    "field_wild": "rural_or_recreational_area",
    "golf_course": "rural_or_recreational_area",
    "greenhouse_outdoor": "rural_or_recreational_area",
    "hayfield": "rural_or_recreational_area",
    "orchard": "rural_or_recreational_area",
    "park": "rural_or_recreational_area",
    "pasture": "rural_or_recreational_area",
    "picnic_area": "rural_or_recreational_area",
    "playground": "rural_or_recreational_area",
    "rice_paddy": "rural_or_recreational_area",
    "sandbox": "rural_or_recreational_area",
    "stable": "rural_or_recreational_area",
    "tree_farm": "rural_or_recreational_area",
    "tree_house": "rural_or_recreational_area",
    "village": "rural_or_recreational_area",
    "vineyard": "rural_or_recreational_area",
    "volleyball_court_outdoor": "rural_or_recreational_area",
    "yard": "rural_or_recreational_area",
    "corn_field": "rural_or_recreational_area",
    "corral": "rural_or_recreational_area",
    "cottage": "rural_or_recreational_area",
    "formal_garden": "rural_or_recreational_area",
    "greenhouse_indoor": "rural_or_recreational_area",
    "hunting_lodge_outdoor": "rural_or_recreational_area",
    "inn_outdoor": "rural_or_recreational_area",
    "kennel_outdoor": "rural_or_recreational_area",
    "pavilion": "rural_or_recreational_area",
    "oast_house": "rural_or_recreational_area",
}


In [12]:
set(places365_to_13categories.values())

{'aquatic_environment',
 'cold_environment',
 'cultural_space',
 'indoor_commercial',
 'indoor_institutional',
 'indoor_residential',
 'industrial_facility',
 'natural_landscape',
 'religious_or_historical_site',
 'rural_or_recreational_area',
 'sports_and_entertainment',
 'transport_infrastructure',
 'urban_outdoor'}

In [13]:
# Now create train/test splits with clean data
from sklearn.model_selection import train_test_split
df_train, df_test = train_test_split(df, test_size=0.2, random_state=42)
df_train, df_test = df_train.reset_index(drop=True), df_test.reset_index(drop=True)
print("Training set shape: ",df_train.shape)
print("Test set shape: ",df_test.shape)

Training set shape:  (13135, 10)
Test set shape:  (3284, 10)


In [14]:
face_processor = AutoImageProcessor.from_pretrained("trpakov/vit-face-expression", use_fast=True)
face_model = AutoModelForImageClassification.from_pretrained("trpakov/vit-face-expression")

face_detector = MTCNN()

scene_processor = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Load pretrained Places365 ResNet18
scene_model = models.resnet18(num_classes=365)
weights_url = "http://places2.csail.mit.edu/models_places365/resnet18_places365.pth.tar"
checkpoint = torch.hub.load_state_dict_from_url(weights_url, map_location="cpu")
state_dict = {k.replace("module.", ""): v for k, v in checkpoint["state_dict"].items()}
scene_model.load_state_dict(state_dict)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
scene_model.to(device)

face_model.to(device)

print("Loaded pretrained models")

Fetching 1 files: 100%|██████████| 1/1 [00:00<00:00, 981.12it/s]


Loaded pretrained models


In [15]:
class TruncatedFaceModel(nn.Module):
    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, x):
        # x will be a batch of face tensors (batch_size, 3, 224, 224)
        device = next(self.model.parameters()).device
        x = x.to(device)
        
        logits_list = []
        for face_tensor in x:
            # Add batch dimension for individual face
            face_batch = face_tensor.unsqueeze(0)
            face_output = self.model(face_batch)
            face_logits = face_output.logits[:, :6]
            logits_list.append(face_logits)

        stacked = torch.cat(logits_list, dim=0)  # Stack along batch dimension
        return stacked.flatten()
    
face_model_truncated = TruncatedFaceModel(face_model)
face_network = Network(face_model_truncated, "face_model", batching = True)
face_network.optimizer = torch.optim.Adam(face_network.parameters(), lr=lr_faces)

In [16]:
class SceneReductionModel(nn.Module):
    def __init__(self, input_dim=365, hidden_dim=128, output_dim=13):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim)
        )

    def forward(self, x):
        return self.model(x)

scene_model_reduction = SceneReductionModel()
scene_model_reduction.to(device)
scene_reduction_network = Network(scene_model_reduction, "scene_reduction", batching = True)
scene_reduction_network.optimizer= torch.optim.Adam(scene_reduction_network.parameters(), lr=lr_scene_reduction)

In [17]:
class TruncatedSceneModel(nn.Module):
    def __init__(self, model, scene_reduction_model):
        super().__init__()
        self.model = model
        self.scene_reduction_model = scene_reduction_model

    def forward(self, x):
        device = next(self.model.parameters()).device
        x = x.to(device)
        # x will be a tensor of the whole image (with the right preprocessing)
        scene_output = self.model(x)
        # Scene output has 365 values, we want it to be mapped to the 13 more common places
        # That mapping happens through a neural network that will be trained during DPL training
        scene_logits = self.scene_reduction_model(scene_output).squeeze(0)
        return scene_logits
    
scene_model_truncated = TruncatedSceneModel(scene_model, scene_model_reduction)
scene_network = Network(scene_model_truncated, "scene_model", batching = True)
scene_network.optimizer = torch.optim.Adam(scene_network.parameters(), lr=lr_scenes)

In [None]:
# Presave the cropped faces as tensors already with the correct preprocessing
# We save it as a tensor consisting of 3 tensors. Each tensor corresponding to a face
def preprocess_face_tensors(df, subset, face_detector, start_index = 0, max_size=2048):
    for i in range(start_index, len(df)):
        if i % 500 == 0:
            print(f"Processing image {i+1}/{len(df)} for subset {subset}")

        img_path = os.path.join(findingemo_dir, df.iloc[i]["image_path"])
        img = Image.open(img_path).convert("RGB")
        save_path = os.path.join(findingemo_dir, "precomputed", "detected_faces", subset)

        if max(img.size) > max_size:
            ratio = max_size / max(img.size)
            new_size = tuple(int(dim * ratio) for dim in img.size)
            img = img.resize(new_size, Image.Resampling.LANCZOS)

        img_np = np.array(img)
        faces = face_detector.detect_faces(img_np)
        face_tensors_list = []

        if faces:
            faces = sorted(faces, key=lambda f: f["box"][2] * f["box"][3], reverse=True)[:3]
            for fdet in faces:
                x, y, w, h = fdet["box"]
                x, y = max(0, x), max(0, y)
                x2, y2 = min(img_np.shape[1], x + w), min(img_np.shape[0], y + h)
                crop = img_np[y:y2, x:x2]
                if crop.shape[0] < 10 or crop.shape[1] < 10:
                    continue
                face_tensor = face_processor(images=Image.fromarray(crop), return_tensors="pt")["pixel_values"].squeeze(0)
                face_tensors_list.append(face_tensor)

        # Pad with zero tensors to ensure exactly 3 face tensors
        while len(face_tensors_list) < 3:
            face_tensors_list.append(torch.zeros(3, 224, 224))  # Same shape as processed face tensor

        # Return stack of exactly 3 face tensors
        stacked = torch.stack(face_tensors_list[:3])  # Shape: (3, 3, 224, 224)
        # save the tensor at the save path
        tensor_save_path = os.path.join(save_path, f"{i}.pt")
        torch.save(stacked, tensor_save_path)
    
preprocess_face_tensors(df_train, "train", face_detector, start_index=680)
preprocess_face_tensors(df_test, "test", face_detector)

OSError: [Errno 22] Invalid argument: 'c:\\Users\\jarne\\Documents\\Code Masterthesis\\nsai_social_cognition\\data\\FindingEmo_Images\\Run_2/Accepting soldiers rally/1044285674_0:376:3500:2266_1000x541_80_0_0_af0a34ef90e79407bad8c469e9dd3372.jpg'

In [1019]:
class FindingEmoFaces(object):
    def __init__(self, df, face_detector, max_size=2048):
        self.df = df
        self.face_detector = face_detector
        self.max_size = max_size

    def __getitem__(self, item):
        index = int(item[0]) if isinstance(item, (tuple, list)) else int(item)
        img_path = os.path.join(findingemo_dir, self.df.iloc[index]["image_path"])
        img = Image.open(img_path).convert("RGB")

        if max(img.size) > self.max_size:
            ratio = self.max_size / max(img.size)
            new_size = tuple(int(dim * ratio) for dim in img.size)
            img = img.resize(new_size, Image.Resampling.LANCZOS)

        img_np = np.array(img)
        faces = self.face_detector.detect_faces(img_np)
        face_tensors_list = []

        if faces:
            faces = sorted(faces, key=lambda f: f["box"][2] * f["box"][3], reverse=True)[:3]
            for fdet in faces:
                x, y, w, h = fdet["box"]
                x, y = max(0, x), max(0, y)
                x2, y2 = min(img_np.shape[1], x + w), min(img_np.shape[0], y + h)
                crop = img_np[y:y2, x:x2]
                if crop.shape[0] < 10 or crop.shape[1] < 10:
                    continue
                face_tensor = face_processor(images=Image.fromarray(crop), return_tensors="pt")["pixel_values"].squeeze(0)
                face_tensors_list.append(face_tensor)

        # Pad with zero tensors to ensure exactly 3 face tensors
        while len(face_tensors_list) < 3:
            face_tensors_list.append(torch.zeros(3, 224, 224))  # Same shape as processed face tensor

        # Return stack of exactly 3 face tensors
        stacked = torch.stack(face_tensors_list[:3])  # Shape: (3, 3, 224, 224)
        return stacked

facetensors_train = FindingEmoFaces(df_train, face_detector)
facetensors_test = FindingEmoFaces(df_test, face_detector)

In [1020]:
class FindingEmoSceneLogits(object):
    def __init__(self, df):
        self.df = df

    def __getitem__(self, item):
        index = int(item[0]) if isinstance(item, (tuple, list)) else int(item)
        img_path = os.path.join(findingemo_dir, self.df.iloc[index]["image_path"])
        img = Image.open(img_path).convert("RGB")

        x = scene_processor(img).unsqueeze(0).to(device)
 
        return x
    
scenetensors_train = FindingEmoSceneLogits(df_train)
scenetensors_test = FindingEmoSceneLogits(df_test)

In [1021]:
x = scenetensors_train[0]
y = scene_model_truncated(x)
print(y)
print(y.shape)

tensor([ 0.0855,  0.0221,  0.3375, -0.2059, -0.0292,  0.1417, -0.2242, -0.2522,
        -0.1419, -0.1095, -0.4523, -0.1804,  0.0405], device='cuda:0',
       grad_fn=<SqueezeBackward1>)
torch.Size([13])


In [1022]:
x = facetensors_train[0]
y = face_model_truncated(x)
print(y)
print(y.shape)

tensor([-0.2959, -2.5035,  0.5354, -0.8110,  4.0392, -1.5357, -1.0524, -2.2824,
        -0.5259, -2.4327,  2.8070,  3.7185, -0.6956, -1.1049,  1.4585, -2.2307,
        -0.8984,  4.0905], device='cuda:0', grad_fn=<ViewBackward0>)
torch.Size([18])


In [1023]:
model = Model(
    os.path.join(prolog_dir, "model3.pl"),
    [face_network, scene_reduction_network, scene_network]
)
model.set_engine(ExactEngine(model), cache=False)
model.optimizer = SGD(model, param_lr=lr_model)

model.add_tensor_source("train_face", facetensors_train)
model.add_tensor_source("train_scene", scenetensors_train)
model.add_tensor_source("test_face", facetensors_test)
model.add_tensor_source("test_scene", scenetensors_test)

In [1024]:
class FindingEmo(Dataset, TorchDataset):
    def __init__(self, indices, df, subset_name):
        super(FindingEmo, self).__init__()
        self.df = df
        self.subset_name = subset_name
        self.data = indices

    def __len__(self):
        return len(self.data)
    
    def to_query(self, i):
        sample_idx = self.data[i]
        emotion_idx = self.df.iloc[sample_idx]['emotion_8_idx']

        E = Var("E")

        goal = Term("final_emo",
                    Term("tensor", Term(f"{self.subset_name}_face", Constant(sample_idx))),
                    Term("tensor", Term(f"{self.subset_name}_scene", Constant(sample_idx))),
                    E)
        
        return Query(goal, substitution={E: Constant(emotion_idx)})
        

In [1025]:
training_size = int(len(df_train) * training_size_perc)

train_indices = list(range(0, training_size))
test_indices = list(range(0,len(df_test)))

train_set = FindingEmo(train_indices, df_train, "train")
test_set = FindingEmo(test_indices, df_test, "test")

In [1026]:
loader= DataLoader(train_set,batch_size=batch_size, shuffle=True)

In [1035]:
# Python
from problog.logic import Term, Constant, Var
from deepproblog.query import Query

# L = Var("L")
# goal = Term("face_logits", Term("tensor", Term("test_face", Constant(0))), L)
# query = Query(goal, substitution={L: L})
# results = model.solve([query])
# print("DPL result:", results[0])

# try to read tensor from solver/engine (may or may not be available depending on engine)
t = model.get_tensor(Term("tensor", Term("test_face", Constant(14))))
print("Direct tensor read:", t.shape)

Direct tensor read: torch.Size([3, 3, 224, 224])
