In [4]:
def predict_emotion_with_attributes(image_tensor, attribute_models, emotion_model,
                                    object_vector=None, num_objects=409):
    """Predict an emotion class index for one image from predicted attributes.

    Every model in ``attribute_models`` except the one keyed ``'object'`` is
    run on ``image_tensor``; the arg-max class index of its output is stored
    under the same key. The ``'object'`` attribute is a multi-hot vector:
    ``object_vector`` when supplied, otherwise a *random placeholder* with up
    to five active positions. With the placeholder the prediction is not
    deterministic unless the torch RNG is seeded by the caller.

    Args:
        image_tensor: Tensor fed unchanged to every model. The final
            ``.item()`` call needs exactly one prediction, so in practice
            this must hold a single image.
        attribute_models: Mapping ``attribute name -> model``. An entry
            keyed ``'object'`` is put in eval mode but never executed.
        emotion_model: Module called as ``emotion_model(image_tensor, attributes)``.
        object_vector: Optional tensor used verbatim as ``attributes['object']``.
            Supply real detector output here to replace the placeholder.
        num_objects: Width of the random placeholder (ignored when
            ``object_vector`` is given). It has to match the object input
            width expected by ``emotion_model``.

    Returns:
        ``(emotion_index, attributes)`` where ``emotion_index`` is a Python
        int and ``attributes`` is the dict that was passed to ``emotion_model``.

    Side effects:
        ``emotion_model`` and every model in ``attribute_models`` are switched
        to eval mode and left that way.
    """
    emotion_model.eval()
    for model in attribute_models.values():
        model.eval()

    attributes = {}
    with torch.no_grad():
        for attr_name, attr_model in attribute_models.items():
            if attr_name == 'object':
                continue
            attributes[attr_name] = attr_model(image_tensor).argmax(dim=1)

        if object_vector is None:
            # Placeholder only: no object detector is wired in at this point.
            # Create it on the input's device so a GPU image does not clash
            # with a CPU object vector inside the emotion model.
            num_active = 5
            device = image_tensor.device
            object_vector = torch.zeros(1, num_objects, device=device)
            # Indices may repeat, so fewer than ``num_active`` slots can be set.
            active_indices = torch.randint(0, num_objects, (num_active,), device=device)
            object_vector[0, active_indices] = 1.0
        attributes['object'] = object_vector

        emotion_pred = emotion_model(image_tensor, attributes).argmax(dim=1)

    return emotion_pred.item(), attributes


def process_single_image(image_path, attribute_models, emotion_model, idx2emotion):
    """Load one image from disk, predict its emotion and print the label.

    Args:
        image_path: Path of the image file to classify.
        attribute_models: Mapping ``attribute name -> model`` forwarded to
            ``predict_emotion_with_attributes``.
        emotion_model: Emotion classifier forwarded to
            ``predict_emotion_with_attributes``.
        idx2emotion: Mapping from the *string* form of a class index to its
            label (looked up as ``idx2emotion[str(index)]``).

    Returns:
        ``(emotion_label, attributes)`` - the looked-up label and the
        attribute dict produced during prediction.

    Raises:
        KeyError: if the predicted index has no entry in ``idx2emotion``.
    """
    from PIL import Image
    from torchvision import transforms

    # Resize the short side to 224, take the central 224x224 crop, convert to
    # a tensor and normalise each channel with the given mean / std values.
    transform = transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    # The context manager releases the file handle deterministically, also
    # for multi-frame formats and when decoding raises. ``convert`` returns
    # an in-memory image that stays valid after the file is closed.
    with Image.open(image_path) as source_image:
        image = source_image.convert('RGB')

    # Add a leading batch dimension: the models are called with a batch of one.
    image_tensor = transform(image).unsqueeze(0)
    emotion_idx, attributes = predict_emotion_with_attributes(
        image_tensor, attribute_models, emotion_model
    )
    emotion_label = idx2emotion[str(emotion_idx)]
    print(f"Predicted emotion: {emotion_label}")
    return emotion_label, attributes

In [5]:
import timm
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from torch.utils.data import DataLoader
import numpy as np


class EmotionRecognitionModel(nn.Module):
    """Emotion classifier fusing Swin image features with attribute features.

    Branches, each producing a ``hidden_dim``-wide vector:
      * image: Swin backbone output projected by ``image_fc``;
      * one branch per categorical attribute (every key of
        ``attribute_sizes`` except ``'object'``): embedding -> linear -> ReLU;
      * object: a linear layer over a vector of width
        ``attribute_sizes['object']``.
    The branch outputs are concatenated, fused by ``fusion_fc`` and mapped to
    ``num_emotions`` logits by ``classifier``.

    Args:
        num_emotions: Number of output classes.
        attribute_sizes: Mapping ``attribute name -> number of classes``.
            Required, and must contain the key ``'object'``.
        embedding_dim: Embedding width of each categorical attribute.
        hidden_dim: Output width of every branch and of the fusion layer.
        pretrained: Forwarded to ``timm.create_model``. Pass ``False`` when
            the weights are about to be overwritten from a checkpoint, which
            avoids fetching pretrained weights for nothing.

    Raises:
        ValueError: if ``attribute_sizes`` is empty/``None`` or lacks ``'object'``.
    """

    def __init__(self, num_emotions=8, attribute_sizes=None, embedding_dim=32, hidden_dim=128,
                 pretrained=True):
        super().__init__()
        # Fail early with a clear message, before the backbone is built.
        if not attribute_sizes or 'object' not in attribute_sizes:
            raise ValueError(
                "attribute_sizes must be a mapping of attribute name to class "
                "count and must include the key 'object'"
            )

        self.swin = timm.create_model(
            "swin_tiny_patch4_window7_224", pretrained=pretrained, num_classes=0
        )
        self.image_fc = nn.Linear(self.swin.num_features, hidden_dim)

        self.attribute_nets = nn.ModuleDict()
        self.attribute_sizes = attribute_sizes
        for attr, size in attribute_sizes.items():
            if attr == 'object':
                continue
            # One extra embedding row (index ``size``) is reserved as padding
            # for "missing" values, which forward() maps there from -1.
            self.attribute_nets[attr] = nn.Sequential(
                nn.Embedding(size + 1, embedding_dim, padding_idx=size),
                nn.Linear(embedding_dim, hidden_dim),
                nn.ReLU()
            )
        self.object_fc = nn.Linear(attribute_sizes['object'], hidden_dim)

        # Fused input = image branch + one branch per attribute_sizes entry
        # (the categorical attributes plus the object branch).
        self.fusion_fc = nn.Sequential(
            nn.Linear(hidden_dim * (len(attribute_sizes) + 1), hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.5)
        )
        self.classifier = nn.Linear(hidden_dim, num_emotions)

    def forward(self, image, attributes):
        """Return emotion logits for ``image`` and its ``attributes``.

        Args:
            image: Input passed to the Swin backbone.
            attributes: Mapping with one integer index tensor per categorical
                attribute (``-1`` marks a missing value) and a float tensor
                under ``'object'``. The tensors are not modified.

        Returns:
            Output of the final linear classifier (``num_emotions`` logits
            per sample).
        """
        branch_features = [self.image_fc(self.swin(image))]
        for attr, net in self.attribute_nets.items():
            # Work on a copy so the caller's tensor keeps its -1 markers.
            attr_values = attributes[attr].clone()
            attr_values[attr_values == -1] = self.attribute_sizes[attr]
            branch_features.append(net(attr_values))
        branch_features.append(self.object_fc(attributes['object']))

        fused_features = self.fusion_fc(torch.cat(branch_features, dim=1))
        return self.classifier(fused_features)

In [6]:
import torch
import torch.nn as nn
import torchvision.models as models

class AttributePredictor(nn.Module):
    """ResNet-18 trunk with a fresh linear head producing ``num_classes`` scores."""

    def __init__(self, num_classes):
        super().__init__()
        backbone = models.resnet18(pretrained=True)
        # Reuse every child module of the backbone except the last one,
        # whose place is taken by the new ``fc`` layer below.
        trunk_layers = list(backbone.children())
        trunk_layers.pop()
        self.feature_extractor = nn.Sequential(*trunk_layers)
        self.fc = nn.Linear(backbone.fc.in_features, num_classes)

    def forward(self, x):
        """Return the head's raw scores for the batch ``x``."""
        features = self.feature_extractor(x)
        # Collapse everything after the batch dimension into one axis.
        flat_features = features.view(len(features), -1)
        return self.fc(flat_features)

In [14]:
import torch
import json

def load_emotion_model(model_path, device='cpu'):
    """Rebuild an ``EmotionRecognitionModel`` from a checkpoint file.

    The checkpoint must be a mapping with the keys ``'attribute_sizes'``,
    ``'num_emotions'`` and ``'model_state_dict'``.

    Args:
        model_path: Path of the checkpoint file.
        device: Device (name or ``torch.device``) the tensors are mapped to
            and the model is moved to. Defaults to the CPU.

    Returns:
        The model with the checkpoint weights loaded, in eval mode.

    Raises:
        KeyError: if one of the required checkpoint keys is missing.
    """
    target_device = torch.device(device)
    # NOTE(security): torch.load unpickles the file. Only open checkpoints
    # from a trusted source.
    checkpoint = torch.load(model_path, map_location=target_device)
    attribute_sizes = checkpoint['attribute_sizes']
    num_emotions = checkpoint['num_emotions']

    model = EmotionRecognitionModel(
        num_emotions=num_emotions,
        attribute_sizes=attribute_sizes
    )
    model.load_state_dict(checkpoint['model_state_dict'])
    model.to(target_device)
    model.eval()
    return model

def main(base_dir='D:/sem 6/ai/package/image', image_path=None):
    """Run the full pipeline on one image and print the predicted emotion.

    Loads the emotion model, one attribute predictor per categorical
    attribute and the label table, then classifies a single image.

    Args:
        base_dir: Root directory holding the checkpoints and the dataset.
            The default reproduces the paths this notebook was written with.
        image_path: Image to classify. Defaults to the sample image under
            ``base_dir``.
    """
    emotion_model = load_emotion_model(
        f"{base_dir}/swin transformer/saved_models/emotion_recognition_epoch_8.pth"
    )

    # Number of classes per attribute. 'object' has no predictor of its own;
    # it is listed for reference and skipped below.
    attribute_classes = {
        'scene': 254,
        'facial_expression': 6,
        'human_action': 264,
        'brightness': 11,
        'colorfulness': 11,
        'object': 409
    }

    attribute_models = {}
    for attr, num_classes in attribute_classes.items():
        if attr == 'object':
            continue
        model = AttributePredictor(num_classes)
        # Map to CPU like the emotion checkpoint, so weights saved on a GPU
        # still load on a machine without one.
        state_dict = torch.load(
            f"{base_dir}/attributes/{attr}_predictor.pth",
            map_location=torch.device('cpu')
        )
        model.load_state_dict(state_dict)
        attribute_models[attr] = model

    with open(f"{base_dir}/dataset/info.json", 'r') as f:
        info = json.load(f)
    idx2emotion = info['emotion']['idx2label']

    if image_path is None:
        image_path = f"{base_dir}/dataset/image/excitement/excitement_00013.jpg"
    emotion, _ = process_single_image(image_path, attribute_models, emotion_model, idx2emotion)
    print(f"Final emotion prediction: {emotion}")

# Entry point. Inside a notebook kernel ``__name__`` is "__main__", so running
# this cell executes main() straight away.
if __name__ == "__main__":
    main()

Predicted emotion: excitement
Final emotion prediction: excitement
