In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import os
import json
from PIL import Image
class EmoSet(Dataset):
    ATTRIBUTES_MULTI_CLASS = [
        'scene', 'facial_expression', 'human_action', 'brightness', 'colorfulness',
    ]
    ATTRIBUTES_MULTI_LABEL = [
        'object'
    ]
    NUM_CLASSES = {
        'brightness': 11,
        'colorfulness': 11,
        'scene': 254,
        'object': 409,
        'facial_expression': 6,
        'human_action': 264,
    }
    def __init__(self,
                 data_root,
                 num_emotion_classes,
                 phase,
                 ):
        assert num_emotion_classes in (8, 2)
        assert phase in ('train', 'val', 'test')
        self.transforms_dict = self.get_data_transforms()
        self.info = self.get_info(data_root, num_emotion_classes)
        if phase == 'train':
            self.transform = self.transforms_dict['train']
        elif phase == 'val':
            self.transform = self.transforms_dict['val']
        elif phase == 'test':
            self.transform = self.transforms_dict['test']
        else:
            raise NotImplementedError
        data_store = json.load(open(os.path.join(data_root, f'{phase}.json')))
        self.data_store = [
            [
                self.info['emotion']['label2idx'][item[0]],
                os.path.join(data_root, item[1]),
                os.path.join(data_root, item[2])
            ]
            for item in data_store
        ]
    @classmethod
    def get_data_transforms(cls):
        transforms_dict = {
            'train': transforms.Compose([
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ]),
            'val': transforms.Compose([
                transforms.Resize(224),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ]),
            'test': transforms.Compose([
                transforms.Resize(224),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ]),
        }
        return transforms_dict
    def get_info(self, data_root, num_emotion_classes):
        assert num_emotion_classes in (8, 2)
        info = json.load(open(os.path.join(data_root, 'info.json')))
        if num_emotion_classes == 8:
            pass
        elif num_emotion_classes == 2:
            emotion_info = {
                'label2idx': {
                    'amusement': 0,
                    'awe': 0,
                    'contentment': 0,
                    'excitement': 0,
                    'anger': 1,
                    'disgust': 1,
                    'fear': 1,
                    'sadness': 1,
                },
                'idx2label': {
                    '0': 'positive',
                    '1': 'negative',
                }
            }
            info['emotion'] = emotion_info
        else:
            raise NotImplementedError
        return info
    def load_image_by_path(self, path):
        image = Image.open(path).convert('RGB')
        image = self.transform(image)
        return image
    def load_annotation_by_path(self, path):
        json_data = json.load(open(path))
        return json_data
    def __getitem__(self, item):
        emotion_label_idx, image_path, annotation_path = self.data_store[item]
        image = self.load_image_by_path(image_path)
        annotation_data = self.load_annotation_by_path(annotation_path)
        data = { 'image': image, 'emotion_label_idx': emotion_label_idx}
        for attribute in self.ATTRIBUTES_MULTI_CLASS:
            attribute_label_idx = -1
            if attribute in annotation_data:
                attribute_label_idx = self.info[attribute]['label2idx'][str(annotation_data[attribute])]
            data.update({f'{attribute}_label_idx': attribute_label_idx})
        for attribute in self.ATTRIBUTES_MULTI_LABEL:
            assert attribute == 'object'
            num_classes = self.NUM_CLASSES[attribute]
            attribute_label_idx = torch.zeros(num_classes)
            if attribute in annotation_data:
                for label in annotation_data[attribute]:
                    attribute_label_idx[self.info[attribute]['label2idx'][label]] = 1
            data.update({f'{attribute}_label_idx': attribute_label_idx})
        return data
    def __len__(self):
        return len(self.data_store)

In [15]:
import torch
import torch.nn as nn
import timm
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from torch.utils.data import DataLoader
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report
class EmotionRecognitionModel(nn.Module):
    def __init__(self, num_emotions=8, attribute_sizes=None, embedding_dim=32, hidden_dim=128):
        super(EmotionRecognitionModel, self).__init__()
        self.swin = timm.create_model("swin_tiny_patch4_window7_224", pretrained=True, num_classes=0)
        swin_feature_dim = self.swin.num_features
        self.image_fc = nn.Linear(swin_feature_dim, hidden_dim)
        self.attribute_nets = nn.ModuleDict()
        self.attribute_sizes = attribute_sizes
        for attr, size in attribute_sizes.items():
            if attr != 'object':  
                self.attribute_nets[attr] = nn.Sequential(
                    nn.Embedding(size + 1, embedding_dim, padding_idx=size),
                    nn.Linear(embedding_dim, hidden_dim),
                    nn.ReLU()
                )
        self.object_fc = nn.Linear(attribute_sizes['object'], hidden_dim)
        self.fusion_fc = nn.Sequential(
            nn.Linear(hidden_dim * (len(attribute_sizes) + 1), hidden_dim),  
            nn.ReLU(),
            nn.Dropout(0.5)
        )
        self.classifier = nn.Linear(hidden_dim, num_emotions)
    def forward(self, image, attributes):
        image_features = self.swin(image)
        image_features = self.image_fc(image_features)
        attr_features = []
        for attr, net in self.attribute_nets.items():
            attr_values = attributes[attr].clone()
            attr_values[attr_values == -1] = self.attribute_sizes[attr]
            attr_features.append(net(attr_values))  
        object_features = self.object_fc(attributes['object'])
        attr_features.append(object_features)
        combined_features = torch.cat([image_features] + attr_features, dim=1)
        fused_features = self.fusion_fc(combined_features)
        output = self.classifier(fused_features)
        return output

  from .autonotebook import tqdm as notebook_tqdm


In [16]:
import torch
import torch.nn as nn
import torchvision.models as models
from torch.utils.data import DataLoader
from sklearn.metrics import confusion_matrix, classification_report
import numpy as np
def process_batch(model, batch, device):
    """Helper function to process a batch for evaluation (with attributes)"""
    images = batch['image'].to(device)
    emotion_labels = batch['emotion_label_idx'].to(device)
    attributes = {}
    for attr in ['scene', 'facial_expression', 'human_action', 'brightness', 'colorfulness']:
        attributes[attr] = batch[f'{attr}_label_idx'].to(device)
    attributes['object'] = batch['object_label_idx'].to(device)
    outputs = model(images, attributes)
    return outputs, emotion_labels, attributes
def evaluate_model(model, dataloader, criterion, device='cuda', idx2label=None):
    """Evaluate the model and print detailed metrics"""
    model.eval()
    running_loss = 0.0
    all_labels = []
    all_predictions = []
    with torch.no_grad():
        for i, batch in enumerate(dataloader):
            try:
                outputs, emotion_labels, _ = process_batch(model, batch, device)
                loss = criterion(outputs, emotion_labels)
                running_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                all_labels.extend(emotion_labels.cpu().numpy())
                all_predictions.extend(predicted.cpu().numpy())
            except Exception as e:
                print(f"Error in evaluation batch {i}: {e}")
                continue
    test_loss = running_loss / len(dataloader)
    test_accuracy = 100 * sum(1 for pred, label in zip(all_predictions, all_labels) if pred == label) / len(all_labels)
    print(f"\nEvaluation Results:")
    print(f"Loss: {test_loss:.4f}")
    print(f"Accuracy: {test_accuracy:.2f}%")
    cm = confusion_matrix(all_labels, all_predictions)
    print("\nConfusion Matrix:")
    print(cm)
    print("\nClassification Report:")
    if idx2label:
        target_names = [idx2label[str(i)] for i in range(len(idx2label))]
        print(classification_report(all_labels, all_predictions, target_names=target_names, zero_division=0))
    else:
        print(classification_report(all_labels, all_predictions, zero_division=0))
    return test_loss, test_accuracy, cm, all_labels, all_predictions
if __name__ == "__main__":
    data_root = "C:/Users/HP/Downloads/Emotion-Analysis/dataset"
    num_emotion_classes = 8
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    attribute_sizes = {
        'scene': 254,
        'facial_expression': 6,
        'human_action': 264,
        'brightness': 11,
        'colorfulness': 11,
        'object': 409
    }
    train_dataset = EmoSet(data_root=data_root, num_emotion_classes=num_emotion_classes, phase='train')
    test_dataset = EmoSet(data_root=data_root, num_emotion_classes=num_emotion_classes, phase='test')
    test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)
    model = EmotionRecognitionModel(num_emotions=num_emotion_classes, attribute_sizes=attribute_sizes).to(device)
    checkpoint_path = "C:/Users/HP/Downloads/Emotion-Analysis/emotion_recognition_epoch_8.pth"
    checkpoint = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    criterion = nn.CrossEntropyLoss()
    print("\nEvaluating Swin Transformer (with Attributes) on test set...")
    emotion_idx2label = train_dataset.info['emotion']['idx2label']
    test_loss, test_acc, conf_matrix, _, _ = evaluate_model(
        model, 
        test_dataloader, 
        criterion, 
        device=device, 
        idx2label=emotion_idx2label
    )
    print(f"Test Accuracy: {test_acc:.2f}%")


Evaluating Swin Transformer (with Attributes) on test set...

Evaluation Results:
Loss: 1.2544
Accuracy: 53.25%

Confusion Matrix:
[[1509  102  551  285   20  168  123   12]
 [ 228 1118  448   75   34  237   81   39]
 [ 412  175 1259  166   62  297   81   32]
 [ 372   92  333 2021   33   91   62   10]
 [ 165   45  200   54  773  261  155   55]
 [ 196   23  168    5   37  960  216   56]
 [ 220   36  136   81   89  452  839  121]
 [  74   62  204   34   73  169  274  955]]

Classification Report:
              precision    recall  f1-score   support

   amusement       0.48      0.54      0.51      2770
         awe       0.68      0.49      0.57      2260
 contentment       0.38      0.51      0.44      2484
  excitement       0.74      0.67      0.70      3014
       anger       0.69      0.45      0.55      1708
     disgust       0.36      0.58      0.45      1661
        fear       0.46      0.43      0.44      1974
     sadness       0.75      0.52      0.61      1845

    accurac

In [17]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from torch.utils.data import DataLoader
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report
import os
class EmotionRecognitionModel(nn.Module):
    def __init__(self, num_emotions=8, hidden_dim=128):
        super(EmotionRecognitionModel, self).__init__()
        resnet = models.resnet50(pretrained=True)
        self.cnn = nn.Sequential(*list(resnet.children())[:-1])
        self.image_fc = nn.Linear(resnet.fc.in_features, hidden_dim)
        self.dropout = nn.Dropout(0.5)
        self.classifier = nn.Linear(hidden_dim, num_emotions)
    def forward(self, image):
        image_features = self.cnn(image)
        image_features = image_features.view(image_features.size(0), -1)
        image_features = self.image_fc(image_features)
        image_features = self.dropout(image_features)
        output = self.classifier(image_features)
        return output

In [19]:
from sklearn.metrics import classification_report
device = 'cuda' if torch.cuda.is_available() else 'cpu'
def process_batch(model, batch, device):
    """Helper function to process a batch for either training or validation"""
    images = batch['image'].to(device)
    emotion_labels = batch['emotion_label_idx'].to(device)
    outputs = model(images)
    return outputs, emotion_labels
test_dataset = EmoSet(data_root=data_root, num_emotion_classes=num_emotion_classes, phase='test')
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
model = EmotionRecognitionModel(num_emotions=num_emotion_classes, hidden_dim=128).to(device)
checkpoint_path = "C:/Users/HP/Downloads/Emotion-Analysis/emotion_recognition_image_only_epoch_1.pth"
checkpoint = torch.load(checkpoint_path, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()
all_preds = []
all_labels = []
with torch.no_grad():
    for batch in test_loader:
        outputs, emotion_labels = process_batch(model, batch, device)
        preds = outputs.argmax(dim=1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(emotion_labels.cpu().numpy())
emotion_labels = ['amusement', 'anger', 'awe', 'contentment', 'disgust', 'excitement', 'fear', 'sadness']
print("Image-Only Emotion Recognition Classification Report:")
print(classification_report(all_labels, all_preds, target_names=emotion_labels, zero_division=0))

Image-Only Emotion Recognition Classification Report:
              precision    recall  f1-score   support

   amusement       0.44      0.50      0.47      2770
       anger       0.60      0.63      0.61      2260
         awe       0.38      0.37      0.38      2484
 contentment       0.48      0.61      0.54      3014
     disgust       0.44      0.30      0.36      1708
  excitement       0.56      0.40      0.46      1661
        fear       0.43      0.44      0.43      1974
     sadness       0.41      0.36      0.38      1845

    accuracy                           0.47     17716
   macro avg       0.47      0.45      0.45     17716
weighted avg       0.47      0.47      0.46     17716

