In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torchvision.models as models
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau
from opacus.validators import ModuleValidator
from opacus.utils.uniform_sampler import UniformWithReplacementSampler
from torch.utils.data import DataLoader
import numpy as np
from opacus import PrivacyEngine
from opacus.utils.batch_memory_manager import BatchMemoryManager
from opacus.grad_sample.grad_sample_module import create_or_accumulate_grad_sample
from opacus.data_loader import DPDataLoader
from opacus.grad_sample import GradSampleModule
from torchvision import datasets, transforms, models
import wandb
from torchsummary import summary
import torchvision, torch, opacus
import optuna
from functools import partial
import numpy as np
from imgaug import augmenters as iaa

In [None]:
class Modified_Deep_Emotion(nn.Module):
  '''
  CNN emotion classifier preceded by a spatial transformer network (STN).

  The sizes of ``fc1`` (112360 = 10 * 106 * 106) and ``fc_loc[0]``
  (27040 = 10 * 52 * 52) are hard-coded, so the network only accepts
  single-channel 224 x 224 inputs, i.e. tensors of shape (N, 1, 224, 224).
  The output is a (N, 5) tensor of unnormalised class scores.

  Attribute names and creation order are part of the checkpoint format
  (state-dict keys / pickled modules) and must not be changed.
  '''

  def __init__(self):
    '''
    Build the feature extractor, the classifier head and the STN branch.
    '''
    super().__init__()
    # Feature extractor: four 3x3 convolutions with a single pooling step.
    self.conv1 = nn.Conv2d(1, 10, 3)
    self.conv2 = nn.Conv2d(10, 10, 3)
    self.pool2 = nn.MaxPool2d(2, 2)

    self.conv3 = nn.Conv2d(10, 10, 3)
    self.conv4 = nn.Conv2d(10, 10, 3)
    # pool4 is not used in forward(); fc1's input size assumes it is skipped.
    # It is kept so that previously saved models still match this class.
    self.pool4 = nn.MaxPool2d(2, 2)

    self.norm = nn.BatchNorm2d(10)

    # Classifier head: 10 * 106 * 106 flattened features -> 50 -> 5 scores.
    self.fc1 = nn.Linear(112360, 50)
    self.fc2 = nn.Linear(50, 5)
    self.dropout = nn.Dropout(p=0.5)

    # STN localisation network: extracts features used to regress the
    # affine transform applied to the input image.
    self.localization = nn.Sequential(
      nn.Conv2d(1, 8, kernel_size=7),
      nn.MaxPool2d(2, stride=2),
      nn.ReLU(True),
      nn.Conv2d(8, 10, kernel_size=5),
      nn.MaxPool2d(2, stride=2),
      nn.ReLU(True)
    )

    # Regressor for the six parameters of the 2x3 affine matrix.
    self.fc_loc = nn.Sequential(
      nn.Linear(27040, 32),
      nn.ReLU(True),
      nn.Linear(32, 3 * 2)
    )
    # Start from the identity transform so the STN is initially a no-op.
    self.fc_loc[2].weight.data.zero_()
    self.fc_loc[2].bias.data.copy_(torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float))

  def stn(self, x):
    '''
    Warp ``x`` with the affine transform predicted by the localisation net.

    :param x: input batch of shape (N, 1, 224, 224)
    :return: resampled batch with the same shape as ``x``
    '''
    xs = self.localization(x)
    xs = xs.view(xs.size(0), -1)

    theta = self.fc_loc(xs).view(-1, 2, 3)

    # align_corners=False is the library default; passing it explicitly
    # keeps the behaviour unchanged and avoids the "default changed" warning.
    grid = F.affine_grid(theta, x.size(), align_corners=False)
    return F.grid_sample(x, grid, align_corners=False)

  def forward(self, input):
    '''
    Compute class scores for a batch of grayscale images.

    :param input: tensor of shape (N, 1, 224, 224)
    :return: tensor of shape (N, 5) with unnormalised class scores
    '''
    out = self.stn(input)

    out = F.relu(self.conv1(out))

    out = self.conv2(out)
    out = self.dropout(out)
    out = F.relu(self.pool2(out))

    out = F.relu(self.conv3(out))
    out = self.dropout(out)
    out = self.norm(out)
    out = F.relu(self.conv4(out))

    # F.dropout defaults to training=True, which would keep dropping
    # activations even after model.eval(); tie it to the module's mode.
    out = F.dropout(out, training=self.training)

    out = out.view(out.size(0), -1)
    out = F.relu(self.fc1(out))
    out = self.fc2(out)

    return out

In [None]:
class CombinedModel(nn.Module):
  '''
  Late-fusion classifier combining a ResNet-18 trunk with Modified_Deep_Emotion.

  The RGB input is passed through the ResNet trunk (final layer removed),
  and its channel mean is passed through Modified_Deep_Emotion. Both
  outputs are concatenated and mapped to ``num_classes`` scores.
  '''

  # Width of the pooled ResNet-18 feature vector once the last layer is dropped.
  RESNET18_FEATURES = 512

  def __init__(self, num_classes, backbone=None):
    '''
    :param num_classes: number of output classes (required, no default)
    :param backbone: optional ResNet-18 style module whose last child is the
        classification layer. When omitted, the notebook-level global
        ``model_resnet`` is used, as in the original implementation.
    :raises NameError: if ``backbone`` is omitted and ``model_resnet`` has
        not been defined yet
    '''
    super().__init__()

    if backbone is None:
      try:
        backbone = model_resnet
      except NameError as exc:
        raise NameError(
          "CombinedModel needs a ResNet backbone: pass backbone=... or define "
          "the global 'model_resnet' before constructing the model"
        ) from exc

    # Keep every child except the last one so the trunk emits features.
    self.resnet18 = nn.Sequential(*list(backbone.children())[:-1])

    # Modified_Deep_Emotion model
    self.deep_emotion = Modified_Deep_Emotion()

    # Fusion layer: 512 ResNet features + 5 Modified_Deep_Emotion scores = 517.
    fused_features = self.RESNET18_FEATURES + self.deep_emotion.fc2.out_features
    self.fc = nn.Linear(fused_features, num_classes)

  def forward(self, x):
    '''
    :param x: image batch; the trunk and the channel-mean step assume a
        3-channel (N, 3, 224, 224) tensor — TODO confirm against the
        training pipeline
    :return: tensor of shape (N, num_classes) with unnormalised scores
    '''
    x1 = self.resnet18(x)
    x1 = x1.view(x1.size(0), -1)

    # Approximate a grayscale image by averaging over the channel axis.
    x_gray = x.mean(dim=1, keepdim=True)

    x2 = self.deep_emotion(x_gray)
    x2 = x2.view(x2.size(0), -1)

    # Concatenate features from both models
    x = torch.cat((x1, x2), dim=1)

    return self.fc(x)

In [None]:

# Load the fully pickled model. The class definitions above must already be
# in scope because unpickling looks them up by name.
# map_location='cpu' lets a checkpoint saved on a GPU load on CPU-only hosts;
# the model is moved to the selected device later.
# SECURITY: torch.load unpickles arbitrary objects — only load trusted files.
# NOTE(review): PyTorch >= 2.6 defaults to weights_only=True, which rejects
# fully pickled models; pass weights_only=False there if the file is trusted.
model = torch.load('deep_emotion_expw_step1.pth', map_location='cpu')

### Test the pictures with function 1

In [None]:
import torch
import numpy as np
import cv2
from torchvision import transforms
from PIL import Image

In [None]:
# Lookup table from predicted class index to a readable emotion label.
# NOTE(review): assumes this index order matches the label order used
# during training — confirm against the training dataset.
emotion_mapping = dict(enumerate(("angry", "disgust", "fear", "sad")))

In [None]:
import os

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = model.to(device)
# Switch to inference mode: without this, dropout and batch-norm layers keep
# whatever mode the model was saved in, which can make predictions random
# or dependent on the batch.
model.eval()
def transform_image(image_path):
    '''
    Load an image file and turn it into a normalized single-image batch.

    :param image_path: path of the image file to open
    :return: float tensor of shape (1, 3, 224, 224), normalized with the
        mean/std values listed below
    '''
    # Force three channels: grayscale, palette or RGBA files would otherwise
    # produce a tensor whose channel count does not match Normalize below.
    with Image.open(image_path) as img:
        image = img.convert('RGB')

    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    # unsqueeze(0) adds the leading batch dimension.
    return transform(image).unsqueeze(0)

# NOTE: despite its name, angry_dir points at the "sad" test folder. The name
# is kept because a later cell reads this variable.
angry_dir = '/content/Expw-F/test/sad'

# Print the raw predicted class index for every file in the folder.
# sorted() makes the output order reproducible (os.listdir order is arbitrary),
# and gradients are disabled once for the whole loop.
with torch.no_grad():
    for image_name in sorted(os.listdir(angry_dir)):
        image_path = os.path.join(angry_dir, image_name)
        if not os.path.isfile(image_path):
            continue  # skip sub-directories

        test_image = transform_image(image_path).to(device)
        outputs = model(test_image)
        # argmax avoids assigning to `_`, which IPython uses for the last output.
        predicted = outputs.argmax(dim=1)
        print(f"Image: {image_name}, Predicted emotion: {predicted.item()}")



Image: 45035heartbroken_programmer_77.jpg, Predicted emotion: 3
Image: 17263anxious_yang_756.jpg, Predicted emotion: 2
Image: 38597crying_son_192.jpg, Predicted emotion: 1
Image: 23386fighting_black_807.jpg, Predicted emotion: 1
Image: 44491heartbroken_mother_124.jpg, Predicted emotion: 1
Image: 37355crying_manager_401.jpg, Predicted emotion: 1
Image: 34056crying_actor_105.jpg, Predicted emotion: 1
Image: 11057fighting_yang_566.jpg, Predicted emotion: 1
Image: 38180crying_president_556.jpg, Predicted emotion: 1
Image: 34592crying_asian_402.jpg, Predicted emotion: 1
Image: 37579crying_mother_40.jpg, Predicted emotion: 1
Image: 34518crying_asian_16.jpg, Predicted emotion: 3
Image: 39575crying_yang_230.jpg, Predicted emotion: 1
Image: 35632crying_european_89.jpg, Predicted emotion: 1
Image: 4512annoyed_girl_346.jpg, Predicted emotion: 1
Image: 37129crying_lady_375.jpg, Predicted emotion: 1
Image: 74940amazed_grandmother_139.jpg, Predicted emotion: 1
Image: 34482crying_american_646.jpg, Pr

In [None]:
import os
# Same sweep over angry_dir as the previous cell, but printing the emotion
# label instead of the raw class index.
with torch.no_grad():
    for image_name in sorted(os.listdir(angry_dir)):
        image_path = os.path.join(angry_dir, image_name)
        if not os.path.isfile(image_path):
            continue  # skip sub-directories

        test_image = transform_image(image_path).to(device)
        outputs = model(test_image)
        predicted_idx = outputs.argmax(dim=1)
        class_index = predicted_idx.item()
        # Report an unmapped index explicitly instead of failing with KeyError
        # if the model has more outputs than emotion_mapping has labels.
        predicted_emotion = emotion_mapping.get(class_index, f"unknown ({class_index})")
        print(f"Image: {image_name}, Predicted emotion: {predicted_emotion}")

Image: 45035heartbroken_programmer_77.jpg, Predicted emotion: sad
Image: 17263anxious_yang_756.jpg, Predicted emotion: fear
Image: 38597crying_son_192.jpg, Predicted emotion: disgust
Image: 23386fighting_black_807.jpg, Predicted emotion: disgust
Image: 44491heartbroken_mother_124.jpg, Predicted emotion: disgust
Image: 37355crying_manager_401.jpg, Predicted emotion: disgust
Image: 34056crying_actor_105.jpg, Predicted emotion: disgust
Image: 11057fighting_yang_566.jpg, Predicted emotion: disgust
Image: 38180crying_president_556.jpg, Predicted emotion: disgust
Image: 34592crying_asian_402.jpg, Predicted emotion: disgust
Image: 37579crying_mother_40.jpg, Predicted emotion: disgust
Image: 34518crying_asian_16.jpg, Predicted emotion: sad
Image: 39575crying_yang_230.jpg, Predicted emotion: disgust
Image: 35632crying_european_89.jpg, Predicted emotion: disgust
Image: 4512annoyed_girl_346.jpg, Predicted emotion: disgust
Image: 37129crying_lady_375.jpg, Predicted emotion: disgust
Image: 74940ama

### Test a single picture with function 2

In [None]:
import torch
import torchvision.transforms as transforms
from PIL import Image
import numpy as np


def preprocess_image(image_path, output_size):
    """
    Read an image from disk and convert it to an RGB tensor of a fixed size.

    The image is resized, center-cropped to ``output_size`` and converted
    with ``ToTensor``. No mean/std normalization is applied here.

    :param image_path: path of the image file to open
    :param output_size: target size passed to both Resize and CenterCrop
    :return: image tensor without a batch dimension
    """
    pipeline = transforms.Compose([
        transforms.Resize(output_size),
        transforms.CenterCrop(output_size),
        transforms.ToTensor(),
    ])
    rgb_image = Image.open(image_path).convert('RGB')
    return pipeline(rgb_image)

#image_path = '/content/Expw-F/test/angry/1020angry_expression_174.jpg'
# Sample picture and the spatial size expected by the model.
image_path = '/content/Expw-F/test/disgust/31895disgust_African_243.jpg'
output_size = (224, 224)

# Load the picture and add the leading batch dimension.
# NOTE(review): unlike transform_image, this path applies no mean/std
# normalization — confirm which preprocessing the model was trained with.
processed_image = preprocess_image(image_path, output_size).unsqueeze(0)

# CombinedModel reads the global `model_resnet`, so it must be defined first.
# NOTE(review): this cell builds a new CombinedModel and loads no checkpoint,
# so apart from the pretrained ResNet trunk the weights are randomly
# initialised. It also rebinds the global `model` loaded earlier.
# NOTE(review): `pretrained=True` is deprecated in recent torchvision
# releases in favour of `weights=...` — confirm the installed version.
model_resnet = models.resnet18(pretrained=True)
model = CombinedModel(num_classes=4)
model.eval()

# Class index -> emotion label.
emotion_mapping = dict(enumerate(("angry", "disgust", "fear", "sad")))

# Use the model to predict the outcome for the input picture.
with torch.no_grad():
    prediction = model(processed_image)
    print('prediction: ', prediction)
    predicted_class = prediction.argmax(dim=1)
    print('predicted_class: ', predicted_class)

class_index = predicted_class.item()
print(f'Predicted class: {class_index}')
predicted_emotion = emotion_mapping[class_index]

print(f'Predicted class: {predicted_emotion}')

prediction:  tensor([[-0.1961,  0.8316, -0.5942, -0.5582]])
predicted_class:  tensor([1])
Predicted class: 1
Predicted class: disgust
