In [None]:
# Mount Google Drive at /content/drive; the dataset, TensorBoard logs and the
# exported model used by later cells all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install onnx

In [None]:
import torch
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, random_split, Subset
from torch import nn, onnx
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter

from sklearn.metrics import confusion_matrix, precision_score, recall_score
from sklearn.model_selection import train_test_split
import seaborn as sns
import matplotlib.pyplot as plt
# from matplotlib.colors import LinearSegmentedColormap

import numpy as np


In [None]:
### data preprocess

# Every image is resized to 224x224 and converted to a tensor.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# ImageFolder derives the label of each sample from its class sub-folder.
dataset = datasets.ImageFolder(root='/content/drive/MyDrive/comp8420/Subset For Assignment SFEW', transform=transform)

# Collect the sample indices of every class in a single pass over the dataset.
indices_by_class = {class_id: [] for class_id in range(len(dataset.classes))}
for sample_idx, (_, label) in enumerate(dataset.samples):
  indices_by_class[label].append(sample_idx)

# Per-class split: 80% train, then the remaining 20% is halved into
# validation and test, so every class keeps the same proportions.
train_indices = []
valid_indices = []
test_indices = []
for class_id in range(len(dataset.classes)):
  train_part, holdout_part = train_test_split(indices_by_class[class_id], test_size=0.2, random_state=42)
  valid_part, test_part = train_test_split(holdout_part, test_size=0.5, random_state=42)

  train_indices += train_part
  valid_indices += valid_part
  test_indices += test_part

# Wrap each index list in a Subset; only the training loader shuffles.
train_dataset = Subset(dataset, train_indices)
valid_dataset = Subset(dataset, valid_indices)
test_dataset = Subset(dataset, test_indices)

train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
valid_dataloader = DataLoader(valid_dataset, batch_size=32, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [None]:
# Show the mapping from emotion class name (folder name) to the integer label
# used by the dataset, so predicted indices can be read back as emotions.
class_index = dataset.class_to_idx
class_index

In [None]:
class SpatialAttention(nn.Module):
  """Spatial attention gate built from channel-wise mean and max maps.

  The input is reduced along the channel axis (dim=1) twice, once by mean and
  once by max. The two single-channel maps are concatenated, passed through
  one bias-free convolution and squashed with a sigmoid, giving a
  single-channel map of weights in (0, 1).

  Args:
    kernel_size: Size of the convolution kernel; must be 3 or 7.
  """

  def __init__(self, kernel_size=7):
    super().__init__()
    assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
    # Padding of 3 (kernel 7) or 1 (kernel 3) keeps the spatial size unchanged.
    if kernel_size == 7:
      padding = 3
    else:
      padding = 1

    # 2 input channels (mean map + max map) -> 1 attention channel.
    self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
    self.sigmoid = nn.Sigmoid()

  def forward(self, x):
    """Return the attention map for `x`, reduced over dim=1 to one channel."""
    channel_mean = x.mean(dim=1, keepdim=True)
    channel_max = x.max(dim=1, keepdim=True).values
    pooled = torch.cat((channel_mean, channel_max), dim=1)
    return self.sigmoid(self.conv1(pooled))


In [None]:
class ResNetSA(nn.Module):
  """ResNet-18 classifier with one spatial-attention gate after `layer1`.

  The backbone is torchvision's pretrained ResNet-18 whose final fully
  connected layer is replaced by a fresh `num_classes`-way linear layer.

  Args:
    num_classes: Number of output logits.
  """

  def __init__(self, num_classes=7):
    super().__init__()
    # NOTE(review): newer torchvision releases prefer `weights=` over
    # `pretrained=`; confirm against the installed version before changing.
    self.resnet = models.resnet18(pretrained=True)
    # Swap the ImageNet head for one sized to this task.
    in_features = self.resnet.fc.in_features
    self.resnet.fc = nn.Linear(in_features, num_classes)
    self.sa = SpatialAttention()

  def forward(self, x):
    """Run the backbone, gating the `layer1` features with spatial attention."""
    backbone = self.resnet

    # Stem: conv -> batch norm -> ReLU -> max pool.
    for stem_layer in (backbone.conv1, backbone.bn1, backbone.relu, backbone.maxpool):
      x = stem_layer(x)

    # Attention is applied only once, to the output of the first residual stage.
    x = backbone.layer1(x)
    attention = self.sa(x)
    x = attention * x

    # Remaining residual stages run without attention.
    for stage in (backbone.layer2, backbone.layer3, backbone.layer4):
      x = stage(x)

    # Global pooling, flatten to (batch, features), then classify.
    x = backbone.avgpool(x)
    x = torch.flatten(x, 1)
    return backbone.fc(x)


In [None]:
### start to run model

# Seed PyTorch before the model is built so the new layers' initial weights and
# the training DataLoader's shuffling order repeat from run to run (the data
# split already uses random_state=42). Some GPU kernels may still be
# non-deterministic, so results can differ slightly between runs on CUDA.
torch.manual_seed(42)

# Build the ResNet-18 + spatial-attention model on the available device.
num_epochs = 50
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ResNetSA().to(device)

# Cross-entropy loss, Adam with light weight decay, and a step schedule that
# multiplies the learning rate by 0.9 every 10 epochs.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.9)

# use tensorboard to record training process
writer = SummaryWriter('/content/drive/MyDrive/comp8420/runs_resnet18input')
## train
# One pass over the training set followed by one pass over the validation set
# per epoch; losses, accuracies and weight histograms go to TensorBoard.
for epoch in range(num_epochs):
  model.train()
  running_loss = 0.0
  correct = 0
  total = 0
  # iterate over mini-batches
  for images, labels in train_dataloader:
    images, labels = images.to(device), labels.to(device)
    optimizer.zero_grad()
    # forward pass, loss, backward pass, parameter update
    outputs = model(images)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    running_loss += loss.item()
    _, predicts = torch.max(outputs, 1)
    total += labels.size(0)
    correct += (predicts == labels).sum().item()
  # update learning rate
  scheduler.step()
  # mean of the per-batch training losses for this epoch
  train_loss = running_loss / len(train_dataloader)
  print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, train_loss))
  # write outputs in tensorboard
  writer.add_scalar('training loss:', train_loss, epoch)
  writer.add_scalar('training accuracy:', 100 * correct / total, epoch)
  # weight analysis
  for name, param in model.named_parameters():
    writer.add_histogram(name, param.detach().cpu().numpy(), epoch)

  ## validation
  model.eval()
  val_running_loss = 0.0
  correct = 0
  total = 0
  # Reset every epoch, so after the loop these hold the last epoch's results.
  val_predicts = []
  val_labels = []
  with torch.no_grad():
    for images, labels in valid_dataloader:
      images, labels = images.to(device), labels.to(device)
      outputs = model(images)
      # Accumulate the loss on the validation batches themselves; previously
      # the training loss was logged under the validation tag.
      val_running_loss += criterion(outputs, labels).item()
      _, predicts = torch.max(outputs, 1)
      total += labels.size(0)
      correct += (predicts == labels).sum().item()

      val_predicts.extend(predicts.cpu().numpy())
      val_labels.extend(labels.cpu().numpy())

  # write outputs in tensorboard
  writer.add_scalar('validation loss:', val_running_loss / len(valid_dataloader), epoch)
  writer.add_scalar('validation accuracy:', 100 * correct / total, epoch)

# Confusion matrix of the validation predictions kept from the final epoch.
val_cm = confusion_matrix(val_labels, val_predicts)
val_ax = sns.heatmap(val_cm, annot=True, fmt='d', cmap='Pastel1')
val_ax.set_xlabel('Validation Predicts')
val_ax.set_ylabel('Validation Labels')
plt.show()

# Training is finished: flush and close the TensorBoard writer.
writer.close()

# Export the trained network to ONNX by tracing it with one 224x224 RGB input.
# (A state_dict checkpoint could be written with torch.save instead.)
onnx_path = "/content/drive/MyDrive/comp8420/emotionNet_resnet18input.onnx"  # The file path to save the ONNX model
dummy_input = torch.randn(1, 3, 224, 224, device=device)
onnx.export(model, dummy_input, onnx_path)

## test
# Put the model in inference mode explicitly instead of relying on the state
# left behind by the last validation pass of the training loop.
model.eval()
correct = 0
total = 0
test_predicts = []
test_labels = []
with torch.no_grad():
  for images, labels in test_dataloader:
    images, labels = images.to(device), labels.to(device)
    outputs = model(images)
    # predicted class = index of the largest logit
    _, predicts = torch.max(outputs, 1)
    total += labels.size(0)
    correct += (predicts == labels).sum().item()
    test_predicts.extend(predicts.cpu().numpy())
    test_labels.extend(labels.cpu().numpy())
print('Test Accuracy: {:.2f}%'.format(100 * correct / total))
# plot the outputs
sns.heatmap(confusion_matrix(test_labels,test_predicts), annot=True, fmt='d', cmap='Pastel1',vmin=0, vmax=7)
plt.xlabel('Test Predicts')
plt.ylabel('Test Labels')
plt.show()

In [None]:
# Confusion matrix of the test predictions: rows are true labels, columns are
# predicted labels.
cm = confusion_matrix(test_labels, test_predicts)

# Per-class precision and recall (average=None returns one value per class).
precision = precision_score(test_labels, test_predicts, average=None)
recall = recall_score(test_labels, test_predicts, average=None)

# Per-class specificity = TN / (TN + FP), computed for all classes at once.
hits = np.diag(cm)
actual_counts = cm.sum(axis=1)     # samples whose true label is the class
predicted_counts = cm.sum(axis=0)  # samples predicted as the class
fp_counts = predicted_counts - hits
tn_counts = cm.sum() - actual_counts - predicted_counts + hits
specificity = (tn_counts / (tn_counts + fp_counts)).astype(precision.dtype)

# show results
for i, (p, r, s) in enumerate(zip(precision, recall, specificity)):
    print("Class {}: Precision: {:.2f}, Recall: {:.2f}, Specificity: {:.2f}".format(i, p, r, s))


In [None]:
!pip install grad-cam


In [None]:
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
from pytorch_grad_cam.utils.image import show_cam_on_image
from torchvision.transforms import transforms
from PIL import Image
import matplotlib.pyplot as plt

model.eval()

# Explain the last block of the final residual stage.
target_layers = [model.resnet.layer4[-1]]

# Same preprocessing as the training pipeline: resize to 224x224, to tensor.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])
# Force three channels so a grayscale or RGBA PNG still yields a 3x224x224
# tensor, matching what the model's first convolution expects.
image = Image.open("/content/drive/MyDrive/comp8420/Subset For Assignment SFEW/Happy/Bridesmaids_000059880_00000039.png").convert("RGB")
input_tensor = transform(image).unsqueeze(0).to(device)

# create cam
# `use_cuda` is intentionally not passed: the old expression compared a
# torch.device with the string 'cuda', and recent grad-cam releases no longer
# accept the argument. The model and the input tensor are already on `device`.
cam = GradCAM(model=model, target_layers=target_layers)

# NOTE(review): this explains class index 0, while the sample image is taken
# from the 'Happy' folder - check dataset.class_to_idx to confirm the target.
targets = [ClassifierOutputTarget(0)]
grayscale_cam = cam(input_tensor=input_tensor, targets=targets)[0, :]
# Overlay the heat map on the image, converted from CHW to HWC layout.
visualization = show_cam_on_image(input_tensor.cpu().numpy()[0].transpose(1, 2, 0), grayscale_cam, use_rgb=True)

plt.imshow(visualization)
plt.axis('off')
plt.show()


In [None]:
# Visualise the 64 filters of the first convolution as an 8x8 grid of images.
# The weights are moved so the input-channel axis comes last, then rescaled to
# [0, 1] over the whole tensor.
kernels = model.resnet.conv1.weight.data.cpu().numpy().transpose(0, 2, 3, 1)
k_min, k_max = kernels.min(), kernels.max()
kernels = (kernels - k_min) / (k_max - k_min)

fig, axs = plt.subplots(8, 8, figsize=(8, 8))
# axs.flat walks the grid row by row, so filter i lands at (i // 8, i % 8).
for i, ax in enumerate(axs.flat):
    ax.imshow(kernels[i, :, :, :], cmap='viridis')
    ax.axis('off')
plt.show()


In [None]:
from sklearn.metrics import classification_report

# Print the per-class report for the validation split first, then the test split.
for y_true, y_pred in ((val_labels, val_predicts), (test_labels, test_predicts)):
    report = classification_report(y_true, y_pred)
    print(report)

In [None]:
# Load the TensorBoard notebook extension so the %tensorboard magic is available.
%load_ext tensorboard


In [None]:
%tensorboard --logdir=/content/drive/MyDrive/comp8420/runs --port=6007
