## Loading Libraries and Classes 

you can mount this project models and datasets from drive:
https://drive.google.com/drive/folders/1ICCQoevk0oxKjs7P-omHGq5HpFCKgbAe?usp=share_link

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import cv2
import time
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

import torch
import torch.nn as nn
from torchvision import datasets
from torchvision.io import read_image
from torchvision.transforms import ToTensor
from torchvision import transforms
from torch.utils.data import Dataset , DataLoader
from torchvision.models import resnet18,resnet50 ,ResNet50_Weights

os.system('pip install facenet-pytorch')
from facenet_pytorch import MTCNN

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

Using cuda device


In [3]:
class MSCTDDataset(Dataset):
    def __init__(self, img_dir, mode , transform=None, target_transform=None,resize = None):
        # downloading text files from github
        print('\nLoading Text Files')
        os.system('git clone https://github.com/XL2248/MSCTD.git')
        #unziping images
        if mode == 'train':
          print('Loading Train Images')
          os.system('unzip -n '+ os.path.join(img_dir,'train_ende.zip'))
          os.system('mv train_ende train')
          print('Train Images Count:', len(os.listdir('train')))
          os.system('cp -r MSCTD/MSCTD_data/ende/english_train.txt -t train')
          os.system('cp -r MSCTD/MSCTD_data/ende/image_index_train.txt -t train')
          os.system('cp -r MSCTD/MSCTD_data/ende/sentiment_train.txt -t train')
        

        if mode == 'dev':
          print('Loading Validation Images')
          os.system('unzip -n '+ os.path.join(img_dir,'dev.zip'))
          print('Dev Images Count:', len(os.listdir('dev')))
          os.system('cp -r MSCTD/MSCTD_data/ende/english_dev.txt -t dev')
          os.system('cp -r MSCTD/MSCTD_data/ende/image_index_dev.txt -t dev')
          os.system('cp -r MSCTD/MSCTD_data/ende/sentiment_dev.txt -t dev')
        

        if mode == 'test':
          print('Loading Test Images')
          os.system('unzip -n '+ os.path.join(img_dir,'test.zip'))
          print('Test Images Count:', len(os.listdir('test')))
          os.system('cp -r MSCTD/MSCTD_data/ende/english_test.txt -t test')
          os.system('cp -r MSCTD/MSCTD_data/ende/image_index_test.txt -t test')
          os.system('cp -r MSCTD/MSCTD_data/ende/sentiment_test.txt -t test')

        os.system('rm -r MSCTD')
        # processing text files and saving them as attribute of dataset
        if mode == 'val':
            mode = 'dev'
        file1 = open(mode + '/sentiment_' + mode + '.txt', 'r')
        Lines = file1.readlines()
        file1.close()
        label = []
        for line in Lines:
            line = line.strip()
            label.append(int(line))         
        self.sentiment = np.array(label)

        file1 = open(mode + '/english_' + mode + '.txt', 'r')
        Lines = file1.readlines()
        file1.close()
        text = []
        for line in Lines:
            line = line.strip()
            text.append(line)  
        self.text = text

        image_index = []
        file1 = open(mode + '/image_index_' + mode + '.txt', 'r')
        Lines = file1.readlines()
        file1.close()
        text = []
        for line in Lines:
            line = line.strip()
            image_index.append(line) 
        self.image_index = image_index

        self.mode = mode
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform
        self.resize = resize
        
    def __len__(self):
        return len(self.sentiment)

    def __getitem__(self, idx):
        img_path = os.path.join(self.mode, f'{idx}.jpg')
        image = cv2.imread(img_path)
        image = cv2.cvtColor(image , cv2.COLOR_BGR2RGB)
        sentiment = self.sentiment[idx]
        text = self.text[idx]
        if self.resize:
              image = cv2.resize(image, self.resize) 
        else :
              image = cv2.resize(image, (1280,633)) 

        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            sentiment = self.target_transform(sentiment)
        return {'text':text ,'image':image, 'sentiment':(sentiment)}

In [None]:
train_data = MSCTDDataset(img_dir = '/content/drive/MyDrive/Project/Phase0', mode = 'train', resize = (640,316))
val_data = MSCTDDataset(img_dir = '/content/drive/MyDrive/Project/Phase0', mode = 'dev', resize = (640,316))
test_data = MSCTDDataset(img_dir = '/content/drive/MyDrive/Project/Phase0', mode = 'test',  resize = (640,316))

batch_size=128
train_dataloader =  DataLoader(train_data, batch_size=batch_size)
val_dataloader =  DataLoader(val_data, batch_size=batch_size)
test_dataloader =  DataLoader(test_data, batch_size=batch_size)


Loading Text Files
Loading Train Images
Train Images Count: 2146

Loading Text Files


# Part 1

## Part 1-1

### Part 1-1-1 (Face Extraction)

In [None]:
class faceDataset(Dataset):
    def __init__(self, mode, dir, transform=None, target_transform=None, resize=None):
        self.mode = mode
        os.system(f'unzip -n {dir}/face_{self.mode}.zip')
        
        file1 = open(f'face_{self.mode}/face_{self.mode}.txt', 'r')
        Lines = file1.readlines()
        file1.close()
        label = []
        for line in Lines:
            line = line.strip()
            label.append(int(line))         
        self.sentiment = np.array(label)

        
        self.transform = transform
        self.target_transform = target_transform
        self.resize = resize
        
    def __len__(self):
        return len(self.sentiment)

    def __getitem__(self, idx):
        img_path = os.path.join(f'face_{self.mode}', f'{idx}.jpg')
        image = cv2.imread(img_path)
        image = cv2.cvtColor(image , cv2.COLOR_BGR2RGB)
        sentiment = self.sentiment[idx] 
        if self.resize:
              image = cv2.resize(image, self.resize) 

        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            sentiment = self.target_transform(sentiment)
        return {'image':image, 'sentiment':(sentiment)}

In [None]:
def face_detect(mode, data_loader, dir):
    mtcnn = MTCNN(select_largest=False, post_process=False, device='cuda:0')
    i = 0
    face_sentiment = []
    if (os.path.isdir(f'face_{mode}')==0):
      os.mkdir(f'face_{mode}')

    for batch in data_loader:
      images = batch['image'].numpy()
      sentiments = batch['sentiment'].numpy()
      img_snt = zip(images,sentiments) 

      for img,snt in img_snt:
        boxes, probs = mtcnn.detect(img,landmarks=False)
        try:
            boxes = np.array(boxes,dtype='uint64')
            for x1,y1,x2,y2 in boxes:
                face = img[y1:y2, x1:x2, :]
                face = cv2.resize(face,(40,60))
                cv2.imwrite(f'face_{mode}/{i}.jpg',cv2.cvtColor(face, cv2.COLOR_RGB2BGR))
                face_sentiment.append(snt)
                i+=1
        except:
              se=5

    with open(f"face_{mode}/face_{mode}.txt", 'w') as output:
        for row in face_sentiment:
          output.write(str(row) + '\n')
    os.system(f'zip -r {dir}/face_{mode}.zip face_{mode}')
    os.system(f'rm -r face_{mode}')

In [None]:
face_detect('train', train_dataloader, '/content/drive/MyDrive/Project/Phase1')
face_detect('val', val_dataloader, '/content/drive/MyDrive/Project/Phase1')
face_detect('test', test_dataloader, '/content/drive/MyDrive/Project/Phase1')

In [None]:
face_train = faceDataset(mode = 'train', dir='/content/drive/MyDrive/Project/Phase1', transform = ToTensor())
face_val = faceDataset(mode = 'val', dir='/content/drive/MyDrive/Project/Phase1', transform = ToTensor())
face_test = faceDataset(mode = 'test', dir='/content/drive/MyDrive/Project/Phase1', transform = ToTensor())

batch_size = 128
face_train_dataloader = DataLoader(face_train, batch_size=batch_size, shuffle=True)
face_val_dataloader = DataLoader(face_val, batch_size=batch_size, shuffle=True)
face_test_dataloader = DataLoader(face_test, batch_size=batch_size, shuffle=True)

### Part 1-1-2 (CNN Training)

In [None]:
# Defining the Neural Network Layers, Neurons and Activation Function
class CNN1(nn.Module):
    def __init__(self, p = 0):
        self.p = p
        
        super(CNN1, self).__init__()
        self.flatten = nn.Flatten()
        
        self.conv2d_relu_stack = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=7, stride=1, padding='same'),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.Dropout(p=self.p),

            nn.Conv2d(16, 32, kernel_size=7, stride=1, padding='same'),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Dropout(p=self.p),
            
            nn.Conv2d(32, 32, kernel_size=7, stride=1, padding='same'),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2, stride=None, padding=0),
            nn.Dropout(p=self.p),
            
            nn.Conv2d(32, 64, kernel_size=5, stride=1, padding='same'),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Dropout(p=self.p),
            
            nn.Conv2d(64, 128, kernel_size=5, stride=1, padding='same'),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Dropout(p=self.p),
            
            nn.Conv2d(128, 128, kernel_size=5, stride=1, padding='same'),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, stride=None, padding=0),
            nn.Dropout(p=self.p),
            
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding='valid'),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Dropout(p=self.p),

            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding='valid'),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Dropout(p=self.p),

            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding='valid'),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, stride=None, padding=0),
            nn.Dropout(p=self.p),
            )
        
        self.linear_relu_stack = nn.Sequential(
            nn.Dropout(p=self.p),
            nn.Linear(2048, 512),
            nn.ReLU(),
            nn.Dropout(p=self.p),
            nn.Linear(512, 128),
            nn.ReLU(),
            nn.Dropout(p=self.p),
            nn.Linear(128, 32),
            nn.ReLU(),
            nn.Dropout(p=self.p),
            nn.Linear(32, 8),
            nn.ReLU(),
            nn.Dropout(p=self.p),
            nn.Linear(8, 3),
            )

    def forward(self, x):
        x = self.conv2d_relu_stack(x)
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [None]:
# Defining the function that will do calculation of Loss and gradiant updates for all Batches in Train Dataset in 1 epoch
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    loss_av, correct = 0, 0

    for batch in tqdm(dataloader):
        X,y = batch['image'].to(device), batch['sentiment'].to(device)
        pred = model(X)
        loss = loss_fn(pred, y)

        loss_av += float(loss.item())
        correct += (pred.argmax(1) == y).type(torch.float).sum().item()

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    loss_av /= num_batches
    correct /= size
    return 100*correct, loss_av

# Defining the function that will do calculation of Loss and Accuracy over Test Dataset
def Accuracy_Loss(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    loss, correct = 0, 0

    with torch.no_grad():
        for batch in tqdm(dataloader):
            X,y = batch['image'].to(device), batch['sentiment'].to(device)
            pred = model(X)
            loss += float(loss_fn(pred, y).item())
            y_pred = pred.argmax(1)
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    loss /= num_batches
    correct /= size
    return 100*correct, loss

In [None]:
# setting hyper-parameters
learning_rate = 1e-4
batch_size = 128
epochs = 20
loss_fn = nn.CrossEntropyLoss()

# model and optimizer defenintion
CNN1_model = CNN1(0).to(device)
CNN1_model.train()
optimizer = torch.optim.Adam(CNN1_model.parameters(), lr=learning_rate)
face_train_dataloader = DataLoader(face_train, batch_size=batch_size, shuffle=True)

# lists to save loss and accuracy
train_loss_cnn1 = []
val_loss_cnn1 = []
train_acu_cnn1 = []
val_acu_cnn1 = []

best_acu = 0
for e in range(epochs):
    
    print(f'epoch {e+1}/{epochs}:')

    CNN1_model.train()
    t_loss = train_loop(face_train_dataloader, CNN1_model, loss_fn, optimizer)
    train_loss_cnn1.append(t_loss[1]),train_acu_cnn1.append(t_loss[0])

    CNN1_model.eval()
    v_loss = Accuracy_Loss(face_val_dataloader, CNN1_model, loss_fn)
    val_loss_cnn1.append(v_loss[1]),val_acu_cnn1.append(v_loss[0])

    print(f'train loss:{t_loss[1]:0.4f}    train acc:{t_loss[0]:0.3f} ---- val loss:{v_loss[1]:0.4f}   val acc:{v_loss[0]:0.3f} \n')
    if val_acu_cnn1[e]>best_acu:
        best_acu = val_acu_cnn1[e]
        torch.save(CNN1_model, 'CNN1_model.pth')
        best_epoch = e+1
        
best_CNN1_model = torch.load('CNN1_model.pth')
best_CNN1_model.eval()
print(f"CNN result for epoch {best_epoch}:")
print("Best Model Accuracy for Train Set:", train_acu_cnn1[best_epoch-1])  
print("Best Model Accuracy for Validation Set:",val_acu_cnn1[best_epoch-1])    
print("Best Model Accuracy for Test Set:", Accuracy_Loss(face_test_dataloader, best_CNN1_model, loss_fn)[0])  
torch.save(best_CNN1_model, '/content/drive/MyDrive/Project/Phase1/CNN1_model.pth')     

epoch 1/20:


100%|██████████| 211/211 [00:23<00:00,  9.10it/s]
100%|██████████| 48/48 [00:02<00:00, 21.96it/s]


train loss:1.0903    train acc:38.223 ---- val loss:1.0969   val acc:36.527 

epoch 2/20:


100%|██████████| 211/211 [00:23<00:00,  8.98it/s]
100%|██████████| 48/48 [00:02<00:00, 22.51it/s]


train loss:1.0896    train acc:38.576 ---- val loss:1.0936   val acc:36.950 

epoch 3/20:


100%|██████████| 211/211 [00:22<00:00,  9.24it/s]
100%|██████████| 48/48 [00:02<00:00, 22.98it/s]


train loss:1.0896    train acc:38.335 ---- val loss:1.0954   val acc:36.233 

epoch 4/20:


100%|██████████| 211/211 [00:22<00:00,  9.27it/s]
100%|██████████| 48/48 [00:02<00:00, 21.60it/s]


train loss:1.0892    train acc:38.487 ---- val loss:1.0949   val acc:35.174 

epoch 5/20:


100%|██████████| 211/211 [00:23<00:00,  9.08it/s]
100%|██████████| 48/48 [00:02<00:00, 19.66it/s]


train loss:1.0886    train acc:38.885 ---- val loss:1.0952   val acc:35.207 

epoch 6/20:


100%|██████████| 211/211 [00:22<00:00,  9.19it/s]
100%|██████████| 48/48 [00:02<00:00, 18.69it/s]


train loss:1.0871    train acc:38.889 ---- val loss:1.0967   val acc:35.859 

epoch 7/20:


100%|██████████| 211/211 [00:22<00:00,  9.23it/s]
100%|██████████| 48/48 [00:02<00:00, 20.94it/s]


train loss:1.0853    train acc:39.506 ---- val loss:1.0952   val acc:35.060 

epoch 8/20:


100%|██████████| 211/211 [00:23<00:00,  9.13it/s]
100%|██████████| 48/48 [00:02<00:00, 22.79it/s]


train loss:1.0848    train acc:39.506 ---- val loss:1.0966   val acc:35.109 

epoch 9/20:


100%|██████████| 211/211 [00:23<00:00,  9.13it/s]
100%|██████████| 48/48 [00:02<00:00, 22.87it/s]


train loss:1.0850    train acc:39.361 ---- val loss:1.0985   val acc:35.826 

epoch 10/20:


100%|██████████| 211/211 [00:22<00:00,  9.20it/s]
100%|██████████| 48/48 [00:02<00:00, 19.05it/s]


train loss:1.0846    train acc:39.521 ---- val loss:1.0943   val acc:36.429 

epoch 11/20:


100%|██████████| 211/211 [00:22<00:00,  9.21it/s]
100%|██████████| 48/48 [00:02<00:00, 23.06it/s]


train loss:1.0846    train acc:39.629 ---- val loss:1.0971   val acc:34.995 

epoch 12/20:


100%|██████████| 211/211 [00:23<00:00,  9.17it/s]
100%|██████████| 48/48 [00:02<00:00, 22.81it/s]


train loss:1.0832    train acc:39.755 ---- val loss:1.0958   val acc:36.152 

epoch 13/20:


100%|██████████| 211/211 [00:23<00:00,  9.17it/s]
100%|██████████| 48/48 [00:02<00:00, 20.19it/s]


train loss:1.0811    train acc:40.242 ---- val loss:1.0930   val acc:38.205 

epoch 14/20:


100%|██████████| 211/211 [00:22<00:00,  9.20it/s]
100%|██████████| 48/48 [00:02<00:00, 18.55it/s]


train loss:1.0807    train acc:40.220 ---- val loss:1.1006   val acc:36.022 

epoch 15/20:


100%|██████████| 211/211 [00:22<00:00,  9.20it/s]
100%|██████████| 48/48 [00:02<00:00, 20.49it/s]


train loss:1.0803    train acc:40.231 ---- val loss:1.0979   val acc:36.054 

epoch 16/20:


100%|██████████| 211/211 [00:22<00:00,  9.19it/s]
100%|██████████| 48/48 [00:02<00:00, 22.86it/s]


train loss:1.0787    train acc:40.242 ---- val loss:1.0941   val acc:37.814 

epoch 17/20:


100%|██████████| 211/211 [00:23<00:00,  9.16it/s]
100%|██████████| 48/48 [00:02<00:00, 22.85it/s]


train loss:1.0778    train acc:40.666 ---- val loss:1.0987   val acc:35.712 

epoch 18/20:


100%|██████████| 211/211 [00:23<00:00,  9.16it/s]
100%|██████████| 48/48 [00:02<00:00, 22.61it/s]


train loss:1.0774    train acc:40.659 ---- val loss:1.0959   val acc:37.129 

epoch 19/20:


100%|██████████| 211/211 [00:23<00:00,  9.17it/s]
100%|██████████| 48/48 [00:02<00:00, 22.90it/s]


train loss:1.0748    train acc:41.165 ---- val loss:1.0976   val acc:36.983 

epoch 20/20:


100%|██████████| 211/211 [00:22<00:00,  9.22it/s]
100%|██████████| 48/48 [00:02<00:00, 22.76it/s]


train loss:1.0734    train acc:41.329 ---- val loss:1.0989   val acc:36.543 

CNN result for epoch 13:
Best Model Accuracy for Train Set: 40.24249637371221
Best Model Accuracy for Validation Set: 38.204626914304335


100%|██████████| 53/53 [00:02<00:00, 22.86it/s]


Best Model Accuracy for Test Set: 37.22039964211154


### Part 1-1-3 (Merging Models)

In [None]:
def merge_model(data_loader, CNN_model, Face_model):
  i0, i1, i2 = 0, 0, 0
  c0, c1, c2 = 0, 0, 0
  acu, acu0, acu1, acu2 = 0, 0, 0, 0
  size = len(data_loader.dataset)
  trans = transforms.ToTensor()

  for batch in tqdm(data_loader):
      images = batch['image'].numpy()
      sentiments = batch['sentiment'].numpy()
      img_snt = zip(images,sentiments) 
      
      for img,snt in img_snt:
        face_img = []
        boxes, probs = Face_model.detect(img,landmarks=False)

        try:
            boxes = np.array(boxes,dtype='uint64')
            for x1,y1,x2,y2 in boxes:
                face = img[y1:y2, x1:x2, :]
                face = cv2.resize(face,(40,60))
                face_img.append(trans(face).numpy())
            X = np.array(face_img)
            X = torch.tensor(X).to(device)

            pred = CNN_model(X)
            label = pred.argmax(1)
            label = label.cpu().detach().numpy()
            label = np.bincount(label).argmax()
            acu += (snt == label)
            if snt==0:
              c0+=1
              acu0+=(label==0)
            elif snt==1:
              c1+=1
              acu1+=(label==1)
            else:
              c2+=1
              acu2+=(label==2)

        except:
            #guessing the label is 1
            acu += (snt == 1)
            if snt==0:
              i0+=1
            elif snt==1:
              i1+=1
            else:
              i2+=1

  return acu/(size), (c0,i0,acu0/c0), (c1,i1,acu1/c1), (c2,i2,acu2/c1)

#### Evaluation

In [None]:
best_CNN1_model = torch.load('/content/drive/MyDrive/Project/Phase1/CNN1_model.pth')
loss_fn = nn.CrossEntropyLoss()
print("Accuracy on each face:")
print("Face Model Accuracy for Val Set:", Accuracy_Loss(face_val_dataloader, best_CNN1_model, loss_fn)[0])  
print("Face Model Accuracy for Test Set:", Accuracy_Loss(face_test_dataloader, best_CNN1_model, loss_fn)[0])  

mtcnn = MTCNN(select_largest=False, post_process=False, device='cuda:0')
print("Accuracy on each Image:")
a = merge_model(val_dataloader,best_CNN1_model,mtcnn)
print('\n Merge Model Val Accuracy:',a[0])
a = merge_model(test_dataloader,best_CNN1_model,mtcnn)
print('\n Merge Model Test Accuracy:',a[0])

Accuracy on each face:


100%|██████████| 48/48 [00:02<00:00, 22.70it/s]


Face Model Accuracy for Val Set: 38.204626914304335


100%|██████████| 53/53 [00:02<00:00, 22.90it/s]


Face Model Accuracy for Test Set: 37.22039964211154
Accuracy on each Image:


100%|██████████| 40/40 [04:16<00:00,  6.40s/it]



 Merge Model Val Accuracy: 0.3839620778194746


100%|██████████| 40/40 [04:20<00:00,  6.52s/it]


 Merge Model Test Accuracy: 0.38819814485889087





In [None]:
print("Each Class Details (face-detected/faceless/accuracy on face-detected) :\n",a[1:])

Each Class Details (face-detected/faceless/accuracy on face-detected) :
 ((1049, 249, 0.31649189704480457), (1799, 364, 0.7053918843802113), (1308, 298, 0.0011117287381878821))


## Part 1-2

### Part 1-2-1 (Augmentation)

#### Loading Classes and Functions

In [None]:
os.system('git clone https://github.com/amodas/PRIME-augmentations.git')
os.system('mv /content/PRIME-augmentations/utils /content')
os.system('rm -r /content/PRIME-augmentations')
os.system('pip install einops')
##########################################################################################################################################
import utils
from utils.rand_filter import RandomFilter
from utils.color_jitter import RandomSmoothColor
from utils.diffeomorphism import Diffeo
from utils.prime import GeneralizedPRIMEModule
from utils.prime import PRIMEAugModule
from torch.distributions import Dirichlet, Beta
from einops import rearrange, repeat
from opt_einsum import contract
##########################################################################################################################################
class TransformLayer(nn.Module):
    def __init__(self, mean, std):
        super().__init__()
        mean = torch.as_tensor(mean, dtype=torch.float)[None, :, None, None].to(device)
        std = torch.as_tensor(std, dtype=torch.float)[None, :, None, None].to(device)
        self.mean = nn.Parameter(mean, requires_grad=False)
        self.std = nn.Parameter(std, requires_grad=False)

    def forward(self, x):
        return (x.to(device)).sub(self.mean).div(self.std).to(device)

class PRIMEAugModule(torch.nn.Module):
    def __init__(self, augmentations):
        super().__init__()
        self.augmentations = augmentations
        self.num_transforms = len(augmentations)

    def forward(self, x, mask_t):
        aug_x = torch.zeros_like(x)
        for i in range(self.num_transforms):
            aug_x += self.augmentations[i](x) * mask_t[:, i]
        return aug_x

In [None]:
class face_augDataset(Dataset):
    def __init__(self, mode, dir, transform=None, target_transform=None, resize=None):
        self.mode = mode
        os.system(f'unzip -n {dir}/face_aug_{self.mode}.zip')
        
        file1 = open(f'face_aug_{self.mode}/face_aug_{self.mode}.txt', 'r')
        Lines = file1.readlines()
        file1.close()
        label = []
        for line in Lines:
            line = line.strip()
            label.append(int(line))         
        self.sentiment = np.array(label)

        
        self.transform = transform
        self.target_transform = target_transform
        self.resize = resize
        
    def __len__(self):
        return len(self.sentiment)

    def __getitem__(self, idx):
        img_path = os.path.join(f'face_aug_{self.mode}', f'{idx}.jpg')
        image = cv2.imread(img_path)
        image = cv2.cvtColor(image , cv2.COLOR_BGR2RGB)
        sentiment = self.sentiment[idx] 
        if self.resize:
              image = cv2.resize(image, self.resize) 
       

        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            sentiment = self.target_transform(sentiment)
        return {'image':image, 'sentiment':(sentiment)}

In [None]:
def face_aug(mode, augmentation, data_loader, dir):
    mtcnn = MTCNN(select_largest=False, post_process=False, device='cuda:0')
    i = 0
    trans = transforms.ToTensor()
    face_sentiment = []
    if (os.path.isdir(f'face_aug_{mode}')==0):
      os.mkdir(f'face_aug_{mode}')

    for batch in tqdm(data_loader):
      images = batch['image'].numpy()
      sentiments = batch['sentiment'].numpy()
      img_snt = zip(images,sentiments) 

      mean = torch.tensor(np.mean(images))
      std = torch.tensor(np.std(images))
      prime_module = GeneralizedPRIMEModule(
            preprocess=TransformLayer((mean,) ,(std,)),
            mixture_width=3,
            mixture_depth=-1,
            no_jsd=1, max_depth=3,
            aug_module=PRIMEAugModule(augmentation))
      
      for img,snt in img_snt:
        boxes, probs = mtcnn.detect(img,landmarks=False)
        try:
            boxes = np.array(boxes,dtype='uint64')
            for x1,y1,x2,y2 in boxes:
                #face detect
                face = img[y1:y2, x1:x2, :]
                face = cv2.resize(face,(60,60))
                face = trans(face)[None,:].to(device)
                #prime augment
                f = prime_module(face)[0]
                f = ((f-f.min())/(f.max()-f.min()))
                #process to save
                f = np.array(f.permute(1,2,0).cpu())
                f = cv2.resize(f,(40,60))
                f = cv2.cvtColor(f, cv2.COLOR_RGB2BGR)
                f = np.array(255*f,dtype='uint64')
                cv2.imwrite(f'face_aug_{mode}/{i}.jpg',f)
                face_sentiment.append(snt)
                i+=1
        except:
              se=5
              
    with open(f"face_aug_{mode}/face_aug_{mode}.txt", 'w') as output:
        for row in face_sentiment:
          output.write(str(row) + '\n')
    os.system(f'zip -r {dir}/face_aug_{mode}.zip face_aug_{mode}')
    os.system(f'rm -r face_aug_{mode}')
    

#### Casting  Augmentation

توضیح در مورد پارامتر های هر فیلتر: فیلتر دیفرم: پارامتر اول میزان دیفرم شدن را کنترل میکند و پارامتر دوم میزان اثر کات مکس را. هرچه کات مکس و کات مین کم و بهم نزدیک باشند اثر این فیلتر بیشتر است.
فیلتر رندم کالر: در این فیلتر پارامتر تی کنترل کننده ی قدرت اعمال فیلتر است و با 0 کردن آن فیلتر بی اثر می شود و تغیر پهنای باند و ضریب کات میزان این تغییر و نویزی بودن آن را کنترل می کند. در کل این فیلتر تم رنگی را تغییر میدهد.
فیلتر رندم فیلتر: این فیلتر تصویر را تار میکند و میزان تار شدن با ضریب سیگما کنترل می شود. پارامتر سایز کرنل نیز اندازه پنچره هایی که روی آن تارشدن اعمال می شود را تعیین می کند

In [None]:
# PRIME Filters
augmentations = []
diffeo = Diffeo(sT=1, rT=1, scut=1, rcut=1, cutmin=100, cutmax=200, alpha=1.0, stochastic=True)
augmentations.append(diffeo)
color = RandomSmoothColor(cut=1, T=0.01,  freq_bandwidth=1 , stochastic=True)
augmentations.append(color)
filt = RandomFilter(kernel_size=7, sigma=0.5, stochastic=True)
augmentations.append(filt)

# Casting Filters
face_aug('train', augmentations, train_dataloader,'/content/drive/MyDrive/Project/Phase1')
face_aug('val', augmentations, val_dataloader,'/content/drive/MyDrive/Project/Phase1')
face_aug('test', augmentations, test_dataloader,'/content/drive/MyDrive/Project/Phase1')

100%|██████████| 159/159 [25:38<00:00,  9.68s/it]
100%|██████████| 40/40 [05:38<00:00,  8.46s/it]
100%|██████████| 40/40 [05:46<00:00,  8.66s/it]


### Part 1-2-2 (Evaluate Model on Augmented Data)

In [None]:
# Building Datasets and Dataloaders
face_aug_train = face_augDataset(mode = 'train', dir='/content/drive/MyDrive/Project/Phase1', transform = ToTensor())
face_aug_val = face_augDataset(mode = 'val', dir='/content/drive/MyDrive/Project/Phase1', transform = ToTensor())
face_aug_test = face_augDataset(mode = 'test', dir='/content/drive/MyDrive/Project/Phase1', transform = ToTensor())

batch_size = 128
face_aug_train_dataloader = DataLoader(face_aug_train, batch_size=batch_size, shuffle=True)
face_aug_val_dataloader = DataLoader(face_aug_val, batch_size=batch_size, shuffle=True)
face_aug_test_dataloader = DataLoader(face_aug_test, batch_size=batch_size, shuffle=True)



best_CNN1_model = torch.load('/content/drive/MyDrive/Project/Phase1/CNN1_model.pth')
best_CNN1_model.eval()
print(f"Pre-Trained CNN result on Augmented Data :")
print("Best Model Accuracy for Train Set:", Accuracy_Loss(face_aug_train_dataloader, best_CNN1_model, loss_fn)[0])  
print("Best Model Accuracy for Validation Set:", Accuracy_Loss(face_aug_val_dataloader, best_CNN1_model, loss_fn)[0])    
print("Best Model Accuracy for Test Set:", Accuracy_Loss(face_aug_test_dataloader, best_CNN1_model, loss_fn)[0])  
torch.save(best_CNN1_model, '/content/drive/MyDrive/Project/Phase1/CNN1_model.pth')     

Pre-Trained CNN result on Augmented Data :


100%|██████████| 211/211 [00:10<00:00, 19.66it/s]


Best Model Accuracy for Train Set: 37.63156915981701


100%|██████████| 48/48 [00:02<00:00, 22.84it/s]


Best Model Accuracy for Validation Set: 36.55913978494624


100%|██████████| 53/53 [00:02<00:00, 22.72it/s]

Best Model Accuracy for Test Set: 36.74321503131524





### Part 1-2-3 (Tuning CNN1 Model with Augmented Data)

In [None]:
# setting hyper-parameters
learning_rate = 1e-4
batch_size = 128
epochs = 20
loss_fn = nn.CrossEntropyLoss()

# model and data defenintion
CNN1_model = torch.load('/content/drive/MyDrive/Project/Phase1/CNN1_model.pth')
CNN1_model.train()
optimizer = torch.optim.Adam(CNN1_model.parameters(), lr=learning_rate)

face_aug_train = face_augDataset(mode = 'train', dir='/content/drive/MyDrive/Project/Phase1', transform = ToTensor())
batch_size = 128
face_aug_train_dataloader = DataLoader(face_aug_train, batch_size=batch_size, shuffle=True)

# lists to save loss and accuracy
train_loss_cnn1 = []
val_loss_cnn1 = []
train_acu_cnn1 = []
val_acu_cnn1 = []

best_acu = 0
for e in range(epochs):
    
    print(f'epoch {e+1}/{epochs}:')

    CNN1_model.train()
    t_loss = train_loop(face_aug_train_dataloader, CNN1_model, loss_fn, optimizer)
    train_loss_cnn1.append(t_loss[1]),train_acu_cnn1.append(t_loss[0])

    CNN1_model.eval()
    v_loss = Accuracy_Loss(face_val_dataloader, CNN1_model, loss_fn)
    val_loss_cnn1.append(v_loss[1]),val_acu_cnn1.append(v_loss[0])

    print(f'train loss:{t_loss[1]:0.4f}    train acc:{t_loss[0]:0.3f} ---- val loss:{v_loss[1]:0.4f}   val acc:{v_loss[0]:0.3f} \n')
    if val_acu_cnn1[e]>best_acu:
        best_acu = val_acu_cnn1[e]
        torch.save(CNN1_model, 'aug_CNN1_model.pth')
        best_epoch = e+1
        
best_CNN1_model = torch.load('aug_CNN1_model.pth')
best_CNN1_model.eval()
print(f"CNN result for epoch {best_epoch}:")
print("Best Model Accuracy for Train Set:", train_acu_cnn1[best_epoch-1])  
print("Best Model Accuracy for Validation Set:",val_acu_cnn1[best_epoch-1])    
print("Best Model Accuracy for Test Set:", Accuracy_Loss(face_test_dataloader, best_CNN1_model, loss_fn)[0])  
torch.save(best_CNN1_model, '/content/drive/MyDrive/Project/Phase1/aug_CNN1_model.pth')     

epoch 1/20:


100%|██████████| 211/211 [00:22<00:00,  9.23it/s]
100%|██████████| 48/48 [00:02<00:00, 22.16it/s]


train loss:1.0899    train acc:38.033 ---- val loss:1.0923   val acc:35.712 

epoch 2/20:


100%|██████████| 211/211 [00:23<00:00,  8.90it/s]
100%|██████████| 48/48 [00:02<00:00, 22.62it/s]


train loss:1.0896    train acc:38.178 ---- val loss:1.0929   val acc:35.582 

epoch 3/20:


100%|██████████| 211/211 [00:23<00:00,  8.81it/s]
100%|██████████| 48/48 [00:02<00:00, 22.77it/s]


train loss:1.0885    train acc:38.606 ---- val loss:1.0943   val acc:34.979 

epoch 4/20:


100%|██████████| 211/211 [00:23<00:00,  8.99it/s]
100%|██████████| 48/48 [00:02<00:00, 18.79it/s]


train loss:1.0883    train acc:38.528 ---- val loss:1.0936   val acc:35.940 

epoch 5/20:


100%|██████████| 211/211 [00:23<00:00,  8.99it/s]
100%|██████████| 48/48 [00:02<00:00, 21.62it/s]


train loss:1.0885    train acc:38.680 ---- val loss:1.0936   val acc:35.419 

epoch 6/20:


100%|██████████| 211/211 [00:23<00:00,  8.92it/s]
100%|██████████| 48/48 [00:02<00:00, 20.04it/s]


train loss:1.0876    train acc:38.547 ---- val loss:1.0964   val acc:35.207 

epoch 7/20:


100%|██████████| 211/211 [00:23<00:00,  8.98it/s]
100%|██████████| 48/48 [00:02<00:00, 18.91it/s]


train loss:1.0870    train acc:38.866 ---- val loss:1.0945   val acc:35.891 

epoch 8/20:


100%|██████████| 211/211 [00:23<00:00,  8.97it/s]
100%|██████████| 48/48 [00:02<00:00, 19.17it/s]


train loss:1.0856    train acc:39.183 ---- val loss:1.0935   val acc:36.054 

epoch 9/20:


100%|██████████| 211/211 [00:23<00:00,  8.96it/s]
100%|██████████| 48/48 [00:02<00:00, 20.27it/s]


train loss:1.0855    train acc:39.000 ---- val loss:1.0961   val acc:35.288 

epoch 10/20:


100%|██████████| 211/211 [00:23<00:00,  8.99it/s]
100%|██████████| 48/48 [00:02<00:00, 23.04it/s]


train loss:1.0835    train acc:39.387 ---- val loss:1.0979   val acc:35.011 

epoch 11/20:


100%|██████████| 211/211 [00:23<00:00,  8.96it/s]
100%|██████████| 48/48 [00:02<00:00, 22.98it/s]


train loss:1.0838    train acc:39.588 ---- val loss:1.0953   val acc:36.950 

epoch 12/20:


100%|██████████| 211/211 [00:23<00:00,  8.90it/s]
100%|██████████| 48/48 [00:02<00:00, 22.68it/s]


train loss:1.0818    train acc:39.711 ---- val loss:1.1010   val acc:34.604 

epoch 13/20:


100%|██████████| 211/211 [00:23<00:00,  8.98it/s]
100%|██████████| 48/48 [00:02<00:00, 22.96it/s]


train loss:1.0811    train acc:39.774 ---- val loss:1.0958   val acc:36.201 

epoch 14/20:


100%|██████████| 211/211 [00:23<00:00,  8.98it/s]
100%|██████████| 48/48 [00:02<00:00, 20.55it/s]


train loss:1.0783    train acc:40.607 ---- val loss:1.0995   val acc:36.136 

epoch 15/20:


100%|██████████| 211/211 [00:23<00:00,  8.92it/s]
100%|██████████| 48/48 [00:02<00:00, 22.97it/s]


train loss:1.0783    train acc:40.250 ---- val loss:1.1011   val acc:36.755 

epoch 16/20:


100%|██████████| 211/211 [00:23<00:00,  8.91it/s]
100%|██████████| 48/48 [00:02<00:00, 20.00it/s]


train loss:1.0770    train acc:40.510 ---- val loss:1.0985   val acc:36.266 

epoch 17/20:


100%|██████████| 211/211 [00:23<00:00,  8.92it/s]
100%|██████████| 48/48 [00:02<00:00, 18.97it/s]


train loss:1.0746    train acc:41.020 ---- val loss:1.1010   val acc:35.630 

epoch 18/20:


100%|██████████| 211/211 [00:23<00:00,  8.96it/s]
100%|██████████| 48/48 [00:02<00:00, 18.76it/s]


train loss:1.0746    train acc:41.183 ---- val loss:1.0979   val acc:36.527 

epoch 19/20:


100%|██████████| 211/211 [00:23<00:00,  8.98it/s]
100%|██████████| 48/48 [00:02<00:00, 19.86it/s]


train loss:1.0703    train acc:41.674 ---- val loss:1.1030   val acc:35.777 

epoch 20/20:


100%|██████████| 211/211 [00:23<00:00,  8.91it/s]
100%|██████████| 48/48 [00:02<00:00, 22.02it/s]


train loss:1.0667    train acc:42.348 ---- val loss:1.1056   val acc:36.152 

CNN result for epoch 11:
Best Model Accuracy for Train Set: 39.587904935470675
Best Model Accuracy for Validation Set: 36.950146627565985


100%|██████████| 53/53 [00:04<00:00, 11.63it/s]


Best Model Accuracy for Test Set: 33.596779003877124


همانطور که مشاهده میشود این اگمنتیشن تاثیر به سزایی در بهبود دقت ندارد،زیرا احساس از چهره ها استخراج میشود و با این کشیدگی در چهره و تغییراتی نظیر این دقت محدود شبکه را نیز دسخوش تغییر بیشتر کرده ایم.

# Part 2


In [None]:
class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        self.linear_relu_stack = nn.Sequential(
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 128),
            nn.ReLU(),
            nn.Linear(128, 32),
            nn.ReLU(),
            nn.Linear(32, 8),
            nn.ReLU(),
            nn.Linear(8, 3),
        )

    def forward(self, x):
        logits = self.linear_relu_stack(x)
        return logits

In [None]:
train_data = MSCTDDataset(img_dir = '/content/drive/MyDrive/Project/Phase0', mode = 'train', resize = (640,316), transform = ToTensor())
val_data = MSCTDDataset(img_dir = '/content/drive/MyDrive/Project/Phase0', mode = 'dev', resize = (640,316), transform = ToTensor())
test_data = MSCTDDataset(img_dir = '/content/drive/MyDrive/Project/Phase0', mode = 'test',  resize = (640,316), transform = ToTensor())

batch_size=128
train_dataloader =  DataLoader(train_data, batch_size=batch_size)
val_dataloader =  DataLoader(val_data, batch_size=batch_size)
test_dataloader =  DataLoader(test_data, batch_size=batch_size)


Loading Text Files
Loading Train Images
Train Images Count: 20244

Loading Text Files
Loading Validation Images
Dev Images Count: 5066

Loading Text Files
Loading Test Images
Test Images Count: 5070


In [None]:
# backbone model settings
backbone = resnet50(weights=ResNet50_Weights.DEFAULT)
for param in backbone.parameters():
      param.requires_grad = False

backbone.fc = nn.Linear(2048, 1024)
Resnet50_model = nn.Sequential(backbone,MyModel()).to(device)

# setting hyper-parameters
learning_rate = 1e-4
batch_size = 128
epochs = 10
loss_fn = nn.CrossEntropyLoss()

# model and optimizer defenintion
Resnet50_model.train()
optimizer = torch.optim.Adam(Resnet50_model.parameters(), lr=learning_rate)

# lists to save loss and accuracy
train_loss_cnn1 = []
val_loss_cnn1 = []
train_acu_cnn1 = []
val_acu_cnn1 = []

best_acu = 0
for e in (range(epochs)):
    
    print(f'epoch {e+1}/{epochs}:')
    Resnet50_model.train()
    t_loss = train_loop(train_dataloader, Resnet50_model, loss_fn, optimizer)
    
    train_loss_cnn1.append(t_loss[1]),train_acu_cnn1.append(t_loss[0])

    v_loss = Accuracy_Loss(val_dataloader, Resnet50_model, loss_fn)
    val_loss_cnn1.append(v_loss[1]),val_acu_cnn1.append(v_loss[0])

    print(f'train loss:{t_loss[1]:0.4f}    train acc:{t_loss[0]:0.3f} ---- val loss:{v_loss[1]:0.4f}   val acc:{v_loss[0]:0.3f} \n')
    if val_acu_cnn1[e]>best_acu:
        best_acu = val_acu_cnn1[e]
        torch.save(Resnet50_model, 'Resnet50_model.pth')
        best_epoch = e+1
        
best_Resnet50_model = torch.load('Resnet50_model.pth')
best_Resnet50_model.eval()
print(f"Resnet50 result for epoch {best_epoch}:")
print("Best Model Accuracy for Train Set:", train_acu_cnn1[best_epoch-1])  
print("Best Model Accuracy for Validation Set:",val_acu_cnn1[best_epoch-1])  
print("Best Model Accuracy for Test Set:", Accuracy_Loss(test_dataloader, best_Resnet50_model, loss_fn)[0])  

torch.save(best_Resnet50_model, '/content/drive/MyDrive/Project/Phase1/Resnet50_model2.pth')  


epoch 1/10:


100%|██████████| 159/159 [09:25<00:00,  3.56s/it]
100%|██████████| 40/40 [02:15<00:00,  3.40s/it]


train loss:1.0896    train acc:38.696 ---- val loss:1.0984   val acc:36.303 

epoch 2/10:


100%|██████████| 159/159 [09:20<00:00,  3.53s/it]
100%|██████████| 40/40 [02:13<00:00,  3.33s/it]


train loss:1.0897    train acc:38.710 ---- val loss:1.0968   val acc:36.303 

epoch 3/10:


100%|██████████| 159/159 [09:15<00:00,  3.49s/it]
100%|██████████| 40/40 [02:11<00:00,  3.28s/it]


train loss:1.0894    train acc:38.710 ---- val loss:1.0966   val acc:36.303 

epoch 4/10:


100%|██████████| 159/159 [09:15<00:00,  3.50s/it]
100%|██████████| 40/40 [02:11<00:00,  3.28s/it]


train loss:1.0891    train acc:38.631 ---- val loss:1.0966   val acc:36.303 

epoch 5/10:


100%|██████████| 159/159 [09:19<00:00,  3.52s/it]
100%|██████████| 40/40 [02:14<00:00,  3.37s/it]


train loss:1.0889    train acc:38.523 ---- val loss:1.0968   val acc:36.303 

epoch 6/10:


100%|██████████| 159/159 [09:15<00:00,  3.49s/it]
100%|██████████| 40/40 [02:13<00:00,  3.34s/it]


train loss:1.0888    train acc:38.340 ---- val loss:1.0970   val acc:36.303 

epoch 7/10:


100%|██████████| 159/159 [09:11<00:00,  3.47s/it]
100%|██████████| 40/40 [02:14<00:00,  3.37s/it]


train loss:1.0883    train acc:38.458 ---- val loss:1.0975   val acc:36.303 

epoch 8/10:


100%|██████████| 159/159 [09:13<00:00,  3.48s/it]
100%|██████████| 40/40 [02:11<00:00,  3.28s/it]


train loss:1.0880    train acc:38.286 ---- val loss:1.0983   val acc:36.303 

epoch 9/10:


100%|██████████| 159/159 [09:13<00:00,  3.48s/it]
100%|██████████| 40/40 [02:11<00:00,  3.30s/it]


train loss:1.0876    train acc:38.523 ---- val loss:1.0986   val acc:36.303 

epoch 10/10:


100%|██████████| 159/159 [09:12<00:00,  3.48s/it]
100%|██████████| 40/40 [02:11<00:00,  3.30s/it]


train loss:1.0874    train acc:38.725 ---- val loss:1.0980   val acc:36.303 

Resnet50 result for epoch 1:
Best Model Accuracy for Train Set: 38.69565217391304
Best Model Accuracy for Validation Set: 36.30258739877543


100%|██████████| 40/40 [02:08<00:00,  3.22s/it]


Best Model Accuracy for Test Set: 42.68798105387803


دقت این شبکه بهتر است، اما با مشاهده خروجی مشاهده میشود که این شبکه برای بخش عمده ای از ورودی ، خروجی 1 تولید میکند و از آنجایی که بیشتر داده تست ما 1 هستند، دقت بهتر شده پس بهتر است در بخش بعد انرا با مدل بخش قبل ترکیب کرد.

# Part3

In [None]:
def final_merge_model(data_loader, CNN_model, Face_model, resnet_model):
    i = 0
    acu = 0
    size = len(data_loader.dataset)
    trans = transforms.ToTensor()
    for batch in tqdm(data_loader):
        y_pred = []
        images = batch['image'].numpy()
        sentiments = batch['sentiment'].numpy()
        X3 = batch['image']
        img_snt = zip(images,sentiments,X3) 
        
      
        for img,snt,X3 in img_snt:
          face_img = []
          final_labels = []
          boxes, probs = Face_model.detect(img,landmarks=False)
          try:
              boxes = np.array(boxes,dtype='uint64')
              for x1,y1,x2,y2 in boxes:
                  face = img[y1:y2, x1:x2, :]
                  face = cv2.resize(face,(40,60))
                  face_img.append(face)
                  X2 = trans(face).to(device)
                  x,y,z = X2.shape
                  X2 = X2.resize(1,x,y,z)
                  pred = CNN_model(X2)
                  label = pred.argmax(1).cpu().numpy()
                  label = label[0]
                  final_labels.append(label)
                  
              if(len(boxes)>1):
                  X3 = trans(X3.numpy()).to(device)
                  x,y,z = X3.shape
                  X3 = X3.resize(1,x,y,z)
                  logit2 = resnet_model(X3)
                  label = logit2.argmax(1).cpu().numpy()
                  label = label[0]
                  final_labels.append(label)
                  final_labels.append(label)
              
              label = np.bincount(final_labels).argmax()
              y_pred.append(label)
          except:
                X3 = trans(X3.numpy()).to(device)
                x,y,z = X3.shape
                X3 = X3.resize(1,x,y,z)
                logit2 = resnet_model(X3)
                label = logit2.argmax(1).cpu().numpy()
                label = label[0]
                y_pred.append(label)

        acu += np.count_nonzero(np.equal(sentiments,y_pred))
    return acu/size

In [None]:
val_data = MSCTDDataset(img_dir = '/content/drive/MyDrive/Project/Phase0', mode = 'val',  resize = (640,316))
test_data = MSCTDDataset(img_dir = '/content/drive/MyDrive/Project/Phase0', mode = 'test',  resize = (640,316))
batch_size = 128
val_dataloader =  DataLoader(val_data, batch_size=batch_size)
test_dataloader =  DataLoader(test_data, batch_size=batch_size)

best_CNN1_model = torch.load('/content/drive/MyDrive/Project/Phase1/CNN1_model.pth')
best_resnet_model = torch.load('/content/drive/MyDrive/Project/Phase1/Resnet50_model2.pth')
mtcnn = MTCNN(select_largest=False, post_process=False, device='cuda:0')
final_merge_model(test_dataloader,best_CNN1_model,mtcnn,best_resnet_model)


Loading Text Files

Loading Text Files
Loading Test Images
Test Images Count: 5070


100%|██████████| 40/40 [04:59<00:00,  7.49s/it]


0.40299980264456287

برای تلفیق دو مدل اینگونه عمل میکنیم که اگر تصویر تنها یک چهره داشت از مدل بخش 1 استفاده میکنیم-اگرتعداد چهره ها بیش از یکی بود علاوه بر برچسب های حاصل از مدل 1 دو برچسب هم از مدل2 در نظر میگیریم و سپس رای گیری انجام میدهیم، در صورتی هم که چهره ای در تصویر نبود خروجی تمامی وابسته به مدل 2 می باشد.