In [1]:
import cv2
import math
import numpy as np
import torch.utils.data as data
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import os,torch
import cv2
import torch.nn as nn
import torch.optim as optim
import torchvision
import torch.nn.functional as F
from torchvision import models
import argparse
import torchfile
from PIL import Image
from torchvision import datasets
from vggmodel.vggface import VGGFace
import random

In [2]:
class EmotiWDataset(Dataset):
    """Group-emotion dataset pairing one global feature vector with cropped faces.

    Directory layout read by this class (established by the path building below):

    * ``<image_filelist>/{Negative,Neutral,Positive}/<stem>.<ext>`` -- one archive
      per image, loaded with ``np.load`` and indexed with ``'global_feature'``.
    * ``<face_filelist>/{Negative,Neutral,Positive}/<stem>_<k>.jpg`` -- the k-th
      cropped face of the image, ``k = 0, 1, ...``.

    Labels are 0 (Negative), 1 (Neutral) and 2 (Positive). Samples are stored in
    that class order, so every class occupies one contiguous index range.

    Besides its own faces, every item that has at least one face gets two extra
    "contrast" faces appended, one drawn from each of the two *other* classes.

    Mutable state used by the training loop:

    * ``Extract_Index[i]``    -- face number used when sample ``i`` is drawn as a
      contrast face for another sample (initially 0).
    * ``Individual_Label[i]`` -- int64 array of length ``maxFaces + 2`` holding the
      per-face labels of sample ``i`` (initialised to the group label).
    """

    # Class sub-directories in label order (index == label).
    _CLASS_DIRS = ('Negative/', 'Neutral/', 'Positive/')

    def __init__(self, image_filelist, face_filelist, maxFaces, transformFaces=None):
        """
        Args:
            image_filelist: Root directory of the per-image global-feature files.
            face_filelist: Root directory of the cropped face images.
            maxFaces: Maximum number of faces read per image.
            transformFaces (callable, optional): Transform applied to every loaded
                face image. Its result is written into a ``(3, 224, 224)`` float32
                slot, so it has to produce something of that shape.
        """
        self.image_filelist = image_filelist
        self.face_filelist = face_filelist
        self.transformFaces = transformFaces
        self.maxFaces = maxFaces

        self.name_filelist = []
        self.label = []
        self.file_paths = []
        per_class_face_paths = []

        for class_id, class_dir in enumerate(self._CLASS_DIRS):
            # os.path.join (instead of string '+') also works when the root has
            # no trailing slash.
            file_names = sorted(os.listdir(os.path.join(image_filelist, class_dir)))
            stems = [name.split('.')[0] for name in file_names]

            self.name_filelist.extend(stems)
            self.label.extend(np.full(len(file_names), class_id, dtype=np.int64))
            self.file_paths.extend(
                os.path.join(image_filelist, class_dir, name) for name in file_names)
            per_class_face_paths.append(
                [os.path.join(face_filelist, class_dir, stem) for stem in stems])

        self.neg_face_path, self.neu_face_path, self.pos_face_path = per_class_face_paths
        self.all_face_path = self.neg_face_path + self.neu_face_path + self.pos_face_path

        self.Extract_Index = np.zeros(len(self.all_face_path), dtype=np.int64)
        self.Individual_Label = [
            np.full(self.maxFaces + 2, group_label, dtype=np.int64)
            for group_label in self.label
        ]

    def __len__(self):
        return len(self.file_paths)

    def _load_face(self, face_path):
        """Load one face image and apply ``transformFaces`` when it is set."""
        face = pil_loader(face_path)
        if self.transformFaces:
            face = self.transformFaces(face)
        return face

    def _sample_contrast_face(self, low, high):
        """Load a random face from the samples with index in ``[low, high]``.

        For a randomly chosen sample the face numbered ``Extract_Index[sample]``
        is used; samples whose file does not exist are skipped and another
        sample is drawn.

        Raises:
            ValueError: if the index range is empty (the class has no samples).
            RuntimeError: if no existing face file was found after many draws
                (previously this situation looped forever).
        """
        if high < low:
            raise ValueError(
                'empty sample range [%d, %d] for contrast face' % (low, high))

        max_tries = 100 * (high - low + 1)
        for _ in range(max_tries):
            pick = random.randint(low, high)
            face_path = (self.all_face_path[pick] + '_'
                         + str(self.Extract_Index[pick]) + '.jpg')
            if os.path.exists(face_path):
                return self._load_face(face_path)

        raise RuntimeError(
            'no contrast face found in sample range [%d, %d] after %d draws'
            % (low, high, max_tries))

    def __getitem__(self, idx):
        """Return ``(global_feature, individual_faces, individual_label, label,
        numberFaces, idx)`` for sample ``idx``.

        ``individual_faces`` is a zero-padded float32 array of shape
        ``(maxFaces + 2, 3, 224, 224)``: slots ``[0, numberFaces)`` hold the
        sample's own faces and, when ``numberFaces > 0``, slots ``numberFaces``
        and ``numberFaces + 1`` hold the two contrast faces.
        """
        # The archive is closed as soon as the array has been read so that
        # DataLoader workers do not accumulate open file handles.
        with np.load(self.file_paths[idx]) as archive:
            global_feature = archive['global_feature']

        maxFaces = self.maxFaces
        individual_faces = np.zeros((maxFaces + 2, 3, 224, 224), dtype='float32')
        # NOTE: this is the stored array itself, not a copy -- the contrast
        # labels written below therefore persist in self.Individual_Label.
        individual_label = self.Individual_Label[idx]

        # Own faces: numbered consecutively from 0; stop at the first gap.
        numberFaces = 0
        for i in range(maxFaces):
            face_path = self.all_face_path[idx] + '_' + str(i) + '.jpg'
            if not os.path.exists(face_path):
                break
            individual_faces[i] = self._load_face(face_path)
            numberFaces += 1

        label = self.label[idx]

        if numberFaces != 0:
            len_neg = len(self.neg_face_path)
            len_neu = len(self.neu_face_path)
            len_all = len(self.all_face_path)
            # Inclusive sample-index range of every class, indexed by label.
            class_ranges = (
                (0, len_neg - 1),
                (len_neg, len_neg + len_neu - 1),
                (len_neg + len_neu, len_all - 1),
            )
            other_labels = [c for c in (0, 1, 2) if c != label]
            for offset, other_label in enumerate(other_labels):
                low, high = class_ranges[other_label]
                individual_label[numberFaces + offset] = other_label
                individual_faces[numberFaces + offset] = self._sample_contrast_face(low, high)

        return global_feature, individual_faces, individual_label, label, numberFaces, idx
    
def pil_loader(path):
    """Open the image file at ``path`` with PIL and return it converted to RGB.

    Both the file handle and the PIL image are released before returning; the
    returned object is the converted copy.
    """
    with open(path, 'rb') as handle, Image.open(handle) as picture:
        return picture.convert('RGB')

In [3]:
# Restrict CUDA to the device with index 1, then choose GPU when available.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Number of training epochs.
EPOCH = 50

# parse_known_args() is used so that arguments injected by the notebook kernel
# are ignored instead of raising.
parser = argparse.ArgumentParser(description='PyTorch GAF_2 Training')
parser.add_argument('--outf', default='./model/', help='folder to output images and model checkpoints')
args = parser.parse_known_args()[0]

# Class names, indexed by label.
classes = ('Negative', 'Neutral', 'Positive')


def _build_face_transform(augment):
    """Return the face preprocessing pipeline.

    Every pipeline resizes to 224x224, converts to a tensor and normalises with
    fixed per-channel statistics; with ``augment=True`` a random rotation
    (20 degrees) and a random horizontal flip are inserted after the resize.
    """
    steps = [transforms.Resize((224, 224))]
    if augment:
        steps.append(transforms.RandomRotation(20))
        steps.append(transforms.RandomHorizontalFlip())
    steps.append(transforms.ToTensor())
    steps.append(transforms.Normalize(mean=[0.5115, 0.3799, 0.3297], std=[0.1825, 0.1584, 0.1492]))
    return transforms.Compose(steps)


train_faces_data_transform = _build_face_transform(augment=True)
val_faces_data_transform = _build_face_transform(augment=False)
test_faces_data_transform = _build_face_transform(augment=False)

# Training split: shuffled, small batches.
train_dataset = EmotiWDataset(
    image_filelist='../GAF_2_global_features/GAF_2_Train_800/',
    face_filelist='../CroppedFaces_image_low_quality/GAF_2_Train/',
    maxFaces=16,
    transformFaces=train_faces_data_transform,
)
trainloader = DataLoader(train_dataset, shuffle=True, batch_size=4, num_workers=2, pin_memory=True)

# Validation split: fixed order, large batches.
val_dataset = EmotiWDataset(
    image_filelist='../GAF_2_global_features/GAF_2_Val_800/',
    face_filelist='../CroppedFaces_image_low_quality/GAF_2_Val/',
    maxFaces=16,
    transformFaces=val_faces_data_transform,
)
validationloader = DataLoader(val_dataset, shuffle=False, batch_size=256, num_workers=2, pin_memory=True)


In [4]:
class Net(nn.Module):
    """Face branch built from a VGGFace-style backbone plus a fusion module.

    The layers are taken from ``model_1``: ``model_1.features`` (a mapping of
    named layers that are applied in order) and the fully connected layers
    ``model_1.fc.fc6``, ``fc7`` and ``fc8``. ReLU and dropout (p=0.5) are added
    after ``fc6`` and ``fc7``; ``fc8`` produces the per-face class scores.

    Args:
        model_1: Backbone exposing ``features`` and ``fc.fc6`` / ``fc.fc7`` /
            ``fc.fc8`` as described above.
    """

    def __init__(self, model_1):
        super(Net, self).__init__()
        self.n_heads = 1
        self.features = model_1.features
        self.fc1_layer = nn.Sequential(model_1.fc.fc6, nn.ReLU(), nn.Dropout(0.5))
        self.fc2_layer = nn.Sequential(model_1.fc.fc7, nn.ReLU(), nn.Dropout(0.5))
        self.output_layer = nn.Sequential(model_1.fc.fc8)

        # Fusion of the per-face feature (fc7 output) with the global image feature.
        self.Self_Fusion_layer = Self_Fusion_layer(imgf_dim = 1024, facef_dim = 4096)

    def forward(self, x1, x2):
        """Score a batch of faces and weight them against the global feature.

        Args:
            x1: Batch of face images; the first dimension is the number of faces.
            x2: Global feature of the image the faces belong to.

        Returns:
            ``(attention_weights, face_out)``: the fusion layer's output for the
            faces and the ``fc8`` class scores of every face.
        """
        for layer in self.features.values():
            x1 = layer(x1)
        x1 = x1.view(x1.size(0), -1)
        # The previous version also allocated an unused tensor with a hard-coded
        # .cuda() here, which made forward() fail on CPU-only machines.
        x1 = self.fc1_layer(x1)
        x1 = self.fc2_layer(x1)
        attention_weights = self.Self_Fusion_layer(x1, x2)
        face_out = self.output_layer(x1)
        return attention_weights, face_out

In [5]:
class Self_Fusion_layer(nn.Module):
    """Cosine-similarity weighting of face features against an image feature.

    Face features and the image feature are each projected to 128 dimensions by
    a linear layer followed by ReLU; the result is the cosine similarity between
    every projected face and the projected image feature.

    Args:
        imgf_dim: Size of the image feature.
        facef_dim: Size of one face feature.
    """

    def __init__(self, imgf_dim, facef_dim):
        super().__init__()
        # Projections into the shared 128-d comparison space.
        self.face_fc_layer = nn.Sequential(
            nn.Linear(in_features=facef_dim, out_features=128),
            nn.ReLU(),
        )
        self.img_fc_layer = nn.Sequential(
            nn.Linear(in_features=imgf_dim, out_features=128),
            nn.ReLU(),
        )

    def forward(self, x1, x2):
        """Return one similarity value per row of ``x1``.

        Args:
            x1: Face features, one row per face.
            x2: Image feature; it is tiled once per face before the comparison.
        """
        face_embedding = self.face_fc_layer(x1)
        n_faces = face_embedding.shape[0]
        image_embedding = self.img_fc_layer(x2).repeat(n_faces, 1)
        return torch.cosine_similarity(face_embedding, image_embedding)

In [6]:
# Loading model
vggface = VGGFace()
# map_location='cpu' lets the checkpoint load on machines without the GPU it was
# saved from; the network is moved to `device` below.
vggface.load_state_dict(torch.load('pretrained/vggface.pth', map_location='cpu'))

# Replace the final classifier with a fresh 3-way layer (Negative/Neutral/Positive).
fc8_features = vggface.fc.fc8.in_features
vggface.fc.fc8 = nn.Linear(fc8_features, 3)
net = Net(vggface)
net = net.to(device)

In [7]:
# Cross-entropy loss, the usual choice for multi-class classification.
criterion = nn.CrossEntropyLoss()

# Two parameter groups: the convolutional backbone (net.features) and everything
# else. Both currently use the same learning rate.
base_params = [id(p) for p in net.features.parameters()]
logits_params = [p for p in net.parameters() if id(p) not in base_params]

params = [
    {'params': logits_params, 'lr': 1e-5},
    {'params': net.features.parameters(), 'lr': 1e-5},
]

optimizer = optim.Adam(params, weight_decay=1e-4)

# Learning rate is multiplied by 0.99 at every scheduler step.
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99)

margin_1 = 0.8       # margin of the contrastive attention loss
margin_2 = 0.6       # probability gap required before a face is relabelled
relabel_epoch = 10   # first epoch (1-based) in which faces are relabelled
# Training
if __name__ == "__main__":
    # A checkpoint is only written once validation accuracy (in percent)
    # exceeds this value.
    best_acc = 77
    print("Start Training, FacesNet!")
    with open("FacesNet_GAF_2_acc.txt", "w") as f:
        with open("FacesNet_GAF_2_log.txt", "w")as f2:
            length = len(trainloader)
            for epoch in range(EPOCH):
                print('\nEpoch: %d' % (epoch + 1))
                net.train()
                sum_loss = 0.0
                correct = 0.0
                total = 0.0
                sum_group_loss = 0.0
                sum_loss_CL = 0.0
                sum_individual_loss = 0.0

                for i, batch in enumerate(trainloader, 0):
                    torch.cuda.empty_cache()
                    # Prepare the data.
                    global_feature, individual_faces, individual_label, labels, numberFaces, index = batch

                    # Images without any detected face are left out of the batch.
                    has_faces = numberFaces != 0
                    n_valid = int(has_faces.sum())
                    if n_valid == 0:
                        continue
                    clear_labels = labels[has_faces]

                    global_feature, individual_faces, labels, clear_labels = global_feature.to(device),individual_faces.to(device), labels.to(device), clear_labels.to(device)
                    individual_label = individual_label.to(device)
                    optimizer.zero_grad()

                    # forward + backward
                    # Plain buffers: writing graph-attached values into them
                    # in place keeps them differentiable, and (unlike a leaf
                    # created with requires_grad_) this also works on CPU.
                    group_outputs = torch.zeros(n_valid, 3, device=device)
                    loss_CL = torch.zeros(n_valid, device=device)
                    loss_ICE = torch.zeros(n_valid, device=device)
                    count = 0

                    for j in range(labels.shape[0]):
                        n_faces = int(numberFaces[j])
                        if n_faces == 0:
                            continue

                        # The two slots after the image's own faces hold the
                        # contrast faces appended by the dataset.
                        attention_weights, outputs = net(individual_faces[j, :n_faces + 2], global_feature[j])
                        face_labels = individual_label[j][:n_faces]

                        # Individual (per-face) cross-entropy on the own faces only.
                        loss_ICE[count] = criterion(outputs[:-2], face_labels)

                        # Contrastive Learning
                        if n_faces > 1:
                            # Faces whose individual label equals the group
                            # label should get higher attention than the rest.
                            same_class = individual_label[j][:n_faces + 2] == labels[j]
                            high_mean = torch.mean(attention_weights[same_class])
                            low_mean = torch.mean(attention_weights[~same_class])
                            diff = low_mean - high_mean + margin_1

                            # Normalise the attention of the own faces; the
                            # normaliser is never allowed to drop below 1.
                            weights_sum = attention_weights[:-2].sum(dim=0) + 1e-9
                            if weights_sum < 1:
                                weights_sum = 1
                            weights_norm = 1/weights_sum * attention_weights[:-2]

                            # weights_norm is 1-D, so no transpose is needed.
                            group_outputs[count] = torch.matmul(weights_norm, outputs[:n_faces])
                            outputs_norm = F.softmax(outputs[:n_faces], dim=1)

                            # Remember the face that is most confident for the
                            # group label; the dataset uses it as contrast face.
                            _, top1_idx = torch.topk(outputs_norm[:, labels[j]].squeeze(), 1, largest = True)
                            trainloader.dataset.Extract_Index[int(index[j])] = int(top1_idx)

                            # Relabel samples
                            if epoch + 1 >= relabel_epoch:
                                Pmax, predicted_labels = torch.max(outputs_norm, 1) # predictions
                                Pgt = torch.gather(outputs_norm, 1, face_labels.view(-1,1)).squeeze() # predicted probabilities of the current labels
                                true_or_false = (Pmax - Pgt > margin_2)&(labels[j] != 1)
                                update_idx = true_or_false.nonzero().squeeze() # faces where (Pmax - Pgt > margin_2)

                                if update_idx.numel():
                                    relabels = predicted_labels[update_idx] # predictions where (Pmax - Pgt > margin_2)
                                    trainloader.dataset.Individual_Label[int(index[j])][update_idx.cpu().numpy()] = relabels.cpu().numpy()

                        else:
                            # Single face: its scores are the group scores.
                            group_outputs[count] = outputs[0]
                            # NOTE(review): only the first contrast face is used
                            # here ([-2:-1]) while the multi-face branch uses
                            # both -- confirm whether [-2:] was intended.
                            low_group = attention_weights[-2:-1]
                            high_mean = attention_weights[0]
                            low_mean = torch.mean(low_group)
                            diff = low_mean - high_mean + margin_1

                        # Hinge: only a positive margin violation is penalised
                        # (the buffer already holds 0 otherwise).
                        if diff > 0:
                            loss_CL[count] = diff

                        count += 1

                    loss_GCE = criterion(group_outputs, clear_labels)
                    loss_cl_mean = loss_CL.mean(dim=0)
                    loss_ice_mean = loss_ICE.mean(dim=0)
                    loss = loss_GCE + loss_cl_mean + loss_ice_mean
                    loss.backward()
                    optimizer.step()

                    # Print loss and accuracy after every batch.
                    sum_loss += loss.item()
                    sum_group_loss += loss_GCE.item()
                    sum_loss_CL += loss_cl_mean.item()
                    sum_individual_loss += loss_ice_mean.item()
                    _, predicted = torch.max(group_outputs.data, 1)
                    total += clear_labels.size(0)
                    correct += predicted.eq(clear_labels.data).sum().item()
                    print('[epoch:%d, iter:%d] Loss: %.03f | Acc: %.3f%% '
                          % (epoch + 1, (i + 1 + epoch * length), sum_loss / (i + 1), 100. * correct / total))
                    f2.write('%03d  %05d |Loss: %.03f | Acc: %.3f%% | Group Loss: %.3f | Individual Loss: %.3f | Contrastive Loss: %.3f '
                          % (epoch + 1, (i + 1 + epoch * length), sum_loss / (i + 1), 100. * correct / total, sum_group_loss/ (i + 1), sum_individual_loss/ (i + 1), sum_loss_CL/ (i + 1)))
                    f2.write('\n')
                    f2.flush()

                scheduler.step()
                torch.cuda.empty_cache()
                # Evaluate on the validation set after every epoch.
                print("Waiting Test!")
                net.eval()
                with torch.no_grad():
                    correct = 0
                    total = 0
                    class_correct = [0. for _ in range(3)]  # correctly classified images per class
                    class_total = [0. for _ in range(3)]    # evaluated images per class
                    for batch in validationloader:
                        global_feature, individual_faces, individual_label, labels, numberFaces, index = batch

                        has_faces = numberFaces != 0
                        clear_labels = labels[has_faces]

                        global_feature, individual_faces, labels, clear_labels = global_feature.to(device),individual_faces.to(device), labels.to(device), clear_labels.to(device)

                        group_outputs = torch.zeros(int(has_faces.sum()), 3, device=device)

                        count = 0
                        for j in range(labels.shape[0]):
                            n_faces = int(numberFaces[j])
                            if n_faces == 0:
                                continue
                            attention_weights, outputs = net(individual_faces[j, :n_faces], global_feature[j])
                            weights_sum = attention_weights.sum(dim=0) + 1e-9  # avoid division by zero
                            weights_norm = 1/weights_sum * attention_weights
                            group_outputs[count] = torch.matmul(weights_norm, outputs)
                            count += 1

                        torch.cuda.empty_cache()
                        _, predicted = torch.max(group_outputs, 1)
                        hits = predicted == clear_labels
                        # NOTE(review): `total` counts every image, including
                        # those without faces, which can never be counted as
                        # correct -- confirm this is the intended metric.
                        total += labels.size(0)
                        correct += int(hits.sum())
                        # Per-class statistics. One loop handles every batch
                        # size; previously a single-image batch updated
                        # class_correct but not class_total.
                        for j in range(clear_labels.shape[0]):
                            cls = int(clear_labels[j])
                            class_correct[cls] += float(hits[j])
                            class_total[cls] += 1

                    for j in range(3):
                        if class_total[j]:
                            print('Accuracy of %5s : %2d %%' % (
                                    classes[j], 100 * class_correct[j] // class_total[j]))
                        else:
                            print('Accuracy of %5s : n/a (no samples)' % classes[j])
                    # True division: the value is printed with three decimals.
                    acc = 100. * correct / total if total else 0.0
                    print('Accuracy on val set：%.3f%%' % acc)
                    f.write("EPOCH=%03d,Accuracy= %.3f%%" % (epoch + 1, acc))
                    f.write('\n')
                    f.flush()
                    if acc > best_acc:
                        print('Saving model......')
                        #torch.save(net.state_dict(), '%snet_%03d.pth' % (args.outf, epoch + 1))
                        torch.save(net.state_dict(), 'GAF_2_best_net_face.pth')
                        with open("FacesNet_GAF_2_Self_Fusion_best_acc.txt", "w") as f3:
                            f3.write("EPOCH=%d,best_acc= %.3f%%" % (epoch + 1, acc))
                        best_acc = acc

            print("Training Finished, TotalEPOCH=%d" % EPOCH)

Start Training, FacesNet!

Epoch: 1
[epoch:1, iter:1] Loss: 2.962 | Acc: 100.000% 
[epoch:1, iter:2] Loss: 2.991 | Acc: 50.000% 
[epoch:1, iter:3] Loss: 2.991 | Acc: 41.667% 
[epoch:1, iter:4] Loss: 3.010 | Acc: 37.500% 
[epoch:1, iter:5] Loss: 3.009 | Acc: 40.000% 
[epoch:1, iter:6] Loss: 3.006 | Acc: 37.500% 
[epoch:1, iter:7] Loss: 3.003 | Acc: 39.286% 
[epoch:1, iter:8] Loss: 3.002 | Acc: 40.625% 
[epoch:1, iter:9] Loss: 2.999 | Acc: 38.889% 
[epoch:1, iter:10] Loss: 3.006 | Acc: 35.000% 
[epoch:1, iter:11] Loss: 3.001 | Acc: 36.364% 
[epoch:1, iter:12] Loss: 2.994 | Acc: 37.500% 
[epoch:1, iter:13] Loss: 2.990 | Acc: 40.385% 
[epoch:1, iter:14] Loss: 2.992 | Acc: 41.071% 
[epoch:1, iter:15] Loss: 2.989 | Acc: 43.333% 
[epoch:1, iter:16] Loss: 2.988 | Acc: 43.750% 
[epoch:1, iter:17] Loss: 2.988 | Acc: 45.588% 
[epoch:1, iter:18] Loss: 2.987 | Acc: 45.833% 
[epoch:1, iter:19] Loss: 2.983 | Acc: 44.737% 
[epoch:1, iter:20] Loss: 2.984 | Acc: 45.000% 
[epoch:1, iter:21] Loss: 2.985 |

KeyboardInterrupt: 