In [1]:
import cv2
import math
import numpy as np
import torch.utils.data as data
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import os,torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torch.nn.functional as F
from torchvision import models
import argparse
import torchfile
from PIL import Image
from torchvision import datasets
import random
import sys

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class EmotiWDataset(Dataset):
    """Dataset of group images described by pre-extracted per-face features.

    Samples are enumerated from the image root, which must contain the
    sub-directories ``Negative/``, ``Neutral/`` and ``Positive/`` (labels 0, 1
    and 2 respectively). For every image, the face features and face
    coordinates are read from ``<stem>_<i>.npz`` archives located under the
    same sub-directory of the feature root and the coordinate root, where
    ``<stem>`` is the image file name up to its first ``.``.
    """

    # Class sub-directories; the position in this tuple is the integer label.
    _CLASS_DIRS = ('Negative/', 'Neutral/', 'Positive/')

    # Width of one face-feature vector stored under the 'faces_feature' key.
    _FEATURE_DIM = 4096

    def __init__(self, image_filelist, face_filelist, coordinates_filelist, maxFaces):
        """
        Args:
            image_filelist: Root directory of the images (contains the three
                class sub-directories). Only the file names are used.
            face_filelist: Root directory of the per-face feature archives
                (``.npz`` files exposing the key ``faces_feature``).
            coordinates_filelist: Root directory of the per-face coordinate
                archives (``.npz`` files exposing the key ``normalize_data``).
            maxFaces: Maximum number of faces loaded per image; also the
                leading dimension of the arrays returned by ``__getitem__``.
        """
        self.image_filelist = image_filelist
        self.face_filelist = face_filelist
        self.maxFaces = maxFaces

        self.name_filelist = []         # image stems, in sample order
        self.label = []                 # np.int64 class index per sample
        self.file_paths = []            # full image paths
        self.all_face_path = []         # feature path prefixes (no '_<i>.npz')
        self.all_coordinates_path = []  # coordinate path prefixes (no '_<i>.npz')

        for class_index, class_dir in enumerate(self._CLASS_DIRS):
            # os.path.join (rather than string concatenation) keeps this
            # working whether or not the root ends with a path separator.
            filenames = sorted(os.listdir(os.path.join(image_filelist, class_dir)))
            for filename in filenames:
                stem = filename.split('.')[0]
                self.name_filelist.append(stem)
                self.label.append(np.int64(class_index))
                self.file_paths.append(os.path.join(image_filelist, class_dir, filename))
                self.all_face_path.append(os.path.join(face_filelist, class_dir, stem))
                self.all_coordinates_path.append(
                    os.path.join(coordinates_filelist, class_dir, stem))

    def __len__(self):
        """Return the number of images found under the image root."""
        return len(self.file_paths)

    def __getitem__(self, idx):
        """Load the face features and coordinates of sample ``idx``.

        Faces are read in index order (``_0``, ``_1``, ...) and loading stops
        at the first missing feature archive or after ``maxFaces`` faces.

        Returns:
            tuple: ``(face_features, faces_coordinate, label, numberFaces)``
            where ``face_features`` is a float32 array of shape
            ``(maxFaces, 4096)``, ``faces_coordinate`` is a float32 array of
            shape ``(maxFaces, 4)`` (both zero-padded past ``numberFaces``),
            ``label`` is the class index and ``numberFaces`` is the number of
            faces actually loaded (possibly 0).
        """
        maxFaces = self.maxFaces
        face_features = np.zeros((maxFaces, self._FEATURE_DIM), dtype='float32')
        faces_coordinate = np.zeros((maxFaces, 4), dtype='float32')

        counter = 0
        for i in range(maxFaces):
            suffix = '_' + str(i) + '.npz'
            face_path = self.all_face_path[idx] + suffix
            if not os.path.exists(face_path):
                break
            coordinate_path = self.all_coordinates_path[idx] + suffix
            # NpzFile objects hold an open file handle; close them promptly.
            with np.load(coordinate_path) as coordinate, np.load(face_path) as f_fea:
                faces_coordinate[i] = coordinate['normalize_data']
                face_features[i] = f_fea['faces_feature']
            counter += 1

        return face_features, faces_coordinate, self.label[idx], counter

In [3]:
# Expose only the first GPU to this process; fall back to the CPU when CUDA
# is not available.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

parser = argparse.ArgumentParser(description='PyTorch GAF_2 Training')
args = parser.parse_known_args()[0]

# Seed every RNG involved: DataLoader shuffling and weight initialisation draw
# from the torch generator, which random.seed() alone does not control.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

classes = ('Negative', 'Neutral', 'Positive')

MAX_FACES = 16

# Pinned host memory only helps host-to-GPU copies; requesting it without CUDA
# just triggers a warning.
PIN_MEMORY = torch.cuda.is_available()

train_dataset = EmotiWDataset(
    image_filelist='../../Data/GAF_2_Data/Train/',
    face_filelist='../../Data/GAF_2_crop_faces_features/Train/',
    coordinates_filelist='../../Data/Coordinates_Data_low_quality/GAF_2_Train/',
    maxFaces=MAX_FACES,
)

trainloader = DataLoader(train_dataset, shuffle=True, batch_size=16, num_workers=0, pin_memory=PIN_MEMORY)

val_dataset = EmotiWDataset(
    image_filelist='../../Data/GAF_2_Data/Val/',
    face_filelist='../../Data/GAF_2_crop_faces_features/Val/',
    coordinates_filelist='../../Data/Coordinates_Data_low_quality/GAF_2_Val/',
    maxFaces=MAX_FACES,
)

validationloader = DataLoader(val_dataset, shuffle=False, batch_size=128, num_workers=0, pin_memory=PIN_MEMORY)


In [4]:
def generate_multi_graph(faces_coordinates):
    """Build three dense similarity graphs over the faces of one image.

    Each graph has one node per face plus one extra, last node that is
    connected to every node (itself included) with weight 1.

    Args:
        faces_coordinates: Tensor of shape ``(num_faces, 4)``. Column 0 drives
            the X graph, column 1 the Y graph and columns 2:4 the WH graph
            (presumably normalised x, y, width, height - confirm against the
            coordinate extraction step).

    Returns:
        tuple: ``(graph_X, graph_Y, graph_WH)``, each of shape
        ``(num_faces + 1, num_faces + 1)`` and allocated on the same device as
        ``faces_coordinates``. The face-to-face entries are
        ``1 - |x_i - x_j|``, ``1 - |y_i - y_j|`` and
        ``1 - 0.5 * (|w_i - w_j| + |h_i - h_j|)``; all other entries are 1.
    """
    num_node = faces_coordinates.shape[0]
    size = num_node + 1
    # Allocate directly on the input's device so GPU inputs do not have to be
    # copied to the host and back for every sample.
    target_device = faces_coordinates.device
    graph_X = torch.ones(size, size, device=target_device)
    graph_Y = torch.ones(size, size, device=target_device)
    graph_WH = torch.ones(size, size, device=target_device)

    xs = faces_coordinates[:, 0]
    ys = faces_coordinates[:, 1]
    wh = faces_coordinates[:, 2:4]

    # Pairwise L1 distances via broadcasting.
    graph_X[:num_node, :num_node] = 1 - (xs[:, None] - xs[None, :]).abs()
    graph_Y[:num_node, :num_node] = 1 - (ys[:, None] - ys[None, :]).abs()
    graph_WH[:num_node, :num_node] = 1 - 0.5 * (wh[:, None, :] - wh[None, :, :]).abs().sum(dim=2)
    return graph_X, graph_Y, graph_WH

In [5]:
class GCN(nn.Module):
    """Two-layer graph convolution without biases.

    Each layer computes ``LeakyReLU((A @ X) W)`` for node features ``X`` and a
    (typically normalised) adjacency matrix ``A``.

    Args:
        dim_in: Size of each input node feature vector.
        dim_out: Size of each output node feature vector.
        dim_hidden: Width of the intermediate layer (defaults to 2048).
    """

    def __init__(self, dim_in, dim_out, dim_hidden=2048):
        super().__init__()
        self.fc1 = nn.Linear(dim_in, dim_hidden, bias=False)
        self.fc2 = nn.Linear(dim_hidden, dim_out, bias=False)
        # Attribute name (including its spelling) is kept for compatibility.
        self.activition = nn.LeakyReLU()

    def forward(self, X, A):
        """Propagate node features through both graph-convolution layers.

        Args:
            X: Node features, shape ``(num_nodes, dim_in)``.
            A: Adjacency matrix, shape ``(num_nodes, num_nodes)``.

        Returns:
            Tensor of shape ``(num_nodes, dim_out)``.
        """
        hidden = self.activition(self.fc1(A.mm(X)))
        return self.activition(self.fc2(A.mm(hidden)))

def _inverse_degree(degree, power):
    """Return ``degree ** power`` with the infinities of zero-degree nodes set to 0."""
    scaled = torch.pow(degree, power)
    return torch.where(torch.isinf(scaled), torch.zeros_like(scaled), scaled)


def normalize(A, symmetric=True):
    """Degree-normalise a dense adjacency matrix.

    Args:
        A: Square adjacency matrix of shape ``(num_nodes, num_nodes)``.
        symmetric: If true, return ``D^-1/2 A D^-1/2``; otherwise return the
            row-normalised ``D^-1 A``. ``D`` is the diagonal matrix of row sums.

    Returns:
        Tensor with the same shape as ``A``. Rows (and, in the symmetric case,
        columns) belonging to zero-degree nodes are left at zero instead of
        turning into NaN.
    """
    d = A.sum(1)
    if symmetric:
        d_inv_sqrt = _inverse_degree(d, -0.5)
        # Scaling rows then columns equals D.mm(A).mm(D) for a diagonal D,
        # without materialising D or doing two dense matmuls.
        return d_inv_sqrt[:, None] * A * d_inv_sqrt[None, :]
    d_inv = _inverse_degree(d, -1)
    return d_inv[:, None] * A

In [6]:
class Multi_Channel_GCN(nn.Module):
    """Three parallel GCN channels (X, Y, WH graphs) fused by channel attention.

    Every channel maps 4096-d node features to 1024-d. The features of the
    last node of each channel are concatenated and fed to a linear + softmax
    layer that yields one weight per channel; the weighted sum of the three
    last-node features is classified into 3 logits.
    """

    def __init__(self):
        super().__init__()
        # Registration order is kept: X, Y, WH, attention, output head.
        self.gcnX_layer = GCN(dim_in=4096, dim_out=1024)
        self.gcnY_layer = GCN(dim_in=4096, dim_out=1024)
        self.gcnWH_layer = GCN(dim_in=4096, dim_out=1024)

        self.channel_attention_layer = nn.Sequential(
            nn.Linear(1024 * 3, 3, bias=False),
            nn.Softmax(dim=0),
        )

        self.fc_output = nn.Linear(1024, 3)

    def forward(self, node_feature, A_X, A_Y, A_WH):
        """Classify one graph.

        Args:
            node_feature: Node features, one row per node; the last row is
                the node whose output is classified.
            A_X, A_Y, A_WH: Un-normalised adjacency matrices of the three
                channels; each is passed through ``normalize`` first.

        Returns:
            tuple: ``(out, weights)`` - the 3 class logits and the 3 channel
            attention weights.
        """
        # Only the last node of each channel feeds the attention and the head.
        last_X = self.gcnX_layer(node_feature, normalize(A_X))[-1]
        last_Y = self.gcnY_layer(node_feature, normalize(A_Y))[-1]
        last_WH = self.gcnWH_layer(node_feature, normalize(A_WH))[-1]

        weights = self.channel_attention_layer(torch.cat([last_X, last_Y, last_WH]))

        fused = weights[0] * last_X + weights[1] * last_Y + weights[2] * last_WH
        out = self.fc_output(fused)
        return out, weights

In [7]:
# Build the model and place its parameters on the selected device.
net = Multi_Channel_GCN().to(device)

In [8]:
# Loss function and optimisation set-up.
criterion = nn.CrossEntropyLoss()  # cross-entropy over the three classes
optimizer = optim.Adam(net.parameters(), lr=1e-5, weight_decay=1e-4)
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)

EPOCH = 50

# Where the best checkpoint is written.
MODEL_PATH = '../../Trained/AffectNet_GAF_2.pth'


def _forward_batch(batch):
    """Run the network on every sample of ``batch`` that has at least one face.

    Args:
        batch: ``(face_features, faces_coordinates, labels, numberFaces)`` as
            produced by the data loaders.

    Returns:
        tuple: ``(group_outputs, clear_labels)`` - the stacked logits of shape
        ``(num_valid, 3)`` and the labels of those same samples, both on
        ``device``. Samples with zero faces are dropped from both.
    """
    face_features, faces_coordinates, labels, numberFaces = batch

    has_faces = numberFaces > 0
    clear_labels = labels[has_faces].to(device)
    face_features = face_features.to(device)
    faces_coordinates = faces_coordinates.to(device)

    # Collect the per-sample logits and stack them: writing in place into a
    # pre-allocated tensor that requires grad fails on CPU, where .to(device)
    # returns the leaf tensor itself.
    per_sample_outputs = []
    for j in range(labels.shape[0]):
        n_faces = int(numberFaces[j])
        if n_faces == 0:
            continue
        individual_features = face_features[j][0:n_faces]
        # The extra graph node carries the mean of the individual face features.
        group_feature = individual_features.mean(0)
        node_features = torch.cat([individual_features, group_feature.unsqueeze(0)], 0)
        graph_X, graph_Y, graph_WH = generate_multi_graph(faces_coordinates[j][0:n_faces])
        graph_X, graph_Y, graph_WH = graph_X.to(device), graph_Y.to(device), graph_WH.to(device)

        outputs, _ = net(node_features, graph_X, graph_Y, graph_WH)
        per_sample_outputs.append(outputs)

    if per_sample_outputs:
        group_outputs = torch.stack(per_sample_outputs)
    else:
        group_outputs = torch.zeros(0, 3, device=device)
    return group_outputs, clear_labels


# Training
if __name__ == "__main__":
    # A checkpoint is only written once validation accuracy (in percent)
    # exceeds this value; afterwards it tracks the best accuracy seen.
    best_acc = 75
    print("Start Training!")
    with open("acc.txt", "w") as f, open("log.txt", "w") as f2:
        for epoch in range(EPOCH):
            print('\nEpoch: %d' % (epoch + 1))
            net.train()
            sum_loss = 0.0
            correct = 0
            total = 0
            batches_done = 0
            length = len(trainloader)

            for i, batch in enumerate(trainloader, 0):
                optimizer.zero_grad()

                # forward + backward
                group_outputs, clear_labels = _forward_batch(batch)
                if clear_labels.shape[0] == 0:
                    # No sample in this batch has a detected face; the loss
                    # would be NaN, so skip the update.
                    continue

                loss = criterion(group_outputs, clear_labels)
                loss.backward()
                optimizer.step()

                # running loss / accuracy over the current epoch
                batches_done += 1
                sum_loss += loss.item()
                _, predicted = torch.max(group_outputs.detach(), 1)
                total += clear_labels.size(0)
                correct += (predicted == clear_labels).sum().item()
                print('[epoch:%d, iter:%d] Loss: %.03f | Acc: %.3f%% '
                      % (epoch + 1, (i + 1 + epoch * length), sum_loss / batches_done, 100. * correct / total))
                f2.write('%03d  %05d |Loss: %.03f | Acc: %.3f%% '
                         % (epoch + 1, (i + 1 + epoch * length), sum_loss / batches_done, 100. * correct / total))
                f2.write('\n')
                f2.flush()

            scheduler.step()
            torch.cuda.empty_cache()

            # Evaluate on the validation set after every epoch.
            print("Waiting Test!")
            net.eval()
            correct = 0
            total = 0
            class_correct = [0] * len(classes)
            class_total = [0] * len(classes)
            with torch.no_grad():
                for batch in validationloader:
                    group_outputs, clear_labels = _forward_batch(batch)
                    if clear_labels.shape[0] == 0:
                        continue

                    _, predicted = torch.max(group_outputs, 1)
                    hits = predicted == clear_labels
                    total += clear_labels.size(0)
                    correct += hits.sum().item()
                    # Per-class bookkeeping; works for any batch size,
                    # including a single sample.
                    for label, hit in zip(clear_labels.tolist(), hits.tolist()):
                        class_correct[label] += int(hit)
                        class_total[label] += 1
            torch.cuda.empty_cache()

            for j in range(len(classes)):
                # Guard against classes that are absent from the validation set.
                class_acc = 100 * class_correct[j] / class_total[j] if class_total[j] else 0.0
                print('Accuracy of %5s : %.3f %%' % (classes[j], class_acc))
            acc = 100. * correct / total if total else 0.0
            print('Acc on Validation set:：%.3f%%' % acc)
            f.write("EPOCH=%03d,Accuracy= %.3f%%" % (epoch + 1, acc))
            f.write('\n')
            f.flush()
            if acc > best_acc:
                print('Saving model......')
                os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
                torch.save(net.state_dict(), MODEL_PATH)
                with open("best_acc.txt", "w") as f3:
                    f3.write("EPOCH=%d,best_acc= %.3f%%" % (epoch + 1, acc))
                best_acc = acc

        print("Training Finished, TotalEPOCH=%d" % EPOCH)

Start Training!

Epoch: 1
[epoch:1, iter:1] Loss: 1.101 | Acc: 43.750% 
[epoch:1, iter:2] Loss: 1.096 | Acc: 43.750% 
[epoch:1, iter:3] Loss: 1.096 | Acc: 41.667% 
[epoch:1, iter:4] Loss: 1.095 | Acc: 43.750% 
[epoch:1, iter:5] Loss: 1.092 | Acc: 48.750% 
[epoch:1, iter:6] Loss: 1.090 | Acc: 52.083% 
[epoch:1, iter:7] Loss: 1.088 | Acc: 53.571% 
[epoch:1, iter:8] Loss: 1.087 | Acc: 52.344% 
[epoch:1, iter:9] Loss: 1.083 | Acc: 54.167% 
[epoch:1, iter:10] Loss: 1.082 | Acc: 55.000% 
[epoch:1, iter:11] Loss: 1.082 | Acc: 53.977% 
[epoch:1, iter:12] Loss: 1.079 | Acc: 55.208% 
[epoch:1, iter:13] Loss: 1.076 | Acc: 57.212% 
[epoch:1, iter:14] Loss: 1.072 | Acc: 57.589% 
[epoch:1, iter:15] Loss: 1.071 | Acc: 57.500% 
[epoch:1, iter:16] Loss: 1.068 | Acc: 57.812% 
[epoch:1, iter:17] Loss: 1.067 | Acc: 56.618% 
[epoch:1, iter:18] Loss: 1.066 | Acc: 57.639% 
[epoch:1, iter:19] Loss: 1.064 | Acc: 58.224% 
[epoch:1, iter:20] Loss: 1.062 | Acc: 57.812% 
[epoch:1, iter:21] Loss: 1.060 | Acc: 58.92

KeyboardInterrupt: 