In [1]:
# prerequisites
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
from torchvision.utils import save_image

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
import os
import numpy as np
import tensorflow as tf

In [3]:
from math import floor

In [4]:
use_bias = False
front_path = './images/fig1'
profile_path = '.images/fig2'
lambda_l1 = 0.001, #'weight of the loss for L1 texture loss') # 0.001
lambda_fea=100 #'weight of the loss for face model feature loss')
lambda_reg= 1e-5# 'weight of the loss for L2 regularitaion loss')
lambda_gan= 1# 'weight of the loss for gan loss')
lambda_gp=10# 'weight of the loss for gradient penalty on parameter of D')

# For training
dataset_size=  1000# 'dataset path')  # casia_aligned_250_250_jpg
profile_list=''# 'train profile list')
front_path=''#front data path')
front_list=''# 'train front list')
test_path=''# 'front data path')
is_train=True# 'train or test')
is_finetune= False# 'finetune') # False, True
face_mode='resnet50.npy'# 'face model path')
checkpoint='checkpoint/fnm'# 'checkpoint directory')
summary_dir= 'log/fnm'# 'logs directory')
checkpoint_ft='checkpoint/fnm/ck-09'#'finetune or test checkpoint path')
batch_size= 64# 'batch size')#was 16
epoc=10 # 'epoch')
critic= 1 #'number of D training times')
save_freq= 1000 # 'the frequency of saving model')
lr=1e-4# 'base learning rate')
beta1=0. # 'beta1 momentum term of adam')
beta2=0.9 # 'beta2 momentum term of adam')
stddev= 0.02 # 'stddev for W initializer')
use_bias=False # 'whether to use bias')
results='results/fnm' # 'path for saving results') #

############################
#   environment setting    #
############################
device_id='3,4'# 'device id')
ori_height=224 # 'original height of profile images')
ori_width=224 # 'original width of profile images')
height= 224 #'height of images') # do not modified
width= 224 # 'width of images') # do not modified
CHANNEL=3 # 'channel of images')
num_threads=8 # 'number of threads of enqueueing examples')


# Resnet50 model trained on VGGFace2 dataset

In [5]:
from Resnet50_ft_dag import resnet50_ft_dag

# Batch normalisation

In [6]:
class Batch_norm(nn.Module):
    
    def __init__(self, epsilon=1e-5, momentum = 0.9):
        super(Batch_norm, self).__init__()
        # self.mean  = mosv_dict['mean']
        self.epsilon  = epsilon
        self.momentum = momentum
        #self.scale = mosv_dict['scale']
        #self.variance = mosv_dict['variance']
        #self.epsilon = 1e-5
    def __call__(self, x, is_train = False):
        return nn.BatchNorm2d(x,eps = self.epsilon,  momentum = self.momentum, track_running_stats = is_train)
            
    
   

In [7]:
NORM = Batch_norm

# Options: conv2d, res_block 

In [68]:
def calc_conv_pad(input_size, output_size, filter_size, stride):
    return max(0, floor((stride * (output_size - 1) + filter_size - input_size)/2 + 0.5))

In [69]:
def calc_deconv_pad(input_size, output_size, filter_size, stride):
    return max(0, floor((stride * (input_size - 1) + filter_size - output_size)/2 + 0.5))

In [70]:
def conv2d( in_channels , out_channels , kernel_size = 3 , strides = 1  , 
           padding  =  None, 
           bias = use_bias,dilation_rate = 1, activation = None):
    layers = []
    if padding == None:
        padding = calc_conv_pad(in_channels , out_channels, kernel_size , strides)
    conv = nn.Conv2d( in_channels , out_channels , bias=bias, kernel_size = kernel_size, stride=strides, padding=padding, dilation=dilation_rate )
    layers.append( conv )
    if activation is not None:
        layers.append( activation )
    return nn.Sequential( *layers )

def deconv2d(in_channels , out_channels , kernel_size = 3 , strides = 1  , 
             padding  = None,
             bias= use_bias, dilation_rate = 1,
            activation = None):
    if padding == None:
        padding = calc_deconv_pad(in_channels , out_channels, kernel_size , strides)
    layers = [nn.ConvTranspose2d(in_channels, out_channels, kernel_size =  kernel_size, stride=strides, 
                      padding=padding, bias=bias)]
    if activation is not None:
        layers.append( activation )
    return nn.Sequential( *layers )
    

In [11]:
class res_block(nn.Module):
    def __init__(self,  in_channels , 
                 out_channels , kernel_size = 3, 
                 stride = 1  , padding  = 0 ,
                 bias = use_bias,  norm = NORM, activation2 = nn.ReLU, activation = None,):
        super(res_block, self).__init__()
        self.out_channels = out_channels
        self.activation = activation
        self.activation2 = activation2
        convs = [conv2d(in_channels , out_channels , kernel_size , stride , padding, bias, activation = self.activation), 
             nn.BatchNorm2d( in_channels ), 
             nn.ReLU(), 
             conv2d(out_channels, out_channels, kernel_size, stride, padding, bias, activation = self.activation),
             norm(in_channels)]
        self.layers = nn.Sequential(*convs)
    def forward(self, x):
        return self.activation2(self.layers(x) + x)

# Load data

In [12]:
DATA_PATH = r"./data/train_data"
DATA_PATH_Train_front=r'./data/train_data/front'

In [14]:
# Image size: even though image sizes are bigger than 96, we use this to speed up training
SIZE_H = SIZE_W = 96

# Number of classes in the dataset
NUM_CLASSES = 2

# Epochs: number of passes over the training data, we use it this small to reduce training babysitting time
EPOCH_NUM = 30

# Batch size: for batch gradient descent optimization, usually selected as 2**K elements


# Images mean and std channelwise
image_mean = [0.485, 0.456, 0.406]
image_std  = [0.229, 0.224, 0.225]

# Last layer (embeddings) size for CNN models
EMBEDDING_SIZE = 256

In [15]:
transformer = transforms.Compose([
    transforms.Resize((SIZE_H, SIZE_W)),        # scaling images to fixed size
    transforms.ToTensor(),                      # converting to tensors
    transforms.Normalize(image_mean, image_std) # normalize image data per-channel
]) 

In [16]:
transform = transforms.Compose([
transforms.ToTensor(), transforms.Normalize([0.5], [0.5]), transforms.CenterCrop(255)])

train_dataset_front = datasets.ImageFolder(DATA_PATH_Train_front, transform=transformer)
train_dataset_profile = datasets.ImageFolder(os.path.join(DATA_PATH, 'profile'),  transform=transformer)

# Data Loader (Input Pipeline)
train_loader_front = torch.utils.data.DataLoader(dataset=train_dataset_front, batch_size=batch_size, shuffle=True)
train_loader_profile = torch.utils.data.DataLoader(dataset=train_dataset_profile, batch_size=batch_size, shuffle=True)

In [None]:
test_dataset = datasets.ImageFolder(root='./data/test_data/', transform=transform)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=bs, shuffle=False)

In [77]:
# dataset = dset.ImageFolder(root=dataroot,
#                            transform=transforms.Compose([
#                                transforms.Resize(image_size),
#                                transforms.CenterCrop(image_size),
#                                transforms.ToTensor(),
#                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),]))
# # Create the dataloader
# dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
#                                          shuffle=True, num_workers=workers)

# # Decide which device we want to run on
# device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu")

# # Plot some training images
# real_batch = next(iter(dataloader))
# plt.figure(figsize=(8,8))
# plt.axis("off")
# plt.title("Training Images")
# plt.imshow(np.transpose(vutils.make_grid(real_batch[0].to(device)[:64], padding=2, normalize=True).cpu(),(1,2,0)))

# Generator 

In [18]:
f7_shape = [7, 7, 2048]

#### second variant

In [81]:
class Generator(nn.Module):
    def __init__(self): #, profile, front, train):
        super(Generator, self).__init__() 
#         self.face_model = Resnet50_ft_dag()
#         self.feature_p = self.face_model.forward(profile)
#         self.feature_f = self.face_model.forward(front)
#         self.fc2 = nn.Linear(self.fc1.out_features, self.fc1.out_features*2)
#         self.is_train = train
        
        self.conv1 = conv2d(2048, 512, kernel_size=1, strides = 1)
        self.norm1 = NORM(512, is_train)
        self.res1_1 = res_block(512, 512, norm = NORM)
        self.res1_2 = res_block(512, 512, norm = NORM)
        self.res1_3 = res_block(512, 512, norm = NORM)
        self.res1_4 = res_block(512, 512, norm = NORM)
        
        self.dconv2 = nn.Sequential(deconv2d(512, 256, kernel_size=4, strides = 2), NORM(), nn.ReLU())
        self.res2 = res_block(256, 256, norm = NORM)
        self.dconv3 = nn.Sequential(deconv2d(256, 128, kernel_size=4, strides = 2), NORM(), nn.ReLU())
        self.res3 = res_block(128, 128, norm = NORM)
        self.dconv4 = nn.Sequential(deconv2d(128, 64, kernel_size=4, strides = 2), NORM(), nn.ReLU())
        self.res4 = res_block(64, 64, norm = NORM)
        self.dconv5 = nn.Sequential(deconv2d(64, 32, kernel_size=4, strides = 2), NORM(), nn.ReLU())
        self.res5 = res_block(32, 32, norm = NORM)
        self.dconv6 = nn.Sequential(deconv2d(32, 32, kernel_size=4, strides = 2), NORM(), nn.ReLU())
        self.res6 = res_block(32, 32, norm = NORM)
        self.gen = nn.Sequential(conv2d(32, 3,  kernel_size=1, strides = 1), nn.Tanh())
        
    def forward(self, feature):
        feat7, pool5 = feature[0],feature[1]
        conv1 = self.conv1(feat7)
        norm1 = self.norm1(conv1)
        relu1 = self.relu1(norm1)
        res1_1 = self.res1_1(relu1)
        res1_2 = self.res1_2(res1_1)
        res1_3 = self.res1_3(res1_2)
        res1_4 = self.res1_2(res1_3)
        dconv2 = self.deconv2(res1_4)
        res2 = self.res2(dconv2)
        dconv3 = self.dconv3(res2)
        res3 = self.res3(dconv3)
        dconv4 = self.dconv4(res3)
        res4 = self.res4(dconv4)
        dconv5 = self.dconv5(res4)
        res5 = self.res5(dconv5)
        dconv6 = self.dconv6(res5)
        res6 = self.res6(dconv6)
        gen = self.gen(res6)
        return (gen + 1)* 127.5

In [82]:
BS = batch_size

In [48]:
class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.h0_0 = nn.Sequential(conv2d(3, 32, kernel_size=3, strides=2), nn.LeakyReLU())
        self.h0_1 = nn.Sequential(conv2d(32, 64, kernel_size=3, strides=2), NORM(), nn.LeakyReLU())
        self.h0_2 = nn.Sequential(conv2d(64, 128, kernel_size=3, strides=2), NORM(), nn.LeakyReLU())
        self.h0_3 = nn.Sequential(conv2d(128, 256, kernel_size=3, strides=2), NORM(), nn.LeakyReLU())
        self.h0_4 = nn.Sequential(conv2d(256, 256, kernel_size=3, strides=2), NORM(), nn.LeakyReLU())
        self.h0_5 = nn.Linear(256, 1)
        
        self.h1_0 = nn.Sequential(conv2d(3, 32, kernel_size=3, strides=2), nn.LeakyReLU())
        self.h1_1 = nn.Sequential(conv2d(32, 64, kernel_size=3, strides=2), NORM(), nn.LeakyReLU())
        self.h1_2 = nn.Sequential(conv2d(64, 128, kernel_size=3, strides=2), NORM(), nn.LeakyReLU())
        self.h1_3 = nn.Sequential(conv2d(128, 256, kernel_size=3, strides=2), NORM(), nn.LeakyReLU())
        self.h1_4 = nn.Linear(256, 1)
        
        self.h2_0 = nn.Sequential(conv2d(3, 32, kernel_size=3, strides=2), nn.LeakyReLU())
        self.h2_1 = nn.Sequential(conv2d(32, 64, kernel_size=3, strides=2), NORM(), nn.LeakyReLU())
        self.h2_2 = nn.Sequential(conv2d(64, 128, kernel_size=3, strides=2), NORM(), nn.LeakyReLU())
        self.h2_3 = nn.Sequential(conv2d(128, 256, kernel_size=3, strides=2), NORM(), nn.LeakyReLU())
        #self.h2_4 = nn.Linear(self.h2_3.out_features, 1)
        self.h2_4 = nn.Linear(256, 1)
        
        self.h3_0 = nn.Sequential(conv2d(3, 32, kernel_size=3, strides=2), nn.LeakyReLU())
        self.h3_1 = nn.Sequential(conv2d(32, 64, kernel_size=3, strides=2), NORM(), nn.LeakyReLU())
        self.h3_2 = nn.Sequential(conv2d(64, 128, kernel_size=3, strides=2), NORM(), nn.LeakyReLU())
        self.h3_3 = nn.Sequential(conv2d(128, 256, kernel_size=3, strides=2), NORM(), nn.LeakyReLU())
        self.h3_4 = nn.Linear(256, 1)
        
        self.h4_0 = nn.Sequential(conv2d(3, 32, kernel_size=3, strides=2), nn.LeakyReLU())
        self.h4_1 = nn.Sequential(conv2d(32, 64, kernel_size=3, strides=2), NORM(), nn.LeakyReLU())
        self.h4_2 = nn.Sequential(conv2d(64, 128, kernel_size=3, strides=2), NORM(), nn.LeakyReLU())
        self.h4_3 = nn.Sequential(conv2d(128, 256, kernel_size=3, strides=2), NORM(), nn.LeakyReLU())
        self.h4_4 = nn.Linear(256, 1)
    
    
    def forward(self, images):
        eyes = tf.slice(images, [0,64,50,0], [bs,36,124,cfg.channel])
        nose = tf.slice(images, [0,75,90,0], [bs,65,44,cfg.channel])
        mouth = tf.slice(images, [0,140,75,0], [bs,30,74,cfg.channel])
        face = tf.slice(images, [0,64,50,0], [bs,116,124,cfg.channel])
        h0_0 = self.h0_0(images)
        h0_1 = self.h0_1(h0_0)
        h0_2 = self.h0_2(h0_1)
        h0_3 = self.h0_3(h0_2)
        h0_4 = self.h0_4(h0_3)
        h0_5 = self.h0_5(torch.reshape(h0_4, [BS, -1]))
        
        h1_0 = self.h1_0(eyes)
        h1_1 = self.h1_1(h1_0)
        h1_2 = self.h1_2(h1_1)
        h1_3 = self.h1_3(h1_2)
        h1_4 = self.h1_4(torch.reshape(h1_3, [BS, -1]))
        
        h2_0 = self.h2_0(nose)
        h2_1 = self.h2_1(h2_0)
        h2_2 = self.h2_2(h2_1)
        h2_3 = self.h2_3(h2_2)
        h2_4 = self.h2_4(torch.reshape(h2_3, [BS, -1]))
        
        h3_0 = self.h3_0(mouth)
        h3_1 = self.h3_1(h3_0)
        h3_2 = self.h3_2(h3_1)
        h3_3 = self.h3_3(h3_2)
        h3_4 = self.h3_4(torch.reshape(h3_3, [BS, -1]))
        
        h4_0 = self.h4_0(face)
        h4_1 = self.h4_1(h4_0)
        h4_2 = self.h4_2(h4_1)
        h4_3 = self.h4_3(h4_2)
        h4_4 = self.h4_4(h4_3)
        return h0_5, h1_4, h2_4, h3_4, h4_4

# G and D

In [49]:
#built network
z_dim = 224*224
mnist_dim = 224*224
#mnist_dim = train_dataset_front.train_data.size(1) * train_dataset.train_data.size(2)

G = Generator()
D = Discriminator().to(device)

In [50]:
criterion = nn.BCELoss() 

# optimizer
lr = 0.0002 
G_optimizer = optim.Adam(G.parameters(),betas=(beta1, beta2))
D_optimizer = optim.Adam(D.parameters(), betas=(beta1, beta2))


# Loss functions

In [51]:
Losses = []
D_losses, G_losses = [], []
D_finalLosses, G_finalLosses = [], []

In [52]:
weights_path= "./resnet50_ft_dag.pth"
Model = resnet50_ft_dag(weights_path=weights_path)

In [53]:
def reverse(x, dim):
    dim = x.dim() + dim if dim < 0 else dim
    return x[tuple(slice(None, None) if i != dim
             else torch.arange(x.size(i)-1, -1, -1).long()
             for i in range(x.dim()))]

In [54]:
def loss(profile, front):
    #=======================Train the generator=======================#
    feature_p = Model.forward(profile) # G_enc(x)
    feature_f = Model.forward(front) # G_enc(y)
    G.zero_grad()
    gen_p = G.forward(feature_p) # ~x
    gen_f = G.forward(feature_f) # ~y
    feature_gen_p = Model.forward(gen_p) #G_enc(~x)
    feature_gen_f = Model.forward(gen_f) #G_enc(~y)
    
    pool5_p_norm = feature_p[-1]/(torch.norm(feature_p[-1],dim = 1, keepdim = True) + epsilon)
    pool5_f_norm = feature_f[-1]/(torch.norm(feature_f[-1],dim = 1, keepdim = True) + epsilon)
    
    pool5_gen_p_norm = feature_gen_p[-1]/(torch.norm(feature_gen_p[-1],dim = 1, keepdim = True) + epsilon)
    pool5_gen_f_norm = feature_gen_f[-1]/(torch.norm(feature_gen_f[-1],dim = 1, keepdim = True) + epsilon)
    
     # 1. Frontalization Loss: L1-Norm
    front_loss = torch.mean(torch.sum(torch.abs(front/255. - gen_f/255.), [1,2,3]))
    
    # 2. identity perseption loss l2-norm
    feature_distance = 0.5*(1 - torch.sum(torch.mul(pool5_p_norm, pool5_gen_p_norm), [1])) + \
                                0.5*(1 - torch.sum(torch.mul(pool5_f_norm, pool5_gen_f_norm), [1]))
    feature_loss = torch.mean(feature_distance)
    Losses.append(feature_loss)
    
    #.
    #trainable var
    #all_vars = torch.autograd.Variables()
    vars_gen = G.parameters(requires_grad= True)
    vars_dis = D.parameters(requires_grad= True)
    
    
    
    # 3. Regulation loss
    loss = nn.MSELoss(lambda_reg)
    reg_gen = loss(weights_list=[var for var in vars_gen]) # if 'kernel' in var.name
    reg_dis = loss(weights_list=[var for var in vars_dis])# if 'kernel' in var.name
    G_losses.append(reg_gen)
    D_losses.append(reg_dis)
    
    
    # 4. Adversarial Loss
    d_loss = torch.mean(torch.add(self.df1)*0.5 + torch.add(self.df2)*0.5 - tf.add_n(self.dr)) / 5
    self.g_loss = - tf.reduce_mean(torch.add(df1)*0.5 + tf.add(df2)*0.5) / 5
    D_losses.append(self.d_loss)
    G_losses.append(self.g_loss)
    
    # 5. Symmetric Loss - not applied
    mirror_p = reverse(self.gen_p, dim=[2])
    sym_loss = torch.mean(torch.sum(torchf.abs(mirror_p/225. - gen_p/255.), [1,2,3]))
      
     # 6. Drift Loss - not applied
    self.drift_loss = 0
    #torch.mean(torch.add(torch.square(df)) + torch.add(torch.square(dr))) / 10

    Gen_loss =  reg_gen + lambda_l1 * front_loss + lambda_fea * feature_loss + g_loss/lambda_gan
    Dis_loss = lambda_gan * d_loss + lambda_gp * gradient_penalty + 1/reg_dis
    G_finalLosses.append(Gen_loss)
    D_finalLosses.append(Dis_loss)
    Gen_loss.backward()
    Dis_loss.backward()
    
    G_optimizer.step()
    D_optimizer.step()
    return Gen_loss,  Dis_loss
#     z = Variable(torch.randn(bs, z_dim).to(device))
#     y = Variable(torch.ones(bs, 1).to(device))

#     G_output = G(z)
#     D_output = D(G_output)
#     G_loss = criterion(D_output, y)

#     # gradient backprop & optimize ONLY G's parameters
#     G_loss.backward()
#     G_optimizer.step()
        
#     return G_loss.data.item()

# Main

In [55]:
train_loader_profile.

SyntaxError: invalid syntax (<ipython-input-55-988f6f069aa3>, line 1)

In [59]:
d = list(enumerate(train_loader_profile))
print(d[0])

(0, [tensor([[[[ 2.0434,  2.0434,  2.0434,  ...,  2.0948,  2.0948,  2.0948],
          [ 2.0434,  2.0434,  2.0263,  ...,  2.0948,  2.0948,  2.0948],
          [ 2.0263,  2.0263,  2.0092,  ...,  2.0948,  2.0948,  2.0948],
          ...,
          [-0.9705, -0.9877, -0.9705,  ..., -1.2103, -1.1932, -1.2103],
          [-0.9363, -0.9877, -0.9705,  ..., -1.1932, -1.2103, -1.2445],
          [-1.0048, -1.0048, -0.9705,  ..., -1.2103, -1.2274, -1.2274]],

         [[ 2.2885,  2.2885,  2.2885,  ...,  2.3761,  2.3761,  2.3761],
          [ 2.2885,  2.2885,  2.2710,  ...,  2.3761,  2.3761,  2.3761],
          [ 2.2710,  2.2710,  2.2535,  ...,  2.3761,  2.3761,  2.3761],
          ...,
          [-0.6176, -0.6352, -0.6176,  ..., -0.8978, -0.8803, -0.8978],
          [-0.5826, -0.6352, -0.6176,  ..., -0.8803, -0.8978, -0.9328],
          [-0.6352, -0.6352, -0.6001,  ..., -0.8978, -0.9153, -0.9153]],

         [[ 2.5354,  2.5180,  2.4831,  ...,  2.5529,  2.5529,  2.5529],
          [ 2.5354,  2.51

In [60]:
# weights_path= "./resnet50_ft_dag.pth"
# Model = resnet50_ft_dag(weights_path=weights_path)
# Model(train_dataset_front[0][0])

In [87]:
Model = resnet50_ft_dag(weights_path=weights_path)


In [90]:
x = torch.rand([4, 3, 224, 224])
Model(x)
# G = Generator()
# x_gen = G(x)

RuntimeError: [enforce fail at ..\c10\core\CPUAllocator.cpp:72] data. DefaultCPUAllocator: not enough memory: you tried to allocate 6422528 bytes. Buy new RAM!


In [126]:
import cv2

ModuleNotFoundError: No module named 'cv2'

In [71]:
batch_size = 3
BS = batch_size
Gen_loss, Dis_loss = loss(x, train_dataset_front[i])
D_losses.append(Gen_loss)
G_losses.append(Dis_loss)

RuntimeError: Given groups=1, weight of size 64 3 7 7, expected input[16, 256, 256, 2048] to have 3 channels, but got 256 channels instead

In [62]:
n_epoch = 200
for epoch in range(1, n_epoch+1):           
    D_losses, G_losses = [], []
    #d = list(enumerate(train_loader_front))
    i = 0
    for batch_idx, (x, _) in enumerate(train_loader_profile):
        batch_idy, (y, _) = d[i]
        Gen_loss, Dis_loss = loss(x, train_dataset_front[i])
        D_losses.append(Gen_loss)
        G_losses.append(Dis_loss)
        i += 1

    print('[%d/%d]: loss_d: %.3f, loss_g: %.3f' % (
            (epoch), n_epoch, torch.mean(torch.FloatTensor(D_losses)), torch.mean(torch.FloatTensor(G_losses))))

RuntimeError: Given input size: (2048x3x3). Calculated output size: (2048x-3x-3). Output size is too small