In [1]:
import torch.nn as nn
import torch.nn.functional as F
import torch

In [2]:
import tensorflow as tf

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [3]:
tf.__version__

'1.13.1'

In [4]:
import numpy as np

In [5]:
class StyleController(nn.Module):
    def __init__(self, batch_size, input_size = 8):
         
        super().__init__()
        
        self.input_size = input_size
        self.batch_size = batch_size
        self.k = 64
        
        self.fc1 = nn.Linear(self.input_size, 128, bias = True)
        self.initialize_weights_with_he_biases_with_zero(self.fc1)
        
        self.ln1 = nn.LayerNorm(128)
        
        self.relu1 = nn.ReLU()
        
        self.fc2 = nn.Linear(128, 128, bias = True)
        self.initialize_weights_with_he_biases_with_zero(self.fc2)
        
        self.ln2 = nn.LayerNorm(128)
        
        self.relu2 = nn.ReLU()
        
        self.fc3 = nn.Linear(128, 4 * self.k, bias = True)
        self.initialize_weights_with_he_biases_with_zero(self.fc3) 
        
        self.fc4 = nn.Linear(128, 4 * self.k, bias = True)
        self.initialize_weights_with_he_biases_with_zero(self.fc4)
        
        
    def initialize_weights_with_he_biases_with_zero(self, layer : nn.Module):
        nn.init.kaiming_normal_(layer.weight)
        layer.bias.data.fill_(0.0)
    
    """
    x.shape = (batch_size, style_size) => by default (batch_size, 8)
    """
    def forward(self, x):
        
        if x is None:
            x = torch.randn((self.batch_size, self.input_size))
        
        x = self.fc1(x)
        x = self.ln1(x)
        x = self.relu1(x)
        
        print('module fc1 shape:', [dim for dim in x.shape])
        
        x = self.fc2(x)
        x = self.ln2(x)
        x = self.relu2(x)
        
        print('module fc2 shape:', [dim for dim in x.shape])
        
        gamma = self.fc3(x)
        gamma = torch.reshape(gamma, [-1, 1, 1, 4 * self.k])
        
        print('gamma shape:', [dim for dim in gamma.shape])
        
        beta = self.fc4(x)
        beta = torch.reshape(beta, [-1, 1, 1, 4 * self.k])
        
        print('beta shape:', [dim for dim in beta.shape])
        
        return torch.cat((beta, gamma), 0)
        

In [6]:
controller = StyleController(10,8)
res = controller.forward(None)
res.shape

module fc1 shape: [10, 128]
module fc2 shape: [10, 128]
gamma shape: [10, 1, 1, 256]
beta shape: [10, 1, 1, 256]


torch.Size([20, 1, 1, 256])

In [7]:
gamma = res[0:controller.batch_size, :, :, :]
beta = res[controller.batch_size :, :, :, :]

In [8]:
print("Gamma shape: ", gamma.shape)
print("Beta shape: ", beta.shape)

Gamma shape:  torch.Size([10, 1, 1, 256])
Beta shape:  torch.Size([10, 1, 1, 256])


In [9]:
def padding2(x: torch.Tensor, pad: int, pad_mode='reflect') -> torch.Tensor:
        """
        
        Custom padding to apply before convolution layer.
        :param x: input image
        :param pad: padding size
        :param pad_mode: padding mode
            :options:
                - reflect
                - zero
        
        """

        if pad_mode == 'reflect':
            return F.pad(input=x, pad=(pad, pad, pad, pad, 0, 0, 0, 0), mode='reflect')

        if pad_mode == 'zero':
            return F.pad(input=x, pad=(pad, pad, pad, pad, 0, 0, 0, 0), mode='constant')

        raise ValueError(f"{pad_mode} must be one of ['reflect', 'zero']!")

In [10]:
#t4d = torch.tensor(np.ones((2, 2, 2, 2)))
#p3d = (0, 0, 1, 1, 1, 1) # pad by (0, 1), (2, 1), and (3, 3)
#out = F.pad(t4d, p3d, "constant", value = 0)

In [11]:
#t4d = torch.tensor(np.ones((2, 2, 2, 2)))
#p3d = (0, 0, 1, 1, 1, 1) # pad by (0, 1), (2, 1), and (3, 3)
#out = F.pad(t4d, (0, 0, 1, 1, 1, 1), "reflect")

In [12]:
def upscale2dtorch(x, factor=2):
    assert isinstance(factor, int) and factor >= 1
    if factor == 1: return x
    s = x.shape
    x = torch.reshape(x, (-1, s[1], 1, s[2], 1, s[3]))
    x = torch.tile(x, (1, 1, factor, 1, factor, 1))
    x = torch.reshape(x, (-1, s[1] * factor, s[2] * factor, s[3]))
    return x
                  

In [13]:
class Decoder(nn.Module):
    def __init__(self, in_channels, style_controller):
         
        super().__init__()
        
        self.style_controller = style_controller 
        self.k = self.style_controller.k
        
        self.conv1 = nn.Conv2d(in_channels = in_channels, out_channels = 4 * self.k, kernel_size=3)
        self.initialize_weights_and_biases(self.conv1)
        
        self.instance_norm_layer1 = nn.InstanceNorm2d(4 * self.k)
        self.relu1 = nn.ReLU()
        
        self.conv2 = nn.Conv2d(in_channels = 4 * self.k, out_channels = 4 * self.k, kernel_size = 3, bias = False)
        nn.init.kaiming_normal_(self.conv2.weight)
        
        self.instance_norm_layer2 = nn.InstanceNorm2d(4 * self.k)
        
        self.conv3 = nn.Conv2d(in_channels = 4 * self.k, out_channels = 4 * self.k, kernel_size = 3)
        self.initialize_weights_and_biases(self.conv3)
        
        self.instance_norm_layer3 = nn.InstanceNorm2d(4 * self.k)
        self.relu2 = nn.ReLU()
        
        self.conv4 = nn.Conv2d(in_channels = 4 * self.k, out_channels = 4 * self.k, kernel_size = 3, bias = False)
        nn.init.kaiming_normal_(self.conv4.weight)
        
        self.instance_norm_layer4 = nn.InstanceNorm2d(4 * self.k)
        
        self.conv5 = nn.Conv2d(in_channels = 4 * self.k, out_channels = 4 * self.k, kernel_size = 3)
        self.initialize_weights_and_biases(self.conv5)
        
        self.instance_norm_layer5 = nn.InstanceNorm2d(4 * self.k)
        self.relu3 = nn.ReLU()
        
        self.conv6 = nn.Conv2d(in_channels = 4 * self.k, out_channels = 4 * self.k, kernel_size = 3, bias = False)
        nn.init.kaiming_normal_(self.conv6.weight)
        
        self.instance_norm_layer6 = nn.InstanceNorm2d(4 * self.k)
        
        self.conv7 = nn.Conv2d(in_channels = 4 * self.k, out_channels = 4 * self.k, kernel_size = 5)
        self.initialize_weights_and_biases(self.conv7)
        
        self.instance_norm_layer7 = nn.InstanceNorm2d(2 * self.k)
        self.relu4 = nn.ReLU()
        
        self.conv8 = nn.Conv2d(2 * self.k, self.k, 5)
        self.initialize_weights_and_biases(self.conv8)
        
        self.instance_norm_layer8 = nn.InstanceNorm2d(self.k)
        self.relu5 = nn.ReLU()
        
        self.conv9 = nn.Conv2d(self.k, 3, 7)
        self.initialize_weights_and_biases(self.conv9, True)
        
        self.tanh = nn.Tanh()
        
    
    def initialize_weights_and_biases(self, layer : nn.Module, bothWeightAndBias = False):
        if not bothWeightAndBias:
            nn.init.kaiming_normal_(layer.weight)
            layer.bias.data.fill_(0.0)
        else:
            layer.weight.data.fill_(0.0)
            layer.bias.data.fill_(0.0)
            
    def forward(self, styles, x):
        
        print("X shape after padding: ", padding2(x, 1, pad_mode="zero").shape)
        x_ = self.conv1(padding2(x, 1, pad_mode="zero"))
        print("X shape after conv: ", x_.shape)
        x_ = self.instance_norm_layer1(x_)
        print("X shape after instance norm: ", x_.shape)
        x_ = gamma * x_ + beta
        print("2: ", x_.shape)
        x_ = self.relu1(x_)
        
        print(self.instance_norm_layer2(self.conv2(padding2(x_, 1, pad_mode="zero"))).shape)
        print(x.shape)
        x += self.instance_norm_layer2(self.conv2(padding2(x_, 1, pad_mode="zero")))
        
        print('module res{} shape:'.format(1), [dim for dim in x.shape])
        
        x_ = self.conv3(padding2(x, 1, pad_mode="zero"))
        x_ = self.instance_norm_layer3(x_)
        x_ = gamma * x_ + beta
        x_ = self.relu2(x_)
        
        x += self.instance_norm_layer4(self.conv4(padding2(x_, 1, pad_mode="zero")))
        
        print('module res{} shape:'.format(2), [dim for dim in x.shape])
        
        x_ = self.conv5(padding2(x, 1, pad_mode="zero"))
        x_ = self.instance_norm_layer5(x_)
        print("Gmma shape: ", gamma.shape)
        x_ = gamma * x_ + beta
        x_ = self.relu3(x_)
        
        x += self.instance_norm_layer6(self.conv6(padding2(x_, 1, pad_mode="zero")))
        
        print('module res{} shape:'.format(3), [dim for dim in x.shape])
        
        x = upscale2d(x, 2, pad_mode="zero")
        x = self.conv7(padding2(x, 2))
        x = self.instance_norm_layer7(x)
        x = self.relu4(x)
        
        print('module deconv1 shape:', [dim for dim in x.shape])

        x = upscale2d(x, 2)
        x = self.conv8(padding2(x, 2, pad_mode="zero"))
        x = self.instance_norm_layer8(x)
        x = self.relu5(x)
        
        x = self.conv9(padding2(x, 3, pad_mode="zero"))
        return self.tanh(x)
        
        

In [14]:
controller = StyleController(10,8)
styles = torch.tensor(np.ones((10, 8), dtype=float)).float()
decoder = Decoder(256, controller)

In [None]:
decoder.forward(styles, torch.tensor(np.ones((10, 256, 256, 256))).float())

module fc1 shape: [10, 128]
module fc2 shape: [10, 128]
gamma shape: [10, 1, 1, 256]
beta shape: [10, 1, 1, 256]
X shape after padding:  torch.Size([10, 256, 258, 258])
X shape after conv:  torch.Size([10, 256, 256, 256])
X shape after instance norm:  torch.Size([10, 256, 256, 256])
2:  torch.Size([10, 256, 256, 256])
torch.Size([10, 256, 256, 256])
torch.Size([10, 256, 256, 256])
module res1 shape: [10, 256, 256, 256]
module res2 shape: [10, 256, 256, 256]
Gmma shape:  torch.Size([10, 1, 1, 256])


In [None]:
import tensorflow as tf
import numpy as np
import tensorflow.contrib.slim as slim
from tensorflow.contrib.layers import instance_norm, layer_norm

In [None]:
def padding(x, pad, pad_type='reflect'):
    if pad_type == 'zero' :
        print("Tf:" , x.shape)
        return tf.pad(x, [[0, 0], [pad, pad], [pad, pad], [0, 0]])
    if pad_type == 'reflect' :
        print("Tf: ", x.shape)
        return tf.pad(x, [[0, 0], [pad, pad], [pad, pad], [0, 0]], mode='REFLECT')
    else:
        raise ValueError('Unknown pad type: {}'.format(pad_type))

def conv(x, *args, pad=1, **kwargs):
    with slim.arg_scope([slim.conv2d, slim.conv2d_transpose], padding='VALID'):
        x = padding(x, pad)
        print("X shape after padding in tf: ", x.shape)
        return slim.conv2d(x, *args, **kwargs)

def upscale2d(x, factor=2):
    assert isinstance(factor, int) and factor >= 1
    if factor == 1: return x
    with tf.variable_scope('Upscale2D'):
        s = x.shape
        x = tf.reshape(x, [-1, s[1], 1, s[2], 1, s[3]])
        x = tf.tile(x, [1, 1, factor, 1, factor, 1])
        x = tf.reshape(x, [-1, s[1] * factor, s[2] * factor, s[3]])
        return x


In [None]:
class WarpController(nn.Module):
    def __init__(self, batch_size, input_size_when_flatten, num_ldmark, scales):
         
        super().__init__()
        
        self.scales = scales
        
        self.flatten = nn.Flatten()
        
        self.fc1 = nn.Linear(input_size_when_flatten, 128, bias = True)
        self.initialize_weights_with_he_biases_with_zero(self.fc1)
        
        self.ln1 = nn.LayerNorm(128)
        
        self.relu1 = nn.ReLU()
        
        self.fc2 = nn.Linear(128, num_ldmark * 2, bias = False)
        self.init.trunc_normal_(self.fc2.weights)
        self.fc2.bias.fill_(0.0)
        
        self.fc3 = nn.Linear(num_ldmark * 2, num_ldmark * 2, bias = False)
        self.init.trunc_normal_(self.fc3.weights)
        self.fc3.bias.fill_(0.0)
      
        
    def initialize_weights_with_he_biases_with_zero(self, layer : nn.Module):
        nn.init.kaiming_normal_(layer.weight)
        layer.bias.data.fill_(0.0)
    
    def forward(self, x):
        
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.ln1(x)
        x = self.relu1(x)
        
        # BÖYLE BİR CONSTANT OLUŞTURMAK BURAYI BOZAR MI
        ldmark_mean = (np.random.normal(0,50, (num_ldmark,2)) + np.array([[0.5*h,0.5*w]])).flatten()
        ldmark_mean = torch.tensor(ldmark_mean, dtype=torch.float32)

        ldmark_pred = self.fc2(x)
        ldmark_pred = ldmark_pred + ldmark_mean
        #tf.identity ile aynı mı?
        ldmark_pred = nn.Identity(ldmark_pred)
        ldmark_diff = self.fc3(x)
        ldmark_diff = nn.Identity(ldmark_diff)
        # scales i yukarda bu şekilde dahil etmek problem mi
        ldmark_diff = nn.Identity(torch.reshape(scales, (-1, 1)) * ldmark_diff)
        
        src_pts = torch.reshape(ldmark_pred, (-1, num_ldmark, 2))
        dst_pts = torch.reshape(ldmark_pred + ldmark_diff, (-1, num_ldmark, 2))
        
        diff_norm = torch.mean(torch.norm(src_pts - dst_pts, dim = (1, 2)))
        images_transformed, dense_flow = sparse_image_warp(warp_input, src_pts, dst_pts, regularization_weight = 1e-6, num_boundary_points=0)
        dense_flow = nn.Identity(dense_flow)
        
        # böyle multiple şey döndürmek okay mi
        return images_transformed, images_rendered, ldmark_pred, ldmark_diff
        

In [None]:
with tf.variable_scope('WarpController'):

                        print('-- WarpController')

                        net = encoded
                        warp_input = tf.identity(images_rendered, name='warp_input')

                        net = slim.flatten(net)

                        net = slim.fully_connected(net, 128, scope='fc1')
                        print('module fc1 shape:', [dim.value for dim in net.shape])

                        num_ldmark = 16

                        # Predict the control points
                        ldmark_mean = (np.random.normal(0,50, (num_ldmark,2)) + np.array([[0.5*h,0.5*w]])).flatten()
                        ldmark_mean = tf.Variable(ldmark_mean.astype(np.float32), name='ldmark_mean')
                        print('ldmark_mean shape:', [dim.value for dim in ldmark_mean.shape])

                        ldmark_pred = slim.fully_connected(net, num_ldmark*2, 
                            weights_initializer=tf.truncated_normal_initializer(stddev=1.0),
                            normalizer_fn=None, activation_fn=None, biases_initializer=None, scope='fc_ldmark')
                        ldmark_pred = ldmark_pred + ldmark_mean
                        print('ldmark_pred shape:', [dim.value for dim in ldmark_pred.shape])
                        ldmark_pred = tf.identity(ldmark_pred, name='ldmark_pred')
                 

                        # Predict the displacements
                        ldmark_diff = slim.fully_connected(net, num_ldmark*2, 
                            normalizer_fn=None,  activation_fn=None, scope='fc_diff')
                        print('ldmark_diff shape:', [dim.value for dim in ldmark_diff.shape])
                        ldmark_diff = tf.identity(ldmark_diff, name='ldmark_diff')
                        ldmark_diff = tf.identity(tf.reshape(scales,[-1,1]) * ldmark_diff, name='ldmark_diff_scaled')



                        src_pts = tf.reshape(ldmark_pred, [-1, num_ldmark ,2])
                        dst_pts = tf.reshape(ldmark_pred + ldmark_diff, [-1, num_ldmark, 2])

                        diff_norm = tf.reduce_mean(tf.norm(src_pts-dst_pts, axis=[1,2]))
                        # tf.summary.scalar('diff_norm', diff_norm)
                        # tf.summary.scalar('mark', ldmark_pred[0,0])

                        images_transformed, dense_flow = sparse_image_warp(warp_input, src_pts, dst_pts,
                                regularization_weight = 1e-6, num_boundary_points=0)
                        dense_flow = tf.identity(dense_flow, name='dense_flow')

                return images_transformed, images_rendered, ldmark_pred, ldmark_diff

In [None]:
def decoder(encoded, scales, styles, texture_only=False, style_size=8, image_size=(112,112),
        keep_prob=1.0, phase_train=True, weight_decay=0.0, reuse=None, scope='Decoder'):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        with slim.arg_scope([slim.conv2d, slim.conv2d_transpose, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        # weights_initializer=tf.contrib.layers.xavier_initializer(),
                        weights_initializer=tf.contrib.layers.variance_scaling_initializer(),
                        weights_regularizer=slim.l2_regularizer(weight_decay)):
            with slim.arg_scope([slim.dropout, slim.batch_norm], is_training=phase_train):
                with slim.arg_scope([slim.fully_connected],
                    normalizer_fn=layer_norm, normalizer_params=None):
                    print('{} input shape:'.format(scope), [dim.value for dim in encoded.shape])
                        
                    batch_size = tf.shape(encoded)[0]
                    h, w = tuple(image_size)
                    k = 64
    
                    with tf.variable_scope('StyleController'):

                        if styles is None:
                            styles = tf.random_normal((batch_size, style_size))

                        net = tf.identity(styles, name='input_style')

                        net = slim.fully_connected(net, 128, scope='fc2')
                        print('module fc2 shape:', [dim.value for dim in net.shape])

                        net = slim.fully_connected(net, 128, scope='fc3')
                        print('module fc3 shape:', [dim.value for dim in net.shape])

                        gamma = slim.fully_connected(net, 4*k, activation_fn=None, normalizer_fn=None, scope='fc4')
                        gamma = tf.reshape(gamma, [-1, 1, 1, 4*k], name='gamma')
                        print('gamma shape:', [dim.value for dim in gamma.shape])

                        beta = slim.fully_connected(net, 4*k, activation_fn=None, normalizer_fn=None, scope='fc5')
                        beta = tf.reshape(beta, [-1, 1, 1, 4*k], name='beta')
                        print('beta shape:', [dim.value for dim in beta.shape])


                    
                    with tf.variable_scope('Decoder'):
                        print('-- Decoder')
                        net = encoded

                        adain = lambda x : gamma * instance_norm(x, center=False, scale=False) + beta

                        with slim.arg_scope([slim.conv2d_transpose, slim.conv2d],
                                    normalizer_fn=adain, normalizer_params=None):
                            for i in range(3):
                                print("net_ shape ", net.shape)
                                net_ = conv(net, 4*k, 3, scope='res{}_0'.format(i))
                                print('_net module res{} shape:'.format(i), [dim.value for dim in net_.shape])
                                net += conv(net_, 4*k, 3, activation_fn=None, biases_initializer=None, scope='res{}_1'.format(i))
                                print('net module res{} shape:'.format(i), [dim.value for dim in net.shape])

               
                        with slim.arg_scope([slim.conv2d, slim.conv2d_transpose, slim.fully_connected],
                                normalizer_fn=layer_norm, normalizer_params=None):
                            net = upscale2d(net, 2)
                            net = conv(net, 2*k, 5, pad=2, scope='deconv1_1')
                            print('module deconv1 shape:', [dim.value for dim in net.shape])

                            net = upscale2d(net, 2)
                            net = conv(net, k, 5, pad=2, scope='deconv2_1')

                        net = conv(net, 3, 7, pad=3, activation_fn=None, normalizer_fn=None, 
                                    weights_initializer=tf.constant_initializer(0.0), scope='conv_image')
                        images_rendered = tf.nn.tanh(net, name='images_rendered')
                        print('images_rendered shape:', [dim.value for dim in images_rendered.shape])

                    if texture_only:
                        return images_rendered                        

                    with tf.variable_scope('WarpController'):

                        print('-- WarpController')

                        net = encoded
                        warp_input = tf.identity(images_rendered, name='warp_input')

                        net = slim.flatten(net)

                        net = slim.fully_connected(net, 128, scope='fc1')
                        print('module fc1 shape:', [dim.value for dim in net.shape])

                        num_ldmark = 16

                        # Predict the control points
                        ldmark_mean = (np.random.normal(0,50, (num_ldmark,2)) + np.array([[0.5*h,0.5*w]])).flatten()
                        ldmark_mean = tf.Variable(ldmark_mean.astype(np.float32), name='ldmark_mean')
                        print('ldmark_mean shape:', [dim.value for dim in ldmark_mean.shape])

                        ldmark_pred = slim.fully_connected(net, num_ldmark*2, 
                            weights_initializer=tf.truncated_normal_initializer(stddev=1.0),
                            normalizer_fn=None, activation_fn=None, biases_initializer=None, scope='fc_ldmark')
                        ldmark_pred = ldmark_pred + ldmark_mean
                        print('ldmark_pred shape:', [dim.value for dim in ldmark_pred.shape])
                        ldmark_pred = tf.identity(ldmark_pred, name='ldmark_pred')
                 

                        # Predict the displacements
                        ldmark_diff = slim.fully_connected(net, num_ldmark*2, 
                            normalizer_fn=None,  activation_fn=None, scope='fc_diff')
                        print('ldmark_diff shape:', [dim.value for dim in ldmark_diff.shape])
                        ldmark_diff = tf.identity(ldmark_diff, name='ldmark_diff')
                        ldmark_diff = tf.identity(tf.reshape(scales,[-1,1]) * ldmark_diff, name='ldmark_diff_scaled')



                        src_pts = tf.reshape(ldmark_pred, [-1, num_ldmark ,2])
                        dst_pts = tf.reshape(ldmark_pred + ldmark_diff, [-1, num_ldmark, 2])

                        diff_norm = tf.reduce_mean(tf.norm(src_pts-dst_pts, axis=[1,2]))
                        # tf.summary.scalar('diff_norm', diff_norm)
                        # tf.summary.scalar('mark', ldmark_pred[0,0])

                        images_transformed, dense_flow = sparse_image_warp(warp_input, src_pts, dst_pts,
                                regularization_weight = 1e-6, num_boundary_points=0)
                        dense_flow = tf.identity(dense_flow, name='dense_flow')

                return images_transformed, images_rendered, ldmark_pred, ldmark_diff


In [16]:
 decoder(tf.constant(np.ones((10,256, 256, 3)), dtype=tf.float64), None, tf.constant(np.ones((10, 8))))

TypeError: ignored