In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
from tensorflow import keras

# Keras counterpart of the PyTorch layers, added one by one to a Sequential model.
# The spatial input size is left unspecified; the model is built at the end of
# this cell with only the channel count fixed so that summary() can run.
model = keras.Sequential()

# NOTE: Whenever PyTorch uses explicit padding on a strided layer (i.e. padding = (3,3)),
# use keras.layers.ZeroPadding2D(padding=(3,3)) followed by a 'valid' layer to replicate it.
# Keras' padding="same" may pad asymmetrically when strides > 1, which shifts the output
# relative to PyTorch's symmetric padding. For stride 1 with an odd kernel, "same" pads
# symmetrically and matches PyTorch's padding = kernel_size // 2.

# Keras' BatchNormalization `momentum` is the decay of the moving average, i.e.
# 1 - PyTorch's `momentum`; PyTorch momentum=0.1 therefore maps to Keras momentum=0.9.
BN_MOMENTUM = 0.9
BN_EPSILON = 1e-05

### BASE NETWORK

model.add(keras.layers.ZeroPadding2D(padding=(3, 3))) # conv1 padding (PyTorch padding=(3, 3))
model.add(keras.layers.Conv2D(filters=64, kernel_size=(7, 7), strides=(2, 2), padding="valid", use_bias=False)) # conv1
model.add(keras.layers.BatchNormalization(axis=3, momentum=BN_MOMENTUM, epsilon=BN_EPSILON, center=True, scale=True)) # bn1
model.add(keras.layers.Activation('relu')) # relu
# Zero padding is safe here because the preceding ReLU makes every value >= 0.
model.add(keras.layers.ZeroPadding2D(padding=(1, 1))) # maxpool padding (PyTorch padding=1)
model.add(keras.layers.MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding="valid", data_format="channels_last")) # maxpool
model.add(keras.layers.Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding='same', use_bias=False)) # conv2
model.add(keras.layers.BatchNormalization(axis=3, momentum=BN_MOMENTUM, epsilon=BN_EPSILON, center=True, scale=True)) # bn2

### LAYER 2 DOWNSAMPLE

model.add(keras.layers.Conv2D(filters=128, kernel_size=(1, 1), strides=(2, 2), padding='valid', use_bias=False)) # layer_2_zero
model.add(keras.layers.BatchNormalization(axis=3, momentum=BN_MOMENTUM, epsilon=BN_EPSILON, center=True, scale=True)) # layer_2_one

### LAYER 3 DOWNSAMPLE

model.add(keras.layers.Conv2D(filters=256, kernel_size=(1, 1), strides=(2, 2), padding='valid', use_bias=False)) # layer_3_zero
model.add(keras.layers.BatchNormalization(axis=-1, momentum=BN_MOMENTUM, epsilon=BN_EPSILON, center=True, scale=True)) # layer_3_one

### LAYER 4 DOWNSAMPLE

model.add(keras.layers.Conv2D(filters=512, kernel_size=(1, 1), strides=(2, 2), padding='valid', use_bias=False)) # layer_4_zero
model.add(keras.layers.BatchNormalization(axis=-1, momentum=BN_MOMENTUM, epsilon=BN_EPSILON, center=True, scale=True, beta_initializer='zeros', gamma_initializer='ones')) # layer_4_one

### DEC_C4 - UP

model.add(keras.layers.Conv2D(filters=256, kernel_size=(3, 3), strides=(1, 1), padding='same')) # dec_c4_up_zero
model.add(keras.layers.BatchNormalization(axis=-1, momentum=BN_MOMENTUM, epsilon=BN_EPSILON, center=True, scale=True, beta_initializer='zeros', gamma_initializer='ones')) # dec_c4_up_one
model.add(keras.layers.Activation('relu')) # dec_c4_up_two

### DEC_C4 - CAT_CONV

model.add(keras.layers.Conv2D(256, kernel_size=(1, 1), strides=(1, 1), padding='valid', use_bias=True)) # dec_c4_cat_conv_zero
model.add(keras.layers.BatchNormalization(epsilon=BN_EPSILON, momentum=BN_MOMENTUM, center=True, scale=True)) # dec_c4_cat_conv_one
model.add(keras.layers.ReLU()) # dec_c4_cat_conv_two

### HM

model.add(keras.layers.Conv2D(filters=256, kernel_size=(3, 3), strides=(1, 1), padding='same')) # hm_zero
model.add(keras.layers.ReLU()) # hm_one
model.add(keras.layers.Conv2D(filters=1, kernel_size=(1, 1), strides=(1, 1), padding='valid', use_bias=True)) # hm_two

### REG

model.add(keras.layers.Conv2D(filters=256, kernel_size=(3, 3), strides=(1, 1), padding='same', use_bias=True)) # reg_zero
model.add(keras.layers.ReLU()) # reg_one
model.add(keras.layers.Conv2D(filters=2, kernel_size=(1, 1), strides=(1, 1), use_bias=True)) # reg_two

### WH

# The ReLU is applied once by the explicit layer below (wh_one); no fused activation on the conv.
model.add(keras.layers.Conv2D(filters=256, kernel_size=(7, 7), strides=(1, 1), padding='same', use_bias=True)) # wh_zero
model.add(keras.layers.ReLU()) # wh_one
model.add(keras.layers.Conv2D(filters=8, kernel_size=(7, 7), strides=(1, 1), padding='same', activation=None)) # wh_two

# summary() needs a built model. Only the channel count is fixed: 64 input channels,
# matching the PyTorch conv1 (in_channels=64) and its 200,704 parameters.
# NOTE(review): confirm 64 is the intended number of input channels.
model.build(input_shape=(None, None, None, 64))

# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()

# BatchNormalization weights are one-dimensional (one value per channel), while Conv2D kernels are four-dimensional.
# ReLU does not have any weights: it sets values below zero to zero and leaves values above zero untouched.
# When a layer is trained, it does a lot of math, and the weights are the coefficients of that math.
# If the weights are not copied over, the results are going to be completely different.

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_2 (Conv2D)           (None, 1, 1, 64)          200704    
                                                                 
 batch_normalization_2 (Batc  (None, 1, 1, 64)         256       
 hNormalization)                                                 
                                                                 
 activation_2 (Activation)   (None, 1, 1, 64)          0         
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 1, 1, 64)         0         
 2D)                                                             
                                                                 
Total params: 200,960
Trainable params: 200,832
Non-trainable params: 128
_________________________________________________________________
None


In [None]:
%pip install torchinfo

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torchinfo
  Downloading torchinfo-1.7.2-py3-none-any.whl (22 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.7.2


In [None]:
import torch.nn as nn
from torchinfo import summary

class NeuralNet(nn.Module):
  """Container for the PyTorch layers that are being ported to Keras.

  Only the layers are declared (no ``forward`` is defined), so the class can be
  instantiated to inspect layer configurations and parameter counts, but it
  cannot be called on data.

  NOTE(review): ``conv1`` takes 64 input channels, which matches the recorded
  summary (200,704 parameters = 64 * 64 * 7 * 7); confirm this is intended.
  """

  def __init__(self):
    super(NeuralNet, self).__init__()

    ### BASE NETWORK

    self.conv1 = nn.Conv2d(64, 64, kernel_size=(7, 7), stride=(2, 2), padding = (3, 3), bias = False)
    self.bn1 = nn.BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    self.conv2 = nn.Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    self.bn2 = nn.BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

    ### LAYER 2 DOWNSAMPLE

    self.layer_2_zero = nn.Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
    self.layer_2_one = nn.BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

    ### LAYER 3 DOWNSAMPLE

    self.layer_3_zero = nn.Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
    self.layer_3_one = nn.BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

    ### LAYER 4 DOWNSAMPLE

    self.layer_4_zero = nn.Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
    self.layer_4_one = nn.BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

    ### DEC_C4 - UP

    self.dec_c4_up_zero = nn.Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    self.dec_c4_up_one = nn.BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    self.dec_c4_up_two = nn.ReLU(inplace=True)

    ### DEC_C4 - CAT_CONV

    self.dec_c4_cat_conv_zero = nn.Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    self.dec_c4_cat_conv_one = nn.BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    self.dec_c4_cat_conv_two = nn.ReLU(inplace=True)

    ### HM

    self.hm_zero = nn.Conv2d(64, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    self.hm_one = nn.ReLU(inplace=True)
    self.hm_two = nn.Conv2d(256, 1, kernel_size=(1, 1), stride=(1, 1))

    ### REG

    self.reg_zero = nn.Conv2d(64, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    self.reg_one = nn.ReLU(inplace=True)
    self.reg_two = nn.Conv2d(256, 2, kernel_size=(1, 1), stride=(1, 1))

    ### WH

    self.wh_zero = nn.Conv2d(64, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
    self.wh_one = nn.ReLU(inplace=True)
    self.wh_two = nn.Conv2d(256, 8, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))

# Instantiate the PyTorch network and list its layers and parameter counts with torchinfo.
# NOTE(review): this rebinds `model`, which an earlier cell assigned to the Keras Sequential model.
model = NeuralNet()
summary(model)

Layer (type:depth-idx)                   Param #
NeuralNet                                --
├─Conv2d: 1-1                            200,704
├─BatchNorm2d: 1-2                       128
├─ReLU: 1-3                              --
├─MaxPool2d: 1-4                         --
Total params: 200,832
Trainable params: 200,832
Non-trainable params: 0

In [None]:
import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable
from tensorflow import keras

def test_torch_keras_conversion(torch_layers, keras_layers, input_shape):
  """Check that a PyTorch layer stack and its Keras counterpart give the same output.

  Both stacks are wrapped in a Sequential model, the Keras weights are copied
  into the first PyTorch layer, the same random input is fed to both models and
  the largest absolute element-wise difference between the outputs is printed.

  Args:
    torch_layers: list of torch.nn modules; the layer holding weights (if any)
      must be at index 0.
    keras_layers: list of Keras layers. Weight-free layers such as
      ZeroPadding2D may precede the weighted layer.
    input_shape: channels-last input shape, e.g. (batch, height, width, channels).

  Limitation: the weight transfer only supports a single weighted layer
  (Conv2D or BatchNormalization) per stack.
  """
  pytorch_model = nn.Sequential(*torch_layers)
  pytorch_model.eval() # inference mode, so BatchNorm uses its running statistics
  keras_model = keras.Sequential(keras_layers)

  # Setup input
  input_np = np.random.uniform(0, 1, input_shape)
  input_var = torch.FloatTensor(input_np).permute(0, 3, 1, 2) # PyTorch's input needs to be channels first

  print(f"KERAS INPUT SHAPE: {input_np.shape}")
  print(f"TORCH INPUT SHAPE: {input_var.shape}")

  # Make predictions (this also builds the Keras model, which creates its weights)
  keras_model_output = keras_model.predict(input_np)
  keras_weights = keras_model.get_weights()

  # Very hacky code for making sure the weights get transferred properly for the different layers
  # Will not work if there are multiple weighted layers in torch_layers / keras_layers
  if len(keras_weights) > 0:
    target_layer = pytorch_model[0]
    contains_conv2d = any(isinstance(layer, keras.layers.Conv2D) for layer in keras_layers)
    contains_batchnorm = any(isinstance(layer, keras.layers.BatchNormalization) for layer in keras_layers)
    if contains_conv2d:
      # Keras kernels are (h, w, in, out); PyTorch expects (out, in, h, w)
      target_layer.weight.data = torch.from_numpy(np.transpose(keras_weights[0], [3, 2, 0, 1]))
      # Copy the bias too when both layers have one; otherwise the outputs cannot match
      if getattr(target_layer, "bias", None) is not None and len(keras_weights) > 1:
        target_layer.bias.data = torch.from_numpy(keras_weights[1])

    if contains_batchnorm:
      # Keras order: gamma, beta, moving_mean, moving_variance
      target_layer.weight.data = torch.from_numpy(keras_weights[0])
      target_layer.bias.data = torch.from_numpy(keras_weights[1])
      target_layer.running_mean.data = torch.from_numpy(keras_weights[2])
      target_layer.running_var.data = torch.from_numpy(keras_weights[3])

  with torch.no_grad(): # no gradients are needed for a pure comparison
    pytorch_model_output = pytorch_model(input_var).permute(0, 2, 3, 1).numpy() # back to channels last for comparison

  print(pytorch_model_output.shape)
  print(keras_model_output.shape)
  # Use the absolute difference: a plain max() would hide large negative deviations
  error = np.max(np.abs(pytorch_model_output - keras_model_output))
  print(f"Error: {error} is less than 1e-5? {error < 1e-5}")

In [None]:
# Conv2D check: PyTorch's padding=(3, 3) is reproduced in Keras by an explicit
# ZeroPadding2D layer in front of an unpadded convolution.
test_torch_keras_conversion(
    torch_layers=[
        nn.Conv2d(in_channels=3, out_channels=10, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False),
    ],
    keras_layers=[
        keras.layers.ZeroPadding2D(padding=(3, 3)),
        keras.layers.Conv2D(filters=10, kernel_size=(7, 7), strides=(2, 2), use_bias=False),
    ],
    input_shape=(1, 64, 64, 3),
)

KERAS INPUT SHAPE: (1, 64, 64, 3)
TORCH INPUT SHAPE: torch.Size([1, 3, 64, 64])
(1, 32, 32, 10)
(1, 32, 32, 10)
Error: 0.0 is less than 1-e5? True


In [None]:
# BATCHNORM TEST
# Keras' `momentum` is the decay of the moving average, i.e. 1 - PyTorch's `momentum`,
# so PyTorch momentum=0.1 corresponds to Keras momentum=0.9. Both models run in
# inference mode in this check, so the value does not affect the compared outputs.
test_torch_keras_conversion(
    [nn.BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)],
    [keras.layers.BatchNormalization(axis=3, momentum=0.9, epsilon=1e-05, center=True, scale=True)],
    (1, 32, 32, 64)
)

KERAS INPUT SHAPE: (1, 32, 32, 64)
TORCH INPUT SHAPE: torch.Size([1, 64, 32, 32])
Error: 5.960464477539063e-08 is less than 1-e5? True


In [None]:
# ReLU check: neither layer has weights, so only the activation itself is compared.
test_torch_keras_conversion(
    torch_layers=[nn.ReLU(inplace=True)],
    keras_layers=[keras.layers.ReLU()],
    input_shape=(1, 32, 32, 64),
)

KERAS INPUT SHAPE: (1, 32, 32, 64)
TORCH INPUT SHAPE: torch.Size([1, 64, 32, 32])
Error: 0.0 is less than 1-e5? True


In [None]:
# MaxPool check: PyTorch's padding=1 is reproduced in Keras by ZeroPadding2D(1)
# in front of an unpadded pooling layer.
# NOTE(review): ZeroPadding2D pads with zeros, whereas PyTorch's MaxPool2d is documented
# to pad with negative infinity. The helper draws inputs from [0, 1), so this check does
# not exercise negative activations - confirm before relying on it for signed inputs.
test_torch_keras_conversion(
    torch_layers=[nn.MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)],
    keras_layers=[
        keras.layers.ZeroPadding2D(padding=1),
        keras.layers.MaxPool2D(pool_size=3, strides=2),
    ],
    input_shape=(1, 32, 32, 64),
)

KERAS INPUT SHAPE: (1, 32, 32, 64)
TORCH INPUT SHAPE: torch.Size([1, 64, 32, 32])
Error: 0.0 is less than 1e5? True


In [None]:
# Testing individual layer outputs (moved to function above to test other types of layers)
import torch
import torch.nn as nn
import numpy as np
from tensorflow import keras
from torch.autograd import Variable
from torchinfo import summary

TORCH_INPUT_SHAPE = (1, 3, 32, 32) # (BATCH, CHANNEL, HEIGHT, WIDTH)
KERAS_INPUT_SHAPE = (1, 32, 32, 3) # (BATCH, HEIGHT, WIDTH, CHANNEL)

# Create models
pytorch_model = nn.Sequential(nn.Conv2d(in_channels = 3, out_channels = 10, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias = False))
pytorch_model.eval()
keras_model = keras.Sequential()
keras_model.add(keras.layers.ZeroPadding2D(padding=(3,3))) # replicates PyTorch's padding=(3, 3)
keras_model.add(keras.layers.Conv2D(filters=10, kernel_size=(7, 7), strides=(2, 2), use_bias=False))

# Setup input
input_np = np.random.uniform(0, 1, KERAS_INPUT_SHAPE)
input_var = Variable(torch.FloatTensor(input_np).permute(0, 3, 1, 2)) # PyTorch's input needs to be reshaped to channels first
assert tuple(input_var.shape) == TORCH_INPUT_SHAPE, "permuted input does not match TORCH_INPUT_SHAPE"

# Make predictions
keras_model_output = keras_model.predict(input_np)
keras_weights = keras_model.get_weights()
# Keras kernels are (h, w, in, out); PyTorch expects (out, in, h, w)
pytorch_model[0].weight.data = torch.from_numpy(np.transpose(keras_weights[0], [3, 2, 0, 1])) # copy weights from keras to pytorch
pytorch_model_output = pytorch_model(input_var).permute(0, 2, 3, 1).data.numpy() # Reshape back to channels last for comparison

print(f"TORCH OUTPUT SHAPE: {pytorch_model_output.shape}")
print(f"KERAS OUTPUT SHAPE: {keras_model_output.shape}")

# Use the absolute difference: a plain max() would hide large negative deviations
error = np.max(np.abs(pytorch_model_output - keras_model_output))
print(f"Error: {error}")

print("\nTORCH MODEL:")
print(summary(pytorch_model))
print("\nKERAS MODEL: ")
# Keras' summary() prints the table itself and returns None, so it is not wrapped in print()
keras_model.summary()

In [None]:
import torch.nn as nn
import torch
from .model_parts import CombinationModule

class DecNet(nn.Module):
    """Decoder made of three CombinationModule stages plus one conv head per output.

    Args:
        heads: mapping from head name to the number of output channels of that head.
        final_kernel: kernel size of the last convolution of every head except 'wh'.
        head_conv: number of channels between the two convolutions of a head.
        channel: number of input channels of every head.
    """

    def __init__(self, heads, final_kernel, head_conv, channel):
        super().__init__()
        self.dec_c2 = CombinationModule(128, 64, batch_norm=True)
        self.dec_c3 = CombinationModule(256, 128, batch_norm=True)
        self.dec_c4 = CombinationModule(512, 256, batch_norm=True)
        self.heads = heads

        for name in self.heads:
            out_channels = self.heads[name]
            # The 'wh' head uses 7x7 kernels for both convolutions; every other
            # head uses a 3x3 kernel followed by a `final_kernel`-sized one.
            first_k, last_k = (7, 7) if name == 'wh' else (3, final_kernel)
            branch = nn.Sequential(
                nn.Conv2d(channel, head_conv, kernel_size=first_k, padding=first_k // 2, bias=True),
                nn.ReLU(inplace=True),
                nn.Conv2d(head_conv, out_channels, kernel_size=last_k, stride=1,
                          padding=last_k // 2, bias=True),
            )

            if 'hm' not in name:
                self.fill_fc_weights(branch)
            else:
                # Heads whose name contains 'hm' get a constant bias on their last conv
                branch[-1].bias.data.fill_(-2.19)

            setattr(self, name, branch)

    def fill_fc_weights(self, layers):
        """Set the bias of every Conv2d inside `layers` to zero (when it has one)."""
        for module in layers.modules():
            if not isinstance(module, nn.Conv2d):
                continue
            if module.bias is None:
                continue
            nn.init.constant_(module.bias, 0)

    def forward(self, x):
        """Merge the last four entries of `x` and run every head on the result.

        `x` is indexed from the end: x[-1] and x[-2] feed dec_c4, then x[-3]
        feeds dec_c3 and x[-4] feeds dec_c2. Returns a dict keyed by head name;
        heads whose name contains 'hm' are passed through a sigmoid.
        """
        merged = self.dec_c4(x[-1], x[-2])
        merged = self.dec_c3(merged, x[-3])
        merged = self.dec_c2(merged, x[-4])

        outputs = {}
        for name in self.heads:
            prediction = self.__getattr__(name)(merged)
            outputs[name] = torch.sigmoid(prediction) if 'hm' in name else prediction
        return outputs

In [None]:
from tensorflow.python.ops.gen_batch_ops import Batch
from keras.layers import Conv2D, BatchNormalization, ReLU, Concatenate, UpSampling2D
from tensorflow import keras

# Define CombinationModule
class CombinationModule(keras.layers.Layer):
  """Upsample a feature map, refine it, and fuse it with an encoder feature map.

  Args:
    filters: number of output channels of both the 3x3 "up" convolution and
      the 1x1 "cat_conv" convolution.
    **kwargs: forwarded to keras.layers.Layer.

  Call input: a pair ``(x, encoder_output)``; ``x`` is upsampled by 2 in both
  spatial dimensions before being concatenated with ``encoder_output`` along
  the last axis.

  NOTE(review): UpSampling2D and BatchNormalization are used with their Keras
  defaults; confirm these match the PyTorch module this layer mirrors.
  """

  def __init__(self, filters, **kwargs):
    super(CombinationModule, self).__init__(**kwargs)

    # Weight-free helper layers are created once here instead of on every call
    self.upsample = UpSampling2D(size=(2, 2))
    self.concat = Concatenate(axis=-1)

    self.up = keras.models.Sequential([
        Conv2D(filters=filters, kernel_size=3, strides=1, padding="same"),
        BatchNormalization(),
        ReLU()
    ])

    self.cat_conv = keras.models.Sequential([
        Conv2D(filters=filters, kernel_size=1, strides=1),
        BatchNormalization(),
        ReLU()
    ])

  def call(self, inputs):
    x, encoder_output = inputs

    # Upsample x
    x = self.upsample(x)
    x = self.up(x)

    # Concatenate x and encoder_output along the channel axis.
    # Concatenate is a layer: it is constructed with `axis` and then called on the list.
    x = self.concat([x, encoder_output])

    # Apply 1x1 convolution
    x = self.cat_conv(x)

    return x