In [1]:
import os
import numpy as np 
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, concatenate , LeakyReLU, BatchNormalization , ReLU , Embedding , LayerNormalization, MultiHeadAttention, Add
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
# from keras.engine import Layer, InputSpec
# from tensorflow.keras.engine.topology import Layer
import keras.backend as K
import pandas as pd
import numpy as np
import ast
import cv2
# import imutils
import matplotlib.pyplot as plt
from PIL import Image

In [2]:
# Source locations (names suggest: generated images plus PubTables-1M
# structure annotations and table-word files -- TODO confirm contents).
# NOTE(review): these are absolute, machine-specific paths.
data_path = 'D:/learn/de cuong/code/split/data_generated/image/'
source_xml_path = 'D:/learn/de cuong/code/split/data_pubtables_1M/PubTables-1M-Structure_Annotations_Test/'
source_word_path = 'D:/learn/de cuong/code/split/data_pubtables_1M/PubTables-1M-Structure_Table_Words/'

# Presumably the directories where row / column text files are saved
# (inferred from the variable names; nothing in this view writes to them).
saved_txt_row_path = "D:/learn/de cuong/code/merge/data_merge_cell/row/"
saved_txt_col_path = "D:/learn/de cuong/code/merge/data_merge_cell/col/"

In [3]:
def convert_String_to_array(string_array):
    """Parse the textual form of a Python literal and return the object.

    Args:
        string_array: String containing a Python literal, e.g. the text
            ``"[[1, 2], [3, 4]]"``.

    Returns:
        Whatever ``ast.literal_eval`` produces for the string (a nested
        list for the example above).

    Raises:
        ValueError / SyntaxError: Propagated from ``ast.literal_eval`` when
            the string is not a valid literal.
    """
    return ast.literal_eval(string_array)

In [4]:
def identity_block(input_tensor, kernel_size, filter_num, stage):
    """Residual block whose shortcut is the unmodified input tensor.

    Main path: 1x1 conv -> batch norm -> ReLU -> ``kernel_size`` conv with
    'same' padding -> batch norm -> ReLU. The main-path output is added to
    ``input_tensor`` and the sum goes through a final ReLU.

    Args:
        input_tensor: Tensor fed to the block; it is also added to the
            main-path output, so the two must have addable shapes.
        kernel_size: Kernel size of the second convolution.
        filter_num: Number of filters for both convolutions.
        stage: Not used by this function.

    Returns:
        The tensor produced by the final ReLU.
    """
    # Layers are instantiated in the order they are applied.
    main_path = [
        layers.Conv2D(filter_num, (1, 1)),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.Conv2D(filter_num, kernel_size, padding='same'),
        layers.BatchNormalization(),
        layers.Activation('relu'),
    ]

    out = input_tensor
    for layer in main_path:
        out = layer(out)

    # Identity shortcut followed by the closing activation.
    merged = layers.add([out, input_tensor])
    return layers.Activation('relu')(merged)

In [5]:
def conv_block(input_tensor,kernel_size,filter_num,stage,strides=(2, 2)):
    """Residual block with a projected (1x1 convolution) shortcut.

    Main path: strided 1x1 conv -> batch norm -> ReLU -> ``kernel_size``
    conv with 'same' padding -> batch norm -> ReLU.
    Shortcut: strided 1x1 conv -> batch norm, applied to ``input_tensor``.
    The two branches are added and passed through a final ReLU.

    Args:
        input_tensor: Tensor fed to both branches.
        kernel_size: Kernel size of the second main-path convolution.
        filter_num: Number of filters for every convolution in the block.
        stage: Not used by this function.
        strides: Strides of the first main-path convolution and of the
            shortcut convolution.

    Returns:
        The tensor produced by the final ReLU.
    """
    # Main branch (built before the shortcut, as layers are created in order).
    main_path = [
        layers.Conv2D(filter_num, (1, 1), strides=strides),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.Conv2D(filter_num, kernel_size, padding='same'),
        layers.BatchNormalization(),
        layers.Activation('relu'),
    ]
    out = input_tensor
    for layer in main_path:
        out = layer(out)

    # Projection shortcut so both branches have the same shape.
    projected = layers.Conv2D(filter_num, (1, 1), strides=strides)(input_tensor)
    projected = layers.BatchNormalization()(projected)

    merged = layers.add([out, projected])
    return layers.Activation('relu')(merged)

In [6]:
def res_net_18(input_tensor):
    """Build the residual backbone and return one feature map per stage.

    Stem: 7x7 stride-2 convolution (layer name ``'input'``) -> batch norm ->
    ReLU -> 3x3 stride-2 max pooling. Each of the four stages then applies
    one ``conv_block`` followed by one ``identity_block`` with 64, 128, 256
    and 512 filters; the first stage uses strides (1, 1), the others use
    ``conv_block``'s default strides.

    Args:
        input_tensor: Tensor fed to the stem convolution.

    Returns:
        Tuple ``(c2, c3, c4, c5)`` with the output of each stage's
        ``identity_block``.
    """
    # Stem.
    features = layers.Conv2D(64, (7, 7), strides=(2, 2), padding='same', name='input')(input_tensor)
    features = layers.BatchNormalization()(features)
    features = layers.Activation('relu')(features)
    features = layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same')(features)

    # (stage id, filters, strides of the stage's conv_block)
    stage_specs = (
        (2, 64, (1, 1)),
        (3, 128, (2, 2)),
        (4, 256, (2, 2)),
        (5, 512, (2, 2)),
    )

    stage_outputs = []
    for stage_id, filters, strides in stage_specs:
        features = conv_block(features, 3, filters, stage=stage_id, strides=strides)
        features = identity_block(features, 3, filters, stage=stage_id)
        stage_outputs.append(features)

    return tuple(stage_outputs)

In [7]:
def lateral(x, out_channels):
    """Apply a 1x1 convolution ('same' padding) with ``out_channels`` filters.

    Args:
        x: Input tensor.
        out_channels: Number of filters of the convolution.

    Returns:
        The convolved tensor.
    """
    projection = layers.Conv2D(out_channels, (1, 1), padding='same')
    return projection(x)
def upsampling_add(x,y):
    """Bilinearly resize ``x`` to the height/width of ``y`` and add them.

    Args:
        x: Tensor to resize.
        y: 4-D tensor whose static shape supplies the target height and
            width (its shape is unpacked into exactly four values).

    Returns:
        ``layers.add`` of the resized ``x`` and ``y``.
    """
    # Only the two middle dimensions of y's shape are needed.
    _, target_h, target_w, _ = y.shape
    resized = tf.image.resize(x, (target_h, target_w), method='bilinear')
    return layers.add([resized, y])
def smooth(x):
    """Apply a 3x3, stride-1, 'same'-padded convolution with 256 filters.

    Args:
        x: Input tensor.

    Returns:
        The convolved tensor.
    """
    smoothing_conv = Conv2D(256, kernel_size=3, strides=1, padding='same')
    return smoothing_conv(x)

In [8]:
def transformer_encoder(inputs, num_heads, mlp_dim, dropout_rate):
    """Encoder block: normalised self-attention and MLP, each with a skip.

    NOTE: a later cell defines another ``transformer_encoder`` taking a
    single argument; once that cell has run, this four-argument version is
    no longer reachable under this name.

    Args:
        inputs: Input tensor; the size of its last axis is used both as the
            attention ``key_dim`` and as the width of the final Dense layer.
        num_heads: Number of attention heads.
        mlp_dim: Width of the hidden Dense layer of the MLP.
        dropout_rate: Dropout rate used inside the attention layer and
            after the hidden Dense layer.

    Returns:
        Tensor produced by the second skip connection.
    """
    model_dim = inputs.shape[-1]

    # Attention sub-block: layer norm -> self-attention -> residual add.
    normed = layers.LayerNormalization(epsilon=1e-6)(inputs)
    attention = layers.MultiHeadAttention(
        num_heads=num_heads, key_dim=model_dim, dropout=dropout_rate
    )
    attended = attention(normed, normed)
    attn_out = layers.Add()([attended, inputs])

    # Feed-forward sub-block: layer norm -> Dense(GELU) -> dropout -> Dense.
    hidden = layers.LayerNormalization(epsilon=1e-6)(attn_out)
    hidden = layers.Dense(mlp_dim, activation=tf.nn.gelu)(hidden)
    hidden = layers.Dropout(dropout_rate)(hidden)
    hidden = layers.Dense(model_dim)(hidden)

    return layers.Add()([hidden, attn_out])

In [9]:
# Load the column and row tables. The relative file names are resolved
# against the kernel's current working directory.
# NOTE(review): the CSV schema is not visible in this notebook view.
data_col = pd.read_csv('data_col.csv')
data_row = pd.read_csv('data_row.csv')


In [14]:
class Roi_Pooling(layers.Layer):
    """Crops one region per ROI from a feature map and resizes it to a fixed grid.

    Args:
        pool_size: Indexable pair; ``pool_size[0]`` and ``pool_size[1]`` are
            passed as the ``crop_size`` of ``tf.image.crop_and_resize``.
    """

    def __init__(self, pool_size, **kwargs):
        super().__init__(**kwargs)
        self.pool_size = pool_size

    def call(self, feature_map, rois):
        """Crop and resize every ROI out of ``feature_map``.

        Args:
            feature_map: 4-D tensor (its static shape is unpacked into
                exactly four values).
            rois: 2-D tensor with one row per ROI. The first column is
                dropped and the remaining columns are used as the box.
                NOTE(review): ``tf.image.crop_and_resize`` documents boxes
                as normalised ``[y1, x1, y2, x2]``; the original note here
                described rois as ``[x1, y1, x2, y2]`` -- confirm the
                column order and scaling produced by the caller.

        Returns:
            The tensor returned by ``tf.image.crop_and_resize``.
        """
        # The unpacked values are unused; the unpacking itself fails unless
        # the feature map has four dimensions.
        _batch, _height, _width, _channels = feature_map.get_shape().as_list()

        # Every ROI is read from batch element 0.
        roi_count = tf.shape(rois)[0]
        box_to_image = tf.zeros(shape=roi_count, dtype=tf.int32)

        # Skip the leading column of each ROI row.
        box_coords = rois[:, 1:]

        target_size = [self.pool_size[0], self.pool_size[1]]
        return tf.image.crop_and_resize(
            feature_map,
            box_coords,
            box_indices=box_to_image,
            crop_size=target_size,
        )

    def get_config(self):
        """Return the base layer config extended with ``pool_size``."""
        config = super().get_config()
        config['pool_size'] = self.pool_size
        return config

In [11]:
class PatchEncoder(layers.Layer):
    """Adds learned x/y positional embeddings to a sequence of patch embeddings.

    Patch ``i`` gets the x-embedding of index ``i % grid_width`` plus the
    y-embedding of index ``i // grid_width`` added to its own embedding.

    Args:
        num_patches: Number of patches to encode; also the ``input_dim`` of
            both embedding tables.
        projection_dims: Size of each positional embedding vector.
        grid_width: Number of patches per grid row, used to turn a flat
            patch index into (x, y) indices. Defaults to 50, the value that
            was previously hard-coded.
        **kwargs: Forwarded to ``layers.Layer``.

    Raises:
        ValueError: If ``grid_width`` is not positive.
    """

    def __init__(self, num_patches = 2500, projection_dims = 512, grid_width = 50, **kwargs):
        super().__init__(**kwargs)
        if grid_width <= 0:
            raise ValueError(f"grid_width must be positive, got {grid_width}")
        self.num_patches = num_patches
        self.num_dims = projection_dims
        self.grid_width = grid_width

        self.x_positional_embeddings = Embedding(input_dim=num_patches, output_dim=projection_dims)
        self.y_positional_embeddings = Embedding(input_dim=num_patches, output_dim=projection_dims)

    def call(self, x):
        """Return the position-encoded patches of the first batch element.

        Args:
            x: Tensor indexed as ``x[0, i, :]`` for ``i < num_patches``
               (assumed shape ``(1, num_patches, projection_dims)`` -- TODO
               confirm against the caller).

        Returns:
            Tensor of shape ``(num_patches, projection_dims)``; only batch
            element 0 of ``x`` is used.
        """
        # All patches are handled in one vectorised lookup instead of one
        # embedding call + concat entry per patch.
        patch_ids = tf.range(0, limit=self.num_patches, delta=1)
        x_idx = patch_ids % self.grid_width
        y_idx = patch_ids // self.grid_width

        positional = self.x_positional_embeddings(x_idx) + self.y_positional_embeddings(y_idx)
        return positional + x[0, :self.num_patches, :]

    def get_config(self):
        """Return a config whose keys match the ``__init__`` parameters.

        The keys must equal the constructor argument names so that
        ``from_config`` can rebuild the layer.
        """
        config = super().get_config()
        config.update({
            "num_patches": self.num_patches,
            "projection_dims": self.num_dims,
            "grid_width": self.grid_width,
        })
        return config


In [12]:
def mlp(x):
    """Feed-forward block: Dense(3072, GELU) -> Dropout -> Dense(512) -> Dropout.

    Args:
        x: Input tensor.

    Returns:
        Tensor after the second dropout (dropout rate 0.1 for both).
    """
    expanded = Dense(3072, activation="gelu")(x)
    expanded = Dropout(0.1)(expanded)
    projected = Dense(512)(expanded)
    return Dropout(0.1)(projected)
def transformer_encoder(x):
    """Encoder block with fixed hyper-parameters (10 heads, key_dim 512).

    Applies layer norm -> self-attention -> residual add, then layer norm ->
    ``mlp`` -> residual add.

    NOTE: this redefines the four-argument ``transformer_encoder`` from an
    earlier cell; after this cell runs, the name refers to this version.

    Args:
        x: Input tensor; it must be addable to the attention output and to
            the 512-wide output of ``mlp``.

    Returns:
        Tensor produced by the second residual add.
    """
    # Self-attention sub-block.
    attn_input = LayerNormalization()(x)
    self_attention = MultiHeadAttention(num_heads=10, key_dim=512)
    attended = self_attention(attn_input, attn_input)
    after_attention = Add()([attended, x])

    # Feed-forward sub-block.
    ff_input = LayerNormalization()(after_attention)
    ff_output = mlp(ff_input)
    return Add()([ff_output, after_attention])

In [20]:
INPUT_SHAPE = (1024, 1024, 3)

# Both inputs use a fixed batch size of 1.
input_tensor = Input(shape=INPUT_SHAPE, batch_size=1)
# One 5-value box per sample. The shape is written as the tuple (5,):
# a bare (5) is just the integer 5, which is not a valid shape everywhere.
input_cell_box = Input(shape=(5,), batch_size=1)

# Backbone feature maps, one per residual stage.
c2, c3, c4, c5 = res_net_18(input_tensor)

# Top-down pathway: project every stage to 256 channels and merge each
# level with the resized coarser one.
m5 = lateral(c5, 256)

m4 = upsampling_add(m5, lateral(c4, 256))
m3 = upsampling_add(m4, lateral(c3, 256))
m2 = upsampling_add(m3, lateral(c2, 256))

# 3x3 smoothing of every merged map. Only p2 is consumed below.
p5 = smooth(m5)
p4 = smooth(m4)
p3 = smooth(m3)
p2 = smooth(m2)

p2_2 = Conv2D(128, kernel_size=3, strides=1, padding='same', name="p2_2")(p2)

# Crop the box given by input_cell_box out of p2_2 and resize it to 7x7.
crops = Roi_Pooling(pool_size=(7, 7))(p2_2, input_cell_box)

# Patch embedding: flatten the pooled crop and project it to 512 dims.
embedded_patches = tf.keras.layers.Flatten()(crops)

embedded_patches = layers.Dense(512)(embedded_patches)
# 'relu' is the activation identifier used everywhere else in this file;
# 'ReLU' is a layer class name and is not a valid identifier in every
# Keras version.
embedded_patches = layers.Activation('relu')(embedded_patches)
embedded_patches = layers.Dense(512)(embedded_patches)

# --- Transformer head, currently disabled ---------------------------------
# encoded = PatchEncoder()(embedded_patches) # 50*50 patches, each patch has 512 dims
# encoded = tf.expand_dims(encoded, axis = 0)

# cls_token = tf.zeros((1, 1, 512))

# x = tf.concat([cls_token, encoded], axis = 1)
# print("here")
# for _ in range(10):
#     x = transformer_encoder(x)
# x = LayerNormalization()(x) 
# x = x[:, 0, :] 
# x = Dropout(0.1)(x)
# x = Dense(50*50, activation="softmax")(x)

# x_sub = Dense(50*50, activation="softmax")(x)
# x_sub = Dropout(0.1)(x_sub)
# row = Dense(50*50, activation="softmax" , name = "out_row")(x_sub)
# col = Dense(50*50, activation="softmax" , name = "out_col")(x_sub)

input_ = [input_tensor, input_cell_box]
# out = [row, col]

# With the head disabled, the model output is the 512-dim embedding.
model = Model(inputs=input_, outputs=embedded_patches)

In [19]:
# Inspect the static shape of the ROI-pooled crops.
crops.shape

TensorShape([1, 7, 7, 128])

In [32]:
# NOTE(review): the loss dict is keyed by 'row' and 'col', but the model
# built above has a single output (the 512-dim embedding) and the disabled
# head names its outputs "out_row" / "out_col". Confirm the intended output
# names before training; the keys are left unchanged here.
model.compile(
    loss={
        'row': 'binary_crossentropy',
        'col': 'binary_crossentropy',
    },
    optimizer='adam',
)

In [21]:
# Inspect the shape of the model's output tensor.
model.output_shape

(1, 512)

In [35]:
# Print the layer-by-layer summary of the model.
model.summary()

Model: "model_4"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_17 (InputLayer)          [(1, 1024, 1024, 3)  0           []                               
                                ]                                                                 
                                                                                                  
 input (Conv2D)                 (1, 512, 512, 64)    9472        ['input_17[0][0]']               
                                                                                                  
 batch_normalization_168 (Batch  (1, 512, 512, 64)   256         ['input[0][0]']                  
 Normalization)                                                                                   
                                                                                            

In [37]:
# from tensorflow.keras.utils import plot_model
# plot_model(model, to_file='model.png', show_shapes=True)