In [3]:
%load_ext autoreload
%autoreload 2

import os

# Work from the project root so that `src.*` imports and `./data/...` paths
# resolve (presumably the notebook lives one level below the root - confirm).
# Guarded on the presence of `src/` so re-running this cell is a no-op instead
# of climbing one more directory on every execution.
if not os.path.isdir('src'):
    os.chdir('..')

In [4]:
from src.dataset_utils import DatasetLoader
from src.retrieval_model import FullyConnected
import tensorflow as tf
import numpy as np

In [3]:
# Load the Flickr30k test-split features: VGG image features first,
# sentence (text) features second.
loader = DatasetLoader(
    "./data/flickr30k_vgg_image_feat_test.mat",
    "./data/flickr30k_text_feat_test.mat",
)

Loading image features from ./data/flickr30k_vgg_image_feat_test.mat
Loaded image feature shape: (1000, 4096)
Loading sentence features from ./data/flickr30k_text_feat_test.mat
Loaded sentence feature shape: (5000, 6000)


In [10]:
# Synthetic stand-in arrays whose feature widths match the model inputs used
# in this notebook (4096 for images, 6000 for sentences). Values come from a
# seeded, local generator so Restart & Run All reproduces the same data and
# the global NumPy random state is left untouched.
SEED = 0
N_INPUTS = 500
OUTPUTS_PER_INPUT = 2
IMG_FEAT_DIM = 4096
SENT_FEAT_DIM = 6000

rng = np.random.RandomState(SEED)
inputs = rng.rand(N_INPUTS, IMG_FEAT_DIM)
outputs = rng.rand(N_INPUTS * OUTPUTS_PER_INPUT, SENT_FEAT_DIM)
# NOTE(review): labels are uniform random floats of shape (outputs, inputs),
# presumably a placeholder for an output-to-input match matrix - confirm.
labels = rng.rand(N_INPUTS * OUTPUTS_PER_INPUT, N_INPUTS)

In [7]:
import tensorflow.keras.layers as layers
from tensorflow.keras.models import Model


def fully_connected(hidden=2048, dropout=0.25):
    """Create a Dense -> BatchNormalization -> ReLU -> Dropout stage.

    Args:
        hidden: Unit count handed to the ``Dense`` layer.
        dropout: Rate handed to the ``Dropout`` layer.

    Returns:
        A one-argument function that applies the four layers, in order, to the
        tensor it receives and returns the resulting tensor.
    """
    def _apply(tensor):
        # New layer objects are built on every call, so two uses of the
        # returned function never share weights.
        stage = (
            layers.Dense(hidden),
            layers.BatchNormalization(),
            layers.ReLU(),
            layers.Dropout(dropout),
        )
        for layer in stage:
            tensor = layer(tensor)
        return tensor
    return _apply


def _embedding_branch(in_layer, embed_dim, hidden, dropout, name):
    """Project one modality's input tensor to a unit-length embedding."""
    x = fully_connected(hidden=hidden, dropout=dropout)(in_layer)
    x = layers.Dense(embed_dim)(x)
    # The raw TF op is wrapped in a Lambda layer so the graph consists only of
    # Keras layers; newer Keras releases reject plain tf ops on symbolic
    # tensors. axis=1 normalizes each row (one sample) independently, and
    # epsilon guards against division by zero for all-zero rows.
    return layers.Lambda(
        lambda t: tf.nn.l2_normalize(t, axis=1, epsilon=1e-10),
        name=name,
    )(x)


def embedding_model(img_feats, sent_feats, embed_dim=512, hidden=2048, dropout=0.25):
    """Build a two-branch model embedding images and sentences in one space.

    Each branch is: fully_connected stage -> Dense(embed_dim) -> L2 row
    normalization. The branches do not share weights.

    Args:
        img_feats: Shape tuple of one image feature vector, e.g. ``(4096,)``.
        sent_feats: Shape tuple of one sentence feature vector, e.g. ``(6000,)``.
        embed_dim: Width of the shared embedding space (default 512).
        hidden: Hidden units of each branch's fully connected stage.
        dropout: Dropout rate of each branch's fully connected stage.

    Returns:
        A Keras ``Model`` taking ``[image_input, sentence_input]`` and
        returning ``[image_embedding, sentence_embedding]``.
    """
    img_in = layers.Input(shape=img_feats)
    sent_in = layers.Input(shape=sent_feats)

    img_embedded = _embedding_branch(img_in, embed_dim, hidden, dropout, "img_l2_norm")
    sent_embedded = _embedding_branch(sent_in, embed_dim, hidden, dropout, "sent_l2_norm")

    return Model(inputs=[img_in, sent_in], outputs=[img_embedded, sent_embedded])


In [8]:
# Instantiate the two-branch model for 4096-d image and 6000-d sentence features.
m = embedding_model(img_feats=(4096,), sent_feats=(6000,))

In [9]:
# Print the layer-by-layer table: output shapes, parameter counts, connectivity.
m.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 4096)]       0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 6000)]       0                                            
__________________________________________________________________________________________________
dense (Dense)                   (None, 2048)         8390656     input_1[0][0]                    
__________________________________________________________________________________________________
dense_2 (Dense)                 (None, 2048)         12290048    input_2[0][0]                    
______________________________________________________________________________________________