In [None]:
import json
import os
import re
import sys

import tensorflow as tf
from tensorflow.keras import models, Model, mixed_precision
from tensorflow.keras.layers import *
from tensorflow.keras.utils import plot_model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow_hub as hub
import tensorflow_text as text  # imported for its side effect: registers the ops used by the TF Hub BERT preprocessing model
import numpy as np
import pandas as pd

# Runtime setup: let TensorFlow allocate GPU memory on demand instead of
# reserving it all up front, then enable mixed precision for the notebook.
physical_devices = tf.config.list_physical_devices('GPU')
try:
    # Memory growth has to be configured identically on every visible GPU,
    # so apply it to all of them rather than only the first one.
    for gpu in physical_devices:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as err:
    # Raised when the GPUs were already initialised (e.g. this cell is re-run
    # in a live kernel); the existing setting stays in effect.
    print(f'Could not change GPU memory growth: {err}')

if not physical_devices:
    print('No GPU detected: mixed_float16 will run on CPU and is likely to be slow.')

mixed_precision.set_global_policy('mixed_float16')
print(f'Running on Python {sys.version}, Tensorflow {tf.__version__}.')

In [None]:
# Load data
# data.json is expected to hold a list of records, each with a 'title' and a
# 'desc' field (this is how the records are indexed below).
with open('data.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Titles and descriptions interleaved: title, desc, title, desc, ...
text_data = [field for item in data for field in (item['title'], item['desc'])]
# One title per record, in file order. `data` is a list, so the titles have to
# be collected per record; indexing the list with 'title' raises TypeError.
title_data = [item['title'] for item in data]
# TODO: remove all items where more than 50% of the text is not English

In [None]:
# Model: predicts a description token id from a title, a category and an image.
#
# NOTE(review): `vocab_size` and the training arrays `title`, `category`,
# `img` and `description` are not defined in the visible cells; an earlier
# cell has to create them before this one can run.

BERT_PREPROCESS_URL = 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3'
# NOTE(review): the encoder is an assumption; any BERT encoder trained with
# the uncased English vocabulary of the preprocessing model above can be used.
BERT_ENCODER_URL = 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-512_A-8/1'
VALIDATION_SPLIT = 0.2

title_input = Input(shape=(), name='title_input', dtype=tf.string)
category_input = Input(shape=(), name='category_input', dtype=tf.string)
img_input = Input(shape=(224, 224, 3), name='img_input', dtype=tf.float32)

# The preprocessing model only tokenises: it returns a dict of token ids,
# masks and type ids. The encoder turns that dict into one pooled sentence
# vector per example, which is what can be concatenated with other features.
# Both layers are shared between the title and category branches.
preprocessing_layer = hub.KerasLayer(BERT_PREPROCESS_URL, name='bert_preprocessing')
encoder_layer = hub.KerasLayer(BERT_ENCODER_URL, trainable=False, name='bert_encoder')
title_embeddings = encoder_layer(preprocessing_layer(title_input))['pooled_output']
category_embeddings = encoder_layer(preprocessing_layer(category_input))['pooled_output']

# Extract image features.
# NOTE(review): minimal convolutional baseline standing in for the missing
# feature extractor; replace with a pretrained backbone once the expected
# pixel range of `img` is confirmed.
img_features = Conv2D(32, 3, strides=2, activation='relu', name='img_conv1')(img_input)
img_features = Conv2D(64, 3, strides=2, activation='relu', name='img_conv2')(img_features)
img_features = GlobalAveragePooling2D(name='img_features')(img_features)

x = Concatenate(name='fused_features')([title_embeddings, category_embeddings, img_features])
# Keep the logits in float32: under the mixed_float16 policy a float16 output
# layer can make the loss numerically unstable.
description_output = Dense(vocab_size, name='description_output', dtype='float32')(x)

model = Model(inputs=[title_input, category_input, img_input], outputs=[description_output])
plot_model(model, "model.png", show_shapes=True)
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-3),
    loss=[tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)],
    # Required so that 'val_accuracy' exists for the callbacks below.
    metrics=['accuracy'],
)

# NOTE(review): both patience values are >= the 10 training epochs, so neither
# callback can trigger unless `epochs` is raised or the patience is lowered.
callbacks = [
    tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', min_delta=0, patience=13, verbose=1,
                                     mode='auto', baseline=None, restore_best_weights=True),
    tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', factor=0.1, patience=10, verbose=1)
]
history = model.fit(
    {"title_input": title, "category_input": category, "img_input": img},
    {"description_output": description},
    batch_size=32,
    epochs=10,
    # Hold out the tail of the data so the monitored validation metric is computed.
    validation_split=VALIDATION_SPLIT,
    callbacks=callbacks,
    verbose=1,
)