In [None]:
import os
# Select the Keras backend via the environment; this assignment runs before
# `import keras` below so the choice is in place when Keras is imported.
os.environ["KERAS_BACKEND"] = "tensorflow"

import keras
import keras_nlp
import tensorflow as tf
import tensorflow.data as tf_data
import tensorflow.strings as tf_strings
import tensorflow.io as tf_io

# Set the global dtype policy to "mixed_float16" for everything built after this call.
# NOTE(review): this runs before the TPU/GPU detection cell; TPUs generally want
# "mixed_bfloat16" rather than float16 — confirm before training on a TPU.
keras.mixed_precision.set_global_policy("mixed_float16")

In [None]:
# Pick a distribution strategy: use the TPU when one can be resolved, otherwise
# fall back to TensorFlow's default (single-device) strategy.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    # The resolver raises ValueError when no TPU is configured for this runtime.
    tpu = None

if tpu:
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    # `tf.distribute.experimental.TPUStrategy` is deprecated; use the stable alias.
    strategy = tf.distribute.TPUStrategy(tpu)
else:
    strategy = tf.distribute.get_strategy()

print("REPLICAS: ", strategy.num_replicas_in_sync)

# Query the physical GPUs once and reuse the result for both reports.
gpus = tf.config.list_physical_devices('GPU')
print("GPUS: ", gpus)

# Check GPU availability (`tf.test.is_gpu_available()` is deprecated in favour
# of inspecting the physical device list).
print("GPU Available:", bool(gpus))

# Check TPU availability: TPU cores only show up as logical devices after the
# TPU system has been initialised above.
devices = tf.config.list_logical_devices()
tpu_available = any(device.device_type == 'TPU' for device in devices)

print("TPU Available:", tpu_available)


---
**Define model constants**

In [None]:
# --- Data pipeline ---
BATCH_SIZE = 64  # number of recipes per training batch
SEQ_LEN = 512    # sequence length handed to the GPT-2 preprocessor

# --- Training loop ---
EPOCHS = 40      # number of passes over the dataset

---
**Define the dataset as strings of full recipes**

To keep training manageable on a laptop, we load the data into a TensorFlow dataset object. This lets us stream records into memory as they are needed, as opposed to loading everything at once.

In [None]:
def csv_row_to_json(row):
    """Turn one raw CSV line into a JSON-formatted recipe string.

    The line is parsed as a 7-column CSV record in which every column is a
    required string. Columns 1, 2, 3 and 6 are used as the title, ingredients,
    directions and NER fields respectively.

    Args:
        row: scalar string tensor holding a single CSV line.

    Returns:
        A scalar string tensor of the form
        ``{"ner": ..., "title": "...", "ingredients": ..., "directions": ...}``.

    Raises:
        A TensorFlow parsing error when the line is not a valid 7-column record
        (empty ``record_defaults`` make every column required).
    """
    columns = tf_io.decode_csv(
        records=row,
        record_defaults=[tf.constant([], dtype=tf.string)] * 7,
    )

    title = columns[1]
    ingredients = columns[2]
    directions = columns[3]
    ner = columns[6]

    # The title is wrapped in double quotes below, so escape any backslash or
    # double quote it contains; otherwise such titles produce malformed JSON.
    # In the RE2 rewrite string, `\\` emits a literal backslash and `\1` the match.
    title = tf_strings.regex_replace(title, r'(["\\])', r'\\\1')

    # ingredients / directions / ner are inserted verbatim.
    # NOTE(review): assumes those columns already contain JSON array text — confirm
    # against the dataset.
    return tf_strings.join([
        '{"ner": ', ner, ', ',
        '"title": "', title, '", ',
        '"ingredients": ', ingredients, ', ',
        '"directions": ', directions, '}',
    ])


# Build a lazily streamed input pipeline over the recipe CSV so rows are read,
# converted and batched on demand rather than loaded into memory all at once.
# NOTE(review): TextLineDataset splits on newlines, so a quoted CSV field that
# spans several lines would be cut into fragments that fail to parse and are
# then dropped by ignore_errors — confirm the file has no such rows.
dataset = (
    tf_data.TextLineDataset("RecipeNLG/RecipeNLG_dataset.csv")  # load the csv file line by line
    # tf_data.TextLineDataset("/kaggle/input/recipenlg/RecipeNLG_dataset.csv")  # load inside kaggle notebook
    .skip(1)  # skip the header row
    .shuffle(buffer_size=256)  # shuffle within a rolling in-memory buffer of 256 records
    # Pass the function directly (no wrapper lambda) and let tf.data parallelise
    # the conversion; element order is still deterministic by default.
    .map(csv_row_to_json, num_parallel_calls=tf_data.AUTOTUNE)
    # Drop rows whose CSV parsing failed instead of aborting the whole run.
    # (Newer TensorFlow releases also offer this as the `.ignore_errors()` method.)
    .apply(tf.data.experimental.ignore_errors())
    .batch(BATCH_SIZE)  # batch the dataset to train on multiple records at once
    .prefetch(tf_data.AUTOTUNE)  # overlap input preparation with training
)

---
**Load the pretrained model**

In [None]:
# Create the model inside the distribution strategy's scope so its variables are
# placed on the replicas chosen earlier (TPU cores when a TPU was found). With
# the default strategy the scope is a no-op, so CPU/GPU behaviour is unchanged.
with strategy.scope():
    # Tokenises and packs raw strings into fixed-length sequences of SEQ_LEN tokens.
    preprocessor = keras_nlp.models.GPT2CausalLMPreprocessor.from_preset(
        "gpt2_base_en",
        sequence_length=SEQ_LEN,
    )
    # Pretrained GPT-2 with the preprocessor attached, so it can be fit on raw strings.
    gpt2_lm = keras_nlp.models.GPT2CausalLM.from_preset(
        "gpt2_base_en", preprocessor=preprocessor
    )

---
**Define a text generator callback**

In [None]:
class TextGenerator(keras.callbacks.Callback):
    """Callback that prints a sample generation after every epoch.

    A fixed, partially written recipe prompt is completed by the model being
    trained so progress can be judged qualitatively between epochs.

    Args:
        k: requested top-k value. It is stored on the instance but is not
            applied to generation here; the model's configured sampler is used.
    """

    def __init__(self, k):
        # Initialise the base Callback so its internal state is set up.
        super().__init__()
        self.k = k
        # A plain string: the original trailing comma turned this into a 1-tuple.
        self.prompt = '{"ner": ["tomatoes", "garlic", "pasta", "olive oil",'

    def on_epoch_end(self, epoch, logs=None):
        """Generate from the fixed prompt and print the result."""
        # `self.model` is the model Keras attaches to the callback during `fit`,
        # which avoids depending on a notebook-level global.
        txt = self.model.generate(self.prompt)
        print(f"Top-K search generated text: \n{txt}\n")

---
**Finetune the model**

In [None]:
# The decay schedule needs the total number of optimizer steps. A pipeline built
# from TextLineDataset reports UNKNOWN_CARDINALITY (-2); multiplying that by
# EPOCHS gives a negative `decay_steps`, which pins the learning rate at
# `end_learning_rate` (0.0) from the first step. Count the batches instead.
steps_per_epoch = int(dataset.cardinality().numpy())
if steps_per_epoch == tf.data.INFINITE_CARDINALITY:
    raise ValueError("Cannot derive decay_steps from an infinite dataset.")
if steps_per_epoch == tf.data.UNKNOWN_CARDINALITY:
    # One extra pass over the data; only the batch count is kept.
    steps_per_epoch = int(
        dataset.reduce(
            tf.constant(0, dtype=tf.int64),
            lambda count, _: count + 1,
        ).numpy()
    )
if steps_per_epoch <= 0:
    raise ValueError("The training dataset produced no batches.")

# Linearly decay the learning rate from 5e-5 to 0 over the whole training run.
learning_rate = keras.optimizers.schedules.PolynomialDecay(
    5e-5,
    decay_steps=steps_per_epoch * EPOCHS,
    end_learning_rate=0.0,
)
# The model outputs logits, so the loss applies the softmax itself.
loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)

gpt2_lm.compile(
    optimizer=keras.optimizers.Adam(learning_rate),
    loss=loss,
    weighted_metrics=["accuracy"],
)

# Save a full checkpoint after every epoch (not only the best one).
checkpoint_callback = keras.callbacks.ModelCheckpoint(
    filepath='checkpoints/transfer_learning_cp_{epoch:02d}.keras',
    save_best_only=False,
)
# Print a sample generation after every epoch.
text_generation_callback = TextGenerator(k=10)

callbacks = [
    checkpoint_callback,
    text_generation_callback,
]

gpt2_lm.fit(
    dataset,
    epochs=EPOCHS,
    callbacks=callbacks,
)