In [None]:
!pip install tensorflow tensorflow-datasets transformers

import tensorflow as tf
import tensorflow_datasets as tfds

# Load the XSum training split with TensorFlow Datasets (tfds).
# File shuffling is switched off so the example order is identical on every
# pass over the data: the train/validation split further down is positional
# (take/skip), and a file order that changes between passes would let
# validation examples drift into the training subset.
dataset = tfds.load('xsum', split='train', shuffle_files=False)

In [None]:
from transformers import AutoTokenizer

# Load the tokenizer published with the 'google/pegasus-xsum' checkpoint
# through the transformers AutoTokenizer factory. Only the tokenizer is
# loaded here, not the Pegasus model itself.
tokenizer = AutoTokenizer.from_pretrained('google/pegasus-xsum')

# Define a function to preprocess the input and output text
def preprocess_data(example):
    """Tokenize one XSum example into fixed-length id and mask tensors.

    The function is safe to pass to ``tf.data.Dataset.map``: the tokenizer is
    plain Python and needs real byte strings, so it is run through
    ``tf.py_function`` instead of calling ``.numpy()`` on the symbolic tensors
    that ``map`` hands to its callback.

    Args:
        example: Mapping with scalar string tensors under the keys
            ``'document'`` (article text) and ``'summary'`` (reference summary).

    Returns:
        A tuple ``(inputs, outputs)``. Each element is a dict with the keys
        ``'input_ids'`` and ``'attention_mask'`` holding 1-D ``tf.int32``
        tensors of length 512 (inputs) or 128 (outputs). There is no leading
        batch axis; batching the dataset adds it.
    """
    def _tokenize_pair(document, summary):
        # Executed eagerly by tf.py_function, so .numpy() is available here.
        tensors = []
        for text, max_length in ((document, 512), (summary, 128)):
            # Without return_tensors the tokenizer returns flat lists of ids
            # for a single string, which avoids a spurious batch axis of 1.
            encoding = tokenizer.encode_plus(
                text.numpy().decode('utf-8'),
                max_length=max_length,
                truncation=True,
                padding='max_length',
            )
            tensors.append(tf.constant(encoding['input_ids'], dtype=tf.int32))
            tensors.append(tf.constant(encoding['attention_mask'], dtype=tf.int32))
        return tensors

    input_ids, input_mask, output_ids, output_mask = tf.py_function(
        func=_tokenize_pair,
        inp=[example['document'], example['summary']],
        Tout=[tf.int32] * 4,
    )

    # tf.py_function outputs have unknown static shape; pin the lengths so that
    # downstream batching and Keras layers see fully defined shapes.
    inputs = {
        'input_ids': tf.ensure_shape(input_ids, [512]),
        'attention_mask': tf.ensure_shape(input_mask, [512]),
    }
    outputs = {
        'input_ids': tf.ensure_shape(output_ids, [128]),
        'attention_mask': tf.ensure_shape(output_mask, [128]),
    }
    return inputs, outputs

# Tokenize every example by mapping preprocess_data over the dataset; the
# mapped dataset replaces the raw one under the same name.
dataset = dataset.map(preprocess_data)

In [None]:
# Positional 80/20 split: the first 80% of the examples are used for training,
# the remaining 20% for validation.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

train_dataset = dataset.take(train_size)
val_dataset = dataset.skip(train_size)

batch_size = 8
# Number of examples buffered for shuffling; larger values mix better at the
# cost of memory and a longer warm-up before the first batch.
shuffle_buffer_size = 10_000

# Shuffle only the training subset, and only after the split, so the
# validation subset is untouched and training batches differ between epochs.
# prefetch lets input preparation overlap with the training step.
train_dataset = (
    train_dataset
    .shuffle(shuffle_buffer_size)
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)
val_dataset = val_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, LSTM, Dense, Embedding
from tensorflow.keras.models import Model

# Sequence lengths of the padded encoder and decoder inputs
max_input_length = 512
max_output_length = 128

# Embedding(mask_zero=True) can only treat id 0 as padding, so encoder masking
# is enabled only when the tokenizer really pads with id 0. With the mask in
# place the encoder LSTMs skip padded steps, and the final state summarises
# the last real token instead of a run of trailing padding.
mask_encoder_padding = tokenizer.pad_token_id == 0

# Encoder: token embedding followed by three stacked LSTM layers
encoder_inputs = Input(shape=(max_input_length,), name='encoder_inputs')
encoder_embedding = Embedding(
    input_dim=tokenizer.vocab_size,
    output_dim=128,
    mask_zero=mask_encoder_padding,
    name='encoder_embedding',
)(encoder_inputs)
encoder_lstm1 = LSTM(256, return_sequences=True, name='encoder_lstm1')(encoder_embedding)
encoder_lstm2 = LSTM(256, return_sequences=True, name='encoder_lstm2')(encoder_lstm1)
# return_state=True exposes both parts of the final LSTM state. The layer
# output (kept under its original name) equals the hidden state h; the cell
# state c is a different tensor and must not be substituted by h.
encoder_lstm3, encoder_state_h, encoder_state_c = LSTM(
    256, return_state=True, name='encoder_lstm3'
)(encoder_lstm2)
encoder_states = [encoder_state_h, encoder_state_c]

# Decoder: three stacked LSTM layers, each started from the encoder's final
# (h, c) state, followed by a softmax over the vocabulary at every time step.
# The decoder embedding is left unmasked because id 0 may also appear as a
# legitimate first decoder token.
decoder_inputs = Input(shape=(max_output_length,), name='decoder_inputs')
decoder_embedding = Embedding(input_dim=tokenizer.vocab_size, output_dim=128, name='decoder_embedding')(decoder_inputs)
decoder_lstm1 = LSTM(256, return_sequences=True, name='decoder_lstm1')(decoder_embedding, initial_state=encoder_states)
decoder_lstm2 = LSTM(256, return_sequences=True, name='decoder_lstm2')(decoder_lstm1, initial_state=encoder_states)
decoder_lstm3 = LSTM(256, return_sequences=True, name='decoder_lstm3')(decoder_lstm2, initial_state=encoder_states)
decoder_outputs = Dense(tokenizer.vocab_size, activation='softmax', name='decoder_outputs')(decoder_lstm3)

model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_outputs)


In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

# Define the callbacks
checkpoint_path = 'best_model.h5'
# Keep only the model with the lowest validation loss seen so far.
checkpoint = ModelCheckpoint(checkpoint_path, monitor='val_loss', save_best_only=True, save_weights_only=False, mode='min', verbose=1)
# Stop after 5 epochs without a validation-loss improvement.
early_stop = EarlyStopping(monitor='val_loss', patience=5, mode='min', verbose=1)
# Multiply the learning rate by 0.1 after 2 epochs without improvement.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=2, mode='min', verbose=1)

# Token fed to the decoder at the first time step. The pad id is reused as the
# start marker; fall back to 0 if the tokenizer defines no pad token.
decoder_start_token_id = tokenizer.pad_token_id if tokenizer.pad_token_id is not None else 0


def _to_seq2seq_batch(inputs, outputs):
    """Turn a batch of tokenizer encodings into what ``model.fit`` expects.

    Args:
        inputs: Dict with the encoder ``'input_ids'`` for a batch.
        outputs: Dict with the target ``'input_ids'`` and ``'attention_mask'``
            for the same batch.

    Returns:
        ``(features, targets, sample_weight)`` where ``features`` maps the
        model's input-layer names to id tensors, ``targets`` holds the token
        ids to predict, and ``sample_weight`` is 1.0 on real target tokens and
        0.0 on padding so padded positions do not contribute to the loss.
    """
    # The reshapes also flatten a stray singleton axis, which appears when the
    # encodings were produced per example with return_tensors='tf'.
    encoder_ids = tf.reshape(inputs['input_ids'], [-1, max_input_length])
    target_ids = tf.reshape(outputs['input_ids'], [-1, max_output_length])
    target_mask = tf.reshape(outputs['attention_mask'], [-1, max_output_length])

    # Teacher forcing: at step t the decoder reads the target token from step
    # t - 1, so the decoder input is the target shifted right by one position
    # with the start token in front.
    start_column = tf.fill(
        [tf.shape(target_ids)[0], 1],
        tf.cast(decoder_start_token_id, target_ids.dtype),
    )
    decoder_ids = tf.concat([start_column, target_ids[:, :-1]], axis=1)

    features = {'encoder_inputs': encoder_ids, 'decoder_inputs': decoder_ids}
    return features, target_ids, tf.cast(target_mask, tf.float32)


# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(
    train_dataset.map(_to_seq2seq_batch),
    validation_data=val_dataset.map(_to_seq2seq_batch),
    epochs=20,
    callbacks=[checkpoint, early_stop, reduce_lr],
)

In the code above, we define three callbacks: ModelCheckpoint saves the model with the lowest validation loss, EarlyStopping stops training if the validation loss has not improved for 5 consecutive epochs, and ReduceLROnPlateau multiplies the learning rate by 0.1 if the validation loss has not improved for 2 consecutive epochs. We then compile the model with the Adam optimizer and the sparse categorical cross-entropy loss, and train it on the training dataset for up to 20 epochs, evaluating on the validation dataset after each epoch. The returned history object contains the training and validation metrics for each epoch.