In [2]:
!pip install gradio



In [51]:
import gradio as gr
import tensorflow as tf
from transformers import GPT2Tokenizer, TFGPT2LMHeadModel, TextDataset, create_optimizer

# Load model and tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = TFGPT2LMHeadModel.from_pretrained("gpt2", pad_token_id=tokenizer.eos_token_id)

# Load and preprocess dataset
def load_dataset(file_path):
    return TextDataset(tokenizer=tokenizer, file_path=file_path, block_size=128)

dataset = load_dataset("custom_dataset.txt")

# Convert to tf.data.Dataset
def convert_to_tf_dataset(dataset, tokenizer, batch_size):
    def gen():
        for item in dataset:
            inputs = tokenizer(item, return_tensors='tf', padding=True, truncation=True).input_ids
            yield {"input_ids": inputs, "labels": inputs}
    return tf.data.Dataset.from_generator(gen, output_signature={
        "input_ids": tf.TensorSpec(shape=(None, None), dtype=tf.int32),
        "labels": tf.TensorSpec(shape=(None, None), dtype=tf.int32),
    }).batch(batch_size)

batch_size = 4
tf_dataset = convert_to_tf_dataset(dataset, tokenizer, batch_size=batch_size)

# Determine the number of steps per epoch
steps_per_epoch = tf.data.experimental.cardinality(tf_dataset).numpy()

# Training parameters
epochs = 3

# Define optimizer
optimizer, _ = create_optimizer(init_lr=5e-5, num_train_steps=steps_per_epoch * epochs, num_warmup_steps=0)

# Custom training loop
for epoch in range(epochs):
    epoch_loss = 0.0  # Initialize as a float in tf_dataset:
        with tf.GradientTape() as tape:
            outputs = model(batch["input_ids"], labels=batch["labels"], return_dict=True)
            loss = tf.reduce_mean(tf.keras.losses.sparse_categorical_crossentropy(batch["labels"], outputs.logits, from_logits=True))
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        epoch_loss += loss.numpy()  # Convert tensor to float

    # Print average loss for the epoch
    print(f"Epoch {epoch + 1}/{epochs} - Loss: {epoch_loss}")

# Save the model
model.save_pretrained("path_to_save_finetuned_model")
tokenizer.save_pretrained("path_to_save_finetuned_model")

# Define Gradio function
def generate_text(prompt):
    input_ids = tokenizer.encode(prompt, return_tensors='tf')
    beam_output = model.generate(input_ids, max_length=100, num_beams=5, no_repeat_ngram_size=2, early_stopping=True)
    output = tokenizer.decode(beam_output[0], skip_special_tokens=True)
    return output

# Create and launch Gradio interface
iface = gr.Interface(fn=generate_text, inputs="text", outputs="text", title="GPT-2 Text Generation", description="Type a prompt and get text generated by GPT-2.")
iface.launch()


All PyTorch model weights were used when initializing TFGPT2LMHeadModel.

All the weights of TFGPT2LMHeadModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.


Epoch 1/3 - Loss: 0.0
Epoch 2/3 - Loss: 0.0
Epoch 3/3 - Loss: 0.0
Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.


