In [None]:
!pip install --upgrade transformers

In [None]:
import pandas as pd
import tensorflow as tf
from transformers import GPT2Tokenizer, TFGPT2LMHeadModel

In [None]:
from tensorflow.keras.preprocessing.sequence import pad_sequences


In [None]:
# Read the verses table from the CSV file located next to the notebook
csv_path = './bhagwat_gita.csv'
data = pd.read_csv(csv_path)

In [None]:
# Fetch the pretrained GPT-2 tokenizer by its checkpoint name
tokenizer_checkpoint = 'gpt2'
tokenizer = GPT2Tokenizer.from_pretrained(tokenizer_checkpoint)

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

In [None]:
# Get the English translations of the verses as a list of plain strings.
# Rows with a missing translation are dropped: pandas represents them as float NaN,
# which the tokenizer cannot encode.
# NOTE(review): the column name is spelled 'Enlgish' -- presumably this matches the
# CSV header exactly; verify against the file before "fixing" it.
english_translations = data['Enlgish Translation'].dropna().astype(str).tolist()

In [None]:
# Tokenize the English translations.
# GPT-2's tokenizer does not insert BOS/EOS tokens by itself, so the end-of-text id
# is appended explicitly; without it the fine-tuned model never sees where a verse
# ends and has no learned signal to stop generating.
# Each encoding is truncated so that, including the appended EOS, it fits within the
# tokenizer's maximum sequence length.
eos_token_id = tokenizer.eos_token_id
max_content_tokens = tokenizer.model_max_length - 1  # leave room for the EOS token
tokenized_translations = [
    tokenizer.encode(
        translation,
        add_special_tokens=True,
        truncation=True,
        max_length=max_content_tokens,
    ) + [eos_token_id]
    for translation in english_translations
]


In [None]:
# Pad the tokenized translations to the length of the longest one.
# pad_sequences defaults to padding='pre' with value=0. For GPT-2 that is wrong twice:
#   * id 0 is a real vocabulary token ("!"), not a padding symbol;
#   * GPT-2 uses absolute position embeddings, so left padding shifts the position
#     of every real token.
# Padding on the right with the end-of-text id keeps real tokens at their true
# positions, and with causal attention the trailing padding cannot influence them.
max_length = max(len(translation) for translation in tokenized_translations)
padded_translations = pad_sequences(
    tokenized_translations,
    maxlen=max_length,
    padding='post',
    value=tokenizer.eos_token_id,
)

In [None]:
# Wrap the padded id matrix in an immutable TensorFlow tensor
input_ids = tf.constant(value=padded_translations)

In [None]:
# Build a tf.data pipeline that yields one row of input_ids per element
dataset = tf.data.Dataset.from_tensor_slices(tensors=input_ids)


In [None]:
# Training hyperparameters: sequences per gradient step, and passes over the data
batch_size, num_epochs = 4, 5

In [None]:
# Prepare the dataset for training: shuffle across the whole corpus, then batch.
# The pipeline is rebuilt from input_ids rather than re-wrapping `dataset`, so that
# re-running this cell does not batch data that has already been batched.
dataset = (
    tf.data.Dataset.from_tensor_slices(input_ids)
    .shuffle(len(input_ids))
    .batch(batch_size)
)


In [None]:
# Instantiate GPT-2 with a language-modeling head from the pretrained checkpoint
model_checkpoint = 'gpt2'
model = TFGPT2LMHeadModel.from_pretrained(model_checkpoint)

Downloading model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

All PyTorch model weights were used when initializing TFGPT2LMHeadModel.

All the weights of TFGPT2LMHeadModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.


In [None]:
# Training objects: cross-entropy computed on raw logits, and Adam with a small step
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
learning_rate = 1e-5
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

In [None]:
# Fine-tuning loop: one full pass over `dataset` per epoch, then report the mean
# of the per-batch losses for that epoch.
for epoch in range(num_epochs):
    batch_losses = []
    for token_ids in dataset:
        # Next-token objective: the logits at position t are scored against the
        # token at position t + 1, so targets drop the first column and
        # predictions drop the last one.
        targets = token_ids[:, 1:]
        with tf.GradientTape() as tape:
            predictions = model(token_ids, training=True)[0][:, :-1]
            loss = loss_fn(targets, predictions)
        trainable = model.trainable_variables
        grads = tape.gradient(loss, trainable)
        optimizer.apply_gradients(zip(grads, trainable))
        batch_losses.append(loss)
    average_loss = sum(batch_losses) / len(batch_losses)
    print(f"Epoch {epoch+1}: Loss = {average_loss}")

Epoch 1: Loss = 0.6881703734397888
Epoch 2: Loss = 0.6398693919181824
Epoch 3: Loss = 0.6106193661689758
Epoch 4: Loss = 0.5871436595916748
Epoch 5: Loss = 0.5682884454727173


In [None]:
# Write the fine-tuned model into the current working directory
checkpoint_dir = './'
model.save_pretrained(checkpoint_dir)

# Read it straight back from that same directory
model = TFGPT2LMHeadModel.from_pretrained(checkpoint_dir)

All model checkpoint layers were used when initializing TFGPT2LMHeadModel.

All the layers of TFGPT2LMHeadModel were initialized from the model checkpoint at ./.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.


In [None]:
# Load the fine-tuned model saved in the current directory
model = TFGPT2LMHeadModel.from_pretrained('./')

# Prompt the model should continue
prompt = "In the Bhagavad Gita, Krishna tells Arjuna that..."

# Encode the prompt; calling the tokenizer directly returns the attention mask
# alongside the input ids, so both can be handed to generate().
encoded_prompt = tokenizer(prompt, add_special_tokens=True, return_tensors='tf')

# Generate the creative text with beam search
generated_text = model.generate(
    input_ids=encoded_prompt['input_ids'],
    attention_mask=encoded_prompt['attention_mask'],
    # GPT-2 defines no pad token; name the end-of-text id explicitly instead of
    # relying on generate()'s implicit fallback.
    pad_token_id=tokenizer.eos_token_id,
    max_length=1000,
    num_beams=5,
    # NOTE(review): temperature only rescales logits for sampling; with do_sample
    # left unset this is presumably ignored -- confirm, then either drop it or
    # pass do_sample=True if sampled output is what is wanted.
    temperature=0.7,
    no_repeat_ngram_size=2,
)

# Print the creative text
print(tokenizer.decode(generated_text[0], skip_special_tokens=True))

In [None]:
# Load the fine-tuned model saved in the current directory.
# The checkpoint in './' was written by save_pretrained() on a TensorFlow model, so
# it is loaded natively; from_pt=True would request a PyTorch weight file instead.
model = TFGPT2LMHeadModel.from_pretrained('./')

# Prompt the model should continue
prompt = "How can I tackle the difficult situation"

# Encode the prompt; calling the tokenizer directly returns the attention mask
# alongside the input ids, so both can be handed to generate().
encoded_prompt = tokenizer(prompt, add_special_tokens=True, return_tensors='tf')

# Generate the creative text with beam search
generated_text = model.generate(
    input_ids=encoded_prompt['input_ids'],
    attention_mask=encoded_prompt['attention_mask'],
    # GPT-2 defines no pad token; name the end-of-text id explicitly instead of
    # relying on generate()'s implicit fallback.
    pad_token_id=tokenizer.eos_token_id,
    max_length=1000,
    num_beams=5,
    # NOTE(review): temperature only rescales logits for sampling; with do_sample
    # left unset this is presumably ignored -- confirm, then either drop it or
    # pass do_sample=True if sampled output is what is wanted.
    temperature=0.7,
    no_repeat_ngram_size=2,
)

# Print the creative text
print(tokenizer.decode(generated_text[0], skip_special_tokens=True))