In [5]:
# Adding required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
import random
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import GPT2LMHeadModel, GPT2Tokenizer

In [19]:
file_path = "processed_recipes.txt"      # file path to take file as an input

text = open(file_path, "r").read()       # loading the txt file and storing it as text

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")      # Using a pretrained tokenizer from GPT-2 Model
tokenizer.pad_token = tokenizer.eos_token
encoded_text = tokenizer.encode(text, max_length=1024, truncation=True, padding="max_length")     # Encoding text using tokenizer and setting max_length as 1024
encoded_tensor = torch.tensor(encoded_text).unsqueeze(0)           # Converting encoded text to a tensor using torch.tensor and unsqueezing for increasing dimensions

dataset = torch.utils.data.TensorDataset(encoded_tensor)           # Creating dataset using encoded tensor using the torch,utils library
dataloader = torch.utils.data.DataLoader(dataset, batch_size=8)    # Keeping the batch size as 8 for faster training

model = GPT2LMHeadModel.from_pretrained("gpt2")             # Creating a Pretrained GPT-2 Model

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")     # Using CCR so to.device() is necessary
model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)                # Using Adam optimizer and settign Learning Rate as 1e-5
loss_fn = torch.nn.CrossEntropyLoss(ignore_index=tokenizer.pad_token_id)  # Using CrossEntropy Loss as Loss function

# Trainign the model for 10 epochs
for epoch in range(10):
    for i, batch in enumerate(dataloader):                    # Putting the dataloader in batches of 8
        batch_input = batch[0].to(device)                     # Moving data to device (CPU or GPU)
        model.train()                                         # Applying training function to the model
         
        optimizer.zero_grad()                                 # Setting zero gradient function to the optimizer
        
        outputs = model(batch_input)                          # Getting the output for training parameters
        predictions = outputs.logits[:, :-1]                  # Setting dimensions by removing last element
        batch_input = batch_input[:, :-1]                     # Setting batch size decreasing by 1, as to adjust for loss function
        
        loss = loss_fn(predictions.view(-1, predictions.size(-1)), batch_input.view(-1))  # Flattening and calculating loss
        loss.backward()                                       # Backpropagating loss
        optimizer.step()   
    print(f"Epoch: {epoch+1}, Batch: {i+1}, Loss: {loss.item()}")       # Printing epoch no, batch and loss 

model.save_pretrained("trained_recipe_model_pytorch")         # Saving pretrained model

print("Training complete! Model saved as 'trained_recipe_model_pytorch'")    


Epoch: 1, Batch: 1, Loss: 9.368671417236328
Epoch: 2, Batch: 1, Loss: 8.87807846069336
Epoch: 3, Batch: 1, Loss: 8.512247085571289
Epoch: 4, Batch: 1, Loss: 8.192841529846191
Epoch: 5, Batch: 1, Loss: 7.700817108154297
Epoch: 6, Batch: 1, Loss: 7.334148406982422
Epoch: 7, Batch: 1, Loss: 6.919040203094482
Epoch: 8, Batch: 1, Loss: 6.6689324378967285
Epoch: 9, Batch: 1, Loss: 6.401946544647217
Epoch: 10, Batch: 1, Loss: 6.205134868621826
Training complete! Model saved as 'trained_recipe_model_pytorch'
