## Importing Modules

In [39]:
import os
import pathlib
import numpy as np
import pandas as pd
import nltk

import torch
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader, random_split, RandomSampler, SequentialSampler

from transformers import GPT2LMHeadModel, GPT2TokenizerFast, GPT2Config
from transformers import get_linear_schedule_with_warmup

from tqdm.auto import tqdm
import random
import datetime
import time
import statistics
from nltk.translate.bleu_score import sentence_bleu

if torch.cuda.is_available():
    print("GPU is available!")
else:
    print("GPU is not available.")

GPU is available!


In [2]:
MAIN_PATH = str(pathlib.Path().resolve())
DATASET_PATH = MAIN_PATH + '\\datasets'
MODEL_PATH = MAIN_PATH + '\\models'

In [3]:
models = os.listdir(MODEL_PATH)
models

['bert-base-cased',
 'bert-base-multilingual-cased',
 'bert-base-uncased',
 'bert-large-cased',
 'bert-large-uncased',
 'flan-t5-base',
 'flan-t5-large',
 'flan-t5-small',
 'gpt2',
 'gpt2-large',
 'gpt2-medium']

In [4]:
model_path = MODEL_PATH + '\\' + models[8]
model_path

'D:\\Python\\LLM_Environment\\models\\gpt2'

In [5]:
configuration = GPT2Config.from_pretrained(model_path)
model = GPT2LMHeadModel.from_pretrained(model_path, config=configuration)

## Import Dataset

In [13]:
filenames = os.listdir(DATASET_PATH)
filenames

['Recipes.csv', 'Recipes_1000.csv', 'train.csv']

In [14]:
file_path = DATASET_PATH + '\\' + filenames[1]
file_path

'D:\\Python\\LLM_Environment\\datasets\\Recipes_1000.csv'

In [18]:
df = pd.read_csv(file_path)
df.reset_index(drop=True, inplace=True)
df.shape

(1000, 4)

In [19]:
df.head()

Unnamed: 0,RecipeId,name,ingredients,instructions
0,38,Low-Fat Berry Blue Frozen Dessert,"blueberries, granulated sugar, vanilla yogurt,...",Toss 2 cups berries with sugar. Let stand for ...
1,39,Biryani,"saffron, milk, hot green chili peppers, onions...",Soak saffron in warm milk for 5 minutes and pu...
2,40,Best Lemonade,"sugar, lemons, rind of, lemon, zest of, fresh ...","Into a 1 quart Jar with tight fitting lid, put..."
3,41,Carina's Tofu-Vegetable Kebabs,"extra firm tofu, eggplant, zucchini, mushrooms...","Drain the tofu, carefully squeezing out excess..."
4,42,Cabbage Soup,"plain tomato juice, cabbage, onion, carrots, c...",Mix everything together and bring to a boil. R...


In [9]:
tokenizer = GPT2TokenizerFast.from_pretrained(model_path)

In [20]:
device = "cuda" if torch.cuda.is_available() else "cpu"

input_sequence = "beef, salt, pepper"
input_ids = tokenizer.encode(input_sequence, return_tensors='pt')

model = model.to(device)
#combine both sampling techniques
sample_outputs = model.generate(input_ids.to(device),
                              do_sample = True, max_length = 120,
                              top_k = 50, top_p = 0.85,
                              num_return_sequences = 3)

print("Output:\n" + 100 * '-')
for i, sample_output in enumerate(sample_outputs):
    print("{}: {}...".format(i, tokenizer.decode(sample_output, skip_special_tokens = True)))
    print('  ---')

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output:
----------------------------------------------------------------------------------------------------
0: beef, salt, pepper and white vinegar. Mix well. When you get the meat tender, it's time to add it to your salad. I used to do this on all my salads. I was always pretty sure I had it when I saw them, but that's not always the case. But I found myself eating it a lot and I really loved the crunch of the cheese and the saltiness. I love the saltiness. I love the garlic. You can definitely use this recipe to make salad for many meals in a row and it can be quite an appetizer for anyone....
  ---
1: beef, salt, pepper, garlic, ginger, garlic, parsley, parsley juice, thyme, thyme seeds, oregano, parsley powder, basil, oregano, oregano leaves, parsley, parsley pulp, pepper, salt, pepper flakes, pepper, pepper paste, pepper paste vinegar, salt, pepper, vinegar, garlic, garlic leaves, vinegar, garlic powder, garlic acid, garlic acid, garlic acid, garlic acid, garlic acid, garlic acid

In [24]:
def form_string(ingredient,instruction):
    # s = f"<|startoftext|>Ingredients:\n{ingredient.strip()}\n\nInstructions:\n{instruction.strip()}<|endoftext|>"
    s = f"<|startoftext|>Ingredients: {ingredient.strip()}. " \
        f"Instructions: {instruction.strip()}<|endoftext|>"
    return s

def extract_string(recipe):
    str = recipe.replace('<|startoftext|>', '').replace('<|endoftext|>', '')
    inst_pos = str.find('Instructions: ')
    ingredients = str[len('Ingredients: '): inst_pos-1]
    instructions = str[inst_pos+len('Instructions: '):]
    return ingredients, instructions

data = df.apply(lambda x:form_string(
    x['ingredients'], x['instructions']), axis=1).to_list()
data[0]

"<|startoftext|>Ingredients: blueberries, granulated sugar, vanilla yogurt, lemon juice. Instructions: Toss 2 cups berries with sugar. Let stand for 45 minutes, stirring occasionally. Transfer berry-sugar mixture to food processor. Add yogurt and process until smooth. Strain through fine sieve. Pour into baking pan (or transfer to ice cream maker and process according to manufacturers' directions). Freeze uncovered until edges are solid but centre is soft.  Transfer to processor and blend until smooth again. Return to pan and freeze until edges are solid. Transfer to processor and blend until smooth again. Fold in remaining 2 cups of blueberries. Pour into plastic mold and freeze overnight. Let soften slightly to serve.<|endoftext|>"

In [25]:
tokenizer = GPT2TokenizerFast.from_pretrained(model_path,
                                              bos_token='<|startoftext|>',
                                              eos_token='<|endoftext|>',
                                              unk_token='<|unknown|>',
                                              pad_token='<|pad|>'
                                             )

In [26]:
vocab_list = sorted(tokenizer.vocab.items(), key=lambda x:x[1])
for i in range(5555, 5566):
    print(vocab_list[i])

('ĠPhoto', 5555)
('Ġplus', 5556)
('rick', 5557)
('arks', 5558)
('Ġalternative', 5559)
('Ġpil', 5560)
('Ġapprox', 5561)
('that', 5562)
('Ġobjects', 5563)
('ĠRo', 5564)
('ĠAndroid', 5565)


In [27]:
print("The max model length is {} for this model".format(tokenizer.model_max_length))
print("The end of sequence token {} has the id {}".format(tokenizer.convert_ids_to_tokens(tokenizer.eos_token_id), tokenizer.eos_token_id))
print("The beginning of sequence token {} has the id {}".format(tokenizer.convert_ids_to_tokens(tokenizer.bos_token_id), tokenizer.bos_token_id))
print("The unknown token {} has the id {}".format(tokenizer.convert_ids_to_tokens(tokenizer.unk_token_id), tokenizer.unk_token_id))
print("The padding token {} has the id {}".format(tokenizer.convert_ids_to_tokens(tokenizer.pad_token_id), tokenizer.pad_token_id))

The max model length is 1024 for this model
The end of sequence token <|endoftext|> has the id 50256
The beginning of sequence token <|startoftext|> has the id 50257
The unknown token <|unknown|> has the id 50258
The padding token <|pad|> has the id 50259


In [28]:
batch_size = 2
max_length = 180  # maximum sentence length

# standard PyTorch approach of loading data in using a Dataset class.
class RecipeDataset(Dataset):
    def __init__(self, data, tokenizer):
        self.data = data
        self.input_ids = []
        self.attn_masks = []
        self.origin_ingredients = []
        self.origin_instructions = []

        for recipe in data:
            encodings = tokenizer.encode_plus(recipe,
                                              truncation=True,
                                              padding='max_length',
                                              max_length=max_length,
                                              return_tensors='pt'       # return PyTorch tensor
                                             )
            self.input_ids.append(torch.squeeze(encodings['input_ids'],0))
            # attention_mask tells model not to incorporate these PAD tokens into its interpretation of the sentence
            self.attn_masks.append(torch.squeeze(encodings['attention_mask'],0))
            ingredients, instructions = extract_string(recipe)
            self.origin_ingredients.append(ingredients)
            self.origin_instructions.append(instructions)


    def __len__(self):
        return len(self.data)

    def __getitem__(self,idx):
        return self.input_ids[idx], self.attn_masks[idx], self.origin_ingredients[idx], self.origin_instructions[idx]

In [29]:
dataset = RecipeDataset(data, tokenizer)

# Split into training and validation sets
train_size = int(0.9 * len(dataset))
val_size = len(dataset) - train_size

train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

print('{:>5,} training samples'.format(train_size))
print('{:>5,} validation samples'.format(val_size))

  900 training samples
  100 validation samples


In [30]:
print(f"dataset size {dataset.__len__()}")
print(f"dataset[0]: \n  input_ids: {dataset[0][0]}\n  attn_masks: {dataset[0][1]}")

dataset size 1000
dataset[0]: 
  input_ids: tensor([50257, 41222,    25,  4171, 20853,    11, 19468,  4817,  7543,    11,
        16858, 32132,    11, 18873, 13135,    13, 27759,    25,   309,   793,
          362, 14180, 36322,   351,  7543,    13,  3914,  1302,   329,  4153,
         2431,    11, 26547, 10491,    13, 20558,   275,  6996,    12,    82,
        35652, 11710,   284,  2057, 12649,    13,  3060, 32132,   290,  1429,
         1566,  7209,    13,   520,  3201,   832,  3734,   264, 12311,    13,
        39128,   656, 16871,  3425,   357,   273,  4351,   284,  4771,  8566,
        16009,   290,  1429,  1864,   284, 11372,     6, 11678,   737, 34917,
        18838,  1566, 13015,   389,  4735,   475,  7372,   318,  2705,    13,
          220, 20558,   284, 12649,   290, 13516,  1566,  7209,   757,    13,
         8229,   284,  3425,   290, 16611,  1566, 13015,   389,  4735,    13,
        20558,   284, 12649,   290, 13516,  1566,  7209,   757,    13, 39957,
          287,  5637

In [31]:
# Create the DataLoaders for our training and validation datasets.
# We'll take training samples in random order.
train_dataloader = DataLoader(
            train_dataset,  # The training samples.
            sampler = RandomSampler(train_dataset), # Select batches randomly
            batch_size = batch_size # Trains with this batch size.
        )

# For validation the order doesn't matter, so we'll just read them sequentially.
validation_dataloader = DataLoader(
            val_dataset, # The validation samples.
            sampler = SequentialSampler(val_dataset), # Pull out batches sequentially.
            batch_size = batch_size # Evaluate with this batch size.
        )

In [34]:
print(f"Weight shape {model.transformer.wte.weight.shape}")
# this step is necessary because I've added some tokens (bos_token, etc.) to the embeddings
# otherwise the tokenizer and model tensors won't match up
model.resize_token_embeddings(len(tokenizer))
print(f"Number of tokens: {len(tokenizer)}")

# Set the seed value all over the place to make this reproducible.
seed_val = 42

random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

Weight shape torch.Size([50257, 768])
Number of tokens: 50260


In [35]:
word_embeddings = model.transformer.wte.weight # Word Token Embeddings

print(word_embeddings.shape)

torch.Size([50260, 768])


In [36]:
epochs = 3
learning_rate = 2e-5
warmup_steps = 1e2
# The epsilon parameter eps = 1e-8 is “a very small number to prevent any division by zero in the implementation”
epsilon = 1e-8
# optim = Adam(model.parameters(), lr=5e-5)
optim = AdamW(model.parameters(), lr = learning_rate, eps = epsilon)

def format_time(elapsed):
    return str(datetime.timedelta(seconds=int(round((elapsed)))))

In [37]:
# Total number of training steps is [number of batches] x [number of epochs].
# (Note that this is not the same as the number of training samples).
total_steps = len(train_dataloader) * epochs

# Create the learning rate scheduler.
# This changes the learning rate as the training loop progresses
scheduler = get_linear_schedule_with_warmup(optim,
                                            num_warmup_steps = warmup_steps,
                                            num_training_steps = total_steps)

In [38]:
def infer(prompt):
    input = f"<|startoftext|>Ingredients: {prompt.strip()}"
    input = tokenizer(input, return_tensors="pt")
    input_ids      = input["input_ids"]
    attention_mask = input["attention_mask"]

    output = model.generate(input_ids.to(device),
                            attention_mask=attention_mask.to(device),
                            max_new_tokens=max_length,
                            # temperature = 0.5,
                            do_sample = True, top_k = 50, top_p = 0.85)
                            # num_beams=5, no_repeat_ngram_size=2, early_stopping=True)
    output = tokenizer.decode(output[0], skip_special_tokens=True)
    return output

In [40]:
total_t0 = time.time()

training_stats = []

for epoch_i in range(0, epochs):
    print("")
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    print('Training...')

    t0 = time.time()

    total_train_loss = 0

    model.train()  # `train` just changes the *mode* (train vs. eval), it doesn't *perform* the training.

    for step, batch in enumerate(train_dataloader):     # step from enumerate() = number of batches

        b_input_ids = batch[0].to(device)   # tokens (of multiple documents in a batch)
        b_labels    = batch[0].to(device)
        b_masks     = batch[1].to(device)   # mask of [1] for a real word, [0] for a pad

        model.zero_grad()
        # loss = model(X.to(device), attention_mask=a.to(device), labels=X.to(device)).loss
        outputs = model(  input_ids = b_input_ids,
                          labels = b_labels,
                          attention_mask = b_masks,
                          token_type_ids = None
                        )

        loss = outputs[0]

        batch_loss = loss.item()
        total_train_loss += batch_loss

        # Get sample every x batches.
        if step % 100 == 0 and not step == 0:

            elapsed = format_time(time.time() - t0)
            print('  Batch {:>5,}  of  {:>5,}. Loss: {:>5,}.   Elapsed: {:}.'.format(step, len(train_dataloader), batch_loss, elapsed))

            model.eval()

            sample_output = infer("eggs, flour, butter, sugar")
            print(sample_output)

            # `train` just changes the *mode* (train vs. eval), it doesn't *perform* the training.
            model.train()

        loss.backward()
        optim.step()
        scheduler.step()

    # Calculate the average loss over all the batches.
    avg_train_loss = total_train_loss / len(train_dataloader)

    # Measure how long this epoch took.
    training_time = format_time(time.time() - t0)

    print("")
    print("  Average training loss: {0:.2f}".format(avg_train_loss))
    print("  Training epoch took: {:}".format(training_time))


    print("")
    print("Running Validation...")

    t0 = time.time()

    model.eval()

    total_eval_loss = 0
    nb_eval_steps = 0

    # Evaluate data for one epoch
    for batch in validation_dataloader:

        b_input_ids = batch[0].to(device)
        b_labels = batch[0].to(device)
        b_masks = batch[1].to(device)

        with torch.no_grad():

            outputs  = model(input_ids = b_input_ids,
                             attention_mask = b_masks,
                             labels = b_labels)

            loss = outputs[0]

        batch_loss = loss.item()
        total_eval_loss += batch_loss

    avg_val_loss = total_eval_loss / len(validation_dataloader)

    validation_time = format_time(time.time() - t0)

    print("  Validation Loss: {0:.2f}".format(avg_val_loss))
    print("  Validation took: {:}".format(validation_time))

    # Record all statistics from this epoch.
    training_stats.append(
        {
            'epoch': epoch_i + 1,
            'Training Loss': avg_train_loss,
            'Valid. Loss': avg_val_loss,
            'Training Time': training_time,
            'Validation Time': validation_time
        }
    )

print("")
print("Training complete!")
print("Total training took {:} (h:mm:ss)".format(format_time(time.time()-total_t0)))


Training...


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


  Batch   100  of    450. Loss: 3.9301090240478516.   Elapsed: 0:00:10.
Ingredients: eggs, flour, butter, sugar, and vanilla, creamed egg, vanilla extract, and vanilla extractFor the oven to set the mixture and the butter, mix the butter, eggs, butter, sugar, and vanilla, and- blend the eggs in a mixer fitted with a stand mixer and Beat the butter, egg, salt, and vanilla until smooth, andBake the softened butter in a small bowl, whisking, sugar, and eggs, stirring frequently, and then the eggs. in a small bowl, heat butter, stirring, until the eggs are melted and the mixture is melted. Stir in the dry ingredients. Pour the dry mixture in a bowl.Cook mixture and butter.The waffle maker works best.Sprinkle sugar and vanilla.Form the butter on a greased pan and butter.Bake for 10 minutes, stirring, and then a stand mixer fitted with mixer, scraping and whisking together. (


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


  Batch   200  of    450. Loss: 2.8541507720947266.   Elapsed: 0:00:23.
Ingredients: eggs, flour, butter, sugar, milk, milk and water, vanilla, salt, vanilla extract, lemon juice, lemon juice, salt and pepper, vanilla extract, pepper, salt, pepper, garlic, ginger, basil, garlic powder, ginger extract, pepper, salt, pepper, garlic powder, and salt.Cook, stirring frequently, cream a 1/2 cup vegetable oil and butter until lightly browned and browned, stirring in eggs, sugar, vanilla extract and salt. In a bowl, cream the eggs, sugar and butter until creamy; add salt, salt and pepper and mix in the butter. Add in the salt. Cover and chill until it has chilled, stirring. Drain, remove from heat and drain.


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


  Batch   300  of    450. Loss: 2.918560266494751.   Elapsed: 0:00:35.
Ingredients: eggs, flour, butter, sugar, milk, corn, sugar, salt, pepper, pepper pepper, garlic, and onion, Instructions: Preheat oven to 350°F. Melt butter in a pan and cook. In the butter mixture, stir together flour and butter. In the milk, stir together. Stir in onions. Cut into 4-inch thick slices and cut into thin slices. Bake in pan until golden brown. Remove from heat and cool for 5 minutes. In a large skillet over medium heat, combine eggs, flour, butter, sugar, garlic, onions, salt, pepper, garlic, and onion. Cut into small strips, and arrange in the center of pan. Bake in 350°F oven for 15 minutes or until golden brown. Remove from heat and cool for 10 minutes or until firm. Serve with the remaining bacon and onion. Instructions: Prepare oven to 350°F. Remove from heat and brown the


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


  Batch   400  of    450. Loss: 2.6272802352905273.   Elapsed: 0:00:47.
Ingredients: eggs, flour, butter, sugar, garlic, and salt Instructions: Preheat oven to 350°F. Mix together eggs, flour, sugar, garlic, salt, and lemon juice in a large skillet. Add remaining ingredients. Cook until golden brown on all sides and golden brown on top. Drain eggs on a cookie sheet and cool completely before placing on a cooling rack. Let stand for 15 minutes. When ready to serve, warm butter over medium heat. Instructions: Place the butter on a cookie sheet, place in the oven until thickened and set aside. Drain the cookies on a cookie sheet.

  Average training loss: 6.36
  Training epoch took: 0:00:54

Running Validation...
  Validation Loss: 2.16
  Validation took: 0:00:01

Training...


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


  Batch   100  of    450. Loss: 1.7224706411361694.   Elapsed: 0:00:10.
Ingredients: eggs, flour, butter, sugar, milk, eggs. Instructions: Put egg yolks into butter. Cover and cook on low heat for 1 hour. Remove from heat. Cut into 3 or 4 equal pieces. Refrigerate at least 5 minutes before slicing. Instructions: Preheat oven to 425 degrees Fahrenheit. Line a baking sheet with foil. Brush cake with butter and lightly sprinkle sugar. Preheat oven to 425 degrees Fahrenheit. Place butter over waxed paper. Roll out on top. Bake at 425 degrees Fahrenheit for 20 to 25 minutes. Cool completely, or until toothpick inserted into the center comes out clean. Pour the remaining chocolate mixture over waxed paper and bake until cool to room temperature. Serve hot on top of prepared cake.


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


  Batch   200  of    450. Loss: 1.4465839862823486.   Elapsed: 0:00:22.
Ingredients: eggs, flour, butter, sugar, cornstarch, water, vanilla, vanilla extract, baking soda, vanilla extract, cinnamon, cinnamon sugar, baking powder, and sugar. Instructions: Beat eggs in large bowl and beat flour mixture in dry ingredients. Pour into prepared pan and bake for 20 to 25 minutes or until firm. Remove from heat and stir together flour and sugar. Whisk in milk, butter, vanilla, cinnamon sugar, cinnamon mixture. In a large bowl, beat in remaining butter. Beat until combined. Pour into prepared pan. Bake for 20 to 25 minutes or until lightly browned and browned. Transfer to rack for about 20 minutes or until cool.


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


  Batch   300  of    450. Loss: 2.1579365730285645.   Elapsed: 0:00:34.
Ingredients: eggs, flour, butter, sugar, baking powder. Instructions: In a large bowl, mix together egg, flour, sugar and baking powder. Add the milk and beat well. Stir in the butter. Stir in the flour and beat well. Add the eggs and beat well. Fold in the dry ingredients. Beat until thickened. Add the vanilla bean mix and beat well. Cover and refrigerate for at least 2 hours, or until firm.


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


  Batch   400  of    450. Loss: 1.675294041633606.   Elapsed: 0:00:45.
Ingredients: eggs, flour, butter, sugar, vanilla extract, sugar. Instructions: Heat butter and butter together in a nonstick skillet. Add flour mixture and sugar and cook over medium heat, stirring frequently. Add eggs and beat well. Stir in vanilla and sugar mixture and continue to cook over medium-high heat. Stir in vanilla and stir in flour mixture until melted. Pour over pan. Bake at 400 degrees Fahrenheit for 35-40 minutes. Cool on rack until ready to use.

  Average training loss: 2.17
  Training epoch took: 0:00:51

Running Validation...
  Validation Loss: 2.04
  Validation took: 0:00:01

Training...


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


  Batch   100  of    450. Loss: 1.4124552011489868.   Elapsed: 0:00:10.
Ingredients: eggs, flour, butter, sugar, salt, butter, salt. Instructions: Mix eggs, flour, butter, salt, butter. Mix until well blended. Pour batter into muffin cups, making sure they are well coated. Bake at 350 F until the eggs are firm. Chill muffins for 30 minutes, or until the muffins are lightly browned and golden.


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


  Batch   200  of    450. Loss: 1.1604747772216797.   Elapsed: 0:00:21.
Ingredients: eggs, flour, butter, sugar, egg, flour, eggs, sugar. Instructions: Melt butter, flour, sugar and eggs in microwave with a high speed blender until blended. Stir in flour and sugar. Stir in egg and butter. Gradually add flour and sugar to mixture. Beat well, stirring constantly, until mixture forms a smooth ball. Pour into a greased 7 x 8x 8-inch pan. Roll out edges on top, leaving a zipped zucchini piece. Bake at 350°F for 15 to 20 minutes, or until golden brown. Cool completely before serving.


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


  Batch   300  of    450. Loss: 1.063637375831604.   Elapsed: 0:00:32.
Ingredients: eggs, flour, butter, sugar, butter and milk. Instructions: Beat eggs, butter, sugar, butter and milk until stiff peaks form. Add flour, then mix until very smooth. Add eggs and mix well. Place in shallow baking dish. Bake in 350°F oven for 10 to 15 minutes or until soft. Remove from heat and let cool completely. In a large bowl, mix eggs, flour, butter and milk until stiff peaks form. Place in shallow baking dish and bake until soft. Remove from heat.


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


  Batch   400  of    450. Loss: 0.8958448767662048.   Elapsed: 0:00:44.
Ingredients: eggs, flour, butter, sugar, baking powder, salt, egg white, flour, flour blend, sugar, brown sugar. Instructions: Combine egg whites, flour, baking powder, salt, eggs, sugar, baking powder. Mix well. Beat with a whisk until light and fluffy. Spread with a greased 9x13-inch round cookie sheet. Cover tightly and refrigerate overnight.

  Average training loss: 2.09
  Training epoch took: 0:00:50

Running Validation...
  Validation Loss: 2.03
  Validation took: 0:00:01

Training complete!
Total training took 0:02:38 (h:mm:ss)


In [41]:
# Create a DataFrame from our training statistics.
df_stats = pd.DataFrame(data=training_stats)

# Use the 'epoch' as the row index.
df_stats = df_stats.set_index('epoch')
df_stats

Unnamed: 0_level_0,Training Loss,Valid. Loss,Training Time,Validation Time
epoch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,6.357306,2.161529,0:00:54,0:00:01
2,2.173984,2.040881,0:00:51,0:00:01
3,2.093194,2.030801,0:00:50,0:00:01


In [42]:
model_save_path = './model'

print("Saving model to %s" % model_save_path)

# Save a trained model, configuration and tokenizer using `save_pretrained()`.
# They can then be reloaded using `from_pretrained()`
# model_to_save = model.module if hasattr(model, 'module') else model  # Take care of distributed/parallel training
model.save_pretrained(model_save_path)
tokenizer.save_pretrained(model_save_path)

Saving model to ./model


('./model\\tokenizer_config.json',
 './model\\special_tokens_map.json',
 './model\\vocab.json',
 './model\\merges.txt',
 './model\\added_tokens.json',
 './model\\tokenizer.json')

In [43]:
model = GPT2LMHeadModel.from_pretrained(model_save_path)
tokenizer = GPT2TokenizerFast.from_pretrained(model_save_path)
model.to(device)

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50260, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2SdpaAttention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50260, bias=False)
)

In [44]:
# model = GPT2LMHeadModel.from_pretrained(model_save_path)
# tokenizer = GPT2TokenizerFast.from_pretrained(model_save_path)
# model.to(device)
print(infer("eggs, mushroom, butter, sugar"))

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Ingredients: eggs, mushroom, butter, sugar, salt, pepper, sugar, black pepper, eggs, sugar. Instructions: Combine eggs and sugar, salt, pepper, sugar, black pepper, eggs. Pour over eggs and sugar mixture. Gradually add eggs to mixture. Bake at 350 degrees for 20 to 25 minutes. Allow to cool before slicing into squares.


In [45]:
infer("onion, garlic, chicken breast")

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


'Ingredients: onion, garlic, chicken breast, salt, pepper, pepper, fresh thyme, onion, parsley, oregano, fresh parsley. Instructions: Cut onions into 2 halves and mix in garlic cloves and salt. Cover with plastic wrap and cook in a low heat until translucent, about 20 minutes. Drain, and season with salt and pepper. Garnish with parsley. Makes 2 large glasses.'

In [46]:
print(infer("avocado, lime"))

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Ingredients: avocado, lime, salt, fresh cilantro, pepper, coriander, garlic powder, sugar, and salt. Instructions: Combine avocado and lime, salt, fresh cilantro, garlic powder, sugar, salt. Mix well. Add cilantro to lime mixture. Cover and refrigerate overnight.


In [47]:
print(infer("beef, salt, pepper"))

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Ingredients: beef, salt, pepper, garlic powder, ginger, dried oregano, thyme, salt. Instructions: Put beef, garlic, oregano, thyme, salt, pepper and oregano in a blender or food processor. Blend until smooth. Add water, stirring often. Add onion, oregano, basil and thyme. Blend again. Blend for another minute, until the mixture is slightly thickened. Add broth and blend for another minute. Add oregano and stir until smooth. Combine mixture thoroughly and refrigerate. Cover and chill at room temperature until ready to serve. Instructions: Melt beef, butter, garlic, oregano, thyme and parsley in a large saucepan over medium heat. Add onion, salt and pepper and stir until smooth. Add broth and simmer for another minute, stirring occasionally. Add broth, stirring constantly, until mixture is tender. Bring to a


In [48]:
# Using BLEU score to compare the real sentences with the generated ones

scores=[]

for i in range(10):
    ingredients = val_dataset[i][2]
    reference = val_dataset[i][3]
    candidate = infer(ingredients)
    scores.append(sentence_bleu(reference, candidate))

print(statistics.mean(scores))

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:

9.08185564546079e-232
