## Importing Modules

In [1]:
import os
import pathlib
import numpy as np
import pandas as pd
import nltk

import torch
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader, random_split, RandomSampler, SequentialSampler

from transformers import GPT2LMHeadModel, GPT2TokenizerFast, GPT2Config
from transformers import get_linear_schedule_with_warmup

from tqdm.auto import tqdm
import random
import datetime
import time
import statistics
from nltk.translate.bleu_score import sentence_bleu

if torch.cuda.is_available():
    print("GPU is available!")
else:
    print("GPU is not available.")

  from .autonotebook import tqdm as notebook_tqdm


GPU is available!


In [2]:
MAIN_PATH = str(pathlib.Path().resolve())
DATASET_PATH = MAIN_PATH + '\\datasets'
MODEL_PATH = MAIN_PATH + '\\models'

In [3]:
models = os.listdir(MODEL_PATH)
models

['bert-base-cased',
 'bert-base-multilingual-cased',
 'bert-base-uncased',
 'bert-large-cased',
 'bert-large-uncased',
 'flan-t5-base',
 'flan-t5-large',
 'flan-t5-small',
 'gpt2',
 'gpt2-large',
 'gpt2-medium']

In [4]:
model_path = MODEL_PATH + '\\' + models[10]
model_path

'D:\\Python\\LLM_Environment\\models\\gpt2-medium'

In [5]:
configuration = GPT2Config.from_pretrained(model_path)
model = GPT2LMHeadModel.from_pretrained(model_path, config=configuration)

## Import Dataset

In [6]:
filenames = os.listdir(DATASET_PATH)
filenames

['Recipes.csv', 'Recipes_1000.csv', 'train.csv']

In [7]:
file_path = DATASET_PATH + '\\' + filenames[1]
file_path

'D:\\Python\\LLM_Environment\\datasets\\Recipes_1000.csv'

In [8]:
df = pd.read_csv(file_path)
df.reset_index(drop=True, inplace=True)
df.shape

(1000, 4)

In [9]:
df.head()

Unnamed: 0,RecipeId,name,ingredients,instructions
0,38,Low-Fat Berry Blue Frozen Dessert,"blueberries, granulated sugar, vanilla yogurt,...",Toss 2 cups berries with sugar. Let stand for ...
1,39,Biryani,"saffron, milk, hot green chili peppers, onions...",Soak saffron in warm milk for 5 minutes and pu...
2,40,Best Lemonade,"sugar, lemons, rind of, lemon, zest of, fresh ...","Into a 1 quart Jar with tight fitting lid, put..."
3,41,Carina's Tofu-Vegetable Kebabs,"extra firm tofu, eggplant, zucchini, mushrooms...","Drain the tofu, carefully squeezing out excess..."
4,42,Cabbage Soup,"plain tomato juice, cabbage, onion, carrots, c...",Mix everything together and bring to a boil. R...


In [10]:
tokenizer = GPT2TokenizerFast.from_pretrained(model_path)

In [11]:
device = "cuda" if torch.cuda.is_available() else "cpu"

input_sequence = "beef, salt, pepper"
input_ids = tokenizer.encode(input_sequence, return_tensors='pt')

model = model.to(device)
#combine both sampling techniques
sample_outputs = model.generate(input_ids.to(device),
                              do_sample = True, max_length = 120,
                              top_k = 50, top_p = 0.85,
                              num_return_sequences = 3)

print("Output:\n" + 100 * '-')
for i, sample_output in enumerate(sample_outputs):
    print("{}: {}...".format(i, tokenizer.decode(sample_output, skip_special_tokens = True)))
    print('  ---')

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
  attn_output = torch.nn.functional.scaled_dot_product_attention(


Output:
----------------------------------------------------------------------------------------------------
0: beef, salt, pepper, onion and a little salt and pepper. Pour 1/2 cup of the sauce over the chicken and then roll up the chicken to create a patty, like so: 1/2 – 1/2 cup of the sauce, 1 teaspoon salt, 1 teaspoon pepper, salt and 1 teaspoon minced garlic. Cut off 1/4 inch of the chicken (or shred it) and then cut up and wrap the chicken with a wrap to create a sandwich: 1/4 – 1/2 cup of sauce, 1 teaspoon salt, 1 teaspoon pepper, salt and 1...
  ---
1: beef, salt, pepper, onion powder, garlic powder, and salt. Add 1 Tbsp. of the chicken broth. Bring to a boil and add 1/4 tsp. more of the garlic powder. Lower the heat to a simmer and cook for 25-30 minutes or until the meat is cooked through. Once cooked, remove from the heat. While you're waiting for the chicken to cook, remove your chicken from the refrigerator and chop up the meat into small chunks. Then place the chunks into

In [12]:
def form_string(ingredient,instruction):
    # s = f"<|startoftext|>Ingredients:\n{ingredient.strip()}\n\nInstructions:\n{instruction.strip()}<|endoftext|>"
    s = f"<|startoftext|>Ingredients: {ingredient.strip()}. " \
        f"Instructions: {instruction.strip()}<|endoftext|>"
    return s

def extract_string(recipe):
    str = recipe.replace('<|startoftext|>', '').replace('<|endoftext|>', '')
    inst_pos = str.find('Instructions: ')
    ingredients = str[len('Ingredients: '): inst_pos-1]
    instructions = str[inst_pos+len('Instructions: '):]
    return ingredients, instructions

data = df.apply(lambda x:form_string(
    x['ingredients'], x['instructions']), axis=1).to_list()
data[0]

"<|startoftext|>Ingredients: blueberries, granulated sugar, vanilla yogurt, lemon juice. Instructions: Toss 2 cups berries with sugar. Let stand for 45 minutes, stirring occasionally. Transfer berry-sugar mixture to food processor. Add yogurt and process until smooth. Strain through fine sieve. Pour into baking pan (or transfer to ice cream maker and process according to manufacturers' directions). Freeze uncovered until edges are solid but centre is soft.  Transfer to processor and blend until smooth again. Return to pan and freeze until edges are solid. Transfer to processor and blend until smooth again. Fold in remaining 2 cups of blueberries. Pour into plastic mold and freeze overnight. Let soften slightly to serve.<|endoftext|>"

In [13]:
tokenizer = GPT2TokenizerFast.from_pretrained(model_path,
                                              bos_token='<|startoftext|>',
                                              eos_token='<|endoftext|>',
                                              unk_token='<|unknown|>',
                                              pad_token='<|pad|>'
                                             )

In [14]:
vocab_list = sorted(tokenizer.vocab.items(), key=lambda x:x[1])
for i in range(5555, 5566):
    print(vocab_list[i])

('ĠPhoto', 5555)
('Ġplus', 5556)
('rick', 5557)
('arks', 5558)
('Ġalternative', 5559)
('Ġpil', 5560)
('Ġapprox', 5561)
('that', 5562)
('Ġobjects', 5563)
('ĠRo', 5564)
('ĠAndroid', 5565)


In [15]:
print("The max model length is {} for this model".format(tokenizer.model_max_length))
print("The end of sequence token {} has the id {}".format(tokenizer.convert_ids_to_tokens(tokenizer.eos_token_id), tokenizer.eos_token_id))
print("The beginning of sequence token {} has the id {}".format(tokenizer.convert_ids_to_tokens(tokenizer.bos_token_id), tokenizer.bos_token_id))
print("The unknown token {} has the id {}".format(tokenizer.convert_ids_to_tokens(tokenizer.unk_token_id), tokenizer.unk_token_id))
print("The padding token {} has the id {}".format(tokenizer.convert_ids_to_tokens(tokenizer.pad_token_id), tokenizer.pad_token_id))

The max model length is 1024 for this model
The end of sequence token <|endoftext|> has the id 50256
The beginning of sequence token <|startoftext|> has the id 50257
The unknown token <|unknown|> has the id 50258
The padding token <|pad|> has the id 50259


In [16]:
batch_size = 2
max_length = 180  # maximum sentence length

# standard PyTorch approach of loading data in using a Dataset class.
class RecipeDataset(Dataset):
    def __init__(self, data, tokenizer):
        self.data = data
        self.input_ids = []
        self.attn_masks = []
        self.origin_ingredients = []
        self.origin_instructions = []

        for recipe in data:
            encodings = tokenizer.encode_plus(recipe,
                                              truncation=True,
                                              padding='max_length',
                                              max_length=max_length,
                                              return_tensors='pt'       # return PyTorch tensor
                                             )
            self.input_ids.append(torch.squeeze(encodings['input_ids'],0))
            # attention_mask tells model not to incorporate these PAD tokens into its interpretation of the sentence
            self.attn_masks.append(torch.squeeze(encodings['attention_mask'],0))
            ingredients, instructions = extract_string(recipe)
            self.origin_ingredients.append(ingredients)
            self.origin_instructions.append(instructions)


    def __len__(self):
        return len(self.data)

    def __getitem__(self,idx):
        return self.input_ids[idx], self.attn_masks[idx], self.origin_ingredients[idx], self.origin_instructions[idx]

In [17]:
dataset = RecipeDataset(data, tokenizer)

# Split into training and validation sets
train_size = int(0.9 * len(dataset))
val_size = len(dataset) - train_size

train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

print('{:>5,} training samples'.format(train_size))
print('{:>5,} validation samples'.format(val_size))

  900 training samples
  100 validation samples


In [18]:
print(f"dataset size {dataset.__len__()}")
print(f"dataset[0]: \n  input_ids: {dataset[0][0]}\n  attn_masks: {dataset[0][1]}")

dataset size 1000
dataset[0]: 
  input_ids: tensor([50257, 41222,    25,  4171, 20853,    11, 19468,  4817,  7543,    11,
        16858, 32132,    11, 18873, 13135,    13, 27759,    25,   309,   793,
          362, 14180, 36322,   351,  7543,    13,  3914,  1302,   329,  4153,
         2431,    11, 26547, 10491,    13, 20558,   275,  6996,    12,    82,
        35652, 11710,   284,  2057, 12649,    13,  3060, 32132,   290,  1429,
         1566,  7209,    13,   520,  3201,   832,  3734,   264, 12311,    13,
        39128,   656, 16871,  3425,   357,   273,  4351,   284,  4771,  8566,
        16009,   290,  1429,  1864,   284, 11372,     6, 11678,   737, 34917,
        18838,  1566, 13015,   389,  4735,   475,  7372,   318,  2705,    13,
          220, 20558,   284, 12649,   290, 13516,  1566,  7209,   757,    13,
         8229,   284,  3425,   290, 16611,  1566, 13015,   389,  4735,    13,
        20558,   284, 12649,   290, 13516,  1566,  7209,   757,    13, 39957,
          287,  5637

In [19]:
# Create the DataLoaders for our training and validation datasets.
# We'll take training samples in random order.
train_dataloader = DataLoader(
            train_dataset,  # The training samples.
            sampler = RandomSampler(train_dataset), # Select batches randomly
            batch_size = batch_size # Trains with this batch size.
        )

# For validation the order doesn't matter, so we'll just read them sequentially.
validation_dataloader = DataLoader(
            val_dataset, # The validation samples.
            sampler = SequentialSampler(val_dataset), # Pull out batches sequentially.
            batch_size = batch_size # Evaluate with this batch size.
        )

In [20]:
print(f"Weight shape {model.transformer.wte.weight.shape}")
# this step is necessary because I've added some tokens (bos_token, etc.) to the embeddings
# otherwise the tokenizer and model tensors won't match up
model.resize_token_embeddings(len(tokenizer))
print(f"Number of tokens: {len(tokenizer)}")

# Set the seed value all over the place to make this reproducible.
seed_val = 42

random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

Weight shape torch.Size([50257, 1024])
Number of tokens: 50260


In [21]:
word_embeddings = model.transformer.wte.weight # Word Token Embeddings

print(word_embeddings.shape)

torch.Size([50260, 1024])


In [22]:
epochs = 3
learning_rate = 2e-5
warmup_steps = 1e2
# The epsilon parameter eps = 1e-8 is “a very small number to prevent any division by zero in the implementation”
epsilon = 1e-8
# optim = Adam(model.parameters(), lr=5e-5)
optim = AdamW(model.parameters(), lr = learning_rate, eps = epsilon)

def format_time(elapsed):
    return str(datetime.timedelta(seconds=int(round((elapsed)))))

In [23]:
# Total number of training steps is [number of batches] x [number of epochs].
# (Note that this is not the same as the number of training samples).
total_steps = len(train_dataloader) * epochs

# Create the learning rate scheduler.
# This changes the learning rate as the training loop progresses
scheduler = get_linear_schedule_with_warmup(optim,
                                            num_warmup_steps = warmup_steps,
                                            num_training_steps = total_steps)

In [24]:
def infer(prompt):
    input = f"<|startoftext|>Ingredients: {prompt.strip()}"
    input = tokenizer(input, return_tensors="pt")
    input_ids      = input["input_ids"]
    attention_mask = input["attention_mask"]

    output = model.generate(input_ids.to(device),
                            attention_mask=attention_mask.to(device),
                            max_new_tokens=max_length,
                            # temperature = 0.5,
                            do_sample = True, top_k = 50, top_p = 0.85)
                            # num_beams=5, no_repeat_ngram_size=2, early_stopping=True)
    output = tokenizer.decode(output[0], skip_special_tokens=True)
    return output

In [25]:
total_t0 = time.time()

training_stats = []

for epoch_i in range(0, epochs):
    print("")
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    print('Training...')

    t0 = time.time()

    total_train_loss = 0

    model.train()  # `train` just changes the *mode* (train vs. eval), it doesn't *perform* the training.

    for step, batch in enumerate(train_dataloader):     # step from enumerate() = number of batches

        b_input_ids = batch[0].to(device)   # tokens (of multiple documents in a batch)
        b_labels    = batch[0].to(device)
        b_masks     = batch[1].to(device)   # mask of [1] for a real word, [0] for a pad

        model.zero_grad()
        # loss = model(X.to(device), attention_mask=a.to(device), labels=X.to(device)).loss
        outputs = model(  input_ids = b_input_ids,
                          labels = b_labels,
                          attention_mask = b_masks,
                          token_type_ids = None
                        )

        loss = outputs[0]

        batch_loss = loss.item()
        total_train_loss += batch_loss

        # Get sample every x batches.
        if step % 100 == 0 and not step == 0:

            elapsed = format_time(time.time() - t0)
            print('  Batch {:>5,}  of  {:>5,}. Loss: {:>5,}.   Elapsed: {:}.'.format(step, len(train_dataloader), batch_loss, elapsed))

            model.eval()

            sample_output = infer("eggs, flour, butter, sugar")
            print(sample_output)

            # `train` just changes the *mode* (train vs. eval), it doesn't *perform* the training.
            model.train()

        loss.backward()
        optim.step()
        scheduler.step()

    # Calculate the average loss over all the batches.
    avg_train_loss = total_train_loss / len(train_dataloader)

    # Measure how long this epoch took.
    training_time = format_time(time.time() - t0)

    print("")
    print("  Average training loss: {0:.2f}".format(avg_train_loss))
    print("  Training epoch took: {:}".format(training_time))


    print("")
    print("Running Validation...")

    t0 = time.time()

    model.eval()

    total_eval_loss = 0
    nb_eval_steps = 0

    # Evaluate data for one epoch
    for batch in validation_dataloader:

        b_input_ids = batch[0].to(device)
        b_labels = batch[0].to(device)
        b_masks = batch[1].to(device)

        with torch.no_grad():

            outputs  = model(input_ids = b_input_ids,
                             attention_mask = b_masks,
                             labels = b_labels)

            loss = outputs[0]

        batch_loss = loss.item()
        total_eval_loss += batch_loss

    avg_val_loss = total_eval_loss / len(validation_dataloader)

    validation_time = format_time(time.time() - t0)

    print("  Validation Loss: {0:.2f}".format(avg_val_loss))
    print("  Validation took: {:}".format(validation_time))

    # Record all statistics from this epoch.
    training_stats.append(
        {
            'epoch': epoch_i + 1,
            'Training Loss': avg_train_loss,
            'Valid. Loss': avg_val_loss,
            'Training Time': training_time,
            'Validation Time': validation_time
        }
    )

print("")
print("Training complete!")
print("Total training took {:} (h:mm:ss)".format(format_time(time.time()-total_t0)))


Training...


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


  Batch   100  of    450. Loss: 3.32277774810791.   Elapsed: 0:00:28.
Ingredients: eggs, flour, butter, sugar, cocoa powder, salt, pepper, nutmeg, eggs. Instructions:Put all ingredients in a bowl. Stir until well blended. Stir in lemon juice.


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


  Batch   200  of    450. Loss: 1.347068190574646.   Elapsed: 0:00:59.
Ingredients: eggs, flour, butter, sugar, baking soda, eggs, margarine, lemon juice, salt, baking powder, salt, eggs, butter, vanilla extract. Instructions: In a large bowl, beat the egg whites with 1 cup sugar and beat the egg whites and butter until light and fluffy, and fluffy. Fold in flour and add to the dry ingredients. Stir until well blended and a thick dough is formed. Roll out dough into a tight ball and place on lightly floured surface. Use a 1 inch cookie cutter to cut out circles. Place in a greased 9 inch pie pan, pour about 1/4 cup of lemon juice over the dough and bake at 350 degrees for 35-40 minutes or until lightly browned and lightly toasted. Remove from oven and sprinkle with lemon slices. Enjoy!


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


  Batch   300  of    450. Loss: 1.3673527240753174.   Elapsed: 0:01:32.
Ingredients: eggs, flour, butter, sugar, salt, cinnamon. Instructions: Combine flour and baking powder in a large bowl. Using an electric mixer, beat the eggs until fluffy. Add the remaining ingredients. Stir until well blended. Divide the batter evenly into prepared pans. Bake for 25 minutes, or until a toothpick inserted in the middle comes out clean. Allow to cool completely on wire racks before using.


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


  Batch   400  of    450. Loss: 2.4715497493743896.   Elapsed: 0:02:03.
Ingredients: eggs, flour, butter, sugar, salt, vanilla extract, egg yolk, cinnamon, nutmeg. Instructions: In a medium-sized bowl, combine eggs, flour, butter, sugar, salt, vanilla extract and eggs. Add in eggs and mix until combined. Beat in remaining ingredients until well blended. Beat in nutmeg. Instructions: In a large bowl, mix all dry ingredients and stir into a lightly greased large bowl. Beat together with a wooden spoon until smooth. Roll out dough on a lightly floured surface, until 1/4 inch thick and 4 cm in diameter. Shape into 8 even rounds. Place on parchment paper lined baking sheets or greased cookie sheet and bake until golden brown and firm, about 25 minutes.

  Average training loss: 4.50
  Training epoch took: 0:02:21

Running Validation...
  Validation Loss: 1.87
  Validation took: 0:00:05

Training...


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


  Batch   100  of    450. Loss: 2.021016836166382.   Elapsed: 0:00:29.
Ingredients: eggs, flour, butter, sugar, baking soda, salt, vanilla extract. Instructions: In a large bowl, whisk together eggs, flour, butter, sugar, baking soda, salt. In another large bowl, stir together baking soda, cinnamon and vanilla. In another large bowl, beat together butter and sugar until stiff peaks form, about 5 minutes. Add cinnamon, vanilla and beat until blended, about 1 minute more. In another large bowl, beat together flour, sugar, baking soda, vanilla and flour. Beat until stiff peaks form, about 5 minutes. Add eggs, one at a time, beat gently to incorporate and beat until combined, about 1 minute more. Divide batter evenly between greased cookie sheets and refrigerate for 1 hour. Bake cookies at 375 degrees for 20 to 25 minutes, or until edges are lightly browned.


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


  Batch   200  of    450. Loss: 2.3956758975982666.   Elapsed: 0:01:03.
Ingredients: eggs, flour, butter, sugar, salt, pepper, milk, milk, margarine, brown sugar, butter, butter, eggs, flour, vinegar, vanilla extract, flour, sugar, flour, egg yolks, milk, egg whites, cream, salt, cocoa powder, sugar, vanilla extract. Instructions: Combine flour and butter in a large bowl. Beat in eggs, milk,  margarine, brown sugar, butter, sugar,   flour and mix well. Beat in    eggs and  flour mixture until well blended. Stir in egg yolks,  margarine,  vinegar and vanilla. Pour in prepared batter into a well greased pan. Bake at 350 degrees Fahrenheit for 30-35 minutes. Sprinkle over tops of cakes,    top with candied  nuts,  candied orange, candied oranges, candy,  candies, chocolate chips,  


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


  Batch   300  of    450. Loss: 1.2305002212524414.   Elapsed: 0:01:37.
Ingredients: eggs, flour, butter, sugar, vanilla extract, baking powder, baking soda. Instructions: Heat oven to 350 degrees F. Line 2 or 3 9 inch pie crusts with parchment paper or spray with nonstick spray.   Place the butter in a large bowl; add the sugar, vanilla and baking powder and stir until sugar has dissolved.   Add eggs, flour and butter; stir until smooth.  Spread over prepared pie crusts;  place over a lightly greased 9 inch pan and bake until edges are set, 25 to 30 minutes or until a toothpick inserted into center comes out clean.  Allow to cool slightly before cutting into 1/2 inch thick squares.


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


  Batch   400  of    450. Loss: 1.2030420303344727.   Elapsed: 0:02:08.
Ingredients: eggs, flour, butter, sugar, milk, vanilla extract, sugar, salt, baking soda, baking powder, baking soda water. Instructions: Combine flour, butter, sugar and milk. Stir in vanilla extract. Gradually stir in sugar until well blended. Stir in baking soda and baking powder. Stir in   salt. Spread mixture evenly over muffin tins. Bake 20-30 minutes or until tops are golden brown. Top with icing sugar. Refrigerate until firm. Makes 6 cups of    cupcake.

  Average training loss: 1.85
  Training epoch took: 0:02:25

Running Validation...
  Validation Loss: 1.79
  Validation took: 0:00:05

Training...


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


  Batch   100  of    450. Loss: 2.4820361137390137.   Elapsed: 0:00:29.
Ingredients: eggs, flour, butter, sugar, cocoa powder, baking powder. Instructions: Mix together all ingredients except for butter. Stir in eggs and stir. Stir in cocoa powder, sugar, and baking powder. Spread on greased cookie sheets and bake in preheated 425°F oven for 25 minutes.


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


  Batch   200  of    450. Loss: 2.396599531173706.   Elapsed: 0:00:59.
Ingredients: eggs, flour, butter, sugar, salt, salt, sugar, water, water, egg yolks. Instructions:    Mix the dry ingredients in a bowl. Add the eggs,  flour, butter and sugar.  Mix well. Add the salt and water and mix well.  Gradually add the flour until all the flour is used.  Mix all of the dry ingredients together until  it forms a ball.  Roll out to about 1/2 cm thick. Roll the dough to about 1/2  cm thick.  Brush each  dough circle with  beaten egg white, and sprinkle with sugar.  Bake in a preheated oven at 350 degrees for 35 minutes.     To serve:  Place  the  dough on a greased pie plate and brush with  beaten egg whites.  Cut up the dough and sprinkle with a little sugar.  Brush each with 


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


  Batch   300  of    450. Loss: 2.077214002609253.   Elapsed: 0:01:34.
Ingredients: eggs, flour, butter, sugar, brown sugar, brown sugar, salt, vanilla extract, vanilla. Instructions: Combine eggs, flour, butter, and sugar in a large bowl. Add the brown sugar and salt to combine; whisk until smooth. Add the vanilla extract, and stir well. Pour into a greased 10 inch pie pan. Bake at 350°F for 30 to 35 minutes.


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


  Batch   400  of    450. Loss: 0.8129180669784546.   Elapsed: 0:02:05.
Ingredients: eggs, flour, butter, sugar, eggs, margarine, milk, margarine, vanilla extract, cinnamon, nutmeg, salt. Instructions: Place the flour in a food processor. Process until a fine flour is formed. Add butter, sugar and eggs. Process until the mixture is creamy and well blended. Add remaining ingredients, mixture should be dry enough to handle. Add a little salt. Fold in the margarine and vanilla. Make a well in the centre of the mixture. Drop into the well. Pour in the hot milk and let stand for a while. Cover with plastic wrap and freeze overnight. Invert onto a parchment paper and let rise another 15 minutes. In a food processor, combine the eggs, margarine and vanilla until combined. Beat well. Add the milk and margarine mixture to the mixture. Fold in the cinnamon, nutmeg and salt.

  Average training loss: 1.78
  Training epoch took: 0:02:24

Running Validation...
  Validation Loss: 1.76
  Validation t

In [26]:
# Create a DataFrame from our training statistics.
df_stats = pd.DataFrame(data=training_stats)

# Use the 'epoch' as the row index.
df_stats = df_stats.set_index('epoch')
df_stats

Unnamed: 0_level_0,Training Loss,Valid. Loss,Training Time,Validation Time
epoch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,4.496187,1.870418,0:02:21,0:00:05
2,1.846435,1.785793,0:02:25,0:00:05
3,1.781866,1.764926,0:02:24,0:00:05


In [27]:
model_save_path = './model'

print("Saving model to %s" % model_save_path)

# Save a trained model, configuration and tokenizer using `save_pretrained()`.
# They can then be reloaded using `from_pretrained()`
# model_to_save = model.module if hasattr(model, 'module') else model  # Take care of distributed/parallel training
model.save_pretrained(model_save_path)
tokenizer.save_pretrained(model_save_path)

Saving model to ./model


('./model\\tokenizer_config.json',
 './model\\special_tokens_map.json',
 './model\\vocab.json',
 './model\\merges.txt',
 './model\\added_tokens.json',
 './model\\tokenizer.json')

In [28]:
model = GPT2LMHeadModel.from_pretrained(model_save_path)
tokenizer = GPT2TokenizerFast.from_pretrained(model_save_path)
model.to(device)

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50260, 1024)
    (wpe): Embedding(1024, 1024)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-23): 24 x GPT2Block(
        (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2SdpaAttention(
          (c_attn): Conv1D(nf=3072, nx=1024)
          (c_proj): Conv1D(nf=1024, nx=1024)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=4096, nx=1024)
          (c_proj): Conv1D(nf=1024, nx=4096)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=1024, out_features=50260, bias=False)
)

In [29]:
# model = GPT2LMHeadModel.from_pretrained(model_save_path)
# tokenizer = GPT2TokenizerFast.from_pretrained(model_save_path)
# model.to(device)
print(infer("eggs, mushroom, butter, sugar"))

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Ingredients: eggs, mushroom, butter, sugar, brown sugar, flour, baking powder, butter, eggs, milk. Instructions: Heat the butter in a medium saucepan over a moderate heat. Add the mushrooms, fry in the butter for about 5 minutes or until softened and translucent. Stir in the sugar, flour, baking powder and salt. Let the mixture cook slowly stirring occasionally until the mixture comes to a boil. Reduce the heat and simmer over low heat for about 3 hours or until the mixture has thickened. Remove from heat and reserve 2 cups of the butter to use in the next recipe.


In [30]:
infer("onion, garlic, chicken breast")

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


'Ingredients: onion, garlic, chicken breast, onions, salt, pepper, garlic powder, butter, lemon juice, green onion, chicken breast, fresh basil. Instructions: In a heavy bottomed   ovenproof  saucepan, heat 1 cup of  the oil to 350 degrees. Add onion and saute over medium heat for 3-4 minutes, stirring occasionally. Add garlic and cook 3 minutes, turning, stirring occasionally. Add chicken and  add 2 cups  boiling water. Bring to a boil and reduce heat to simmer. Cook chicken uncovered  until juices run clear, about 2 minutes. Remove chicken from heat and return to  boiling water.  Remove the skin and shred with a fork; add to saucepan. Bring to a boil, stirring frequently, and cook  for 3 minutes. Remove chicken, skin and shreds.  Transfer chicken to a medium  bowl, add the basil and  stir to combine.'

In [31]:
print(infer("avocado, lime"))

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Ingredients: avocado, lime, cilantro, lime juice, salt, pepper, sour cream, chicken broth, fresh cilantro, red wine vinegar, lemon juice, fresh cilantro, lime juice, fresh parsley, fresh cilantro, fresh cilantro, fresh basil, sour cream, sour cream, olive oil, cornstarch, cheddar cheese, brown rice, sour cream, mayonnaise. Instructions: Cut avocado into slices, dice and place slices on a baking sheet. Set aside. Drain avocado slices and rinse under cool water. Combine avocado slices and cilantro, lime juice, salt, pepper and sour cream in a saucepan. Bring to a boil over medium heat, stirring constantly.  Pour in broth, simmer 5 minutes. Stir in the cilantro, lime juice and vinegar, and bring to a boil. Reduce heat and simmer 10 minutes longer. Stir in the remaining broth. Add cheddar


In [32]:
print(infer("beef, salt, pepper"))

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Ingredients: beef, salt, pepper, curry powder, lime juice, tomato paste, water, cayenne pepper, coconut milk. Instructions: In a large saucepan, bring beef to a boil. Cook until browned. Drain and let cool. Blend coconut milk with beef and salt. Heat mixture until hot but not thickening, stirring constantly. Add coconut and tomato paste, and stir well. Return to saucepan. Cook for 20 minutes. Remove from heat, stir in cayenne pepper, and stir until sauce is thick. Add cooked beef and coconut milk mixture. Bring to a boil over medium heat. Simmer stirring constantly until the coconut mixture has been thickened. Remove from heat and pour into cooled saucepan. Pour remaining coconut milk into a bowl. Heat over medium heat until mixture boils. Remove from heat. Spoon mixture into prepared baking dish. Bake in preheated oven at 400 degrees Fahrenheit for 20


In [33]:
# Using BLEU score to compare the real sentences with the generated ones

scores=[]

for i in range(10):
    ingredients = val_dataset[i][2]
    reference = val_dataset[i][3]
    candidate = infer(ingredients)
    scores.append(sentence_bleu(reference, candidate))

print(statistics.mean(scores))

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:

9.112175060372617e-232
