## Setup

In [None]:
# Mount Google Drive in the Colab runtime; later cells build every data path from `root`.
from google.colab import drive
root = '/content/drive'  # mount point passed to drive.mount below
drive.mount(root)

Mounted at /content/drive


### Configure Environment

In [None]:
!pip install -U bitsandbytes

!pip install -U transformers accelerate
!pip install evaluate rouge_score

Collecting bitsandbytes
  Downloading bitsandbytes-0.46.1-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-c

### Imports

In [None]:
# ✅ Confirm install
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig
)
from peft import PeftModel, PeftConfig

from datasets import load_from_disk
from huggingface_hub import login
import evaluate

import os

import pandas as pd
import re

### Login to Hugging Face

In [None]:
# Never store the access token in the notebook: reuse HF_TOKEN when the runtime already
# provides it, otherwise ask for it interactively (the input is not echoed or saved).
if not os.environ.get('HF_TOKEN'):
    from getpass import getpass
    os.environ['HF_TOKEN'] = getpass('Hugging Face token: ')

# NOTE(review): these look like CUDA debugging switches — confirm they are still wanted
# for full generation runs.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
os.environ['TORCH_USE_CUDA_DSA'] = "1"

login(token=os.environ["HF_TOKEN"])

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


### Load Dataset

In [None]:
# Project folder on the mounted Drive; datasets, adapters and generated outputs are all addressed relative to it.
recipe_nlg_data_path = f'{root}/MyDrive/NLP-266/Project/RecipeNLG'

### Globals

In [None]:
# Load the saved recipe dataset from Drive; a later cell takes its 'test' split as `small_test`.
dataset = load_from_disk(f'file://{recipe_nlg_data_path}/numbered_recipe_nlg_dataset')

In [None]:
# Role statement placed at the top of every prompt.
SYSTEM_PROMPT = (
    "You are an expert in generating recipe instructions "
    "from the recipe title and ingredient list."
)

# Keyword arguments forwarded verbatim to model.generate by the generation helpers.
PROMPT_HPS = dict(
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    max_new_tokens=256,
    stop_sequences=None,
    repetition_penalty=1.1,
    use_cache=True,
)

# Instruction used for the reasoning-only (chain-of-thought) generation pass.
# The empty entry produces the blank line that separates the two paragraphs.
cot_prompt = "\n".join((
    "Before writing the recipe instructions, first think step by step through the logic for all ingredients: what ingredients are needed, in what order tasks must be done, and how long each step takes.",
    "Are there any dependencies like marinating, chopping, or cooking that must be done before certain steps?",
    "",
    "Only generate the reasoning in this response. The recipe instructions will be generated in a later response.",
))

# Number of rows handed to the generation helpers per Dataset.map batch.
BATCH_SIZE = 32

### Generation Helpers

In [None]:
# Text fragments that form_prompt splices together when it assembles a prompt.
nl = '\n'

main_instruction = (
    "Write recipe instructions for the following "
    "recipe title and ingredient list."
)
use_reasoning_text = (
    "Use the provided reasoning "
    "to inform your response."
)
number_instruction_text = (
    "Each step should be numbered, and the number should precede the step text "
    "and be enclosed in square brackets (like [1] or [13])."
)
after_example_text = (
    "After the following examples, "
    "provide your response."
)

def extract_text(text, tag):
    """Return the part of `text` that follows the first "Your response:" marker.

    Args:
        text: Decoded model output (prompt followed by the generated continuation).
        tag: Unused; kept so existing callers that pass it keep working.

    Returns:
        The text after the marker with surrounding whitespace removed. When the
        marker is absent the whole text is returned (stripped) instead of an
        arbitrary slice of it.
    """
    split_str = "Your response:"
    marker_at = text.find(split_str)
    if marker_at == -1:
        # str.find signals "not found" with -1; slicing from -1 + len(split_str)
        # would silently drop the first 13 characters of the text.
        return text.strip()
    return text[marker_at + len(split_str):].strip()

def form_example(row):
    """Render one recipe row as a few-shot example block.

    Args:
        row: Mapping with 'title', 'ingredients' and 'directions'. 'directions' is
            normally a JSON-encoded list of step strings; a real list/tuple of
            steps is accepted as well.

    Returns:
        A string with the title, the ingredients, and the directions numbered as
        "[1] ...", "[2] ...", one step per line, ending with a newline.
    """
    import json  # local import so this helper does not depend on the notebook's import cell

    raw_directions = row['directions']
    strip_brackets = str.maketrans('', '', '[]')

    if isinstance(raw_directions, (list, tuple)):
        steps = list(raw_directions)
    else:
        try:
            steps = json.loads(raw_directions)
        except ValueError:  # json.JSONDecodeError is a ValueError
            steps = None
        if not isinstance(steps, list):
            # Not a JSON list: fall back to the textual split, but also drop the
            # quote characters that the split leaves on the first and last step.
            steps = [
                piece.strip().strip('"')
                for piece in raw_directions.translate(strip_brackets).split('", "')
            ]

    # Square brackets are removed from the step text (as before) so that only the
    # step numbers added below appear in brackets.
    direction_list = [str(step).translate(strip_brackets) for step in steps]
    directions = '\n'.join(
        f'[{number}] {direction}' for number, direction in enumerate(direction_list, start=1)
    )

    return f'''Recipe Title: {row['title']}
Ingredients: {row['ingredients']}
Recipe Instructions:
{directions}
'''


def form_examples(row, prompt_set, n_prompts, retrieved=False):
    """Pick `n_prompts` example recipes for `row` and render them as one text block.

    Args:
        row: Current example; its 'retrieval_neighbors' entry is read when `retrieved` is true.
        prompt_set: Dataset the examples are drawn from (needs shuffle/select and a 'prompt_id' column).
        n_prompts: Number of examples to include.
        retrieved: Use the row's first `n_prompts` retrieval neighbours instead of a random draw.

    Returns:
        {"examples": [text]} where `text` is the rendered examples joined by newlines.
    """
    if retrieved:
        chosen_ids = row['retrieval_neighbors'][:n_prompts] # changed
    else:
        chosen_ids = prompt_set.shuffle().select(range(n_prompts))['prompt_id']

    # NOTE(review): the ids are handed to select(), i.e. used as row positions in prompt_set — confirm.
    rendered = []
    for example_row in prompt_set.select(chosen_ids):
        rendered.append(form_example(example_row))

    return {"examples": ['\n'.join(rendered)]}


def form_prompt(title, ingredients, reasoning=None, examples=None, cot_prompt=None):
    """Build the prompt text for a single recipe.

    Two templates exist:
      * reasoning template: used only when `cot_prompt` is a non-empty `str`; it
        contains the system prompt, `cot_prompt`, the title and the ingredients.
      * instruction template: used in every other case (including
        `cot_prompt=True`); it optionally embeds few-shot `examples` and
        previously generated `reasoning`, and the result is stripped.

    Returns:
        The prompt string, ending with "Your response:".
    """
    reasoning_only = bool(cot_prompt) and type(cot_prompt) == str
    if reasoning_only:
        return f"""{SYSTEM_PROMPT}{nl + cot_prompt}
Recipe Title: {title}
Ingredients: {ingredients}
Your response:"""

    # Optional fragments collapse to "" so the template has no empty lines when they are unused.
    reasoning_hint = nl + use_reasoning_text if reasoning else ""
    example_section = nl + after_example_text + nl + examples if examples else ""
    reasoning_section = nl + "Reasoning: " + reasoning if reasoning else ""

    instruction_prompt = f"""{SYSTEM_PROMPT}
{main_instruction}{reasoning_hint}
{number_instruction_text}{example_section}
Recipe Title: {title}
Ingredients: {ingredients}{reasoning_section}
Your response:"""
    return instruction_prompt.strip()

def generate_instruction_batch(batch, model=None, tokenizer=None, cot_prompt=False, use_examples=False):
    """Generate recipe instructions for one batch (meant for `Dataset.map(batched=True)`).

    Args:
        batch: Column dict with 'title' and 'ingredients'; 'reasoning' and 'examples'
            are read when the matching switch is on and overwritten with empty
            strings (in `batch` itself) when it is off.
        model: Model exposing `.device` and `.generate`.
        tokenizer: Tokenizer used to encode the prompts and decode the outputs.
        cot_prompt: Truthy to feed the batch's 'reasoning' into the prompts; also forwarded to form_prompt.
        use_examples: Truthy to feed the batch's 'examples' into the prompts.

    Returns:
        {'prompts': [...], 'generated': [...]} with the full decoded sequences.
    """
    if not cot_prompt:
        batch['reasoning'] = ["" for _ in batch['title']]
    if not use_examples:
        batch['examples'] = ["" for _ in batch['title']]

    prompts = []
    rows = zip(batch['title'], batch['ingredients'], batch['reasoning'], batch['examples'])
    for title, ingredients, reasoning, examples in rows:
        # `examples` is a list of rendered blocks (or "" when disabled); join it into one string.
        prompts.append(form_prompt(title, ingredients, reasoning, nl.join(examples), cot_prompt))

    # Show the first prompt of each batch as a sanity check.
    print(prompts[0])

    encoded = tokenizer(prompts, return_tensors="pt", padding=True, truncation=True)
    encoded = encoded.to(model.device)

    generation_kwargs = dict(PROMPT_HPS)  # copy so the shared settings are never touched

    with torch.no_grad():
        generated_ids = model.generate(
            **encoded,
            **generation_kwargs,
            pad_token_id=tokenizer.pad_token_id,
        )

    return {
        'prompts': prompts,
        'generated': tokenizer.batch_decode(generated_ids, skip_special_tokens=True),
    }



def generate_reason_batch(batch, model=None, tokenizer=None, cot_prompt=""): # TODO: fix the prompt comprehension
    """Generate step-by-step reasoning for one batch (meant for `Dataset.map(batched=True)`).

    Args:
        batch: Column dict with 'title' and 'ingredients'.
        model: Model exposing `.device` and `.generate`.
        tokenizer: Tokenizer used to encode the prompts and decode the outputs.
        cot_prompt: Reasoning instruction appended after the system prompt; omitted when empty.

    Returns:
        {'reasoning': [...]} with, per row, the decoded text after "Your response:".
    """
    def render_prompt(title, ingredients):
        # Same layout as the reasoning template: system prompt, optional instruction, recipe fields.
        preamble = f"{SYSTEM_PROMPT}{nl + cot_prompt if cot_prompt else ''}"
        return f"{preamble}\nRecipe Title: {title}\nIngredients: {ingredients}\nYour response:"

    prompts = []
    for title, ingredients in zip(batch['title'], batch['ingredients']):
        prompts.append(render_prompt(title, ingredients))

    # Show the first prompt of each batch as a sanity check.
    print(prompts[0])

    encoded = tokenizer(prompts, return_tensors="pt", padding=True, truncation=True)
    encoded = encoded.to(model.device)

    generation_kwargs = dict(PROMPT_HPS)  # copy so the shared settings are never touched

    with torch.no_grad():
        generated_ids = model.generate(
            **encoded,
            **generation_kwargs,
            pad_token_id=tokenizer.pad_token_id,
        )

    reasoning = []
    for decoded_example in tokenizer.batch_decode(generated_ids, skip_special_tokens=True):
        reasoning.append(extract_text(decoded_example, "reasoning"))
    return {'reasoning': reasoning}

### Model Config

In [None]:
# Quantization settings passed to AutoModelForCausalLM.from_pretrained in the model-loading cells:
# 4-bit loading with the "nf4" quant type, double quantization and float16 compute.
# NOTE(review): llm_int8_enable_fp32_cpu_offload is named after the int8 path — confirm it is needed for 4-bit loading.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
    llm_int8_enable_fp32_cpu_offload=True
)

In [None]:
# Base checkpoint used for the tokenizer here and for `model` in the model-loading cell.
model_id = "teknium/OpenHermes-2.5-Mistral-7B"
#model_id = "Qwen/Qwen2.5-14B-Instruct-1M" # original
#model_id = "Qwen/Qwen2.5-Coder-32B-Instruct" #even bigger (still smaller than 34B model that didn't work)

# Pad on the left so that, in a padded batch, every prompt ends right where generation starts.
tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side='left')
# Reuse the EOS token as the padding token; the generation helpers pass tokenizer.pad_token_id to generate().
tokenizer.pad_token = tokenizer.eos_token

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/51.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/101 [00:00<?, ?B/s]

In [None]:
# Evaluation split used by the generation cells below (despite the name, this is the whole 'test' split).
small_test = dataset['test']

In [None]:
# Pre-processed training subset saved on Drive; printed to show its columns and row count.
train_ = load_from_disk(f'file://{recipe_nlg_data_path}/train_processed_1k')
print(train_)

Dataset({
    features: ['title', 'ingredients', 'directions', 'source', 'NER', 'n_ingredients', 'n_steps', 'n_ner', 'domain', 'avg_step_length', 'total_step_length', 'directions_formatted', 'prompt', 'input_ids', 'labels'],
    num_rows: 1000
})


In [None]:
# Load the base causal LM for `model_id` with the quantization settings defined above;
# device_map="auto" leaves device placement to the library.
model = AutoModelForCausalLM.from_pretrained(
  model_id,
  return_dict=True,
  torch_dtype=torch.float16,
  device_map="auto",
  quantization_config=quantization_config
)

config.json:   0%|          | 0.00/624 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/120 [00:00<?, ?B/s]

In [None]:
# Output directory for the reasoning generated on the training subset.
# NOTE(review): the directory name says "32b" while `model_id` is a 7B checkpoint — confirm which model produced these files.
very_big_save_path = f'{recipe_nlg_data_path}/Generated_Data/32b_generated_train_reasoning'

# Run generate_reason_batch over `train_` in batches of BATCH_SIZE; it returns a 'reasoning' column.
model_very_big_ft_cot_instruction_generated = train_.map(
    generate_reason_batch,
    batched=True,
    batch_size=BATCH_SIZE,
    fn_kwargs={"cot_prompt": cot_prompt, "model": model, "tokenizer": tokenizer}
)

# Persist the result so the expensive generation does not have to be repeated.
model_very_big_ft_cot_instruction_generated.save_to_disk(very_big_save_path)

In [None]:
# Directory of the fine-tuned adapter to evaluate.
#adapter_path_1 = f'{recipe_nlg_data_path}/qlora_1k_test_2e_1l'
adapter_path = f'{recipe_nlg_data_path}/qlora_10k_test'

#for n_layers, adapter_path in [('10', adapter_path_1), ('2', adapter_path_2)]:

# `config` is not used again in this cell.
config = PeftConfig.from_pretrained(adapter_path)

# Attach the adapter to the base model.
# NOTE(review): PEFT is documented to modify the base model in place — confirm that later cells
# using `model` as the non-fine-tuned baseline are not affected.
model_ft = PeftModel.from_pretrained(model, adapter_path)

# NOTE(review): the adapter directory is 'qlora_10k_test' but the output is named '..._2e_4l' — confirm they belong together.
ft_generation_save_path = f'file://{recipe_nlg_data_path}/Generated_Data/qlora_10k_test_2e_4l'

# Plain instruction generation (no reasoning, no examples) with the fine-tuned model.
model_ft_generated = small_test.map(
  generate_instruction_batch,
  batched=True,
  batch_size=BATCH_SIZE,
  fn_kwargs={"cot_prompt": False, "model": model_ft, "tokenizer": tokenizer}
)

model_ft_generated.save_to_disk(ft_generation_save_path)

### Setup Test Set for Examples

In [None]:

# Retrieval metadata saved on Drive; its 'test' split carries a 'retrieval_neighbors' column.
embed_dataset = load_from_disk(f'file://{recipe_nlg_data_path}/test_retrieval_embeddings')


def _recipe_key(title, ingredients, directions):
    """Identity of a recipe used to match rows across the two datasets."""
    # assumes the three fields are hashable (strings) — TODO confirm
    return (title, ingredients, directions)


def _build_retrieval_index(split):
    """Map every recipe key in `split` to its neighbours, keeping the first row on duplicates."""
    index = {}
    columns = zip(split['title'], split['ingredients'], split['directions'], split['retrieval_neighbors'])
    for title, ingredients, directions, neighbors in columns:
        index.setdefault(_recipe_key(title, ingredients, directions), neighbors)
    return index


# Built once, so each lookup is a dict access instead of a scan over the whole split.
_retrieval_index = _build_retrieval_index(embed_dataset['test'])


def get_retrieval_neighbors(example):
    """Look up the retrieval neighbours of `example` in the embedding dataset's test split.

    Args:
        example: Row with 'title', 'ingredients' and 'directions'.

    Returns:
        {"retrieval_neighbors": ...} for the first row whose three fields all match,
        or None when no row matches (same as before).
    """
    key = _recipe_key(example['title'], example['ingredients'], example['directions'])
    if key not in _retrieval_index:
        return None
    return {"retrieval_neighbors": _retrieval_index[key]}


small_test = small_test.map(get_retrieval_neighbors)

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

### CoT Few-Shot

In [None]:
# Chain-of-thought few-shot sweep: for 1-3 shots, with and without retrieved examples,
# generate reasoning first and then instructions, caching both stages on Drive.
adapter_path = f'{recipe_nlg_data_path}/qlora_1k_test_2e_4l'
#adapter_path = f'{recipe_nlg_data_path}/qlora_1k_test_5e'

#for n_layers, adapter_path in [('10', adapter_path_1), ('2', adapter_path_2)]:

config = PeftConfig.from_pretrained(adapter_path)

# NOTE(review): `model_ft` is not used in the loop below, which generates with `model`.
# PEFT is documented to modify the base model in place, so `model` may no longer be the
# plain base model after this call — confirm the intended baseline.
model_ft = PeftModel.from_pretrained(model, adapter_path)

for shots in range(1, 4):
  for retrieval in [True, False]:

    # One output directory per (shots, retrieval) combination; the reasoning stage adds a '_reasoning' suffix.
    cot_shot_generation_save_path = f'{recipe_nlg_data_path}/Generated_Data/cot_nft_shots_{shots}_retrieval_{retrieval}'

    # Stage 1: reasoning. Skipped when its output directory already exists.
    if not os.path.exists(f'{cot_shot_generation_save_path}_reasoning'):

      # Attach the few-shot example text to every test row (one row at a time).
      shot_small_test = small_test.map(form_examples, fn_kwargs={
          "prompt_set": embed_dataset['retrieval'], "n_prompts": shots, "retrieved": retrieval})

      # The reasoning prompt is built from title and ingredients only, so the examples do not influence this stage.
      cot_shot_reason_generated = shot_small_test.map(
        generate_reason_batch,
        batched=True,
        batch_size=BATCH_SIZE,
        fn_kwargs={"cot_prompt": cot_prompt, "model": model, "tokenizer": tokenizer}
      )

      cot_shot_reason_generated.save_to_disk(f'file://{cot_shot_generation_save_path}_reasoning')

    else:

      # Reuse the cached reasoning from an earlier run.
      cot_shot_reason_generated = load_from_disk(f'file://{cot_shot_generation_save_path}_reasoning')

    # Stage 2: instructions conditioned on the reasoning and the examples. Skipped when already on disk.
    if not os.path.exists(f'{cot_shot_generation_save_path}'):

      cot_shot_instruction_generated = cot_shot_reason_generated.map(
        generate_instruction_batch,
        batched=True,
        batch_size=BATCH_SIZE,
        fn_kwargs={"cot_prompt": True, "model": model, "tokenizer": tokenizer, "use_examples": True}
      )

      cot_shot_instruction_generated.save_to_disk(f'file://{cot_shot_generation_save_path}')



Map:   0%|          | 0/160 [00:00<?, ? examples/s]

Map:   0%|          | 0/160 [00:00<?, ? examples/s]

You are an expert in generating recipe instructions from the recipe title and ingredient list.
Before writing the recipe instructions, first think step by step through the logic for all ingredients: what ingredients are needed, in what order tasks must be done, and how long each step takes.
Are there any dependencies like marinating, chopping, or cooking that must be done before certain steps?

Only generate the reasoning in this response. The recipe instructions will be generated in a later response.
Recipe Title: Spiced Chicken With Couscous Stuffing
Ingredients: ["2/3 cup couscous", "1/4 cup chopped dried figs", "2 tbsp sliced almonds, toasted", "1 clove garlic, finely chopped", "1 tbsp chopped fresh cilantro", "1 tsp grated orange peel", "1/4 cup orange juice", "2 None Cornish game hens (1 1/2 lbs each)", "1 tsp cayenne pepper", "1 tsp ground cinnamon", "1 tsp ground cumin", "1 tsp sea salt", "4 large carrots, peeled and halved lengthwise", "4 None parsnips, peeled and halved lengt

Saving the dataset (0/1 shards):   0%|          | 0/160 [00:00<?, ? examples/s]

Map:   0%|          | 0/160 [00:00<?, ? examples/s]

You are an expert in generating recipe instructions from the recipe title and ingredient list.
Write recipe instructions for the following recipe title and ingredient list.
Use the provided reasoning to inform your response.
Each step should be numbered, and the number should precede the step text and be enclosed in square brackets (like [1] or [13]).
After the following examples, provide your response.
Recipe Title: Sauteed Soft-Shell Crab
Ingredients: ["1 1/2 cups low-fat milk", "4 small soft-shell crabs, cleaned", "3/4 cup all-purpose flour", "Salt and freshly ground black pepper, to taste", "2 tablespoons olive oil", "2 tablespoons unsalted butter", "2 tablespoons chopped flat-leaf parsley", "Remoulade Sauce , for serving"]
Recipe Instructions:
[1] "1.
[2] Place the milk in a shallow bowl large enough to hold the crabs in a single layer.
[3] Add the crabs and let them soak for 1 hour.
[4] Drain and discard the milk.
[5] 2.
[6] Season the flour with salt and pepper in a paper or pla

Saving the dataset (0/1 shards):   0%|          | 0/160 [00:00<?, ? examples/s]

Map:   0%|          | 0/160 [00:00<?, ? examples/s]

Map:   0%|          | 0/160 [00:00<?, ? examples/s]

You are an expert in generating recipe instructions from the recipe title and ingredient list.
Before writing the recipe instructions, first think step by step through the logic for all ingredients: what ingredients are needed, in what order tasks must be done, and how long each step takes.
Are there any dependencies like marinating, chopping, or cooking that must be done before certain steps?

Only generate the reasoning in this response. The recipe instructions will be generated in a later response.
Recipe Title: Spiced Chicken With Couscous Stuffing
Ingredients: ["2/3 cup couscous", "1/4 cup chopped dried figs", "2 tbsp sliced almonds, toasted", "1 clove garlic, finely chopped", "1 tbsp chopped fresh cilantro", "1 tsp grated orange peel", "1/4 cup orange juice", "2 None Cornish game hens (1 1/2 lbs each)", "1 tsp cayenne pepper", "1 tsp ground cinnamon", "1 tsp ground cumin", "1 tsp sea salt", "4 large carrots, peeled and halved lengthwise", "4 None parsnips, peeled and halved lengt

Saving the dataset (0/1 shards):   0%|          | 0/160 [00:00<?, ? examples/s]

Map:   0%|          | 0/160 [00:00<?, ? examples/s]

You are an expert in generating recipe instructions from the recipe title and ingredient list.
Write recipe instructions for the following recipe title and ingredient list.
Use the provided reasoning to inform your response.
Each step should be numbered, and the number should precede the step text and be enclosed in square brackets (like [1] or [13]).
After the following examples, provide your response.
Recipe Title: Chicken And Vegetable Stir-Fry
Ingredients: ["1 clove garlic, crushed", "1 tsp ground ginger", "1 tsp ground cumin", "2 None skinless chicken breasts, cubed", "2 tbsp olive oil", "1 None red onion, cut into wedges", "2 cups baby carrots, trimmed, sliced", "1 None zucchini, sliced", "1 cup apple juice or chicken stock", "1 None lemon, grated zest and juice", "1/4 cup cilantro leaves, plus extra, for garnish", "None None prepared couscous, to serve"]
Recipe Instructions:
[1] "In a small bowl, combine garlic, ginger and cumin and season with salt and pepper. Add chicken and t

Saving the dataset (0/1 shards):   0%|          | 0/160 [00:00<?, ? examples/s]

Map:   0%|          | 0/160 [00:00<?, ? examples/s]

Map:   0%|          | 0/160 [00:00<?, ? examples/s]

You are an expert in generating recipe instructions from the recipe title and ingredient list.
Before writing the recipe instructions, first think step by step through the logic for all ingredients: what ingredients are needed, in what order tasks must be done, and how long each step takes.
Are there any dependencies like marinating, chopping, or cooking that must be done before certain steps?

Only generate the reasoning in this response. The recipe instructions will be generated in a later response.
Recipe Title: Spiced Chicken With Couscous Stuffing
Ingredients: ["2/3 cup couscous", "1/4 cup chopped dried figs", "2 tbsp sliced almonds, toasted", "1 clove garlic, finely chopped", "1 tbsp chopped fresh cilantro", "1 tsp grated orange peel", "1/4 cup orange juice", "2 None Cornish game hens (1 1/2 lbs each)", "1 tsp cayenne pepper", "1 tsp ground cinnamon", "1 tsp ground cumin", "1 tsp sea salt", "4 large carrots, peeled and halved lengthwise", "4 None parsnips, peeled and halved lengt

Saving the dataset (0/1 shards):   0%|          | 0/160 [00:00<?, ? examples/s]

Map:   0%|          | 0/160 [00:00<?, ? examples/s]

You are an expert in generating recipe instructions from the recipe title and ingredient list.
Write recipe instructions for the following recipe title and ingredient list.
Use the provided reasoning to inform your response.
Each step should be numbered, and the number should precede the step text and be enclosed in square brackets (like [1] or [13]).
After the following examples, provide your response.
Recipe Title: Popover with Hot Turkey Salad
Ingredients: ["2 eggs, room temperature", "1 cup milk, room temperature", "1 cup all-purpose flour", "1/2 tsp. salt", "4 cups diced turkey, cooked", "2 cups diced celery", "2 cups shredded cheddar cheese", "1 can (2-1/4 oz.) sliced ripe olives, drained", "1 cup mayonnaise", "salad dressing", "1/4 cup milk", "1/8 tsp. pepper", "1 dash onion powder", "1-1/2 cup crushed potato chips", "Tomato wedges (optional)"]
Recipe Instructions:
[1] "In a mixing bowl, beat eggs until lemon-colored and foamy.
[2] Add milk, flour and salt; beat just until smoot

Saving the dataset (0/1 shards):   0%|          | 0/160 [00:00<?, ? examples/s]

Map:   0%|          | 0/160 [00:00<?, ? examples/s]

Map:   0%|          | 0/160 [00:00<?, ? examples/s]

You are an expert in generating recipe instructions from the recipe title and ingredient list.
Before writing the recipe instructions, first think step by step through the logic for all ingredients: what ingredients are needed, in what order tasks must be done, and how long each step takes.
Are there any dependencies like marinating, chopping, or cooking that must be done before certain steps?

Only generate the reasoning in this response. The recipe instructions will be generated in a later response.
Recipe Title: Spiced Chicken With Couscous Stuffing
Ingredients: ["2/3 cup couscous", "1/4 cup chopped dried figs", "2 tbsp sliced almonds, toasted", "1 clove garlic, finely chopped", "1 tbsp chopped fresh cilantro", "1 tsp grated orange peel", "1/4 cup orange juice", "2 None Cornish game hens (1 1/2 lbs each)", "1 tsp cayenne pepper", "1 tsp ground cinnamon", "1 tsp ground cumin", "1 tsp sea salt", "4 large carrots, peeled and halved lengthwise", "4 None parsnips, peeled and halved lengt

Saving the dataset (0/1 shards):   0%|          | 0/160 [00:00<?, ? examples/s]

Map:   0%|          | 0/160 [00:00<?, ? examples/s]

You are an expert in generating recipe instructions from the recipe title and ingredient list.
Write recipe instructions for the following recipe title and ingredient list.
Use the provided reasoning to inform your response.
Each step should be numbered, and the number should precede the step text and be enclosed in square brackets (like [1] or [13]).
After the following examples, provide your response.
Recipe Title: Chicken And Vegetable Stir-Fry
Ingredients: ["1 clove garlic, crushed", "1 tsp ground ginger", "1 tsp ground cumin", "2 None skinless chicken breasts, cubed", "2 tbsp olive oil", "1 None red onion, cut into wedges", "2 cups baby carrots, trimmed, sliced", "1 None zucchini, sliced", "1 cup apple juice or chicken stock", "1 None lemon, grated zest and juice", "1/4 cup cilantro leaves, plus extra, for garnish", "None None prepared couscous, to serve"]
Recipe Instructions:
[1] "In a small bowl, combine garlic, ginger and cumin and season with salt and pepper. Add chicken and t

Saving the dataset (0/1 shards):   0%|          | 0/160 [00:00<?, ? examples/s]

Map:   0%|          | 0/160 [00:00<?, ? examples/s]

Map:   0%|          | 0/160 [00:00<?, ? examples/s]

You are an expert in generating recipe instructions from the recipe title and ingredient list.
Before writing the recipe instructions, first think step by step through the logic for all ingredients: what ingredients are needed, in what order tasks must be done, and how long each step takes.
Are there any dependencies like marinating, chopping, or cooking that must be done before certain steps?

Only generate the reasoning in this response. The recipe instructions will be generated in a later response.
Recipe Title: Spiced Chicken With Couscous Stuffing
Ingredients: ["2/3 cup couscous", "1/4 cup chopped dried figs", "2 tbsp sliced almonds, toasted", "1 clove garlic, finely chopped", "1 tbsp chopped fresh cilantro", "1 tsp grated orange peel", "1/4 cup orange juice", "2 None Cornish game hens (1 1/2 lbs each)", "1 tsp cayenne pepper", "1 tsp ground cinnamon", "1 tsp ground cumin", "1 tsp sea salt", "4 large carrots, peeled and halved lengthwise", "4 None parsnips, peeled and halved lengt

Saving the dataset (0/1 shards):   0%|          | 0/160 [00:00<?, ? examples/s]

Map:   0%|          | 0/160 [00:00<?, ? examples/s]

You are an expert in generating recipe instructions from the recipe title and ingredient list.
Write recipe instructions for the following recipe title and ingredient list.
Use the provided reasoning to inform your response.
Each step should be numbered, and the number should precede the step text and be enclosed in square brackets (like [1] or [13]).
After the following examples, provide your response.
Recipe Title: Chicken And Rice Pot Pie 
Ingredients: ["1 1/2 cups UNCLE BEN'S(R) ORIGINAL CONVERTED Brand Rice", "3 1/2 cups frozen carrot, peas, corn blend", "1 teaspoon cracked black pepper", "2 cans (15 oz) cream of chicken condensed soup (heart healthy variety)", "2 1/2 cups water", "1 cup whole milk", "1 cup chicken diced", "1 sheet commercially prepared puff pastry dough"]
Recipe Instructions:
[1] "Preheat oven to 375 degrees
[2] Mix rice, vegetables, pepper, soup, water, milk and chicken in a casserole dish.
[3] Cover in foil and bake in oven for 30 minutes.
[4] Uncover and top w

Saving the dataset (0/1 shards):   0%|          | 0/160 [00:00<?, ? examples/s]

In [None]:
# Size tag of the teacher-distilled reasoning adapter to evaluate ('7b' or '14b').
# The adapter directory and the output directory are both derived from it, so switching
# sizes is a one-line change and the two paths cannot drift apart.
REASONING_TEACHER_SIZE = '14b'

adapter_path = f'{recipe_nlg_data_path}/{REASONING_TEACHER_SIZE}_reasoning_model_from_teacher'

# The adapter config names the base checkpoint the adapter was trained on.
config = PeftConfig.from_pretrained(adapter_path)

base_model = AutoModelForCausalLM.from_pretrained(
  config.base_model_name_or_path,
  return_dict=True,
  torch_dtype=torch.float16,
  device_map="auto",
  quantization_config=quantization_config
)

model_ft_teacher = PeftModel.from_pretrained(base_model, adapter_path)

ft_generation_save_path = f'file://{recipe_nlg_data_path}/Generated_Data/{REASONING_TEACHER_SIZE}_reasoning_model_from_teacher_generated_reasoning'

### Generate Reasoning on Train Data for Fine-Tuning

In [None]:
# Generate reasoning for every row of `small_test` with the reasoning adapter and save it.
# NOTE(review): `tokenizer` was created from `model_id`, while this model's base checkpoint comes
# from the adapter config — confirm the two match.
test_reason = small_test.map(
    generate_reason_batch,
    batched=True,
    batch_size=BATCH_SIZE,
    fn_kwargs={"cot_prompt": cot_prompt, "model": model_ft_teacher, "tokenizer": tokenizer}
)
test_reason.save_to_disk(ft_generation_save_path)

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

You are an expert in generating recipe instructions from the recipe title and ingredient list.
Before writing the recipe instructions, first think step by step through the logic for all ingredients: what ingredients are needed, in what order tasks must be done, and how long each step takes.
Are there any dependencies like marinating, chopping, or cooking that must be done before certain steps?

Only generate the reasoning in this response. The recipe instructions will be generated in a later response.
Recipe Title: No-Crust Sweet Potato Pie
Ingredients: ["4 medium sweet potatoes, or yams peeled, cooked, mashed", "1 cup sugar", "1/2 cup flour, all-purpose", "1/2 cup butter or margarine, softened", "1 each eggs", "1/2-1 teaspoons almond extract", "1 x whipped cream"]
Your response:
You are an expert in generating recipe instructions from the recipe title and ingredient list.
Before writing the recipe instructions, first think step by step through the logic for all ingredients: what ingre

Saving the dataset (0/1 shards):   0%|          | 0/1000 [00:00<?, ? examples/s]

In [None]:
# Size tag of the teacher-distilled instruction adapter to evaluate ('7b' or '14b').
# The adapter directory and the output directory are both derived from it, so switching
# sizes is a one-line change and the two paths cannot drift apart.
INSTRUCTION_TEACHER_SIZE = '14b'

adapter_path = f'{recipe_nlg_data_path}/{INSTRUCTION_TEACHER_SIZE}_instruction_model_from_teacher'

config = PeftConfig.from_pretrained(adapter_path)

# NOTE(review): `base_model` already had the reasoning adapter attached in an earlier cell —
# confirm that attaching a second adapter to the same object is intended.
model_ft_teacher = PeftModel.from_pretrained(base_model, adapter_path)

ft_generation_save_path = f'file://{recipe_nlg_data_path}/Generated_Data/{INSTRUCTION_TEACHER_SIZE}_instruction_model_from_teacher_generated_instructions'



In [None]:
# Second stage: turn the saved reasoning in `test_reason` into recipe instructions.
# cot_prompt=True makes generate_instruction_batch feed each row's 'reasoning' into the prompt.
test_generated = test_reason.map(
    generate_instruction_batch,
    batched=True,
    batch_size=BATCH_SIZE,
    fn_kwargs={"cot_prompt": True, "model": model_ft_teacher, "tokenizer": tokenizer}
)
test_generated.save_to_disk(ft_generation_save_path)

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

You are an expert in generating recipe instructions from the recipe title and ingredient list.
Write recipe instructions for the following recipe title and ingredient list.
Use the provided reasoning to inform your response.
Each step should be numbered, and the number should precede the step text and be enclosed in square brackets (like [1] or [13]).
Recipe Title: No-Crust Sweet Potato Pie
Ingredients: ["4 medium sweet potatoes, or yams peeled, cooked, mashed", "1 cup sugar", "1/2 cup flour, all-purpose", "1/2 cup butter or margarine, softened", "1 each eggs", "1/2-1 teaspoons almond extract", "1 x whipped cream"]
Reasoning: 1. Peeel and cook the sweet potatoes until they are tender. This process should take approximately 30 minutes to an hour, including preparation time.
2. Allow the cooked sweet potatoes to cool slightly before mashing them. Mash until smooth and creamy, which should take about 5-10 minutes.
3. In a separate bowl, mix together the sugar, flour, and almond extract. T

Saving the dataset (0/1 shards):   0%|          | 0/1000 [00:00<?, ? examples/s]

### Fine-Tuned CoT

In [None]:
# Reasoning pass with the fine-tuned model `model_ft`; adds a 'reasoning' column to the test rows.
model_ft_cot_reason_generated = small_test.map(
    generate_reason_batch,
    batched=True,
    batch_size=BATCH_SIZE,
    fn_kwargs={"cot_prompt": cot_prompt, "model": model_ft, "tokenizer": tokenizer}
)

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

You are an expert in generating recipe instructions from the recipe title and ingredient list.
Before writing the recipe instructions, first think step by step through the logic for all ingredients: what ingredients are needed, in what order tasks must be done, and how long each step takes.
Are there any dependencies like marinating, chopping, or cooking that must be done before certain steps?

Only generate the reasoning in this response. The recipe instructions will be generated in a later response.
Recipe Title: Orange Cookies Iii
Ingredients: ["1 cup shortening", "2 cups white sugar", "2 eggs", "1 cup buttermilk", "1/2 cup orange juice", "2 tablespoons orange zest", "4 1/2 cups all-purpose flour", "2 teaspoons baking powder", "1 teaspoon salt", "1/2 teaspoon baking soda", "1/4 cup butter", "4 cups confectioners' sugar", "3 tablespoons thawed orange juice concentrate"]
Your response:
You are an expert in generating recipe instructions from the recipe title and ingredient list.
Befor

In [None]:
def remove_your_response_text(example):
  """Drop a leading "Your response:" marker from the row's reasoning, if there is one.

  Args:
      example: Row with a string 'reasoning' entry.

  Returns:
      {"reasoning": text} without the marker and the whitespace that follows it.
      Text that does not start with the marker is returned unchanged, so the
      function is safe to apply more than once.
  """
  prefix = 'Your response:'
  reasoning = example['reasoning']
  if reasoning.startswith(prefix):
    # Only cut when the marker is really there; an unconditional slice would
    # delete the first 15 characters of genuine reasoning text.
    reasoning = reasoning[len(prefix):].lstrip()
  return {"reasoning": reasoning}

# Rebinds the same name to the cleaned dataset, so re-running this cell applies the clean-up again.
model_ft_cot_reason_generated = model_ft_cot_reason_generated.map(remove_your_response_text)

In [None]:
# Re-create `model_ft`, this time on top of `base_model` (the checkpoint loaded for the teacher adapters).
adapter_path = f'{recipe_nlg_data_path}/qlora_1k_test_2e_4l'

model_ft = PeftModel.from_pretrained(base_model, adapter_path)

# Output directory for the generation cells that follow; this name is reassigned by several cells.
ft_generation_save_path = f'file://{recipe_nlg_data_path}/Generated_Data/ft_cot_reason_tuned'



In [None]:
# Instruction pass conditioned on the fine-tuned model's own reasoning.
# NOTE(review): `model_ft_cot_reason_generated_filtered` is not defined in any cell visible here —
# presumably a filtered copy of `model_ft_cot_reason_generated`; confirm, or add the defining cell.
model_ft_cot_instruction_generated = model_ft_cot_reason_generated_filtered.map(
    generate_instruction_batch,
    batched=True,
    batch_size=BATCH_SIZE,
    fn_kwargs={"cot_prompt": True, "model": model_ft, "tokenizer": tokenizer}
)

model_ft_cot_instruction_generated.save_to_disk(ft_generation_save_path)

Map:   0%|          | 0/379 [00:00<?, ? examples/s]

You are an expert in generating recipe instructions from the recipe title and ingredient list.
Write recipe instructions for the following recipe title and ingredient list.
Use the provided reasoning to inform your response.
Each step should be numbered, and the number should precede the step text and be enclosed in square brackets (like [1] or [13]).
Recipe Title: Mango-Chile Ice Pops(Paletas De Mango Con Chile 
Ingredients: ["1 cup store-bought mango juice", "or nectar", "1/4 cup sugar", "2 tsp. fresh lemon juice", "1 tsp. ancho chile powder", "1 large mango, peeled, seeded,", "and cut into small cubes"]
Reasoning: First, gather all necessary ingredients - 1 cup of store-bought mango juice, 1/4 cup of sugar, 2 teaspoons of fresh lemon juice, 1 teaspoon of ancho chile powder, 1 large ripe mango, peeled, seeded, and cut into small cubes.
Next, if using fresh mango, make sure it is ripe and ready to use. If not, allow time for the mango to ripen.
Then, in a medium bowl, mix together the

Saving the dataset (0/1 shards):   0%|          | 0/379 [00:00<?, ? examples/s]

In [None]:
## Only do this after running below code

# Plain instruction generation (no reasoning, no examples) with the fine-tuned model.
model_ft_generated = small_test.map(
    generate_instruction_batch,
    batched=True,
    batch_size=BATCH_SIZE,
    # The switch of generate_instruction_batch is named `cot_prompt`; the former "cot" key
    # is not a parameter of that function and makes the call raise TypeError.
    fn_kwargs={"cot_prompt": False, "model": model_ft, "tokenizer": tokenizer}
)

# NOTE(review): this writes to whatever `ft_generation_save_path` was last set to — confirm
# it does not point at the output of a different experiment.
model_ft_generated.save_to_disk(ft_generation_save_path)

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


You are an expert in generating recipe instructions from the recipe title and ingredient list.
Fill in the missing Recipe Instructions. Be sure to number each step. After the following examples, provide your response.

Recipe Title: Orange Cookies Iii
Ingredients: ["1 cup shortening", "2 cups white sugar", "2 eggs", "1 cup buttermilk", "1/2 cup orange juice", "2 tablespoons orange zest", "4 1/2 cups all-purpose flour", "2 teaspoons baking powder", "1 teaspoon salt", "1/2 teaspoon baking soda", "1/4 cup butter", "4 cups confectioners' sugar", "3 tablespoons thawed orange juice concentrate"]
Recipe Instructions:
You are an expert in generating recipe instructions from the recipe title and ingredient list.
Fill in the missing Recipe Instructions. Be sure to number each step. After the following examples, provide your response.

Recipe Title: Creamy Roasted Garlic And Almond Soup
Ingredients: ["3 garlic bulbs", "1 onion chopped", "4 tablespoons olive oil", "creme fraiche 30 cl.", "4 1/4 cu

Saving the dataset (0/1 shards):   0%|          | 0/1000 [00:00<?, ? examples/s]

### More Experiments

### OOB Generation

In [None]:
# Batch size for the experiments in this section (same value as in the globals cell).
BATCH_SIZE = 32

In [None]:
BATCH_SIZE = 32

# Smoke test: run the base model on two test rows only, without reasoning or examples.
generated_dataset_oob_TEST = small_test.select([1,2]).map(
    generate_instruction_batch,
    batched=True,
    batch_size=BATCH_SIZE,
    # The switch of generate_instruction_batch is named `cot_prompt`; the former "cot" key
    # is not a parameter of that function and makes the call raise TypeError.
    fn_kwargs={"cot_prompt": False, "model": model, "tokenizer": tokenizer}
)


#generated_dataset_oob.save_to_disk(f'{recipe_nlg_data_path}/oob_generated')

Map:   0%|          | 0/2 [00:00<?, ? examples/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


You are an expert in generating recipe instructions from the recipe title and ingredient list.
Enclose your recipe instructions in <instructions> tags and number each step.
Examples: None
Title: Tarragon Chicken Salad
Ingredients: ["1/4 cup MIRACLE WHIP FREE Dressing", "1/4 tsp. dried tarragon leaves", "3 oz. , cubed", "1 cup red or green grapes, seedless Target 1 lb For $2.99 thru 02/06", "1/2 whole wheat pita bread round", "2 SNACKWELL'S Sugar Free Shortbread Cookies", "4 fl oz (1/2 cup) orange juice"]
Your response:


'You are an expert in generating recipe instructions from the recipe title and ingredient list.\nEnclose your recipe instructions in <instructions> tags and number each step.\nExamples: None\nTitle: Tarragon Chicken Salad\nIngredients: ["1/4 cup MIRACLE WHIP FREE Dressing", "1/4 tsp. dried tarragon leaves", "3 oz. , cubed", "1 cup red or green grapes, seedless Target 1 lb For $2.99 thru 02/06", "1/2 whole wheat pita bread round", "2 SNACKWELL\'S Sugar Free Shortbread Cookies", "4 fl oz (1/2 cup) orange juice"]\nYour response:\n<instructions>\n1. In a small bowl, mix together the Miracle Whip dressing and dried tarragon leaves. Set aside.\n2. In a separate bowl, combine the cubed chicken, grapes, and half of the dressing mixture from step 1. Mix well to coat all ingredients evenly.\n3. Cut the whole wheat pita bread into triangles or squares. Serve with the chicken salad as desired.\n4. Garnish the plate with two Snackwell\'s sugar-free shortbread cookies.\n5. Pour the remaining orange 

### CoT Generation

In [None]:
# Prompt text forwarded to generate_reason_batch through the "cot" keyword;
# surrounding whitespace is stripped from the literal.
cot_prompt = """
Before writing the recipe instructions, first think step by step through the logic for all ingredients: what ingredients are needed, in what order tasks must be done, and how long each step takes.
Are there any dependencies like marinating, chopping, or cooking that must be done before certain steps?

Only generate the reasoning in this response, the recipe instructions will be generated in a later response.
""".strip()

# First pass: map generate_reason_batch over small_test with `model`.
generated_dataset_cot_reasoning = small_test.map(
    generate_reason_batch,
    fn_kwargs=dict(cot=cot_prompt, model=model, tokenizer=tokenizer),
    batch_size=BATCH_SIZE,
    batched=True,
)


Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

You are an expert in generating recipe instructions.
Before writing the recipe instructions, first think step by step through the logic for all ingredients: what ingredients are needed, in what order tasks must be done, and how long each step takes. 
Are there any dependencies like marinating, chopping, or cooking that must be done before certain steps?

Only generate the reasoning in this response, the recipe instructions will be generated in a later response.
Enclose your reasoning in <reasoning> tags.
Title: Orange Cookies Iii
Ingredients: ["1 cup shortening", "2 cups white sugar", "2 eggs", "1 cup buttermilk", "1/2 cup orange juice", "2 tablespoons orange zest", "4 1/2 cups all-purpose flour", "2 teaspoons baking powder", "1 teaspoon salt", "1/2 teaspoon baking soda", "1/4 cup butter", "4 cups confectioners' sugar", "3 tablespoons thawed orange juice concentrate"]
Your response:
You are an expert in generating recipe instructions.
Before writing the recipe instructions, first think

In [None]:
# Second pass: map generate_instruction_batch (cot=True) over the dataset
# produced by the reasoning pass, then save the result to disk.
generated_dataset_cot_instructions = generated_dataset_cot_reasoning.map(
    generate_instruction_batch,
    fn_kwargs=dict(cot=True, model=model, tokenizer=tokenizer),
    batch_size=BATCH_SIZE,
    batched=True,
)
generated_dataset_cot_instructions.save_to_disk(
    f'{recipe_nlg_data_path}/oob_cot_generated'
)

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Write recipe instructions for the following recipe title and ingredient list. 
    Use the provided reasoning to inform your response. 
    Enclose your recipe instructions in <instructions> tags and number each step.
    Recipe Title: Orange Cookies Iii
    Ingredients: ["1 cup shortening", "2 cups white sugar", "2 eggs", "1 cup buttermilk", "1/2 cup orange juice", "2 tablespoons orange zest", "4 1/2 cups all-purpose flour", "2 teaspoons baking powder", "1 teaspoon salt", "1/2 teaspoon baking soda", "1/4 cup butter", "4 cups confectioners' sugar", "3 tablespoons thawed orange juice concentrate"]
    Reasoning: Your response:
<reasoning>
First, we gather all the necessary ingredients to make the cookies. We have 1 cup of shortening, 2 cups of white sugar, 2 eggs, 1 cup of buttermilk, 1/2 cup of orange juice, 2 tablespoons of orange zest, 4 1/2 cups of all-purpose flour, 2 teaspoons of baking powder, 1 teaspoon of salt, 1/2 teaspoon of baking soda, 1/4 cup of butter, 4 cups of confectio

### Few-Shot Evaluation

In [None]:
def _build_shot_dataset(n_prompts, **extra_kwargs):
    """Map ``form_examples`` over ``small_test`` for one few-shot configuration.

    Args:
        n_prompts: Value forwarded to ``form_examples`` as ``n_prompts``.
        **extra_kwargs: Extra keyword arguments forwarded to ``form_examples``
            (e.g. ``retrieved=True``). When omitted, no extra key is passed,
            so ``form_examples`` falls back to its own defaults.

    Returns:
        The dataset returned by ``small_test.map``.
    """
    return small_test.map(
        form_examples,
        fn_kwargs={
            "prompt_set": embed_dataset['retrieval'],
            "n_prompts": n_prompts,
            **extra_kwargs,
        },
    )


# Call order (3, 2, 1; plain variants first, then retrieved=True) is kept fixed
# in case form_examples is order-sensitive (e.g. random sampling) — TODO confirm.
three_shot = _build_shot_dataset(3)
two_shot = _build_shot_dataset(2)
one_shot = _build_shot_dataset(1)
three_shot_ret = _build_shot_dataset(3, retrieved=True)
two_shot_ret = _build_shot_dataset(2, retrieved=True)
one_shot_ret = _build_shot_dataset(1, retrieved=True)

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [None]:
# Few-shot datasets keyed by the name used in the output directory (ft_<name>).
shots = dict(
    one_shot=one_shot,
    two_shot=two_shot,
    three_shot=three_shot,
    one_shot_ret=one_shot_ret,
    two_shot_ret=two_shot_ret,
    three_shot_ret=three_shot_ret,
)

# Generate with `model_ft` for every few-shot variant and save each result.
# NOTE(review): this cell passes "cot_prompt" whereas other cells pass "cot" to
# generate_instruction_batch — confirm against the function's current signature.
for name, shot in shots.items():
    generated_dataset_shot_instructions = shot.map(
        generate_instruction_batch,
        fn_kwargs={
            "cot_prompt": False,
            "model": model_ft,
            "tokenizer": tokenizer,
            "use_examples": True,
        },
        batch_size=BATCH_SIZE,
        batched=True,
    )
    generated_dataset_shot_instructions.save_to_disk(
        f'{recipe_nlg_data_path}/Generated_Data/ft_{name}'
    )

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


You are an expert in generating recipe instructions from the recipe title and ingredient list.
Write recipe instructions for the following recipe title and ingredient list.
Each step should be numbered, and the number should precede the step text and be enclosed in square brackets (like [1] or [13]).
After the following examples, provide your response.
Recipe Title: Strawberry Blueberry Crisp
Ingredients: ["2 pounds Fresh Strawberries, Hulled And Sliced", "1 pint Fresh Blueberries", "3 Tablespoons Cornstarch", "White Sugar To Taste If The Berries Are Tart", "1 cup Brown Sugar", "3/4 cups Flour", "3/4 cups Rolled Oats, Quick Cooking", "1 teaspoon Cinnamon", "1/2 cups Butter"]
Recipe Instructions:
[1] "Preheat oven to 350 F. Put the berries into a large bowl. Toss berries with cornstarch and white sugar (if using) until well coated. Butter a 10\" glass pie plate and place the berries into the pie plate. Set aside.
[2] In a medium sized bowl, mix together the brown sugar, flour, rolled oa

Saving the dataset (0/1 shards):   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

You are an expert in generating recipe instructions from the recipe title and ingredient list.
Write recipe instructions for the following recipe title and ingredient list.
Each step should be numbered, and the number should precede the step text and be enclosed in square brackets (like [1] or [13]).
After the following examples, provide your response.
Recipe Title: Layered Cheese And Pesto 
Ingredients: ["1 c fresh basil, firmly packed", "3/4 c grated Parmesan cheese", "1/2 c parsley, firmly packed", "1/4 c pine nuts or walnuts", "2 cloves garlic, quartered", "1/3 c olive oil", "1 8 0z. package cream cheese", "4 1/2 oz Brie, rind removed", "1/2 c whipping cream", "Paprika, if desired", "Crackers or French bread"]
Recipe Instructions:
[1] "In a blender or food processor bowl combine basil, grated cheese,parsley, nuts and garlic. Cover and pulse til a paste forms. With machine running, gradually add oil and process until consistency of soft butter.
[2] 
[3] Bring the cream cheese and Br

Saving the dataset (0/1 shards):   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

You are an expert in generating recipe instructions from the recipe title and ingredient list.
Write recipe instructions for the following recipe title and ingredient list.
Each step should be numbered, and the number should precede the step text and be enclosed in square brackets (like [1] or [13]).
After the following examples, provide your response.
Recipe Title: Tabbouleh Salad With Grilled Eggplant
Ingredients: ["1 cup bulgur", "1 1/2 cups boiling water", "Juice of 1 1/2 lemons", "A 1-pound eggplant, sliced 1/2-inch thick", "3 tablespoons minced scallions", "1 medium-size tomato, peeled, seeded, juiced and chopped", "1 garlic clove, minced", "3 tablespoons minced fresh flat-leaf parsley", "2 tablespoons minced fresh mint", "1 1/2 to 2 tablespoons extra-virgin olive oil", "Salt and freshly ground black pepper"]
Recipe Instructions:
[1] "Place the bulgur in a bowl, pour the boiling water over it and stir in the lemon juice.
[2] Cover and set it aside.
[3] Preheat a grill or broiler.

Saving the dataset (0/1 shards):   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

You are an expert in generating recipe instructions from the recipe title and ingredient list.
Write recipe instructions for the following recipe title and ingredient list.
Each step should be numbered, and the number should precede the step text and be enclosed in square brackets (like [1] or [13]).
After the following examples, provide your response.
Recipe Title: "Dirt" Dessert Recipe
Ingredients: ["1 (8 ounce.) cream cheese", "1 c. powdered sugar", "2 sm. boxes instant French vanilla pudding", "2 to 3 c. lowfat milk (3 is creamier)", "Cold Whip", "1 (20 ounce.) bag Oreo cookies"]
Recipe Instructions:
[1] "Mix cream cheese, powdered sugar, pudding, and lowfat milk well.
[2] Then mix in Cold Whip.
[3] Crush cookies.
[4] Layer in flower pot, starting with Oreos and ending with Oreos.
[5] Top with flowers."

Recipe Title: Delectable Dirt Dessert Recipe
Ingredients: ["1 lg. pkg. Oreos", "1/2 cube butter", "8 ounce. pkg. cream cheese", "1 c. powdered sugar", "3 c. lowfat milk", "2 Packag

Saving the dataset (0/1 shards):   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

You are an expert in generating recipe instructions from the recipe title and ingredient list.
Write recipe instructions for the following recipe title and ingredient list.
Each step should be numbered, and the number should precede the step text and be enclosed in square brackets (like [1] or [13]).
After the following examples, provide your response.
Recipe Title: "Dirt" Dessert Recipe
Ingredients: ["1 (8 ounce.) cream cheese", "1 c. powdered sugar", "2 sm. boxes instant French vanilla pudding", "2 to 3 c. lowfat milk (3 is creamier)", "Cold Whip", "1 (20 ounce.) bag Oreo cookies"]
Recipe Instructions:
[1] "Mix cream cheese, powdered sugar, pudding, and lowfat milk well.
[2] Then mix in Cold Whip.
[3] Crush cookies.
[4] Layer in flower pot, starting with Oreos and ending with Oreos.
[5] Top with flowers."

Recipe Title: Dirt & Worms Dessert
Ingredients: ["16 ounces, weight Cool Whip , Thawed", "8 ounces, weight Cream Cheese, Softened", "1 cup Powdered Sugar", "3 cups Milk", "2 boxes 

Saving the dataset (0/1 shards):   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

You are an expert in generating recipe instructions from the recipe title and ingredient list.
Write recipe instructions for the following recipe title and ingredient list.
Each step should be numbered, and the number should precede the step text and be enclosed in square brackets (like [1] or [13]).
After the following examples, provide your response.
Recipe Title: "Dirt" Dessert Recipe
Ingredients: ["1 (8 ounce.) cream cheese", "1 c. powdered sugar", "2 sm. boxes instant French vanilla pudding", "2 to 3 c. lowfat milk (3 is creamier)", "Cold Whip", "1 (20 ounce.) bag Oreo cookies"]
Recipe Instructions:
[1] "Mix cream cheese, powdered sugar, pudding, and lowfat milk well.
[2] Then mix in Cold Whip.
[3] Crush cookies.
[4] Layer in flower pot, starting with Oreos and ending with Oreos.
[5] Top with flowers."

Recipe Title: Dirt & Worms Dessert
Ingredients: ["16 ounces, weight Cool Whip , Thawed", "8 ounces, weight Cream Cheese, Softened", "1 cup Powdered Sugar", "3 cups Milk", "2 boxes 

### Fine-Tuned Evaluation

In [None]:
# Set the "examples" column to an empty string on every row of small_test.
small_test = small_test.map(lambda x: {"examples": ""})

In [None]:
# Generation over small_test with `model_ft`; the output is saved under
# ft_generated_1k_5e_3.
generated_dataset_ft = small_test.map(
    generate_instruction_batch,
    fn_kwargs=dict(cot=False, model=model_ft, tokenizer=tokenizer),
    batch_size=BATCH_SIZE,
    batched=True,
)
generated_dataset_ft.save_to_disk(
    f'{recipe_nlg_data_path}/ft_generated_1k_5e_3'
)


Parameter 'fn_kwargs'={'cot': False, 'model': PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MistralForCausalLM(
      (model): MistralModel(
        (embed_tokens): Embedding(32002, 4096)
        (layers): ModuleList(
          (0-27): 28 x MistralDecoderLayer(
            (self_attn): MistralAttention(
              (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
              (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
              (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
              (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
            )
            (mlp): MistralMLP(
              (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
              (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
              (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
              (act_fn): SiLU()
     

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

You are an expert in generating recipe instructions from the recipe title and ingredient list.
Fill in the missing Recipe Instructions. Be sure to number each step. After the following examples, provide your response.

Recipe Title: Orange Cookies Iii
Ingredients: ["1 cup shortening", "2 cups white sugar", "2 eggs", "1 cup buttermilk", "1/2 cup orange juice", "2 tablespoons orange zest", "4 1/2 cups all-purpose flour", "2 teaspoons baking powder", "1 teaspoon salt", "1/2 teaspoon baking soda", "1/4 cup butter", "4 cups confectioners' sugar", "3 tablespoons thawed orange juice concentrate"]
Recipe Instructions:


In [None]:
# Few-shot datasets keyed by the suffix of the directory each result is saved to
# (ft_generated_1k_5e_<suffix>). Dict order fixes the run order.
ft_few_shot_runs = {
    "one_shot": one_shot,
    "two_shot": two_shot,
    "three_shot": three_shot,
    "one_shot_retrieval": one_shot_ret,
    "two_shot_retrieval": two_shot_ret,
    "three_shot_retrieval": three_shot_ret,
}

# Run generate_instruction_batch with `model_ft` on each variant and save the
# result immediately, so completed runs are on disk even if a later one fails.
# After the loop, generated_dataset_ft holds the three_shot_ret result.
for run_suffix, shot_dataset in ft_few_shot_runs.items():
    generated_dataset_ft = shot_dataset.map(
        generate_instruction_batch,
        batched=True,
        batch_size=BATCH_SIZE,
        fn_kwargs={"cot": False, 'model': model_ft, 'tokenizer': tokenizer},
    )
    generated_dataset_ft.save_to_disk(
        f'{recipe_nlg_data_path}/ft_generated_1k_5e_{run_suffix}'
    )

### Test Output

In [None]:
# Two-row check: rows at indices 1 and 2 of small_test, run through
# generate_instruction_batch with `model` (batch_size=2).
generated_dataset_base_test = (
    small_test
    .select([1, 2])
    .map(
        generate_instruction_batch,
        fn_kwargs=dict(cot=False, model=model, tokenizer=tokenizer),
        batch_size=2,
        batched=True,
    )
)

Map:   0%|          | 0/2 [00:00<?, ? examples/s]

You are an expert in generating recipe instructions from the recipe title and ingredient list.
Enclose your recipe instructions in <instructions> tags and number each step.
Examples: None
Title: Tarragon Chicken Salad
Ingredients: ["1/4 cup MIRACLE WHIP FREE Dressing", "1/4 tsp. dried tarragon leaves", "3 oz. , cubed", "1 cup red or green grapes, seedless Target 1 lb For $2.99 thru 02/06", "1/2 whole wheat pita bread round", "2 SNACKWELL'S Sugar Free Shortbread Cookies", "4 fl oz (1/2 cup) orange juice"]
Your response:
