In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer


MODEL_ID = "auhide/chef-gpt-en"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
chef_gpt = AutoModelForCausalLM.from_pretrained(MODEL_ID)

from datasets import load_dataset


dataset = load_dataset("m3hrdadfi/recipe_nlg_lite")


tokenizer_config.json:   0%|          | 0.00/477 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/245 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/907 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/498M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/119 [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/3.46k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/1.88k [00:00<?, ?B/s]

Downloading and preparing dataset recipe_nlg_lite/1.0.0 (download: 6.40 MiB, generated: 10.84 MiB, post-processed: Unknown size, total: 17.24 MiB) to /root/.cache/huggingface/datasets/m3hrdadfi___recipe_nlg_lite/1.0.0/1.0.0/2fd5f76dc1ed88ff2d6485b11497d6ae9516f4ebb2a6cb528dfaf0520bd8e51a...


Downloading data:   0%|          | 0.00/6.71M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/6118 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1080 [00:00<?, ? examples/s]

Dataset recipe_nlg_lite downloaded and prepared to /root/.cache/huggingface/datasets/m3hrdadfi___recipe_nlg_lite/1.0.0/1.0.0/2fd5f76dc1ed88ff2d6485b11497d6ae9516f4ebb2a6cb528dfaf0520bd8e51a. Subsequent calls will reuse this data.


  0%|          | 0/2 [00:00<?, ?it/s]

In [2]:
import torch
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from tqdm import tqdm

# Define your dataset class 
class RecipeDataset(Dataset):
    def __init__(self, data, tokenizer, max_length):
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        description = self.data[idx]['ner']
        steps = self.data[idx]['steps']
        ingredients = self.data[idx]['ingredients']
        
        # Tokenize description and steps
        encoded_desc = self.tokenizer(f"ingredients>> {description}; recipe>>", padding='max_length', truncation=True, max_length=self.max_length, return_tensors="pt")
        encoded_steps = self.tokenizer(f"ingredients>> {ingredients}; recipe>> {steps}<|endoftext|>", padding='max_length', truncation=True, max_length=self.max_length, return_tensors="pt")
#         print(f"ingredients>> {description}; recipe>> {steps}<|endoftext|>")
        return encoded_desc, encoded_steps

# Split your dataset into train and validation sets
train_data = dataset['train']
val_data = dataset['test']

# Define batch size and number of epochs
batch_size = 1
num_epochs = 3

# Create data loaders
max_length = 256 
train_loader = DataLoader(RecipeDataset(train_data, tokenizer, max_length), batch_size=batch_size, shuffle=True)
val_loader = DataLoader(RecipeDataset(val_data, tokenizer, max_length), batch_size=batch_size)

# Define optimizer and scheduler
optimizer = AdamW(chef_gpt.parameters(), lr=2e-4)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
chef_gpt.to(device)

# Fine-tuning loop
for epoch in range(num_epochs):
    chef_gpt.train()
    total_loss = 0
    for batch_idx, batch in enumerate(tqdm(train_loader, desc=f"Epoch {epoch+1}")):
        optimizer.zero_grad()
        input_desc, target_steps = batch
        input_desc = {k: v.to(device) for k, v in input_desc.items()}
        target_steps = {k: v.to(device) for k, v in target_steps.items()}
        outputs = chef_gpt(input_ids=input_desc['input_ids'], labels=target_steps['input_ids'])
        loss = outputs.loss
        total_loss += loss.item()
        loss.backward()

    avg_train_loss = total_loss / len(train_loader)

    # Validation
    chef_gpt.eval()
    total_val_loss = 0
    with torch.no_grad():
        for batch in val_loader:
            input_desc, target_steps = batch
            input_desc = {k: v.to(device) for k, v in input_desc.items()}
            target_steps = {k: v.to(device) for k, v in target_steps.items()}
            outputs = chef_gpt(input_ids=input_desc['input_ids'], labels=target_steps['input_ids'])
            val_loss = outputs.loss
            total_val_loss += val_loss.item()
    avg_val_loss = total_val_loss / len(val_loader)

    print(f"Epoch {epoch+1}: Avg Train Loss: {avg_train_loss}, Avg Val Loss: {avg_val_loss}")

# Save the fine-tuned model
chef_gpt.save_pretrained("fine_tuned_recipe_model")

cuda


Epoch 1: 100%|██████████| 6118/6118 [04:48<00:00, 21.22it/s]


Epoch 1: Avg Train Loss: 8.774734259002464, Avg Val Loss: 8.881945276039618


Epoch 2: 100%|██████████| 6118/6118 [04:47<00:00, 21.31it/s]


Epoch 2: Avg Train Loss: 8.775861708195858, Avg Val Loss: 8.881945276039618


Epoch 3: 100%|██████████| 6118/6118 [04:47<00:00, 21.30it/s]


Epoch 3: Avg Train Loss: 8.775441370882525, Avg Val Loss: 8.881945276039618


In [6]:
tester_tokens = tokenizer(f"ingredients>> 3.0 bone in pork chops, salt, pepper, 2.0 tablespoon vegetable oil, 2.0 cup chicken broth, 4.0 cup vegetable broth, 1.0 red onion, 4.0 carrots, 2.0 clove garlic, 1.0 teaspoon dried thyme, 0.5 teaspoon dried basil, 1.0 cup rotini pasta, 2.0 stalk celery ; recipe>>",return_tensors="pt")

In [7]:
tester_tokens.to(device)
output_test = chef_gpt.generate(
        tester_tokens.input_ids, 
        do_sample=True, 
        max_length=1000, 
        top_p=0.95,
        attention_mask=tester_tokens.attention_mask)

recipe = tokenizer.batch_decode(output_test)[0]

print(recipe)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


ingredients>> 3.0 bone in pork chops, salt, pepper, 2.0 tablespoon vegetable oil, 2.0 cup chicken broth, 4.0 cup vegetable broth, 1.0 red onion, 4.0 carrots, 2.0 clove garlic, 1.0 teaspoon dried thyme, 0.5 teaspoon dried basil, 1.0 cup rotini pasta, 2.0 stalk celery ; recipe>> in a shallow dish toss chops with salt and pepper to taste
in a heavy nonstick skillet, heat oil over medium-high heat
add chops and cook 10 minutes
sprinkle with salt and pepper
add 2 tablespoons broth and 1 / 2 cup broth
bring to a boil
reduce heat
cover and simmer for 3 minutes
add remaining broth and broth
cook and stir 2 minutes
add carrots, onion, remaining 2 tablespoons broth and remaining 1 / 2 cup broth
bring to a boil
reduce heat
cover and simmer for 5 minutes or until chops are tender
add garlic and next 3 ingredients
simmer for 2 minutes or until garlic is tender
stir in pasta
cover and simmer 1 minute or until pasta is tender but still firm
toss with reserved broth<|endoftext|>
