In [2]:
import kagglehub

path = kagglehub.dataset_download("saldenisov/recipenlg")

print("Path to dataset files:", path)

Path to dataset files: /root/.cache/kagglehub/datasets/saldenisov/recipenlg/versions/1


In [3]:
import pandas as pd
import os

In [4]:
path_to_kaggle_downloads = '/root/.cache/kagglehub/datasets/saldenisov/recipenlg/versions/1/dataset'  # Common path for Kaggle datasets
print(os.listdir(path_to_kaggle_downloads))

['full_dataset.csv']


In [5]:
dataset_path = "/root/.cache/kagglehub/datasets/saldenisov/recipenlg/versions/1/dataset"

os.listdir(dataset_path)

df = pd.read_csv(os.path.join(dataset_path, 'full_dataset.csv'))
print(df.head())

   Unnamed: 0                  title  \
0           0    No-Bake Nut Cookies   
1           1  Jewell Ball'S Chicken   
2           2            Creamy Corn   
3           3          Chicken Funny   
4           4   Reeses Cups(Candy)     

                                         ingredients  \
0  ["1 c. firmly packed brown sugar", "1/2 c. eva...   
1  ["1 small jar chipped beef, cut up", "4 boned ...   
2  ["2 (16 oz.) pkg. frozen corn", "1 (8 oz.) pkg...   
3  ["1 large whole chicken", "2 (10 1/2 oz.) cans...   
4  ["1 c. peanut butter", "3/4 c. graham cracker ...   

                                          directions  \
0  ["In a heavy 2-quart saucepan, mix brown sugar...   
1  ["Place chipped beef on bottom of baking dish....   
2  ["In a slow cooker, combine all ingredients. C...   
3  ["Boil and debone chicken.", "Put bite size pi...   
4  ["Combine first four ingredients and press in ...   

                                              link    source  \
0   www.cookbooks.com

In [6]:
print(df.info())
print(df.head())
print(df.columns)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2231142 entries, 0 to 2231141
Data columns (total 7 columns):
 #   Column       Dtype 
---  ------       ----- 
 0   Unnamed: 0   int64 
 1   title        object
 2   ingredients  object
 3   directions   object
 4   link         object
 5   source       object
 6   NER          object
dtypes: int64(1), object(6)
memory usage: 119.2+ MB
None
   Unnamed: 0                  title  \
0           0    No-Bake Nut Cookies   
1           1  Jewell Ball'S Chicken   
2           2            Creamy Corn   
3           3          Chicken Funny   
4           4   Reeses Cups(Candy)     

                                         ingredients  \
0  ["1 c. firmly packed brown sugar", "1/2 c. eva...   
1  ["1 small jar chipped beef, cut up", "4 boned ...   
2  ["2 (16 oz.) pkg. frozen corn", "1 (8 oz.) pkg...   
3  ["1 large whole chicken", "2 (10 1/2 oz.) cans...   
4  ["1 c. peanut butter", "3/4 c. graham cracker ...   

                            

In [7]:
def preprocess_data(df):
    df['ingredients'] = df['ingredients'].apply(lambda x: x.strip("[]").replace("'", "").replace(",", ", "))
    df['directions'] = df['directions'].str.lower().str.strip()
    df['title'] = df['title'].str.lower().str.strip()

    if 'dietary_restriction' in df.columns:
        df['dietary_restriction'] = df['dietary_restriction'].str.lower().str.strip()
    else:
        df['dietary_restriction'] = ''
    return df

df = preprocess_data(df)
print(df.head())

   Unnamed: 0                  title  \
0           0    no-bake nut cookies   
1           1  jewell ball's chicken   
2           2            creamy corn   
3           3          chicken funny   
4           4     reeses cups(candy)   

                                         ingredients  \
0  "1 c. firmly packed brown sugar",  "1/2 c. eva...   
1  "1 small jar chipped beef,  cut up",  "4 boned...   
2  "2 (16 oz.) pkg. frozen corn",  "1 (8 oz.) pkg...   
3  "1 large whole chicken",  "2 (10 1/2 oz.) cans...   
4  "1 c. peanut butter",  "3/4 c. graham cracker ...   

                                          directions  \
0  ["in a heavy 2-quart saucepan, mix brown sugar...   
1  ["place chipped beef on bottom of baking dish....   
2  ["in a slow cooker, combine all ingredients. c...   
3  ["boil and debone chicken.", "put bite size pi...   
4  ["combine first four ingredients and press in ...   

                                              link    source  \
0   www.cookbooks.com

In [8]:
!pip install transformers datasets torch



In [9]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments

In [10]:
tokenizer = GPT2Tokenizer.from_pretrained('gpt2', use_fast=True)
model = GPT2LMHeadModel.from_pretrained('gpt2')

tokenizer.pad_token = tokenizer.eos_token

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [None]:
train_encodings = tokenizer(df['directions'].tolist(), truncation=True, padding=True, max_length=256)

class RecipeDataset(torch.utils.data.Dataset):
    def __init__(self, encodings):
        self.encodings = encodings

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        return item

    def __len__(self):
        return len(self.encodings['input_ids'])

train_dataset = RecipeDataset(train_encodings)

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=2,
    save_steps=10_000,
    save_total_limit=2,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
)

In [None]:
trainer.train()

In [None]:
def generate_recipe(dietary_restriction, cuisine_type, prompt):
    input_text = f"{dietary_restriction} {cuisine_type} recipe: {prompt}"
    inputs = tokenizer.encode(input_text, return_tensors='pt')
    outputs = model.generate(inputs, max_length=200, num_return_sequences=1)
    recipe = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return recipe

dietary_restriction = "non veg"
cuisine_type = "french"
prompt = "pasta with cheese"
print(generate_recipe(dietary_restriction, cuisine_type, prompt))

In [None]:
model.save_pretrained('./my_recipe_model')
tokenizer.save_pretrained('./my_recipe_model')