In [None]:

from google.colab import files
import pandas as pd

# Ask Colab for a file upload, then read the recipe CSV from a fixed path.
# NOTE(review): presumably the uploaded file is the CSV named below and ends
# up under /content -- confirm against the Colab runtime's working directory.
uploaded = files.upload()
csv_file_path = '/content/Food Ingredients and Recipe Dataset with Image Name Mapping.csv'
df = pd.read_csv(csv_file_path)

# Print the first rows as a quick check of what was loaded.
preview = df.head()
print(preview)

def clean_ingredients(ingredient_str):
    """Strip list-literal punctuation from a stringified ingredient list.

    Every ``[``, ``]``, ``'`` and ``"`` character is removed; all other
    characters (commas, spaces, words) are returned unchanged.
    """
    # One translation table deletes all four characters in a single pass.
    removal_table = str.maketrans('', '', "[]'\"")
    return ingredient_str.translate(removal_table)

# Drop rows missing any of the three text columns, then coerce those columns
# to str so the string methods used later in the file work on every value.
required_columns = ['Title', 'Ingredients', 'Cleaned_Ingredients']
df = df.dropna(subset=required_columns)

for column_name in required_columns:
    df[column_name] = df[column_name].astype(str)


# Build two training examples per recipe:
#   * "qa":          "How do I cook <Title>?"  -> the Cleaned_Ingredients text
#   * "ingredients": "Ingredients: <stripped Ingredients>" -> the Title
data = []

recipe_fields = zip(df['Title'], df['Ingredients'], df['Cleaned_Ingredients'])
for title, raw_ingredients, cleaned_ingredients in recipe_fields:
    qa_example = {
        "input_type": "qa",
        "input_text": f"How do I cook {title}?",
        "output_text": cleaned_ingredients.strip(),
    }
    ingredients_example = {
        "input_type": "ingredients",
        "input_text": f"Ingredients: {clean_ingredients(raw_ingredients)}",
        "output_text": title.strip(),
    }
    # Keep the original ordering: the qa example first, then the ingredients one.
    data.extend((qa_example, ingredients_example))

print(data[:3])

# Split the examples into parallel prompt / target lists for tokenization.
inputs = [example["input_text"] for example in data]
targets = [example["output_text"] for example in data]

# The labels printed below are Arabic for "Example input" / "Example target".
print("مثال على إدخال:", inputs[0])
print("مثال على الهدف:", targets[0])

import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Load the pretrained 't5-small' tokenizer and model, then move the model
# onto the selected device.
tokenizer = T5Tokenizer.from_pretrained('t5-small')
model = T5ForConditionalGeneration.from_pretrained('t5-small')
model = model.to(device)

def encode_pairs(inputs, targets, max_input=64, max_target=128):
    """Tokenize parallel prompt/target strings into stacked training tensors.

    Each string is padded or truncated to a fixed length with the
    module-level ``tokenizer``. Padding positions in the target ids are
    replaced with -100, the label value Hugging Face seq2seq models exclude
    from the loss.

    Args:
        inputs: Iterable of prompt strings.
        targets: Iterable of target strings, paired with ``inputs`` by
            position (pairing stops at the shorter of the two).
        max_input: Token length every prompt is padded/truncated to.
        max_target: Token length every target is padded/truncated to.

    Returns:
        A tuple ``(input_ids, attention_masks, labels)`` of tensors, each
        stacked along a new leading dimension with one row per pair.

    Note:
        ``torch.stack`` is called on the collected rows, so at least one
        pair is required.
    """
    input_ids, attention_masks, labels = [], [], []
    for source_text, target_text in zip(inputs, targets):
        enc_in = tokenizer(source_text, max_length=max_input, padding='max_length',
                           truncation=True, return_tensors='pt')
        enc_out = tokenizer(target_text, max_length=max_target, padding='max_length',
                            truncation=True, return_tensors='pt')

        # Drop only the leading batch axis. A bare squeeze() would also drop
        # the sequence axis when the max length is 1, yielding 0-d tensors.
        label = enc_out.input_ids.squeeze(0)
        label[label == tokenizer.pad_token_id] = -100

        input_ids.append(enc_in.input_ids.squeeze(0))
        attention_masks.append(enc_in.attention_mask.squeeze(0))
        labels.append(label)

    return torch.stack(input_ids), torch.stack(attention_masks), torch.stack(labels)

# Tokenize every prompt/target pair once up front, using encode_pairs'
# default lengths (64 input tokens, 128 target tokens).
input_ids, attention_masks, labels = encode_pairs(inputs, targets)

from torch.utils.data import Dataset, DataLoader
from torch import optim

class RecipeDataset(Dataset):
    """Map-style dataset over pre-tokenized recipe examples.

    Holds three parallel, index-aligned sequences and serves one example at
    a time as a dict with the keys ``input_ids``, ``attention_mask`` and
    ``labels``.
    """

    # The special methods need their double underscores: without them the
    # constructor accepts no arguments and the object can be neither sized
    # nor indexed, so DataLoader cannot use it.
    def __init__(self, input_ids, attention_masks, labels):
        """Store the parallel sequences of input ids, attention masks and labels."""
        self.input_ids = input_ids
        self.attention_masks = attention_masks
        self.labels = labels

    def __len__(self):
        """Return the number of examples (the length of ``input_ids``)."""
        return len(self.input_ids)

    def __getitem__(self, idx):
        """Return the example at ``idx`` as a dict of its three fields."""
        return {
            'input_ids': self.input_ids[idx],
            'attention_mask': self.attention_masks[idx],
            'labels': self.labels[idx],
        }


# Hyperparameters for batching and optimization.
BATCH_SIZE = 8
LEARNING_RATE = 5e-5

# Wrap the encoded tensors in a dataset and iterate over it in shuffled
# mini-batches.
dataset = RecipeDataset(input_ids, attention_masks, labels)
loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

# AdamW over all model parameters.
optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE)


# Fine-tune the model for a fixed number of passes over the loader and
# report the mean batch loss after each epoch.
EPOCHS = 10
model.train()

for epoch in range(EPOCHS):
    total_loss = 0
    for batch in loader:
        # Keep the device copies in a per-batch dict rather than rebinding
        # the module-level `input_ids` / `labels`, which hold the full
        # encoded dataset; overwriting them with the last mini-batch would
        # corrupt any later rebuild of the dataset from those names.
        device_batch = {key: tensor.to(device) for key, tensor in batch.items()}

        optimizer.zero_grad()
        outputs = model(
            input_ids=device_batch['input_ids'],
            attention_mask=device_batch['attention_mask'],
            labels=device_batch['labels'],
        )
        loss = outputs.loss
        loss.backward()
        optimizer.step()

        # .item() extracts a Python float so the running total does not keep
        # the autograd graph alive.
        total_loss += loss.item()

    print(f"Epoch {epoch + 1}/{EPOCHS} - Loss: {total_loss / len(loader):.4f}")

def generate_response(prompt, *, max_input_length=64, max_length=50, num_beams=3):
    """Generate a text response for ``prompt`` using beam search.

    Uses the module-level ``tokenizer``, ``model`` and ``device``. The model
    is switched to eval mode and is left in that mode on return.

    Args:
        prompt: Prompt text to encode and feed to the model.
        max_input_length: Token length the prompt is padded/truncated to.
            The default matches ``encode_pairs``' default ``max_input``.
        max_length: Upper bound passed to ``model.generate``. The default of
            50 is shorter than ``encode_pairs``' default ``max_target`` of
            128, so raise it when longer outputs are expected.
        num_beams: Number of beams for the (non-sampling) beam search.

    Returns:
        The first generated sequence decoded to a string, with special
        tokens removed.
    """
    model.eval()
    # Named `encoded` so the module-level `inputs` list is not shadowed.
    encoded = tokenizer(
        prompt,
        return_tensors='pt',
        max_length=max_input_length,
        padding='max_length',
        truncation=True,
    ).to(device)
    generated_ids = model.generate(
        input_ids=encoded['input_ids'],
        attention_mask=encoded['attention_mask'],
        max_length=max_length,
        num_beams=num_beams,
        do_sample=False,
    )
    return tokenizer.decode(generated_ids[0], skip_special_tokens=True)

# Try the model on a few hand-written prompts covering both prompt formats
# used for training ("How do I cook ...?" and "Ingredients: ...").
prompts = [
    "How do I cook Pizza?",
    "Ingredients: chicken, rice, spices",
    "How do I cook Pasta?",
]

for prompt_text in prompts:
    # Print the prompt before generating so it is shown even if generation fails.
    print(f"\nPrompt: {prompt_text}")
    response = generate_response(prompt_text)
    print("Response:", response)