# In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import T5Tokenizer, T5ForConditionalGeneration
from torch.optim import AdamW
import pandas as pd


# Load the recipe CSV, keep only rows that have both a title and
# instructions, and make sure both columns hold plain strings.
csv_file_path = r'C:\Users\User\Desktop\archive\Food Ingredients and Recipe Dataset with Image Name Mapping.csv'
df = (
    pd.read_csv(csv_file_path)
    .dropna(subset=['Title', 'Instructions'])
    .astype({'Title': str, 'Instructions': str})
)

# One question/answer pair per recipe: the question is built from the title,
# the answer is the instruction text.
data = [
    {
        "question": f"How do I make {title}?".strip(),
        "answer": instructions.strip(),
    }
    for title, instructions in zip(df['Title'], df['Instructions'])
]

# Pretrained T5-small tokenizer and model used as the fine-tuning start point.
tokenizer = T5Tokenizer.from_pretrained('t5-small')
model = T5ForConditionalGeneration.from_pretrained('t5-small')



class RecipeDataset(Dataset):
    """Question/answer pairs tokenized for sequence-to-sequence training.

    Args:
        data: Sequence of dicts, each with a ``'question'`` and an
            ``'answer'`` string.
        tokenizer: Callable tokenizer that accepts ``max_length``,
            ``padding``, ``truncation`` and ``return_tensors`` and returns an
            object exposing ``input_ids`` and ``attention_mask`` tensors with
            a leading batch axis of size 1; it must also expose
            ``pad_token_id``.
        max_len: Length every question and answer is padded/truncated to.
    """

    def __init__(self, data, tokenizer, max_len=128):
        self.data = data
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of question/answer pairs."""
        return len(self.data)

    def _encode(self, text):
        """Tokenize ``text`` to exactly ``max_len`` tokens as "pt" tensors."""
        return self.tokenizer(
            text,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_tensors="pt",
        )

    def __getitem__(self, idx):
        """Return ``input_ids``, ``attention_mask`` and ``labels`` for one pair.

        Each returned tensor is 1-D with ``max_len`` entries.
        """
        item = self.data[idx]
        input_enc = self._encode(item['question'])
        target_enc = self._encode(item['answer'])

        # squeeze(0) removes only the batch axis; a bare squeeze() would also
        # drop the sequence axis when max_len == 1.
        # clone() keeps the masking below from writing into the encoding.
        labels = target_enc.input_ids.squeeze(0).clone()
        # Replace padding with -100 so those positions are ignored as targets.
        labels[labels == self.tokenizer.pad_token_id] = -100

        return {
            'input_ids': input_enc.input_ids.squeeze(0),
            'attention_mask': input_enc.attention_mask.squeeze(0),
            'labels': labels,
        }


# Wrap the question/answer pairs and serve them in shuffled batches of two.
dataset = RecipeDataset(data, tokenizer)
loader = DataLoader(dataset, batch_size=2, shuffle=True)

# Optimizer over all model weights; train on the GPU when one is available.
optimizer = AdamW(model.parameters(), lr=5e-5)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
model.train()

# Fine-tune for a fixed number of passes, reporting the mean batch loss
# after each pass.
epochs = 3
for epoch in range(epochs):
    total_loss = 0
    for batch in loader:
        optimizer.zero_grad()

        # The batch keys (input_ids, attention_mask, labels) are exactly the
        # keyword arguments the model is called with.
        batch = {name: tensor.to(device) for name, tensor in batch.items()}
        loss = model(**batch).loss

        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(loader):.4f}")

def generate_instructions(question, *, max_length=100, num_beams=2):
    """Generate an answer for ``question`` with the module-level model.

    Puts ``model`` into evaluation mode, encodes the question with
    ``tokenizer``, runs beam search with early stopping, and decodes the
    first returned sequence without special tokens.

    Args:
        question: The text to send to the model.
        max_length: Value passed to ``model.generate`` as ``max_length``.
        num_beams: Value passed to ``model.generate`` as ``num_beams``.

    Returns:
        The decoded answer string.
    """
    model.eval()
    input_ids = tokenizer.encode(question, return_tensors='pt').to(device)
    output_ids = model.generate(
        input_ids,
        max_length=max_length,
        num_beams=num_beams,
        early_stopping=True,
    )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

# Interactive prompt: keep answering questions until the user asks to quit.
while True:
    try:
        user_input = input("Ask about a recipe (type 'exit' to quit): ")
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the user pressed Ctrl-C: leave the loop
        # instead of crashing with a traceback.
        print()
        break

    # Ignore surrounding whitespace so that e.g. "exit " still quits.
    user_input = user_input.strip()
    if user_input.lower() in ('exit', 'quit'):
        break
    if not user_input:
        # Nothing was typed; ask again rather than sending an empty prompt.
        continue

    response = generate_instructions(user_input)
    print("Response:", response)