ParserError: Error tokenizing data. C error: EOF inside string starting at row 3881

In [2]:
import pandas as pd
import re

df = pd.read_csv('Indian_Food_Recipes.csv')

# 2. Define the "Common Pantry Items" to remove
# The paper mentions removing salt, flour, spices to force the model to focus on core ingredients [cite: 110]
common_items = ['salt', 'water', 'oil', 'chilli powder', 'turmeric', 'sugar', 'ghee']

def clean_ingredients(ingredient_str):
    if not isinstance(ingredient_str, str): return ""

    # Simple cleaning: remove common items
    ingredients = [i.strip() for i in ingredient_str.split(',')]
    filtered = [i for i in ingredients if not any(common in i.lower() for common in common_items)]

    return ", ".join(filtered)

# 3. Apply the cleaning
# The paper says they filtered ingredients and renamed columns [cite: 113]
df['Cleaned-Ingredients'] = df['TranslatedIngredients'].apply(clean_ingredients)

# 4. Save the file exactly as your cousin's code expects it
final_df = df[['Cleaned-Ingredients', 'TranslatedInstructions']]
final_df.to_csv('/content/Cleaned_Indian_Food_Dataset.csv', index=False)

print("Success! 'Cleaned_Indian_Food_Dataset.csv' has been created.")

Success! 'Cleaned_Indian_Food_Dataset.csv' has been created.


In [3]:
!pip install transformers datasets accelerate



In [4]:
import pandas as pd
from datasets import Dataset
from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments, DataCollatorForLanguageModeling

# --- 1. LOAD & PREPARE DATA ---
# Load the file you just created
clean = pd.read_csv('/content/Cleaned_Indian_Food_Dataset.csv')

# IMPORTANT: Rename columns so they match what the training loop expects
clean.rename(columns={'Cleaned-Ingredients': 'ingredients', 'TranslatedInstructions': 'instructions'}, inplace=True)

# Remove any empty rows to prevent errors
clean.dropna(inplace=True)

# --- 2. FORMAT DATA ---
def format_recipe(row):
    return f"Ingredients: {row['ingredients']}\nRecipe: {row['instructions']}<|endoftext|>"

clean['text'] = clean.apply(format_recipe, axis=1)
dataset = Dataset.from_pandas(clean[['text']])

# --- 3. TOKENIZATION ---
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token

def tokenize_function(examples):
    return tokenizer(
        examples['text'],
        padding="max_length",
        truncation=True,
        max_length=512
    )

tokenized_datasets = dataset.map(tokenize_function, batched=True)

# --- 4. LOAD MODEL ---
model = GPT2LMHeadModel.from_pretrained('gpt2')

# --- 5. TRAINING ARGUMENTS ---
training_args = TrainingArguments(
    output_dir="./nutrigenie_finetuned",
    num_train_epochs=5,
    per_device_train_batch_size=4,
    learning_rate=0.001,
    save_steps=500,
    save_total_limit=2,
    logging_steps=50,
    prediction_loss_only=True,
)

# --- 6. START TRAINING ---
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets,
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

print("Starting Fine-Tuning...")
trainer.train()

# --- 7. SAVE MODEL ---
trainer.save_model("./nutrigenie_sft_model")
tokenizer.save_pretrained("./nutrigenie_sft_model")
print("Training Complete. Model saved to ./nutrigenie_sft_model")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Map:   0%|          | 0/6864 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Starting Fine-Tuning...


  | |_| | '_ \/ _` / _` |  _/ -_)
[34m[1mwandb[0m: (1) Create a W&B account
[34m[1mwandb[0m: (2) Use an existing W&B account
[34m[1mwandb[0m: (3) Don't visualize my results
[34m[1mwandb[0m: Enter your choice:

 3


[34m[1mwandb[0m: You chose "Don't visualize my results"


`loss_type=None` was set in the config but it is unrecognized. Using the default loss: `ForCausalLMLoss`.


Step,Training Loss
50,4.1823
100,2.675
150,2.476
200,2.3462
250,2.3577
300,2.273
350,2.1109
400,2.1748
450,2.1054
500,2.0428


Training Complete. Model saved to ./nutrigenie_sft_model


In [6]:
import numpy as np

# --- 1. THE GI DATABASE (Simplified for Demo) ---
# In a real scenario, you would load this from a CSV file (Section 3.4.1)
gi_database = {
    # High GI (Bad for strict diets)
    'sugar': 65, 'jaggery': 84, 'white rice': 73, 'potato': 78, 'aloo': 78,
    'maida': 71, 'corn flour': 70, 'white bread': 75,

    # Medium GI
    'brown rice': 68, 'basmati rice': 60, 'honey': 61, 'pineapple': 59,

    # Low GI (Good!)
    'moong dal': 29, 'chana dal': 8, 'urad dal': 43, 'toor dal': 29,
    'spinach': 15, 'palak': 15, 'onion': 10, 'tomato': 15, 'carrot': 39,
    'paneer': 0, 'curd': 0, 'yoghurt': 0, 'chicken': 0, 'fish': 0, 'egg': 0,
    'bitter gourd': 0, 'karela': 0, 'methi': 0, 'oats': 55, 'bajra': 50
}

# --- 2. REWARD FUNCTION ---
def calculate_reward(generated_text, target_gi_threshold, input_ingredients):
    """
    Calculates a reward score based on:
    1. GI Score: Is the average GI below the patient's limit?
    2. Ingredient Penalty: Did the model forget ingredients requested by the user?

    Ref: Paper Section 3.4.2 [cite: 158]
    """

    # A. Extract ingredients from the generated text
    # (Simple text matching for this demo)
    text_lower = generated_text.lower()
    found_ingredients = []
    total_gi = 0
    count_carbs = 0

    for ing, gi in gi_database.items():
        if ing in text_lower:
            found_ingredients.append(ing)
            # Only count GI if it's a carb source (GI > 0) or explicitly listed
            # The paper calculates weighted average, here we do simple average for speed
            if gi >= 0:
                total_gi += gi
                count_carbs += 1

    # Avoid division by zero
    if count_carbs == 0:
        avg_gi = 0  # Safe fallback (e.g., only water/salt)
    else:
        avg_gi = total_gi / count_carbs

    # B. Calculate GI Reward (The Core Logic)
    # "Score = Threshold GI - Average GI" [cite: 145]
    # We want the Avg GI to be LOWER than the Threshold.
    # If Avg GI is 50 and Threshold is 60, Score = +10 (Good!)
    # If Avg GI is 70 and Threshold is 60, Score = -10 (Bad!)
    gi_score = target_gi_threshold - avg_gi

    # C. Calculate Missing Ingredient Penalty
    # "Penalty associated with every ingredient missing from the prompt" [cite: 165]
    missing_penalty = 0
    input_list = [i.strip().lower() for i in input_ingredients.split(',')]

    for req_item in input_list:
        # Check if the requested item appears in the generated text
        if req_item not in text_lower:
            missing_penalty += 2.0 # Penalty value mentioned as "secondary priority" [cite: 166]

    # Final Score
    final_reward = gi_score - missing_penalty

    return final_reward

# --- TEST THE REWARD FUNCTION ---
# Scenario: Patient 2 (DiSsCo 0.46) needs GI < 55.
# Case 1: Model writes a healthy recipe (Moong Dal)
reward_good = calculate_reward(
    "Recipe for Moong Dal: Boil moong dal with turmeric...",
    target_gi_threshold=55,
    input_ingredients="moong dal"
)

# Case 2: Model writes a bad recipe (Sugar & Potato)
reward_bad = calculate_reward(
    "Recipe for Sweet Potato: Boil potato and add sugar...",
    target_gi_threshold=55,
    input_ingredients="moong dal"
)

print(f"Reward for Healthy Recipe: {reward_good:.2f} (Should be positive)")
print(f"Reward for Unhealthy Recipe: {reward_bad:.2f} (Should be negative)")

Reward for Healthy Recipe: 26.00 (Should be positive)
Reward for Unhealthy Recipe: -18.50 (Should be negative)


In [7]:
!pip install trl

Collecting trl
  Downloading trl-0.26.1-py3-none-any.whl.metadata (11 kB)
Downloading trl-0.26.1-py3-none-any.whl (517 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m517.4/517.4 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: trl
Successfully installed trl-0.26.1


In [16]:
import torch
import pandas as pd
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForCausalLM
from trl.experimental.ppo import PPOTrainer, PPOConfig
from trl import AutoModelForCausalLMWithValueHead
from datasets import Dataset

# --- 1. CONFIGURATION ---
MODEL_PATH = "./nutrigenie_sft_model"
TARGET_GI_THRESHOLD = 55

config = PPOConfig(
    learning_rate=1.41e-5,
    batch_size=4,
    mini_batch_size=4,
    gradient_accumulation_steps=1,
)

# --- 2. PREPARE MODEL & TOKENIZER ---
try:
    model = AutoModelForCausalLMWithValueHead.from_pretrained(MODEL_PATH)
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
    tokenizer.pad_token = tokenizer.eos_token
except OSError:
    print("ERROR: './nutrigenie_sft_model' not found. You must re-run the Phase 1 (SFT) Training code first.")
    raise

# --- 3. PREPARE DATASET ---
clean_data = pd.read_csv('/content/Cleaned_Indian_Food_Dataset.csv').dropna()
if 'Cleaned-Ingredients' in clean_data.columns:
    clean_data.rename(columns={'Cleaned-Ingredients': 'ingredients'}, inplace=True)

# Format strictly as input prompts
clean_data['query'] = "Ingredients: " + clean_data['ingredients']
dataset = Dataset.from_pandas(clean_data[['query']])

def collator(data):
    return dict((key, [d[key] for d in data]) for key in data[0])

# --- 4. INITIALIZE TRAINER (ALL FIXES APPLIED) ---
ppo_trainer = PPOTrainer(
    args=config,                 # Fixed: 'config' -> 'args'
    model=model,
    ref_model=None,
    processing_class=tokenizer,  # Fixed: 'tokenizer' -> 'processing_class'
    train_dataset=dataset,       # Fixed: 'dataset' -> 'train_dataset'
    data_collator=collator
)

# --- 5. THE TRAINING LOOP ---
print("Starting Reinforcement Learning (PPO)...")

for epoch, batch in tqdm(enumerate(ppo_trainer.dataloader)):
    if epoch >= 20: break

    query_tensors = []
    query_txts = batch['query']

    # A. Encode Queries
    for q in query_txts:
        inputs = tokenizer(q, return_tensors="pt", padding=True, truncation=True)
        query_tensors.append(inputs.input_ids[0])

    # B. Generate Recipes
    response_tensors = ppo_trainer.generate(
        query_tensors,
        max_new_tokens=100,
        pad_token_id=tokenizer.eos_token_id
    )

    batch['response'] = [tokenizer.decode(r.squeeze()) for r in response_tensors]

    # C. Calculate Rewards
    rewards = []
    for query, response in zip(batch['query'], batch['response']):
        ing_list = query.replace("Ingredients:", "").strip()

        # Calculate Reward using YOUR function
        score = calculate_reward(response, TARGET_GI_THRESHOLD, ing_list)
        rewards.append(torch.tensor(score, dtype=torch.float))

    # D. Update Model
    stats = ppo_trainer.step(query_tensors, response_tensors, rewards)

# --- 6. SAVE THE FINAL MODEL ---
print("Saving NutriGenie RL Model...")
model.pretrained_model.save_pretrained("./nutrigenie_final_rl_model")
tokenizer.save_pretrained("./nutrigenie_final_rl_model")
print("DONE! Your NutriGenie backend is fully built.")

TypeError: PPOTrainer.__init__() missing 2 required positional arguments: 'reward_model' and 'value_model'

In [1]:
!pip install trl==0.9.6



In [2]:
import os
import torch
import pandas as pd
import numpy as np
from tqdm import tqdm
from datasets import Dataset
from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from transformers import AutoTokenizer
from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead

# --- CONFIGURATION ---
SFT_MODEL_PATH = "./nutrigenie_sft_model"
RL_MODEL_PATH = "./nutrigenie_final_rl_model"
CSV_PATH = '/content/Cleaned_Indian_Food_Dataset.csv'
TARGET_GI_THRESHOLD = 55

# --- PART A: DATA CHECK ---
if not os.path.exists(CSV_PATH):
    print("⚠️ Data file not found. Please upload 'Cleaned_Indian_Food_Dataset.csv' or the raw Kaggle CSV.")
    # (If you need the cleaning code again, let me know, but usually files persist after a simple restart)
else:
    print("✅ Dataset found.")

# --- PART B: SFT TRAINING (Phase 1) ---
# We check if the model exists. If not, we train it.
if not os.path.exists(SFT_MODEL_PATH):
    print("\n⚠️ SFT Model not found (wiped during restart). Re-training Phase 1 now...")

    # Load Data
    clean = pd.read_csv(CSV_PATH)
    if 'Cleaned-Ingredients' in clean.columns:
        clean.rename(columns={'Cleaned-Ingredients': 'ingredients', 'TranslatedInstructions': 'instructions'}, inplace=True)
    clean.dropna(inplace=True)

    def format_recipe(row):
        return f"Ingredients: {row['ingredients']}\nRecipe: {row['instructions']}<|endoftext|>"

    clean['text'] = clean.apply(format_recipe, axis=1)
    dataset = Dataset.from_pandas(clean[['text']])

    # Tokenizer & Model
    tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
    tokenizer.pad_token = tokenizer.eos_token

    def tokenize_function(examples):
        return tokenizer(examples['text'], padding="max_length", truncation=True, max_length=512)

    tokenized_datasets = dataset.map(tokenize_function, batched=True)
    model = GPT2LMHeadModel.from_pretrained('gpt2')

    # Train
    training_args = TrainingArguments(
        output_dir=SFT_MODEL_PATH,
        num_train_epochs=3, # Reduced to 3 for speed (still effective)
        per_device_train_batch_size=4,
        learning_rate=0.001,
        save_strategy="no",
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_datasets,
        data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
    )

    trainer.train()
    trainer.save_model(SFT_MODEL_PATH)
    tokenizer.save_pretrained(SFT_MODEL_PATH)
    print("✅ Phase 1 Complete: SFT Model Saved.")
else:
    print("✅ SFT Model found on disk. Skipping Phase 1 training.")

# --- PART C: REWARD FUNCTION ---
gi_database = {
    'sugar': 65, 'jaggery': 84, 'rice': 73, 'potato': 78, 'maida': 71,
    'moong dal': 29, 'spinach': 15, 'paneer': 0, 'chicken': 0, 'oats': 55,
    'garlic': 0, 'tomato': 15, 'onion': 10
}

def calculate_reward(generated_text, target_gi_threshold, input_ingredients):
    text_lower = generated_text.lower()
    total_gi = 0
    count_carbs = 0
    for ing, gi in gi_database.items():
        if ing in text_lower:
            if gi > 0:
                total_gi += gi
                count_carbs += 1
    avg_gi = total_gi / count_carbs if count_carbs > 0 else 0
    gi_score = target_gi_threshold - avg_gi
    missing_penalty = 0
    input_list = [i.strip().lower() for i in input_ingredients.split(',')]
    for req_item in input_list:
        if req_item not in text_lower:
            missing_penalty += 2.0
    return gi_score - missing_penalty

# --- PART D: PPO TRAINING (Phase 2) ---
print("\n🚀 Starting Phase 2: Reinforcement Learning (PPO)...")

# Load SFT Model
model = AutoModelForCausalLMWithValueHead.from_pretrained(SFT_MODEL_PATH)
tokenizer = AutoTokenizer.from_pretrained(SFT_MODEL_PATH)
tokenizer.pad_token = tokenizer.eos_token

# Prepare PPO Dataset
clean_data = pd.read_csv(CSV_PATH).dropna()
if 'Cleaned-Ingredients' in clean_data.columns:
    clean_data.rename(columns={'Cleaned-Ingredients': 'ingredients'}, inplace=True)
clean_data['query'] = "Ingredients: " + clean_data['ingredients']
dataset = Dataset.from_pandas(clean_data[['query']])

def collator(data):
    return dict((key, [d[key] for d in data]) for key in data[0])

# Initialize Stable PPO Trainer
config = PPOConfig(
    learning_rate=1.41e-5,
    batch_size=4,
    mini_batch_size=4,
    gradient_accumulation_steps=1,
)

ppo_trainer = PPOTrainer(
    config=config,
    model=model,
    ref_model=None,
    tokenizer=tokenizer,
    dataset=dataset,
    data_collator=collator
)

# Training Loop
for epoch, batch in tqdm(enumerate(ppo_trainer.dataloader)):
    if epoch >= 20: break

    query_tensors = []
    query_txts = batch['query']
    for q in query_txts:
        inputs = tokenizer(q, return_tensors="pt", padding=True, truncation=True)
        query_tensors.append(inputs.input_ids[0])

    response_tensors = ppo_trainer.generate(
        query_tensors, max_new_tokens=100, pad_token_id=tokenizer.eos_token_id
    )
    batch['response'] = [tokenizer.decode(r.squeeze()) for r in response_tensors]

    rewards = []
    for query, response in zip(batch['query'], batch['response']):
        ing_list = query.replace("Ingredients:", "").strip()
        score = calculate_reward(response, TARGET_GI_THRESHOLD, ing_list)
        rewards.append(torch.tensor(score, dtype=torch.float))

    stats = ppo_trainer.step(query_tensors, response_tensors, rewards)

# Save Final
print("Saving NutriGenie RL Model...")
model.pretrained_model.save_pretrained(RL_MODEL_PATH)
tokenizer.save_pretrained(RL_MODEL_PATH)
print(f"🎉 DONE! Final model saved to {RL_MODEL_PATH}")

✅ Dataset found.
✅ SFT Model found on disk. Skipping Phase 1 training.

🚀 Starting Phase 2: Reinforcement Learning (PPO)...


0it [00:00, ?it/s]You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
4it [00:24,  6.24s/it]


AcceleratorError: CUDA error: device-side assert triggered
Search for `cudaErrorAssert' in https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html for more information.
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [2]:
import torch
import pandas as pd
import os
from tqdm import tqdm
from transformers import AutoTokenizer
from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead
from datasets import Dataset

# --- 1. CONFIGURATION ---
MODEL_PATH = "./nutrigenie_sft_model"
RL_MODEL_PATH = "./nutrigenie_final_rl_model"
TARGET_GI_THRESHOLD = 55

# --- 2. LOAD & PREPARE ---
# Load SFT Model (Phase 1)
try:
    model = AutoModelForCausalLMWithValueHead.from_pretrained(MODEL_PATH)
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
    tokenizer.pad_token = tokenizer.eos_token
except OSError:
    print("⚠️ SFT Model lost. Please re-run the Phase 1 training code first.")
    raise

# Prepare Data
clean_data = pd.read_csv('/content/Cleaned_Indian_Food_Dataset.csv').dropna()
if 'Cleaned-Ingredients' in clean_data.columns:
    clean_data.rename(columns={'Cleaned-Ingredients': 'ingredients'}, inplace=True)
clean_data['query'] = "Ingredients: " + clean_data['ingredients']

# TRUNCATION FIX: Filter out huge prompts to prevent crashes
clean_data = clean_data[clean_data['query'].str.len() < 200]
dataset = Dataset.from_pandas(clean_data[['query']])

def collator(data):
    return dict((key, [d[key] for d in data]) for key in data[0])

# --- 3. INITIALIZE TRAINER (SAFE MODE) ---
config = PPOConfig(
    learning_rate=1.41e-5,
    batch_size=4,
    mini_batch_size=4,
    gradient_accumulation_steps=1,
)

ppo_trainer = PPOTrainer(
    config=config,
    model=model,
    ref_model=None,
    tokenizer=tokenizer,
    dataset=dataset,
    data_collator=collator
)

# --- 4. REWARD FUNCTION ---
gi_database = {'sugar': 65, 'jaggery': 84, 'rice': 73, 'potato': 78, 'maida': 71, 'moong dal': 29, 'spinach': 15, 'paneer': 0, 'chicken': 0, 'oats': 55, 'garlic': 0, 'tomato': 15, 'onion': 10}

def calculate_reward(generated_text, target_gi, input_ingredients):
    text_lower = generated_text.lower()
    total_gi = 0; count = 0
    for ing, gi in gi_database.items():
        if ing in text_lower and gi > 0:
            total_gi += gi; count += 1
    avg_gi = total_gi / count if count > 0 else 0

    # Reward: Distance from target + Penalty for missing ingredients
    score = (target_gi - avg_gi)
    input_list = [i.strip().lower() for i in input_ingredients.split(',')]
    for req in input_list:
        if req not in text_lower: score -= 2.0
    return score

# --- 5. TRAINING LOOP (SAFE GENERATION) ---
print("🚀 Starting Safe PPO Training...")

# Generation Args (Strict Limits to avoid CUDA Error)
gen_kwargs = {
    "min_length": -1,
    "top_k": 0.0,
    "top_p": 1.0,
    "do_sample": True,
    "pad_token_id": tokenizer.eos_token_id,
    "max_new_tokens": 64 # Kept short to prevent crash
}

for epoch, batch in tqdm(enumerate(ppo_trainer.dataloader)):
    if epoch >= 10: break # Run just 10 steps to finish quickly

    query_tensors = []
    for q in batch['query']:
        # Strict truncation
        inputs = tokenizer(q, return_tensors="pt", padding=True, truncation=True, max_length=128)
        query_tensors.append(inputs.input_ids[0])

    # Generate
    response_tensors = ppo_trainer.generate(query_tensors, **gen_kwargs)
    batch['response'] = [tokenizer.decode(r.squeeze()) for r in response_tensors]

    # Calculate Rewards
    rewards = []
    for query, response in zip(batch['query'], batch['response']):
        ing_list = query.replace("Ingredients:", "").strip()
        score = calculate_reward(response, TARGET_GI_THRESHOLD, ing_list)
        rewards.append(torch.tensor(score, dtype=torch.float))

    # Step
    stats = ppo_trainer.step(query_tensors, response_tensors, rewards)

# Save
model.pretrained_model.save_pretrained(RL_MODEL_PATH)
tokenizer.save_pretrained(RL_MODEL_PATH)
print(f"🎉 DONE! Safe PPO Model saved to {RL_MODEL_PATH}")

🚀 Starting Safe PPO Training...


0it [00:00, ?it/s]You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
10it [00:36,  3.64s/it]


🎉 DONE! Safe PPO Model saved to ./nutrigenie_final_rl_model


In [3]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# --- 1. SETUP ---
# Load the FINAL RL Model you just saved
RL_MODEL_PATH = "./nutrigenie_final_rl_model"

print("Loading your NutriGenie AI...")
try:
    tokenizer = AutoTokenizer.from_pretrained(RL_MODEL_PATH)
    model = AutoModelForCausalLM.from_pretrained(RL_MODEL_PATH)
    # Move to GPU if available for speed
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    print("✅ Model Loaded Successfully!")
except OSError:
    print("❌ Model not found. Did the training finish?")
    raise

# --- 2. DEFINE THE LOGIC (DiSsCo) ---
def calculate_dissco_score(age, hba1c, bmi, gender, conditions):
    # Logic from paper: ((Age/10) + HbA1c + (BMI/10)) * Factors
    gf = 1.4 if gender.lower() == 'female' else 1.0
    hdf = 1.0 + (0.5 if conditions else 0)
    raw = ((age/10) + hba1c + (bmi/10)) * gf * hdf
    # Normalized Score (0-1)
    return min(raw / 50.0, 1.0)

# --- 3. THE GENERATOR FUNCTION ---
def generate_nutrigenie_recipe(ingredients, patient_profile):
    # A. Calculate Score
    score = calculate_dissco_score(
        patient_profile['age'],
        patient_profile['hba1c'],
        patient_profile['bmi'],
        patient_profile['gender'],
        patient_profile['cond']
    )

    # B. Define Limit
    target_gi = 65 - (score * 10)
    print(f"\n--- PATIENT PROFILE ---")
    print(f"Severity Score (DiSsCo): {score:.3f}")
    print(f"Strictness Level: GI must be < {target_gi:.1f}")

    # C. Generate
    print(f"\n--- AI THINKING ---")
    prompt = f"Ingredients: {ingredients}\nRecipe:"
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    # Generate parameters (Creative but safe)
    outputs = model.generate(
        **inputs,
        max_new_tokens=150,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        pad_token_id=tokenizer.eos_token_id
    )

    recipe_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # D. Display Result
    print("\n--- 🥗 GENERATED RECIPE 🥗 ---")
    print(recipe_text)
    print("-" * 30)

    # E. Quick Check
    gi_database = {'sugar': 65, 'rice': 73, 'potato': 78, 'moong dal': 29, 'spinach': 15}
    found_bad = [k for k,v in gi_database.items() if k in recipe_text.lower() and v > target_gi]

    if not found_bad:
        print("✅ HEALTH CHECK PASSED: No high-GI ingredients detected.")
    else:
        print(f"⚠️ HEALTH WARNING: Found {', '.join(found_bad)}. Portion control advised.")

# --- 4. RUN THE DEMO ---
# Test Case: 65-year-old female with High Blood Sugar
patient = {
    "age": 65,
    "gender": "female",
    "hba1c": 9.0,
    "bmi": 30,
    "cond": ["Hypertension"]
}

generate_nutrigenie_recipe("Moong Dal, Spinach, Garlic, Tomato", patient)

Loading your NutriGenie AI...
✅ Model Loaded Successfully!

--- PATIENT PROFILE ---
Severity Score (DiSsCo): 0.777
Strictness Level: GI must be < 57.2

--- AI THINKING ---

--- 🥗 GENERATED RECIPE 🥗 ---
Ingredients: Moong Dal, Spinach, Garlic, Tomato
Recipe: To begin making the Moong Dal Ki Goli Recipe, we will first wash the moong dal and spinach thoroughly under running water.Place the washed moong dal and spinach in the pressure cooker along with 2-1/2 cups of water.Add turmeric powder, salt to taste and cover the pressure cooker. Place the weight on and pressure cook for 4 to 5 whistles until the dal is cooked.Once the dal is cooked, allow the pressure to release naturally and open the cooker. Serve Moong Dal Ki Goli Recipe along with Steamed Rice, Mixed Vegetable Sambar and Elai Vadam for a simple and wholesome weekday meal. 
Recipe:
------------------------------


In [None]:
# 1. Install the UI library
!pip install gradio

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# --- LOAD BACKEND (Reuse your loaded model) ---
# We assume 'model' and 'tokenizer' are already loaded from your previous cell.
# If they aren't, uncomment the lines below:
# MODEL_PATH = "./nutrigenie_final_rl_model"
# tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
# model = AutoModelForCausalLM.from_pretrained(MODEL_PATH)

# --- DEFINING THE APP LOGIC ---
def nutrigenie_logic(age, gender, weight, height, hba1c, conditions, ingredients):
    # 1. Calculate BMI
    height_m = height / 100
    bmi = weight / (height_m ** 2)

    # 2. Calculate DiSsCo Score (Logic from Paper)
    gf = 1.4 if gender == "Female" else 1.0
    hdf = 1.0
    if conditions and len(conditions) > 0:
        hdf += 0.5 # Add weight for heart/BP conditions

    raw_score = ((age / 10) + hba1c + (bmi / 10)) * gf * hdf
    dissco_score = min(raw_score / 50.0, 1.0)

    # 3. Determine Safety Limit
    target_gi = 65 - (dissco_score * 10)

    # 4. Generate Recipe
    prompt = f"Ingredients: {ingredients}\nRecipe:"
    inputs = tokenizer(prompt, return_tensors="pt")
    if torch.cuda.is_available(): inputs = inputs.to("cuda")

    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=True,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id
    )
    recipe_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # 5. Health Check
    gi_database = {'sugar': 65, 'rice': 73, 'potato': 78, 'moong dal': 29, 'spinach': 15, 'maida': 71}
    warnings = []
    for ing, val in gi_database.items():
        if ing in recipe_text.lower() and val > target_gi:
            warnings.append(f"{ing.title()} (GI {val})")

    if warnings:
        verdict = f"⚠️ CAUTION: Recipe contains high-GI ingredients: {', '.join(warnings)}. Strict portion control advised."
        color = "red"
    else:
        verdict = "✅ DIABETES FRIENDLY: This recipe fits your personalized GI limit."
        color = "green"

    return f"{dissco_score:.3f}", f"< {target_gi:.1f}", recipe_text, verdict

# --- CREATING THE INTERFACE (Green Theme) ---
with gr.Blocks(theme=gr.themes.Soft(primary_hue="green")) as app:
    gr.Markdown("# 🥗 NutriGenie: AI for Diabetes")
    gr.Markdown("Personalized Recipe Generator based on DiSsCo Score")

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 1. Patient Details")
            age = gr.Number(label="Age", value=60)
            gender = gr.Radio(["Male", "Female"], label="Gender", value="Male")
            with gr.Row():
                weight = gr.Number(label="Weight (kg)", value=70)
                height = gr.Number(label="Height (cm)", value=170)
            hba1c = gr.Number(label="HbA1c Level", value=6.5)
            conditions = gr.CheckboxGroup(["Hypertension", "Heart Disease"], label="Conditions")

            gr.Markdown("### 2. Pantry")
            ingredients = gr.Textbox(label="Available Ingredients", placeholder="e.g. Spinach, Moong Dal, Garlic")

            btn = gr.Button("Generate Personalized Recipe", variant="primary")

        with gr.Column():
            gr.Markdown("### 3. Results")
            with gr.Row():
                dissco_out = gr.Textbox(label="DiSsCo Score")
                gi_limit_out = gr.Textbox(label="Your GI Limit")

            recipe_out = gr.Textbox(label="Generated Recipe", lines=10)
            verdict_out = gr.Textbox(label="Health Verification")

    btn.click(
        nutrigenie_logic,
        inputs=[age, gender, weight, height, hba1c, conditions, ingredients],
        outputs=[dissco_out, gi_limit_out, recipe_out, verdict_out]
    )

# Launch the app
app.launch(debug=True)



  with gr.Blocks(theme=gr.themes.Soft(primary_hue="green")) as app:


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://748b36f3f3df9f7435.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
