In [1]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
import torch
import json
import accelerate

from compliance_engine import RecipeComplianceEngine

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the instruct model and wrap it as a LangChain-compatible LLM (`llm`).
model_name = "meta-llama/Llama-3.1-8B-Instruct"

# Generation settings, named so they can be tuned in one place.
SEED = 42
MAX_NEW_TOKENS = 1024
TEMPERATURE = 0.6
TOP_P = 0.9

# Sampling is enabled below (do_sample=True), so seed torch's RNGs up front.
# Without a seed, every "Restart Kernel -> Run All" yields different text.
# NOTE(review): GPU kernels may still introduce minor nondeterminism.
torch.manual_seed(SEED)

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)

hf_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=MAX_NEW_TOKENS,
    do_sample=True,
    temperature=TEMPERATURE,
    top_p=TOP_P,
    eos_token_id=tokenizer.eos_token_id,
    # The EOS token id is reused as the padding token id.
    pad_token_id=tokenizer.eos_token_id,
    # Return only the newly generated text rather than the prompt followed by
    # the completion, so downstream code receives just the model's answer.
    return_full_text=False,
)

llm = HuggingFacePipeline(pipeline=hf_pipeline)

Loading checkpoint shards: 100%|██████████| 4/4 [00:02<00:00,  1.37it/s]
Device set to use cuda:0
  llm = HuggingFacePipeline(pipeline=hf_pipeline)


In [3]:
# Read the two input tables: the guideline rules first, then the recipes.
guidelines_df, recipes_df = (
    pd.read_csv(csv_path)
    for csv_path in ("data/belgium_rules.csv", "data/data_kg.csv")
)

In [4]:
# Recipe evaluated in this run.
SAMPLE_RECIPE_ID = 31115

# Look the recipe up *before* the guideline preprocessing: in the recorded run
# that step took ~41 s per iteration and was interrupted after more than an
# hour, so a wrong id should fail here, immediately, not after that wait.
sample_recipe_row = recipes_df[recipes_df['recipe_id'] == SAMPLE_RECIPE_ID]
if sample_recipe_row.empty:
    # Without this guard, `.iloc[0]` fails with an opaque
    # "single positional indexer is out-of-bounds" IndexError.
    raise ValueError(f"recipe_id {SAMPLE_RECIPE_ID} not found in recipes_df")
# If several rows share the id, the first match is used.
sample_recipe = sample_recipe_row.iloc[0].to_dict()

engine = RecipeComplianceEngine(llm_client=llm)

# Slow step (see note above): tags every row of the 'rule' column via the LLM.
# NOTE(review): consider caching the tagged guidelines if the engine exposes a
# way to persist them — confirm against compliance_engine.
engine.preprocess_guidelines(guidelines_df, guideline_col='rule')

final_report = engine.process_recipe(sample_recipe)

Preprocessing and tagging guidelines...


10it [06:57, 41.71s/it]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
101it [1:09:45, 41.44s/it]


KeyboardInterrupt: 

In [None]:
# Print a banner followed by the report rendered as indented JSON.
banner_rule = "=" * 50
report_json = json.dumps(final_report, indent=2)

print("\n" + banner_rule)
print("FINAL COMPLIANCE REPORT")
print(banner_rule)
print(report_json)