In [14]:
import pandas as pd

## Ingestion

In [15]:
# Read every column as a string and give the columns snake_case names:
# the unnamed leading column becomes "id", and hyphenated headers use underscores.
df = (
    pd.read_csv("../data/incidents_train.csv", index_col=False, dtype=str)
    .rename(
        columns={
            "Unnamed: 0": "id",
            "hazard-category": "hazard_category",
            "product-category": "product_category",
        }
    )
)

In [16]:
# One plain dict per CSV row, keyed by column name.
documents = df.to_dict(orient="records")

In [17]:
import minsearch

In [18]:
# "id" is registered as a keyword field; the descriptive columns are
# registered as text fields for the query to be matched against.
index = minsearch.Index(
    keyword_fields=["id"],
    text_fields=[
        "title",
        "hazard_category",
        "product_category",
        "hazard",
        "product",
    ],
)

In [19]:
# Fit the index on the incident records; the call's return value is the
# cell's displayed output.
index.fit(documents)

<minsearch.Index at 0x78d9e5071280>

## RAG flow

In [20]:
from openai import OpenAI

# OpenAI-style client pointed at a server on localhost:11434 instead of the
# hosted API. NOTE(review): "ollama" looks like a placeholder key for a local
# Ollama server rather than a real secret — confirm.
client = OpenAI(api_key="ollama", base_url="http://localhost:11434/v1/")

In [21]:
def search(query, num_results=10):
    """Query the module-level ``index`` for records matching ``query``.

    Args:
        query: Free-text search string forwarded to ``index.search``.
        num_results: Number of results requested from the index. Defaults
            to 10, the value that used to be hard-coded, so existing
            ``search(query)`` calls behave exactly as before.

    Returns:
        Whatever ``index.search`` returns, unchanged.
    """
    return index.search(
        query=query,
        filter_dict={},  # no keyword filtering applied
        boost_dict={},  # no per-field boosts applied
        num_results=num_results,
    )

In [22]:
# Top-level prompt sent to the model. `{question}` and `{context}` are filled
# in by build_prompt(). The trailing .strip() drops the newline that the
# triple-quoted literal adds at the start and at the end.
prompt_template = """
You're a food hazard detection assistant. Answer the QUESTION based on the CONTEXT from the food-incident reports.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT: 
{context}
""".strip()


# Rendering of one search result inside the CONTEXT section. The placeholders
# are filled from the record's keys via str.format(**doc) in build_prompt().
# NOTE: .strip() removes the leading space of the first line only, so the
# 'title' line is unindented while the remaining lines keep their one space.
entry_template = """
 'title': {title}
 'hazard_category': {hazard_category}
 'product_category': {product_category}
 'hazard': {hazard}
 'product': {product}
""".strip()


def build_prompt(query, search_results):
    """Render the final LLM prompt for ``query`` from the retrieved records.

    Each record in ``search_results`` is formatted with ``entry_template``
    (its keys are passed as keyword arguments) and followed by a blank line.
    The concatenated entries become the ``{context}`` of ``prompt_template``.

    Args:
        query: The user's question, inserted as ``{question}``.
        search_results: Iterable of dict records providing the keys that
            ``entry_template`` references.

    Returns:
        The formatted prompt with leading and trailing whitespace stripped.
    """
    context = "".join(
        entry_template.format(**record) + "\n\n" for record in search_results
    )
    return prompt_template.format(question=query, context=context).strip()

In [23]:
def llm(prompt, model='llama3.1'):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Full prompt text, sent as the content of one "user" message.
        model: Model name passed to the chat-completions call.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    user_messages = [{"role": "user", "content": prompt}]
    completion = client.chat.completions.create(
        model=model,
        messages=user_messages,
        temperature=0.2,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [24]:
def rag(query, model='llama3.1'):
    """Answer ``query`` end to end: search, build the prompt, call the model.

    Args:
        query: The user's question; used both for retrieval and in the prompt.
        model: Model name forwarded to ``llm``.

    Returns:
        The value returned by ``llm`` for the constructed prompt.
    """
    retrieved = search(query)
    return llm(build_prompt(query, retrieved), model=model)

In [27]:
# Run the whole pipeline once on a sample question and show the model's reply.
question = "What are the products that contain listeria monocytogenes?"
answer = rag(question)
print(answer)

Based on the context provided, the products that contain listeria monocytogenes are:

1. Sausage products
2. Chicken burgers (Konspol)
3. Cooked diced chicken meat products
4. Chicken meat products (Glacial Treasure brand)
5. Precooked cooked beef meat products (Corned Beef)
6. Pastry products (Smith Snacks brand)
7. BBQ Chicken Salads
8. Salads containing chicken products (GH Foods CA, LLC)

Note that some of these products have been recalled due to possible contamination with listeria monocytogenes, while others have been confirmed to contain the bacteria.
