In [1]:
%pip install google-genai
%pip install python-dotenv
%pip install beautifulsoup4
%pip install googlesearch-python


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.

[1m[[0m[34;49mnotice[0m[1;3

In [2]:
from google import genai
from dotenv import load_dotenv
import os

# Load environment variables from a local .env file, if one is present.
load_dotenv()

# api_key is used both for the Gemini client below and for the Google Custom
# Search request in get_medicijn_details; cx_id identifies the search engine.
api_key = os.getenv("GOOGLE_API_KEY")
cx_id = os.getenv("GOOGLE_CSE_ID")

# Fail fast on missing configuration: otherwise None is sent to the search API
# and every medicine would later be reported as "not found".
_missing_env = [
    name
    for name, value in (("GOOGLE_API_KEY", api_key), ("GOOGLE_CSE_ID", cx_id))
    if not value
]
if _missing_env:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_env)
    )

client = genai.Client(api_key=api_key)

Read the input from the file '../data/generated_recipes.json' and check each recipe for correctness.

The RAG model first takes the name of the medicine from the field 'medicijn' and retrieves its documentation from apotheek.nl. To find the documentation, it runs a Google search query that combines the medicine name with the `site:apotheek.nl` operator.
Then it checks if the medicine is valid by checking if the URL returns a 200 status code. If this fails, the recipe is considered invalid.
If the medicine is valid, the documentation is provided as context to the AI model, which checks whether the strength, form, number of doses, and instructions are valid. The AI model is also asked to check whether the indication is relevant to the medical question.

The moderation model receives a prompt that asks it to check the recipe against the medical question and the patient information, without the retrieved documentation.

The recipe is considered correct only if both the RAG model and the moderation model consider it correct.


In [3]:
from bs4 import BeautifulSoup
import requests

def read_medicijn_details(html_text: str):
    """Return the text of the ``<ul id="container">`` element of a page.

    Args:
        html_text: HTML source of the page to inspect.

    Returns:
        The whitespace-stripped text of the first ``<ul id="container">``
        element, or the sentinel "Geen details gevonden voor het medicijn."
        when the page has no such element.
    """
    container = BeautifulSoup(html_text, 'html.parser').find('ul', id='container')
    if not container:
        return "Geen details gevonden voor het medicijn."
    return container.get_text(strip=True)

def get_medicijn_details(medicijn: str, timeout: float = 10.0):
    """Look up a medicine on apotheek.nl and return its documentation text.

    The medicine name is combined with ``site:apotheek.nl`` and sent to the
    Google Custom Search JSON API; the first hit is downloaded and passed to
    ``read_medicijn_details``.

    Args:
        medicijn: Name of the medicine to look up.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        The extracted documentation text, or a string starting with
        "Geen details gevonden" when the name is empty, the search has no
        results, or the documentation page does not answer with HTTP 200.

    Raises:
        requests.HTTPError: If the search API itself answers with an error
            status (e.g. invalid key or exhausted quota). This is raised
            rather than reported as "not found" so that an API problem is
            not mistaken for an unknown medicine.
    """
    not_found = "Geen details gevonden voor het medicijn."

    # A missing name would otherwise be searched as the literal text "None".
    if not medicijn:
        return not_found

    # Passing the query through `params` lets requests URL-encode it, so
    # names containing spaces, '&', '#', ... cannot corrupt the request.
    search_response = requests.get(
        "https://www.googleapis.com/customsearch/v1",
        params={"q": f"{medicijn} site:apotheek.nl", "key": api_key, "cx": cx_id},
        timeout=timeout,
    )
    search_response.raise_for_status()

    items = search_response.json().get('items') or []
    if not items:
        return not_found

    # Only the top-ranked hit is used as documentation.
    page = requests.get(items[0]['link'], timeout=timeout)
    if page.status_code != 200:
        return not_found
    return read_medicijn_details(page.text)


Now use the medical docs to check each recipe for correctness.


In [4]:
def check_recipe_rag(recipe, details):
    """Ask the model to review a recipe using retrieved documentation.

    Args:
        recipe: Mapping from which 'medische_vraag', 'recept' and
            'patientinformatie' are read (missing keys are rendered as None).
        details: Documentation text that is added to the prompt as context.

    Returns:
        A dict with the keys 'correct' (bool) and 'explanation' (str), as
        requested through the response schema. If the model response cannot
        be parsed as JSON, a dict with ``correct`` set to False is returned.
    """
    medical_question = recipe.get('medische_vraag')
    recept = recipe.get('recept')
    patient_info = recipe.get('patientinformatie')

    prompt = f"""
    Controleer het volgende recept op juistheid:

    Vraag: {medical_question}

    Recept: {recept}

    Patiënt informatie: {patient_info}

    Details: {details}
    """

    response = client.models.generate_content(
        model="gemini-2.0-flash-001",
        contents=prompt,
        config={
            "response_mime_type": "application/json",
            "response_schema": {
                "type": "object",
                "properties": {
                    "correct": {"type": "boolean"},
                    "explanation": {"type": "string"}
                },
                "required": ["correct", "explanation"]
            },
            "system_instruction": ("Beoordeel met de tekst in 'Details' het recept op basis van de volgende criteria: "
                                  "1. Bevat het medicijn een geldige naam, sterkte, vorm, aantal doses en instructies? "
                                  "2. Is de indicatie relevant voor de medische vraag en gegeven patient informatie? "
                                  "3. Geef een beoordeling van het recept als correct = True|False. ")
        }
    )
    try:
        return json.loads(response.text)
    except (TypeError, ValueError):
        # TypeError covers a response without text (None); ValueError covers
        # malformed JSON. The fallback is built as a dict directly: parsing a
        # single-quoted literal with json.loads would itself raise.
        print("Response is not in JSON format, marking the recipe as incorrect.")
        return {'correct': False, 'explanation': 'Response is not in JSON format.'}



In [5]:
def check_recipe_moderator(recipe):
    """Ask the moderation model to review a recipe without documentation.

    Unlike ``check_recipe_rag``, the prompt contains only the medical
    question, the patient information and the recipe itself.

    Args:
        recipe: Mapping from which 'medische_vraag', 'recept' and
            'patientinformatie' are read (missing keys are rendered as None).

    Returns:
        A dict with the keys 'correct' (bool) and 'explanation' (str), as
        requested through the response schema. If the model response cannot
        be parsed as JSON, a dict with ``correct`` set to False is returned.
    """
    medical_question = recipe.get('medische_vraag')
    recept = recipe.get('recept')
    patient_info = recipe.get('patientinformatie')

    prompt = f"""
    Controleer het volgende recept op juistheid:

    Vraag: {medical_question}

    Patient informatie: {patient_info}

    Recept: {recept}
    """

    response = client.models.generate_content(
        model="gemini-2.0-flash-001",
        contents=prompt,
        config={
            "response_mime_type": "application/json",
            "response_schema": {
                "type": "object",
                "properties": {
                    "correct": {"type": "boolean"},
                    "explanation": {"type": "string"}
                },
                "required": ["correct", "explanation"]
            },
            "system_instruction": ("Beoordeel het recept op basis van de volgende criteria: "
                                  "1. Bevat het medicijn een geldige naam, sterkte, vorm, aantal doses en instructies? "
                                  "2. Is de indicatie relevant voor de medische vraag en gegeven patient informatie? "
                                  "3. Geef een beoordeling van het recept als correct = True|False. ")
        }
    )
    try:
        return json.loads(response.text)
    except (TypeError, ValueError):
        # Same fallback as check_recipe_rag: an unparsable or empty response
        # marks the recipe as incorrect instead of aborting the whole review.
        print("Response is not in JSON format, marking the recipe as incorrect.")
        return {'correct': False, 'explanation': 'Response is not in JSON format.'}


In [6]:
import json

# Load the generated recipes that have to be reviewed
with open('../data/generated_recipes.json', 'r', encoding='utf-8') as file:
    recipes = json.load(file)

# Review every recipe with the RAG check first and the moderator check second
for recipe in recipes:
    medicijn = recipe.get('medicijn')
    details = get_medicijn_details(medicijn)

    if not details.startswith('Geen details gevonden'):
        # documentation is available: let the model judge the recipe with it
        rag_result = check_recipe_rag(recipe, details)
        rag_correct, rag_explanation = rag_result['correct'], rag_result['explanation']
    else:
        # no documentation: the RAG check fails without calling the model
        rag_correct, rag_explanation = False, "Medicijn niet gevonden op apotheek.nl."

    mod_result = check_recipe_moderator(recipe)
    mod_correct, mod_explanation = mod_result['correct'], mod_result['explanation']

    # the recipe only passes when both reviewers accept it
    recipe['correct'] = True if (rag_correct and mod_correct) else False
    recipe['explanation'] = f"RAG: {rag_explanation}, Moderator: {mod_explanation}"


# Persist the reviewed recipes, including verdict and explanation
output_file = "../data/generated_recipes_combined_review.json"
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(recipes, f, ensure_ascii=False, indent=4)

