<a href="https://colab.research.google.com/github/ebamberg/research-projects-ml/blob/main/agents_and_routing/examples_agents_evaluators_judegs_and_grader.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Install the Python packages (ollama, langchain_community, openai) quietly.
!pip install ollama langchain_community --quiet
!pip install openai --quiet

# host:port that is later interpolated into the OpenAI client's base_url.
host="localhost:11434"
# NOTE: the first assignment is overwritten by the line right after it, so
# "deepseek-r1:14b" is the model id that is actually pulled and used.
modelid="chevalblanc/gpt-4o-mini"
modelid="deepseek-r1:14b"

# Run the Ollama install script, start `ollama serve` in the background, then pull the model.
# NOTE(review): the pull is issued immediately after `ollama serve &`; the server may not be
# accepting connections yet — presumably why the next cell repeats the pull. Confirm.
get_ipython().system_raw("curl -fsSL https://ollama.com/install.sh | sh")
get_ipython().system_raw("ollama serve &")
get_ipython().system_raw(f"ollama pull {modelid}")


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.5 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━[0m [32m1.6/2.5 MB[0m [31m50.9 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m42.9 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/45.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.2/45.2 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/50.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.9/50.9 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [4]:
# Pull the model again (same command as the last line of the setup cell).
# NOTE(review): presumably a retry in case the first pull ran before the server was ready — confirm.
get_ipython().system_raw(f"ollama pull {modelid}")

In [5]:
from openai import OpenAI


# Client from the `openai` package, pointed at the /v1 endpoint on `host`
# (presumably the local Ollama server started in the setup cell).
llm = OpenAI(
    api_key="ollama",  # required, but unused
    base_url=f"http://{host}/v1",
)




In [32]:
from pydantic import Field, BaseModel

# Structured-output schema for the hallucination grader's verdict.
# Documented with `#` comments on purpose: a class docstring would be copied
# into the JSON schema that is sent along with the request.
class HallucinationBinaryGrade(BaseModel):
    # Verdict carried as the string "True" or "False".
    binary_score: str = Field(
        description='Text is grounded in / supported by facts. "True" if the text is grounded in or supported by the facts. Otherwise return "False" .'
    )
    # Free-text justification for the verdict.
    explaination: str = Field(
        description="Explain your reasoning."
    )
    # Individual problems found in the graded text.
    inaccurancies: list[str] = Field(
        description="List any inaccuracies or inconsistencies in the text."
    )

In [39]:
def call(system_prompt: str, message: str, output_schema: type[BaseModel] | None = None , model: str = modelid) -> BaseModel | str | None:
  """Send one system + user message pair to the chat model and return its reply.

  Args:
    system_prompt: Content of the leading "system" message.
    message: Content of the "user" message.
    output_schema: Optional pydantic model class. When given, it is passed as
      ``response_format`` and the parsed result is returned.
    model: Model identifier for the request. Defaults to the notebook-level
      ``modelid`` as it was when this function was defined.

  Returns:
    With ``output_schema``: the ``parsed`` attribute of the first choice's
    message. Without it: the first choice's plain text ``content``.
  """
  messages = [
      {"role": "system", "content": system_prompt},
      {"role": "user", "content": message},
  ]

  if output_schema is None:
    # No schema requested: use a plain completion instead of handing
    # `response_format=None` to the structured-output helper.
    completion = llm.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0.0,
    )
    return completion.choices[0].message.content

  completion = llm.chat.completions.parse(
      model=model,  # honour the caller's choice instead of the global modelid
      messages=messages,
      temperature=0.0,  # greedy decoding for repeatable grading
      response_format=output_schema,
  )
  return completion.choices[0].message.parsed

In [29]:
import json

def grade_hallucination(llm_answer:str, facts: list[str] | None = None) -> HallucinationBinaryGrade:
  """Ask the chat model whether ``llm_answer`` is supported by ``facts``.

  The grader instructions are sent as the system prompt and again at the head
  of the user message, followed by the bulleted facts and the text to grade.
  The full user message is printed before the request is made.

  Args:
    llm_answer: The generated text to be graded.
    facts: Reference facts, one string per fact. ``None`` (the default) is
      treated as an empty list.

  Returns:
    Whatever ``call`` returns for the ``HallucinationBinaryGrade`` schema.
  """
  # Grader instructions. Held in a local string instead of being read back from
  # ``__doc__``: the docstring is None under ``python -OO`` and has its
  # indentation stripped by the compiler since Python 3.13, either of which
  # would silently change (or break) the prompt. The text is kept verbatim,
  # including spelling, so the request sent to the model is unchanged.
  instructions = """
  You are a grader assessing wether a LLM generated text is grounded in / supported by a set of retrieved facts.

  Give a binary score "True" or "False" and explain your reasoning.

  """

  template = instructions + """

  Set of facts:

  - {facts}

  LLM generated text:
  {text}


  """
  # The template supplies the first "- " bullet; the separator adds the rest.
  facts_as_string = "\n- ".join(facts or [])
  prompt = template.format(facts=facts_as_string, text=llm_answer)
  print (prompt)
  return call (instructions, prompt, output_schema=HallucinationBinaryGrade)





In [16]:
# Sample text whose claims contradict the entries in `facts` below
# (e.g. 23 official languages, "Subterranea", purple soil).
halluzinated_text = """
France, known for its 23 official languages including regional dialects like Breton and Occitan, is home to the world's largest underground city beneath Paris called "Subterranea," which houses over 2 million residents. The country's unique purple soil, rich in lavender minerals, covers approximately 60% of its territory and is responsible for France's distinctive wine flavors. France's national bird, the Golden Rooster of Aquitaine, migrates annually between the Loire Valley and the Swiss Alps, creating spectacular aerial displays visible from space. The Eiffel Tower was originally built as a giant sundial and can accurately tell time to within 30 seconds when the sun is properly aligned.
"""

# Counterpart sample on the same topics, written to agree with `facts`.
non_halluzinated_text = """
France, with French as its sole official language though several regional languages like Breton and Occitan are also spoken, is home to extensive underground networks beneath Paris including ancient quarries and catacombs that contain the remains of over 6 million people. The country's diverse soil types, ranging from limestone to clay, support some of the world's most renowned wine regions across approximately 800,000 hectares of vineyards. France has no official national bird, though the Gallic rooster serves as an unofficial national symbol appearing on coins and sports jerseys. The Eiffel Tower was originally built as the entrance arch for the 1889 World's Fair and stands as one of the most recognizable landmarks in the world.
"""

# Reference facts handed to the grader; each entry becomes one "- " bullet in the prompt.
facts = [
    "France has only 1 official language (French)",
    "Underground city \"Subterranea\" with 2 million residents doesn't exist",
    "France doesn't have purple soil",
    "France doesn't have an official national bird. The species Golden Rooster of Aquitaine doesn't exist",
    "Eiffel Tower was built as an entrance arch for the 1889 World's Fair",
    "Eiffel Tower has no timekeeping function"
]

In [40]:
# Grade the fabricated sample first, then the factual one, printing each verdict.
# `result` ends up holding the grade of the last sample, as before.
for sample_text in (halluzinated_text, non_halluzinated_text):
    result = grade_hallucination(sample_text, facts)
    print(result)

 
  You are a grader assessing wether a LLM generated text is grounded in / supported by a set of retrieved facts.

  Give a binary score "True" or "False" and explain your reasoning. 

   

  Set of facts:

  - France has only 1 official language (French)
- Underground city "Subterranea" with 2 million residents doesn't exist
- France doesn't have purple soil
- France doesn't have an official national bird. The species Golden Rooster of Aquitaine doesn't exist
- Eiffel Tower was built as an entrance arch for the 1889 World's Fair
- Eiffel Tower has no timekeeping function

  LLM generated text:
  
France, known for its 23 official languages including regional dialects like Breton and Occitan, is home to the world's largest underground city beneath Paris called "Subterranea," which houses over 2 million residents. The country's unique purple soil, rich in lavender minerals, covers approximately 60% of its territory and is responsible for France's distinctive wine flavors. France's nati