In [2]:
!pip -qqq install ollama==0.1.9 --progress-bar off
!pip -qqq install pydantic==2.7.4 --progress-bar off
!pip -qqq install langchain-core==0.2.7 --progress-bar off
!pip -qqq install openai==1.35.3 --progress-bar off

In [2]:
import json
import os
from enum import Enum
from typing import Generic, Optional, Type, TypeVar

from langchain_core.output_parsers import PydanticOutputParser
from openai import OpenAI
from pydantic import BaseModel


In [3]:
# Local Ollama client, reached through the OpenAI SDK pointed at a localhost endpoint.
client = OpenAI(
    base_url = 'http://localhost:11434/v1',
    # Placeholder value rather than a real secret -- presumably the local server
    # does not validate it; confirm before pointing this at a hosted endpoint.
    api_key='ollama',
)
# Default model name used by predict() below.
MODEL = 'llama3'

In [4]:
# Sample text that the prompts below ask the model to score.
tweet = """
How do you choose which LLM to use?

A vibe check ain't going to cut it.

I'm trying DeepEval (@jeffr_yyy)

- Wide range of metrics: Relevancy, Hallucination & more
- Bulk & real-time evaluation
- CI/CD integration
- Benchmarking on popular datasets
"""

In [5]:
class ResponseFormat(Enum):
    """Output mode that predict() requests from the chat completions endpoint."""

    # predict() maps this member to the "json_object" response type.
    JSON = "json"
    # Any other member (i.e. TEXT) is mapped to the "text" response type.
    TEXT = "text"


def predict(
    prompt: str,
    system_prompt: Optional[str] = None,
    response_format: ResponseFormat = ResponseFormat.TEXT,
    model: str = MODEL,
    client: OpenAI = client,
) -> Optional[str]:
    """Send a single-turn chat request and return the reply text.

    Args:
        prompt: Content of the user message.
        system_prompt: Optional system message placed before the user
            message; skipped when None or empty.
        response_format: ResponseFormat.JSON requests the "json_object"
            response type; any other value requests "text".
        model: Model name passed to the chat completions endpoint.
        client: Client object used to issue the request.

    Returns:
        The content of the first choice, or None when the request raised
        (the error is printed). Callers must be prepared to handle None.
    """
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": prompt})

    format_type = (
        "json_object" if response_format == ResponseFormat.JSON else "text"
    )
    try:
        chat_completion = client.chat.completions.create(
            messages=messages,
            model=model,
            # Near-zero temperature; presumably chosen to keep replies as
            # repeatable as possible between runs.
            temperature=0.00001,
            response_format={"type": format_type},
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        # Best-effort by design: report the failure and keep the notebook
        # running. The None return is now explicit instead of implicit.
        print("There is error", e)
        return None


## JSON Output Support

In [6]:
# System prompt: sets the evaluator role and shows the JSON shape expected back.
# The example uses double quotes so that it is itself valid JSON -- a
# single-quoted example would teach the model an unparseable format.
system_prompt = """
You're evaluating writing style in text.
Your evaluations must always be in JSON format. Here is an example JSON response:

```
{
  "readability": 4,
  "conciseness": 2
}
```
"""

print(system_prompt)


You're evaluating writing style in text.
Your evalutions must always be in JSON format. Here is an example JSON response:

```
{
  'readability': 4,
  'conciseness': 2
}
```



In [7]:
# User prompt: embeds the tweet in a fenced block, followed by the 0-10
# scoring instruction for readability and conciseness.
prompt = f"""
Evaluate the text:

```
{tweet}
```

You're evaluating the readability and conciseness with values from 0 (extremely bad) to 10 (extremely good)
"""
print(prompt)


Evaluate the text:

```

How do you choose which LLM to use?

A vibe check ain't going to cut it.

I'm trying DeepEval (@jeffr_yyy)

- Wide range of metrics: Relevancy, Hallucination & more
- Bulk & real-time evaluation
- CI/CD integration
- Benchmarking on popular datasets

```

You're evaluating the readability and conciseness with values from 0 (extremely bad) to 10 (extremely good)



In [8]:
# Request JSON mode explicitly; predict() returns None if the call fails.
response = predict(prompt, system_prompt, response_format=ResponseFormat.JSON)

In [9]:
# Show the raw string returned by predict().
print(response)

{ "readability": 8, "conciseness": 9 }


## No JSON Output Support

In [10]:
# Target structure for the model's answer. The prompts ask for scores from
# 0 to 10, but the fields are declared as plain ints with no range constraint.
# (Kept as a comment rather than a class docstring, because a docstring
# would be emitted into the generated JSON schema.)
class WritingScore(BaseModel):
    readability: int
    conciseness: int

In [11]:
# Build the JSON schema with the Pydantic v2 API (`.schema()` is the deprecated
# v1 name) and keep only the keys the prompt needs: "properties" and "required".
full_schema = WritingScore.model_json_schema()
schema = {
    "properties": full_schema["properties"],
    "required": full_schema["required"],
}
print(json.dumps(schema, indent=2))

{
  "properties": {
    "readability": {
      "title": "Readability",
      "type": "integer"
    },
    "conciseness": {
      "title": "Conciseness",
      "type": "integer"
    }
  },
  "required": [
    "readability",
    "conciseness"
  ]
}


In [12]:
# Template for the format instructions appended to a prompt. It is filled via
# str.format(schema=...): `{schema}` is the only placeholder, and every literal
# brace in the worked example is doubled so that format() leaves it in place.
OUTPUT_FORMAT_INSTRUCTIONS = """The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}}
the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of the schema. The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted.

Here is the output schema:

```
{schema}
```

Do not return any preamble or explanations, return only a pure JSON string surrounded by triple backticks (```)."""

In [13]:
# Fill the {schema} placeholder with the pretty-printed schema; the doubled
# braces in the template come out as single literal braces.
json_instruction = OUTPUT_FORMAT_INSTRUCTIONS.format(
    schema=json.dumps(schema, indent=2)
)
print(json_instruction)

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:

```
{
  "properties": {
    "readability": {
      "title": "Readability",
      "type": "integer"
    },
    "conciseness": {
      "title": "Conciseness",
      "type": "integer"
    }
  },
  "required": [
    "readability",
    "conciseness"
  ]
}
```

Do not return any preamble or explanations, return only a pure JSON string surrounded by triple backticks (```).


In [16]:
# Prompt for the no-JSON-mode path: the task text plus the schema-based format
# instructions. Note this rebinds `prompt`, replacing the earlier value.
prompt = f"""
Evaluate the writing style from the text:

```
{tweet}
```

Evaluate the readability and conciseness with values from 0 (extremely bad) to 10 (extremely good).
Just give the output values for readability and conciseness in JSON format. Do not include any other information.

{json_instruction}
"""
print(prompt)


Evaluate the writing style from the text:

```

How do you choose which LLM to use?

A vibe check ain't going to cut it.

I'm trying DeepEval (@jeffr_yyy)

- Wide range of metrics: Relevancy, Hallucination & more
- Bulk & real-time evaluation
- CI/CD integration
- Benchmarking on popular datasets

```

Evaluate the readability and conciseness with values from 0 (extremely bad) to 10 (extremely good).
Just give the output values for readability and conciseness in JSON format. Do not include any other information.

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:

```
{
  "properties": {
    "rea

In [17]:
# Pass the model by keyword: the second positional parameter of predict() is
# system_prompt, so predict(prompt, MODEL) would send the model name as a
# system message. response_format stays at its TEXT default here.
response = predict(prompt, model=MODEL)
print(response)

```{"readability":6,"conciseness":8}```


## From Scratch

In [18]:
# Drop surrounding whitespace first, then the backtick fence. str.strip() takes
# a set of characters, so "`" is equivalent to "```"; without the whitespace
# strip, a trailing newline after the closing fence would leave it in place.
response_json = json.loads(response.strip().strip("`"))
WritingScore.model_validate(response_json)

WritingScore(readability=6, conciseness=8)

In [19]:
# Type variable restricted to Pydantic models, so parse() can be typed as
# returning an instance of whichever model class was supplied.
TBaseModel = TypeVar("TBaseModel", bound=BaseModel)


class JsonOutputParser(Generic[TBaseModel]):
    """Parse a model reply containing JSON into a Pydantic model instance.

    The reply may be bare JSON or JSON wrapped in a backtick fence, with or
    without a leading ``json`` language tag and surrounding whitespace.
    """

    def __init__(self, pydantic_object: Type[TBaseModel]):
        """Store the Pydantic model class that replies are validated against."""
        self.pydantic_object = pydantic_object

    @staticmethod
    def _strip_code_fence(response: str) -> str:
        """Return `response` without outer whitespace, backticks or a `json` tag."""
        # Whitespace goes first, otherwise a trailing newline would shield the
        # closing fence from the backtick strip.
        text = response.strip().strip("`").strip()
        # A fenced reply may open with a language tag (```json). Valid JSON
        # never begins with the letters "json", so dropping it is safe.
        if text[:4].lower() == "json":
            text = text[4:]
        return text

    def parse(self, response: str) -> TBaseModel:
        """Decode `response` as JSON and validate it against the stored model.

        Raises:
            json.JSONDecodeError: If the unwrapped text is not valid JSON.
            pydantic.ValidationError: If the JSON does not match the model.
        """
        response_json = json.loads(self._strip_code_fence(response))
        # model_validate is the Pydantic v2 replacement for the deprecated parse_obj.
        return self.pydantic_object.model_validate(response_json)

In [20]:
# Parse the fenced reply with the hand-written parser; the cell displays the result.
JsonOutputParser(pydantic_object=WritingScore).parse(response)

WritingScore(readability=6, conciseness=8)

## Using LangChain Parser

In [21]:
# LangChain's parser, built for the same WritingScore model.
parser = PydanticOutputParser(pydantic_object=WritingScore)

In [22]:
# Same response string as above, parsed by the LangChain parser this time.
parser.parse(response)

WritingScore(readability=6, conciseness=8)