https://docs.llamaindex.ai/en/stable/examples/output_parsing/df_program/

Prerequisites:
- LLM
- Pydantic class

Libraries: 
- pip install llama-index

The open source llm is defined

In [97]:
# Customize the llm
from llama_index.llms.ollama import Ollama
from llama_index.core import Settings
models = ["phi", "gemma:2b", "gemma:7b", "llama2", "llama2-uncensored"]
Settings.llm = Ollama(model=models[0], request_timeout=60.0)

In [98]:
from llama_index.llms.ollama import Ollama
from llama_index.core import Settings
def change_model(model_number: str):
    Settings.llm = Ollama(model=models[model_number], request_timeout=60.0)

In [92]:
from pydantic import BaseModel
from typing import List

from llama_index.core.program import LLMTextCompletionProgram

Output schema:

In [93]:
class Song(BaseModel):
    """Data model for a song."""

    title: str
    length_seconds: int


class Album(BaseModel):
    """Data model for an album."""

    name: str | None
    artist: str | None
    songs: List[Song] | None

In [94]:
from llama_index.core.program import LLMTextCompletionProgram

In [95]:
prompt_template_str = """\
Generate an example album, with an artist and a list of songs. \
Using the movie {movie_name} as inspiration.\
"""
program = LLMTextCompletionProgram.from_defaults(
    output_cls=Album, # Output pydentic class which must be complied
    prompt_template_str=prompt_template_str,
    verbose=True,
)

In [102]:
import time
start = time.time()
def programm() -> Album:
    output = program(movie_name="The Shining")
    end = time.time()
    print(f"Time taken: {end - start:.2f} seconds")
    return output

In [107]:
import time
start = time.time()
trial = 0
while True:
    print(f"Trial: {trial}")
    trial += 1
    try:
        output = programm()
        print(f"The model{models[trial]} worked!")
        break
    except Exception as e:
        print(f"Error: {e}")
        change_model(trial)
        if trial >= 4:
            print("All models failed")
            end_time = time.time()
            print(f"Time taken for checking all models: {end_time - start:.2f} seconds")
            break

# El modelo gemma2:b ha funcionado una vez con el siguiente output: 
# Album(name='The Shining', artist='Frank Sinatra', songs=[Song(title='The End of the World', length_seconds=11), Song(title='When the Night Comes', length_seconds=9), Song(title="I'll Be There for You", length_seconds=6)])



Trial: 0
Error: 1 validation error for Album
__root__
  Expecting property name enclosed in double quotes: line 14 column 49 (char 545) [type=value_error.jsondecode, input_value='{\n    "$defs": {\n     ...    "type": "object"\n}', input_type=str]
Trial: 1
Error: 1 validation error for Album
__root__
  Expecting ',' delimiter: line 10 column 5 (char 195) [type=value_error.jsondecode, input_value='{\n    "$defs": {\n     ...k"\n        }\n    ]\n}', input_type=str]
Trial: 2
Error: 3 validation errors for Album
name
  Field required [type=missing, input_value={'$defs': {'Song': {'desc...e', 'artist', 'songs']}}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.6/v/missing
artist
  Field required [type=missing, input_value={'$defs': {'Song': {'desc...e', 'artist', 'songs']}}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.6/v/missing
songs
  Field required [type=missing, input_value={'$defs': {'Song': {'desc...e', 'artist',

IndexError: list index out of range

In [106]:
output

Album(name='The Shining', artist='Frank Sinatra', songs=[Song(title='The End of the World', length_seconds=11), Song(title='When the Night Comes', length_seconds=9), Song(title="I'll Be There for You", length_seconds=6)])

In [None]:
from llama_index.core.output_parsers import PydanticOutputParser

program = LLMTextCompletionProgram.from_defaults(
    output_parser=PydanticOutputParser(output_cls=Album),
    prompt_template_str=prompt_template_str,
    verbose=True,
)

In [None]:
output = program(movie_name="Lord of the Rings")
output

Custom Output Parser

In [None]:
from llama_index.core.output_parsers import ChainableOutputParser


class CustomAlbumOutputParser(ChainableOutputParser):
    """Custom Album output parser.

    Assume first line is name and artist.

    Assume each subsequent line is the song.

    """

    def __init__(self, verbose: bool = False):
        self.verbose = verbose

    def parse(self, output: str) -> Album:
        """Parse output."""
        if self.verbose:
            print(f"> Raw output: {output}")
        lines = output.split("\n")
        name, artist = lines[0].split(",")
        songs = []
        for i in range(1, len(lines)):
            title, length_seconds = lines[i].split(",")
            songs.append(Song(title=title, length_seconds=length_seconds))

        return Album(name=name, artist=artist, songs=songs)

In [None]:
prompt_template_str = """\
Generate an example album, with an artist and a list of songs. \
Using the movie {movie_name} as inspiration.\

Return answer in following format.
The first line is:
, 
Every subsequent line is a song with format:
, 

"""
program = LLMTextCompletionProgram.from_defaults(
    output_parser=CustomAlbumOutputParser(verbose=True),
    output_cls=Album,
    prompt_template_str=prompt_template_str,
    verbose=True,
)

In [None]:
output = program(movie_name="The Dark Knight")

> Raw output:  ,
"I'm Not Dead" by Thirty Seconds to Mars
"What Happens Tomorrow?" by David Bowie
"Usual Suspects" by Guns N' Roses
"Man in the Mirror" by Michael Jackson



ValueError: not enough values to unpack (expected 2, got 1)