In [1]:
import ollama

client = ollama.Client(host="https://ollama-dev.ceos.ufsc.br/")
response = client.list()
for model in response.models:
    print(model.model, model.details.parameter_size)

gemma3:12b 12.2B
gemma3:4b 4.3B
gemma3:1b 999.89M
gemma3:27b-it-fp16 27.4B
qwen2.5:72b-instruct 72.7B
deepseek-r1:32b-qwen-distill-q4_K_M 32.8B
deepseek-r1:70b 70.6B
qwen2.5:0.5b 494.03M
qwen2.5:1.5b 1.5B
llama3.3:70b 70.6B
llama3.3:latest 70.6B
llama3.2-vision:latest 9.8B
llama3.2:longe-ctx 3.2B
qwen2.5:latest 7.6B
nomic-embed-text:latest 137M
qwen2.5-coder:32b 32.8B
llama3.2:1b 1.2B
mistral-nemo:12b-instruct-2407-fp16 12.2B
nemotron:70b-instruct-q8_0 70.6B
llama3.2:3b 3.2B
llama3.2:latest 3.2B
mistral-large:123b 122.6B
qwen2.5:72b 72.7B
llama3.1:70b 70.6B
qwen2.5:32b-instruct 32.8B
qwen2.5:72b-instruct-q4_0 72.7B
llama2-uncensored:latest 7B
qwen2.5:7b 7.6B
qwen2.5:3b 3.1B


In [2]:
import csv
import json
import pathlib

sample_path = pathlib.Path("../../../resources/sample.json")
with sample_path.open() as sample_file:
    reader = json.load(sample_file)
    sample = {}
    for entry in reader:
        entry_id = entry.pop("codigo")  # Remove "ID" and use it as the key
        sample[entry_id] = entry

In [3]:
from slugify import slugify

ground_truth_path = pathlib.Path("../../../resources/ground_truth.csv")
ground_truth = {}
with ground_truth_path.open() as ground_truth_file:
    reader = csv.DictReader(ground_truth_file)
    fieldnames = []
    for header in reader.fieldnames:
        header = slugify(header, separator="_")
        if header == "datahoradom":
            header = "data"
        if header == "categoriadom":
            header = "categoria"
        fieldnames.append(header)
    reader.fieldnames = fieldnames
    for row in reader:
        row = {key: (value if value != "NULL" else None) for key, value in row.items()}
        row_id = row.pop("codigo")  # remove 'ID' and use it as a key
        ground_truth[row_id] = row
    print(fieldnames)

['codigo', 'titulo', 'data', 'cod_registro_info_sfinge', 'municipio', 'entidade', 'categoria', 'modalidade', 'nr_modalidade', 'objeto', 'justificativa', 'data_abertura', 'informacoes', 'signatario', 'cargo_do_signatario']


In [4]:
dom_values = ["titulo", "data", "cod_registro_info_sfinge", "entidade", "categoria"]
cod_registro_info_sfinge_null_amount = 0
for codigo in sample:
    if ground_truth[codigo]["cod_registro_info_sfinge"] is None:
        cod_registro_info_sfinge_null_amount += 1
print(cod_registro_info_sfinge_null_amount)

13


In [35]:
from datetime import datetime
from enum import Enum

from pydantic import BaseModel


class Modalidade(str, Enum):
    CONVITE = "Convite"
    TOMADA_DE_PRECO = "Tomada de Preço"
    DISPENSA = "Dispensa de Licitação"
    INEXIGIBILIDADE = "Inexigibilidade"
    PREGÃO = "Pregão"
    CARTA_CONVITE = "Carta Convite"
    CREDENCIAMENTO = "Credenciamento"
    OUTROS = "Outros"
    CONCORRENCIA = "Concorrência"


class CustomDate(BaseModel):
    year: int
    month: int
    day: int


class Atributos(BaseModel):
    modalidade: Modalidade
    nr_modalidade: str
    objeto: str
    justificativa: str
    data_abertura: CustomDate
    informacoes: str
    signatario: str
    cargo_do_signatario: str

In [36]:
Atributos.model_json_schema()

{'$defs': {'CustomDate': {'properties': {'year': {'title': 'Year',
     'type': 'integer'},
    'month': {'title': 'Month', 'type': 'integer'},
    'day': {'title': 'Day', 'type': 'integer'}},
   'required': ['year', 'month', 'day'],
   'title': 'CustomDate',
   'type': 'object'},
  'Modalidade': {'enum': ['Convite',
    'Tomada de Preço',
    'Dispensa de Licitação',
    'Inexigibilidade',
    'Pregão',
    'Carta Convite',
    'Credenciamento',
    'Outros',
    'Concorrência'],
   'title': 'Modalidade',
   'type': 'string'}},
 'properties': {'modalidade': {'$ref': '#/$defs/Modalidade'},
  'nr_modalidade': {'title': 'Nr Modalidade', 'type': 'string'},
  'objeto': {'title': 'Objeto', 'type': 'string'},
  'justificativa': {'title': 'Justificativa', 'type': 'string'},
  'data_abertura': {'$ref': '#/$defs/CustomDate'},
  'informacoes': {'title': 'Informacoes', 'type': 'string'},
  'signatario': {'title': 'Signatario', 'type': 'string'},
  'cargo_do_signatario': {'title': 'Cargo Do Signat

In [37]:
response = client.chat(
    model="deepseek-r1:70b",
    messages=[
        {"role": "system", "content": "Extraia os atributos do documento"},
        {"role": "user", "content": sample["6611505"]["texto"]},
    ],
    format=Atributos.model_json_schema(),
)

In [38]:
print(response.message.content)

{
  "modalidade": "Concorrência",
  "nr_modalidade": "05/2024",
  "objeto": "Execução de passeio público as margens da SC-154, quilômetros 89 e 90, acesso sul do município de Ipumirim/SC",
  "justificativa": "Considerando o disposto no art. 28, inciso II da Lei n° 14.133/2021",
  "data_abertura": {
    "year": 2024,
    "month": 11,
    "day": 12
  },
  "informacoes": "Foram cumpridas as formalidades previstas na Lei n° 14.133/2021",
  "signatario": "Hilário Reffatti - Prefeito Municipal",
  "cargo_do_signatario": "Prefeito Municipal de Ipumirim"
}


In [8]:
json.loads(response.message.content)

AttributeError: 'ListResponse' object has no attribute 'message'

In [36]:
sample

{'6611928': {'titulo': 'ERRATA Nº 001 - PE Nº 155/2024/SMS',
  'data': '2024-11-12 17:43:19',
  'cod_registro_info_sfinge': '01A844BB11CE682547D615CA1F6A386E150DE5EF',
  'municipio': 'Palhoça',
  'entidade': 'Prefeitura Municipal de Palhoça',
  'categoria': 'Licitações',
  'link': 'https://www.diariomunicipal.sc.gov.br/?q=id:6611928',
  'texto': '<html xmlns="http://www.w3.org/1999/xhtml">\n\n    <head>\n\n        <meta name="pdf:PDFVersion" content="1.7"/>\n\n        <meta name="pdf:docinfo:title" content="Microsoft Word - AVISO DE ERRATA N&ordm; 001 - LAUDA.doc"/>\n\n        <meta name="pdf:hasXFA" content="false"/>\n\n        <meta name="access_permission:modify_annotations" content="true"/>\n\n        <meta name="access_permission:can_print_degraded" content="true"/>\n\n        <meta name="dc:creator" content="Carolina Elisa Peres"/>\n\n        <meta name="dcterms:created" content="2024-11-12T18:52:48Z"/>\n\n        <meta name="dcterms:modified" content="2024-11-12T18:52:48Z"/>\n\n

In [37]:
import os

# get keys for your project from https://cloud.langfuse.com
os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-lf-bb675b61-0e41-4085-871c-cd07a4a506c8"
os.environ["LANGFUSE_SECRET_KEY"] = "sk-lf-58d1df49-b36f-4f5a-bea8-815eb4872f1a"
os.environ["LANGFUSE_HOST"] = "http://localhost:3000"

In [38]:
from langfuse.openai import OpenAI

client = OpenAI(
    base_url="https://ollama-dev.ceos.ufsc.br/v1",
    api_key="ollama",
)

response = client.chat.completions.create(
    model="deepseek-r1:32b-qwen-distill-q4_K_M",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Who was the first person to step on the moon?"},
        {
            "role": "assistant",
            "content": "Neil Armstrong was the first person to step on the moon on July 20, 1969, during the Apollo 11 mission.",
        },
        {
            "role": "user",
            "content": "What were his first words when he stepped on the moon?",
        },
    ],
)
print(response.choices[0].message.content)

<think>
Alright, let's dive into this query. The user initially asked who was the first person to step on the moon, and I correctly provided Neil Armstrong along with the date of the Apollo 11 mission. Now, they're following up by asking about his first words upon stepping onto the moon.

Hmm, I remember that Neil Armstrong said something memorable there. It's a classic quote everyone knows, but let me make sure I get it right. He did say something about a small step for man and a giant leap for mankind. The exact wording has always been interesting because people often misquote or misinterpret it.

Wait, was it "a man" or "man"? I recall there's some confusion because of the way it was spoken. Armstrong actually said "that's one small step for a man..." but sometimes it's heard as "for man." The "a" is hard to distinguish in the audio.

The user might be looking not just for the quote itself, but maybe also why it's significant or any interesting facts about it. They could be a studen

In [39]:
from langfuse.openai import openai

openai.langfuse_auth_check()

True