# Extraction of models from DeciContas.br Dataset

In [3]:
import os
import json
import pymssql

import pandas as pd
import numpy as np

from pprint import pprint
from datetime import datetime
from typing import List, Dict, Any, Optional
from langchain_openai import  AzureChatOpenAI, ChatOpenAI
from langchain_core.language_models.chat_models import BaseChatModel
from dotenv import load_dotenv

from sqlalchemy import (
    create_engine, Column, Integer, String, Float, Date, Boolean, Text, JSON
)
from sqlalchemy.orm import declarative_base, sessionmaker, Session
from sqlalchemy.schema import DDL, CheckConstraint
from sqlalchemy.engine import Engine

from tools.schema import (
    NERDecisao,
    Obrigacao,
    Recomendacao,
    
)

from tools.utils import (
    get_decisions_by_year_and_months,
    get_decisions_by_process,
    run_ner_pipeline_for_dataframe,
    get_connection
)

load_dotenv()

gpt4turbo = AzureChatOpenAI(
    deployment_name="gpt-4-turbo",
    model_name="gpt-4",
)

'''
gpt4turbo = ChatOpenAI(
    model="gpt-4-turbo",
    temperature=0.0
)
'''

extractor_decisao_gpt4turbo = gpt4turbo.with_structured_output(NERDecisao, include_raw=False, method="json_schema")
extractor_obrigacao_gpt4turbo = gpt4turbo.with_structured_output(
    Obrigacao, include_raw=False, method="json_schema")
extractor_recomendacao_gpt4turbo = gpt4turbo.with_structured_output(
    Recomendacao, include_raw=False, method="json_schema")





# Set up functions

# Run for year and months

In [6]:
df_dec_2025_1 = get_decisions_by_year_and_months(2025, [10,11,12])

In [None]:
run_ner_pipeline_for_dataframe(
    df=df_dec_2025_1,
    extractor=extractor_decisao_gpt4turbo,
    model_name="gpt-4-turbo",
    prompt_version="v1",
    run_id=datetime.now().strftime("Extração NER %d/%m/%Y %H:%M:%S")
)

# Run for processes

In [24]:
p = ['002039/2020',
'200008/2023',
'009819/2016',
'000294/2024',
'004102/2021',
'004478/2021',
'101152/2021',
'000066/2021',
'006828/2015',
'004703/2024',
'000244/2025',
'006347/2014',
'000664/2024',
'005234/2020',
'100603/2020',
'101069/2022',
'101081/2022',
'200243/2021',
'200166/2021',
'002020/2020',
'006590/2015',
'003131/2024',
'100854/2020',
'100008/2021',
'100584/2020',
'100612/2020',
'100708/2020',
'100726/2020',
'101060/2022',
'100604/2020',
'101640/2019',
'100889/2020',
'002347/2023',
'002490/2020',
'200018/2023',
'003127/2024',
'101162/2022',
'100537/2020',
'100177/2020',
'100231/2021',
'100356/2020',
'100736/2022',
'102457/2021',
'100416/2020',
'100462/2020',
'100504/2020',
'100602/2020',
'100926/2020',
'001992/2020',
'002370/2020',
'100426/2020',
'100475/2020',
'100484/2020',
'100495/2020',
'100501/2020',
'100613/2020',
'100615/2020',
'001605/2025',
'006160/2014',
'006496/2015',
'002153/2020',
'302904/2023',
'010680/2014',
'006334/2015',
'200034/2021',
'024971/2016',
'006040/2014',
'600229/2020',
'004339/2019',
'003136/2024',
'015898/2013',
'006620/2015',
'006444/2015',
'005848/2014',
'002695/2020',
'001989/2020',
'006375/2015',
'006650/2015',
'101072/2022',
'101539/2021',
'007970/2018']

In [22]:
df_processes = get_decisions_by_process(p)

In [26]:
run_ner_pipeline_for_dataframe(
    df=df_processes,
    extractor=extractor_decisao_gpt4turbo,
    model_name="gpt-4-turbo",
    prompt_version="v1",
    run_id=datetime.now().strftime("Extração NER %d/%m/%Y %H:%M:%S")
)

# Obrigacao and Recomendacao Extraction 

In [6]:
conn_processo = get_connection("BdDIP")

In [8]:
sql_nerdecisoes = """
SELECT * 
    FROM NERDecisao
"""
df_nerdecisoes = pd.read_sql(sql_nerdecisoes, conn_processo)

In [9]:
df_nerdecisoes

Unnamed: 0,IdNerDecisao,IdProcesso,IdComposicaoPauta,IdVotoPauta,Modelo,VersaoPrompt,RunId,RawJson,DataExtracao
0,1,599242,123668,46267,gpt-4-turbo,v1,Extração NER 04/12/2025 11:33:35,"{""multas"":[],""ressarcimentos"":[],""obrigacoes"":...",2025-12-04 14:33:37.259220
1,2,580147,123670,46294,gpt-4-turbo,v1,Extração NER 04/12/2025 11:33:35,"{""multas"":[],""ressarcimentos"":[],""obrigacoes"":...",2025-12-04 14:33:38.337366
2,3,364852,123489,45759,gpt-4-turbo,v1,Extração NER 04/12/2025 11:33:35,"{""multas"":[{""descricao_multa"":""CONHECIMENTO e ...",2025-12-04 14:33:41.478029
3,4,367272,123530,46107,gpt-4-turbo,v1,Extração NER 04/12/2025 11:33:35,"{""multas"":[],""ressarcimentos"":[],""obrigacoes"":...",2025-12-04 14:33:42.593977
4,5,446872,123660,45782,gpt-4-turbo,v1,Extração NER 04/12/2025 11:33:35,"{""multas"":[],""ressarcimentos"":[],""obrigacoes"":...",2025-12-04 14:33:43.609644
...,...,...,...,...,...,...,...,...,...
278,279,546253,117922,42086,gpt-4-turbo,v1,Extração NER 04/12/2025 13:34:35,"{""multas"":[],""ressarcimentos"":[],""obrigacoes"":...",2025-12-04 16:35:21.824078
279,280,601405,118636,43086,gpt-4-turbo,v1,Extração NER 04/12/2025 13:34:35,"{""multas"":[],""ressarcimentos"":[],""obrigacoes"":...",2025-12-04 16:35:22.862543
280,281,601405,120130,44357,gpt-4-turbo,v1,Extração NER 04/12/2025 13:34:35,"{""multas"":[],""ressarcimentos"":[],""obrigacoes"":...",2025-12-04 16:35:23.958930
281,282,612549,123074,45825,gpt-4-turbo,v1,Extração NER 04/12/2025 13:34:35,"{""multas"":[{""descricao_multa"":""multa de R$ 5.0...",2025-12-04 16:35:28.068151


In [None]:
sql_obrigacao_processar = """
SELECT 
    o.IdNerObrigacao,
    o.IdNerDecisao,
    o.Ordem,
    o.DescricaoObrigacao,
    d.IdProcesso,
    CONCAT(p.Numero_Processo, '/', p.Ano_Processo) AS processo,
    org.Nome AS orgao_responsavel,
    org.IdOrgao AS id_orgao_responsavel,
    gp.TipoPessoa AS tipo_responsavel,
    gp.IdPessoa AS id_pessoa
FROM BdDIP.dbo.NERObrigacao o
INNER JOIN BdDIP.dbo.NERDecisao d 
    ON d.IdNerDecisao = o.IdNerDecisao
INNER JOIN processo.dbo.Processos p
    ON p.IdProcesso = d.IdProcesso
INNER JOIN processo.dbo.Orgaos org 
    ON p.IdOrgaoEnvolvido = org.IdOrgao 
INNER JOIN processo.dbo.Pro_ProcessosResponsavelDespesa pprd 
    ON pprd.IdProcesso = p.IdProcesso 
INNER JOIN processo.dbo.GenPessoa gp 
    ON gp.IdPessoa = pprd.IdPessoa 
LEFT JOIN BdDIP.dbo.ObrigacaoProcessada op
    ON op.IdNerObrigacao = o.IdNerObrigacao
WHERE op.IdObrigacaoProcessada IS NULL;
"""

df_obrigacao_processar = pd.read_sql(sql_obrigacao_processar, conn_processo)


In [None]:
sql_recomendacao_processar = """
SELECT 
    r.IdNerRecomendacao,
    r.IdNerDecisao,
    r.Ordem,
    r.DescricaoRecomendacao,
    d.IdProcesso,
    CONCAT(p.Numero_Processo, '/', p.Ano_Processo) AS processo,
    org.Nome AS orgao_responsavel,
    org.IdOrgao AS id_orgao_responsavel,
    gp.Nome AS nome_responsavel,
    gp.Documento AS documento_responsavel,
    gp.TipoPessoa AS tipo_responsavel,
    gp.IdPessoa AS id_pessoa
FROM BdDIP.dbo.NERRecomendacao r
INNER JOIN BdDIP.dbo.NERDecisao d 
    ON d.IdNerDecisao = r.IdNerDecisao
INNER JOIN processo.dbo.Processos p
    ON p.IdProcesso = d.IdProcesso
INNER JOIN processo.dbo.Orgaos org 
    ON p.IdOrgaoEnvolvido = org.IdOrgao 
INNER JOIN processo.dbo.Pro_ProcessosResponsavelDespesa pprd 
    ON pprd.IdProcesso = p.IdProcesso 
INNER JOIN processo.dbo.GenPessoa gp 
    ON gp.IdPessoa = pprd.IdPessoa 
LEFT JOIN BdDIP.dbo.RecomendacaoProcessada rp
    ON rp.IdNerRecomendacao = r.IdNerRecomendacao
WHERE rp.IdRecomendacaoProcessada IS NULL;
"""

df_recomendacao_processar = pd.read_sql(sql_recomendacao_processar, conn_processo)

In [16]:
df_obrigacao_processar

Unnamed: 0,IdNerObrigacao,IdNerDecisao,Ordem,DescricaoObrigacao,IdProcesso,processo,orgao_responsavel,id_orgao_responsavel,nome_responsavel,documento_responsavel,tipo_responsavel,id_pessoa
0,4,19,0,determinação à autoridade responsável pelo IPE...,542777,100071/2021,INSTITUTO DE PREVIDÊNCIA DOS SERVIDORES DO RN,308,MARIA SALISETE SALES COSTA,36995410459,F,55328
1,8,30,0,A NOTIFICAÇÃO do Instituto de Previdência Soci...,541905,100843/2020,INSTITUTO DE PREVIDÊNCIA DOS SERVIDORES DO RN,308,MARIA DAS NEVES DA SILVA MATOS,45535736449,F,54974
2,10,34,0,determinação à autoridade responsável pelo Ins...,542059,100871/2020,INSTITUTO DE PREVIDÊNCIA DOS SERVIDORES DO RN,308,MARILUCE PEREIRA DA SILVA,29720575468,F,55013
3,7,28,0,expedição de determinação ao atual gestor do I...,541900,100838/2020,INSTITUTO DE PREVIDÊNCIA DOS SERVIDORES DO RN,308,MARIA AUXILIADORA LAMAS FERNANDES DE OLIVEIRA,46611517472,F,54969
4,9,33,0,NOTIFICAÇÃO do Instituto de Previdência Social...,542058,100870/2020,INSTITUTO DE PREVIDÊNCIA DOS SERVIDORES DO RN,308,MARILENE PEREIRA MARQUES DA SILVA,16054547453,F,55012
...,...,...,...,...,...,...,...,...,...,...,...,...
96,47,282,1,"expedição de obrigação de fazer, em sede caute...",612549,001605/2025,CÂMARA MUNICIPAL DE SERRA NEGRA DO NORTE,158,JAIRO SOARES FLAUZINO,05885069409,F,96517
97,45,274,0,determinação ao Instituto de Previdência de Sã...,527062,002153/2020,INSTITUTO PREVIDENCIÁRIO DE SÃO PAULO DO POTENGI,1259,BRUNO GUILHERME D. M. ARAÚJO,04660783405,F,27718
98,46,282,0,cumprindo ao Presidente da Câmara de Vereadore...,612549,001605/2025,CÂMARA MUNICIPAL DE SERRA NEGRA DO NORTE,158,ALYSSON MOISÉS DE MEDEIROS,03163340490,F,5470
99,47,282,1,"expedição de obrigação de fazer, em sede caute...",612549,001605/2025,CÂMARA MUNICIPAL DE SERRA NEGRA DO NORTE,158,ALYSSON MOISÉS DE MEDEIROS,03163340490,F,5470
