In [16]:
import re
import os
import json
import csv
import duckdb as db
# from pydantic import BaseModel
from typing import Any, Dict, List, Tuple, TypedDict, Annotated
from textwrap import dedent
# from crewai_tools import tool, FileWriterTool
from crewai import Agent, Crew, Process, Task
# from langchain_ollama.llms import OllamaLLM
from langchain_groq import ChatGroq
from langchain_google_genai import ChatGoogleGenerativeAI

* 'allow_population_by_field_name' has been renamed to 'populate_by_name'
* 'smart_union' has been removed


# CrewAI

In [6]:
# model = ChatGroq(
#     model="llama3-8b-8192",
#     temperature = 0,
# )

In [7]:
model = ChatGroq(
    model="llama-3.1-70b-versatile",
    temperature=0
)

In [8]:
# model = ChatGoogleGenerativeAI(
#     model='gemini-1.5-pro-exp-0801',
#     temperature=0
# )

In [63]:
from os import listdir
from os.path import isfile, join

In [44]:
def query_list(file_path: str) -> List:
    with open(file_path) as f:
        s = f.read()
    
    return s

In [80]:
def load_queries(folder_path: str):
    file_list = [f for f in listdir(folder_path) if isfile(join(folder_path, f))]
    file_list.sort()
    queries = []  

    for sql_file in file_list:
        q = query_list(join(folder_path,sql_file))
        queries.append({
            'query_name': 'teste',
            'sql_code': q 
        })          

    return queries

In [81]:
path = 'sql_files/bn_beneficiario/'

In [82]:
teste = load_queries(path)

In [83]:
teste

[{'query_name': 'teste',
  'sql_code': "SELECT \n       BEN.HANDLE                                                                         AS ID_BENEFICIARIO\n      ,FAM.HANDLE                                                                         AS ID_FAMILIA\n      ,CON.HANDLE                                                                         AS ID_CONTRATANTE\n      ,NVL(LOT.HANDLE,0)                                                                  AS ID_CONTRATANTE_LOT\n      ,PLA.HANDLE                                                                         AS ID_PLANO\n      ,(SELECT MAX(BEN_TIT.HANDLE) \n        FROM   SAM_BENEFICIARIO BEN_TIT \n        WHERE  BEN_TIT.FAMILIA   = BEN.FAMILIA \n        AND    BEN_TIT.EHTITULAR = 'S')                                                   AS ID_BENEFICIARIO_RESP\n      ,CASE\n         WHEN CON.LOCALFATURAMENTO = 'C' THEN CPES.HANDLE --CPES\n         WHEN CON.LOCALFATURAMENTO = 'L' THEN LPES.HANDLE --LPES\n         WHEN CON.LOCAL

In [45]:
q = query_list('sql_files/bn_beneficiario/01_beneficiario.sql')

In [46]:
queries = []  
queries.append({
    'step_name': 'teste',
    'sql_code': q 
})  

In [47]:
queries

[{'step_name': 'teste',
  'sql_code': "SELECT \n       BEN.HANDLE                                                                         AS ID_BENEFICIARIO\n      ,FAM.HANDLE                                                                         AS ID_FAMILIA\n      ,CON.HANDLE                                                                         AS ID_CONTRATANTE\n      ,NVL(LOT.HANDLE,0)                                                                  AS ID_CONTRATANTE_LOT\n      ,PLA.HANDLE                                                                         AS ID_PLANO\n      ,(SELECT MAX(BEN_TIT.HANDLE) \n        FROM   SAM_BENEFICIARIO BEN_TIT \n        WHERE  BEN_TIT.FAMILIA   = BEN.FAMILIA \n        AND    BEN_TIT.EHTITULAR = 'S')                                                   AS ID_BENEFICIARIO_RESP\n      ,CASE\n         WHEN CON.LOCALFATURAMENTO = 'C' THEN CPES.HANDLE --CPES\n         WHEN CON.LOCALFATURAMENTO = 'L' THEN LPES.HANDLE --LPES\n         WHEN CON.LOCALF

In [48]:
sql_analyst = Agent(
    role = "Senior Data Analyst",
    goal = "Analyse and complex sql queries and extract table name vs column name relation from all tables on the querie",
    backstory=dedent(
        """
        You're a highly specialized developed to dissect and understand complex SQL queries,
        you could quickly and accurately extract essential information from intricate SQL statements.
        Your key traits are Analytical prowess, Attention to detail, Vast knowledge of SQL syntax 
        across multiple database systems.
        """
    ),
    llm=model,
    allow_delegation=False
)

In [49]:
extract_tables_columns = Task(
    description=dedent(
        """
        Analyse this SQL querie {sql_code}.
        Is very important use table name, not its nick name. Then extract tables e columns names.
        Get only table name and column name following this patterns:
        table_name;columns_name
        table1;columnName_n1
        table1;columnName_n2
        table2;columnName_n1
        table2;columnName_n2
        tableN;columnName_n1
        tableN;columnName_n1
        """
    ),
    expected_output="CSV file",
    agent=sql_analyst,
    #callback=lambda result: result_collector.add_result(result)
)

In [50]:
crew = Crew(
    agents = [sql_analyst],
    tasks = [extract_tables_columns],
    process = Process.sequential,
    verbose = 0,
    memory=False,
    output_log_file="crew.log",
)



In [51]:
##self.export_to_csv()

def save_to_csv(data, filename):
    # Split the string by newlines to get rows
    rows = data.split('\n')
    
    # Split each row by semicolons to get columns
    formatted_data = [row.split(';') for row in rows]
    
    # Output filename
    output_filename = f"{'output'}/{filename}.csv"

    # Write the formatted data to a CSV file
    with open(output_filename, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Column', 'Table'])  # Write header
        writer.writerows(formatted_data)

In [59]:
def process_query(file_name: str, sql_code: list):
    result = crew.kickoff(inputs=sql_code)
    output = extract_tables_columns.output
    save_to_csv(output.raw, file_name)

In [62]:
for query in queries:
    print(query['step_name'])
    file_name = '01_beneficiario'
    result = crew.kickoff(inputs=query)
    output = extract_tables_columns.output
    save_to_csv(output.raw, file_name)
    #result_collector.add_result('export_tables_columns',output.raw,file_name)

teste


In [60]:
type(queries)

list

In [61]:
process_query('01_beneficiario', queries)

TypeError: str.format() argument after ** must be a mapping, not list

In [78]:
# for query in list_of_queries:
#     print(query['step_name'])
#     file_name = convert_to_filename(query['step_name'])
#     result = crew.kickoff(inputs=query)
#     output = extract_tables_columns.output
#     save_to_csv(output.raw, file_name)
#     #result_collector.add_result('export_tables_columns',output.raw,file_name)

BENEFICIÁRIO
BUSCA MICROSIGA
CONTRATO
FAMILIA PESSOA RESPONSÁVEL
FAMILIA TITULAR RESPONSÁVEL
HANDLE_BENEFICIARIO
LOTAÇÃO
SAM_FAMILIA_TETO_PF
SEM SETOR


# Processa CSVs

In [45]:
db.sql(
"""     
    with todas as (
        select * from 'output/benefici_rio.csv' 
        union all
        select * from 'output/contrato.csv' 
        union all
        select * from 'output/familia_pessoa_respons_vel.csv'
        union all
        select * from 'output/familia_titular_respons_vel.csv'
        union all
        select * from 'output/lota__o.csv'
    )
    select distinct todas.* from todas
    order by todas.Column
""")

┌────────────────────┬────────────────────────────────┐
│       Column       │             Table              │
│      varchar       │            varchar             │
├────────────────────┼────────────────────────────────┤
│ AGENCIA            │ SFN_CONTAFIN_COMPLEMENTO       │
│ AGENCIA            │ SFN_AGENCIA                    │
│ ANOTACAO           │ SAM_FAMILIA_ANOTADM            │
│ ATENDIMENTOATE     │ SAM_BENEFICIARIO               │
│ BAIRRO             │ SAM_ENDERECO                   │
│ BANCO              │ SFN_CONTAFIN_COMPLEMENTO       │
│ BEMESTARESAUDE     │ K_SAM_BENEFICIARIO_APOIOSAUDE  │
│ BEN                │ HANDLE                         │
│ BENEFICIARIO       │ SAM_BENEFICIARIO_EVENTO        │
│ BENEFICIARIO       │ SFN_CONTAFIN                   │
│    ·               │      ·                         │
│    ·               │      ·                         │
│    ·               │      ·                         │
│ TITULAR            │ SAM_BENEFICIARIO_TITULAR 