In [27]:
import xml.etree.ElementTree as ET
import logging
import re
import pygraphviz as pgv
from pydantic import BaseModel
from typing import Any, Dict, List, Tuple, TypedDict, Annotated
from textwrap import dedent
from crewai_tools import tool, FileWriterTool
from crewai import Agent, Crew, Process, Task
from langchain_ollama.llms import OllamaLLM
from langchain_groq import ChatGroq

In [28]:
pentaho_file_path = '../data/benef_transf.ktr'

In [29]:
def parse_ktr_file(file_path) -> ET.Element:
    try:
        tree = ET.parse(file_path)
        root = tree.getroot()
    except ET.ParseError as e:
        logging.error(f"Error parsing KTR file: {e}")
        raise
    return root

In [30]:
def extract_execution_sequence(root: ET.Element) -> List[Tuple[str, str]]:
    hops = []
    for hop in root.findall('.//hop'):
        from_step = hop.find('from').text
        to_step = hop.find('to').text
        enabled = hop.find('enabled').text
        if enabled == 'Y':
            hops.append((from_step, to_step))
    
    return hops    

In [31]:
def create_flow_graph(hops: List[Tuple[str, str]], output_file: str = "flow_graph.png"):
    graph = pgv.AGraph(directed=True)
    
    for from_step, to_step in hops:
        #print(f'from: {from_step} to {to_step}')

        graph.add_node(from_step, shape="box")
        graph.add_node(to_step, shape="box")
        graph.add_edge(from_step, to_step)
    
    graph.layout(prog='dot')
    graph.draw(output_file)


    logging.info(f"Data flow saved in {output_file}")

In [32]:
def extract_sql_queries(steps: List[Tuple[str, str]]) -> Dict[str, Any]:
    active_steps = set(steps for seq in state["sequence"] for step in seq)
    sql_steps = state["root"].findall(".//step")
    queries = []
    
    for step in sql_steps:
        step_name = step.find("name").text
        step_type = step.find("type").text
        if step_type in ['TableInput', 'DBJoin'] and step_name in active_steps:
            sql_element = step.find("sql")
            if sql_element is not None and sql_element.text:
                queries.append({
                    "step_name": step_name,
                    "step_type": step_type,
                    "sql": sql_element.text
                })
    
    state["queries"] = queries
    return state

In [33]:
root = parse_ktr_file(pentaho_file_path)

In [34]:
hops = extract_execution_sequence(root)

In [35]:
active_steps = []
for i in hops:
    active_steps.append(i[0])

In [36]:
list(dict.fromkeys(active_steps))

['Switch / Case LOCAL FATURAMENTO',
 'Switch / Case PESSOA RESPONSÁVEL',
 'Switch / Case TITULAR RESPONSÁVEL',
 'Dummy (do nothing)',
 'Remover colunas',
 'BENEFICIÁRIO',
 'SAM_FAMILIA_TETO_PF',
 'SEM SETOR',
 'Insert / Update - BN_BENEFICIARIO',
 'Filter rows',
 'BUSCA MICROSIGA',
 'HANDLE_BENEFICIARIO',
 'Blocking Step',
 'QTD_INCATU_BN_BENEFICIARIO',
 'Blocking Step 2',
 'CONTRATO',
 'FAMILIA PESSOA RESPONSÁVEL',
 'FAMILIA TITULAR RESPONSÁVEL',
 'Insert / Update - BN_RESP_FINANCEIRO',
 'LOTAÇÃO',
 'QTD_INCATU_BN_RESP_FINANCEIRO']

## Testes

In [37]:
class KTRTransformation():
    def __init__(self):
        self.name: str = ""
        self.order: List[str] = []
        self.steps: List[Step] = []

class Step:
    def __init__(self):
        self.name: str = ""
        self.type: str = ""

class SQLStep(Step):
    def __init__(self):
        super().__init__()
        self.sql: str = ""
        self.parameters: List[Parameter] = []

class Parameter:
    def __init__(self):
        self.name: str = ""
        self.type: str = ""

class FilterStep(Step):
    def __init__(self):
        super().__init__()
        self.send_true_to: str = ""
        self.send_false_to: str = ""
        self.conditions: List[Condition] = []

class Condition:
    def __init__(self):
        self.leftvalue: str = ""
        self.operator: str = ""
        self.rightvalue: str = ""

class InsertUpdateStep(Step):
    def __init__(self):
        super().__init__()
        self.table: str = ""
        self.keys: List[KeyValue] = []
        self.values: List[KeyValue] = []

class KeyValue:
    def __init__(self):
        self.name: str = ""
        self.field: str = ""

In [38]:
def parse_ktr_file(file_path: str) -> KTRTransformation:
    try:
        tree = ET.parse(file_path)
        root = tree.getroot()
    except ET.ParseError as e:
        logging.error(f"Error parsing KTR file: {e}")
        raise

    transformation = KTRTransformation()
    
    # Extract transformation name
    transformation.name = root.find("info/name").text

    # Extract order and set active steps
    active_steps = set()
    order_element = root.find("order")
    if order_element is not None:
        for hop in order_element.findall("hop"):
            from_step = hop.find("from").text
            to_step = hop.find("to").text
            transformation.order.append((from_step, to_step))
            active_steps.add(from_step)
            active_steps.add(to_step)            

    # Extract steps
    for step_element in root.findall("step"):
        step_name = step_element.find("name").text
        step_type = step_element.find("type").text

        # Only process steps that are in the active_steps set
        if step_name not in active_steps:
            continue

        if step_type == "DBJoin" or step_type == "TableInput":
            step = SQLStep()
            step.name = step_name
            step.type = step_type
            step.sql = step_element.find("sql").text
            for param in step_element.findall("parameter/field"):
                parameter = Parameter()
                parameter.name = param.find("name").text
                parameter.type = param.find("type").text
                step.parameters.append(parameter)

        elif step_type == "FilterRows":
            step = FilterStep()
            step.name = step_name
            step.type = step_type
            step.send_true_to = step_element.find("send_true_to").text
            step.send_false_to = step_element.find("send_false_to").text
            for condition in step_element.findall("conditions"):
                cond = Condition()
                cond.leftvalue = condition.find("leftvalue").text
                cond.operator = condition.find("function").text
                cond.rightvalue = condition.find("value/text").text
                step.conditions.append(cond)

        elif step_type == "InsertUpdate":
            step = InsertUpdateStep()
            step.name = step_name
            step.type = step_type
            step.table = step_element.find("lookup/table").text
            for key in step_element.findall("lookup/key"):
                kv = KeyValue()
                kv.name = key.find("name").text
                kv.field = key.find("field").text
                step.keys.append(kv)
            for value in step_element.findall("lookup/value"):
                kv = KeyValue()
                kv.name = value.find("name").text
                kv.field = value.find("rename").text
                step.values.append(kv)

        else:
            step = Step()
            step.name = step_name
            step.type = step_type

        transformation.steps.append(step)

    return transformation

In [39]:
def step_to_markdown(step: Step) -> str:
    md = f"## Step: {step.name}\n\n"
    md += f"Type: {step.type}\n\n"

    if isinstance(step, SQLStep):
        md += "### SQL Query\n\n"
        md += f"```sql\n{step.sql}\n```\n\n"
        if step.parameters:
            md += "Parameters:\n"
            for param in step.parameters:
                md += f"- {param.name} ({param.type})\n"

    elif isinstance(step, FilterStep):
        md += f"Send True To: {step.send_true_to}\n"
        md += f"Send False To: {step.send_false_to}\n\n"
        md += "Conditions:\n"
        for condition in step.conditions:
            md += f"- {condition.leftvalue} {condition.operator} {condition.rightvalue}\n"

    elif isinstance(step, InsertUpdateStep):
        md += f"Table: {step.table}\n\n"
        md += "Keys:\n"
        for key in step.keys:
            md += f"- {key.name}: {key.field}\n"
        md += "\nValues:\n"
        for value in step.values:
            md += f"- {value.name}: {value.field}\n"

    return md

def convert_to_filename(input_string):
    s = re.sub(r'[^a-zA-Z0-9_\-\.~]', '_', input_string)
    return s.lower()


def transformation_to_markdown(transformation: KTRTransformation) -> str:
    md = f"# Transformation: {transformation.name}\n\n"
    md += "## Execution Order\n\n"
    for step_name in transformation.order:
        md += f"- {step_name}\n"
    md += "\n## Steps\n\n"
    for step in transformation.steps:
        md += step_to_markdown(step) + "\n"
    # return md

    md_file_name = convert_to_filename(transformation.name)+'.md'

    try:
        with open(md_file_name, 'w', encoding='utf-8') as f:
            f.write(md)
        logging.info(f"Markdown documentation exported to {md_file_name}")
    except Exception as e:
        logging.error(f"Error exporting markdown to file: {e}")
        raise
    return md

In [40]:
T = parse_ktr_file(pentaho_file_path)

In [41]:
transformation_to_markdown(T)

"# Transformation: BN_BENEFICIARIO - insert/update\n\n## Execution Order\n\n- ('Switch / Case LOCAL FATURAMENTO', 'Switch / Case TITULAR RESPONSÁVEL')\n- ('Switch / Case PESSOA RESPONSÁVEL', 'Dummy (do nothing) 2')\n- ('Switch / Case PESSOA RESPONSÁVEL', 'FAMILIA PESSOA RESPONSÁVEL')\n- ('Switch / Case TITULAR RESPONSÁVEL', 'FAMILIA TITULAR RESPONSÁVEL')\n- ('Switch / Case TITULAR RESPONSÁVEL', 'Switch / Case PESSOA RESPONSÁVEL')\n- ('Dummy (do nothing)', 'Remover colunas')\n- ('Remover colunas', 'Switch / Case LOCAL FATURAMENTO')\n- ('BENEFICIÁRIO', 'SAM_FAMILIA_TETO_PF')\n- ('SAM_FAMILIA_TETO_PF', 'Dummy (do nothing)')\n- ('SEM SETOR', 'Insert / Update - BN_BENEFICIARIO')\n- ('Insert / Update - BN_BENEFICIARIO', 'QTD_INCATU_BN_BENEFICIARIO')\n- ('Filter rows', 'BUSCA MICROSIGA')\n- ('BUSCA MICROSIGA', 'Insert / Update - BN_BENEFICIARIO')\n- ('HANDLE_BENEFICIARIO', 'BENEFICIÁRIO')\n- ('Filter rows', 'SEM SETOR')\n- ('Blocking Step', 'Set Variables')\n- ('QTD_INCATU_BN_BENEFICIARIO', '

# CrewAI

In [42]:
model = ChatGroq(
    model="llama3-8b-8192",
    temperature = 0,
)

In [43]:
type(T)

__main__.KTRTransformation

In [109]:
import json
import csv

In [151]:
class ResultCollector:
    def __init__(self):
        self.results = []

    def add_result(self, task_name, result):
        self.results.append({"task": task_name, "result": result})

    def export_to_json(self, filename):
        with open(filename, 'w') as f:
            json.dump(self.results, f, indent=2)

    def export_to_csv(self, filename):
        if not self.results:
            return

        keys = self.results[0].keys()
        with open(filename, 'w', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=keys)
            writer.writeheader()
            writer.writerows(self.results)

In [152]:
result_collector = ResultCollector()

In [153]:
T.steps

[<__main__.SQLStep at 0x71c509429ba0>,
 <__main__.SQLStep at 0x71c509428220>,
 <__main__.Step at 0x71c5094286d0>,
 <__main__.Step at 0x71c5094290f0>,
 <__main__.SQLStep at 0x71c509428d30>,
 <__main__.Step at 0x71c509429ea0>,
 <__main__.Step at 0x71c509428a90>,
 <__main__.SQLStep at 0x71c509429cc0>,
 <__main__.SQLStep at 0x71c509428370>,
 <__main__.FilterStep at 0x71c509429d80>,
 <__main__.SQLStep at 0x71c509429cf0>,
 <__main__.InsertUpdateStep at 0x71c509429f30>,
 <__main__.InsertUpdateStep at 0x71c509428e50>,
 <__main__.SQLStep at 0x71c5093642e0>,
 <__main__.Step at 0x71c509365120>,
 <__main__.Step at 0x71c5093651e0>,
 <__main__.Step at 0x71c509365240>,
 <__main__.SQLStep at 0x71c5093652a0>,
 <__main__.SQLStep at 0x71c509365300>,
 <__main__.Step at 0x71c5093653c0>,
 <__main__.Step at 0x71c509365480>,
 <__main__.Step at 0x71c5093654e0>,
 <__main__.Step at 0x71c509365540>,
 <__main__.Step at 0x71c5093655a0>]

In [154]:
def query_list(steps: list):
    """ 
    Input a tranformed ktr structure, output a list of queries.
    """
    queries = []  

    for step in T.steps:
        if step.type in ['DBJoin', 'TableInput']:
            queries.append({
                'step_name': step.name,
                'sql_code': step.sql 
            })  
    return queries

In [155]:
list_of_queries = query_list(T.steps)

In [156]:
list_of_queries

[{'step_name': 'BENEFICIÁRIO',
  'sql_code': "SELECT \n       BEN.HANDLE                                                                         AS ID_BENEFICIARIO\n      ,FAM.HANDLE                                                                         AS ID_FAMILIA\n      ,CON.HANDLE                                                                         AS ID_CONTRATANTE\n      ,NVL(LOT.HANDLE,0)                                                                  AS ID_CONTRATANTE_LOT\n      ,PLA.HANDLE                                                                         AS ID_PLANO\n      ,(SELECT MAX(BEN_TIT.HANDLE) \n        FROM   SAM_BENEFICIARIO BEN_TIT \n        WHERE  BEN_TIT.FAMILIA   = BEN.FAMILIA \n        AND    BEN_TIT.EHTITULAR = 'S')                                                   AS ID_BENEFICIARIO_RESP\n      ,CASE\n         WHEN CON.LOCALFATURAMENTO = 'C' THEN CPES.HANDLE --CPES\n         WHEN CON.LOCALFATURAMENTO = 'L' THEN LPES.HANDLE --LPES\n         WHEN CON

In [157]:
file_write_tool = FileWriterTool()

In [158]:
sql_analyst = Agent(
    role = "Senior Data Analyst",
    goal = "Analyse and complex sql queries and extract table name vs column name relation from all tables on the querie",
    backstory=dedent(
        """
        You're a highly specialized developed to dissect and understand complex SQL queries,
        you could quickly and accurately extract essential information from intricate SQL statements.
        Your key traits are Analytical prowess, Attention to detail, Vast knowledge of SQL syntax 
        across multiple database systems.
        """
    ),
    llm=model,
    allow_delegation=False
)

In [159]:
report_writer = Agent(
    role = "Senior Business Analyst",
    goal = "Write a document based on the work of analyst",
    backstory = dedent(
        """
        You writing still is well known for clear and effective communication.
        You summarize Pentahor workflow artifacts and sql queries business rules into bullet point contain de most import details.
        """
    ),
    llm=model,
    allow_delegation=False
)

In [160]:
extract_tables_columns = Task(
    description = dedent(
        """
        Analyse {sql_code} sql queries and {step_name} to refer about step where the sql code is executed in the workflow.
        """
    ),
    expected_output = "Save a csv file with with relation tables, columns names, in two columns 'table_name, column_name'.",
    #tools=[file_write_tool],
    agent = sql_analyst,
    #output_file='output/{step_name}.csv',
    #create_directory = True,
    callback=lambda result: result_collector.add_result("extract_tables_columns", result)    
)

In [161]:
export_tables_columns = Task(
    description = "Save csv file using {step_name} as file name. And two columns table_name and column_name.",
    expected_output = "File saved successfully",
    agent = sql_analyst,
    tools=[file_write_tool],
    context = [extract_tables_columns],
    output_file='output/{step_name}.csv',
    create_directory = True,
    callback=lambda result: result_collector.add_result("export_tables_columns", result)
)

In [162]:
analyze_data = Task(
    description = "Analyze the query {sql_code} and write an analysis.",
    expected_output = "Detailed analysis from text for non technical public.",
    agent = sql_analyst,

)

In [163]:
write_report = Task(
    description=dedent(
        """
        Write an tecnical report from previous analysis, this document will be used 
        to migrate the actual project to cloud enviroment. 
        """
    ),
    expected_output = "Markdown report",
    agent=report_writer,
    context=[analyze_data]
)

In [164]:
# crew = Crew(
#     agents = [sql_analyst, report_writer],
#     tasks = [extract_tables_columns,format_queries,analyze_data,write_report],
#     process = Process.sequential,
#     verbose=1,
#     memory=False,
#     output_log_file="crew.log",
# )

In [170]:
crew = Crew(
    agents = [sql_analyst],
    tasks = [extract_tables_columns],
    process = Process.sequential,
    verbose = 0,
    memory=False,
    output_log_file="crew.log",
)



In [166]:
#list_of_queries

In [167]:
result = crew.kickoff_for_each(inputs=list_of_queries)



[1m[95m [2024-08-28 10:01:54][DEBUG]: == Working Agent: Senior Data Analyst[00m
[1m[95m [2024-08-28 10:01:54][INFO]: == Starting Task: 
Analyse SELECT 
       BEN.HANDLE                                                                         AS ID_BENEFICIARIO
      ,FAM.HANDLE                                                                         AS ID_FAMILIA
      ,CON.HANDLE                                                                         AS ID_CONTRATANTE
      ,NVL(LOT.HANDLE,0)                                                                  AS ID_CONTRATANTE_LOT
      ,PLA.HANDLE                                                                         AS ID_PLANO
      ,(SELECT MAX(BEN_TIT.HANDLE) 
        FROM   SAM_BENEFICIARIO BEN_TIT 
        WHERE  BEN_TIT.FAMILIA   = BEN.FAMILIA 
        AND    BEN_TIT.EHTITULAR = 'S')                                                   AS ID_BENEFICIARIO_RESP
      ,CASE
         WHEN CON.LOCALFATURAMENTO = 'C' THEN CPES.HANDLE



[1m[92m [2024-08-28 10:01:57][DEBUG]: == [Senior Data Analyst] Task output: Here is the final answer:

```
BENEFICIARIO,RELATION_TABLE
BENEFICIARIO,SELECT
BENEFICIARIO,CONTRATO
BENEFICIARIO,FAMILIA
BENEFICIARIO,MATRICULA
BENEFICIARIO,CARTAO
BENEFICIARIO,REPASSE
BENEFICIARIO,CONTRATO_LOTACAO
BENEFICIARIO,UNIMED
BENEFICIARIO,UNIMED_DESTINO
BENEFICIARIO,UNIMED_DESTINO_RAZAOSOCIAL
BENEFICIARIO,ENDEREÇO
BENEFICIARIO,ENDERECOCORRESPONDENCIA
BENEFICIARIO,ENDERECORESIDENCIAL
BENEFICIARIO,ESTADO
BENEFICIARIO,MUNICIPIO
BENEFICIARIO,PAIS
BENEFICIARIO,PAISEMISSOR
BENEFICIARIO,ESTADOCIVIL
BENEFICIARIO,CONVENIO
BENEFICIARIO,CBO
BENEFICIARIO,PLANO
BENEFICIARIO,TIPODEPENDENTE
BENEFICIARIO,MOTIVOCANCELAMENTO
BENEFICIARIO,DATA_REATIVACAO
BENEFICIARIO,DATA_CANCELAMENTO
BENEFICIARIO,INTERCAMBIO
BENEFICIARIO,ANOTACAO_ADM_FAMILIA
BENEFICIARIO,POSSUI_BIOMETRIA
BENEFICIARIO,ORIGEM_CARENCIA
BENEFICIARIO,UNIMED_ORIGEM
BENEFICIARIO,UNIMED_DESTINO
BENEFICIARIO,UNIMED_DESTINO_RAZAOSOCIAL
BENEFICIARIO,UNIMED_ORIG



[1m[92m [2024-08-28 10:01:58][DEBUG]: == [Senior Data Analyst] Task output: Table Name, Column Name
SIGA.VW_SRA010, RA_TELEFON
SIGA.VW_SRA010, RA_EMAIL
SIGA.CTT010, CTT_DESC01
SIGA.CTT010, CTT_CUSTO
SIGA.VW_SRA010, RA_MAT

I extracted the table name and column name relation from the SQL query. The query is selecting two columns from two tables: SIGA.VW_SRA010 and SIGA.CTT010. The columns are RA_TELEFON, RA_EMAIL, CTT_DESC01, and CTT_CUSTO. The table names are SIGA.VW_SRA010 and SIGA.CTT010.

I will save the result in a CSV file named "BUSCA MICROSIGA.csv" with the following content:

Table Name,Column Name
SIGA.VW_SRA010,RA_TELEFON
SIGA.VW_SRA010,RA_EMAIL
SIGA.CTT010,CTT_DESC01
SIGA.CTT010,CTT_CUSTO
SIGA.VW_SRA010,RA_MAT

[00m
[1m[95m [2024-08-28 10:01:58][DEBUG]: == Working Agent: Senior Data Analyst[00m
[1m[95m [2024-08-28 10:01:58][INFO]: == Starting Task: 
Analyse SELECT --DISTINCT 
  'P' AS TIPO_RESPONSAVEL,
  CPES.HANDLE AS ID_RESP_FINANCEIRO,
  CPES_ENDC.BAIRRO AS BAIRRO



[1m[92m [2024-08-28 10:02:00][DEBUG]: == [Senior Data Analyst] Task output: CONTRATO.csv

Table Name, Column Name
BEN, HANDLE
BEN, FAMILIA
FAM, HANDLE
FAM, CONTRATO
CON, HANDLE
CPES, HANDLE
CPES, CNPJ_CPF
CPES, DATA_EXCLUSAO
CPES, DATA_INCLUSAO
CPES, DATA_NASCIMENTO
CPES, DDD_CELULAR
CPES, DDD
CPES, DDD_COMERCIAL
CPES, EMAIL
CPES, FONE
CPES, FONE_CELULAR
CPES, FONE_COMERCIAL
CPES, LOGRADOURO
CPES, NUMERO
CPES, TIPO_RESPONSAVEL
CPES, ID_RESP_FINANCEIRO
CPES, BAIRRO
CPES, CEP
CPES, CIDADE
CPES, CONTRAT_PAGADOR_NOME
CPES, COMPLEMENTO
CPES, DEBITO_AUT
CPES, DEBITO_AUT_AGENCIA
CPES, DEBITO_AUT_BANCO
CPES, DEBITO_AUT_CONTA
CPES, DEBITO_AUT_DIGITO
CPES, RAMO_DE_ATIVIDADE
CPES, TIPO_DOCUMENTO
CPES, UF
CPES, DATA_INICIAL_DEBITO_AUT
FAM, HANDLE
FAM, ID_FAMILIA
CON, HANDLE
CON, CONTRATO
RAMATI, HANDLE
RAMATI, DESCRICAO
TIPDOC, HANDLE
TIPDOC, DESCRICAO
ESTADOS, HANDLE
ESTADOS, SIGLA
MUNICIPIOS, HANDLE
MUNICIPIOS, NOME
CPES_ENDC, HANDLE
CPES_ENDC, BAIRRO
CPES_ENDC, CEP
CPES_ENDC, COMPLEMENTO
CPES



[1m[92m [2024-08-28 10:02:01][DEBUG]: == [Senior Data Analyst] Task output: Here is the final answer in the format of a CSV file with two columns: Table Name and Column Name.

FAMILIA PESSOA RESPONSÁVEL.csv

Table Name,Column Name
BEN,HANDLE
BEN,FAMILIA
FAM,HANDLE
FAM,CONTRATO
CON,HANDLE
FPES,HANDLE
FPES,TIPO_RESPONSAVEL
FPES,ID_RESP_FINANCEIRO
FPES_ENDC,BAIRRO
FPES_ENDC,CEP
FPES_MUNENDC,NOME
FPES,CNPJ_CPF
FPES_ENDC,COMPLEMENTO
FPES,NOME
FPES,DATA_EXCLUSAO
FPES,DATA_INCLUSAO
FPES,DATANASCIMENTO
FPES_ENDC,DDDCELULAR
FPES_ENDC,DDD
FPES_ENDC,DDD_COMERCIAL
FPES_EMAIL,EMAIL
FPES_ENDC,FONE
FPES_ENDC,FONE_CELULAR
FPES_ENDC,FONE_COMERCIAL
FPES_ENDC,LOGRADOURO
FPES_ENDC,NUMERO
FPES_ENDC,NUMEROCELULAR
FPES_ENDC,PREFIXO1
FPES_ENDC,PREFIXOCELULAR
FPES_ENDC,NUMERO1
FPES_ENDC,NUMEROCELULAR
RAMATI,DESCRICAO
TIPDOC,DESCRICAO
CFC,DATAINICIAL
CFC,DATAFINAL
CFC,FAMILIA
MUNICIPIOS,NOME
ESTADOS,SIGLA
SAM_BENEFICIARIO,HANDLE
SAM_FAMILIA,HANDLE
SAM_CONTRATO,HANDLE
SAM_ENDERECO,HANDLE
SFN_PESSOA,HANDLE
SFN_



[1m[92m [2024-08-28 10:02:03][DEBUG]: == [Senior Data Analyst] Task output: After analyzing the SQL query, I have extracted the table name vs column name relation from all tables involved in the query. Here is the result:

**FAMILIA TITULAR RESPONSÁVEL.csv**

| Table Name | Column Name |
| --- | --- |
| SAM_BENEFICIARIO | HANDLE |
| SAM_BENEFICIARIO | FAMILIA |
| SAM_CONTRATO | HANDLE |
| SAM_FAMILIA | HANDLE |
| SAM_MATRICULA | HANDLE |
| SAM_ENDERECO | HANDLE |
| MUNICIPIOS | HANDLE |
| SAM_CBO | HANDLE |
| SFN_CONTAFIN | HANDLE |
| SFN_TIPODOCUMENTO | HANDLE |
| SFN_CONTAFIN_COMPLEMENTO | HANDLE |
| ESTADOS | HANDLE |
| SAM_BENEFICIARIO | TIPO_RESPONSAVEL |
| SAM_BENEFICIARIO | ID_RESP_FINANCEIRO |
| SAM_ENDERECO | BAIRRO |
| SAM_ENDERECO | CEP |
| MUNICIPIOS | NOME |
| SAM_MATRICULA | CPF |
| SAM_ENDERECO | COMPLEMENTO |
| SAM_BENEFICIARIO | CONTRAT_PAGADOR_NOME |
| SAM_BENEFICIARIO | DATA_EXCLUSAO |
| SAM_BENEFICIARIO | DATA_INCLUSAO |
| SAM_MATRICULA | DATA_NASCIMENTO |
| SAM_E



[1m[92m [2024-08-28 10:02:05][DEBUG]: == [Senior Data Analyst] Task output: The given SQL query is:

`SELECT BEN.HANDLE AS HANDLE_BENEFICIARIO 
FROM   SAM_BENEFICIARIO BEN 
ORDER BY 1 DESC`

The query is selecting the `HANDLE` column from the `SAM_BENEFICIARIO` table and renaming it as `HANDLE_BENEFICIARIO`. The result is ordered in descending order based on the first column (which is the `HANDLE` column).

To extract the table name vs column name relation, I will break down the query as follows:

| Table Name | Column Name |
| --- | --- |
| SAM_BENEFICIARIO | HANDLE |

Since there is only one table and one column mentioned in the query, the relation is straightforward.

To save the relation in a CSV file with the name `HANDLE_BENEFICIARIO`, I will create a file with the following content:

HANDLE_BENEFICIARIO.csv:

"Table Name","Column Name"
"SAM_BENEFICIARIO","HANDLE"

This file meets the expected criteria, with the table name and column name in two columns, and the file name is `H



[1m[92m [2024-08-28 10:02:06][DEBUG]: == [Senior Data Analyst] Task output: Here is the expected output in a CSV file named "LOTAÇÃO.csv" with two columns: Table Name and Column Name:

Table Name,Column Name
BEN,HANDLE
BEN,FAMILIA
BEN,CONTRATO
CON,HANDLE
CON,DATAINICIAL
CON,DATAFINAL
FAM,HANDLE
FAM,CONTRATO
FAM,LOTACAO
CLOT,HANDLE
CLOT,PESSOAFATURAMENTO
CLOT,LOTACAO
LPES,HANDLE
LPES,ENDERECOCORRESPONDENCIA
LPES,ENDERECOCORRESPONDENCIA
LPES_MUNENDC,HANDLE
LPES_MUNENDC,NOME
RAMATI,HANDLE
RAMATI,DESCRICAO
TIPDOC,HANDLE
TIPDOC,DESCRICAO
CFC,HANDLE
CFC,CONTAFINANCEIRA
CFC,DATAINICIAL
CFC,DATAFINAL
CFC,FAMILIA
CONFIN,HANDLE
CONFIN,PESSOA
CONFIN,TIPODOCUMENTOREC
SFN_CONTAFIN,PESSOA
SFN_CONTAFIN,HANDLE
SFN_CONTAFIN_COMPLEMENTO,CONTAFINANCEIRA
SFN_CONTAFIN_COMPLEMENTO,DATAINICIAL
SFN_CONTAFIN_COMPLEMENTO,DATAFINAL
SFN_CONTAFIN_COMPLEMENTO,FAMILIA
SFN_CONTAFIN_TIPODOCUMENTO,CONTAFINANCEIRA
SFN_CONTAFIN_TIPODOCUMENTO,HANDLE
SFN_CONTAFIN_TIPODOCUMENTO,TIPODOCUMENTO
ESTADOS,HANDLE
ESTADOS,SIGLA
S



[1m[92m [2024-08-28 10:02:08][DEBUG]: == [Senior Data Analyst] Task output: Table Name, Column Name
SAM_FAMILIA_TETO_PF, DATAINICIAL
SAM_FAMILIA_TETO_PF, DATAFINAL
SAM_FAMILIA_TETO_PF, VALORTETOPF
SAM_FAMILIA_TETO_PF, FAMILIA
SAM_FAMILIA_TETO_PF, HANDLE

I will save this information in a CSV file named "SAM_FAMILIA_TETO_PF.csv" with the following content:

"Table Name","Column Name"
"SAM_FAMILIA_TETO_PF","DATAINICIAL"
"SAM_FAMILIA_TETO_PF","DATAFINAL"
"SAM_FAMILIA_TETO_PF","VALORTETOPF"
"SAM_FAMILIA_TETO_PF","FAMILIA"
"SAM_FAMILIA_TETO_PF","HANDLE"

This file meets the expected criteria, with the table name and column name in two columns, and the SAM_FAMILIA_TETO_PF table name as the file name.

[00m
[1m[95m [2024-08-28 10:02:08][DEBUG]: == Working Agent: Senior Data Analyst[00m
[1m[95m [2024-08-28 10:02:08][INFO]: == Starting Task: 
Analyse SELECT  NULL AS SETOR_UNIMED,
        '('||ENDR.DDD1||') '||ENDR.PREFIXO1||'-'||ENDR.NUMERO1 AS TELEFONE
FROM    SAM_BENEFICIARIO BEN
LEFT

In [168]:
result_collector.export_to_csv('output/teste.csv')

In [102]:
teste2 = teste['result']

In [107]:
teste2.csv

AttributeError: 'TaskOutput' object has no attribute 'csv'