In [1]:
import xml.etree.ElementTree as ET
import logging
import os
from typing import Any, Dict, List, Tuple
import pandas as pd
import pygraphviz as pgv
from langchain_ollama.llms import OllamaLLM
from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnableSequence

In [2]:
# Path to the KTR file that the Agent parses. It is relative, so it is resolved
# against the kernel's current working directory when the file is opened.
pentaho_file_path = '../data/benef_transf.ktr'

In [3]:
class SQLAnalysisAgent:
    """Ask the LLM for a structured review of each extracted SQL query."""

    def __init__(self, llm: Any):
        """Store the language model that the prompt is piped into."""
        self.llm = llm

    def _chain(self):
        """Build the prompt -> LLM sequence used for every query."""
        sql_prompt = PromptTemplate(
            template="""
            Analyze the following SQL query and provide insights on:
            1. The purpose of the query
            2. Tables and columns used
            3. Any joins or complex operations
            4. Potential optimizations
            5. Suggested improvements for a data warehouse setting

            SQL Query:
            {sql_content}

            Provide your analysis in a structured format.
            """,
            input_variables=["sql_content"],
        )
        return RunnableSequence(sql_prompt, self.llm)

    def analyze(self, queries: List[Dict[str, str]]) -> List[Dict[str, str]]:
        """Run the SQL-review prompt once per query.

        Args:
            queries: Dicts that each provide at least the keys ``'sql'`` and
                ``'step_name'``.

        Returns:
            One dict per input query, in input order, with the keys
            ``"step_name"`` and ``"analysis"`` (the chain's response).
        """
        pipeline = self._chain()

        def review(entry: Dict[str, str]) -> Dict[str, str]:
            # The model is invoked before 'step_name' is read, as before.
            verdict = pipeline.invoke({"sql_content": entry['sql']})
            return {"step_name": entry['step_name'], "analysis": verdict}

        return [review(entry) for entry in queries]

class WorkflowAnalysisAgent:
    """Ask the LLM to assess the workflow as a whole."""

    def __init__(self, llm: Any):
        """Store the language model that the prompt is piped into."""
        self.llm = llm

    def _chain(self):
        """Build the prompt -> LLM sequence for the workflow review."""
        workflow_prompt = PromptTemplate(
            template="""
            Analyze the following workflow and provide insights on:
            1. The overall purpose of the workflow
            2. The sequence of operations
            3. Data flow between steps
            4. Potential bottlenecks or inefficiencies
            5. Suggested improvements for the workflow structure

            Workflow Sequence:
            {sequence}

            Queries:
            {queries}

            SQL Analysis:
            {sql_analysis}

            Provide your analysis in a structured format.
            """,
            input_variables=["sequence", "queries", "sql_analysis"],
        )
        return RunnableSequence(workflow_prompt, self.llm)

    def analyze(self, sequence: List[tuple], queries: List[Dict[str, str]], sql_analysis: List[Dict[str, str]]) -> str:
        """Run the workflow-review prompt.

        Args:
            sequence: Hop pairs describing the execution order.
            queries: The extracted SQL query records.
            sql_analysis: The per-query analysis records.

        Returns:
            The chain's response. Each argument is embedded in the prompt
            through its ``str()`` representation.
        """
        pipeline = self._chain()
        payload = {
            "sequence": str(sequence),
            "queries": str(queries),
            "sql_analysis": str(sql_analysis),
        }
        return pipeline.invoke(payload)

class DocumentationAgent:
    """Ask the LLM to turn the analyses into a markdown document."""

    def __init__(self, llm: Any):
        """Store the language model that the prompt is piped into."""
        self.llm = llm

    def _chain(self):
        """Build the prompt -> LLM sequence for document generation."""
        doc_prompt = PromptTemplate(
            template="""
            Create a comprehensive markdown document that includes:
            1. Executive Summary
            2. Workflow Overview
            3. Detailed Step Analysis
            4. SQL Query Documentation
            5. Identified Issues and Bottlenecks
            6. Recommendations for Improvement

            Use the following information:

            Workflow Analysis:
            {workflow_analysis}

            SQL Analysis:
            {sql_analysis}

            Generate a well-structured markdown document.
            """,
            input_variables=["workflow_analysis", "sql_analysis"],
        )
        return RunnableSequence(doc_prompt, self.llm)

    def generate(self, workflow_analysis: str, sql_analysis: List[Dict[str, str]]) -> str:
        """Run the documentation prompt.

        Args:
            workflow_analysis: Text of the workflow analysis, passed as-is.
            sql_analysis: Per-query analysis records, embedded through
                their ``str()`` representation.

        Returns:
            The chain's response.
        """
        pipeline = self._chain()
        payload = {
            "workflow_analysis": workflow_analysis,
            "sql_analysis": str(sql_analysis),
        }
        return pipeline.invoke(payload)

class ProjectPlanningAgent:
    """Ask the LLM for a project plan derived from the documentation."""

    def __init__(self, llm: Any):
        """Store the language model that the prompt is piped into."""
        self.llm = llm

    def _chain(self):
        """Build the prompt -> LLM sequence for project planning."""
        plan_prompt = PromptTemplate(
            template="""
            Based on the provided documentation, create a project plan to recreate and improve the workflow:
            1. Identify key objectives for the new project
            2. Outline the main phases of the project
            3. Suggest a new data warehouse structure with dimension and fact tables
            4. Propose improvements for each step of the workflow
            5. Recommend technologies and best practices to be used
            6. Outline a testing and validation strategy

            Documentation:
            {documentation}

            Provide a detailed project plan in markdown format.
            """,
            input_variables=["documentation"],
        )
        return RunnableSequence(plan_prompt, self.llm)

    def plan(self, documentation: str) -> str:
        """Run the planning prompt.

        Args:
            documentation: Documentation text inserted into the prompt.

        Returns:
            The chain's response.
        """
        pipeline = self._chain()
        payload = {"documentation": documentation}
        return pipeline.invoke(payload)

# Main agent class: orchestrates parsing, analysis, documentation and planning.
class Agent:
    """Run the end-to-end analysis of a KTR (XML) transformation file.

    The pipeline stages share a plain ``state`` dict. Each stage reads the keys
    written by earlier stages and adds its own:

    * ``root``              - root XML element of the parsed file
    * ``sequence``          - list of ``(from_step, to_step)`` enabled hops
    * ``queries``           - SQL found in active steps of a supported type
    * ``sql_analysis``      - per-query LLM analysis
    * ``workflow_analysis`` - LLM analysis of the workflow
    * ``documentation``     - generated markdown (project plan appended)
    * ``project_plan``      - generated project plan
    """

    # Step types whose <sql> child is extracted for analysis.
    SQL_STEP_TYPES = ('TableInput', 'DBJoin')

    def __init__(self, model: Any, file_path: str):
        """Create the agent and its four LLM-backed sub-agents.

        Args:
            model: Language model handed to every sub-agent.
            file_path: Path of the KTR file to analyse. The markdown output
                file name is this file's base name with a ``.md`` extension
                (no directory component, so it is written relative to the
                current working directory).
        """
        self.file_path = file_path
        self.llm = model

        ktr_filename = os.path.basename(file_path)
        self.markdown_filename = os.path.splitext(ktr_filename)[0] + ".md"

        self.sql_analysis_agent = SQLAnalysisAgent(model)
        self.workflow_analysis_agent = WorkflowAnalysisAgent(model)
        self.documentation_agent = DocumentationAgent(model)
        self.project_planning_agent = ProjectPlanningAgent(model)

    def run(self):
        """Execute every pipeline stage in order and write the markdown file."""
        state: Dict[str, Any] = {}

        # Parse the KTR file
        state = self.parse_ktr_file(state)

        # Extract the execution sequence
        state = self.extract_execution_sequence(state)

        # Extract the SQL queries
        state = self.extract_sql_queries(state)

        # Analyse the SQL queries
        state = self.analyze_sql(state)

        # Analyse the workflow
        state = self.analyze_workflow(state)

        # Generate the documentation
        state = self.generate_documentation(state)

        # Plan the project
        state = self.plan_project(state)

        # Export the documentation to a markdown file
        self.export_markdown_to_file(state)

    def parse_ktr_file(self, state: Dict[str, Any]) -> Dict[str, Any]:
        """Parse ``self.file_path`` as XML and store its root under ``"root"``.

        Raises:
            xml.etree.ElementTree.ParseError: The file is not well-formed XML.
            OSError: The file cannot be opened (for example, it is missing).

        Both kinds of failure are logged and then re-raised unchanged.
        """
        try:
            tree = ET.parse(self.file_path)
            state["root"] = tree.getroot()
        except ET.ParseError as e:
            logging.error("Error parsing KTR file: %s", e)
            raise
        except OSError as e:
            logging.error("Error reading KTR file: %s", e)
            raise
        return state

    def extract_execution_sequence(self, state: Dict[str, Any]) -> Dict[str, Any]:
        """Collect enabled hops as ``(from_step, to_step)`` under ``"sequence"``.

        A hop is kept only when its ``<enabled>`` text is exactly ``'Y'``.
        Hops whose ``<from>`` or ``<to>`` element is missing or empty are
        skipped with a warning instead of raising ``AttributeError`` or
        producing ``None`` step names.
        """
        hops = []
        for hop in state["root"].findall('.//hop'):
            # findtext() returns None for a missing child and '' for an empty
            # one, so neither case needs a separate ``.text`` dereference.
            from_step = hop.findtext('from')
            to_step = hop.findtext('to')
            if not from_step or not to_step:
                logging.warning("Skipping hop with missing <from>/<to> element")
                continue
            if hop.findtext('enabled') == 'Y':
                hops.append((from_step, to_step))

        state["sequence"] = hops
        return state

    def extract_sql_queries(self, state: Dict[str, Any]) -> Dict[str, Any]:
        """Collect SQL text from active steps under ``"queries"``.

        A step contributes a query when its type is in ``SQL_STEP_TYPES``, its
        name appears in at least one hop of ``state["sequence"]``, and it has
        a non-empty ``<sql>`` child. ``<step>`` elements lacking ``<name>`` or
        ``<type>`` are ignored rather than raising ``AttributeError``.
        """
        active_steps = {step for hop in state["sequence"] for step in hop}
        queries = []

        for step in state["root"].findall(".//step"):
            step_name = step.findtext("name")
            step_type = step.findtext("type")
            # A missing name/type yields None, which matches neither set.
            if step_type not in self.SQL_STEP_TYPES or step_name not in active_steps:
                continue
            sql_text = step.findtext("sql")
            if sql_text:
                queries.append({
                    "step_name": step_name,
                    "step_type": step_type,
                    "sql": sql_text
                })

        state["queries"] = queries
        return state

    def analyze_sql(self, state: Dict[str, Any]) -> Dict[str, Any]:
        """Store the SQL sub-agent's analysis of ``"queries"`` as ``"sql_analysis"``."""
        state["sql_analysis"] = self.sql_analysis_agent.analyze(state["queries"])
        return state

    def analyze_workflow(self, state: Dict[str, Any]) -> Dict[str, Any]:
        """Store the workflow sub-agent's analysis as ``"workflow_analysis"``."""
        state["workflow_analysis"] = self.workflow_analysis_agent.analyze(
            state["sequence"], state["queries"], state["sql_analysis"]
        )
        return state

    def generate_documentation(self, state: Dict[str, Any]) -> Dict[str, Any]:
        """Store the documentation sub-agent's markdown as ``"documentation"``."""
        state["documentation"] = self.documentation_agent.generate(
            state["workflow_analysis"], state["sql_analysis"]
        )
        return state

    def plan_project(self, state: Dict[str, Any]) -> Dict[str, Any]:
        """Generate ``"project_plan"`` and append it to ``"documentation"``."""
        state["project_plan"] = self.project_planning_agent.plan(state["documentation"])
        state["documentation"] += "\n\n## Project Plan for Improvement\n\n" + state["project_plan"]
        return state

    def export_markdown_to_file(self, state: Dict[str, Any]):
        """Write ``state["documentation"]`` to ``self.markdown_filename``.

        The file is always written as UTF-8 so that non-ASCII characters in
        the generated text do not depend on the platform's default encoding.
        """
        with open(self.markdown_filename, 'w', encoding='utf-8') as file:
            file.write(state["documentation"])

In [4]:
# LLM instance passed to the Agent, which hands it to every sub-agent.
# NOTE(review): temperature=0.0 is presumably chosen to keep responses as
# repeatable as possible; the 'qwen2:latest' tag is not pinned to a specific
# model version - confirm both are intentional.
model = OllamaLLM(model='qwen2:latest', temperature=0.0)

In [5]:
# NOTE(review): `tool` is not referenced by any other visible cell; it looks
# like a leftover placeholder - confirm before removing.
tool = []

In [6]:
# NOTE(review): `prompt` is not referenced by any other visible cell; it looks
# like a leftover placeholder - confirm before removing.
prompt = ""

In [7]:
# Build the orchestrating agent for the configured KTR file. Construction only
# stores settings and creates the sub-agents; no file is read at this point.
abot = Agent(model=model, file_path=pentaho_file_path)

In [8]:
# Run the whole pipeline: parse the file, query the LLM for each analysis
# stage, and write the resulting markdown to `abot.markdown_filename`
# (a bare file name, so it lands in the current working directory).
abot.run()