# Imports

In [1]:
"""
Main entry point for the Enhanced PowerBI Agent.
"""

import warnings
import os

# Silence every warning process-wide. This runs before the third-party
# imports below so that warnings raised while those modules are being
# imported are suppressed too.
warnings.filterwarnings("ignore")

# Request that the "Unverified HTTPS request" warning (urllib3) be ignored via
# the PYTHONWARNINGS environment variable.
# NOTE(review): PYTHONWARNINGS is normally read at interpreter start-up, so
# setting it here presumably only affects child processes; the blanket filter
# above already covers the current process - confirm this line is still needed.
os.environ["PYTHONWARNINGS"] = "ignore:Unverified HTTPS request"

# Import libraries after suppressing warnings
from langchain.agents import AgentExecutor, ZeroShotAgent, Tool
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferMemory

from custom_llm import CustomGPT
from config import AGENT_PREFIX, KNOWN_TABLES
from Prompts.few_shot_examples import get_few_shot_examples

from Tools.info_powerbi_tool import InfoPowerBITool
from Tools.list_powerbi_tool import ListPowerBITool
from Tools.schema_powerbi_tool import SchemaPowerBITool
from Tools.query_powerbi_tool import QueryPowerBITool

# PowerBI Agent Configuration

In [2]:
def create_powerbi_agent() -> AgentExecutor:
    """Create and configure the enhanced PowerBI agent.

    Builds a ZeroShotAgent (ReAct-style Thought/Action/Observation loop) on top
    of the project's ``CustomGPT`` LLM and the four PowerBI tools, and wraps it
    in an ``AgentExecutor`` with conversation memory.

    Returns:
        AgentExecutor: executor that accepts ``{"input": <question>}`` and
        returns a mapping whose ``"output"`` key holds the final answer.
    """

    # Initialize the custom LLM
    llm = CustomGPT()

    # Initialize tools
    tools = [
        InfoPowerBITool(),
        ListPowerBITool(),
        SchemaPowerBITool(),
        QueryPowerBITool()
    ]

    # Few-shot examples are kept in their own string and concatenated below.
    # NOTE(review): prefix/examples end up inside a PromptTemplate, so any
    # literal "{" / "}" they contain must already be escaped - confirm in
    # config.AGENT_PREFIX and get_few_shot_examples().
    few_shot_examples = get_few_shot_examples()

    # Define the prefix (system message) for the agent
    prefix = f"""
    {AGENT_PREFIX}

    You are an advanced PowerBI data analyst specializing in complex queries and data analysis. Your expertise lies in understanding 
    star schema architectures with fact and dimension tables, and constructing appropriate joins to answer user questions.

    IMPORTANT CONTEXT ABOUT THE DATA MODEL:
    - The dataset follows a star schema with fact tables that contain measures and dimension tables that contain attributes.
    - To answer most user questions, you'll need to join tables using the appropriate keys.
    - Common business terms may not match database column names exactly. For example, "product" might be stored as "PRD_ID".
    - Always check table schemas and relationships before constructing queries.
    - Financial data is primarily stored in fact tables with monetary values in columns like "MONTANT".

    REASONING APPROACH - MATCH YOUR EXPLORATION DEPTH TO QUESTION COMPLEXITY:
    1. For simple questions (like "list tables", "show schema", "give me counts"), use minimal tool calls and return results immediately.
    2. For complex analytical questions, follow this more detailed approach:
       a. UNDERSTAND: First, understand what information the user is looking for and what business entities are involved.
       b. EXPLORE: Use info_powerbi_tool and schema_powerbi_tool to understand the relevant tables and their relationships.
       c. PLAN: Determine which tables need to be joined and which columns to use in the query.
       d. EXECUTE: Construct and run the appropriate query with necessary filters and calculations.
       e. EXPLAIN: Explain the results in business terms the user will understand.

    For complex queries involving multiple tables:
    - First identify the fact tables that contain the measures you need
    - Then identify the dimension tables needed for filtering or grouping
    - Use the relationship information to correctly join these tables
    - Apply appropriate filters and aggregations in your query

    DAX QUERY EXAMPLES:
    - To get distinct values from a column: EVALUATE VALUES(TableName[ColumnName])
    - To filter data: EVALUATE FILTER(TableName, TableName[Column] = "Value")
    - To get the top N records: EVALUATE TOPN(10, TableName)
    - To calculate a sum: EVALUATE SUMMARIZECOLUMNS(TableName[GroupColumn], "Total", SUM(TableName[AmountColumn]))
    
    When using query_powerbi_tool, always use DAX syntax, not SQL.
    """

    # Combine the prefix, examples, and tools list - ensuring examples are properly inserted
    full_prefix = prefix + "\n" + few_shot_examples + "\n\nYou have access to the following tools:"

    # Define the agent format instructions with strict guidance on format.
    # "{tool_names}" is left as a placeholder on purpose: create_prompt fills
    # it in from the tools list, so no local tool-name list is needed here.
    format_instructions = """Use the following format:

    Question: the input question you must answer
    Thought: you should always think about what to do
    Action: the action to take, should be one of [{tool_names}]
    Action Input: the input to the action
    Observation: the result of the action
    ... (this Thought/Action/Action Input/Observation can repeat N times)
    Thought: I now know the final answer
    Final Answer: the final answer to the original input question

    IMPORTANT EFFICIENCY RULES:
    - If you have all information needed to answer the question after initial tool call(s), proceed immediately to Final Answer.
    - Only perform additional exploratory tool calls if absolutely necessary to answer the question.
    - Do not run schema_powerbi_tool on tables unless you need their schema to answer the current question.
    - Match your exploration depth to question complexity - simple questions require minimal tool usage.

    IMPORTANT FORMAT RULES:
    - After each "Thought:", you MUST ALWAYS follow with either:
      - "Action:" (to use a tool), OR
      - "Final Answer:" (to provide the final response)
    - NEVER write a Thought without following it with either an Action or Final Answer.
    - ALWAYS use "Final Answer:" when you have the information needed to answer the user's question.

    NEVER invent or answer questions the user didn't ask.
    ALWAYS stay focused on the exact question the user is asking.
    """

    # The default ZeroShotAgent suffix only references {input} and
    # {agent_scratchpad}. Without an explicit {chat_history} placeholder the
    # conversation memory is collected but never shown to the model, so
    # follow-up questions lose their context.
    suffix = (
        "Begin!\n\n"
        "Previous conversation:\n{chat_history}\n\n"
        "Question: {input}\n"
        "Thought:{agent_scratchpad}"
    )

    # Create the prompt template
    prompt = ZeroShotAgent.create_prompt(
        tools,
        prefix=full_prefix,
        suffix=suffix,
        format_instructions=format_instructions,
        input_variables=["input", "chat_history", "agent_scratchpad"]
    )

    # Deprecation warnings raised while constructing the LangChain objects are
    # noise for the end user; one scoped filter covers all three constructors.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        # The prompt is a plain string template, so the history must be
        # rendered as text (the default) rather than as message objects,
        # which would be interpolated as a raw Python list repr.
        memory = ConversationBufferMemory(memory_key="chat_history")

        # Create the LLM chain
        llm_chain = LLMChain(llm=llm, prompt=prompt)

        # Create the agent
        agent = ZeroShotAgent(llm_chain=llm_chain, tools=tools, verbose=True)

    # Create the agent executor with increased max_iterations for complex queries
    agent_executor = AgentExecutor.from_agent_and_tools(
        agent=agent,
        tools=tools,
        memory=memory,
        verbose=True,
        max_iterations=8,  # Increased to allow for more complex reasoning chains
        # Malformed LLM output is fed back to the model as an observation
        # instead of raising, so the loop can recover.
        handle_parsing_errors=True,
        # When max_iterations is reached, stop with a fixed "stopped" message
        # instead of making one more LLM call to generate a final answer.
        early_stopping_method="force",
    )

    return agent_executor

# PowerBI Agent Initialization

In [3]:
def main():
    """Run an interactive question/answer loop against the PowerBI agent.

    Creates the agent once, then repeatedly reads a question from standard
    input, sends it to the agent and prints the answer. The loop ends when the
    user types ``exit`` (case-insensitive, surrounding whitespace ignored) or
    when input is closed/interrupted (EOF or Ctrl-C at the prompt).
    """
    # Create the agent
    agent = create_powerbi_agent()

    print("🤖 Your personal PowerBI Agent is initialized and Ready to Go ! ")
    print("📋 Ask questions about your PowerBI dataset or request analysis.")
    print("👋🏽 Type 'exit' to quit.\n")

    while True:
        try:
            user_input = input("• Question: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C at the prompt: leave quietly instead of
            # dumping a traceback on the user.
            print("\nGoodbye! 👋🏽")
            break

        # Nothing typed: do not spend an LLM call on an empty question.
        if not user_input:
            continue

        if user_input.lower() == "exit":
            print("Goodbye! 👋🏽")
            break

        try:
            response = agent.invoke({"input": user_input})
            print(f"\nAnswer: {response['output']}\n")
        except Exception as e:
            # Top-level boundary of the interactive loop: report the failure
            # and keep the session alive for the next question.
            print(f"Error: {str(e)}")
            print("\nAnswer: I encountered an issue processing your request. Please try rephrasing your question or ask something else.\n")

# 🤖 Agent Capabilities by Complexity Level

The Power BI agent is currently under development and continues to improve. Its ability to answer questions varies based on the complexity of the query. As complexity increases, more training and refinement may be required for optimal performance.

### Level 1: Basic Schema Understanding
**Description**: Straightforward questions about dataset structure.
**Examples**:
- *List all tables in the current dataset.*
- *Provide the schema of the table `GL`.*

### Level 2: Direct Data Retrieval with Explicit References
**Description**: Queries that reference specific tables and column names.
**Examples**:
- *List some `Sous BU` values from the `GL` table.*

### Level 3: Semantic Retrieval with Partial Technical Knowledge
**Description**: Queries that omit exact column names but imply intent.
**Examples**:
- *Which column in `GL` represents revenue? Show some sample values.*
- *List a few products from the `GL` table.*

### Level 4: Natural Language Queries on a Single Table
**Description**: Fully natural language queries referring to a single table without technical terms.
**Example**:
- *What is the total revenue of product `P231` for the year 2024?*

### Level 5: Complex Analytical Queries with Joins
**Description**: Natural language questions requiring data aggregation and joins across multiple tables.
**Example**:
- *Rank the top 3 best-selling products in the `Digital Solutions` Sous BU, where `Rangnar` was the sole client in 2026.*

---

### 🚧 Future Development
The agent's capabilities will expand to better handle high-complexity queries (Levels 4 and 5) through improved semantic understanding and more robust training on real-world query examples.


# Testing

In [None]:
# Start the interactive session only when executed directly (script or
# notebook cell), not when this module is imported by other code.
if __name__ == "__main__":
    main()

🤖 Your personal PowerBI Agent is initialized and Ready to Go ! 
📋 Ask questions about your PowerBI dataset or request analysis.
👋🏽 Type 'exit' to quit.



• Question:  list all the tables in the current dataset




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI should start by listing all the available tables in the current dataset to provide the user with the information they requested.

Action: list_powerbi_tool
Action Input: List all tables in the dataset[0m
Observation: [33;1m[1;3mAvailable tables in the PowerBI dataset:

- INDICATEURS
- DIM_CLIENT
- DIM_DATE
- DIM_SOCIETE
- GL
- MAPPING_ACTIVITE
- MAPPING_CDR
- MAPPING_COMPTE
- MAPPING_PRODUIT
- MAPPING_PROJET_CDR
- MAPPING_PROJECT_MULTIBU
[0m
[32;1m[1;3mI now have the list of all available tables in the current dataset.

Final Answer: The available tables in the current PowerBI dataset are:

- INDICATEURS
- DIM_CLIENT
- DIM_DATE
- DIM_SOCIETE
- GL
- MAPPING_ACTIVITE
- MAPPING_CDR
- MAPPING_COMPTE
- MAPPING_PRODUIT
- MAPPING_PROJET_CDR
- MAPPING_PROJECT_MULTIBU[0m

[1m> Finished chain.[0m

Answer: The available tables in the current PowerBI dataset are:

- INDICATEURS
- DIM_CLIENT
- DIM_DATE
- DIM_SOCIETE
- GL
- MAPP

• Question:  List some Sous BU values from the GL table




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThe user is asking for a list of some "Sous BU" values from the GL table. To answer this question, we need to extract distinct values from the "Sous BU" column in the GL table.

Action: query_powerbi_tool
Action Input: EVALUATE VALUES(GL[Sous BU])[0m
Observation: [36;1m[1;3mQuery results:

{'results': [{'GL[Sous BU]': None}, {'GL[Sous BU]': 'BPO – Back Office'}, {'GL[Sous BU]': 'Digital Solutions'}, {'GL[Sous BU]': 'BPO – Cheque et Encaissement'}, {'GL[Sous BU]': 'BPO – Paynum'}, {'GL[Sous BU]': 'Contralia'}, {'GL[Sous BU]': 'E Consentement'}, {'GL[Sous BU]': 'Identite Numerique'}, {'GL[Sous BU]': 'ID360'}, {'GL[Sous BU]': 'T2IS'}, {'GL[Sous BU]': 'Gamme RH'}, {'GL[Sous BU]': 'E Vote/ BDES'}, {'GL[Sous BU]': 'Carnet Numerique du Logement BPO'}, {'GL[Sous BU]': 'Digital_Booster_Sofadev'}, {'GL[Sous BU]': 'Hubmail (BPO)'}, {'GL[Sous BU]': 'Agoria'}, {'GL[Sous BU]': 'Softeam (BU)'}, {'GL[Sous BU]': 'Digital_Booster_Softeam'},

• Question:  What is the total revenue of product P231 for the year 2024 of the table GL




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: To answer the question, I need to calculate the total revenue of product P231 for the year 2024 from the GL table. First, I should confirm if the GL table contains the necessary columns for "PRODUIT", "EXERCICE", and "MONTANT".

Action: schema_powerbi_tool
Action Input: GL[0m
Observation: [38;5;200m[1;3mSchema for table 'GL':

Columns:
- GL[EXERCICE] (number)
- GL[MOIS] (number)
- GL[Date Comptable] (string)
- GL[SOCIETE] (string)
- GL[Num_Tiers] (string)
- GL[CDR] (string)
- GL[CDR_LIB] (string)
- GL[ACTIVITE] (string)
- GL[ACTIVITE_LIB] (string)
- GL[PROJET] (string)
- GL[PROJET_LIB] (string)
- GL[PRODUIT] (string)
- GL[PRODUIT_LIB] (string)
- GL[Client] (string)
- GL[Client_Lib] (string)
- GL[COMPTE_ANALYTIQUE] (string)
- GL[COMPTE_LIBELLE] (string)
- GL[MONTANT] (number)
- GL[Contrepartie] (string)
- GL[Raison_Sociale] (string)
- GL[Siret_Tiers] (string)
- GL[G_Docaposte] (string)
- GL[G_Softeam] (string)
- GL

Sample questions:
list all the tables in the current dataset
List some Sous BU values from the GL table
What is the total revenue of product P231 for the year 2024 of the table GL