### Test 1: Marketing Data

In [4]:
import json
import pandas as pd
import numpy as np
import os
from typing import Dict, Any, List, Union
from dotenv import load_dotenv
import pprint
from rich.console import Console
from rich.panel import Panel
from rich.table import Table
from rich import print as rprint

# Load environment variables from the .env file. This runs before the
# langchain / domain_detector_v1 imports below -- presumably so that API keys
# are already set when those modules initialise (TODO confirm).
load_dotenv()

from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langgraph.graph import StateGraph, START, END

# Import the pipeline building blocks from the original module.
# The original file is domain_detector_v1.py (see the import below).
from domain_detector_v1 import (
    NumpyEncoder, llm, build_profile, profile,
    domain_node, concept_node, analysis_node, eval_node, reflect_node,
    domain_chain, concept_chain, analysis_chain, eval_chain, reflect_chain,
    success, MAX_ITERS
)

# Create a rich console for prettier output; this single instance is shared
# by every logging helper and by the script entry point below.
console = Console()

# Create enhanced node functions that log their inputs and outputs

def _memory_of(state):
    """Return the memory stored in *state*, or None when there is no real memory.

    A falsy value and the literal string "None" (the value the initial state
    is seeded with) both count as "no memory".
    """
    memory = state.get("memory")
    if memory and memory != "None":
        return memory
    return None


def log_node_execution(node_name, input_state, output_state):
    """Pretty-print what a graph node received and what it produced.

    Args:
        node_name: One of "domain", "concept", "analysis", "eval" or
            "reflect". Any other name prints only the opening and closing
            rules.
        input_state: State dict that was passed to the node.
        output_state: State dict that the node returned.

    Returns:
        None. Everything is written to the module-level rich ``console``.
    """
    console.rule(f"[bold cyan]{node_name} Node")
    memory = _memory_of(input_state)

    if node_name == "domain":
        # For the domain node, show only the relevant parts of the state.
        console.print("[bold green]Input:[/bold green]")
        if memory is not None:
            console.print(f"Memory: {memory}")
        else:
            console.print("No memory input")

        console.print("\n[bold green]Output:[/bold green]")
        if "domain_info" in output_state:
            domain_info = output_state["domain_info"]
            domain_table = Table(show_header=True)
            domain_table.add_column("Field", style="bold blue")
            domain_table.add_column("Value")
            for label, key in (
                ("Domain", "domain"),
                ("Definition", "definition"),
                ("Wiki URL", "wiki_url"),
            ):
                domain_table.add_row(label, str(domain_info.get(key, "N/A")))
            console.print(domain_table)

    elif node_name == "concept":
        console.print("[bold green]Input:[/bold green]")
        console.print(f"Domain: {json.dumps(input_state.get('domain_info', {}), indent=2)}")
        if memory is not None:
            console.print(f"Memory: {memory}")

        console.print("\n[bold green]Output:[/bold green]")
        try:
            concepts = json.loads(output_state.get("concepts", "[]"))
        except (TypeError, ValueError):
            # Not a JSON string (or malformed JSON): show the value untouched.
            console.print(f"Concepts (raw): {output_state.get('concepts', 'N/A')}")
        else:
            console.print(Panel(str(concepts), title="Concepts"))

    elif node_name == "analysis":
        console.print("[bold green]Input:[/bold green]")
        console.print(f"Domain: {json.dumps(input_state.get('domain_info', {}), indent=2)}")
        console.print(f"Concepts: {input_state.get('concepts', 'N/A')}")
        if memory is not None:
            console.print(f"Memory: {memory}")

        console.print("\n[bold green]Output:[/bold green]")
        try:
            analysis = json.loads(output_state.get("analysis", "{}"))
        except (TypeError, ValueError):
            # Not a JSON string (or malformed JSON): show the value untouched.
            console.print(f"Analysis (raw): {output_state.get('analysis', 'N/A')}")
        else:
            console.print(Panel(json.dumps(analysis, indent=2), title="Analysis"))

    elif node_name == "eval":
        console.print("[bold green]Input:[/bold green]")
        console.print(f"Domain: {json.dumps(input_state.get('domain_info', {}), indent=2)}")
        console.print(f"Concepts: {input_state.get('concepts', 'N/A')}")
        # str() keeps the preview from raising when the analysis is present
        # but is not a string (for example None or an already-parsed dict).
        analysis_preview = str(input_state.get("analysis", "N/A"))[:200]
        console.print(f"Analysis: {analysis_preview}...")

        console.print("\n[bold green]Output:[/bold green]")
        if "scores" in output_state:
            eval_table = Table(show_header=True)
            eval_table.add_column("Metric", style="bold blue")
            eval_table.add_column("Score")

            for metric, score in output_state["scores"].items():
                # Colour bands: >= 4 green, >= 3 yellow, anything lower red.
                color = "green" if score >= 4 else ("yellow" if score >= 3 else "red")
                eval_table.add_row(metric, f"[{color}]{score}[/{color}]")

            console.print(eval_table)
            console.print(f"Evaluation reason: {output_state.get('evaluation', 'N/A')}")
            console.print(f"Domain OK: {output_state.get('domain_ok', False)}")
            console.print(f"Concepts OK: {output_state.get('concepts_ok', False)}")

    elif node_name == "reflect":
        console.print("[bold green]Input:[/bold green]")
        console.print(f"Evaluation: {input_state.get('evaluation', 'N/A')}")
        scores_str = json.dumps(input_state.get("scores", {}), indent=2)
        console.print(f"Scores: {scores_str}")
        if memory is not None:
            console.print(f"Previous memory: {memory}")

        console.print("\n[bold green]Output:[/bold green]")
        console.print(f"New memory: {output_state.get('memory', 'N/A')}")
        console.print(f"Iteration: {output_state.get('iteration', 0)}")

    console.rule()

# Wrap the existing node functions to include logging
def logged_domain_node(state):
    """Run ``domain_node`` on *state*, log the call, and return its result."""
    new_state = domain_node(state)
    log_node_execution("domain", input_state=state, output_state=new_state)
    return new_state

def logged_concept_node(state):
    """Run ``concept_node`` on *state*, log the call, and return its result."""
    new_state = concept_node(state)
    log_node_execution("concept", input_state=state, output_state=new_state)
    return new_state

def logged_analysis_node(state):
    """Run ``analysis_node`` on *state*, log the call, and return its result."""
    new_state = analysis_node(state)
    log_node_execution("analysis", input_state=state, output_state=new_state)
    return new_state

def logged_eval_node(state):
    """Run ``eval_node`` on *state*, log the call, and return its result."""
    new_state = eval_node(state)
    log_node_execution("eval", input_state=state, output_state=new_state)
    return new_state

def logged_reflect_node(state):
    """Run ``reflect_node`` on *state*, log the call, and return its result."""
    new_state = reflect_node(state)
    log_node_execution("reflect", input_state=state, output_state=new_state)
    return new_state

# Rebuild the graph with the logged versions of the nodes
def build_logged_graph():
    """Assemble and compile the analysis graph from the logging node wrappers.

    Flow: START -> domain -> concept -> analysis -> eval. After eval,
    ``decide_next`` either ends the run or routes to reflect, which loops
    back to concept (not domain).

    Returns:
        The compiled graph produced by ``StateGraph.compile()``.
    """
    builder = StateGraph(dict)
    for name, node in (
        ("domain", logged_domain_node),
        ("concept", logged_concept_node),
        ("analysis", logged_analysis_node),
        ("eval", logged_eval_node),
        ("reflect", logged_reflect_node),
    ):
        builder.add_node(name, node)

    builder.add_edge(START, "domain")
    builder.add_edge("domain", "concept")
    builder.add_edge("concept", "analysis")
    builder.add_edge("analysis", "eval")

    # Conditional branching after eval
    def decide_next(state):
        """Return END when the run should stop, otherwise "reflect"."""
        # `or {}` also covers a "scores" key that is present but None.
        scores = state.get("scores") or {}
        iteration = state.get("iteration", 0)

        console.print(f"[bold magenta]Decision point:[/bold magenta] Iteration {iteration}")

        # Force stop after MAX_ITERS iterations
        if iteration >= MAX_ITERS:
            console.print(f"[bold red]Reached max iterations ({MAX_ITERS}), stopping.[/bold red]")
            return END

        # Stop if all scores are good enough. all() is vacuously True for an
        # empty dict, so at least one score is required; otherwise a run
        # without scores would be reported as "excellent".
        if scores and all(v >= 4 for v in scores.values()):
            console.print("[bold green]All scores are excellent, stopping.[/bold green]")
            return END

        # Otherwise continue with reflect
        console.print("[bold yellow]Scores need improvement, continuing to reflection...[/bold yellow]")
        return "reflect"

    builder.add_conditional_edges("eval", decide_next)
    # reflect feeds concept, so the domain node is not re-run on later passes
    # (original note: "skip domain if domain_fixed true inside concept logic").
    builder.add_edge("reflect", "concept")

    return builder.compile()

# Run the graph with detailed logging
def run_logged_analysis(csv_path=None):
    """Run the logged analysis graph and print the final scores and analysis.

    Args:
        csv_path: Optional CSV path handed to ``build_profile``. When falsy,
            the ``profile`` object imported from ``domain_detector_v1`` is
            used instead.

    Returns:
        The final state dict returned by ``graph.invoke``.
    """
    console.print("[bold]Starting Analysis Pipeline with Detailed Logging[/bold]", style="blue on white")

    # If csv_path is provided, build a new profile, otherwise use the existing one
    prof = build_profile(csv_path) if csv_path else profile

    # Build the logged graph
    graph = build_logged_graph()

    # Create initial state
    initial_state = {
        "profile": prof,
        "memory": "None",
        "iteration": 0,
        "domain_fixed": False,
    }

    console.print("\n[bold]Initial State:[/bold]")
    console.print(f"Profile loaded with {prof['raw']['n_rows']} rows and {prof['raw']['n_cols']} columns")

    # The graph runs domain once, then up to MAX_ITERS + 1 passes of
    # concept -> analysis -> eval with a reflect step between passes. With
    # LangGraph's default recursion limit of 25 a larger MAX_ITERS raises
    # GraphRecursionError before decide_next can stop the loop, so the limit
    # is sized from MAX_ITERS (plus a small margin) and never drops below 25.
    # NOTE(review): this assumes "iteration" advances by one per reflect pass;
    # if the error persists, check that the nodes keep "iteration" in state.
    recursion_limit = max(25, 4 * (MAX_ITERS + 1) + 5)

    # Execute the graph
    console.rule("[bold]Starting Graph Execution[/bold]")
    result = graph.invoke(initial_state, config={"recursion_limit": recursion_limit})

    # Print final results
    console.rule("[bold green]Final Results[/bold green]")
    console.print("[bold]Final Scores:[/bold]")

    # A run that stopped without scores prints an empty table instead of
    # raising KeyError.
    final_scores = result.get("scores") or {}
    scores_table = Table(show_header=True)
    scores_table.add_column("Metric", style="bold blue")
    scores_table.add_column("Score")

    for metric, score in final_scores.items():
        # Colour bands: >= 4 green, >= 3 yellow, anything lower red.
        color = "green" if score >= 4 else ("yellow" if score >= 3 else "red")
        scores_table.add_row(metric, f"[{color}]{score}[/{color}]")

    console.print(scores_table)

    console.print("\n[bold]Final Analysis:[/bold]")
    raw_analysis = result.get("analysis")
    try:
        analysis = json.loads(raw_analysis)
    except (TypeError, ValueError):
        # Missing, non-string, or malformed JSON: print whatever is there.
        console.print(raw_analysis)
    else:
        console.print(Panel(json.dumps(analysis, indent=2), title="Analysis"))

    return result

if __name__ == "__main__":
    # You can specify a custom CSV path here, or leave it as None to use the existing profile
    result = run_logged_analysis()

    # Save the final analysis to a file. The analysis is parsed as JSON when
    # possible; if it is missing, not a string, or malformed, the raw value is
    # written instead so the run does not crash after the pipeline finished.
    raw_analysis = result.get("analysis")
    try:
        payload = json.loads(raw_analysis)
    except (TypeError, ValueError):
        payload = raw_analysis

    with open("analysis_result.json", "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=2)

    console.print("[bold green]Analysis complete! Results saved to analysis_result.json[/bold green]")

GraphRecursionError: Recursion limit of 25 reached without hitting a stop condition. You can increase the limit by setting the `recursion_limit` config key.
For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/GRAPH_RECURSION_LIMIT