In [1]:
import asyncio
import psutil
import os
import time
import threading
# Importe suas funções de teste
# from test_pipeline import test_full_workflow 

# --- Utilitário de Formatação ---
def format_bytes(size):
    """Format a byte count as a human-readable string using 1024-based units.

    Args:
        size: Number of bytes (int or float). May be negative, e.g. when it
            is the difference between two memory readings.

    Returns:
        A string such as ``"3.20 GB"`` or ``"-2.00 KB"`` with two decimals.
        Values beyond the largest known unit stay expressed in ``TB``.
    """
    power = 2**10
    power_labels = ('B', 'KB', 'MB', 'GB', 'TB')
    # Scale the magnitude only, so negative deltas get a unit as well.
    sign = '-' if size < 0 else ''
    magnitude = abs(size)
    n = 0
    # `>=` so that exactly 1024 becomes "1.00 KB"; the bound on `n` prevents
    # indexing past the last label for very large values.
    while magnitude >= power and n < len(power_labels) - 1:
        magnitude /= power
        n += 1
    return f"{sign}{magnitude:.2f} {power_labels[n]}"

# --- Classe Monitor de Memória ---
class MemoryMonitor:
    """Track the resident set size (RSS) of the current process in a background thread.

    Call ``start()`` before the workload and ``stop()`` after it; ``stop()``
    prints a report with the initial, final and peak RSS and their difference.

    Args:
        interval: Seconds between two consecutive RSS samples.
    """

    def __init__(self, interval=0.1):
        self.interval = interval
        self.process = psutil.Process(os.getpid())
        self.running = False
        self.max_rss = 0
        self.start_rss = 0
        self.end_rss = 0
        self._thread = None
        # Lets stop() wake the sampler immediately instead of waiting for a
        # full sleep interval to elapse.
        self._stop_event = threading.Event()

    def _monitor(self):
        """Sampling loop executed by the background thread."""
        while self.running:
            # RSS: Resident Set Size (physical RAM in use by the process)
            current_rss = self.process.memory_info().rss
            if current_rss > self.max_rss:
                self.max_rss = current_rss
            # Interruptible pause: returns True as soon as stop() sets the event.
            if self._stop_event.wait(self.interval):
                break

    def start(self):
        """Record the initial RSS and launch the sampling thread."""
        self.start_rss = self.process.memory_info().rss
        self.max_rss = self.start_rss
        self._stop_event.clear()
        self.running = True
        self._thread = threading.Thread(target=self._monitor, daemon=True)
        self._thread.start()
        print(f"üìâ Mem√≥ria Inicial: {format_bytes(self.start_rss)}")

    def stop(self):
        """Stop sampling, record the final RSS and print the memory report."""
        self.running = False
        self._stop_event.set()
        if self._thread:
            self._thread.join()
        self.end_rss = self.process.memory_info().rss
        # The final reading is a sample too; without this the reported peak
        # could be lower than the reported final value.
        if self.end_rss > self.max_rss:
            self.max_rss = self.end_rss

        print("\n" + "="*40)
        print(f"üìä RELAT√ìRIO DE MEM√ìRIA DO PROCESSO")
        print("="*40)
        print(f"üìâ Inicial:      {format_bytes(self.start_rss)}")
        print(f"üìà Final:        {format_bytes(self.end_rss)}")
        print(f"üöÄ PICO (Peak):  {format_bytes(self.max_rss)}")
        print(f"üíß Diferen√ßa:    {format_bytes(self.end_rss - self.start_rss)}")
        print("="*40)


In [10]:
%load_ext autoreload
%autoreload 2

import pandas as pd
from app.tools.context_store import AnalysisContext 
from app.tools.metrics_agent_tools import get_dataset_health_check, query_anomalous_ids, run_ml_inference_pipeline, choose_emb_conf
from app.tools.data_tools import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [11]:

# Tools made available to the orchestrator. Each tool is listed exactly once
# (the original list registered `inspect_file_schema` twice).
orchestrator_tools = [
    query_mongo_requests,
    query_sql_campaigns,
    list_avaiable_datasets,
    inspect_file_schema,
    load_dataset_into_context,
    check_context_status,
]

async def test_full_workflow(traffic_source="google", target_hash="uw0qfu4a1r"):
    """Run the tool chain end to end without an LLM and print each step's result.

    Steps: SQL campaign discovery -> Mongo ingestion into ``AnalysisContext``
    -> embedding selection -> ML inference -> health check -> anomaly query.
    A failure in steps 1-3 aborts the run; failures in steps 4 and 5 are only
    reported.

    Args:
        traffic_source: Traffic source passed to the SQL, Mongo and embedding
            tools. Defaults to the value that was previously hard-coded.
        target_hash: Campaign hash to load from Mongo. With no LLM to pick a
            hash out of the SQL result, the caller supplies one; the default
            is the hash that was previously hard-coded.
    """
    print("\nüîπ --- INICIANDO TESTE DE INTEGRA√á√ÉO (SEM LLM) ---")

    print("\nTesting: 1. Orchestrator - Discovery (SQL)")

    try:
        # List a couple of campaigns for the requested traffic source.
        campaigns_str = await query_sql_campaigns.ainvoke({"traffic_source": traffic_source, "limit": 2})
        print(f"‚úÖ SQL Result: {campaigns_str}")

        # TEST HACK: the hash is not extracted from the SQL result (that is
        # the LLM's job); the caller-provided `target_hash` is used instead.
        print(f"Usando Hash Alvo: {target_hash}")

    except Exception as e:
        print(f"‚ùå SQL Failed: {e}")
        return

    print("\nTesting: 2. Orchestrator - Ingestion (Mongo -> Context)")
    try:
        # Simulate the load step.
        status_msg = await query_mongo_requests.ainvoke({
            "hash": target_hash,
            "traffic_source": traffic_source
        })
        print(f"‚úÖ Load Result: {status_msg}")

        # State check: peek into the shared context to confirm the load worked.
        # NOTE(review): assumes get_status() raises ValueError on an empty
        # context — confirm against AnalysisContext.
        try:
            print("Status: ", AnalysisContext.get_status())
        except ValueError:
            print("‚ùå VERIFICA√á√ÉO FALHOU: Contexto est√° vazio!")
            return

    except Exception as e:
        print(f"‚ùå Mongo Load Failed: {e}")
        # If no database is running, call mock_data_loading() here instead of
        # returning in order to continue with mocked data.
        return

    print("\nTesting: 3.1 Sub-agent - ML Execution")
    try:
        # The DataFrame is passed explicitly here, read back from the context.
        choosed_emb = choose_emb_conf.invoke({"traffic_source": traffic_source, "df": AnalysisContext.get_data_from_mongo()})
        print(f"‚úÖ Inference Result: {choosed_emb}")
    except Exception as e:
        print(f"‚ùå Chosse embedding pipeline failed {e}")
        return

    print("\nTesting: 3.2 Sub-agent - ML Execution")
    try:
        # Only the embedding choice is passed in; no data argument is given.
        inference_summary = run_ml_inference_pipeline.invoke({"emb_conf": choosed_emb})
        print(f"‚úÖ Inference Result: {inference_summary}")
    except Exception as e:
        print(f"‚ùå ML Pipeline Failed: {e}")
        return

    print("\nTesting: 4. Sub-agent - Health Check")
    try:
        health_stats = get_dataset_health_check.invoke({})
        print(f"‚úÖ Health Stats: {health_stats}")
    except Exception as e:
        # Reported but not fatal: the anomaly query below still runs.
        print(f"‚ùå Health Check Failed: {e}")

    print("\nTesting: 5. Sub-agent - Query Anomalies")
    try:
        # Look up IDs with low trust.
        anomalies = query_anomalous_ids.invoke({"criteria": "low_trust", "threshold": 0.5})
        print(f"‚úÖ Found {len(anomalies)} anomalies. Sample IDs: {anomalies[:5]}")
    except Exception as e:
        print(f"‚ùå Query Failed: {e}")

    print("\nüîπ --- TESTE FINALIZADO ---")

# --- MOCK OPCIONAL (Se você não tiver o Mongo rodando localmente) ---
def mock_data_loading():
    """Load a small synthetic request table into ``AnalysisContext``.

    Builds 100 rows (50 with a browser user agent labelled 1, 50 with a
    crawler user agent labelled 0) and stores them under the "google" source.
    """
    print("‚ö†Ô∏è MOCKING DATA LOADING...")

    human_rows, bot_rows = 50, 50
    row_count = human_rows + bot_rows

    # Column insertion order matters: it becomes the DataFrame column order.
    columns = {}
    columns["id"] = range(row_count)
    columns["user_agent"] = ["Mozilla/5.0"] * human_rows + ["Googlebot"] * bot_rows
    columns["url"] = ["/home"] * row_count
    columns["label"] = [1] * human_rows + [0] * bot_rows  # 1=Human, 0=Bot

    AnalysisContext.set_mongo_data(pd.DataFrame(columns), "google")
    print("‚úÖ Mock data loaded into Context.")

# --- Seu Wrapper de Teste ---
async def run_with_monitoring():
    """Run ``test_full_workflow`` while a ``MemoryMonitor`` samples process memory.

    The monitor is always stopped (and its report printed), even when the
    workflow raises.
    """
    mem_monitor = MemoryMonitor(interval=0.1)
    mem_monitor.start()
    try:
        print("‚è≥ Executando Workflow...")
        await test_full_workflow()
    finally:
        # Runs on success and on error, so the report is never skipped.
        mem_monitor.stop()

def check_gpu_memory():
    """Print allocated and reserved CUDA memory when torch and a GPU are available.

    Does nothing if torch cannot be imported or CUDA is unavailable.
    """
    try:
        import torch

        # Guard clause: nothing to report without a CUDA device.
        if not torch.cuda.is_available():
            return

        print("\n RELAT√ìRIO GPU (VRAM)")
        allocated = torch.cuda.memory_allocated()
        print(f"Alocada: {format_bytes(allocated)}")
        reserved = torch.cuda.memory_reserved()
        print(f"Reservada: {format_bytes(reserved)}")
    except ImportError:
        # Best effort: the GPU report is optional.
        pass

# Run the monitored workflow when this cell executes as the main module.
# NOTE: a bare top-level `await` is accepted by IPython/Jupyter cells but is a
# syntax error in a plain Python script (use asyncio.run(...) there).
if __name__ == "__main__":
    await run_with_monitoring()

üìâ Mem√≥ria Inicial: 3.20 GB
‚è≥ Executando Workflow...

üîπ --- INICIANDO TESTE DE INTEGRA√á√ÉO (SEM LLM) ---

Testing: 1. Orchestrator - Discovery (SQL)
‚úÖ SQL Result: success=True campaigns=['yszqvqzj0c', '6b5w3e0qsv'] message=None
Usando Hash Alvo: uw0qfu4a1r

Testing: 2. Orchestrator - Ingestion (Mongo -> Context)
DEBUG [Context]: Mongo Data Loaded. Rows: 688
‚úÖ Load Result: success=True message="SUCCESS: Loaded 688 requests into AnalysisContext.\nSources: google | Hashes: 1\nAction Required: Delegate to 'Metrics Analyst' agent to run ML inference now." num_requests=688
Status:  Mongo Raw: 688 | ML Processed: Pending

Testing: 3.1 Sub-agent - ML Execution
G:/Meu Drive/TWR/data/google
[DEBUG] Model Path FASTTEXT: G:/Meu Drive/TWR/data/google/fasttext_google.model
<app.services.embedding_service.FastTextEmbedder object at 0x00000201D5C9F110>
Enter to Fasttext encoder


Criando Vocabul√°rio: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 688/688 [00:00<00:00, 4069.40it/s]


Using 11 out of 12 cores


Vetorizando: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 688/688 [00:06<00:00, 100.19it/s]


Finishing encoding
Metric val for fasttext: 0.3053992986679077
Score para fasttext: 0.3053992986679077


Loading weights: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 103/103 [00:00<00:00, 662.66it/s, Materializing param=pooler.dense.weight]                             
[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


<app.services.embedding_service.TransformerEmbedder object at 0x00000201C58A5B50>
Enter to Transformers encoder


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 688/688 [00:00<00:00, 3316.79it/s]
Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 22/22 [00:21<00:00,  1.01it/s]


Finishing encoding
Metric val for transformers: 0.14213261008262634
Score para transformers: 0.14213261008262634
Melhor modelo: fasttext | Caminho/Config: G:/Meu Drive/TWR/data/google
[DEBUG] Model Path FASTTEXT: G:/Meu Drive/TWR/data/google/fasttext_google.model
‚úÖ Inference Result: {'emb_conf': 'fasttext', 'model_path': 'G:/Meu Drive/TWR/data/google'}

Testing: 3.2 Sub-agent - ML Execution
google
Embedding type:  fasttext
Enter to Fasttext encoder


Criando Vocabul√°rio: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 688/688 [00:00<00:00, 3407.79it/s]


Using 11 out of 12 cores


Vetorizando: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 688/688 [00:05<00:00, 117.00it/s]


Finishing encoding
‚è≥ [CACHE MISS] Instanciando e carregando MentorNetPredictor do disco: G:/Meu Drive/TWR/data/google/fasttext/mentor_net_bundle.pth...
DEBUG [Context]: ML Results Stored. Rows: 688
‚úÖ Inference Result: Inference completed using 'google' model with results: 
Models Accuracy: 0.9273255813953488Total Error in prediction (possible anomalies): 50Analyzed 688 samples.
You can now now:
1. Call 'get_dataset_health_check' to see overall performance stats.
2. Call 'query_anomalous_ids' to extract specific samples for the Detective Agent.

Testing: 4. Sub-agent - Health Check
‚úÖ Health Stats: {'total_samples': 688, 'false_positives': 44, 'false_negatives': 6, 'avg_trust': 0.8922286629676819}

Testing: 5. Sub-agent - Query Anomalies
Found 73 samples matching criteria 'low_trust' with threshold 0.5.
       weight      loss  target  pred  ncs
8    0.427834  0.077653     1.0   1.0  0.5
68   0.001892  0.987114     0.0   1.0  0.2
70   0.012552  0.724372     0.0   1.0  0.3
91   0.35

In [12]:
import operator
from typing import TypedDict, Annotated, List, Any
from langgraph.graph import StateGraph, END

class AgentState(TypedDict):
    """State dictionary shared by the graph nodes defined in this notebook."""

    traffic_source: str  # supplied in the graph input; read by the discovery, ingestion and ML nodes
    target_hash: str  # written by node_discovery, read by node_ingestion
    embedding_choice: Any  # May be the dict or the string returned by the embedding tool
    health_stats: Any  # value returned by get_dataset_health_check
    anomalies: List[str]  # value returned by query_anomalous_ids — NOTE(review): confirm it really is a list of str
    status: str  # label of the most recently completed step

# 2. DEFINIÇÃO DOS NÓS (Cada nó executa uma ou mais Tools)

async def node_discovery(state: AgentState):
    """Graph node: list campaigns for the traffic source and pick the target hash.

    Reads ``traffic_source`` from the state (falling back to "google") and
    returns a partial state update with ``target_hash`` and ``status``.
    """
    print("\n[Node: Discovery] Buscando campanhas...")

    sql_args = {
        "traffic_source": state.get("traffic_source", "google"),
        "limit": 2,
    }
    # Invoke the SQL tool.
    campaigns_str = await query_sql_campaigns.ainvoke(sql_args)
    print(f"   Campanhas encontradas: {campaigns_str}")

    # HACK: the hash is fixed rather than extracted from the SQL result
    # (in the future the LLM would do the extraction).
    state_update = {"target_hash": "uw0qfu4a1r", "status": "discovery_completed"}
    return state_update

async def node_ingestion(state: AgentState):
    """Graph node: load the requests for ``target_hash`` from Mongo.

    Returns a partial state update containing only ``status``.
    """
    print(f"\n[Node: Ingestion] üì• Carregando dados do Mongo (Hash: {state['target_hash']})...")

    # Invoke the Mongo tool; per the original note it stores the data in
    # AnalysisContext internally.
    mongo_args = dict(
        hash=state["target_hash"],
        traffic_source=state["traffic_source"],
    )
    status_msg = await query_mongo_requests.ainvoke(mongo_args)

    print(f"   Status Mongo: {status_msg}")
    return dict(status="data_loaded_to_context")

def node_ml_pipeline(state: AgentState):
    """Graph node: choose an embedding configuration and run ML inference.

    Returns a partial state update with ``embedding_choice`` and ``status``.
    """
    print("\n[Node: ML Pipeline] ü§ñ Executando infer√™ncia...")

    # Step 1: pick the embedding. No DataFrame is passed; per the original
    # note the tool reads it from the shared context.
    selection_args = {"traffic_source": state["traffic_source"]}
    choosed_emb = choose_emb_conf.invoke(selection_args)
    print(f"   Embedding selecionado: {choosed_emb}")

    # Step 2: run the inference pipeline with the chosen embedding.
    inference_args = {"emb_conf": choosed_emb}
    inference_summary = run_ml_inference_pipeline.invoke(inference_args)
    print(f"   Resumo ML: {inference_summary}")

    state_update = {"embedding_choice": choosed_emb}
    state_update["status"] = "ml_execution_completed"
    return state_update

def node_health_check(state: AgentState):
    """Graph node: run the dataset health-check tool and record its result.

    Returns a partial state update with ``health_stats`` and ``status``.
    """
    print("\n[Node: Health Check] ü©∫ Avaliando integridade dos dados...")

    # The health-check tool is invoked with an empty argument dict.
    no_args = {}
    health_stats = get_dataset_health_check.invoke(no_args)
    print(f"   Sa√∫de do Dataset: {health_stats}")

    return dict(health_stats=health_stats, status="health_check_completed")

def node_anomalies(state: AgentState):
    """Graph node: query low-trust samples and record them as anomalies.

    Returns a partial state update with ``anomalies`` and ``status``.
    """
    print("\n[Node: Anomalies] üö® Buscando IDs an√¥malos...")

    # Invoke the anomaly tool with the low-trust criterion.
    query_args = dict(criteria="low_trust", threshold=0.5)
    anomalies = query_anomalous_ids.invoke(query_args)
    print(f"   Anomalias encontradas: {len(anomalies)}")

    state_update = {"anomalies": anomalies}
    state_update["status"] = "workflow_finished"
    return state_update


# 3. CONSTRUINDO O GRAFO (Conectando os nós)
def _build_workflow():
    """Assemble the linear five-node graph over ``AgentState``."""
    graph = StateGraph(AgentState)

    # Nodes in execution order; each one is chained to the next below.
    steps = [
        ("discovery", node_discovery),
        ("ingestion", node_ingestion),
        ("ml_pipeline", node_ml_pipeline),
        ("health_check", node_health_check),
        ("anomalies", node_anomalies),
    ]
    for step_name, handler in steps:
        graph.add_node(step_name, handler)

    # Execution order (edges): first step is the entry point, last goes to END.
    graph.set_entry_point(steps[0][0])
    order = [step_name for step_name, _ in steps] + [END]
    for source, target in zip(order, order[1:]):
        graph.add_edge(source, target)

    return graph


workflow = _build_workflow()

# Compile the LangGraph application.
app = workflow.compile()

In [13]:
async def test_langgraph_workflow():
    """Stream the compiled graph for the "google" source and print every node update."""
    initial_state = {"traffic_source": "google"}

    print("‚è≥ Executando Grafo...")
    # Streaming yields one mapping per step, keyed by the node that just ran.
    async for step in app.astream(initial_state):
        for node_name in step:
            state_update = step[node_name]
            print(f"‚úÖ Passou pelo n√≥: {node_name}")
            print(f"Estado atualizado: {state_update}\n")

# Run the graph test; relies on the notebook's support for top-level `await`.
await test_langgraph_workflow()

‚è≥ Executando Grafo...

[Node: Discovery] Buscando campanhas...
   Campanhas encontradas: success=True campaigns=['yszqvqzj0c', '6b5w3e0qsv'] message=None
‚úÖ Passou pelo n√≥: discovery
Estado atualizado: {'target_hash': 'uw0qfu4a1r', 'status': 'discovery_completed'}


[Node: Ingestion] üì• Carregando dados do Mongo (Hash: uw0qfu4a1r)...
DEBUG [Context]: Mongo Data Loaded. Rows: 688
   Status Mongo: success=True message="SUCCESS: Loaded 688 requests into AnalysisContext.\nSources: google | Hashes: 1\nAction Required: Delegate to 'Metrics Analyst' agent to run ML inference now." num_requests=688
‚úÖ Passou pelo n√≥: ingestion
Estado atualizado: {'status': 'data_loaded_to_context'}


[Node: ML Pipeline] ü§ñ Executando infer√™ncia...
   Embedding selecionado: {'emb_conf': 'fasttext', 'model_path': 'G:/Meu Drive/TWR/data/google'}
google
Embedding type:  fasttext
Enter to Fasttext encoder


Criando Vocabul√°rio: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 688/688 [00:00<00:00, 4091.91it/s]


Using 11 out of 12 cores


Vetorizando: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 688/688 [00:06<00:00, 102.90it/s]


Finishing encoding
DEBUG [Context]: ML Results Stored. Rows: 688
   Resumo ML: Inference completed using 'google' model with results: 
Models Accuracy: 0.9273255813953488Total Error in prediction (possible anomalies): 50Analyzed 688 samples.
You can now now:
1. Call 'get_dataset_health_check' to see overall performance stats.
2. Call 'query_anomalous_ids' to extract specific samples for the Detective Agent.
‚úÖ Passou pelo n√≥: ml_pipeline
Estado atualizado: {'embedding_choice': {'emb_conf': 'fasttext', 'model_path': 'G:/Meu Drive/TWR/data/google'}, 'status': 'ml_execution_completed'}


[Node: Health Check] ü©∫ Avaliando integridade dos dados...
   Sa√∫de do Dataset: {'total_samples': 688, 'false_positives': 44, 'false_negatives': 6, 'avg_trust': 0.8922286629676819}
‚úÖ Passou pelo n√≥: health_check
Estado atualizado: {'health_stats': {'total_samples': 688, 'false_positives': 44, 'false_negatives': 6, 'avg_trust': 0.8922286629676819}, 'status': 'health_check_completed'}


[Node: Anoma