In [1]:
import asyncio
import psutil
import os
import time
import threading
# Import your test functions
# from test_pipeline import test_full_workflow 

# --- Formatting utility ---
def format_bytes(size):
    """Render a byte count as a human-readable string using binary (1024) units.

    Args:
        size: Number of bytes (int or float). May be negative, e.g. when
            formatting a memory delta; the sign is kept and the magnitude
            is scaled.

    Returns:
        A string with two decimals and a unit among B, KB, MB, GB, TB,
        for example ``"1.50 KB"``. Values beyond the TB range stay
        expressed in TB instead of running past the unit table.
    """
    power = 2**10
    power_labels = ('B', 'KB', 'MB', 'GB', 'TB')
    sign = '-' if size < 0 else ''
    magnitude = abs(size)
    n = 0
    # ">=" so that exactly 1024 rolls over to the next unit; the index cap
    # keeps very large values from indexing past the last label.
    while magnitude >= power and n < len(power_labels) - 1:
        magnitude /= power
        n += 1
    return f"{sign}{magnitude:.2f} {power_labels[n]}"

# --- Memory monitor class ---
class MemoryMonitor:
    """Background sampler tracking the resident set size (RSS) of this process.

    ``start()`` records the initial RSS and launches a daemon thread that
    polls ``psutil`` every ``interval`` seconds, keeping the largest value
    seen. ``stop()`` ends the thread, takes a final reading, folds it into
    the peak and prints a report.
    """

    def __init__(self, interval=0.1):
        """Prepare a monitor for the current process; nothing is sampled yet.

        Args:
            interval: Seconds between two RSS samples taken by the
                background thread.
        """
        self.interval = interval
        self.process = psutil.Process(os.getpid())
        self.running = False
        # RSS readings: highest seen, value at start(), value at stop().
        self.max_rss = 0
        self.start_rss = 0
        self.end_rss = 0
        self._thread = None
        # Set by stop() so the sampler wakes up immediately instead of
        # sleeping out the remainder of its interval.
        self._stop_event = threading.Event()

    def _monitor(self):
        """Sampling loop executed on the background thread until stop()."""
        while self.running:
            # RSS: Resident Set Size (physical RAM in use)
            current_rss = self.process.memory_info().rss
            if current_rss > self.max_rss:
                self.max_rss = current_rss
            # wait() returns True as soon as stop() sets the event.
            if self._stop_event.wait(self.interval):
                break

    def start(self):
        """Record the starting RSS, reset the peak and launch the sampler thread."""
        self.start_rss = self.process.memory_info().rss
        self.max_rss = self.start_rss
        self._stop_event.clear()
        self.running = True
        self._thread = threading.Thread(target=self._monitor, daemon=True)
        self._thread.start()
        print(f"üìâ Mem√≥ria Inicial: {format_bytes(self.start_rss)}")

    def stop(self):
        """Stop sampling, take a final reading and print the memory report."""
        self.running = False
        self._stop_event.set()
        if self._thread:
            self._thread.join()
        self.end_rss = self.process.memory_info().rss
        # The final reading is a sample too: without this the reported peak
        # could be lower than the reported final value.
        self.max_rss = max(self.max_rss, self.end_rss)

        # Format the magnitude and add the sign here so a shrinking process
        # is still reported in scaled units.
        delta = self.end_rss - self.start_rss
        sign = "-" if delta < 0 else ""

        print("\n" + "="*40)
        print(f"üìä RELAT√ìRIO DE MEM√ìRIA DO PROCESSO")
        print("="*40)
        print(f"üìâ Inicial:      {format_bytes(self.start_rss)}")
        print(f"üìà Final:        {format_bytes(self.end_rss)}")
        print(f"üöÄ PICO (Peak):  {format_bytes(self.max_rss)}")
        print(f"üíß Diferen√ßa:    {sign}{format_bytes(abs(delta))}")
        print("="*40)


In [None]:
%load_ext autoreload
%autoreload 2

import pandas as pd
from app.tools.context_store import AnalysisContext 
from app.tools.metrics_agent_tools import get_dataset_health_check, query_anomalous_ids, run_ml_inference_pipeline
from app.tools.data_tools import *

# Tools handed to the orchestrator; each tool appears exactly once.
# The spelling of list_avaiable_datasets is kept as-is because the name is
# defined outside this notebook.
orchestrator_tools = [
    query_mongo_requests,
    query_sql_campaigns,
    list_avaiable_datasets,
    inspect_file_schema,
    load_dataset_into_context,
    check_context_status,
]


async def test_full_workflow(target_hash="uw0qfu4a1r"):
    """Run the end-to-end integration smoke test of the agent tools without an LLM.

    Steps, each reported through ``print``:
      1. ``query_sql_campaigns`` (discovery)
      2. ``query_mongo_requests`` (ingestion), then ``AnalysisContext.get_status()``
      3. ``run_ml_inference_pipeline``
      4. ``get_dataset_health_check``
      5. ``query_anomalous_ids``

    A failure in steps 1-3 aborts the run; failures in steps 4 and 5 are
    only reported. Exceptions are printed with ``repr`` so the exception
    type is visible (a bare ``KeyError`` would otherwise print just the key).

    Args:
        target_hash: Campaign hash loaded in step 2. The SQL result of
            step 1 is only printed, not parsed, so the hash must be
            supplied by the caller; the default is the value that was
            previously hard-coded here.

    Returns:
        None.
    """
    print("\nüîπ --- INICIANDO TESTE DE INTEGRA√á√ÉO (SEM LLM) ---")

    # ==============================================================================
    # 1. ORCHESTRATOR SIMULATION (Discovery & Ingestion)
    # ==============================================================================
    print("\nTesting: 1. Orchestrator - Discovery (SQL)")

    # Simulates the LLM calling the tool with arguments.
    # Note: if the tools use @tool, call .invoke()/.ainvoke() or the decorated
    # function directly, depending on the LangChain version.
    try:
        # Try to list Google campaigns
        campaigns_str = await query_sql_campaigns.ainvoke({"traffic_source": "google", "limit": 2})
        print(f"‚úÖ SQL Result: {campaigns_str}")

        # TEST HACK:
        # There is no LLM here to read the result and pick a hash, so the
        # hash comes from the `target_hash` argument instead of being
        # extracted from `campaigns_str`.
        print(f"Usando Hash Alvo: {target_hash}")

    except Exception as e:
        print(f"‚ùå SQL Failed: {e!r}")
        return

    print("\nTesting: 2. Orchestrator - Ingestion (Mongo -> Context)")
    try:
        # Simulate the load
        status_msg = await query_mongo_requests.ainvoke({
            "hash": target_hash,
            # "hashes": campaigns_str,
            "traffic_source": "google"
        })
        print(f"‚úÖ Load Result: {status_msg}")

        # STATE CHECK (crucial): peek into the shared context to see whether
        # the load actually stored something.
        try:
            print("Status: ", AnalysisContext.get_status())
        except ValueError:
            print("‚ùå VERIFICA√á√ÉO FALHOU: Contexto est√° vazio!")
            return

    except Exception as e:
        print(f"‚ùå Mongo Load Failed: {e!r}")
        # If no database is running, uncomment the line below to mock the data:
        # mock_data_loading()
        return

    print("\nTesting: 3. Sub-agent - ML Execution")
    try:
        # The agent calls this without arguments; the tool is expected to
        # read its input from the context.
        inference_summary = run_ml_inference_pipeline.invoke({})
        print(f"‚úÖ Inference Result: {inference_summary}")
    except Exception as e:
        print(f"‚ùå ML Pipeline Failed: {e!r}")
        return

    print("\nTesting: 4. Sub-agent - Health Check")
    try:
        health_stats = get_dataset_health_check.invoke({})
        print(f"‚úÖ Health Stats: {health_stats}")
    except Exception as e:
        # Reported only; step 5 still runs.
        print(f"‚ùå Health Check Failed: {e!r}")

    print("\nTesting: 5. Sub-agent - Query Anomalies")
    try:
        # Look up IDs with low trust
        anomalies = query_anomalous_ids.invoke({"criteria": "low_trust", "threshold": 0.5})
        print(f"‚úÖ Found {len(anomalies)} anomalies. Sample IDs: {anomalies[:5]}")
    except Exception as e:
        print(f"‚ùå Query Failed: {e!r}")

    print("\nüîπ --- TESTE FINALIZADO ---")

# --- OPTIONAL MOCK (if you do not have Mongo running locally) ---
def mock_data_loading():
    """Fill the analysis context with a small synthetic request table.

    Builds 100 rows (ids 0..99, a constant "/home" url): the first 50 carry
    a "Mozilla/5.0" user agent with label 1, the last 50 a "Googlebot" user
    agent with label 0. The frame is stored through
    ``AnalysisContext.set_mongo_data`` under the "google" source.
    """
    print("‚ö†Ô∏è MOCKING DATA LOADING...")

    n_rows = 100
    half = n_rows // 2

    columns = {}
    columns["id"] = range(n_rows)
    columns["user_agent"] = [
        agent for agent in ("Mozilla/5.0", "Googlebot") for _ in range(half)
    ]
    columns["url"] = ["/home" for _ in range(n_rows)]
    # 1=Human, 0=Bot
    columns["label"] = [1 if row < half else 0 for row in range(n_rows)]

    mock_frame = pd.DataFrame(columns)
    AnalysisContext.set_mongo_data(mock_frame, "google")
    print("‚úÖ Mock data loaded into Context.")

# --- Your test wrapper ---
async def run_with_monitoring():
    """Run ``test_full_workflow`` while a ``MemoryMonitor`` samples the process.

    The monitor is started before the workflow and is always stopped
    afterwards, including when the workflow raises.
    """
    sampling_period_s = 0.1  # one sample every 100 ms
    rss_tracker = MemoryMonitor(interval=sampling_period_s)
    rss_tracker.start()
    try:
        print("‚è≥ Executando Workflow...")
        # The workflow under measurement
        await test_full_workflow()
    finally:
        rss_tracker.stop()

def check_gpu_memory():
    """Print the CUDA memory currently allocated and reserved through torch.

    Does nothing when torch cannot be imported or when
    ``torch.cuda.is_available()`` is false.
    """
    try:
        import torch

        cuda = torch.cuda
        if not cuda.is_available():
            return
        print("\n RELAT√ìRIO GPU (VRAM)")
        allocated_bytes = cuda.memory_allocated()
        print(f"Alocada: {format_bytes(allocated_bytes)}")
        reserved_bytes = cuda.memory_reserved()
        print(f"Reservada: {format_bytes(reserved_bytes)}")
    except ImportError:
        # torch is optional; without it there is nothing to report.
        return

# Notebook entry point: run the monitored workflow when the cell is executed.
# The bare `await` at cell level depends on the notebook runtime accepting
# top-level await; in a plain .py script this line is a syntax error and
# asyncio.run(run_with_monitoring()) would be needed instead.
if __name__ == "__main__":
    await run_with_monitoring()

  from .autonotebook import tqdm as notebook_tqdm


Using 11 out of 12 cores


  delegate = self.__delegate_class__(*args, **kwargs)


Garantindo √≠ndices...
üìâ Mem√≥ria Inicial: 517.51 MB
‚è≥ Executando Workflow...

üîπ --- INICIANDO TESTE DE INTEGRA√á√ÉO (SEM LLM) ---

Testing: 1. Orchestrator - Discovery (SQL)
‚úÖ SQL Result: ['qta46nlsd6', 'wern8rs7b1']
Usando Hash Alvo: uw0qfu4a1r

Testing: 2. Orchestrator - Ingestion (Mongo -> Context)
DEBUG [Context]: Mongo Data Loaded. Rows: 1000
‚úÖ Load Result: SUCCESS: Loaded 1000 requests into AnalysisContext.
Sources: google | Hashes: 1
Action Required: Delegate to 'Metrics Analyst' agent to run ML inference now.
Status:  Mongo Raw: 1000 | ML Processed: Pending

Testing: 3. Sub-agent - ML Execution
Embedding type:  fasttext
[DEBUG] Model Path FASTTEXT: c:\Users\Camille\Documents\TWR\deep_agents_twr/files/models/embedding/fasttext_google.model
Enter to Fasttext encoder


Criando Vocabul√°rio: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1000/1000 [00:00<00:00, 2418.47it/s]


Using 11 out of 12 cores


Vetorizando: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1000/1000 [00:07<00:00, 133.72it/s]


Finishing encoding
(1000, 100)
DEBUG [Context]: ML Results Stored. Rows: 1000
checando se a tool de inferencia salva dos dados: 1000
‚úÖ Inference Result: Inference completed using 'google' model with results: 
Models Accuracy: 0.902Total Error in prediction (possible anomalies): 98Analyzed 1000 samples.
You can now now:
1. Call 'get_dataset_health_check' to see overall performance stats.
2. Call 'query_anomalous_ids' to extract specific samples for the Detective Agent.

Testing: 4. Sub-agent - Health Check
‚úÖ Health Stats: {'total_samples': 1000, 'false_positives': 51, 'false_negatives': 47, 'avg_trust': 0.894597053527832}

Testing: 5. Sub-agent - Query Anomalies
‚ùå Query Failed: 'id'

üîπ --- TESTE FINALIZADO ---

üìä RELAT√ìRIO DE MEM√ìRIA DO PROCESSO
üìâ Inicial:      517.51 MB
üìà Final:        1.48 GB
üöÄ PICO (Peak):  1.49 GB
üíß Diferen√ßa:    1001.74 MB


In [3]:
from app.config.container import request_service
from app.config.container import campaign_service


# Ask the campaign service for recent active campaigns of the "google" traffic
# source (limit=50), then pass that result to the request service to fetch a
# training sample.
# NOTE(review): the return types of both calls are not visible in this
# notebook — presumably campaign hashes and a list of records; confirm
# against the services in app.config.container.
campaigns = await campaign_service.fetch_recent_active_campaigns(traffic_source="google", limit=50)
results = await request_service.fetch_training_sample_by_hashes(campaigns)

In [4]:
import pandas as pd

# Tabulate the fetched sample; the trailing expression displays how many rows
# fall under each value of the "decision" column.
df_results = pd.DataFrame(results)
df_results["decision"].value_counts()

decision
bots      10000
unsafe    10000
Name: count, dtype: int64