In [None]:
import json

# Criar o notebook completo como arquivo Python
notebook_content = '''# ====================================================================
# MARKETING DATA SCIENTIST PARTNER AGENT - PRODUCTION NOTEBOOK
# Arquitetura Multi-Agente ADK para Diagnóstico de Campanhas
# ====================================================================

"""
🎯 OBJETIVO:
Agente cientista de dados sênior que atua como parceiro estratégico para:
- Diagnóstico de problemas em campanhas de tráfego pago
- EDA (Exploratory Data Analysis) automatizada
- Testes estatísticos rigorosos (Chi², T-test, ANOVA)
- Análise de causa raiz (RCA) sistemática
- Geração de insights acionáveis e visualizações

🏗️ ARQUITETURA:
- 1 CoordinatorAgent (orquestrador híbrido)
- 8 Agentes Especialistas (DataQuality, Tracking, Funnel, Diagnostic, PMax, Stats, Experiment, Insights)
- 2 Agentes Novos (EDA, Visualization)
- BigQuery como fonte de dados unificada
- Statistical Toolkit com scipy.stats
"""

# ====================================================================
# CELL 1: INSTALAÇÃO E SETUP INICIAL
# ====================================================================

import sys
print(f"üêç Python: {sys.version}")
print("\\n[INFO] Installing dependencies...\\n")

!pip install -q google-adk>=1.18.0
!pip install -q google-cloud-bigquery>=3.15.0
!pip install -q scipy>=1.11.0 pandas>=2.1.0 numpy>=1.24.0
!pip install -q matplotlib>=3.7.0 seaborn>=0.12.0 plotly>=5.17.0
!pip install -q scikit-learn>=1.3.0
!pip install -q gradio>=4.14.0

print("\\n[OK] All dependencies installed! ‚úÖ")


# ====================================================================
# CELL 2: CONFIGURAÇÃO SEGURA DE CREDENCIAIS
# ====================================================================

import os
import logging
import tempfile
import atexit
from kaggle_secrets import UserSecretsClient

# Configure root logging once for the notebook: INFO level, with each record
# rendered as "timestamp | LEVEL | message".
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s | %(levelname)-8s | %(message)s'
)
# Module-level logger used by the code that follows.
logger = logging.getLogger(__name__)

class SecureCredentialsManager:
    """Load credentials from Kaggle Secrets and remove temp files at exit.

    Every temporary credential file created by this manager is recorded in
    ``temp_files`` and deleted by :meth:`cleanup`, which is registered with
    ``atexit`` when the instance is created.
    """

    def __init__(self):
        self.temp_files = []
        atexit.register(self.cleanup)

    def setup_gemini_key(self) -> bool:
        """Export the Gemini API key from Kaggle Secrets into the environment.

        Returns:
            True when ``GOOGLE_API_KEY`` was retrieved, is at least 20
            characters long and was exported; False on any failure (the
            failure is logged and a hint is printed).
        """
        try:
            api_key = UserSecretsClient().get_secret("GOOGLE_API_KEY")
            if not api_key or len(api_key) < 20:
                raise ValueError("Invalid API key")
            os.environ["GOOGLE_API_KEY"] = api_key
            os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "FALSE"
            logger.info("‚úÖ Gemini API configured")
            return True
        except Exception as e:
            # Notebook boundary: report the problem and let the caller decide.
            logger.error(f"‚ùå API key failed: {e}")
            print("\\n[ACTION] Add GOOGLE_API_KEY in Kaggle Secrets")
            return False

    def setup_bigquery_credentials(self) -> tuple:
        """Write the BigQuery service-account JSON to a private temp file.

        The file path is exported as ``GOOGLE_APPLICATION_CREDENTIALS``.

        Returns:
            ``(True, path)`` on success, ``(False, "")`` when the secret is
            missing/empty or the file could not be written.
        """
        try:
            creds = UserSecretsClient().get_secret("BIGQUERY_SERVICE_ACCOUNT_JSON")
            if not creds:
                # Fail before creating a file that would otherwise be orphaned.
                raise ValueError("Empty service account JSON")
            fd, path = tempfile.mkstemp(suffix='.json', prefix='bq_')
            # Register immediately so a failed write is still cleaned up.
            self.temp_files.append(path)
            # fdopen takes ownership of fd and closes it even if write() raises.
            with os.fdopen(fd, "wb") as handle:
                handle.write(creds.encode())
            os.chmod(path, 0o600)
            os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = path
            logger.info("‚úÖ BigQuery configured")
            return True, path
        except Exception as e:
            logger.warning(f"‚ö†Ô∏è BigQuery not configured: {e}")
            return False, ""

    def cleanup(self):
        """Delete every recorded temp file; never raises."""
        for path in list(self.temp_files):
            try:
                os.unlink(path)
            except FileNotFoundError:
                # Already gone: nothing to do.
                pass
            except OSError as e:
                # Best effort at interpreter exit: report, do not crash.
                logger.warning(f"Could not remove temp file {path}: {e}")
        self.temp_files.clear()

# Initialize credentials. The three module-level flags below are read by the
# later cells (BIGQUERY_ENABLED / BQ_PATH drive the BigQuery toolset setup).
creds_manager = SecureCredentialsManager()
GEMINI_READY = creds_manager.setup_gemini_key()
BIGQUERY_ENABLED, BQ_PATH = creds_manager.setup_bigquery_credentials()

# The Gemini key is mandatory; BigQuery is optional.
if not GEMINI_READY:
    raise RuntimeError("Cannot proceed without API key")

# Print a short status banner.
print(f"\\n{'='*60}")
print("üîê Security Status:")
print(f"  ‚úÖ Gemini: Configured")
print(f"  {'‚úÖ' if BIGQUERY_ENABLED else '‚ö†Ô∏è'} BigQuery: {'Enabled' if BIGQUERY_ENABLED else 'Optional'}")
print(f"{'='*60}\\n")


# ====================================================================
# CELL 3: IMPORTS E CONFIGURAÇÃO DO BIGQUERY
# ====================================================================

from google.adk.agents import Agent, SequentialAgent, ParallelAgent, LoopAgent
from google.adk.runners import InMemoryRunner
from google.adk.tools import AgentTool, FunctionTool, google_search

import pandas as pd
import numpy as np
from scipy import stats
from scipy.stats import chi2_contingency, ttest_ind, f_oneway, pearsonr
import math
import json
from typing import Dict, Any, List, Optional, Tuple
from io import StringIO
from dataclasses import dataclass, field
from datetime import datetime, timedelta
import warnings

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Silence all Python warnings for the rest of the notebook.
warnings.filterwarnings('ignore')

# Configure BigQuery (conditional): bq_toolset stays None unless credentials
# were written successfully AND the toolset could be constructed.
bq_toolset = None
if BIGQUERY_ENABLED:
    try:
        # Imported lazily so the notebook still runs when BigQuery is unused.
        from google.adk.tools.bigquery import BigQueryToolset, BigQueryCredentialsConfig, BigQueryToolConfig, WriteMode
        from google.oauth2 import service_account
        
        credentials = service_account.Credentials.from_service_account_file(BQ_PATH)
        creds_config = BigQueryCredentialsConfig(credentials=credentials)
        # NOTE(review): WriteMode.BLOCKED presumably restricts the toolset to
        # read-only queries - confirm against the ADK BigQuery tool docs.
        tool_config = BigQueryToolConfig(write_mode=WriteMode.BLOCKED)
        bq_toolset = BigQueryToolset(
            credentials_config=creds_config, 
            bigquery_tool_config=tool_config
        )
        logger.info("‚úÖ BigQuery initialized")
    except Exception as e:
        # Any failure downgrades BigQuery to disabled instead of aborting.
        logger.error(f"BigQuery init failed: {e}")
        BIGQUERY_ENABLED = False

logger.info("‚úÖ Imports complete")
print("[OK] Environment ready! üöÄ\\n")


# ====================================================================
# CELL 4: FRAMEWORK DE VALIDAÇÃO
# ====================================================================

class ValidationError(Exception):
    """Raised when an input value fails validation."""
    pass


class InputValidator:
    """Input validators for the statistical functions.

    All methods return None on success and raise ValidationError otherwise.
    """

    @staticmethod
    def _is_real_number(value) -> bool:
        """Return True for Python/NumPy ints and floats, excluding bool."""
        if isinstance(value, bool):
            return False
        return isinstance(value, (int, float, np.integer, np.floating))

    @staticmethod
    def _is_integer(value) -> bool:
        """Return True for Python/NumPy integers, excluding bool."""
        if isinstance(value, bool):
            return False
        return isinstance(value, (int, np.integer))

    @staticmethod
    def validate_probability(value: float, name: str):
        """Check that ``value`` is a number strictly between 0 and 1.

        Raises:
            ValidationError: if ``value`` is not numeric or lies outside (0, 1).
                NaN fails the range comparison and is therefore rejected.
        """
        if not InputValidator._is_real_number(value):
            raise ValidationError(f"{name} must be numeric")
        if not 0 < value < 1:
            raise ValidationError(f"{name} must be in (0,1), got {value}")

    @staticmethod
    def validate_positive(value: float, name: str):
        """Check that ``value`` is a finite number greater than zero.

        Raises:
            ValidationError: if ``value`` is not numeric, is NaN/infinite,
                or is less than or equal to zero.
        """
        if not InputValidator._is_real_number(value):
            raise ValidationError(f"{name} must be numeric")
        # "nan <= 0" is False, so NaN must be rejected explicitly.
        if not math.isfinite(value):
            raise ValidationError(f"{name} must be finite, got {value}")
        if value <= 0:
            raise ValidationError(f"{name} must be positive")

    @staticmethod
    def validate_ab_test_inputs(ctrl_conv, ctrl_total, treat_conv, treat_total):
        """Check the four counts of an A/B test.

        Each value must be a non-negative integer (Python or NumPy), totals
        must be non-zero, and conversions cannot exceed their total.

        Raises:
            ValidationError: on the first rule that is violated.
        """
        for val, name in [
            (ctrl_conv, "control_conversions"),
            (ctrl_total, "control_total"),
            (treat_conv, "treatment_conversions"),
            (treat_total, "treatment_total")
        ]:
            if not InputValidator._is_integer(val) or val < 0:
                raise ValidationError(f"{name} must be non-negative integer")

        if ctrl_total == 0 or treat_total == 0:
            raise ValidationError("Total cannot be zero")
        if ctrl_conv > ctrl_total:
            raise ValidationError("Control conversions > total")
        if treat_conv > treat_total:
            raise ValidationError("Treatment conversions > total")

    @staticmethod
    def validate_dataframe(df: pd.DataFrame, required_cols: Optional[List[str]] = None):
        """Check that ``df`` is non-empty and has every required column.

        Raises:
            ValidationError: if ``df`` is None or empty, or if any name in
                ``required_cols`` is missing from its columns.
        """
        if df is None or df.empty:
            raise ValidationError("DataFrame is empty")

        if required_cols:
            missing = set(required_cols) - set(df.columns)
            if missing:
                raise ValidationError(f"Missing columns: {missing}")

logger.info("‚úÖ Validation framework ready")
print("[OK] Input validation loaded!\\n")


# ====================================================================
# CELL 5: STATISTICAL TOOLKIT COMPLETO
# ====================================================================

@dataclass
class SampleSizeResult:
    """Outcome of an A/B-test sample-size calculation."""
    sample_size_per_group: int
    total_sample_size: int
    baseline_rate: float
    target_rate: float
    mde_percentage: float
    mde_absolute: float
    alpha: float
    power: float

    def to_dict(self):
        """Return every field as a plain dict, in declaration order."""
        return {
            field_name: getattr(self, field_name)
            for field_name in self.__dataclass_fields__
        }

@dataclass
class SignificanceResult:
    """Outcome of a statistical significance test between two groups."""
    control_rate: float
    treatment_rate: float
    uplift_relative_pct: float
    uplift_absolute_pp: float
    p_value: float
    z_statistic: float
    is_significant: bool
    is_positive: bool
    ci_95_lower: float
    ci_95_upper: float
    sample_sizes: Dict[str, int]

    def to_dict(self):
        """Return a report dict with the raw fields plus derived text.

        Adds an ``interpretation`` label, a ship/stop/wait ``recommendation``
        and the confidence bounds both as stored and multiplied by 100.
        """
        # Decide the recommendation: not significant -> wait; otherwise the
        # direction of the effect picks between ship and stop.
        if not self.is_significant:
            verdict = "[WAIT] KEEP TESTING: Not yet significant"
        elif self.is_positive:
            verdict = "[OK] SHIP IT: Significant positive impact"
        else:
            verdict = "[STOP] DO NOT SHIP: Significant negative impact"

        label = "NOT SIGNIFICANT"
        if self.is_significant:
            label = "SIGNIFICANT (p < 0.05)"

        interval = {
            "lower": self.ci_95_lower,
            "upper": self.ci_95_upper,
            "lower_pp": self.ci_95_lower * 100,
            "upper_pp": self.ci_95_upper * 100,
        }

        report = {}
        report["control_rate"] = self.control_rate
        report["treatment_rate"] = self.treatment_rate
        report["uplift_relative_percentage"] = self.uplift_relative_pct
        report["uplift_absolute_pp"] = self.uplift_absolute_pp
        report["p_value"] = self.p_value
        report["z_statistic"] = self.z_statistic
        report["is_significant"] = self.is_significant
        report["is_positive"] = self.is_positive
        report["confidence_interval_95"] = interval
        report["interpretation"] = label
        report["recommendation"] = verdict
        report["sample_sizes"] = self.sample_sizes
        return report

@dataclass
class EDAResult:
    """Outcome of an exploratory data analysis run."""
    shape: Dict[str, int]
    columns: List[str]
    dtypes: Dict[str, str]
    missing_values: Dict[str, Dict[str, float]]
    duplicate_rows: int
    numeric_summary: Dict[str, Dict[str, float]]
    categorical_summary: Dict[str, Dict[str, Any]]
    correlations: Dict[str, float]
    outliers: Dict[str, int]

    def to_dict(self):
        """Return every field as a plain dict, in declaration order."""
        return {
            field_name: getattr(self, field_name)
            for field_name in self.__dataclass_fields__
        }

class StatisticalToolkit:
    """Statistical helpers for campaign analysis.

    Every value placed in a returned dict/result is converted to a native
    Python ``float``/``int``/``bool``: SciPy and NumPy return ``np.float64``
    and comparisons on those yield ``np.bool_``, which ``json.dumps`` refuses
    to serialize.
    """

    @staticmethod
    def calculate_sample_size(
        baseline_rate: float,
        mde: float,
        alpha: float = 0.05,
        power: float = 0.8
    ) -> "SampleSizeResult":
        """Compute the per-group sample size for a two-proportion A/B test.

        Args:
            baseline_rate: Baseline conversion rate, strictly in (0, 1).
            mde: Minimum detectable effect in percentage points
                (``mde=2`` targets ``baseline_rate + 0.02``).
            alpha: Two-sided significance level, strictly in (0, 1).
            power: Desired statistical power, strictly in (0, 1).

        Returns:
            SampleSizeResult with the per-group and total sample sizes.

        Raises:
            ValidationError: on invalid inputs or when the target rate
                reaches 100%.
        """
        InputValidator.validate_probability(baseline_rate, "baseline_rate")
        InputValidator.validate_positive(mde, "mde")
        # alpha/power of 0 or 1 would send the normal quantiles to +/-inf.
        InputValidator.validate_probability(alpha, "alpha")
        InputValidator.validate_probability(power, "power")

        p1 = baseline_rate
        p2 = baseline_rate + (mde / 100)

        if p2 >= 1.0:
            raise ValidationError(f"Target rate ({p2:.2%}) exceeds 100%")

        z_alpha = float(stats.norm.ppf(1 - alpha / 2))
        z_beta = float(stats.norm.ppf(power))

        # n = (z_alpha + z_beta)^2 * (p1*q1 + p2*q2) / (p1 - p2)^2
        numerator = (z_alpha + z_beta) ** 2 * (p1 * (1 - p1) + p2 * (1 - p2))
        denominator = (p1 - p2) ** 2

        n_per_group = math.ceil(numerator / denominator)

        return SampleSizeResult(
            sample_size_per_group=n_per_group,
            total_sample_size=n_per_group * 2,
            baseline_rate=baseline_rate,
            target_rate=p2,
            mde_percentage=mde,
            mde_absolute=p2 - p1,
            alpha=alpha,
            power=power
        )

    @staticmethod
    def calculate_statistical_significance(
        ctrl_conv: int,
        ctrl_total: int,
        treat_conv: int,
        treat_total: int,
        alpha: float = 0.05
    ) -> "SignificanceResult":
        """Run a two-sided two-proportion Z test on A/B results.

        Args:
            ctrl_conv: Conversions in the control group.
            ctrl_total: Size of the control group.
            treat_conv: Conversions in the treatment group.
            treat_total: Size of the treatment group.
            alpha: Significance level, strictly in (0, 1).

        Returns:
            SignificanceResult holding rates, uplift, p-value, z statistic and
            a (1 - alpha) confidence interval for the rate difference.

        Raises:
            ValidationError: on invalid counts or alpha.
        """
        InputValidator.validate_ab_test_inputs(
            ctrl_conv, ctrl_total, treat_conv, treat_total
        )
        InputValidator.validate_probability(alpha, "alpha")

        p1 = ctrl_conv / ctrl_total
        p2 = treat_conv / treat_total

        # Z test for proportions, using the pooled rate for the standard error.
        p_pooled = (ctrl_conv + treat_conv) / (ctrl_total + treat_total)
        se = math.sqrt(p_pooled * (1 - p_pooled) * (1/ctrl_total + 1/treat_total))

        # se == 0 only when the pooled rate is 0 or 1: no detectable difference.
        z = (p2 - p1) / se if se > 0 else 0.0
        p_value = float(2 * (1 - stats.norm.cdf(abs(z))))

        # Uplift: relative (percent of control) and absolute (percentage points).
        uplift_relative = ((p2 - p1) / p1 * 100) if p1 > 0 else 0.0
        uplift_absolute = (p2 - p1) * 100

        # Confidence interval for the difference, using the unpooled error.
        se_diff = math.sqrt(p1 * (1 - p1) / ctrl_total + p2 * (1 - p2) / treat_total)
        ci_margin = float(stats.norm.ppf(1 - alpha/2)) * se_diff
        ci_lower = p2 - p1 - ci_margin
        ci_upper = p2 - p1 + ci_margin

        return SignificanceResult(
            control_rate=p1,
            treatment_rate=p2,
            uplift_relative_pct=uplift_relative,
            uplift_absolute_pp=uplift_absolute,
            p_value=p_value,
            z_statistic=float(z),
            is_significant=bool(p_value < alpha),
            is_positive=bool(p2 > p1),
            ci_95_lower=float(ci_lower),
            ci_95_upper=float(ci_upper),
            sample_sizes={
                "control": int(ctrl_total),
                "treatment": int(treat_total),
                "total": int(ctrl_total + treat_total)
            }
        )

    @staticmethod
    def perform_chi_square_test(
        observed: List[List[int]],
        alpha: float = 0.05
    ) -> Dict[str, Any]:
        """Run a chi-square test of independence (no continuity correction).

        Args:
            observed: Contingency table as a list of rows.
            alpha: Significance threshold applied to the p-value.

        Returns:
            JSON-serializable dict with the statistic, p-value, degrees of
            freedom, significance flag and expected frequencies.
        """
        obs_array = np.array(observed)
        chi2, p_value, dof, expected = chi2_contingency(obs_array, correction=False)

        return {
            "test_type": "chi_square",
            "chi2_statistic": float(chi2),
            "p_value": float(p_value),
            "degrees_of_freedom": int(dof),
            "is_significant": bool(p_value < alpha),
            "expected_frequencies": expected.tolist()
        }

    @staticmethod
    def perform_t_test(
        group_a: List[float],
        group_b: List[float],
        alpha: float = 0.05
    ) -> Dict[str, Any]:
        """Run a two-sample t-test with ``equal_var=False`` (Welch's test).

        Args:
            group_a: Values of group A.
            group_b: Values of group B.
            alpha: Significance threshold applied to the p-value.

        Returns:
            JSON-serializable dict with the statistic, p-value, significance
            flag, both means, their difference (B - A) and the relative
            change in percent (0 when the mean of A is zero).
        """
        t_stat, p_value = ttest_ind(group_a, group_b, equal_var=False)

        mean_a = float(np.mean(group_a))
        mean_b = float(np.mean(group_b))
        relative_change = (mean_b - mean_a) / mean_a * 100 if mean_a != 0 else 0.0

        return {
            "test_type": "t_test",
            "t_statistic": float(t_stat),
            "p_value": float(p_value),
            "is_significant": bool(p_value < alpha),
            "mean_group_a": mean_a,
            "mean_group_b": mean_b,
            "difference": mean_b - mean_a,
            "relative_change_pct": float(relative_change)
        }

    @staticmethod
    def perform_anova(
        *groups: List[float],
        alpha: float = 0.05
    ) -> Dict[str, Any]:
        """Run a one-way ANOVA across several groups.

        Args:
            *groups: One sequence of values per group.
            alpha: Significance threshold applied to the p-value
                (keyword-only).

        Returns:
            JSON-serializable dict with the F statistic, p-value,
            significance flag, number of groups and per-group means.
        """
        f_stat, p_value = f_oneway(*groups)

        return {
            "test_type": "anova",
            "f_statistic": float(f_stat),
            "p_value": float(p_value),
            "is_significant": bool(p_value < alpha),
            "num_groups": len(groups),
            "group_means": [float(np.mean(g)) for g in groups]
        }

    @staticmethod
    def perform_eda(csv_data: str) -> "EDAResult":
        """Run an exploratory data analysis on CSV text.

        Args:
            csv_data: The dataset as a CSV string.

        Returns:
            EDAResult with shape, dtypes, missing values, duplicates, numeric
            and categorical summaries, pairwise correlations and IQR-based
            outlier counts.

        Raises:
            ValidationError: if the CSV cannot be parsed or has no rows.
        """
        try:
            df = pd.read_csv(StringIO(csv_data))
        except Exception as e:
            raise ValidationError(f"Invalid CSV: {e}")

        InputValidator.validate_dataframe(df)

        # Shape
        shape = {"rows": len(df), "columns": len(df.columns)}

        # Columns and dtypes
        columns = df.columns.tolist()
        dtypes = {col: str(dtype) for col, dtype in df.dtypes.items()}

        # Missing values: only columns that actually have gaps are reported.
        missing = df.isnull().sum()
        missing_pct = (missing / len(df) * 100).round(2)
        missing_summary = {
            col: {"count": int(missing[col]), "percentage": float(missing_pct[col])}
            for col in df.columns if missing[col] > 0
        }

        # Duplicates
        duplicate_rows = int(df.duplicated().sum())

        # Numeric summary
        numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
        numeric_summary = {}
        for col in numeric_cols:
            series = df[col]
            numeric_summary[col] = {
                "mean": float(series.mean()),
                "median": float(series.median()),
                "std": float(series.std()),
                "min": float(series.min()),
                "max": float(series.max()),
                "q25": float(series.quantile(0.25)),
                "q75": float(series.quantile(0.75))
            }

        # Categorical summary
        categorical_cols = df.select_dtypes(include=['object']).columns.tolist()
        categorical_summary = {}
        for col in categorical_cols:
            value_counts = df[col].value_counts()
            modes = df[col].mode()  # empty when the column is entirely null
            categorical_summary[col] = {
                "unique_values": int(df[col].nunique()),
                "top_5_values": value_counts.head(5).to_dict(),
                "mode": str(modes.iloc[0]) if len(modes) > 0 else None
            }

        # Correlations: one entry per unordered pair of numeric columns.
        correlations = {}
        if len(numeric_cols) >= 2:
            corr_matrix = df[numeric_cols].corr()
            for i, col1 in enumerate(numeric_cols):
                for col2 in numeric_cols[i+1:]:
                    correlations[f"{col1}_vs_{col2}"] = float(corr_matrix.loc[col1, col2])

        # Outliers (IQR method): values beyond 1.5 * IQR from the quartiles.
        outliers = {}
        for col in numeric_cols:
            q1 = df[col].quantile(0.25)
            q3 = df[col].quantile(0.75)
            iqr = q3 - q1
            outlier_mask = (df[col] < q1 - 1.5 * iqr) | (df[col] > q3 + 1.5 * iqr)
            outliers[col] = int(outlier_mask.sum())

        return EDAResult(
            shape=shape,
            columns=columns,
            dtypes=dtypes,
            missing_values=missing_summary,
            duplicate_rows=duplicate_rows,
            numeric_summary=numeric_summary,
            categorical_summary=categorical_summary,
            correlations=correlations,
            outliers=outliers
        )

# Wrapper functions para FunctionTools
def safe_calculate_sample_size(
    baseline_rate: float,
    mde: float,
    alpha: float = 0.05,
    power: float = 0.8
) -> str:
    """Calculate the required sample size for an A/B test.

    Every parameter carries a type annotation so that a tool schema can be
    derived from the signature.

    Args:
        baseline_rate: Baseline conversion rate, between 0 and 1.
        mde: Minimum detectable effect in percentage points.
        alpha: Significance level (default 0.05).
        power: Statistical power (default 0.8).

    Returns:
        A JSON string with the sample-size result, or ``{"error": ...}``
        when the calculation fails; this function never raises.
    """
    try:
        result = StatisticalToolkit.calculate_sample_size(baseline_rate, mde, alpha, power)
        return json.dumps(result.to_dict(), indent=2)
    except Exception as e:
        # Tool boundary: report the failure as data instead of raising.
        return json.dumps({"error": str(e)})

def safe_calculate_significance(
    ctrl_conv: int,
    ctrl_total: int,
    treat_conv: int,
    treat_total: int
) -> str:
    """Run the A/B significance test and return the outcome as JSON text.

    Any exception is converted into ``{"error": "<message>"}``; this
    function never raises.
    """
    try:
        payload = StatisticalToolkit.calculate_statistical_significance(
            ctrl_conv, ctrl_total, treat_conv, treat_total
        ).to_dict()
        return json.dumps(payload, indent=2)
    except Exception as exc:
        failure = {"error": str(exc)}
        return json.dumps(failure)

def safe_perform_eda(csv_data: str) -> str:
    """Run the EDA on CSV text and return the outcome as JSON text.

    Any exception is converted into ``{"error": "<message>"}``; this
    function never raises.
    """
    try:
        payload = StatisticalToolkit.perform_eda(csv_data).to_dict()
        return json.dumps(payload, indent=2)
    except Exception as exc:
        failure = {"error": str(exc)}
        return json.dumps(failure)

def safe_chi_square_test(observed_json: str) -> str:
    """Decode a JSON contingency table, run the chi-square test, return JSON.

    Any exception (including malformed JSON input) is converted into
    ``{"error": "<message>"}``; this function never raises.
    """
    try:
        table = json.loads(observed_json)
        outcome = StatisticalToolkit.perform_chi_square_test(table)
        return json.dumps(outcome, indent=2)
    except Exception as exc:
        failure = {"error": str(exc)}
        return json.dumps(failure)

def safe_t_test(group_a_json: str, group_b_json: str) -> str:
    """Decode two JSON arrays, run the t-test on them, return JSON.

    Any exception (including malformed JSON input) is converted into
    ``{"error": "<message>"}``; this function never raises.
    """
    try:
        sample_a = json.loads(group_a_json)
        sample_b = json.loads(group_b_json)
        outcome = StatisticalToolkit.perform_t_test(sample_a, sample_b)
        return json.dumps(outcome, indent=2)
    except Exception as exc:
        failure = {"error": str(exc)}
        return json.dumps(failure)

# Create the FunctionTools
def _make_function_tool(func, description):
    """Wrap ``func`` in an ADK FunctionTool carrying ``description``.

    FunctionTool takes the callable as ``func`` and has no ``description``
    argument: the text shown to the model is derived from the function's
    docstring, so the description is prepended there before wrapping.
    """
    func.__doc__ = f"{description}. {func.__doc__ or ''}".strip()
    return FunctionTool(func=func)


sample_size_tool = _make_function_tool(
    safe_calculate_sample_size,
    "Calculate required sample size for A/B test given baseline rate and MDE"
)

significance_tool = _make_function_tool(
    safe_calculate_significance,
    "Calculate statistical significance of A/B test results"
)

eda_tool = _make_function_tool(
    safe_perform_eda,
    "Perform comprehensive exploratory data analysis on CSV data"
)

chi_square_tool = _make_function_tool(
    safe_chi_square_test,
    "Perform chi-square test on contingency table"
)

t_test_tool = _make_function_tool(
    safe_t_test,
    "Perform t-test comparing two groups"
)

logger.info("‚úÖ Statistical Toolkit ready")
print("[OK] Statistical functions loaded!\\n")


# ====================================================================
# CELL 6: VISUALIZATION TOOLKIT
# ====================================================================

class VisualizationToolkit:
    """Static helpers that build diagnostic Plotly charts as HTML strings."""

    @staticmethod
    def create_funnel_chart(
        stages: List[str],
        values: List[int],
        title: str = "Conversion Funnel"
    ) -> str:
        """Build a conversion funnel chart and return it as HTML."""
        funnel_trace = go.Funnel(
            y=stages,
            x=values,
            textinfo="value+percent initial"
        )
        chart = go.Figure(funnel_trace)
        chart.update_layout(title=title, height=500)
        return chart.to_html()

    @staticmethod
    def create_time_series(
        df: pd.DataFrame,
        date_col: str,
        metric_col: str,
        title: str = "Metric Over Time"
    ) -> str:
        """Build a line chart of ``metric_col`` over ``date_col`` as HTML."""
        chart = px.line(df, x=date_col, y=metric_col, title=title, markers=True)
        chart.update_layout(height=400)
        return chart.to_html()

    @staticmethod
    def create_correlation_heatmap(
        df: pd.DataFrame,
        title: str = "Correlation Matrix"
    ) -> str:
        """Build a heatmap of correlations between numeric columns as HTML."""
        numeric_names = df.select_dtypes(include=[np.number]).columns
        correlations = df[numeric_names].corr()
        chart = px.imshow(
            correlations,
            text_auto=True,
            aspect="auto",
            title=title,
            color_continuous_scale='RdBu_r'
        )
        chart.update_layout(height=600)
        return chart.to_html()

    @staticmethod
    def create_distribution_plot(
        df: pd.DataFrame,
        column: str,
        title: str = "Distribution"
    ) -> str:
        """Build a histogram of ``column`` with a marginal box plot as HTML."""
        chart = px.histogram(df, x=column, title=title, marginal="box")
        chart.update_layout(height=400)
        return chart.to_html()

logger.info("‚úÖ Visualization Toolkit ready")
print("[OK] Visualization functions loaded!\\n")


# ====================================================================
# CELL 7: CRIAR 10 AGENTES ESPECIALIZADOS
# ====================================================================

# Model identifier shared by every agent defined below.
MODEL = "gemini-2.0-flash-exp"

# 1. DataQualityAgent
# Tools: the EDA function tool, plus the BigQuery toolset when it was
# initialized (bq_toolset is None otherwise).
data_quality_tools = [eda_tool]
if bq_toolset:
    data_quality_tools.append(bq_toolset)

# Output is stored under the "data_quality_report" key.
data_quality_agent = Agent(
    name="DataQualityAgent",
    model=MODEL,
    instruction="""You are a data quality auditor.
    
    Your job:
    1. Validate data integrity (missing values, duplicates, outliers)
    2. Check for anomalies in key metrics
    3. Verify data freshness and completeness
    4. Report any data quality issues that would compromise analysis
    
    Use the EDA tool to analyze datasets comprehensively.
    """,
    tools=data_quality_tools,
    output_key="data_quality_report"
)

# 2. TrackingAgent
# Same tool set as above; built as a separate list so each agent owns its own.
tracking_tools = [eda_tool]
if bq_toolset:
    tracking_tools.append(bq_toolset)

# Output is stored under the "tracking_report" key.
tracking_agent = Agent(
    name="TrackingAgent",
    model=MODEL,
    instruction="""You are a tracking implementation specialist.
    
    Your job:
    1. Validate event tracking (purchase, lead, etc.)
    2. Check gclid presence in Google Ads traffic
    3. Verify UTM parameter consistency
    4. Identify tracking gaps or implementation errors
    
    Report any tracking issues that would affect attribution.
    """,
    tools=tracking_tools,
    output_key="tracking_report"
)

# Shared web-search helper.
# The built-in google_search tool cannot be combined with other tools in one
# agent, nor used directly inside a sub-agent. It is therefore isolated in a
# dedicated agent and exposed to the specialists through AgentTool, which
# behaves like an ordinary tool and can be mixed with function tools.
search_agent = Agent(
    name="SearchAgent",
    model=MODEL,
    instruction="""You are a web research specialist.

    Use Google Search to answer the request you are given and summarize the
    most relevant findings concisely.
    """,
    tools=[google_search]
)
search_tool = AgentTool(agent=search_agent)

# 3. FunnelAgent
funnel_tools = [eda_tool, search_tool]
if bq_toolset:
    funnel_tools.append(bq_toolset)

# Output is stored under the "funnel_report" key.
funnel_agent = Agent(
    name="FunnelAgent",
    model=MODEL,
    instruction="""You are a conversion funnel analyst.
    
    Your job:
    1. Map the complete conversion funnel
    2. Calculate conversion rates at each stage
    3. Identify the biggest bottleneck (highest drop-off)
    4. Segment performance by device, channel, etc.
    
    Provide actionable insights on where to focus optimization.
    """,
    tools=funnel_tools,
    output_key="funnel_report"
)

# 4. DiagnosticAgent
diagnostic_tools = [eda_tool, chi_square_tool, t_test_tool, search_tool]
if bq_toolset:
    diagnostic_tools.append(bq_toolset)

# Output is stored under the "root_cause_report" key.
diagnostic_agent = Agent(
    name="DiagnosticAgent",
    model=MODEL,
    instruction="""You are a senior paid media diagnostician.
    
    Your job is Root Cause Analysis (RCA):
    1. Investigate audience saturation and targeting issues
    2. Analyze Quality Score and ad relevance
    3. Check auction pressure (competition)
    4. Segment by time, device, location
    5. Identify search query quality issues
    
    Use statistical tests to validate hypotheses.
    Prioritize the root cause with highest impact.
    """,
    tools=diagnostic_tools,
    output_key="root_cause_report"
)

# 5. PMaxAgent
pmax_tools = [eda_tool, search_tool]
if bq_toolset:
    pmax_tools.append(bq_toolset)

# Output is stored under the "pmax_diagnostic_report" key.
pmax_agent = Agent(
    name="PMaxAgent",
    model=MODEL,
    instruction="""You are a Performance Max specialist.
    
    Your job:
    1. Evaluate creative performance (Asset Groups, Combinations)
    2. Analyze audience insights (Optimized segments)
    3. Review channel distribution (Search, Display, Video, Shopping)
    4. Assess search theme impact
    
    PMax is a black box - extract insights from available reports.
    """,
    tools=pmax_tools,
    output_key="pmax_diagnostic_report"
)

# 6. StatsAgent
# Output is stored under the "stats_results" key.
stats_agent = Agent(
    name="StatsAgent",
    model=MODEL,
    instruction="""You are a statistician.
    
    Your job:
    1. Validate A/B test significance (chi-square for rates, t-test for continuous)
    2. Calculate p-values and confidence intervals
    3. Determine if results are statistically significant (p < 0.05)
    4. Prevent false positives from noise
    
    Always report: p-value, test type, and recommendation (ship/wait/stop).
    """,
    tools=[significance_tool, chi_square_tool, t_test_tool],
    output_key="stats_results"
)

# 7. ExperimentAgent
# Output is stored under the "experiment_plan" key.
experiment_agent = Agent(
    name="ExperimentAgent",
    model=MODEL,
    instruction="""You are an experimentation designer.
    
    Your job:
    1. Calculate required sample size for A/B tests
    2. Estimate test duration based on traffic
    3. Define success metrics (primary + secondary)
    4. Design statistically valid experiments
    
    Ensure tests have sufficient power (80%) and significance (95%).
    """,
    tools=[sample_size_tool, search_tool],
    output_key="experiment_plan"
)

# 8. InsightsAgent
# Output is stored under the "strategic_recommendations" key.
insights_agent = Agent(
    name="InsightsAgent",
    model=MODEL,
    instruction="""You are a senior growth strategist.
    
    Your job:
    1. Synthesize all technical reports into business insights
    2. Provide 3-5 prioritized recommendations
    3. Identify quick wins vs. strategic initiatives
    4. Speak in business language (ROI, revenue, cost)
    
    Translate data into decisions.
    """,
    tools=[search_tool],
    output_key="strategic_recommendations"
)

# 9. EDAAgent (new)
# Uses the EDA tool plus the chi-square and t-test tools; output is stored
# under the "eda_report" key.
eda_agent = Agent(
    name="EDAAgent",
    model=MODEL,
    instruction="""You are an exploratory data analysis specialist.
    
    Your job:
    1. Perform comprehensive EDA on campaign data
    2. Identify patterns, trends, and anomalies
    3. Calculate correlations between metrics
    4. Detect outliers and data quality issues
    5. Generate statistical summaries
    
    Provide deep insights hidden in the data.
    """,
    tools=[eda_tool, chi_square_tool, t_test_tool],
    output_key="eda_report"
)

# 10. VisualizationAgent (new)
# NOTE(review): the instruction asks for charts, but the only tool given is
# eda_tool - none of the VisualizationToolkit helpers is exposed as a tool
# here. Confirm whether chart tools were meant to be added.
visualization_agent = Agent(
    name="VisualizationAgent",
    model=MODEL,
    instruction="""You are a data visualization specialist.
    
    Your job:
    1. Create diagnostic charts (funnels, time series, distributions)
    2. Generate correlation heatmaps
    3. Visualize A/B test results
    4. Make complex data easy to understand
    
    Use appropriate chart types for each insight.
    """,
    tools=[eda_tool],
    output_key="visualizations"
)

logger.info("‚úÖ 10 agents created")
print("[OK] Agent team ready! ü§ñ\\n")


# ====================================================================
# CELL 8: LOOP AGENT PARA REFINAMENTO
# ====================================================================

def approve_analysis(approved: bool, feedback: str, tool_context=None) -> str:
    """Record the critic's verdict and, when approved, stop the refinement loop.

    Args:
        approved: True if the analysis is complete, False if it needs rework.
        feedback: Free-text explanation of the verdict.
        tool_context: Optional tool context. ADK is expected to inject it
            automatically for a parameter with this exact name (TODO confirm
            against the installed ADK version). When provided and the
            analysis is approved, its ``actions.escalate`` flag is set so an
            enclosing loop can terminate early instead of always running
            every iteration.

    Returns:
        A JSON string with the keys ``approved``, ``feedback`` and
        ``timestamp`` (ISO-8601, local time).
    """
    logger.info(f"Analysis approval: {approved}")

    # Without this signal an approval would be recorded but nothing would
    # tell the surrounding loop that refinement is finished.
    if approved and tool_context is not None:
        tool_context.actions.escalate = True

    return json.dumps({
        "approved": approved,
        "feedback": feedback,
        "timestamp": datetime.now().isoformat()
    })

# Expose approve_analysis to the critic as a callable tool.
# NOTE(review): ADK's FunctionTool constructor appears to take the callable
# as `func` and derive the description from the docstring; confirm that the
# `function=` / `description=` keywords are accepted by the version in use.
approval_tool = FunctionTool(
    function=approve_analysis,
    description="Approve or reject analysis with feedback"
)

# Critic: reviews the current root-cause report and records a verdict via
# approve_analysis. The {root_cause_report} placeholder presumably resolves
# from session state written by an earlier agent (TODO confirm the producer).
critic_agent = Agent(
    name="CriticAgent",
    model=MODEL,
    instruction="""Review the {root_cause_report}.
    
    Check if:
    1. Root cause is clearly identified
    2. Evidence is statistically validated
    3. Recommendations are actionable
    
    Call approve_analysis: approved=True if complete, False with feedback otherwise.
    """,
    tools=[approval_tool],
    output_key="critique"
)

# Refiner: rewrites the report using the critique. Its output_key is the same
# "root_cause_report" name the critic reads, so each pass replaces the report.
refiner_agent = Agent(
    name="RefinerAgent",
    model=MODEL,
    instruction="""Fix issues in {root_cause_report} based on {critique}.
    
    Improve:
    1. Clarity of root cause
    2. Statistical validation
    3. Actionability of recommendations
    """,
    tools=[eda_tool, chi_square_tool, t_test_tool],
    output_key="root_cause_report"
)

# Critic -> refiner cycle, capped at three iterations.
refinement_loop = LoopAgent(
    name="RefinementLoop",
    sub_agents=[critic_agent, refiner_agent],
    max_iterations=3
)

print("[OK] Loop agent created!\\n")


# ====================================================================
# CELL 9: COMPOSITE AGENTS (PARALLEL + SEQUENTIAL)
# ====================================================================

# Parallel: independent initial diagnostics (data quality, tracking, funnel, EDA)
parallel_diagnostic = ParallelAgent(
    name="ParallelDiagnostic",
    sub_agents=[
        data_quality_agent,
        tracking_agent,
        funnel_agent,
        eda_agent
    ]
)

# Sequential: full pipeline, run in the order listed.
# Note that pmax_agent is not part of this pipeline, and that the runner
# created later in this file wraps the coordinator rather than this object.
sequential_pipeline = SequentialAgent(
    name="FullDiagnosticPipeline",
    sub_agents=[
        parallel_diagnostic,      # Step 1: parallel diagnostics
        diagnostic_agent,          # Step 2: root cause analysis
        stats_agent,               # Step 3: statistical validation
        refinement_loop,           # Step 4: critic/refiner refinement
        experiment_agent,          # Step 5: experiment design
        insights_agent,            # Step 6: strategic synthesis
        visualization_agent        # Step 7: visualizations
    ]
)

print("[OK] Composite agents ready!\\n")


# ====================================================================
# CELL 10: COORDINATOR AGENT (HYBRID ORCHESTRATOR)
# ====================================================================

# Tool belt for the coordinator: every specialist agent wrapped as a tool,
# plus the search tool and the statistical function tools for direct use.
# NOTE(review): ADK documents restrictions on combining the built-in
# google_search tool with other tools on the same agent; confirm this mix is
# supported by the ADK/model version in use.
coordinator_tools = [
    AgentTool(agent=data_quality_agent),
    AgentTool(agent=tracking_agent),
    AgentTool(agent=funnel_agent),
    AgentTool(agent=diagnostic_agent),
    AgentTool(agent=pmax_agent),
    AgentTool(agent=stats_agent),
    AgentTool(agent=experiment_agent),
    AgentTool(agent=insights_agent),
    AgentTool(agent=eda_agent),
    AgentTool(agent=visualization_agent),
    google_search,
    sample_size_tool,
    significance_tool,
    eda_tool,
    chi_square_tool,
    t_test_tool
]

# The BigQuery toolset is optional; it is added only when it was configured.
if bq_toolset:
    coordinator_tools.append(bq_toolset)

# Root agent: the workflow below is expressed purely as prompt instructions,
# so ordering and the "STOP" rule are followed at the model's discretion.
coordinator = Agent(
    name="MarketingDataScientistPartner",
    model=MODEL,
    instruction="""You are a Senior Marketing Data Scientist Partner.
    
    Your mission: Diagnose and solve complex campaign performance problems.
    
    WORKFLOW:
    1. Receive analyst query (e.g., "My CPA increased 30%")
    
    2. PARALLEL DIAGNOSTICS (call in parallel):
       - DataQualityAgent: Validate data integrity
       - TrackingAgent: Verify tracking implementation
       - FunnelAgent: Identify bottlenecks
       - EDAAgent: Perform exploratory analysis
    
    3. STOP if data quality or tracking is compromised
    
    4. ROOT CAUSE ANALYSIS:
       - Call DiagnosticAgent for general issues
       - Call PMaxAgent for Performance Max campaigns
    
    5. STATISTICAL VALIDATION:
       - Call StatsAgent to validate findings
    
    6. EXPERIMENT DESIGN:
       - If user requests a test, call ExperimentAgent
    
    7. SYNTHESIS:
       - Call InsightsAgent to generate strategic recommendations
       - Call VisualizationAgent to create diagnostic charts
    
    8. Present comprehensive, actionable report
    
    Always be data-driven, statistically rigorous, and business-focused.
    """,
    tools=coordinator_tools
)

logger.info("‚úÖ Coordinator created")
print("[OK] Coordinator ready!\\n")


# ====================================================================
# CELL 11: RUNNER COM OBSERVABILIDADE
# ====================================================================

@dataclass
class QueryMetrics:
    """Timing and outcome record for one query execution.

    ``end_time``, ``duration_seconds``, ``success`` and ``error`` stay at
    their defaults until ``finalize`` is called.
    """
    query: str
    start_time: datetime
    end_time: Optional[datetime] = None
    duration_seconds: Optional[float] = None
    success: bool = False
    error: Optional[str] = None

    def finalize(self, success: bool, error: Optional[str] = None):
        """Stamp the end time, compute the elapsed seconds and store the outcome.

        Args:
            success: Whether the query completed without raising.
            error: Error message to keep when the query failed.
        """
        finished_at = datetime.now()
        elapsed = finished_at - self.start_time

        self.end_time = finished_at
        self.duration_seconds = elapsed.total_seconds()
        self.success, self.error = success, error

class ObservableRunner:
    """Runner wrapper that records a QueryMetrics entry for every query."""

    def __init__(self, agent: "Agent"):
        """Create an in-memory runner for ``agent`` with an empty metrics history."""
        self.runner = InMemoryRunner(agent=agent)
        self.metrics_history: List[QueryMetrics] = []

    @staticmethod
    def _events_to_text(result: Any) -> str:
        """Normalise a runner result to plain text.

        Strings are returned unchanged and ``None`` becomes an empty string.
        Anything else is treated as an event (or a list/tuple of events)
        whose ``content.parts`` carry optional ``text``. Text from events
        that report ``is_final_response()`` is preferred; if none do, the
        text of all events is used.
        """
        if result is None:
            return ""
        if isinstance(result, str):
            return result

        events = result if isinstance(result, (list, tuple)) else [result]
        final_chunks = []
        all_chunks = []
        for event in events:
            content = getattr(event, "content", None)
            parts = getattr(content, "parts", None) or []
            text = "".join(part.text for part in parts if getattr(part, "text", None))
            if not text:
                continue
            all_chunks.append(text)
            is_final = getattr(event, "is_final_response", None)
            if callable(is_final) and is_final():
                final_chunks.append(text)

        # chr(10) instead of an escape sequence: this source is embedded in a
        # string literal, where backslash escapes have to be doubled.
        separator = chr(10) * 2
        return separator.join(final_chunks or all_chunks)

    async def run(self, query: str) -> str:
        """Execute ``query`` and return the response text, tracking metrics.

        A metrics entry is appended whether the query succeeds or fails;
        exceptions are logged and re-raised.

        NOTE(review): ADK's ``run_debug`` appears to return a list of events
        rather than a string (confirm against the installed version). The
        result is therefore normalised to text, because callers slice and
        concatenate it as a string.
        """
        metrics = QueryMetrics(query=query, start_time=datetime.now())

        try:
            logger.info(f"üöÄ Query: {query[:100]}...")
            raw_result = await self.runner.run_debug(query)
            response_text = self._events_to_text(raw_result)
            metrics.finalize(success=True)
            logger.info(f"‚úÖ Done in {metrics.duration_seconds:.2f}s")
            return response_text
        except Exception as e:
            metrics.finalize(success=False, error=str(e))
            logger.error(f"‚ùå Failed: {e}")
            raise
        finally:
            self.metrics_history.append(metrics)

    def get_stats(self) -> Dict[str, Any]:
        """Return aggregate execution statistics.

        Returns:
            ``{"total_queries": 0}`` when nothing has run yet; otherwise the
            total, successful and failed counts, the success rate in percent
            and the average duration (seconds) of successful queries.
        """
        total = len(self.metrics_history)
        if total == 0:
            return {"total_queries": 0}

        ok_durations = [m.duration_seconds for m in self.metrics_history if m.success]
        ok_count = len(ok_durations)
        return {
            "total_queries": total,
            "successful": ok_count,
            "failed": total - ok_count,
            "success_rate": ok_count / total * 100,
            "avg_duration": float(np.mean(ok_durations)) if ok_durations else 0
        }

# Module-level runner used by the tests and UI handlers below; it wraps the
# coordinator agent.
runner = ObservableRunner(agent=coordinator)

logger.info("‚úÖ Runner initialized")
# Startup banner listing the components configured by the preceding cells.
print("\\n" + "="*70)
print("üéâ SYSTEM READY!")
print("="*70)
print("\\n[‚úÖ] 10 Specialized Agents")
print("[‚úÖ] Statistical Toolkit (Chi¬≤, T-test, ANOVA)")
print("[‚úÖ] EDA & Visualization")
print("[‚úÖ] Secure Credentials")
print("[‚úÖ] Observability")
# BigQuery is reported only when the optional toolset was configured.
if bq_toolset:
    print("[‚úÖ] BigQuery Integration")
print("\\n[OK] Ready to diagnose campaigns! üöÄ\\n")


# ====================================================================
# CELL 12: CRIAR DADOS DEMO REALISTAS
# ====================================================================

def create_campaign_demo_data(n_days=30, n_campaigns=5):
    """Generate a synthetic daily campaign-performance dataset.

    One row is produced per day, campaign and device (mobile/desktop). To
    simulate a diagnosable problem, the mobile conversion rate is halved for
    day indices greater than 15. The global NumPy random state is seeded
    with 42, so the numeric columns are reproducible; the dates are relative
    to the current day.

    Args:
        n_days: Number of consecutive days to generate, ending yesterday.
        n_campaigns: Number of campaigns, named ``Campaign_1`` onwards.

    Returns:
        A DataFrame with the raw columns (date, campaign, device,
        impressions, clicks, cost, sessions, conversions, revenue) and the
        derived metrics ctr, cpc, cvr, cpa and roas. ``cpa`` is NaN for rows
        without conversions.
    """
    np.random.seed(42)

    data = []
    campaigns = [f"Campaign_{i+1}" for i in range(n_campaigns)]
    devices = ['mobile', 'desktop']

    for day in range(n_days):
        date = (datetime.now() - timedelta(days=n_days-day)).strftime('%Y-%m-%d')

        for campaign in campaigns:
            for device in devices:
                # Simulated incident: mobile CVR drops 50% after day 15,
                # which drives CPA up on that device.
                cvr_multiplier = 0.5 if device == 'mobile' and day > 15 else 1.0

                # The order of the random draws is kept fixed so that the
                # seeded output stays the same.
                impressions = np.random.randint(5000, 15000)
                clicks = int(impressions * np.random.uniform(0.02, 0.05))
                cost = clicks * np.random.uniform(1.5, 3.0)
                sessions = int(clicks * np.random.uniform(0.85, 0.95))
                conversions = int(sessions * np.random.uniform(0.02, 0.04) * cvr_multiplier)
                revenue = conversions * np.random.uniform(50, 150)

                data.append({
                    'date': date,
                    'campaign': campaign,
                    'device': device,
                    'impressions': impressions,
                    'clicks': clicks,
                    'cost': round(cost, 2),
                    'sessions': sessions,
                    'conversions': conversions,
                    'revenue': round(revenue, 2)
                })

    df = pd.DataFrame(data)

    # Derived metrics
    df['ctr'] = (df['clicks'] / df['impressions'] * 100).round(2)
    df['cpc'] = (df['cost'] / df['clicks']).round(2)
    df['cvr'] = (df['conversions'] / df['sessions'] * 100).round(2)
    # A low-traffic row can end up with zero conversions. Dividing by zero
    # would yield inf and poison means and t-tests, so CPA is left undefined
    # (NaN) for those rows instead.
    df['cpa'] = (df['cost'] / df['conversions'].replace(0, np.nan)).round(2)
    df['roas'] = (df['revenue'] / df['cost']).round(2)

    return df

# Build the demo dataset once and keep a CSV-text copy for the tools and
# prompts below, which consume CSV strings.
demo_df = create_campaign_demo_data()
demo_csv = demo_df.to_csv(index=False)

print("\\n" + "="*70)
print("üìä DEMO DATA CREATED")
print("="*70)

# Shape and date coverage
print(f"\\nüìà Dataset:")
print(f"   Rows: {len(demo_df):,}")
print(f"   Columns: {len(demo_df.columns)}")
print(f"   Date Range: {demo_df['date'].min()} to {demo_df['date'].max()}")

print(f"\\nüìã Sample:")
print(demo_df.head(10))

# Headline totals; the CPA and ROAS figures are unweighted means of the
# per-row values, not totals-based ratios.
print(f"\\nüìä Summary:")
print(f"   Total Cost: ${demo_df['cost'].sum():,.2f}")
print(f"   Total Conversions: {demo_df['conversions'].sum():,}")
print(f"   Avg CPA: ${demo_df['cpa'].mean():.2f}")
print(f"   Avg ROAS: {demo_df['roas'].mean():.2f}x")

print("\\n[OK] Demo data ready!\\n")


# ====================================================================
# CELL 13: TESTAR STATISTICAL TOOLKIT
# ====================================================================

# Smoke tests for the statistical toolkit, run against fixed inputs and the
# demo dataset.
print("\\n" + "="*70)
print("üß™ TESTING STATISTICAL TOOLKIT")
print("="*70)

# Test 1: Sample Size
print("\\n[TEST 1] Sample Size Calculation")
result1 = StatisticalToolkit.calculate_sample_size(
    baseline_rate=0.025, 
    mde=0.5
)
print(json.dumps(result1.to_dict(), indent=2))

# Test 2: Significance
print("\\n[TEST 2] Statistical Significance")
result2 = StatisticalToolkit.calculate_statistical_significance(
    ctrl_conv=250, 
    ctrl_total=10000, 
    treat_conv=280, 
    treat_total=10000
)
print(json.dumps(result2.to_dict(), indent=2))

# Test 3: EDA (output truncated to the first 1000 characters)
print("\\n[TEST 3] Exploratory Data Analysis")
result3 = StatisticalToolkit.perform_eda(demo_csv)
print(json.dumps(result3.to_dict(), indent=2)[:1000] + "...")

# Test 4: Chi-Square
print("\\n[TEST 4] Chi-Square Test")
observed = [[250, 9750], [280, 9720]]  # Control vs Treatment
result4 = StatisticalToolkit.perform_chi_square_test(observed)
print(json.dumps(result4, indent=2))

# Test 5: T-Test
print("\\n[TEST 5] T-Test")
# CPA is undefined (inf or NaN) on rows without conversions; a single such
# value would turn the whole t-test into NaN, so only finite values are
# compared. tolist() hands the toolkit plain Python floats.
finite_cpa_rows = demo_df[np.isfinite(demo_df['cpa'])]
group_a = finite_cpa_rows.loc[finite_cpa_rows['device'] == 'desktop', 'cpa'].tolist()
group_b = finite_cpa_rows.loc[finite_cpa_rows['device'] == 'mobile', 'cpa'].tolist()
result5 = StatisticalToolkit.perform_t_test(group_a, group_b)
print(json.dumps(result5, indent=2))

# Test 6: Validation (a baseline rate above 1 is expected to be rejected)
print("\\n[TEST 6] Input Validation")
try:
    StatisticalToolkit.calculate_sample_size(baseline_rate=1.5, mde=0.5)
    print("‚ùå Should have failed!")
except ValidationError as e:
    print(f"‚úÖ Validation works: {e}")

print("\\n[OK] All tests passed! ‚úÖ\\n")


# ====================================================================
# CELL 14: TESTAR SISTEMA DE AGENTES
# ====================================================================

# End-to-end checks of the coordinator through the observable runner.
print("\\n" + "="*70)
print("ü§ñ TESTING AGENT SYSTEM")
print("="*70)

import asyncio
from concurrent.futures import ThreadPoolExecutor


def _run_sync(coro):
    """Run a coroutine to completion from synchronous code.

    asyncio.run() raises RuntimeError when the current thread already has a
    running event loop, which is the case inside a notebook kernel. In that
    situation the coroutine is executed on a fresh loop in a worker thread;
    otherwise asyncio.run() is used directly.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        return asyncio.run(coro)
    with ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(asyncio.run, coro).result()


# Query 1: conceptual question
print("\\n[QUERY 1] Conceptual Question")
query1 = "What are the top 5 reasons why CPA increases in paid campaigns?"
print(f"Q: {query1}\\n")

response1 = _run_sync(runner.run(query1))
print(f"A: {response1[:800]}...\\n")

# Query 2: sample size
print("\\n[QUERY 2] Sample Size Calculation")
query2 = "Calculate sample size needed to improve conversion rate from 2.5% to 3.0%"
print(f"Q: {query2}\\n")

response2 = _run_sync(runner.run(query2))
print(f"A: {response2[:800]}...\\n")

# Query 3: diagnosis with data (only the first 2000 characters of the CSV
# are embedded in the prompt)
print("\\n[QUERY 3] Campaign Diagnosis")
query3 = f"""Analyze this campaign data and diagnose the problem:

{demo_csv[:2000]}

Question: My CPA increased significantly. What's the root cause?
"""
print(f"Q: Campaign diagnosis with data\\n")

response3 = _run_sync(runner.run(query3))
print(f"A: {response3[:800]}...\\n")

# Show execution statistics
stats = runner.get_stats()
print("\\nüìä Performance Metrics:")
print(json.dumps(stats, indent=2))

print("\\n[OK] Agent tests complete! ‚úÖ\\n")


# ====================================================================
# CELL 15: INTERFACE GRADIO
# ====================================================================

import gradio as gr

# Module-level state shared by the UI handlers below: the raw text of the
# most recently uploaded CSV and its parsed DataFrame (None until an upload).
current_csv_data = None
current_df = None

def upload_csv_handler(file):
    """Load an uploaded CSV, run a quick EDA and return a summary with a preview.

    Args:
        file: The upload from the UI. Either a filesystem path or an object
            exposing the path as ``.name``; ``None`` when nothing was uploaded.

    Returns:
        A ``(summary_markdown, preview)`` tuple. ``preview`` holds the first
        20 rows as a DataFrame, or ``None`` when nothing was loaded or an
        error occurred (the summary then carries the message).

    Side effects:
        On success, replaces the module-level ``current_csv_data`` and
        ``current_df``. They are left untouched if reading, parsing or the
        EDA fails, so a bad upload cannot leave the two out of sync.
    """
    global current_csv_data, current_df

    if file is None:
        return "‚ö†Ô∏è No file uploaded", None

    try:
        # Accept both a plain path and a file-like wrapper carrying .name.
        path = getattr(file, "name", file)

        # utf-8-sig also strips the byte-order mark that spreadsheet exports
        # often add, which would otherwise pollute the first column name.
        with open(path, 'r', encoding='utf-8-sig') as f:
            csv_text = f.read()

        parsed_df = pd.read_csv(StringIO(csv_text))

        # Quick analysis
        analysis = StatisticalToolkit.perform_eda(csv_text)

        # Commit the shared state only once everything above has succeeded.
        current_csv_data, current_df = csv_text, parsed_df

        summary = f"""‚úÖ **CSV Loaded Successfully!**

üìä **Dataset Overview:**
- Rows: {analysis.shape['rows']:,}
- Columns: {analysis.shape['columns']}

üìã **Columns:** {', '.join(analysis.columns)}

üîç **Data Quality:**
- Missing Values: {len(analysis.missing_values)} columns
- Duplicate Rows: {analysis.duplicate_rows}
- Outliers Detected: {sum(analysis.outliers.values())} total

‚úÖ Ready for analysis!
"""

        preview = parsed_df.head(20)

        return summary, preview

    except Exception as e:
        return f"‚ùå Error: {str(e)}", None

def query_handler(user_query):
    """Send the analyst's question to the agent runner and return its answer.

    When a CSV has been uploaded, up to the first 3000 characters of it are
    prepended to the question as context.

    Args:
        user_query: The question typed by the user.

    Returns:
        The agent's response followed by a footer with this query's duration
        and the overall success rate, or a warning/error message.
    """
    if not user_query or not user_query.strip():
        return "‚ö†Ô∏è Please enter a question."

    try:
        # Add data context if available
        if current_csv_data:
            preview = current_csv_data[:3000]
            # Signal truncation only when data was actually cut off.
            ellipsis = "..." if len(current_csv_data) > len(preview) else ""
            context = f"""Campaign data preview:
{preview}{ellipsis}

Analyst Question: {user_query}
"""
        else:
            context = user_query

        # Execute the query
        import asyncio
        result = asyncio.run(runner.run(context))
        if not isinstance(result, str):
            # Guard the concatenation below against non-text results.
            result = str(result)

        # Append metrics. The reported time is that of this query (the last
        # history entry), not the running average across all queries.
        stats = runner.get_stats()
        last_duration = runner.metrics_history[-1].duration_seconds
        result += f"\\n\\n---\\nüìä Query time: {last_duration:.2f}s | Success rate: {stats['success_rate']:.1f}%"

        return result

    except Exception as e:
        return f"‚ùå Error: {str(e)}"

def calc_sample_size_handler(baseline, mde, alpha, power, traffic):
    """Compute the A/B-test sample size and estimated duration for the UI.

    Args:
        baseline: Baseline conversion rate in percent (divided by 100 before
            being passed to the toolkit).
        mde: Minimum detectable effect, passed to the toolkit unchanged.
        alpha: Significance level.
        power: Statistical power.
        traffic: Daily visitors available for the test; must be positive.

    Returns:
        A markdown report, or a validation/error message.
    """
    # Parse the inputs up front so malformed values produce a clear
    # validation message.
    try:
        baseline = float(baseline) / 100
        mde = float(mde)
        alpha = float(alpha)
        power = float(power)
        traffic = int(traffic)
    except (TypeError, ValueError) as e:
        return f"‚ö†Ô∏è Validation Error: {str(e)}"

    # Zero traffic would divide by zero below; negative traffic would yield
    # a negative test duration.
    if traffic <= 0:
        return "‚ö†Ô∏è Validation Error: daily traffic must be a positive number"

    try:
        result = StatisticalToolkit.calculate_sample_size(baseline, mde, alpha, power)

        # Test duration, rounded up to whole days
        days_needed = math.ceil(result.total_sample_size / traffic)

        output = f"""‚úÖ **Sample Size Calculation**

üìä **Required Sample:**
- Per Group: **{result.sample_size_per_group:,}**
- Total: **{result.total_sample_size:,}**

‚è±Ô∏è **Test Duration:**
- With {traffic:,} daily visitors: **{days_needed} days**

üìà **Parameters:**
- Baseline CR: {result.baseline_rate:.2%}
- Target CR: {result.target_rate:.2%}
- MDE: {result.mde_percentage} pp ({result.mde_absolute:.2%})
- Significance: {(1-result.alpha)*100:.0f}%
- Power: {result.power*100:.0f}%

üí° **Recommendation:**
Run the test for at least {days_needed} days to detect a {result.mde_percentage} pp improvement with statistical confidence.
"""
        return output

    except ValidationError as e:
        return f"‚ö†Ô∏è Validation Error: {str(e)}"
    except Exception as e:
        return f"‚ùå Error: {str(e)}"

def validate_ab_handler(ctrl_conv, ctrl_total, treat_conv, treat_total):
    """Evaluate an A/B test from raw counts and format the result for the UI.

    Args:
        ctrl_conv: Conversions in the control group.
        ctrl_total: Visitors in the control group.
        treat_conv: Conversions in the treatment group.
        treat_total: Visitors in the treatment group.

    Returns:
        A markdown report, or a validation/error message.
    """
    # Convert once and reuse the integers everywhere. The report formats the
    # counts with a thousands separator, which fails for string inputs and
    # shows a stray decimal for float inputs.
    try:
        ctrl_conv, ctrl_total, treat_conv, treat_total = (
            int(value) for value in (ctrl_conv, ctrl_total, treat_conv, treat_total)
        )
    except (TypeError, ValueError) as e:
        return f"‚ö†Ô∏è Validation Error: {str(e)}"

    try:
        result = StatisticalToolkit.calculate_statistical_significance(
            ctrl_conv,
            ctrl_total,
            treat_conv,
            treat_total
        )
        # Serialise once; both the interpretation and the recommendation are
        # read from the same dictionary.
        details = result.to_dict()

        emoji = "‚úÖ" if result.is_significant else "‚è≥"
        direction = "üìà" if result.is_positive else "üìâ"

        output = f"""{emoji} **A/B Test Results**

üìä **Conversion Rates:**
- Control: {result.control_rate:.2%} ({ctrl_conv:,} / {ctrl_total:,})
- Treatment: {result.treatment_rate:.2%} ({treat_conv:,} / {treat_total:,})

{direction} **Uplift:**
- Relative: **{result.uplift_relative_pct:+.2f}%**
- Absolute: **{result.uplift_absolute_pp:+.2f} pp**

üî¨ **Statistical Test:**
- p-value: **{result.p_value:.4f}**
- z-statistic: {result.z_statistic:.2f}
- Status: **{details['interpretation']}**

üìä **95% Confidence Interval:**
- Lower: {result.ci_95_lower*100:+.2f} pp
- Upper: {result.ci_95_upper*100:+.2f} pp

üí° **{details['recommendation']}**
"""
        return output

    except ValidationError as e:
        return f"‚ö†Ô∏è Validation Error: {str(e)}"
    except Exception as e:
        return f"‚ùå Error: {str(e)}"

def eda_handler():
    """Run the automatic EDA on the uploaded CSV and format it as markdown.

    Reads the module-level ``current_csv_data``.

    Returns:
        A markdown report covering shape, duplicates, missing values, a
        numeric summary (first 5 columns), the 5 strongest correlations by
        absolute value and the columns with outliers; or a warning/error
        message.
    """
    if not current_csv_data:
        return "‚ö†Ô∏è Please upload a CSV file first."

    try:
        result = StatisticalToolkit.perform_eda(current_csv_data)

        # Collect the fragments and join once at the end.
        sections = [f"""üìä **Exploratory Data Analysis**

### Dataset Overview
- **Shape:** {result.shape['rows']:,} rows √ó {result.shape['columns']} columns
- **Duplicates:** {result.duplicate_rows} rows

### Data Quality
"""]

        if result.missing_values:
            sections.append("\\n**Missing Values:**\\n")
            for col, info in result.missing_values.items():
                sections.append(f"- {col}: {info['count']} ({info['percentage']:.1f}%)\\n")
        else:
            sections.append("‚úÖ No missing values\\n")

        sections.append("\\n### Numeric Summary\\n")
        for col, col_stats in list(result.numeric_summary.items())[:5]:
            sections.append(f"\\n**{col}:**\\n")
            sections.append(f"- Mean: {col_stats['mean']:.2f}\\n")
            sections.append(f"- Median: {col_stats['median']:.2f}\\n")
            sections.append(f"- Std: {col_stats['std']:.2f}\\n")
            sections.append(f"- Range: [{col_stats['min']:.2f}, {col_stats['max']:.2f}]\\n")

        if result.correlations:
            sections.append("\\n### Top Correlations\\n")
            sorted_corr = sorted(result.correlations.items(), key=lambda x: abs(x[1]), reverse=True)
            for pair, corr in sorted_corr[:5]:
                sections.append(f"- {pair}: {corr:.3f}\\n")

        # Only emit the outlier section when at least one column has
        # outliers; otherwise the header would stand alone with no entries.
        flagged = [(col, count) for col, count in (result.outliers or {}).items() if count > 0]
        if flagged:
            sections.append("\\n### Outliers Detected\\n")
            for col, count in flagged:
                sections.append(f"- {col}: {count} outliers\\n")

        return "".join(sections)

    except Exception as e:
        return f"‚ùå Error: {str(e)}"

# Criar interface Gradio
with gr.Blocks(
    theme=gr.themes.Soft(primary_hue="purple"),
    css="""
    .hero-section {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 10