# Visuals

In [23]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
import ipywidgets as widgets
from IPython.display import display, clear_output
from mpl_toolkits.mplot3d import Axes3D
from scipy.stats import kendalltau, gumbel_r
from copulas.bivariate import Gumbel
import math


class RankDependencyAnalyzer:
    """
    Rank dependency sampler used by the interactive visualization.

    Turns three summary statistics into three sample vectors:
    - Concordance (Kendall's W): overall agreement
    - Concurrence (Mutual Information): shared information
    - Extremeness (Gumbel theta): tail dependence
    """

    def __init__(self, num_samples=10000, random_seed=42):
        """
        Args:
            num_samples: Number of draws produced by generate_copula_samples.
            random_seed: Value passed to np.random.seed; note that this
                reseeds NumPy's global random state.
        """
        self.num_samples = num_samples
        np.random.seed(random_seed)

    def estimate_gumbel_theta(self, rankings):
        """
        Estimate Gumbel theta from rankings: theta = 1/(1-tau).

        tau is the median Kendall's tau over all pairs rankings[i], rankings[j]
        with i < j.

        Args:
            rankings: Indexable collection of rankings (one entry per rater).

        Returns:
            Theta clipped to [1.0, 50.0]. Returns 1.0 (independence) when
            fewer than two rankings are given or when tau is undefined.
        """
        n_rankings = len(rankings)
        taus = [kendalltau(rankings[i], rankings[j])[0]
                for i in range(n_rankings)
                for j in range(i + 1, n_rankings)]

        # No pair to compare: without this guard np.median([]) yields NaN,
        # which would propagate through the clip below.
        if not taus:
            return 1.0

        tau = np.median(taus)

        # kendalltau is undefined (NaN) for constant input; treat as independence.
        if not np.isfinite(tau):
            return 1.0

        # Gumbel constraint: theta >= 1
        if tau >= 0.99:
            theta = 100.0  # avoids division by ~0; reduced to 50.0 by the clip below
        elif tau <= 0:
            theta = 1.0
        else:
            theta = 1.0 / (1.0 - tau)

        return np.clip(theta, 1.0, 50.0)

    def generate_copula_samples(self, theta, W, MI):
        """
        Generate copula samples representing CDEF components.

        Args:
            theta: Gumbel parameter (extremeness/tail dependence)
            W: Kendall's W (concordance, 0-1)
            MI: Mutual Information (concurrence, 0+)

        Returns:
            U1, U2, U3: Three arrays of length num_samples with values in [0, 1].
            U1 is the normal CDF of one coordinate of a bivariate normal with
            correlation W. U2 is a uniform draw when MI <= 0.5, otherwise a
            weighted average of two independent uniform draws (so it is no
            longer uniformly distributed unless the weight is 0 or 1).
            U3 comes from the Gumbel copula, or from the closed-form Gumbel
            CDF evaluated at random points if copula sampling fails.
        """
        # Component 1: Concordance (Kendall's W)
        # Model as correlation between raters - higher W = more correlation
        mean = [0, 0]
        cov = [[1, W], [W, 1]]  # Correlation = W
        X_concordance = np.random.multivariate_normal(mean, cov, self.num_samples)
        U1 = stats.norm.cdf(X_concordance[:, 0])

        # Component 2: Concurrence (Mutual Information)
        # Model as shared vs independent information
        # Higher MI = more shared structure
        if MI > 0.5:
            # High MI: significant shared information
            shared = np.random.uniform(0, 1, self.num_samples)
            independent = np.random.uniform(0, 1, self.num_samples)
            # Weight by MI (normalized)
            MI_weight = np.clip(MI / 3.0, 0, 1)  # Normalize MI to [0,1]
            U2 = MI_weight * shared + (1 - MI_weight) * independent
        else:
            # Low MI: mostly independent
            U2 = np.random.uniform(0, 1, self.num_samples)

        # Component 3: Extremeness (Gumbel theta)
        # Use actual Gumbel copula for tail dependence
        copula = Gumbel()
        copula.theta = theta
        # NOTE(review): theta is assigned without calling fit(); confirm that
        # sample() supports this in the installed copulas version - if it does
        # not, the fallback below is what actually produces U3.

        try:
            gumbel_samples = copula.sample(self.num_samples)
            U3 = gumbel_samples[:, 0]  # Take first marginal
        except Exception:
            # Deliberate best-effort fallback. `Exception` (rather than a bare
            # except) lets KeyboardInterrupt/SystemExit propagate.
            # Gumbel CDF: exp(-((-log u)^theta + (-log v)^theta)^(1/theta))
            u_base = np.random.uniform(0.001, 0.999, self.num_samples)
            v_base = np.random.uniform(0.001, 0.999, self.num_samples)

            # Apply Gumbel transformation
            log_u = -np.log(u_base)
            log_v = -np.log(v_base)
            U3 = np.exp(-(log_u**theta + log_v**theta)**(1/theta))

        return U1, U2, U3


def generate_copula_wireframe(theta, W, MI, N=30):
    """
    Build the (U, V, Z) grid that the 3D plot draws.

    Z is the empirical joint CDF of the first two sample vectors (the
    fraction of draws with U1 <= u and U2 <= v), optionally scaled by the
    mean of the third vector.

    Args:
        theta: Gumbel parameter (extremeness)
        W: Kendall's W (concordance)
        MI: Mutual Information (concurrence)
        N: Grid resolution (points per axis)

    Returns:
        U, V, Z: three N x N arrays.
    """
    sampler = RankDependencyAnalyzer(num_samples=10000)
    first, second, third = sampler.generate_copula_samples(theta, W, MI)

    # Same evaluation points on both axes, kept away from the 0/1 edges.
    axis_points = np.linspace(0.01, 0.99, N)
    U, V = np.meshgrid(axis_points, axis_points)

    # Broadcast (samples, 1, 1) against (1, N, N): one boolean plane per draw.
    within_u = first[:, np.newaxis, np.newaxis] <= U[np.newaxis, :, :]
    within_v = second[:, np.newaxis, np.newaxis] <= V[np.newaxis, :, :]

    # Averaging over the sample axis gives the proportion of draws <= (u, v).
    Z = np.mean(within_u & within_v, axis=0)

    # The extremeness scaling only applies above theta = 1.5; at or below
    # that the surface is left as is.
    if theta > 1.5:
        Z = Z * np.mean(third)

    return U, V, Z


# Initial Values (based on your real data)
initial_theta = 5.21  # Your data's scaled theta
initial_W = 0.85      # Your data's Kendall's W
initial_MI = 1.266    # Your data's MI
initial_elev = 30
initial_azim = 45

# Interactive Widgets
# The theta range must cover every preset below: a bounded slider clamps any
# assigned value to [min, max], so with the previous max of 30.0 the
# 'Phantom' preset (30.7) was silently shown and plotted as 30.0.
# 50.0 matches the upper clip applied by estimate_gumbel_theta.
theta_slider = widgets.FloatSlider(
    value=initial_theta, min=1.0, max=50.0, step=0.5, 
    description='Extremeness (θ)',
    style={'description_width': 'initial'}
)
W_slider = widgets.FloatSlider(
    value=initial_W, min=0.0, max=1.0, step=0.05, 
    description='Concordance (W)',
    style={'description_width': 'initial'}
)
MI_slider = widgets.FloatSlider(
    value=initial_MI, min=0.0, max=3.0, step=0.1, 
    description='Concurrence (MI)',
    style={'description_width': 'initial'}
)
# Camera angles handed to ax.view_init by update_plot.
elev_slider = widgets.IntSlider(
    value=initial_elev, min=-90, max=90, step=5, 
    description='Elevation'
)
azim_slider = widgets.IntSlider(
    value=initial_azim, min=0, max=360, step=5, 
    description='Azimuth'
)

# Preset buttons for scenarios
# Each option maps a label to a (theta, W, MI) tuple that apply_preset
# copies onto the three sliders above.
preset_buttons = widgets.ToggleButtons(
    options=[
        ('Your Data (Genuine)', (5.21, 0.85, 1.266)),
        ('Phantom', (30.7, 0.96, 2.358)),
        ('Strong Genuine', (4.2, 0.75, 1.611)),
        ('Random', (1.3, 0.27, 0.59))
    ],
    description='Presets:',
    style={'description_width': 'initial'}
)


def update_plot(theta, W, MI, elev, azim):
    """
    Redraw the 3D copula figure and print the relative-importance summary.

    Args:
        theta: Extremeness (Gumbel theta)
        W: Concordance (Kendall's W)
        MI: Concurrence (mutual information)
        elev: Camera elevation passed to view_init
        azim: Camera azimuth passed to view_init
    """
    clear_output(wait=True)

    fig = plt.figure(figsize=(12, 8))
    ax = fig.add_subplot(111, projection='3d')

    grid_u, grid_v, grid_z = generate_copula_wireframe(theta, W, MI)

    # Wireframe for structure, translucent surface on top for the colour scale.
    ax.plot_wireframe(grid_u, grid_v, grid_z, color='blue', alpha=0.6, linewidth=0.5)
    surface = ax.plot_surface(
        grid_u, grid_v, grid_z,
        cmap='viridis', alpha=0.3, edgecolor='none', antialiased=True,
    )

    ax.set_xlabel("Concordance (Kendall's W)", fontsize=10)
    ax.set_ylabel("Concurrence (Mutual Information)", fontsize=10)
    ax.set_zlabel("Joint Density (weighted by θ)", fontsize=10)

    # Classify the parameter combination; the first matching rule wins.
    if theta > 15 and W > 0.85:
        scenario, color = "⚠️ PHANTOM: Shared extreme biases", 'red'
    elif W > 0.65 and 2.5 < theta < 6.5:
        scenario, color = "✓ GENUINE: Natural agreement", 'green'
    elif W < 0.4:
        scenario, color = "○ RANDOM: No systematic agreement", 'gray'
    else:
        scenario, color = "→ MIXED: Moderate agreement", 'orange'

    title = (
        "CDEF Copula Visualization\n"
        f"θ={theta:.2f}, W={W:.2f}, MI={MI:.2f}\n"
        f"{scenario}"
    )
    ax.set_title(title, fontsize=12, color=color, weight='bold')
    ax.view_init(elev=elev, azim=azim)

    fig.colorbar(surface, ax=ax, shrink=0.5, aspect=5)

    plt.tight_layout()
    plt.show()

    # Each parameter's share of the plain sum W + MI + theta.
    total = W + MI + theta
    print("\nRelative Importance (NOT probabilities):")
    for label, value in (
        ("Concordance (W): ", W),
        ("Concurrence (MI):", MI),
        ("Extremeness (θ): ", theta),
    ):
        print(f"  {label} {value/total:.3f} ({value/total*100:.1f}%)")

    print("\nInterpretation:")
    if theta/total > 0.6:
        verdict = "  → Tail dependence DOMINATES (agreement on extremes)"
    elif W/total > 0.5:
        verdict = "  → Global concordance DOMINATES (uniform agreement)"
    else:
        verdict = "  → Balanced contribution from all factors"
    print(verdict)


def apply_preset(change):
    """
    Copy the selected preset's (theta, W, MI) tuple onto the three sliders.

    Args:
        change: Change notification from the widget observer. It is not
            inspected; the current value is read from preset_buttons.
    """
    # Unpacks first, then assigns left to right: theta, W, MI.
    theta_slider.value, W_slider.value, MI_slider.value = preset_buttons.value


# Push preset values onto the sliders whenever the selected preset changes.
preset_buttons.observe(apply_preset, names='value')

# Control panel: presets first, then the manual parameter sliders, then the
# camera sliders.
ui = widgets.VBox(children=[
    widgets.HTML("<h3>CDEF Copula Visualization</h3>"),
    widgets.HTML("<p><b>Adjust parameters or select presets:</b></p>"),
    preset_buttons,
    widgets.HTML("<p><b>Manual controls:</b></p>"),
    theta_slider,
    W_slider,
    MI_slider,
    widgets.HTML("<p><b>View angle:</b></p>"),
    elev_slider,
    azim_slider,
])

# Bind each update_plot argument to the slider that supplies it.
output = widgets.interactive_output(
    update_plot,
    dict(
        theta=theta_slider,
        W=W_slider,
        MI=MI_slider,
        elev=elev_slider,
        azim=azim_slider,
    ),
)

display(ui, output)

VBox(children=(HTML(value='<h3>CDEF Copula Visualization</h3>'), HTML(value='<p><b>Adjust parameters or select…

Output()

# Data

In [28]:
"""
Gumbel Copula Analyzer for Ranking Data

Handles both forced-choice rankings (strict permutations) and non-forced rankings (ties allowed).
Auto-detects ranking type and applies appropriate statistical models.
"""

import numpy as np
import pandas as pd
from scipy.stats import kendalltau, chi2_contingency, entropy, multinomial
from scipy.spatial.distance import cdist
from copulas.bivariate import Gumbel
from typing import Dict, Tuple, Optional, List
from dataclasses import dataclass
import warnings
import math


@dataclass
class CopulaResults:
    """
    Container for copula analysis results.

    Built by RankDependencyAnalyzer.analyze_from_excel and rendered by
    format_results. Field order is part of the positional constructor.
    """
    # Gumbel theta multiplied by (1 + Kendall's W), from estimate_copula_theta.
    theta_scaled: float
    # Theta derived from the average pairwise Kendall's tau, from estimate_gumbel_theta.
    theta_gumbel: float
    # Kendall's coefficient of concordance across all raters.
    kendalls_W: float
    # Mean Kendall's tau over all rater pairs, rounded to 3 decimals.
    avg_kendalls_tau: float
    # Mutual information, chi-square statistic and p-value; analyze_from_excel
    # computes these from the first two rater columns only.
    mutual_information: float
    chi_square_stat: float
    p_value: float
    # Mean log copula density over the fitted rater pairs.
    avg_log_likelihood: float
    # Baseline from compute_independence_log_likelihood (which returns 0.0).
    independence_log_likelihood: float
    # Fitted theta per rater pair, keyed "<rater1>-<rater2>".
    pairwise_thetas: Dict[str, float]
    # Rater-by-rater Kendall's tau matrix with 1.0 on the diagonal.
    tau_matrix: pd.DataFrame
    # 'forced' or 'non-forced', from detect_ranking_type.
    ranking_type: str
    # Model label chosen by choose_distribution_model.
    distribution_model: str
    # Log-likelihood of the chosen model; None when that model reports none.
    model_log_likelihood: Optional[float]
    # Normalised weights keyed 'Concordance', 'Concurrence', 'Extremeness'.
    relative_importance: Dict[str, float]
    # Number of rater columns and of ranked items (rows) in the wide table.
    n_raters: int
    n_items: int


class RankDependencyAnalyzer:
    """
    Analyze ranking dependence using Gumbel copulas.
    
    Features:
    - Auto-detects forced vs non-forced rankings
    - Applies appropriate statistical models (Mallows, Multinomial, etc.)
    - Computes dependence via Gumbel copulas (upper-tail dependence)
    - Provides interpretable metrics (Kendall's W, tau, theta)
    """
    
    def __init__(self, num_samples: int = 10000, significance_level: float = 0.05, 
                 random_seed: Optional[int] = 42):
        """
        Initialize analyzer.
        
        Args:
            num_samples: Number of Monte Carlo samples for permutation tests
            significance_level: Alpha level for hypothesis tests
            random_seed: Random seed for reproducibility
        """
        self.num_samples = num_samples
        self.significance_level = significance_level
        self.random_seed = random_seed
        if random_seed is not None:
            np.random.seed(random_seed)
        
        self.copulas: Dict[str, Gumbel] = {}
        self.theta: Optional[float] = None
        self.W: Optional[float] = None
        
    def load_excel(self, file_path: str, sheet_name: str, 
                   rater_col: str, ratee_col: str, ranking_col: str) -> pd.DataFrame:
        """
        Load rankings from Excel in long format, convert to wide format.
        
        Args:
            file_path: Path to Excel file
            sheet_name: Sheet name
            rater_col: Column name for raters
            ratee_col: Column name for items being ranked
            ranking_col: Column name for rank values
            
        Returns:
            DataFrame with ratees as rows, raters as columns
            
        Raises:
            ValueError: If required columns are missing
            FileNotFoundError: If file doesn't exist
        """
        try:
            df = pd.read_excel(file_path, sheet_name=sheet_name)
        except FileNotFoundError as e:
            raise FileNotFoundError(f"Excel file not found: {file_path}") from e
        except Exception as e:
            raise ValueError(f"Error reading Excel file: {e}") from e
        
        # Validate columns
        required_cols = [rater_col, ratee_col, ranking_col]
        missing_cols = [col for col in required_cols if col not in df.columns]
        if missing_cols:
            raise ValueError(
                f"Missing columns: {missing_cols}. "
                f"Available columns: {df.columns.tolist()}"
            )
        
        # Pivot to wide format
        rankings_wide = df.pivot(index=ratee_col, columns=rater_col, values=ranking_col)
        
        # Drop rows with missing values
        n_before = len(rankings_wide)
        rankings_wide = rankings_wide.dropna()
        n_after = len(rankings_wide)
        
        if n_after < n_before:
            warnings.warn(f"Dropped {n_before - n_after} rows with missing values")
        
        if n_after == 0:
            raise ValueError("No complete rankings found after dropping missing values")
        
        return rankings_wide.astype(int)
    
    def detect_ranking_type(self, rankings_df: pd.DataFrame) -> str:
        """
        Detect if rankings are forced-choice (permutation) or non-forced (ties allowed).
        
        Args:
            rankings_df: Rankings with ratees as rows, raters as columns
            
        Returns:
            'forced' if strict permutations, 'non-forced' if ties exist
        """
        n_items = len(rankings_df)
        
        for col in rankings_df.columns:
            ranks = rankings_df[col]
            
            # Check for duplicate ranks (ties)
            if len(ranks) != len(ranks.unique()):
                return 'non-forced'
            
            # Check if it's a complete permutation [1, 2, ..., n]
            expected_ranks = set(range(1, n_items + 1))
            if set(ranks) != expected_ranks:
                return 'non-forced'
        
        return 'forced'
    
    def compute_kendalls_W(self, rankings_df: pd.DataFrame) -> float:
        """
        Compute Kendall's W (coefficient of concordance).
        
        W = 1 indicates perfect agreement, W = 0 indicates no agreement.
        
        Args:
            rankings_df: Rankings with ratees as rows, raters as columns
            
        Returns:
            Kendall's W
        """
        N, m = rankings_df.shape  # N items, m raters
        row_sums = rankings_df.sum(axis=1)
        mean_rank_sum = row_sums.mean()
        S = np.sum((row_sums - mean_rank_sum) ** 2)
        W = (12 * S) / (m ** 2 * (N ** 3 - N))
        return round(W, 3)
    
    def compute_mutual_information_and_independence(
        self, rankings1: np.ndarray, rankings2: np.ndarray
    ) -> Tuple[float, float, float]:
        """
        Compute mutual information and chi-square test for independence.
        
        Uses binning to create contingency table for chi-square test.
        
        Args:
            rankings1: First rater's rankings
            rankings2: Second rater's rankings
            
        Returns:
            Tuple of (mutual_information, p_value, chi_square_statistic)
        """
        # Determine bin count (sqrt rule, minimum 5 for chi-square validity)
        bins = max(int(np.ceil(np.sqrt(len(rankings1)))), 5)
        
        # Create 2D histogram (contingency table)
        joint_dist, _, _ = np.histogram2d(
            rankings1, rankings2, 
            bins=bins,
            range=[[rankings1.min(), rankings1.max()], 
                   [rankings2.min(), rankings2.max()]]
        )
        
        # Chi-square test for independence
        chi2, p_value, dof, _ = chi2_contingency(joint_dist)
        
        # Compute mutual information
        # Add small constant to avoid log(0)
        joint_dist_smooth = joint_dist + 1e-10
        joint_dist_norm = joint_dist_smooth / np.sum(joint_dist_smooth)
        
        marginal_x = np.sum(joint_dist_norm, axis=1)
        marginal_y = np.sum(joint_dist_norm, axis=0)
        
        mi = (entropy(marginal_x) + entropy(marginal_y) - 
              entropy(joint_dist_norm.flatten()))
        
        return round(mi, 3), round(p_value, 3), round(chi2, 3)
    
    def estimate_gumbel_theta(self, rankings_df: pd.DataFrame) -> float:
        """
        Estimate Gumbel copula parameter from Kendall's tau.
        
        For Gumbel copula: θ = 1/(1-τ), where τ is Kendall's tau.
        θ ≥ 1, with θ=1 indicating independence.
        
        Args:
            rankings_df: Rankings with ratees as rows, raters as columns
            
        Returns:
            Gumbel theta parameter
        """
        raters = rankings_df.columns
        taus = []
        
        for i, r1 in enumerate(raters):
            for r2 in raters[i + 1:]:
                tau, _ = kendalltau(rankings_df[r1], rankings_df[r2])
                taus.append(tau)
        
        avg_tau = np.mean(taus) if taus else 0
        
        # Gumbel constraint: theta >= 1
        if avg_tau >= 0.99:
            theta = 100.0  # Cap at high value
        elif avg_tau <= 0:
            theta = 1.0  # Independence
        else:
            theta = 1.0 / (1.0 - avg_tau)
        
        return round(theta, 3)
    
    def estimate_copula_theta(self, rankings_df: pd.DataFrame) -> float:
        """
        Estimate scaled theta incorporating both pairwise (tau) and global (W) agreement.
        
        This combines local pairwise dependence with global concordance.
        
        Args:
            rankings_df: Rankings with ratees as rows, raters as columns
            
        Returns:
            Scaled theta parameter
        """
        theta_gumbel = self.estimate_gumbel_theta(rankings_df)
        self.W = self.compute_kendalls_W(rankings_df)
        
        # Scale theta by (1 + W) to incorporate global concordance
        self.theta = round(theta_gumbel * (1 + self.W), 3)
        return self.theta
    
    def fit_gumbel_copulas(self, rankings_df: pd.DataFrame) -> None:
        """
        Fit bivariate Gumbel copulas for all rater pairs.
        
        Transforms rankings to uniform [0,1] marginals via empirical CDF,
        then fits Gumbel copula to capture upper-tail dependence.
        
        Args:
            rankings_df: Rankings with ratees as rows, raters as columns
        """
        raters = rankings_df.columns
        n = len(rankings_df)
        
        for i, r1 in enumerate(raters):
            for r2 in raters[i+1:]:
                try:
                    copula = Gumbel()
                    
                    # Transform to uniform [0,1] using empirical CDF
                    u1 = (rankings_df[r1].rank() - 0.5) / n
                    u2 = (rankings_df[r2].rank() - 0.5) / n
                    data = np.column_stack([u1.values, u2.values])
                    
                    copula.fit(data)
                    self.copulas[f"{r1}-{r2}"] = copula
                    
                except ValueError as e:
                    # Gumbel requires theta >= 1 (positive dependence only)
                    # If fit fails, use independence (theta=1)
                    warnings.warn(
                        f"Could not fit Gumbel copula for {r1}-{r2}: {e}. "
                        f"Using independence (θ=1)."
                    )
                    copula = Gumbel()
                    copula.theta = 1.0
                    self.copulas[f"{r1}-{r2}"] = copula
    
    def compute_avg_log_likelihood(self, rankings_df: pd.DataFrame) -> float:
        """
        Compute average log-likelihood across all observations under fitted copulas.
        
        This properly uses all data points, not just the mean.
        
        Args:
            rankings_df: Rankings with ratees as rows, raters as columns
            
        Returns:
            Average log-likelihood per observation
        """
        if not self.copulas:
            return 0.0
        
        n = len(rankings_df)
        log_likelihoods = []
        
        for pair_name, copula in self.copulas.items():
            r1, r2 = pair_name.split('-')
            
            # Transform to uniform [0,1]
            u1 = (rankings_df[r1].rank() - 0.5) / n
            u2 = (rankings_df[r2].rank() - 0.5) / n
            
            # Compute log-likelihood for each observation
            for i in range(n):
                point = np.array([[u1.iloc[i], u2.iloc[i]]])
                try:
                    density = copula.probability_density(point)
                    if density > 0:
                        log_likelihoods.append(np.log(density))
                except (ValueError, RuntimeError):
                    continue
        
        if not log_likelihoods:
            return 0.0
        
        return round(np.mean(log_likelihoods), 6)
    
    def compute_independence_log_likelihood(self, rankings_df: pd.DataFrame) -> float:
        """
        Compute log-likelihood under independence (uniform copula).
        
        Under independence, copula density = 1 everywhere, so log-likelihood = 0.
        
        Returns:
            0.0 (independence baseline)
        """
        return 0.0
    
    def compute_consensus_ranking(self, rankings_df: pd.DataFrame) -> pd.Series:
        """
        Compute consensus ranking using Borda count.
        
        Lower sum of ranks = better overall ranking.
        
        Args:
            rankings_df: Rankings with ratees as rows, raters as columns
            
        Returns:
            Consensus ranking (1 = best)
        """
        borda_scores = rankings_df.sum(axis=1)
        consensus = borda_scores.rank(method='min').astype(int)
        return consensus
    
    def compute_kendall_distance(self, rank1: np.ndarray, rank2: np.ndarray) -> int:
        """
        Count the item pairs on which two rankings disagree.

        A pair (i, j) with i < j counts as a disagreement when the statement
        "item i is ranked strictly before item j" is true in one ranking and
        false in the other.

        Args:
            rank1: First ranking
            rank2: Second ranking

        Returns:
            Number of disagreeing pairs
        """
        n_items = len(rank1)
        return sum(
            1
            for a in range(n_items)
            for b in range(a + 1, n_items)
            if (rank1[a] < rank1[b]) != (rank2[a] < rank2[b])
        )
    
    def fit_mallows_model(self, rankings_df: pd.DataFrame) -> Dict:
        """
        Fit Mallows model for forced rankings under dependence.
        
        Mallows model: P(σ) ∝ exp(-θ * d(σ, σ₀))
        where d is Kendall distance and σ₀ is consensus ranking.
        
        Args:
            rankings_df: Rankings with ratees as rows, raters as columns
            
        Returns:
            Dict with 'theta' (dispersion), 'consensus' ranking, and log-likelihood
        """
        # Compute consensus ranking
        consensus = self.compute_consensus_ranking(rankings_df)
        
        # Compute average Kendall distance to consensus
        distances = []
        for col in rankings_df.columns:
            d = self.compute_kendall_distance(
                rankings_df[col].values,
                consensus.values
            )
            distances.append(d)
        
        avg_distance = np.mean(distances)
        n = len(rankings_df)
        
        # Estimate theta (simple MLE approximation)
        # For Mallows: E[d] ≈ n(n-1)/(4 * (1 + exp(θ)))
        # Rough inverse: theta ≈ log((n(n-1))/(4*avg_distance) - 1)
        if avg_distance > 0:
            theta_mallows = max(0.01, np.log(max(n * (n-1) / (4 * avg_distance) - 1, 1.01)))
        else:
            theta_mallows = 10.0  # Perfect agreement
        
        # Compute log-likelihood (approximate)
        # L = -θ * Σd(σᵢ, σ₀) - log(Z(θ))
        # Z(θ) is partition function (expensive to compute exactly, use approximation)
        log_likelihood = -theta_mallows * sum(distances)
        
        return {
            'theta': round(theta_mallows, 3),
            'consensus': consensus,
            'avg_distance': round(avg_distance, 2),
            'log_likelihood': round(log_likelihood, 3)
        }
    
    def calculate_multinomial_log_likelihood(self, rankings_df: pd.DataFrame) -> float:
        """
        Compute log-likelihood under multinomial model (independence baseline).
        
        Treats each (team, rank) pair as independent multinomial draw.
        
        Args:
            rankings_df: Rankings with ratees as rows, raters as columns
            
        Returns:
            Log-likelihood under multinomial model
        """
        # Flatten to get all (team, rank) pairs
        all_pairs = []
        for col in rankings_df.columns:
            for idx, rank in rankings_df[col].items():
                all_pairs.append((idx, rank))
        
        # Count frequencies
        unique_pairs, counts = np.unique(all_pairs, axis=0, return_counts=True)
        
        # Estimate probabilities (MLE)
        n_total = len(all_pairs)
        probs = counts / n_total
        
        # Multinomial log-likelihood
        log_likelihood = np.sum(counts * np.log(probs + 1e-10))
        
        return round(log_likelihood, 3)
    
    def choose_distribution_model(self, rankings_df: pd.DataFrame) -> Tuple[str, Optional[float]]:
        """
        Choose appropriate distribution based on ranking type and dependence test.
        
        Logic:
        - Forced + Dependent → Mallows model
        - Forced + Independent → Uniform permutation
        - Non-forced + Dependent → Report dependence (use copula)
        - Non-forced + Independent → Multinomial
        
        Args:
            rankings_df: Rankings with ratees as rows, raters as columns
            
        Returns:
            Tuple of (model_name, log_likelihood)
        """
        ranking_type = self.detect_ranking_type(rankings_df)
        
        # Test for dependence
        col1, col2 = rankings_df.columns[0], rankings_df.columns[1]
        _, p_value, _ = self.compute_mutual_information_and_independence(
            rankings_df[col1].values, rankings_df[col2].values
        )
        
        is_dependent = p_value < self.significance_level
        
        if ranking_type == 'forced':
            if is_dependent:
                model_result = self.fit_mallows_model(rankings_df)
                return 'Mallows (forced, dependent)', model_result['log_likelihood']
            else:
                # Uniform over all permutations
                # For large n, use Stirling approximation: log(n!) ≈ n*log(n) - n
                n = len(rankings_df)
                if n > 170:  # factorial(171) overflows
                    log_factorial_n = n * np.log(n) - n  # Stirling
                else:
                    log_factorial_n = np.log(math.factorial(n))
                log_likelihood = -log_factorial_n  # log(1/n!)
                return 'Uniform Permutation (forced, independent)', log_likelihood
        else:  # non-forced
            if is_dependent:
                # Just report that dependence detected, rely on copula
                return 'Dependent (non-forced, use copula)', None
            else:
                log_likelihood = self.calculate_multinomial_log_likelihood(rankings_df)
                return 'Multinomial (non-forced, independent)', log_likelihood
    
    def compute_relative_importance(self, W: float, mi: float, theta: float) -> Dict[str, float]:
        """
        Express W, MI and theta as shares of their plain sum.

        The result is a set of normalised weights for Concordance,
        Concurrence and Extremeness. They are NOT probabilities. The three
        inputs are on different scales and are added as they are.

        Args:
            W: Kendall's W (concordance)
            mi: Mutual information (concurrence)
            theta: Copula parameter (extremeness/tail dependence)

        Returns:
            Dict keyed 'Concordance', 'Concurrence', 'Extremeness' with each
            share rounded to 3 decimals; 0.333 each when the sum is zero
        """
        factors = {'Concordance': W, 'Concurrence': mi, 'Extremeness': theta}
        total = W + mi + theta

        # Nothing to apportion: fall back to equal weights.
        if total == 0:
            return dict.fromkeys(factors, 0.333)

        return {name: round(value / total, 3) for name, value in factors.items()}
    
    def analyze_from_excel(
        self, file_path: str, sheet_name: str, 
        rater_col: str, ratee_col: str, ranking_col: str
    ) -> CopulaResults:
        """
        Run the full analysis on a long-format Excel sheet.

        Loads and pivots the data, fits the pairwise Gumbel copulas, computes
        the agreement metrics and model choice, and bundles everything into a
        CopulaResults object. Prints a short summary of the loaded data.
        Mutual information and the chi-square test use the first two rater
        columns only.

        Args:
            file_path: Path to Excel file
            sheet_name: Sheet name
            rater_col: Column name for raters
            ratee_col: Column name for items being ranked
            ranking_col: Column name for rank values

        Returns:
            CopulaResults dataclass with all analysis outputs
        """
        rankings_wide = self.load_excel(file_path, sheet_name, rater_col, ratee_col, ranking_col)
        raters = rankings_wide.columns

        print(f"\nLoaded data: {len(rankings_wide)} ratees x {len(raters)} raters")
        print(f"Raters: {list(raters)}")

        ranking_type = self.detect_ranking_type(rankings_wide)
        print(f"Ranking type: {ranking_type}")

        # Pairwise copulas first; the likelihood step below reads self.copulas.
        self.fit_gumbel_copulas(rankings_wide)

        # estimate_copula_theta also stores Kendall's W on self.W.
        theta_scaled = self.estimate_copula_theta(rankings_wide)
        theta_gumbel = self.estimate_gumbel_theta(rankings_wide)

        # Dependence test on the first two raters.
        first_rater, second_rater = raters[0], raters[1]
        mi, p_value, chi2_stat = self.compute_mutual_information_and_independence(
            rankings_wide[first_rater].values, rankings_wide[second_rater].values
        )

        distribution_model, model_log_likelihood = self.choose_distribution_model(rankings_wide)

        avg_log_likelihood = self.compute_avg_log_likelihood(rankings_wide)
        independence_log_likelihood = self.compute_independence_log_likelihood(rankings_wide)

        # Normalised weights, not conditional probabilities.
        relative_importance = self.compute_relative_importance(self.W, mi, theta_scaled)

        # Mean tau over unordered rater pairs.
        pair_taus = [
            kendalltau(rankings_wide[a], rankings_wide[b])[0]
            for position, a in enumerate(raters)
            for b in raters[position + 1:]
        ]
        avg_tau = np.mean(pair_taus) if pair_taus else 0

        # Fitted theta of each pairwise copula.
        pairwise_thetas = {
            pair_name: round(copula.theta, 3)
            for pair_name, copula in self.copulas.items()
        }

        # Full rater-by-rater tau table, diagonal fixed at 1.
        tau_matrix = pd.DataFrame(index=raters, columns=raters, dtype=float)
        for row_rater in raters:
            for col_rater in raters:
                if row_rater == col_rater:
                    tau_matrix.loc[row_rater, col_rater] = 1.0
                    continue
                tau_value = kendalltau(rankings_wide[row_rater], rankings_wide[col_rater])[0]
                tau_matrix.loc[row_rater, col_rater] = round(tau_value, 3)

        return CopulaResults(
            theta_scaled=theta_scaled,
            theta_gumbel=theta_gumbel,
            kendalls_W=self.W,
            avg_kendalls_tau=round(avg_tau, 3),
            mutual_information=mi,
            chi_square_stat=chi2_stat,
            p_value=p_value,
            avg_log_likelihood=avg_log_likelihood,
            independence_log_likelihood=independence_log_likelihood,
            pairwise_thetas=pairwise_thetas,
            tau_matrix=tau_matrix,
            ranking_type=ranking_type,
            distribution_model=distribution_model,
            model_log_likelihood=model_log_likelihood,
            relative_importance=relative_importance,
            n_raters=len(raters),
            n_items=len(rankings_wide)
        )


def format_results(results: CopulaResults) -> str:
    """
    Render an analysis result as a multi-line, human-readable report.

    The report has a banner followed by sections for the ranking/model
    description, core metrics, dependence tests, log-likelihoods, relative
    importance weights, pairwise Gumbel thetas, and the Kendall tau matrix.
    The "Model Log-Likelihood" line is emitted only when
    ``results.model_log_likelihood`` is not None.

    Args:
        results: CopulaResults dataclass

    Returns:
        The report as a single newline-joined string (no trailing newline)
    """
    banner = "=" * 70
    lines = [banner, "GUMBEL COPULA ANALYSIS RESULTS", banner]

    # Header: ranking type and fitted distribution model.
    lines += [
        f"\nRanking Type: {results.ranking_type}",
        f"Distribution Model: {results.distribution_model}",
    ]
    if results.model_log_likelihood is not None:
        lines.append(f"Model Log-Likelihood: {results.model_log_likelihood}")

    # Fixed-layout numeric sections; values are printed with default str().
    lines += [
        "\nCore Metrics:",
        f"  Theta (scaled with W):        {results.theta_scaled}",
        f"  Gumbel theta (from tau):      {results.theta_gumbel}",
        f"  Kendall's W (concordance):    {results.kendalls_W}",
        f"  Avg Kendall's tau:            {results.avg_kendalls_tau}",
        "\nDependence Tests:",
        f"  Mutual information:           {results.mutual_information}",
        f"  Chi-square statistic:         {results.chi_square_stat}",
        f"  p-value:                      {results.p_value}",
        "\nLog-Likelihoods:",
        f"  Copula (average):             {results.avg_log_likelihood}",
        f"  Independence baseline:        {results.independence_log_likelihood}",
    ]

    # Variable-length sections: one line per dictionary entry.
    lines.append("\nRelative Importance (NOT probabilities):")
    lines.extend(
        f"  {component:15s}: {weight:.3f}"
        for component, weight in results.relative_importance.items()
    )

    lines.append("\nPairwise Gumbel Thetas:")
    lines.extend(
        f"  {pair_label}: {pair_theta}"
        for pair_label, pair_theta in results.pairwise_thetas.items()
    )

    lines.append("\nKendall's Tau Matrix:")
    lines.append(str(results.tau_matrix))

    return "\n".join(lines)


if __name__ == "__main__":
    # Example usage: analyze one workbook and print the formatted report.
    # The names bound here (analyzer, file_path, results) are module-level.
    analyzer = RankDependencyAnalyzer(random_seed=42)
    
    # Hard-coded, machine-specific input path (WSL-style mount of a Windows folder).
    file_path = "/mnt/c/Users/lfult/OneDrive - bc.edu/Desktop/Copula/data.xlsx"
    
    try:
        # NOTE(review): positional arguments are presumably the sheet name
        # followed by the rater / ratee / ranking column names - confirm
        # against analyze_from_excel's signature.
        results = analyzer.analyze_from_excel(
            file_path, "Sheet1", "Rater", "Ratee", "Ranking"
        )
        
        print(format_results(results))
        
    except Exception as e:
        # Top-level boundary: report the error with a full traceback instead
        # of letting the exception propagate out of the script.
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()



Loaded data: 136 ratees x 4 raters
Raters: ['CBS', 'CFN', 'Congrove', 'NYT']
Ranking type: forced
GUMBEL COPULA ANALYSIS RESULTS

Ranking Type: forced
Distribution Model: Mallows (forced, dependent)
Model Log-Likelihood: -5112.4

Core Metrics:
  Theta (scaled with W):        5.21
  Gumbel theta (from tau):      2.816
  Kendall's W (concordance):    0.85
  Avg Kendall's tau:            0.645

Dependence Tests:
  Mutual information:           1.266
  Chi-square statistic:         409.897
  p-value:                      0.0

Log-Likelihoods:
  Copula (average):             0.578401
  Independence baseline:        0.0

Relative Importance (NOT probabilities):
  Concordance    : 0.116
  Concurrence    : 0.173
  Extremeness    : 0.711

Pairwise Gumbel Thetas:
  CBS-CFN: 5.35
  CBS-Congrove: 2.094
  CBS-NYT: 5.564
  CFN-Congrove: 1.852
  CFN-NYT: 3.628
  Congrove-NYT: 2.124

Kendall's Tau Matrix:
Rater       CBS    CFN  Congrove    NYT
Rater                                  
CBS       1.000 

# Sim

In [27]:
"""
CDEF Demonstration: Detecting Phantom vs Genuine Concordance

Uses the properly fixed Gumbel copula analyzer with:
- Auto-detection of forced vs non-forced rankings
- Mallows model for forced rankings under dependence
- Proper log-likelihoods
- Relative importance (not fake conditional probabilities)
"""

import numpy as np
import pandas as pd
from scipy.stats import kendalltau, chi2_contingency, entropy
from copulas.bivariate import Gumbel
from typing import Dict, Tuple, Optional
from dataclasses import dataclass
import sys
import os

# Add parent directory to path to import the fixed analyzer
sys.path.insert(0, r'C:\Users\lfult\OneDrive - bc.edu\Desktop\Copula')
from gumbel_copula_fixed import RankDependencyAnalyzer, CopulaResults


def create_phantom_scenario():
    """
    Build the "phantom" scenario: every rater shares one extreme pattern.

    The shared pattern interleaves ranks from both ends of the scale
    (1, 136, 2, 135, ...). Each rater receives a copy of that pattern with
    exactly two random pairwise swaps so the columns are not identical.
    Randomness comes from NumPy's global RNG, so seed it beforehand
    (``np.random.seed``) for reproducible output.

    Returns:
        DataFrame with 136 rows (index named 'Team') and one column per
        rater ('CBS', 'CFN', 'Congrove', 'NYT'); each column is a
        permutation of 1..136.
    """
    n_teams = 136
    n_swaps = 2  # per-rater perturbations; just enough to avoid identical columns
    rater_names = ['CBS', 'CFN', 'Congrove', 'NYT']

    # Shared extreme pattern: alternate the next top rank with the next bottom rank.
    extreme_pattern = [
        rank
        for i in range(n_teams // 2)
        for rank in (i + 1, n_teams - i)
    ]
    if n_teams % 2 == 1:
        # Odd scale: the middle rank is left over after pairing the ends.
        extreme_pattern.append(n_teams // 2 + 1)

    rankings = {}
    for rater in rater_names:
        pattern = extreme_pattern.copy()
        for _ in range(n_swaps):
            # Two distinct positions, drawn without replacement, exchange ranks.
            i, j = np.random.choice(n_teams, 2, replace=False)
            pattern[i], pattern[j] = pattern[j], pattern[i]
        rankings[rater] = pattern

    df = pd.DataFrame(rankings)
    df.index.name = 'Team'
    return df


def create_genuine_scenario():
    """
    Build the "genuine" scenario: raters agree on a plain 1..136 ordering.

    Every rater starts from the identity ranking and has 12 random pairwise
    swaps applied, drawn from NumPy's global RNG.

    Returns:
        DataFrame with 136 rows (index named 'Team') and one column per
        rater ('CBS', 'CFN', 'Congrove', 'NYT').
    """
    team_count = 136
    swap_count = 12  # moderate noise per rater

    columns = {}
    for rater_label in ('CBS', 'CFN', 'Congrove', 'NYT'):
        ranks = list(range(1, team_count + 1))
        for _ in range(swap_count):
            # Exchange the ranks held at two distinct random positions.
            a, b = np.random.choice(team_count, 2, replace=False)
            ranks[a], ranks[b] = ranks[b], ranks[a]
        columns[rater_label] = ranks

    frame = pd.DataFrame(columns)
    frame.index.name = 'Team'
    return frame


def create_random_scenario():
    """
    Build the "random" scenario: each rater gets an independent shuffle.

    Every column is a fresh random permutation of the ranks 1..136 drawn
    from NumPy's global RNG.

    Returns:
        DataFrame with 136 rows (index named 'Team') and one column per
        rater ('CBS', 'CFN', 'Congrove', 'NYT').
    """
    team_count = 136

    # One independent permutation per rater, generated in column order.
    columns = {
        rater_label: list(np.random.permutation(range(1, team_count + 1)))
        for rater_label in ['CBS', 'CFN', 'Congrove', 'NYT']
    }

    frame = pd.DataFrame(columns)
    frame.index.name = 'Team'
    return frame


def create_clustered_scenario():
    """
    Build the "clustered" scenario: three close raters plus one divergent.

    All raters start from the identity ranking 1..136. CBS, CFN and NYT each
    receive 8 random pairwise swaps; Congrove receives 40. Swaps are drawn
    from NumPy's global RNG, in the order the raters are listed below.

    Returns:
        DataFrame with 136 rows (index named 'Team') and columns in the
        order 'CBS', 'CFN', 'NYT', 'Congrove'.
    """
    team_count = 136

    # (rater, number of swaps): the tight cluster first, the outlier last.
    swap_plan = [('CBS', 8), ('CFN', 8), ('NYT', 8), ('Congrove', 40)]

    columns = {}
    for rater_label, swap_count in swap_plan:
        ranks = list(range(1, team_count + 1))
        for _ in range(swap_count):
            # Exchange the ranks held at two distinct random positions.
            a, b = np.random.choice(team_count, 2, replace=False)
            ranks[a], ranks[b] = ranks[b], ranks[a]
        columns[rater_label] = ranks

    frame = pd.DataFrame(columns)
    frame.index.name = 'Team'
    return frame


def cdef_interpretation(results: CopulaResults) -> Tuple[float, str]:
    """
    Classify an analysis result with a fixed threshold ladder.

    Reads three fields of ``results``:
    - kendalls_W (concordance): Overall agreement
    - theta_scaled (extremeness): Tail dependence
    - avg_kendalls_tau: Pairwise correlations

    Mutual information is not consulted by any rule.

    The rules are tested top to bottom and the first match wins. Each rule
    carries a hard-coded (score, label) pair; the score is a heuristic
    constant, not a quantity estimated from the data.

    Args:
        results: Object exposing ``kendalls_W``, ``theta_scaled`` and
            ``avg_kendalls_tau``.

    Returns:
        Tuple of (probability_genuine, interpretation_string)
    """
    W = results.kendalls_W
    theta = results.theta_scaled
    avg_tau = results.avg_kendalls_tau

    # Phantom detection: Very high W + Very high theta = shared extreme bias
    if W > 0.85 and theta > 15.0:
        return 0.10, "⚠️ PHANTOM: Shared extreme biases (very high W + very high θ)"

    # Likely phantom: high W with strong tail dependence
    if W > 0.75 and theta > 8.0:
        return 0.20, "⚠️ Likely PHANTOM: High concordance with extreme tail dependence"

    # Strong genuine: Very high W + Low-moderate theta.
    # This must be tested before the broader GENUINE rule: for W > 0.75 and
    # 2.5 < theta < 4.0 both rules match, and the broader one would otherwise
    # shadow this stricter one.
    if W > 0.75 and theta < 4.0:
        return 0.85, "✓✓ STRONG GENUINE: Excellent natural agreement"

    # Genuine agreement: High W + Moderate theta
    if W > 0.65 and 2.5 < theta < 6.5:
        return 0.75, "✓ GENUINE: Natural agreement (high W + moderate θ)"

    # Clustered (some agree, some don't)
    if W > 0.5 and 0.3 < avg_tau < 0.65:
        return 0.60, "→ CLUSTERED: Subgroup agreement with divergent rater(s)"

    # Weak agreement
    if W > 0.25:
        return 0.70, "◐ WEAK: Limited systematic agreement"

    # Random/independence (also reached when comparisons fail, e.g. NaN inputs)
    return 0.90, "○ RANDOM: No systematic agreement (independence)"


def save_scenario_to_excel(rankings_df: pd.DataFrame, filename: str, scenario_name: str):
    """
    Write a wide rankings table to an Excel workbook in long format.

    One output row is produced per (row, column) cell of ``rankings_df``,
    with columns 'Ratee' (``Team_<1-based row position>``), 'Rater' (the
    column label) and 'Ranking' (the cell cast to int). The workbook is
    written to sheet 'Sheet1' inside a hard-coded Copula output folder.

    Args:
        rankings_df: Wide table with one column per rater.
        filename: File name appended to the hard-coded output folder.
        scenario_name: Not used by the function body.
    """
    records = []
    for position in range(len(rankings_df)):
        team_row = rankings_df.iloc[position]
        team_label = f'Team_{position+1}'
        for rater_label in rankings_df.columns:
            records.append({
                'Ratee': team_label,
                'Rater': rater_label,
                'Ranking': int(team_row[rater_label]),
            })

    long_table = pd.DataFrame(records)

    # Machine-specific destination folder (WSL-style mount of a Windows path).
    destination = f"/mnt/c/Users/lfult/OneDrive - bc.edu/Desktop/Copula/{filename}"
    long_table.to_excel(destination, sheet_name='Sheet1', index=False)
    print(f"  Saved to: {filename}")

def run_cdef_demonstration():
    """
    Run the four synthetic scenarios through the analyzer and compare readings.

    Steps, per scenario (phantom, genuine, random, clustered):
    1. Save the scenario via ``save_scenario_to_excel``.
    2. Write the same data, in long format, to a scratch workbook in the
       system temp directory and analyze it with ``RankDependencyAnalyzer``.
       The scratch workbook is always removed afterwards, including when
       the analysis raises.
    3. Print the metrics, a W-only "traditional" reading, and the
       ``cdef_interpretation`` reading.

    A scenario whose analysis raises is reported and skipped; it does not
    appear in the summary or in the returned table. The global NumPy RNG is
    seeded with 42 before the scenarios are generated.

    Returns:
        DataFrame with one row per successfully analyzed scenario, holding
        the metrics and both interpretations.
    """
    # Local import: only this function needs the temp-directory lookup.
    import tempfile

    np.random.seed(42)

    print("\n" + "=" * 80)
    print("CDEF DEMONSTRATION: Detecting Phantom vs Genuine Concordance")
    print("Using Properly Fixed Gumbel Copula Analysis")
    print("=" * 80)

    # All scenarios are generated up front, in this order, from the seeded RNG.
    scenarios = {
        'Phantom (Extreme Bias)': create_phantom_scenario(),
        'Genuine (Natural Agreement)': create_genuine_scenario(),
        'Random (No Agreement)': create_random_scenario(),
        'Clustered (Outlier)': create_clustered_scenario()
    }

    results_list = []

    for name, rankings_df in scenarios.items():
        print(f"\n{'=' * 80}")
        print(f"{name.upper()}")
        print('=' * 80)

        # Save scenario to Excel
        filename = f"scenario_{name.lower().replace(' ', '_').replace('(', '').replace(')', '')}.xlsx"
        save_scenario_to_excel(rankings_df, filename, name)

        # Scratch workbook for the analyzer; the temp directory is looked up
        # rather than hard-coded so this also works where /tmp does not exist.
        temp_file = os.path.join(tempfile.gettempdir(), f"temp_{filename}")
        long_format = [
            {
                'Ratee': f'Team_{team_idx+1}',
                'Rater': rater,
                'Ranking': int(rankings_df.iloc[team_idx][rater])
            }
            for team_idx in range(len(rankings_df))
            for rater in rankings_df.columns
        ]
        df_long = pd.DataFrame(long_format)

        try:
            df_long.to_excel(temp_file, sheet_name='Sheet1', index=False)

            # Run analysis with proper analyzer
            analyzer = RankDependencyAnalyzer(random_seed=42)
            try:
                results = analyzer.analyze_from_excel(
                    temp_file, 'Sheet1', 'Rater', 'Ratee', 'Ranking'
                )
            except Exception as e:
                print(f"Error analyzing {name}: {e}")
                continue
        finally:
            # Runs on success, on the `continue` above, and on any other
            # exception, so the scratch workbook never outlives its scenario.
            if os.path.exists(temp_file):
                os.remove(temp_file)

        # CDEF interpretation
        cdef_prob, cdef_interp = cdef_interpretation(results)

        # Traditional interpretation (W only)
        if results.kendalls_W > 0.7:
            traditional = "High concordance → Good agreement"
        elif results.kendalls_W > 0.4:
            traditional = "Moderate concordance → Some agreement"
        else:
            traditional = "Low concordance → Poor agreement"

        # Display results
        print(f"\nRanking Type: {results.ranking_type}")
        print(f"Distribution Model: {results.distribution_model}")
        if results.model_log_likelihood is not None:
            print(f"Model Log-Likelihood: {results.model_log_likelihood}")

        print(f"\nCore Metrics:")
        print(f"  Kendall's W (concordance):     {results.kendalls_W:.3f}")
        print(f"  Theta (scaled):                {results.theta_scaled:.3f}")
        print(f"  Gumbel theta (from tau):       {results.theta_gumbel:.3f}")
        print(f"  Avg Kendall's tau:             {results.avg_kendalls_tau:.3f}")
        print(f"  Mutual information:            {results.mutual_information:.3f}")

        print(f"\nLog-Likelihoods:")
        print(f"  Copula (average):              {results.avg_log_likelihood:.6f}")
        print(f"  Independence baseline:         {results.independence_log_likelihood:.6f}")

        print(f"\nRelative Importance (NOT probabilities):")
        for key, val in results.relative_importance.items():
            print(f"  {key:15s}: {val:.3f}")

        print(f"\nPairwise Theta Range:")
        theta_values = list(results.pairwise_thetas.values())
        print(f"  Max: {max(theta_values):.3f}, Min: {min(theta_values):.3f}")

        print(f"\nTraditional Analysis (W only):")
        print(f"  {traditional}")

        print(f"\nCDEF Analysis (W + θ + MI + τ):")
        print(f"  P(Genuine|Data) = {cdef_prob:.3f}")
        print(f"  Interpretation: {cdef_interp}")

        # Store results
        results_list.append({
            'Scenario': name,
            'Ranking_Type': results.ranking_type,
            'Model': results.distribution_model,
            'W': results.kendalls_W,
            'Theta_scaled': results.theta_scaled,
            'Theta_gumbel': results.theta_gumbel,
            'Avg_tau': results.avg_kendalls_tau,
            'MI': results.mutual_information,
            'Copula_LL': results.avg_log_likelihood,
            'Model_LL': results.model_log_likelihood,
            'Rel_Concordance': results.relative_importance['Concordance'],
            'Rel_Concurrence': results.relative_importance['Concurrence'],
            'Rel_Extremeness': results.relative_importance['Extremeness'],
            'Traditional': traditional,
            'P_Genuine': cdef_prob,
            'CDEF_Interpretation': cdef_interp
        })

    # Summary comparison table
    print("\n" + "=" * 80)
    print("SUMMARY: Traditional vs CDEF Analysis")
    print("=" * 80)

    df_results = pd.DataFrame(results_list)

    print("\nTraditional Analysis (Kendall's W only):")
    for _, row in df_results.iterrows():
        print(f"  {row['Scenario']:30s}: W={row['W']:.3f} → {row['Traditional']}")

    print("\nCDEF Copula Analysis (Full Dependency Structure):")
    for _, row in df_results.iterrows():
        print(f"\n  {row['Scenario']:30s}:")
        print(f"    Type: {row['Ranking_Type']}, Model: {row['Model']}")
        print(f"    W={row['W']:.3f}, θ={row['Theta_scaled']:.3f}, MI={row['MI']:.3f}, τ={row['Avg_tau']:.3f}")
        print(f"    Relative Importance: Conc={row['Rel_Concordance']:.3f}, "
              f"Concur={row['Rel_Concurrence']:.3f}, Extreme={row['Rel_Extremeness']:.3f}")
        print(f"    → P(Genuine|Data) = {row['P_Genuine']:.3f}")
        print(f"    → {row['CDEF_Interpretation']}")

    print("\n" + "=" * 80)
    print("KEY INSIGHTS")
    print("=" * 80)
    print("Traditional Kendall's W CANNOT distinguish:")
    print("  • Phantom concordance (shared biases) from genuine agreement")
    print("  • Extreme tail dependence from natural correlation")
    print("  • Clustered subgroups from uniform agreement")
    print("\nCDEF reveals the truth by analyzing:")
    print("  ✓ Concordance (W): Overall agreement across all raters")
    print("  ✓ Dependence (θ): Tail dependence & extremeness in rankings")
    print("  ✓ Concurrence (MI): Shared information structure")
    print("  ✓ Flexibility (τ): Pairwise correlation patterns")
    print("\nDistribution Models:")
    print("  ✓ Mallows: Forced rankings with dependence (your data!)")
    print("  ✓ Uniform: Forced rankings with independence")
    print("  ✓ Multinomial: Non-forced rankings with independence")
    print("\nRelative Importance:")
    print("  • NOT conditional probabilities (different units/scales)")
    print("  • Normalized decomposition weights showing contribution")
    print("  • Extremeness dominance indicates tail-dependence drives agreement")
    print("=" * 80)

    return df_results


if __name__ == "__main__":
    # Run the full demonstration; results_df is bound at module level and
    # holds one row per successfully analyzed scenario.
    results_df = run_cdef_demonstration()
    
    # Save summary to a hard-coded, machine-specific path (WSL-style mount
    # of a Windows folder); index=False omits the row index from the CSV.
    output_file = '/mnt/c/Users/lfult/OneDrive - bc.edu/Desktop/Copula/cdef_summary_fixed.csv'
    results_df.to_csv(output_file, index=False)
    print(f"\n✓ Summary saved to: {output_file}")


CDEF DEMONSTRATION: Detecting Phantom vs Genuine Concordance
Using Properly Fixed Gumbel Copula Analysis

PHANTOM (EXTREME BIAS)
  Saved to: scenario_phantom_extreme_bias.xlsx

Loaded data: 136 ratees x 4 raters
Raters: ['CBS', 'CFN', 'Congrove', 'NYT']
Ranking type: forced

Ranking Type: forced
Distribution Model: Mallows (forced, dependent)
Model Log-Likelihood: -2499.485

Core Metrics:
  Kendall's W (concordance):     0.964
  Theta (scaled):                30.733
  Gumbel theta (from tau):       15.648
  Avg Kendall's tau:             0.936
  Mutual information:            2.358

Log-Likelihoods:
  Copula (average):              2.047156
  Independence baseline:         0.000000

Relative Importance (NOT probabilities):
  Concordance    : 0.028
  Concurrence    : 0.069
  Extremeness    : 0.902

Pairwise Theta Range:
  Max: 35.308, Min: 9.850

Traditional Analysis (W only):
  High concordance → Good agreement

CDEF Analysis (W + θ + MI + τ):
  P(Genuine|Data) = 0.100
  Interpretatio

