In [0]:
"""
AB Test Calculator for Databricks
Hedingham Capital - Statistical Analysis & Causal Inference Tool

This module can be used with either Pandas or PySpark DataFrames
Works on Databricks Community Edition (free tier)
"""


import numpy as np
import pandas as pd
from scipy import stats
from typing import Dict, Tuple, Optional
import warnings
warnings.filterwarnings('ignore')
"""
AB Test Calculator for Databricks
Hedingham Capital - Statistical Analysis & Causal Inference Tool

This module can be used with either Pandas or PySpark DataFrames
Works on Databricks Community Edition (free tier)
"""

import numpy as np
import pandas as pd
from scipy import stats
from typing import Dict, Tuple, Optional
import warnings
warnings.filterwarnings('ignore')

# Optional: PySpark imports (comment out if using only pandas)
try:
    from pyspark.sql import DataFrame as SparkDataFrame
    from pyspark.sql import functions as F
    PYSPARK_AVAILABLE = True
except ImportError:
    PYSPARK_AVAILABLE = False
    print("PySpark not available - using Pandas only mode")


class ABTestCalculator:
    """
    Comprehensive AB Test Analysis Tool

    Features:
    - Statistical significance testing (two-proportion z-test)
    - Confidence intervals for the difference in conversion rates
    - Sample size recommendations
    - Business impact calculations
    - Analysis directly from Pandas or PySpark DataFrames
    - Plain-text report generation
    """

    def __init__(self, confidence_level: float = 0.95):
        """
        Initialize calculator

        Args:
            confidence_level: Confidence level for statistical tests, given as
                a proportion strictly between 0 and 1 (default 0.95)

        Raises:
            ValueError: If confidence_level is not strictly between 0 and 1
                (for example 95 passed instead of 0.95)
        """
        # Outside (0, 1) the normal quantile below is NaN or infinite, which
        # would silently make every test "not significant".
        if not 0 < confidence_level < 1:
            raise ValueError(
                f"confidence_level must be strictly between 0 and 1, got {confidence_level}"
            )
        self.confidence_level = confidence_level
        # Two-sided critical value of the standard normal distribution.
        self.z_critical = stats.norm.ppf(1 - (1 - confidence_level) / 2)

    @staticmethod
    def _validate_group(label: str, conversions: int, sample: int) -> None:
        """Raise ValueError unless 0 <= conversions <= sample and sample > 0."""
        if sample <= 0:
            raise ValueError(f"{label}_sample must be positive, got {sample}")
        if not 0 <= conversions <= sample:
            raise ValueError(
                f"{label}_conversions must be between 0 and {label}_sample "
                f"({sample}), got {conversions}"
            )

    def analyze_test(
        self,
        control_conversions: int,
        control_sample: int,
        treatment_conversions: int,
        treatment_sample: int,
        revenue_per_conversion: float = 0,
        test_duration_days: int = 14
    ) -> Dict:
        """
        Perform complete AB test analysis

        Args:
            control_conversions: Number of conversions in control group
            control_sample: Total sample size in control group
            treatment_conversions: Number of conversions in treatment group
            treatment_sample: Total sample size in treatment group
            revenue_per_conversion: Average revenue per conversion (for business impact)
            test_duration_days: Duration of test in days

        Returns:
            Dictionary containing all test results and metrics

        Raises:
            ValueError: If a sample size is not positive or a conversion count
                lies outside [0, sample size]
        """
        self._validate_group('control', control_conversions, control_sample)
        self._validate_group('treatment', treatment_conversions, treatment_sample)

        # Calculate conversion rates
        p_control = control_conversions / control_sample
        p_treatment = treatment_conversions / treatment_sample

        # Lift calculations (relative lift is undefined for a zero baseline,
        # so it is reported as 0 in that case)
        absolute_lift = p_treatment - p_control
        relative_lift = (absolute_lift / p_control) * 100 if p_control > 0 else 0

        # Pooled proportion and standard error for the z-test
        p_pooled = (control_conversions + treatment_conversions) / (control_sample + treatment_sample)
        se = np.sqrt(p_pooled * (1 - p_pooled) * (1/control_sample + 1/treatment_sample))

        if se > 0:
            z_stat = absolute_lift / se
            # Two-tailed p-value; the survival function stays accurate for
            # large |z| where 1 - cdf would round to exactly 0.
            p_value = 2 * stats.norm.sf(abs(z_stat))
        else:
            # se == 0 only when nobody (or everybody) converted in both
            # groups, i.e. the rates are identical: no evidence of a difference.
            z_stat = 0.0
            p_value = 1.0

        # Plain bool (not numpy.bool_) so the result dict serializes cleanly
        is_significant = bool(abs(z_stat) >= self.z_critical)

        # Confidence interval for difference in proportions (unpooled SE)
        se_diff = np.sqrt(
            p_control * (1 - p_control) / control_sample +
            p_treatment * (1 - p_treatment) / treatment_sample
        )
        ci_lower = absolute_lift - self.z_critical * se_diff
        ci_upper = absolute_lift + self.z_critical * se_diff

        # Business impact (annualized): control conversions scaled to a year,
        # multiplied by the relative lift and the revenue per conversion
        if revenue_per_conversion > 0 and test_duration_days > 0:
            annual_conversions = control_conversions * (365 / test_duration_days)
            revenue_impact = relative_lift * revenue_per_conversion * annual_conversions / 100
        else:
            revenue_impact = 0

        # Sample size recommendation for a fixed 0.02 absolute effect at 80% power
        effect_size = 0.02  # 2% minimum detectable effect
        required_sample = self.calculate_required_sample_size(
            p_control,
            effect_size,
            power=0.8
        )

        return {
            'conversion_rates': {
                'control': p_control,
                'treatment': p_treatment
            },
            'lift': {
                'absolute': absolute_lift,
                'relative_percent': relative_lift
            },
            'statistical_test': {
                'z_statistic': z_stat,
                'p_value': p_value,
                'is_significant': is_significant,
                'confidence_level': self.confidence_level
            },
            'confidence_interval': {
                'lower': ci_lower,
                'upper': ci_upper,
                'level': self.confidence_level
            },
            'business_impact': {
                'annual_revenue_impact': revenue_impact,
                'currency': 'USD'
            },
            'sample_size': {
                'current': {
                    'control': control_sample,
                    'treatment': treatment_sample
                },
                'recommended_for_2pct_mde': required_sample
            },
            'recommendation': self._get_recommendation(
                is_significant,
                relative_lift,
                p_value
            )
        }

    def analyze_from_dataframe(
        self,
        df,
        variant_col: str,
        conversion_col: str,
        control_value: str = 'control',
        treatment_value: str = 'treatment',
        **kwargs
    ) -> Dict:
        """
        Analyze AB test from a DataFrame (Pandas or PySpark)

        Args:
            df: DataFrame containing test data
            variant_col: Column name for variant identifier
            conversion_col: Column name for conversion (0/1 or True/False)
            control_value: Value in variant_col that indicates control group
            treatment_value: Value in variant_col that indicates treatment group
            **kwargs: Additional arguments to pass to analyze_test()

        Returns:
            Dictionary containing all test results

        Raises:
            ValueError: If either variant has no rows in df (raised by
                analyze_test's sample-size validation)
        """
        # A pandas DataFrame is never a Spark DataFrame, so checking it first
        # does not change which path is taken; anything that is neither falls
        # back to the pandas-style path.
        if isinstance(df, pd.DataFrame):
            analyze = self._analyze_pandas_df
        elif PYSPARK_AVAILABLE and isinstance(df, SparkDataFrame):
            analyze = self._analyze_spark_df
        else:
            analyze = self._analyze_pandas_df
        return analyze(
            df, variant_col, conversion_col,
            control_value, treatment_value, **kwargs
        )

    def _analyze_pandas_df(
        self,
        df,
        variant_col: str,
        conversion_col: str,
        control_value: str,
        treatment_value: str,
        **kwargs
    ) -> Dict:
        """
        Count samples and conversions per variant in a Pandas DataFrame and
        run analyze_test() on them.

        Rows whose variant is neither control_value nor treatment_value are
        ignored. Missing conversion values count towards the sample size but
        not towards conversions (pandas skips them when summing).
        """
        control = df.loc[df[variant_col] == control_value, conversion_col]
        treatment = df.loc[df[variant_col] == treatment_value, conversion_col]
        return self.analyze_test(
            control_conversions=int(control.sum()),
            control_sample=int(len(control)),
            treatment_conversions=int(treatment.sum()),
            treatment_sample=int(len(treatment)),
            **kwargs
        )

    def _analyze_spark_df(
        self,
        df,
        variant_col: str,
        conversion_col: str,
        control_value: str,
        treatment_value: str,
        **kwargs
    ) -> Dict:
        """
        Count samples and conversions per variant in a PySpark DataFrame and
        run analyze_test() on them.

        The aggregation runs in Spark; only one row per variant is collected
        to the driver. Rows of other variants are filtered out first.
        """
        per_variant = (
            df.filter(F.col(variant_col).isin([control_value, treatment_value]))
            .groupBy(variant_col)
            .agg(
                F.count(F.lit(1)).alias('sample'),
                # Cast so boolean conversion columns can be summed
                F.sum(F.col(conversion_col).cast('int')).alias('conversions'),
            )
            .collect()
        )
        # Spark returns NULL for a sum over only-NULL values, hence "or 0".
        counts = {
            row[variant_col]: (int(row['conversions'] or 0), int(row['sample']))
            for row in per_variant
        }
        control_conversions, control_sample = counts.get(control_value, (0, 0))
        treatment_conversions, treatment_sample = counts.get(treatment_value, (0, 0))
        return self.analyze_test(
            control_conversions=control_conversions,
            control_sample=control_sample,
            treatment_conversions=treatment_conversions,
            treatment_sample=treatment_sample,
            **kwargs
        )

    def calculate_required_sample_size(
        self,
        baseline_rate: float,
        minimum_detectable_effect: float,
        power: float = 0.8,
        alpha: Optional[float] = None
    ) -> int:
        """
        Calculate required sample size per group

        Args:
            baseline_rate: Current conversion rate (proportion)
            minimum_detectable_effect: Smallest effect you want to detect
                (absolute difference in proportions, must be non-zero)
            power: Statistical power (default 0.8 = 80%)
            alpha: Significance level (default uses instance confidence_level)

        Returns:
            Required sample size per group

        Raises:
            ValueError: If baseline_rate is outside [0, 1], the effect is 0,
                or power / alpha are not strictly between 0 and 1
        """
        if alpha is None:
            alpha = 1 - self.confidence_level

        if not 0 <= baseline_rate <= 1:
            raise ValueError(f"baseline_rate must be between 0 and 1, got {baseline_rate}")
        if minimum_detectable_effect == 0:
            raise ValueError("minimum_detectable_effect must be non-zero")
        if not 0 < power < 1:
            raise ValueError(f"power must be strictly between 0 and 1, got {power}")
        if not 0 < alpha < 1:
            raise ValueError(f"alpha must be strictly between 0 and 1, got {alpha}")

        z_alpha = stats.norm.ppf(1 - alpha / 2)
        z_beta = stats.norm.ppf(power)

        # Two-proportion z-test approximation that uses the baseline variance
        # p(1-p) for both groups.
        n = (2 * ((z_alpha + z_beta) ** 2) * baseline_rate * (1 - baseline_rate)) / \
            (minimum_detectable_effect ** 2)

        return int(np.ceil(n))

    def _get_recommendation(
        self,
        is_significant: bool,
        relative_lift: float,
        p_value: float
    ) -> str:
        """Generate human-readable recommendation"""
        if is_significant and relative_lift > 0:
            return f"✓ ROLL OUT TREATMENT: Statistically significant positive lift of {relative_lift:.2f}% (p={p_value:.4f})"
        elif is_significant and relative_lift < 0:
            return f"✗ DO NOT ROLL OUT: Statistically significant negative impact of {relative_lift:.2f}% (p={p_value:.4f})"
        else:
            return f"⚠ INCONCLUSIVE: Results not statistically significant (p={p_value:.4f}). Continue testing or increase sample size."

    def generate_report(self, results: Dict) -> str:
        """
        Generate formatted text report

        Args:
            results: Result dictionary as returned by analyze_test()

        Returns:
            Multi-line report string; the business impact section is omitted
            when the annual revenue impact is 0
        """
        rates = results['conversion_rates']
        lift = results['lift']
        test = results['statistical_test']
        interval = results['confidence_interval']
        impact = results['business_impact']['annual_revenue_impact']
        sizes = results['sample_size']

        report = []
        report.append("=" * 70)
        report.append("AB TEST ANALYSIS REPORT - HEDINGHAM CAPITAL")
        report.append("=" * 70)
        report.append("")

        report.append("CONVERSION RATES:")
        report.append(f"  Control:   {rates['control']:.4f} ({rates['control']*100:.2f}%)")
        report.append(f"  Treatment: {rates['treatment']:.4f} ({rates['treatment']*100:.2f}%)")
        report.append("")

        report.append("LIFT:")
        report.append(f"  Absolute: {lift['absolute']:.4f} ({lift['absolute']*100:.2f} percentage points)")
        report.append(f"  Relative: {lift['relative_percent']:.2f}%")
        report.append("")

        report.append("STATISTICAL TEST:")
        report.append(f"  Z-Statistic: {test['z_statistic']:.3f}")
        report.append(f"  P-Value: {test['p_value']:.4f}")
        report.append(f"  Significant: {'YES' if test['is_significant'] else 'NO'}")
        report.append(f"  Confidence Level: {test['confidence_level']*100:.0f}%")
        report.append("")

        report.append(f"CONFIDENCE INTERVAL ({interval['level']*100:.0f}%):")
        report.append(f"  Lower: {interval['lower']*100:.2f}%")
        report.append(f"  Upper: {interval['upper']*100:.2f}%")
        report.append("")

        if impact != 0:
            report.append("BUSINESS IMPACT:")
            report.append(f"  Annual Revenue Impact: ${impact:,.2f}")
            report.append("")

        report.append("SAMPLE SIZE:")
        report.append(f"  Current (Control): {sizes['current']['control']:,}")
        report.append(f"  Current (Treatment): {sizes['current']['treatment']:,}")
        report.append(f"  Recommended (2% MDE): {sizes['recommended_for_2pct_mde']:,} per group")
        report.append("")

        report.append("RECOMMENDATION:")
        report.append(f"  {results['recommendation']}")
        report.append("")
        report.append("=" * 70)

        return "\n".join(report)

def example_basic_usage():
    """
    Example 1: Basic usage with raw numbers

    Runs a 95%-confidence analysis on fixed conversion counts, prints the
    formatted report and returns the result dictionary.
    """
    # Raw counts for both arms plus the business-impact inputs
    inputs = {
        'control_conversions': 250,
        'control_sample': 5000,
        'treatment_conversions': 285,
        'treatment_sample': 5000,
        'revenue_per_conversion': 50,
        'test_duration_days': 14,
    }

    calculator = ABTestCalculator(confidence_level=0.95)
    outcome = calculator.analyze_test(**inputs)

    report_text = calculator.generate_report(outcome)
    print(report_text)
    return outcome

if __name__ == "__main__":
    # Script entry point: announce the run, print a banner, then run the example.
    divider = "=" * 70
    print("Running AB Test Calculator Examples...\n")

    # Example 1: Basic usage
    for banner_line in ("\n" + divider, "EXAMPLE 1: BASIC USAGE", divider):
        print(banner_line)
    example_basic_usage()

Running AB Test Calculator Examples...


EXAMPLE 1: BASIC USAGE
AB TEST ANALYSIS REPORT - HEDINGHAM CAPITAL

CONVERSION RATES:
  Control:   0.0500 (5.00%)
  Treatment: 0.0570 (5.70%)

LIFT:
  Absolute: 0.0070 (0.70 percentage points)
  Relative: 14.00%

STATISTICAL TEST:
  Z-Statistic: 1.555
  P-Value: 0.1199
  Significant: NO
  Confidence Level: 95%

CONFIDENCE INTERVAL (95%):
  Lower: -0.18%
  Upper: 1.58%

BUSINESS IMPACT:
  Annual Revenue Impact: $45,625.00

SAMPLE SIZE:
  Current (Control): 5,000
  Current (Treatment): 5,000
  Recommended (2% MDE): 1,865 per group

RECOMMENDATION:
  ⚠ INCONCLUSIVE: Results not statistically significant (p=0.1199). Continue testing or increase sample size.

