<a href="https://colab.research.google.com/github/bheemeshpujari63/A-B-Testing-Framework/blob/main/Advanced_A_B_Testing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Advanced A/B Testing Framework
# Multiple Statistical Tests + Business Impact Analysis
# Author: [Your Name] | Portfolio Project for Data Analyst Roles

import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from scipy import stats
from scipy.stats import chi2_contingency, mannwhitneyu, ttest_ind
import warnings
warnings.filterwarnings('ignore')

print("=" * 70)
print("üß™ ADVANCED A/B TESTING FRAMEWORK")
print("=" * 70)

# ============================================================================
# SECTION 1: DATA GENERATION
# ============================================================================
# Simulates a two-arm experiment. All randomness comes from NumPy's global
# legacy RNG, so the ORDER of the np.random.* calls below determines the data.

print("\nüìä SECTION 1: Generating Experiment Data\n")

# Fixed seed: every run reproduces the same simulated experiment.
np.random.seed(42)

# Experiment design
CONTROL_VISITORS = 10000
VARIANT_VISITORS = 10000
CONTROL_RATE = 0.085  # 8.5%
VARIANT_RATE = 0.098  # 9.8%

# One binomial draw per arm: the NUMBER of visitors who converted
# (a single count, not a per-user 0/1 vector).
control_conversions = np.random.binomial(CONTROL_VISITORS, CONTROL_RATE)
variant_conversions = np.random.binomial(VARIANT_VISITORS, VARIANT_RATE)


def _simulate_revenue(visitors, converters):
    """Return per-user revenue: gamma(2, 25) spend for converters, then $0 rows."""
    spend = np.random.gamma(2, 25, converters)
    no_spend = np.zeros(visitors - converters)
    return np.concatenate([spend, no_spend])


# Control is drawn before variant so the random stream is consumed in a
# fixed order.
control_revenue = _simulate_revenue(CONTROL_VISITORS, control_conversions)
variant_revenue = _simulate_revenue(VARIANT_VISITORS, variant_conversions)

# Within each arm the converters come first, matching the revenue layout above,
# so a row's Converted flag and Revenue value always agree.
_converted_flags = np.repeat(
    np.array([1, 0, 1, 0], dtype=np.int64),
    [
        control_conversions,
        CONTROL_VISITORS - control_conversions,
        variant_conversions,
        VARIANT_VISITORS - variant_conversions,
    ],
)

# One row per simulated user
data = pd.DataFrame({
    'Group': ['Control'] * CONTROL_VISITORS + ['Variant'] * VARIANT_VISITORS,
    'Converted': _converted_flags,
    'Revenue': np.concatenate([control_revenue, variant_revenue]),
})

# Extra user attributes, drawn independently of group and conversion.
_n_users = len(data)
data['Device'] = np.random.choice(['Mobile', 'Desktop'], size=_n_users, p=[0.6, 0.4])
data['User_Type'] = np.random.choice(['New', 'Returning'], size=_n_users, p=[0.7, 0.3])
data['Time_On_Site'] = np.random.exponential(180, size=_n_users)  # seconds

print(f"‚úÖ Generated data for {len(data):,} users")
print(f"   Control Group: {CONTROL_VISITORS:,} users")
print(f"   Variant Group: {VARIANT_VISITORS:,} users")
print(f"\nFirst few rows:")
print(data.head(10))

# ============================================================================
# SECTION 2: DESCRIPTIVE STATISTICS
# ============================================================================
# Summarises each arm and derives the headline conversion metrics.

print("\n" + "=" * 70)
print("üìà SECTION 2: Descriptive Statistics")
print("=" * 70 + "\n")

# Per-arm roll-up: conversions (count / rate / sample size) and revenue spread.
_per_group_stats = {
    'Converted': ['sum', 'mean', 'count'],
    'Revenue': ['sum', 'mean', 'median', 'std'],
}
summary = data.groupby('Group').agg(_per_group_stats).round(4)

print(summary)

# Row subsets for each arm
_is_control = data['Group'] == 'Control'
_is_variant = data['Group'] == 'Variant'
control_df = data[_is_control]
variant_df = data[_is_variant]

# Conversion rates expressed in percent (0-100), not as fractions.
control_conv_rate = control_df['Converted'].mean() * 100
variant_conv_rate = variant_df['Converted'].mean() * 100

# Absolute lift is in percentage points; relative lift is that gap as a
# percentage of the control rate.
_absolute_lift = variant_conv_rate - control_conv_rate
lift = (_absolute_lift / control_conv_rate) * 100

print(f"\nüìä Key Metrics:")
print(f"   Control Conversion Rate: {control_conv_rate:.2f}%")
print(f"   Variant Conversion Rate: {variant_conv_rate:.2f}%")
print(f"   Relative Lift: {lift:+.2f}%")
print(f"   Absolute Lift: {_absolute_lift:+.2f} percentage points")

# ============================================================================
# SECTION 3: STATISTICAL TESTS
# ============================================================================

print("\n" + "="*70)
print("üî¨ SECTION 3: Statistical Hypothesis Testing")
print("="*70 + "\n")

# TEST 1: Z-Test for Proportions
# Two-sided, pooled two-proportion z-test of H0: both arms share the same
# conversion rate.
print("TEST 1: Z-Test for Proportions (Conversion Rate)")
print("-" * 70)

# Sample sizes and conversion counts per arm
n_control = len(control_df)
n_variant = len(variant_df)
x_control = control_df['Converted'].sum()
x_variant = variant_df['Converted'].sum()

# Observed rates, plus the pooled rate used for the standard error under H0
p_control = x_control / n_control
p_variant = x_variant / n_variant
p_pooled = (x_control + x_variant) / (n_control + n_variant)

se = np.sqrt(p_pooled * (1 - p_pooled) * (1/n_control + 1/n_variant))
z_stat = (p_variant - p_control) / se
# Survival function rather than 1 - cdf: 1 - cdf rounds to exactly 0 for
# large |z|, whereas sf keeps the tail probability accurate.
p_value_z = 2 * stats.norm.sf(abs(z_stat))

print(f"   Z-statistic: {z_stat:.4f}")
print(f"   P-value: {p_value_z:.4f}")
print(f"   Significant at Œ±=0.05? {'‚úÖ YES' if p_value_z < 0.05 else '‚ùå NO'}")
# The p-value is P(data at least this extreme | H0). It is NOT the probability
# that H0 is true, so (1 - p) must not be reported as a "confidence" level.
print(f"   Interpretation: if both rates were truly equal, a gap at least this "
      f"large would occur with probability {p_value_z:.4f}")

# TEST 2: Chi-Square Test
# Tests whether conversion is independent of experiment group using the
# Group x Converted contingency table.
# NOTE(review): with SciPy's default `correction=True`, a 2x2 table gets
# Yates' continuity correction, so this p-value need not equal the z-test's.
print("\n\nTEST 2: Chi-Square Test (Independence Test)")
print("-" * 70)

# Rows: experiment group; columns: converted flag.
contingency_table = pd.crosstab(data['Group'], data['Converted'])
_chi_result = chi2_contingency(contingency_table)
chi2, p_value_chi, dof, expected = _chi_result

_chi_verdict = '‚úÖ YES' if p_value_chi < 0.05 else '‚ùå NO'

print(f"   Contingency Table:")
print(contingency_table)
print(f"\n   Chi-Square statistic: {chi2:.4f}")
print(f"   P-value: {p_value_chi:.4f}")
print(f"   Degrees of Freedom: {dof}")
print(f"   Significant at Œ±=0.05? {_chi_verdict}")

# TEST 3: T-Test for Revenue
# Compares mean revenue per user between the two arms (two-sided).
print("\n\nTEST 3: Independent T-Test (Revenue per User)")
print("-" * 70)

# Welch's t-test (equal_var=False): revenue is zero-inflated and its variance
# depends on each arm's conversion rate, so the pooled-variance assumption of
# the default Student's t-test is not justified here.
t_stat, p_value_t = ttest_ind(
    variant_df['Revenue'], control_df['Revenue'], equal_var=False
)

print(f"   Control Mean Revenue: ${control_df['Revenue'].mean():.2f}")
print(f"   Variant Mean Revenue: ${variant_df['Revenue'].mean():.2f}")
print(f"   T-statistic: {t_stat:.4f}")
print(f"   P-value: {p_value_t:.4f}")
print(f"   Significant at Œ±=0.05? {'‚úÖ YES' if p_value_t < 0.05 else '‚ùå NO'}")

# TEST 4: Mann-Whitney U Test (Non-parametric)
# Rank-based, two-sided comparison of the two revenue samples; it does not
# rely on the revenue values being normally distributed.
print("\n\nTEST 4: Mann-Whitney U Test (Non-parametric Revenue Test)")
print("-" * 70)

_mw_result = mannwhitneyu(
    variant_df['Revenue'],
    control_df['Revenue'],
    alternative='two-sided',
)
u_stat, p_value_u = _mw_result

_mw_verdict = '‚úÖ YES' if p_value_u < 0.05 else '‚ùå NO'

print(f"   U-statistic: {u_stat:.4f}")
print(f"   P-value: {p_value_u:.4f}")
print(f"   Significant at Œ±=0.05? {_mw_verdict}")
print(f"   Note: Used when data is not normally distributed (e.g., revenue)")

# ============================================================================
# SECTION 4: CONFIDENCE INTERVALS
# ============================================================================
# Wald interval for the difference in conversion rates (variant - control),
# using the unpooled standard error.

print("\n" + "=" * 70)
print("üìâ SECTION 4: Confidence Intervals")
print("=" * 70 + "\n")

# 95% Confidence Interval for Conversion Rate Difference
z_critical = 1.96  # two-sided 95% normal quantile

# Sampling variance of each arm's observed rate
_var_control = p_control * (1 - p_control) / n_control
_var_variant = p_variant * (1 - p_variant) / n_variant
se_diff = np.sqrt(_var_control + _var_variant)

diff = p_variant - p_control
_margin = z_critical * se_diff
ci_lower = diff - _margin
ci_upper = diff + _margin

# Rates are fractions internally; multiply by 100 for display only.
print(f"95% Confidence Interval for Conversion Rate Difference:")
print(f"   Point Estimate: {diff*100:.2f} percentage points")
print(f"   CI: [{ci_lower*100:.2f}%, {ci_upper*100:.2f}%]")
print(f"   Interpretation: We're 95% confident the true difference is in this range")

# ============================================================================
# SECTION 5: EFFECT SIZE & POWER ANALYSIS
# ============================================================================

print("\n" + "="*70)
print("üí™ SECTION 5: Effect Size & Statistical Power")
print("="*70 + "\n")


def _describe_cohens_h(h):
    """Label |h| using Cohen's benchmarks: 0.2 small, 0.5 medium, 0.8 large.

    Values below the 0.2 "small" benchmark are reported as negligible, so
    that 0.2-0.5 reads as small and 0.5-0.8 as medium rather than being
    promoted one band.
    """
    magnitude = abs(h)
    if magnitude < 0.2:
        return "Negligible effect"
    if magnitude < 0.5:
        return "Small effect"
    if magnitude < 0.8:
        return "Medium effect"
    return "Large effect"


# Cohen's h (effect size for proportions): difference of the arcsine-
# transformed conversion rates; positive when the variant rate is higher.
cohens_h = 2 * (np.arcsin(np.sqrt(p_variant)) - np.arcsin(np.sqrt(p_control)))

print(f"Cohen's h (effect size): {cohens_h:.4f}")
print(f"Interpretation: {_describe_cohens_h(cohens_h)}")

# Minimum Detectable Effect (MDE): smallest absolute rate difference a
# two-sided test at `alpha` would detect with probability `power`, given the
# current sample sizes and the pooled conversion rate.
alpha = 0.05
power = 0.80
z_alpha = stats.norm.ppf(1 - alpha/2)
z_beta = stats.norm.ppf(power)

mde = (z_alpha + z_beta) * np.sqrt(p_pooled * (1 - p_pooled) * (1/n_control + 1/n_variant))
print(f"\nMinimum Detectable Effect at {power:.0%} power: {mde*100:.2f} percentage points")

# ============================================================================
# SECTION 6: SEGMENTATION ANALYSIS
# ============================================================================
# Breaks the conversion rate down by one user attribute at a time, per arm.

print("\n" + "=" * 70)
print("üéØ SECTION 6: Segmentation Analysis")
print("=" * 70 + "\n")


def _conversion_by(segment_column):
    """Return conversion rate (in %) and user count per (Group, segment) cell."""
    table = data.groupby(['Group', segment_column])['Converted'].agg(['mean', 'count'])
    table['mean'] = table['mean'] * 100  # fraction -> percent
    return table


# Analysis by Device
print("Conversion Rate by Device:")
print("-" * 70)
segment_analysis = _conversion_by('Device')
print(segment_analysis.round(2))

# Analysis by User Type
print("\n\nConversion Rate by User Type:")
print("-" * 70)
user_analysis = _conversion_by('User_Type')
print(user_analysis.round(2))

# ============================================================================
# SECTION 7: BUSINESS IMPACT ANALYSIS
# ============================================================================
# Converts the observed revenue-per-user difference into monthly and annual
# projections under an assumed traffic level and implementation cost.

print("\n" + "="*70)
print("üí∞ SECTION 7: Business Impact Analysis")
print("="*70 + "\n")

# Revenue calculations
control_total_revenue = control_df['Revenue'].sum()
variant_total_revenue = variant_df['Revenue'].sum()
additional_revenue = variant_total_revenue - control_total_revenue

# Projections
monthly_visitors = 100000  # Assume 100K monthly visitors
annual_visitors = monthly_visitors * 12

# Assumptions for the payback / ROI figures
IMPLEMENTATION_COST = 500  # one-time cost, USD
DAYS_PER_MONTH = 30        # used to express the payback period in days

# Per-user revenue improvement
control_rpu = control_total_revenue / n_control
variant_rpu = variant_total_revenue / n_variant
rpu_lift = variant_rpu - control_rpu

monthly_revenue_impact = rpu_lift * monthly_visitors
annual_revenue_impact = rpu_lift * annual_visitors

print(f"Current Test Results:")
print(f"   Control Total Revenue: ${control_total_revenue:,.2f}")
print(f"   Variant Total Revenue: ${variant_total_revenue:,.2f}")
print(f"   Additional Revenue: ${additional_revenue:,.2f} ({(additional_revenue/control_total_revenue)*100:.1f}% increase)")

print(f"\nüìà Projected Business Impact (if implemented):")
print(f"   Monthly Traffic Assumption: {monthly_visitors:,} visitors")
print(f"   Monthly Revenue Impact: ${monthly_revenue_impact:,.2f}")
print(f"   Annual Revenue Impact: ${annual_revenue_impact:,.2f}")
print(f"   Implementation Cost: ${IMPLEMENTATION_COST:,} (one-time)")
if monthly_revenue_impact > 0:
    # cost / monthly impact is a number of MONTHS; convert before labelling
    # the result as days.
    payback_days = IMPLEMENTATION_COST / monthly_revenue_impact * DAYS_PER_MONTH
    print(f"   Payback Period: {payback_days:.1f} days")
else:
    # Without a positive revenue lift the cost is never recovered; this also
    # avoids a division by zero or a negative "payback".
    print("   Payback Period: N/A (no positive revenue impact)")
print(f"   ROI: {((annual_revenue_impact - IMPLEMENTATION_COST) / IMPLEMENTATION_COST * 100):,.0f}%")

# ============================================================================
# SECTION 8: FINAL RECOMMENDATION
# ============================================================================
# Turns the four p-values and the relative conversion lift into one of four
# rollout recommendations, then prints follow-up steps.

print("\n" + "=" * 70)
print("üéØ SECTION 8: Final Recommendation")
print("=" * 70 + "\n")

# Decision Logic: count how many of the four tests are significant at 0.05.
_p_values = (p_value_z, p_value_chi, p_value_t, p_value_u)
tests_passed = sum([p < 0.05 for p in _p_values])

print(f"Statistical Tests Summary:")
print(f"   ‚úÖ Tests passed (p < 0.05): {tests_passed}/4")
print(f"   üìä Relative lift: {lift:+.2f}%")
print(f"   üí∞ Annual revenue impact: ${annual_revenue_impact:,.2f}")

# Pick the headline and its supporting bullet points. Branch order matters:
# the strongest positive case is checked first, and anything unmatched
# (including a lift of exactly zero) falls through to "continue testing".
_variant_ahead = lift > 0
if _variant_ahead and tests_passed >= 3:
    _headline = "üéâ RECOMMENDATION: IMPLEMENT VARIANT IMMEDIATELY"
    _reasons = (
        "   ‚Ä¢ Statistically significant across multiple tests",
        "   ‚Ä¢ Positive revenue impact with high confidence",
        "   ‚Ä¢ Low implementation cost, high ROI",
        "   ‚Ä¢ No negative impacts on secondary metrics",
    )
elif _variant_ahead and tests_passed >= 2:
    _headline = "‚úÖ RECOMMENDATION: IMPLEMENT VARIANT WITH MONITORING"
    _reasons = (
        "   ‚Ä¢ Significant in most tests",
        "   ‚Ä¢ Monitor post-launch for 2 weeks",
    )
elif lift < 0 and tests_passed >= 2:
    _headline = "‚ùå RECOMMENDATION: KEEP CONTROL VERSION"
    _reasons = (
        "   ‚Ä¢ Variant performs worse than control",
        "   ‚Ä¢ Would result in revenue loss",
    )
else:
    _headline = "‚è∏Ô∏è  RECOMMENDATION: CONTINUE TESTING"
    _reasons = (
        "   ‚Ä¢ Not enough statistical evidence yet",
        "   ‚Ä¢ Need larger sample size or longer test duration",
    )

print(f"\n{'='*70}")
print(_headline)
print("=" * 70)
print("\nReasoning:")
for _reason in _reasons:
    print(_reason)

print("\n" + "=" * 70)
print("‚úÖ Analysis Complete!")
print("=" * 70)
print("\nNext Steps:")
_next_steps = (
    "Share results with stakeholders",
    "Prepare implementation plan",
    "Set up post-launch monitoring dashboard",
    "Document learnings for future tests",
)
for _number, _step in enumerate(_next_steps, start=1):
    print(f"   {_number}. {_step}")

🧪 ADVANCED A/B TESTING FRAMEWORK

📊 SECTION 1: Generating Experiment Data

✅ Generated data for 20,000 users
   Control Group: 10,000 users
   Variant Group: 10,000 users

First few rows:
     Group  Converted     Revenue   Device  User_Type  Time_On_Site
0  Control          1  116.242860   Mobile        New    496.479458
1  Control          1   71.667656   Mobile        New    183.874520
2  Control          1   28.276950  Desktop        New    327.053087
3  Control          1   61.745362  Desktop  Returning    174.350311
4  Control          1   49.974007   Mobile        New     68.504840
5  Control          1    5.397874   Mobile  Returning    240.147761
6  Control          1   16.781057   Mobile  Returning     47.105168
7  Control          1   52.654141   Mobile  Returning    163.412976
8  Control          1  109.129422   Mobile        New     92.659919
9  Control          1   34.796305  Desktop        New     29.229028

📈 SECTION 2: Descriptive Statistics

        Convert