In [1]:
import pandas as pd
import seaborn as sns
import scipy.stats as stats


In [2]:
df = pd.read_csv('~/Downloads/AB_Test_Results.csv')

In [3]:
df

Unnamed: 0,USER_ID,VARIANT_NAME,REVENUE
0,737,variant,0.0
1,2423,control,0.0
2,9411,control,0.0
3,7311,control,0.0
4,6174,variant,0.0
...,...,...,...
9995,1981,control,0.0
9996,502,variant,0.0
9997,9214,variant,0.0
9998,7741,control,0.0


In [4]:
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from scipy.stats import chi2_contingency, ttest_ind, mannwhitneyu, normaltest
from scipy import stats

print("="*80)
print("A/B TEST ANALYSIS REPORT")
print("="*80)

A/B TEST ANALYSIS REPORT


In [5]:
# 1. DATA OVERVIEW
print("\n1. DATA OVERVIEW")
print("-" * 80)
print(f"Total users: {len(df)}")
print(f"\nGroup distribution:")
print(df['VARIANT_NAME'].value_counts())
print(f"\nBasic statistics:")
print(df.groupby('VARIANT_NAME')['REVENUE'].describe())


1. DATA OVERVIEW
--------------------------------------------------------------------------------
Total users: 10000

Group distribution:
VARIANT_NAME
variant    5016
control    4984
Name: count, dtype: int64

Basic statistics:
               count      mean       std  min  25%  50%  75%     max
VARIANT_NAME                                                        
control       4984.0  0.129013  3.007524  0.0  0.0  0.0  0.0  196.01
variant       5016.0  0.070070  1.314802  0.0  0.0  0.0  0.0   58.63


In [6]:
# Separate control and variant groups
control = df[df['VARIANT_NAME'] == 'control']['REVENUE']
variant = df[df['VARIANT_NAME'] == 'variant']['REVENUE']

# Key metrics
control_mean = control.mean()
variant_mean = variant.mean()
control_std = control.std()
variant_std = variant.std()
control_conversion = (control > 0).sum() / len(control)
variant_conversion = (variant > 0).sum() / len(variant)

print("\n2. KEY METRICS")
print("-" * 80)
print(f"Control Group:")
print(f"  - Mean Revenue: ${control_mean:.4f}")
print(f"  - Std Dev: ${control_std:.4f}")
print(f"  - Conversion Rate: {control_conversion:.4%}")
print(f"  - Total Users: {len(control)}")
print(f"\nVariant Group:")
print(f"  - Mean Revenue: ${variant_mean:.4f}")
print(f"  - Std Dev: ${variant_std:.4f}")
print(f"  - Conversion Rate: {variant_conversion:.4%}")
print(f"  - Total Users: {len(variant)}")
print(f"\nDifferences:")
print(f"  - Revenue Lift: {((variant_mean - control_mean) / control_mean * 100):.2f}%")
print(f"  - Absolute Revenue Difference: ${variant_mean - control_mean:.4f}")
print(f"  - Conversion Rate Lift: {((variant_conversion - control_conversion) / control_conversion * 100):.2f}%")


2. KEY METRICS
--------------------------------------------------------------------------------
Control Group:
  - Mean Revenue: $0.1290
  - Std Dev: $3.0075
  - Conversion Rate: 1.6051%
  - Total Users: 4984

Variant Group:
  - Mean Revenue: $0.0701
  - Std Dev: $1.3148
  - Conversion Rate: 1.4354%
  - Total Users: 5016

Differences:
  - Revenue Lift: -45.69%
  - Absolute Revenue Difference: $-0.0589
  - Conversion Rate Lift: -10.57%


In [7]:
# 3. T-TEST (for continuous revenue data)
print("\n3. T-TEST ANALYSIS")
print("-" * 80)

# Independent samples t-test
t_stat, t_pvalue = ttest_ind(control, variant, equal_var=False)  # Welch's t-test

print(f"Welch's T-Test (unequal variance):")
print(f"  - T-statistic: {t_stat:.4f}")
print(f"  - P-value: {t_pvalue:.6f}")
print(f"  - Result: {'Statistically significant' if t_pvalue < 0.05 else 'NOT statistically significant'} (α=0.05)")

# Effect size (Cohen's d)
pooled_std = np.sqrt((control_std**2 + variant_std**2) / 2)
cohens_d = (variant_mean - control_mean) / pooled_std
print(f"  - Cohen's d (effect size): {cohens_d:.4f}")
print(f"  - Effect interpretation: ", end="")
if abs(cohens_d) < 0.2:
    print("Small")
elif abs(cohens_d) < 0.5:
    print("Medium")
else:
    print("Large")


3. T-TEST ANALYSIS
--------------------------------------------------------------------------------
Welch's T-Test (unequal variance):
  - T-statistic: 1.2684
  - P-value: 0.204697
  - Result: NOT statistically significant (α=0.05)
  - Cohen's d (effect size): -0.0254
  - Effect interpretation: Small


In [8]:
# 4. CHI-SQUARE TEST (for conversion rate)
print("\n4. CHI-SQUARE TEST (Conversion Rate)")
print("-" * 80)

# Create contingency table
control_converted = (control > 0).sum()
control_not_converted = (control == 0).sum()
variant_converted = (variant > 0).sum()
variant_not_converted = (variant == 0).sum()

contingency_table = np.array([
    [control_converted, control_not_converted],
    [variant_converted, variant_not_converted]
])

print("Contingency Table:")
print(pd.DataFrame(contingency_table, 
                   index=['Control', 'Variant'],
                   columns=['Converted', 'Not Converted']))

chi2_stat, chi2_pvalue, dof, expected = chi2_contingency(contingency_table)

print(f"\nChi-Square Test Results:")
print(f"  - Chi-square statistic: {chi2_stat:.4f}")
print(f"  - P-value: {chi2_pvalue:.6f}")
print(f"  - Degrees of freedom: {dof}")
print(f"  - Result: {'Statistically significant' if chi2_pvalue < 0.05 else 'NOT statistically significant'} (α=0.05)")

# Cramér's V (effect size for chi-square)
n = contingency_table.sum()
cramers_v = np.sqrt(chi2_stat / (n * (min(contingency_table.shape) - 1)))
print(f"  - Cramér's V (effect size): {cramers_v:.4f}")


4. CHI-SQUARE TEST (Conversion Rate)
--------------------------------------------------------------------------------
Contingency Table:
         Converted  Not Converted
Control         80           4904
Variant         72           4944

Chi-Square Test Results:
  - Chi-square statistic: 0.3744
  - P-value: 0.540605
  - Degrees of freedom: 1
  - Result: NOT statistically significant (α=0.05)
  - Cramér's V (effect size): 0.0061


In [9]:
# 5. MANN-WHITNEY U TEST (Non-parametric alternative to t-test)
print("\n5. MANN-WHITNEY U TEST (Non-parametric)")
print("-" * 80)

u_stat, u_pvalue = mannwhitneyu(control, variant, alternative='two-sided')

print(f"Mann-Whitney U Test:")
print(f"  - U-statistic: {u_stat:.4f}")
print(f"  - P-value: {u_pvalue:.6f}")
print(f"  - Result: {'Statistically significant' if u_pvalue < 0.05 else 'NOT statistically significant'} (α=0.05)")
print(f"  - Note: This test doesn't assume normal distribution")


5. MANN-WHITNEY U TEST (Non-parametric)
--------------------------------------------------------------------------------
Mann-Whitney U Test:
  - U-statistic: 12521564.0000
  - P-value: 0.478252
  - Result: NOT statistically significant (α=0.05)
  - Note: This test doesn't assume normal distribution


In [10]:
# 6. PERMUTATION TEST
print("\n6. PERMUTATION TEST (Randomization Test)")
print("-" * 80)

# Observed difference
observed_diff = variant_mean - control_mean

# Combine all data
all_data = np.concatenate([control, variant])
n_control = len(control)
n_permutations = 10000

# Run permutations
np.random.seed(42)
perm_diffs = []

for _ in range(n_permutations):
    # Shuffle and split
    shuffled = np.random.permutation(all_data)
    perm_control = shuffled[:n_control]
    perm_variant = shuffled[n_control:]
    perm_diffs.append(perm_variant.mean() - perm_control.mean())

perm_diffs = np.array(perm_diffs)

# Calculate p-value (two-tailed)
perm_pvalue = np.mean(np.abs(perm_diffs) >= np.abs(observed_diff))

print(f"Permutation Test ({n_permutations:,} iterations):")
print(f"  - Observed difference: ${observed_diff:.4f}")
print(f"  - P-value: {perm_pvalue:.6f}")
print(f"  - Result: {'Statistically significant' if perm_pvalue < 0.05 else 'NOT statistically significant'} (α=0.05)")
print(f"  - 95% CI of permuted differences: [${np.percentile(perm_diffs, 2.5):.4f}, ${np.percentile(perm_diffs, 97.5):.4f}]")


6. PERMUTATION TEST (Randomization Test)
--------------------------------------------------------------------------------
Permutation Test (10,000 iterations):
  - Observed difference: $-0.0589
  - P-value: 0.216700
  - Result: NOT statistically significant (α=0.05)
  - 95% CI of permuted differences: [$-0.0792, $0.0797]


In [11]:
# 7. BOOTSTRAP CONFIDENCE INTERVALS
print("\n7. BOOTSTRAP CONFIDENCE INTERVALS")
print("-" * 80)

n_bootstrap = 10000
np.random.seed(42)

control_means = []
variant_means = []
diff_means = []

for _ in range(n_bootstrap):
    control_sample = np.random.choice(control, size=len(control), replace=True)
    variant_sample = np.random.choice(variant, size=len(variant), replace=True)
    control_means.append(control_sample.mean())
    variant_means.append(variant_sample.mean())
    diff_means.append(variant_sample.mean() - control_sample.mean())

control_means = np.array(control_means)
variant_means = np.array(variant_means)
diff_means = np.array(diff_means)

print(f"Bootstrap Analysis ({n_bootstrap:,} iterations):")
print(f"\nControl Group Revenue:")
print(f"  - Mean: ${control_mean:.4f}")
print(f"  - 95% CI: [${np.percentile(control_means, 2.5):.4f}, ${np.percentile(control_means, 97.5):.4f}]")
print(f"\nVariant Group Revenue:")
print(f"  - Mean: ${variant_mean:.4f}")
print(f"  - 95% CI: [${np.percentile(variant_means, 2.5):.4f}, ${np.percentile(variant_means, 97.5):.4f}]")
print(f"\nDifference (Variant - Control):")
print(f"  - Mean Difference: ${observed_diff:.4f}")
print(f"  - 95% CI: [${np.percentile(diff_means, 2.5):.4f}, ${np.percentile(diff_means, 97.5):.4f}]")
print(f"  - CI includes zero: {'Yes - NOT significant' if np.percentile(diff_means, 2.5) < 0 < np.percentile(diff_means, 97.5) else 'No - Significant'}")


7. BOOTSTRAP CONFIDENCE INTERVALS
--------------------------------------------------------------------------------
Bootstrap Analysis (10,000 iterations):

Control Group Revenue:
  - Mean: $0.1290
  - 95% CI: [$0.0664, $0.2279]

Variant Group Revenue:
  - Mean: $0.0701
  - 95% CI: [$0.0388, $0.1095]

Difference (Variant - Control):
  - Mean Difference: $-0.0589
  - 95% CI: [$-0.1616, $0.0171]
  - CI includes zero: Yes - NOT significant


In [12]:
# 8. NORMALITY TESTS
print("\n8. NORMALITY TESTS")
print("-" * 80)

# D'Agostino-Pearson test
control_norm_stat, control_norm_p = normaltest(control)
variant_norm_stat, variant_norm_p = normaltest(variant)

print("D'Agostino-Pearson Normality Test:")
print(f"  Control Group:")
print(f"    - Statistic: {control_norm_stat:.4f}")
print(f"    - P-value: {control_norm_p:.6f}")
print(f"    - Distribution: {'Normal' if control_norm_p > 0.05 else 'NOT Normal'} (α=0.05)")
print(f"  Variant Group:")
print(f"    - Statistic: {variant_norm_stat:.4f}")
print(f"    - P-value: {variant_norm_p:.6f}")
print(f"    - Distribution: {'Normal' if variant_norm_p > 0.05 else 'NOT Normal'} (α=0.05)")

# Shapiro-Wilk test (use sample if data is large)
sample_size = min(5000, len(control))
control_sample = np.random.choice(control, size=sample_size, replace=False)
variant_sample = np.random.choice(variant, size=sample_size, replace=False)

shapiro_control_stat, shapiro_control_p = stats.shapiro(control_sample)
shapiro_variant_stat, shapiro_variant_p = stats.shapiro(variant_sample)

print(f"\nShapiro-Wilk Test (sample of {sample_size}):")
print(f"  Control Group:")
print(f"    - Statistic: {shapiro_control_stat:.4f}")
print(f"    - P-value: {shapiro_control_p:.6f}")
print(f"    - Distribution: {'Normal' if shapiro_control_p > 0.05 else 'NOT Normal'} (α=0.05)")
print(f"  Variant Group:")
print(f"    - Statistic: {shapiro_variant_stat:.4f}")
print(f"    - P-value: {shapiro_variant_p:.6f}")
print(f"    - Distribution: {'Normal' if shapiro_variant_p > 0.05 else 'NOT Normal'} (α=0.05)")


8. NORMALITY TESTS
--------------------------------------------------------------------------------
D'Agostino-Pearson Normality Test:
  Control Group:
    - Statistic: 16471.2270
    - P-value: 0.000000
    - Distribution: NOT Normal (α=0.05)
  Variant Group:
    - Statistic: 14124.9425
    - P-value: 0.000000
    - Distribution: NOT Normal (α=0.05)

Shapiro-Wilk Test (sample of 4984):
  Control Group:
    - Statistic: 0.0183
    - P-value: 0.000000
    - Distribution: NOT Normal (α=0.05)
  Variant Group:
    - Statistic: 0.0267
    - P-value: 0.000000
    - Distribution: NOT Normal (α=0.05)


In [13]:
# 9. SUMMARY AND CONCLUSIONS
print("\n" + "="*80)
print("9. STATISTICAL SUMMARY")
print("="*80)

# Collect all p-values
p_values = {
    'T-Test (Welch)': t_pvalue,
    'Chi-Square': chi2_pvalue,
    'Mann-Whitney U': u_pvalue,
    'Permutation Test': perm_pvalue
}

print("\nAll P-Values Summary:")
for test_name, p_val in p_values.items():
    print(f"  - {test_name:20s}: {p_val:.6f} {'✓ Significant' if p_val < 0.05 else '✗ Not Significant'}")

print(f"\nEffect Sizes:")
print(f"  - Cohen's d: {cohens_d:.4f} ({'Small' if abs(cohens_d) < 0.2 else 'Medium' if abs(cohens_d) < 0.5 else 'Large'})")
print(f"  - Cramér's V: {cramers_v:.4f}")

print(f"\n" + "="*80)
print("FINAL CONCLUSIONS")
print("="*80)

significant_tests = sum(1 for p in p_values.values() if p < 0.05)
print(f"\n{significant_tests} out of {len(p_values)} statistical tests show significance at α=0.05")

if significant_tests >= 3:
    print("\n✓ RECOMMENDATION: The variant shows STATISTICALLY SIGNIFICANT difference from control.")
    print(f"  - Revenue change: ${variant_mean - control_mean:.4f} ({((variant_mean - control_mean) / control_mean * 100):.2f}%)")
    print(f"  - Conversion rate change: {(variant_conversion - control_conversion):.4f} ({((variant_conversion - control_conversion) / control_conversion * 100):.2f}%)")
    if variant_mean > control_mean:
        print("  - Direction: POSITIVE (Variant performs BETTER)")
    else:
        print("  - Direction: NEGATIVE (Variant performs WORSE)")
elif significant_tests > 0:
    print("\n⚠ RECOMMENDATION: Results are MIXED. Some tests show significance, others don't.")
    print("  - Consider running the test longer or with more users.")
    print("  - Check for data quality issues or violations of test assumptions.")
else:
    print("\n✗ RECOMMENDATION: NO statistically significant difference detected.")
    print("  - The variant does not show meaningful improvement over control.")
    print("  - Consider testing a different variant or hypothesis.")

print("\n" + "="*80)


9. STATISTICAL SUMMARY

All P-Values Summary:
  - T-Test (Welch)      : 0.204697 ✗ Not Significant
  - Chi-Square          : 0.540605 ✗ Not Significant
  - Mann-Whitney U      : 0.478252 ✗ Not Significant
  - Permutation Test    : 0.216700 ✗ Not Significant

Effect Sizes:
  - Cohen's d: -0.0254 (Small)
  - Cramér's V: 0.0061

FINAL CONCLUSIONS

0 out of 4 statistical tests show significance at α=0.05

✗ RECOMMENDATION: NO statistically significant difference detected.
  - The variant does not show meaningful improvement over control.
  - Consider testing a different variant or hypothesis.



In [14]:
# VISUALIZATIONS
print("\nGenerating visualizations...")

# Chart 1: Revenue Distribution Comparison
fig1 = make_subplots(
    rows=1, cols=2,
    subplot_titles=('Control Group Revenue Distribution', 'Variant Group Revenue Distribution')
)

fig1.add_trace(
    go.Histogram(x=control, name='Control', nbinsx=50, marker_color='blue', opacity=0.7),
    row=1, col=1
)

fig1.add_trace(
    go.Histogram(x=variant, name='Variant', nbinsx=50, marker_color='red', opacity=0.7),
    row=1, col=2
)

fig1.update_layout(
    title_text="Revenue Distribution by Group",
    showlegend=True,
    height=1000,
    width=1600
)

fig1.update_xaxes(title_text="Revenue ($)", row=1, col=1)
fig1.update_xaxes(title_text="Revenue ($)", row=1, col=2)
fig1.update_yaxes(title_text="Frequency", row=1, col=1)
fig1.update_yaxes(title_text="Frequency", row=1, col=2)

fig1.show()


Generating visualizations...


In [15]:
# Chart 2: Box Plot Comparison
fig2 = go.Figure()

fig2.add_trace(go.Box(
    y=control,
    name='Control',
    marker_color='blue',
    boxmean='sd'
))

fig2.add_trace(go.Box(
    y=variant,
    name='Variant',
    marker_color='red',
    boxmean='sd'
))

fig2.update_layout(
    title='Revenue Distribution Box Plot Comparison',
    yaxis_title='Revenue ($)',
    showlegend=True,
    height=1000,
    width=1700
)

fig2.show()

In [16]:
# Chart 3: Mean Revenue Comparison with Error Bars
fig3 = go.Figure()

fig3.add_trace(go.Bar(
    x=['Control', 'Variant'],
    y=[control_mean, variant_mean],
    error_y=dict(
        type='data',
        array=[control_std / np.sqrt(len(control)), variant_std / np.sqrt(len(variant))],
        visible=True
    ),
    marker_color=['blue', 'red'],
    text=[f'${control_mean:.4f}', f'${variant_mean:.4f}'],
    textposition='outside'
))

fig3.update_layout(
    title='Mean Revenue Comparison (with Standard Error)',
    yaxis_title='Mean Revenue ($)',
    xaxis_title='Group',
    height=1000,
    width=1600
)

fig3.show()

In [17]:
# Chart 4: Conversion Rate Comparison
fig4 = go.Figure()

fig4.add_trace(go.Bar(
    x=['Control', 'Variant'],
    y=[control_conversion * 100, variant_conversion * 100],
    marker_color=['blue', 'red'],
    text=[f'{control_conversion:.2%}', f'{variant_conversion:.2%}'],
    textposition='outside'
))

fig4.update_layout(
    title='Conversion Rate Comparison',
    yaxis_title='Conversion Rate (%)',
    xaxis_title='Group',
    height=1000,
    width=1600
)

fig4.show()

In [18]:
# Chart 5: Bootstrap Distribution of Differences
fig5 = go.Figure()

fig5.add_trace(go.Histogram(
    x=diff_means,
    nbinsx=50,
    name='Bootstrap Differences',
    marker_color='purple',
    opacity=0.7
))

# Add vertical lines for confidence intervals and observed difference
fig5.add_vline(x=observed_diff, line_dash="dash", line_color="red", 
               annotation_text=f"Observed: ${observed_diff:.4f}", annotation_position="top")
fig5.add_vline(x=np.percentile(diff_means, 2.5), line_dash="dot", line_color="green",
               annotation_text=f"2.5%: ${np.percentile(diff_means, 2.5):.4f}", annotation_position="bottom left")
fig5.add_vline(x=np.percentile(diff_means, 97.5), line_dash="dot", line_color="green",
               annotation_text=f"97.5%: ${np.percentile(diff_means, 97.5):.4f}", annotation_position="bottom right")
fig5.add_vline(x=0, line_dash="solid", line_color="black", line_width=2,
               annotation_text="No Difference", annotation_position="top")

fig5.update_layout(
    title='Bootstrap Distribution of Mean Revenue Difference (10,000 samples)',
    xaxis_title='Difference in Mean Revenue (Variant - Control) ($)',
    yaxis_title='Frequency',
    height=1000,
    width=1600,
    showlegend=False
)

fig5.show()

In [19]:
# Chart 6: Permutation Test Distribution
fig6 = go.Figure()

fig6.add_trace(go.Histogram(
    x=perm_diffs,
    nbinsx=50,
    name='Permuted Differences',
    marker_color='orange',
    opacity=0.7
))

# Add vertical lines
fig6.add_vline(x=observed_diff, line_dash="dash", line_color="red", line_width=3,
               annotation_text=f"Observed: ${observed_diff:.4f}", annotation_position="top")
fig6.add_vline(x=-observed_diff, line_dash="dash", line_color="red", line_width=3,
               annotation_text=f"${-observed_diff:.4f}", annotation_position="top")
fig6.add_vline(x=0, line_dash="solid", line_color="black", line_width=2)

fig6.update_layout(
    title=f'Permutation Test Distribution (10,000 permutations)<br>P-value: {perm_pvalue:.6f}',
    xaxis_title='Difference in Mean Revenue ($)',
    yaxis_title='Frequency',
    height=1000,
    width=1600,
    showlegend=False
)

fig6.show()

In [20]:
# Chart 7: P-Values Comparison
fig7 = go.Figure()

test_names = list(p_values.keys())
p_vals = list(p_values.values())
colors = ['green' if p < 0.05 else 'red' for p in p_vals]

fig7.add_trace(go.Bar(
    x=test_names,
    y=p_vals,
    marker_color=colors,
    text=[f'{p:.6f}' for p in p_vals],
    textposition='outside'
))

# Add significance threshold line
fig7.add_hline(y=0.05, line_dash="dash", line_color="black", line_width=2,
               annotation_text="α = 0.05 (significance threshold)", annotation_position="right")

fig7.update_layout(
    title='P-Values from All Statistical Tests',
    yaxis_title='P-Value',
    xaxis_title='Statistical Test',
    height=1100,
    width=1600,
    showlegend=False
)

fig7.show()

In [21]:
# Chart 8: Cumulative Distribution Function (CDF) Comparison
fig8 = go.Figure()

# Sort data for CDF
control_sorted = np.sort(control)
variant_sorted = np.sort(variant)
control_cdf = np.arange(1, len(control_sorted) + 1) / len(control_sorted)
variant_cdf = np.arange(1, len(variant_sorted) + 1) / len(variant_sorted)

fig8.add_trace(go.Scatter(
    x=control_sorted,
    y=control_cdf,
    mode='lines',
    name='Control',
    line=dict(color='blue', width=2)
))

fig8.add_trace(go.Scatter(
    x=variant_sorted,
    y=variant_cdf,
    mode='lines',
    name='Variant',
    line=dict(color='red', width=2)
))

fig8.update_layout(
    title='Cumulative Distribution Function (CDF) Comparison',
    xaxis_title='Revenue ($)',
    yaxis_title='Cumulative Probability',
    height=1000,
    width=1600,
    showlegend=True
)

fig8.show()

In [22]:
# Chart 9: Violin Plot - Distribution Shape Comparison
fig9 = go.Figure()

fig9.add_trace(go.Violin(
    y=control,
    name='Control',
    box_visible=True,
    meanline_visible=True,
    fillcolor='blue',
    opacity=0.6,
    x0='Control'
))

fig9.add_trace(go.Violin(
    y=variant,
    name='Variant',
    box_visible=True,
    meanline_visible=True,
    fillcolor='red',
    opacity=0.6,
    x0='Variant'
))

fig9.update_layout(
    title='Violin Plot - Revenue Distribution Shape Comparison',
    yaxis_title='Revenue ($)',
    xaxis_title='Group',
    height=1000,
    width=1600,
    showlegend=True
)

fig9.show()

In [23]:
# Chart 10: Statistical Summary Dashboard
fig10 = make_subplots(
    rows=2, cols=2,
    subplot_titles=('Mean Revenue with 95% CI', 'Conversion Rates', 
                    'Effect Sizes', 'Statistical Significance'),
    specs=[[{'type': 'bar'}, {'type': 'bar'}],
           [{'type': 'bar'}, {'type': 'bar'}]]
)

# Subplot 1: Mean Revenue with CI
fig10.add_trace(
    go.Bar(x=['Control', 'Variant'], 
           y=[control_mean, variant_mean],
           error_y=dict(
               type='data',
               array=[
                   np.percentile(control_means, 97.5) - control_mean,
                   np.percentile(variant_means, 97.5) - variant_mean
               ],
               arrayminus=[
                   control_mean - np.percentile(control_means, 2.5),
                   variant_mean - np.percentile(variant_means, 2.5)
               ]
           ),
           marker_color=['blue', 'red'],
           showlegend=False),
    row=1, col=1
)

# Subplot 2: Conversion Rates
fig10.add_trace(
    go.Bar(x=['Control', 'Variant'], 
           y=[control_conversion * 100, variant_conversion * 100],
           marker_color=['blue', 'red'],
           showlegend=False),
    row=1, col=2
)

# Subplot 3: Effect Sizes
fig10.add_trace(
    go.Bar(x=["Cohen's d", "Cramér's V"], 
           y=[abs(cohens_d), cramers_v],
           marker_color=['purple', 'orange'],
           showlegend=False),
    row=2, col=1
)

# Subplot 4: P-values
fig10.add_trace(
    go.Bar(x=test_names, 
           y=p_vals,
           marker_color=colors,
           showlegend=False),
    row=2, col=2
)

# Add significance line to p-value chart
fig10.add_hline(y=0.05, line_dash="dash", line_color="black", row=2, col=2)

# Update axes
fig10.update_yaxes(title_text="Revenue ($)", row=1, col=1)
fig10.update_yaxes(title_text="Conversion Rate (%)", row=1, col=2)
fig10.update_yaxes(title_text="Effect Size", row=2, col=1)
fig10.update_yaxes(title_text="P-Value", row=2, col=2)

fig10.update_layout(
    title_text="A/B Test Statistical Summary Dashboard",
    height=1000,
    width=1600,
    showlegend=False
)

fig10.show()


In [24]:
print("\n✅ Analysis complete! All statistical tests and visualizations have been generated.")


✅ Analysis complete! All statistical tests and visualizations have been generated.
