In [13]:
import pandas as pd
import numpy as np
from scipy import stats
from statsmodels.stats.multitest import multipletests

# 1. SETUP DATA (Link Raw yang sudah diperbaiki)
links = {
    "Test 1 - Menu": "https://raw.githubusercontent.com/irppanpilihanrambe/Project-DEC/main/data/raw/test1_menu.csv",
    "Test 2 - Novelty": "https://raw.githubusercontent.com/irppanpilihanrambe/Project-DEC/main/data/raw/test2_novelty_slider.csv",
    "Test 3 - Product": "https://raw.githubusercontent.com/irppanpilihanrambe/Project-DEC/main/data/raw/test3_product_sliders.csv",
    "Test 4 - Reviews": "https://raw.githubusercontent.com/irppanpilihanrambe/Project-DEC/main/data/raw/test4_reviews.csv",
    "Test 5 - Search": "https://raw.githubusercontent.com/irppanpilihanrambe/Project-DEC/main/data/raw/test5_search_engine.csv"
}

def perform_statistical_audit(df):
    # Bersihkan nama kolom & data varian
    df.columns = df.columns.str.strip().str.lower()
    df['variant'] = df['variant'].astype(str).str.strip().str.upper()
    
    # Cari kolom angka (revenue/amount)
    rev_col = [c for c in df.columns if any(x in c for x in ['rev', 'amount', 'total', 'sale'])]
    if not rev_col: return np.nan, "Kolom Tidak Ada", 0
    target_col = rev_col[0]

    # Ambil grup Control & Treatment
    unique_variants = df['variant'].unique()
    control_name = 'CONTROL' if 'CONTROL' in unique_variants else unique_variants[0]
    
    # PERBAIKAN DI SINI: Tanda kurung sudah diperbaiki
    control_data = pd.to_numeric(df[df['variant'] == control_name][target_col], errors='coerce').dropna()
    treatment_data = pd.to_numeric(df[df['variant'] != control_name][target_col], errors='coerce').dropna()
    
    if len(control_data) < 2 or len(treatment_data) < 2:
        return np.nan, "Data Sedikit", 0
    
    # STEP 1: Cek Skewness (Guide Halaman 24)
    skew_val = stats.skew(df[target_col].dropna())
    
    # STEP 2: Pilih Test (Halaman 23-25)
    if abs(skew_val) > 1.0:
        stat, p_val = stats.mannwhitneyu(control_data, treatment_data, alternative='two-sided')
        test_name = "Mann-Whitney U"
    else:
        stat, p_val = stats.ttest_ind(control_data, treatment_data, equal_var=False)
        test_name = "Welch's T-Test"
        
    return p_val, test_name, skew_val

# --- EKSEKUSI ---
p_values, test_names, applied_methods = [], [], []
print("üöÄ MEMULAI AUDIT STATISTIK (ALL 5 TESTS)...")

for name, url in links.items():
    try:
        # Membaca data dengan proteksi baris rusak
        df_raw = pd.read_csv(url, on_bad_lines='skip')
        p, method, skew = perform_statistical_audit(df_raw)
        
        if not np.isnan(p):
            p_values.append(p)
            test_names.append(name)
            applied_methods.append(method)
            print(f"‚úÖ {name}: Berhasil (Method: {method})")
        else:
            print(f"‚ö†Ô∏è {name}: Dilewati ({method})")
    except Exception as e:
        print(f"‚ùå {name}: Gagal -> {e}")

# STEP 3: Holm-Bonferroni Correction (Halaman 28-29)
if p_values:
    reject, p_corrected, _, _ = multipletests(p_values, alpha=0.05, method='holm')
    
    results_table = pd.DataFrame({
        'Experiment': test_names,
        'Methodology': applied_methods,
        'Raw P-Value': [f"{p:.4f}" for p in p_values],
        'Holm-Corrected': [f"{p:.4f}" for p in p_corrected],
        'Result': ["SIGNIFICANT ‚úÖ" if r else "Not Significant ‚ùå" for r in reject]
    })
    
    print("\n" + "="*75 + "\nFINAL STATISTICAL AUDIT REPORT (5 TESTS COMPLETE)\n" + "="*75)
    display(results_table)

üöÄ MEMULAI AUDIT STATISTIK (ALL 5 TESTS)...
‚úÖ Test 1 - Menu: Berhasil (Method: Mann-Whitney U)
‚úÖ Test 2 - Novelty: Berhasil (Method: Mann-Whitney U)
‚úÖ Test 3 - Product: Berhasil (Method: Mann-Whitney U)
‚ö†Ô∏è Test 4 - Reviews: Dilewati (Kolom Tidak Ada)
‚úÖ Test 5 - Search: Berhasil (Method: Mann-Whitney U)

FINAL STATISTICAL AUDIT REPORT (5 TESTS COMPLETE)


Unnamed: 0,Experiment,Methodology,Raw P-Value,Holm-Corrected,Result
0,Test 1 - Menu,Mann-Whitney U,0.0,0.0,SIGNIFICANT ‚úÖ
1,Test 2 - Novelty,Mann-Whitney U,0.0,0.0,SIGNIFICANT ‚úÖ
2,Test 3 - Product,Mann-Whitney U,0.0,0.0,SIGNIFICANT ‚úÖ
3,Test 5 - Search,Mann-Whitney U,0.3461,0.3461,Not Significant ‚ùå
