In [39]:
import numpy as np
import pandas as pd
from scipy import stats
import os
import warnings

# Suppress warnings so the terminal output stays clean.
# NOTE(review): called with only 'ignore', this filter matches every warning
# category for the whole process, so deprecation and runtime warnings from
# pandas/scipy are hidden too — consider narrowing it.
warnings.filterwarnings('ignore')

class ExperimentValidator:
    """Validity checks to run on A/B-test data before analysing results.

    Three independent checks are provided:

    * sample ratio mismatch (chi-square test against an equal split),
    * covariate balance (standardised mean difference between variants),
    * temporal stability (coefficient of variation of daily counts).

    Args:
        srm_threshold: p-value below which an SRM is reported.
        balance_threshold: SMD at or above which balance is reported as a
            warning.
        temporal_threshold: CV at or above which allocation is reported as
            unstable.
    """

    def __init__(self, srm_threshold=0.001, balance_threshold=0.1, temporal_threshold=0.2):
        self.srm_threshold = srm_threshold
        self.balance_threshold = balance_threshold
        self.temporal_threshold = temporal_threshold

    def sample_ratio_mismatch_test(self, df, variant_col):
        """Test whether users are split equally across variants (SRM).

        Args:
            df: DataFrame with one row per observation.
            variant_col: Name of the column holding the variant label.

        Returns:
            dict with ``pvalue`` (float), ``has_srm`` (bool) and a
            human-readable ``message``. With fewer than two variants the
            test cannot be run: ``pvalue`` is NaN and ``has_srm`` is False.
        """
        observed = df[variant_col].value_counts().sort_index()
        n_variants = len(observed)
        if n_variants < 2:
            # Also covers an empty frame, which would otherwise divide by zero.
            return {
                'pvalue': float('nan'),
                'has_srm': False,
                'message': 'Need 2+ variants',
            }

        # value_counts() drops missing labels, so the expected counts must be
        # derived from the counted rows, not len(df); otherwise the observed
        # and expected totals disagree and chisquare rejects the input.
        expected_each = observed.sum() / n_variants
        expected = [expected_each] * n_variants

        _, pvalue = stats.chisquare(f_obs=observed.to_numpy(), f_exp=expected)
        has_srm = bool(pvalue < self.srm_threshold)

        label = '❌ SRM FAILED' if has_srm else '✅ SRM PASSED'
        return {
            'pvalue': pvalue,
            'has_srm': has_srm,
            'message': f"{label} (p={pvalue:.4f})"
        }

    def covariate_balance_check(self, df, variant_col, covariates):
        """Check covariate balance via the standardised mean difference (SMD).

        The SMD is computed for every pair of variants and every usable
        covariate; the largest value is reported.

        Args:
            df: DataFrame with one row per observation.
            variant_col: Name of the column holding the variant label.
            covariates: Iterable of column names to check. Names missing from
                ``df`` and non-numeric columns are skipped.

        Returns:
            dict with ``max_smd`` and a human-readable ``message``. ``max_smd``
            is 0 when there are fewer than two variants or no usable
            covariate.
        """
        # Missing labels are not a variant; drop them before pairing groups.
        variants = df[variant_col].dropna().unique()
        if len(variants) < 2:
            return {'max_smd': 0, 'message': 'Need 2+ variants'}

        smd_list = []
        for cov in covariates:
            if cov not in df.columns or not pd.api.types.is_numeric_dtype(df[cov]):
                continue

            # Mean and variance per variant, computed once per covariate.
            moments = []
            for variant in variants:
                values = df.loc[df[variant_col] == variant, cov]
                moments.append((values.mean(), values.var()))

            # Compare every pair so imbalance in a third (or later) variant
            # is not missed.
            for idx, (mean_a, var_a) in enumerate(moments):
                for mean_b, var_b in moments[idx + 1:]:
                    diff = abs(mean_a - mean_b)
                    pooled_std = np.sqrt((var_a + var_b) / 2)
                    # A zero or NaN pooled std (constant column, or a group
                    # with fewer than two values) yields an SMD of 0.
                    smd_list.append(diff / pooled_std if pooled_std > 0 else 0)

        max_smd = max(smd_list) if smd_list else 0
        status = "✅ GOOD" if max_smd < self.balance_threshold else "⚠️ WARN"
        return {
            'max_smd': max_smd,
            'message': f"{status} Balance (Max SMD={max_smd:.3f})"
        }

    def temporal_stability_check(self, df, variant_col, date_col):
        """Check that daily allocation per variant is stable over time.

        For each variant the coefficient of variation (std / mean) of its
        daily row counts is computed; the largest value is compared with
        ``temporal_threshold``.

        Args:
            df: DataFrame with one row per observation. It is not modified.
            variant_col: Name of the column holding the variant label.
            date_col: Name of a column parseable by ``pandas.to_datetime``.

        Returns:
            dict with ``max_cv``, ``is_stable`` (bool) and a human-readable
            ``message``. When the data covers a single day the sample
            standard deviation is undefined, so ``max_cv`` is NaN and
            ``is_stable`` is False.
        """
        # Group on a derived Series instead of writing a 'date' column, so a
        # variant or date column that happens to be named 'date' is untouched.
        days = pd.to_datetime(df[date_col]).dt.date
        daily = df.groupby([days, df[variant_col]]).size().unstack(fill_value=0)

        cv_list = [
            counts.std() / counts.mean()
            for _, counts in daily.items()
            if counts.mean() > 0
        ]

        max_cv = max(cv_list) if cv_list else 0
        is_stable = bool(max_cv < self.temporal_threshold)
        label = '✅ STABLE' if is_stable else '⚠️ UNSTABLE'
        return {
            'max_cv': max_cv,
            'is_stable': is_stable,
            'message': f"{label} (CV={max_cv:.3f})"
        }

def run_all_validations(base_path='/Users/irpanpilihanrambe/Downloads/DATA SET PROJECT DEC/raw'):
    """Run every validation check on each experiment CSV and print a report.

    For each known test file under ``base_path`` this loads the CSV, runs the
    SRM, covariate-balance and temporal-stability checks from
    ``ExperimentValidator``, prints the per-test messages, and finally prints
    a summary table. Files that are missing, or that lack the ``variant`` or
    ``timestamp`` column, are reported and skipped so that one bad file does
    not abort the whole run.

    Args:
        base_path: Directory containing the raw experiment CSV files. The
            default is the original author's local raw-data folder.
    """
    validator = ExperimentValidator()
    tests = [
        ('Test 1: Menu Design', 'test1_menu.csv'),
        ('Test 2: Novelty Slider', 'test2_novelty_slider.csv'),
        ('Test 3: Product Sliders', 'test3_product_sliders.csv'),
        ('Test 4: Customer Reviews', 'test4_reviews.csv'),
        ('Test 5: Search Engine', 'test5_search_engine.csv')
    ]
    # Columns that identify a row or define the grouping; never covariates.
    non_covariates = {'user_id', 'session_id', 'timestamp', 'variant'}

    results = []
    print("\n" + "="*80)
    print("EXPERIMENTAL VALIDATION SUITE - PROJECT DEC")
    print("="*80)

    for test_name, file_name in tests:
        full_path = os.path.join(base_path, file_name)

        if not os.path.exists(full_path):
            print(f"❌ File Not Found: {file_name}")
            continue

        df = pd.read_csv(full_path)

        # The checks below index these columns directly; skip the file
        # instead of failing the whole suite with a KeyError.
        missing = [col for col in ('variant', 'timestamp') if col not in df.columns]
        if missing:
            print(f"❌ Missing column(s) in {file_name}: {', '.join(missing)}")
            continue

        # Run the validations.
        srm = validator.sample_ratio_mismatch_test(df, 'variant')
        # Use the numeric columns (other than IDs, timestamp and the variant
        # itself) for the balance check.
        # NOTE(review): this also picks up any numeric outcome columns, where
        # a difference between variants may be a treatment effect rather than
        # an allocation problem — confirm which columns are pre-treatment.
        covs = [
            c for c in df.select_dtypes(include=[np.number]).columns
            if c not in non_covariates
        ]
        balance = validator.covariate_balance_check(df, 'variant', covs)
        temporal = validator.temporal_stability_check(df, 'variant', 'timestamp')

        # Overall validity tolerates an SMD up to 0.2, which is looser than
        # the "Good" label in the Balance column.
        valid_status = "YES" if (not srm['has_srm'] and balance['max_smd'] < 0.2) else "CHECK"

        results.append({
            'Test Name': test_name,
            'Users': f"{len(df):,}",
            'SRM': "PASS" if not srm['has_srm'] else "FAIL",
            'Balance': "Good" if balance['max_smd'] < validator.balance_threshold else "Warn",
            'Stability': "Stable" if temporal['is_stable'] else "Unstable",
            'Valid': valid_status
        })

        print(f"\nAnalyzing {test_name}...")
        print(f"  └─ {srm['message']}")
        print(f"  └─ {balance['message']}")
        print(f"  └─ {temporal['message']}")

    # Print the summary table.
    print("\n" + "="*80)
    print("SUMMARY REPORT")
    print("="*80)
    summary_df = pd.DataFrame(results)
    if not summary_df.empty:
        print(summary_df.to_string(index=False))
    else:
        print("No data processed. Check your file path.")
    print("="*80 + "\n")

# Run the full validation suite only when executed as a script, not on import.
if __name__ == "__main__":
    run_all_validations()


EXPERIMENTAL VALIDATION SUITE - PROJECT DEC

Analyzing Test 1: Menu Design...
  └─ ✅ SRM PASSED (p=1.0000)
  └─ ⚠️ WARN Balance (Max SMD=0.356)
  └─ ✅ STABLE (CV=0.057)

Analyzing Test 2: Novelty Slider...
  └─ ✅ SRM PASSED (p=1.0000)
  └─ ✅ GOOD Balance (Max SMD=0.097)
  └─ ✅ STABLE (CV=0.038)

Analyzing Test 3: Product Sliders...
  └─ ✅ SRM PASSED (p=1.0000)
  └─ ⚠️ WARN Balance (Max SMD=0.325)
  └─ ✅ STABLE (CV=0.049)

Analyzing Test 4: Customer Reviews...
  └─ ✅ SRM PASSED (p=1.0000)
  └─ ✅ GOOD Balance (Max SMD=0.012)
  └─ ✅ STABLE (CV=0.047)

Analyzing Test 5: Search Engine...
  └─ ✅ SRM PASSED (p=1.0000)
  └─ ✅ GOOD Balance (Max SMD=0.046)
  └─ ✅ STABLE (CV=0.026)

SUMMARY REPORT
               Test Name  Users  SRM Balance Stability Valid
     Test 1: Menu Design  7,000 PASS    Warn    Stable CHECK
  Test 2: Novelty Slider 16,000 PASS    Good    Stable   YES
 Test 3: Product Sliders 18,000 PASS    Warn    Stable CHECK
Test 4: Customer Reviews 42,000 PASS    Good    Stable   YE