In [1]:
# ==========================================
# FINAL AB TEST FRAMEWORK (PATH SAFE VERSION)
# ==========================================

import os
import numpy as np
import pandas as pd
from scipy import stats

# ------------------------------------------
# AUTO-DETECT RAW FOLDER
# ------------------------------------------

def find_raw_path():
    """Locate the ``raw`` data directory relative to the working directory.

    The current working directory is checked first, then its parent, so the
    lookup works whether the kernel was started in the project root or in a
    sub-folder one level below it.

    Returns:
        str: Path to the first ``raw`` directory found.

    Raises:
        FileNotFoundError: If neither location contains a ``raw`` directory.
    """
    current = os.getcwd()

    for base in (current, os.path.dirname(current)):
        candidate = os.path.join(base, "raw")
        # isdir (not exists): a regular file that happens to be called "raw"
        # must not be mistaken for the data folder.
        if os.path.isdir(candidate):
            return candidate

    raise FileNotFoundError("Cannot find 'raw' folder.")

# Resolve the data directory once, when this cell runs; the experiment loop
# further down joins each CSV filename onto it.
# NOTE(review): the printed value is an absolute local path and ends up in the
# saved notebook output.
RAW_PATH = find_raw_path()
print("Using RAW PATH:", RAW_PATH)


# ==========================================
# AB TEST FRAMEWORK
# ==========================================

class ABTestFramework:
    """Pairwise A/B significance testing of one metric across variant groups.

    Metrics whose non-null values are all 0/1 are compared with a pooled
    two-proportion z-test; every other metric is compared with Welch's
    t-test. Each non-baseline variant is compared against a single baseline.

    Attributes:
        alpha: Significance threshold; a comparison is flagged significant
            when its p-value is strictly below this value.
    """

    def __init__(self, alpha=0.05):
        """Store the significance threshold used by ``run_test``."""
        self.alpha = alpha

    def proportion_test(self, g1, g2):
        """Two-sided pooled z-test for a difference between two proportions.

        Args:
            g1: Baseline observations (0/1 values) exposing ``len`` and
                ``.sum()``, e.g. a pandas Series or NumPy array.
            g2: Variant observations, same requirements as ``g1``.

        Returns:
            tuple: ``(lift, pvalue)`` where ``lift`` is the relative change of
            the variant rate over the baseline rate in percent. ``(0, 1.0)``
            is returned when either group is empty or the pooled standard
            error is zero. ``lift`` is reported as 0 when the baseline rate
            is 0, because the relative change is undefined there.
        """
        n1, n2 = len(g1), len(g2)
        if n1 == 0 or n2 == 0:
            return 0, 1.0

        s1, s2 = g1.sum(), g2.sum()
        p1, p2 = s1 / n1, s2 / n2

        p_pool = (s1 + s2) / (n1 + n2)
        se = np.sqrt(p_pool * (1 - p_pool) * (1 / n1 + 1 / n2))

        # A zero standard error means the pooled rate is exactly 0 or 1, i.e.
        # both groups are identical constants: nothing to test.
        if se == 0:
            return 0, 1.0

        z = (p2 - p1) / se
        # Survival function instead of 1 - cdf: the subtraction cancels to
        # exactly 0.0 for large |z|, whereas sf keeps the tail probability.
        pvalue = 2 * stats.norm.sf(abs(z))

        lift = ((p2 - p1) / p1 * 100) if p1 != 0 else 0
        return lift, pvalue

    def t_test(self, g1, g2):
        """Welch's t-test (unequal variances) between two samples.

        Args:
            g1: Baseline observations exposing ``len`` and ``.mean()``.
            g2: Variant observations, same requirements as ``g1``.

        Returns:
            tuple: ``(lift, pvalue)`` where ``lift`` is the relative change of
            the variant mean over the baseline mean in percent. ``(0, 1.0)``
            is returned when either group is empty. ``lift`` is reported as 0
            when the baseline mean is 0.
        """
        if len(g1) == 0 or len(g2) == 0:
            return 0, 1.0

        # The statistic itself is not reported, only the p-value.
        _, pvalue = stats.ttest_ind(g2, g1, equal_var=False)
        mean1, mean2 = g1.mean(), g2.mean()

        lift = ((mean2 - mean1) / mean1 * 100) if mean1 != 0 else 0
        return lift, pvalue

    def run_test(self, df, variant_col, metric_col, baseline=None):
        """Compare every variant against a baseline on a single metric.

        Args:
            df: DataFrame holding one row per observation.
            variant_col: Name of the column with the variant label.
            metric_col: Name of the column with the metric to compare.
            baseline: Label of the control group. When omitted, the most
                frequent label in ``variant_col`` is used, which depends on
                the data; pass it explicitly to pin the control group.

        Returns:
            list[dict]: One entry per non-baseline variant with the keys
            ``comparison``, ``lift_%`` (rounded to 2 decimals), ``pvalue``
            (rounded to 5 decimals) and ``significant`` (evaluated on the
            unrounded p-value).

        Raises:
            ValueError: If fewer than two variant labels are present, or if
                ``baseline`` is given but not found in ``variant_col``.
        """
        # value_counts drops missing labels and orders by descending frequency.
        variants = df[variant_col].value_counts().index.tolist()

        if len(variants) < 2:
            raise ValueError("Not enough variant groups.")

        if baseline is None:
            baseline = variants[0]
        elif baseline not in variants:
            raise ValueError(
                f"Baseline {baseline!r} not found in column {variant_col!r}."
            )

        # The choice of test depends on the metric as a whole, not on the
        # variant, so decide once instead of on every iteration.
        unique_vals = set(df[metric_col].dropna().unique())
        test = self.proportion_test if unique_vals.issubset({0, 1}) else self.t_test

        g1 = df.loc[df[variant_col] == baseline, metric_col].dropna()
        results = []

        for variant in variants:
            if variant == baseline:
                continue

            g2 = df.loc[df[variant_col] == variant, metric_col].dropna()
            lift, pvalue = test(g1, g2)

            results.append({
                "comparison": f"{variant} vs {baseline}",
                "lift_%": round(lift, 2),
                "pvalue": round(pvalue, 5),
                "significant": pvalue < self.alpha
            })

        return results


# ==========================================
# EXPERIMENT CONFIG
# ==========================================

# Maps an experiment's display name to a (csv_filename, metric_column) pair.
# The run loop below joins the filename onto RAW_PATH and passes the metric
# column to ABTestFramework.run_test.
EXPERIMENT_CONFIG = {
    "Menu Design": ("test1_menu.csv", "added_to_cart"),
    "Novelty Slider": ("test2_novelty_slider.csv", "products_added_from_novelties"),
    "Product Sliders": ("test3_product_sliders.csv", "add_to_cart_rate"),
    "Reviews Experiment": ("test4_reviews.csv", "converted"),
    "Search Engine": ("test5_search_engine.csv", "converted"),
}


# ==========================================
# RUN ALL
# ==========================================

# One framework instance (default alpha) is shared by every experiment.
framework = ABTestFramework()
results = []

for name, (file, metric) in EXPERIMENT_CONFIG.items():
    print("Running:", name)

    # Each experiment lives in its own CSV under the raw data folder.
    df = pd.read_csv(os.path.join(RAW_PATH, file))
    test_results = framework.run_test(df, variant_col="variant", metric_col=metric)

    # Prefix every comparison row with the experiment it belongs to; the
    # remaining keys are carried over from run_test in their original order.
    for res in test_results:
        results.append({"experiment": name, "metric": metric, **res})

# Flatten all comparisons into one table and display it as the cell output.
results_df = pd.DataFrame(results)
results_df


Using RAW PATH: /Users/irpanpilihanrambe/Downloads/DATA SET PROJECT DEC/raw
Running: Menu Design
Running: Novelty Slider
Running: Product Sliders
Running: Reviews Experiment
Running: Search Engine


Unnamed: 0,experiment,metric,comparison,lift_%,pvalue,significant
0,Menu Design,added_to_cart,B_dropdown_menu vs A_horizontal_menu,-10.34,0.0,True
1,Novelty Slider,products_added_from_novelties,B_personalized_novelties vs A_manual_novelties,283.33,1e-05,True
2,Product Sliders,add_to_cart_rate,B_similar_products_top vs A_selected_by_others...,0.0,1.0,False
3,Product Sliders,add_to_cart_rate,C_selected_by_others_top vs A_selected_by_othe...,-1.64,0.89666,False
4,Reviews Experiment,converted,B_featured_reviews vs A_no_featured_reviews,0.8,0.77639,False
5,Search Engine,converted,B_algolia_search vs A_hybris_search,4.93,0.37115,False
