In [1]:
import pandas as pd
import os

print(os.getcwd())

# read the csv
df = pd.read_csv("synthetic_search_ai_overviews_24w.csv")
df.head(3)

c:\Users\anind\Documents\python-projects\ml-dl-personal-projects\.venv


Unnamed: 0,date,week_period,category,query,long_tail,ai_overview,actions_taken,ads_viewed,ads_clicked,cpc_usd,google_revenue_usd,time_spent_search_sec
0,2025-04-28,pre,Entertainment,upcoming releases thriller 2025,False,False,refine_query,1,0,0.52,0.0,14.2
1,2025-04-28,pre,Travel,public transport pass for Paris,False,False,visit_publisher,0,0,0.65,0.0,12.0
2,2025-04-28,pre,Tech,React tutorial step by step,True,False,visit_ads,3,0,0.65,0.0,35.8


In [2]:
# exploratory data analysis

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120000 entries, 0 to 119999
Data columns (total 12 columns):
 #   Column                 Non-Null Count   Dtype  
---  ------                 --------------   -----  
 0   date                   120000 non-null  object 
 1   week_period            120000 non-null  object 
 2   category               120000 non-null  object 
 3   query                  120000 non-null  object 
 4   long_tail              120000 non-null  bool   
 5   ai_overview            120000 non-null  bool   
 6   actions_taken          120000 non-null  object 
 7   ads_viewed             120000 non-null  int64  
 8   ads_clicked            120000 non-null  int64  
 9   cpc_usd                120000 non-null  float64
 10  google_revenue_usd     120000 non-null  float64
 11  time_spent_search_sec  120000 non-null  float64
dtypes: bool(2), float64(3), int64(2), object(5)
memory usage: 9.4+ MB


In [3]:

# convert data into datetime
df["date"] = pd.to_datetime(df["date"])

# Recreate helper fields if needed
for col, func in [
    ("post", lambda d: (d["week_period"] == "post").astype(int)),
    ("treated_ai", lambda d: d["ai_overview"].astype(int)),
    ("week", lambda d: d["date"].dt.to_period("W").astype(str)),
    ("query_len_words", lambda d: d["query"].str.split().str.len()),
]:
    if col not in df.columns:
        df[col] = func(df)


if "query_len_bucket" not in df.columns:
    def length_bucket(n):
        if n <= 3: return "short(≤3)"
        elif n <= 6: return "medium(4–6)"
        else: return "long(≥7)"
    df["query_len_bucket"] = df["query_len_words"].apply(length_bucket)

df.head(3)

Unnamed: 0,date,week_period,category,query,long_tail,ai_overview,actions_taken,ads_viewed,ads_clicked,cpc_usd,google_revenue_usd,time_spent_search_sec,post,treated_ai,week,query_len_words,query_len_bucket
0,2025-04-28,pre,Entertainment,upcoming releases thriller 2025,False,False,refine_query,1,0,0.52,0.0,14.2,0,0,2025-04-28/2025-05-04,4,medium(4–6)
1,2025-04-28,pre,Travel,public transport pass for Paris,False,False,visit_publisher,0,0,0.65,0.0,12.0,0,0,2025-04-28/2025-05-04,5,medium(4–6)
2,2025-04-28,pre,Tech,React tutorial step by step,True,False,visit_ads,3,0,0.65,0.0,35.8,0,0,2025-04-28/2025-05-04,5,medium(4–6)


In [8]:
df["time_spent_search_sec"].describe()

count    120000.000000
mean         23.624751
std          14.989685
min           1.800000
25%          13.500000
50%          19.900000
75%          29.500000
max         235.900000
Name: time_spent_search_sec, dtype: float64

In [9]:
# test for modeling the search time using Weibull, Gamme and Log-Normal distributions
# 1) Fits Weibull, Lognormal, and Gamma to time_spent_search_sec
# 2) Reports KS goodness-of-fit, AIC, and BIC for model comparison
# 3) Runs the same tests group-wise for ai_overview (both overall and post-period only)

from scipy import stats
import numpy as np

# Keep strictly positive times (KS/AIC/BIC assume continuous positive support)
x_all = df["time_spent_search_sec"].astype(float)
x_all = x_all[x_all > 0].values


# ========= Core helpers =========
def fit_and_scores(x, dist_name):
    """
    Fit a distribution to sample x and compute KS statistic+p-value, log-likelihood, AIC, BIC.
    Uses SciPy continuous dists:
      - 'weibull_min'  (Weibull, with shape c)
      - 'lognorm'      (Lognormal, with shape=sigma)
      - 'gamma'        (Gamma, with shape=k)
    """
    if dist_name == "weibull":
        dist = stats.weibull_min
    elif dist_name == "lognorm":
        dist = stats.lognorm
    elif dist_name == "gamma":
        dist = stats.gamma
    else:
        raise ValueError("dist_name must be 'weibull', 'lognorm', or 'gamma'")

    # Fit: allow location and scale to float
    params = dist.fit(x)  # returns tuple (shape(s), loc, scale)

    # KS test against fitted CDF
    # kstest expects a callable CDF with the fitted params
    D, p = stats.kstest(x, lambda v: dist.cdf(v, *params))

    # Log-likelihood
    ll = np.sum(dist.logpdf(x, *params))
    n = len(x)
    k = len(params)  # number of free params
    aic = 2 * k - 2 * ll
    bic = k * np.log(n) - 2 * ll

    return {
        "distribution": dist_name,
        "n": n,
        "params": params,
        "KS_stat": D,
        "KS_pvalue": p,
        "loglik": ll,
        "AIC": aic,
        "BIC": bic
    }



In [12]:
def compare_distributions(x, label="overall"):
    x = np.asarray(x, dtype=float)
    x = x[np.isfinite(x) & (x > 0)]
    results = []
    for d in ["weibull", "lognorm", "gamma"]:
        results.append(fit_and_scores(x, d))
    out = pd.DataFrame(results).sort_values(["AIC", "BIC"]).reset_index(drop=True)
    out.insert(0, "sample", label)
    out.insert(1, "rank", np.arange(1, len(out) + 1))
    alpha = 0.05
    out["KS_fit_comment"] = out["KS_pvalue"].apply(
        lambda p: f"Fail to reject KS null (good fit at alpha={alpha})" if p >= alpha else ""
    )
    return out


In [13]:
# ========= 1) Overall GOF: which dist fits best? =========
overall_gof = compare_distributions(x_all, label="ALL (pre+post)")
overall_gof

Unnamed: 0,sample,rank,distribution,n,params,KS_stat,KS_pvalue,loglik,AIC,BIC,KS_fit_comment
0,ALL (pre+post),1,lognorm,120000,"(0.5855909121046509, 0.12619490015501816, 19.7...",0.003243,0.1599013,-464333.150949,928672.301898,928701.387639,Fail to reject KS null (good fit at alpha=0.05)
1,ALL (pre+post),2,gamma,120000,"(2.5806180583836085, 1.797753856980107, 8.4580...",0.035927,5.233693e-135,-466314.941644,932635.883288,932664.969029,
2,ALL (pre+post),3,weibull,120000,"(1.592691659647254, 1.7996844045875882, 24.502...",0.052818,2.15659e-291,-470891.7268,941789.4536,941818.539341,


In [14]:
# ========= 2) Group-wise GOF: AI vs non-AI =========
# (a) Across the entire dataset (non-AI includes pre + post non-AI)
x_ai_all = df.loc[df["ai_overview"]==True, "time_spent_search_sec"].values
x_non_ai_all = df.loc[df["ai_overview"]==False, "time_spent_search_sec"].values
gof_ai_all = compare_distributions(x_ai_all, label="AI (all rows)")
gof_non_ai_all = compare_distributions(x_non_ai_all, label="Non-AI (all rows)")

# (b) Post-period only (to avoid time confounding)
post = df[df["post"]==1]
x_ai_post = post.loc[post["ai_overview"]==True, "time_spent_search_sec"].values
x_non_ai_post = post.loc[post["ai_overview"]==False, "time_spent_search_sec"].values
gof_ai_post = compare_distributions(x_ai_post, label="AI (post only)")
gof_non_ai_post = compare_distributions(x_non_ai_post, label="Non-AI (post only)")

# ========= 3) Optional: Direct distributional difference test between AI vs Non-AI =========
# KS two-sample (does not assume any parametric family)
ks_all = stats.ks_2samp(x_ai_all, x_non_ai_all, alternative="two-sided", method="auto")
ks_post = stats.ks_2samp(x_ai_post, x_non_ai_post, alternative="two-sided", method="auto")

# ========= Display / print summaries =========
def pretty(df_):
    cols = ["sample","distribution","n","KS_stat","KS_pvalue","AIC","BIC","params"]
    return df_[cols].reset_index(drop=True)

print("\n=== OVERALL (ALL ROWS) — Best-fitting distribution by AIC/BIC ===")
print(pretty(overall_gof).to_string(index=False))

print("\n=== GROUP-WISE (ALL ROWS) ===")
print(pretty(gof_ai_all).to_string(index=False))
print(pretty(gof_non_ai_all).to_string(index=False))
print(f"\nTwo-sample KS (AI vs Non-AI, ALL ROWS): statistic={ks_all.statistic:.4f}, p={ks_all.pvalue:.4g}")

print("\n=== GROUP-WISE (POST-PERIOD ONLY) ===")
print(pretty(gof_ai_post).to_string(index=False))
print(pretty(gof_non_ai_post).to_string(index=False))
print(f"\nTwo-sample KS (AI vs Non-AI, POST only): statistic={ks_post.statistic:.4f}, p={ks_post.pvalue:.4g}")




=== OVERALL (ALL ROWS) — Best-fitting distribution by AIC/BIC ===
        sample distribution      n  KS_stat     KS_pvalue           AIC           BIC                                                        params
ALL (pre+post)      lognorm 120000 0.003243  1.599013e-01 928672.301898 928701.387639 (0.5855909121046509, 0.12619490015501816, 19.799134542508654)
ALL (pre+post)        gamma 120000 0.035927 5.233693e-135 932635.883288 932664.969029    (2.5806180583836085, 1.797753856980107, 8.458047601826436)
ALL (pre+post)      weibull 120000 0.052818 2.156590e-291 941789.453600 941818.539341   (1.592691659647254, 1.7996844045875882, 24.502105273262146)

=== GROUP-WISE (ALL ROWS) ===
       sample distribution     n  KS_stat    KS_pvalue           AIC           BIC                                                      params
AI (all rows)      lognorm 18149 0.006501 4.251894e-01 130547.912853 130571.331965 (0.5870051472090899, 0.4513529957125645, 15.03193972191415)
AI (all rows)        gam

In [15]:
# ========= (Optional) Quick pick of “winner” per table =========
def winner(df_):
    # Small helper to pick lowest AIC (ties broken by BIC)
    w = df_.sort_values(["AIC","BIC"]).iloc[0]
    return f"{w['sample']}: {w['distribution']} (AIC={w['AIC']:.1f}, BIC={w['BIC']:.1f})"

print("\n=== Quick winners by AIC ===")
for tbl in [overall_gof, gof_ai_all, gof_non_ai_all, gof_ai_post, gof_non_ai_post]:
    print(" -", winner(tbl))


=== Quick winners by AIC ===
 - ALL (pre+post): lognorm (AIC=928672.3, BIC=928701.4)
 - AI (all rows): lognorm (AIC=130547.9, BIC=130571.3)
 - Non-AI (all rows): lognorm (AIC=794153.0, BIC=794181.6)
 - AI (post only): lognorm (AIC=130547.9, BIC=130571.3)
 - Non-AI (post only): lognorm (AIC=324986.4, BIC=325012.3)


In [23]:
cross_tab = pd.crosstab(df['post'], df['treated_ai'])
print(cross_tab)

treated_ai      0      1
post                    
0           60000      0
1           41851  18149


In [16]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import r2_score
import numpy as np  



In [26]:
from scipy import stats
# ---------- (1) DiD via regression with category and week fixed effects ----------
# Model: y = b0 + b1*post + b2*treated_ai + FE(category) + FE(week) + e
# Note: treated_ai is equivalent to post*ai since ai=0 in pre; b2 ~ ATT of AI exposure in post.
def did_regression(ycol, alpha=0.05):
    # Build design with one-hot FE for category & week
    X = df[["post","treated_ai","category","week"]].copy()
    y = df[ycol].astype(float).values
    ct = ColumnTransformer([
        ("cat", OneHotEncoder(drop="first"), ["category","week"])
    ], remainder="passthrough")

    # Linear regression via OLS closed form using numpy (to avoid adding 
    #  sklearn linear reg which doesn't show coeff names easily)
    X_enc = ct.fit_transform(X)

    # X_enc is sparse; convert to dense for OLS (dataset size allows it)
    X_mat = X_enc.toarray()

    # Add intercept
    X_design = np.column_stack([np.ones(X_mat.shape[0]), X_mat])

    # OLS: beta = (X'X)^(-1) X'y
    beta, _, rank, _ = np.linalg.lstsq(X_design, y, rcond=None)

    # Residual variance and (X'X)^{-1} for standard errors
    resid = y - X_design @ beta
    df_resid = max(X_design.shape[0] - rank, 1)
    sigma2 = (resid @ resid) / df_resid
    XtX_inv = np.linalg.pinv(X_design.T @ X_design)
    se = np.sqrt(np.diag(XtX_inv) * sigma2)
    t_stats = np.divide(beta, se, out=np.full_like(beta, np.nan), where=se > 0)
    p_vals = 2 * stats.t.sf(np.abs(t_stats), df_resid)

    # Recover the positions for post and treated_ai (they are the last two columns via 'remainder=passthrough')
    # Column order in transformed matrix: [onehots..., post, treated_ai]
    # So positions are: intercept=0, then all onehots, then post, then treated_ai
    k = X_design.shape[1]
    post_idx = k-2  # second last
    treat_idx = k-1 # last
    return {
        "y": ycol,
        "alpha": alpha,
        "coef_post": float(beta[post_idx]),
        "pvalue_post": float(np.round(p_vals[post_idx], 4)),
        "post_significant": "yes" if p_vals[post_idx] < alpha else "no",
        "coef_treated_ai (ATT)": float(beta[treat_idx]),
        "pvalue_treated_ai (ATT)": float(np.round(p_vals[treat_idx], 4)),
        "treated_ai_significant": "yes" if p_vals[treat_idx] < alpha else "no",
    }

did_time = did_regression("time_spent_search_sec")
did_rev  = did_regression("google_revenue_usd")
did_df = pd.DataFrame([did_time, did_rev])

did_df

# Interpretation: That means treated_ai is already the interaction term post * treated_group. There’s no need to create it  
#   again—there simply aren’t any observations with treated_ai = 1 before the policy kicks in. The regression 
#   therefore has:                                                                                            
                                                                                                            
#   - Intercept (baseline non-AI in pre),                                                                     
#   - post (time shift hitting both groups), and                                                              
#   - treated_ai (effect that only appears for treated units in the post period).


Unnamed: 0,y,alpha,coef_post,pvalue_post,post_significant,coef_treated_ai (ATT),pvalue_treated_ai (ATT),treated_ai_significant
0,time_spent_search_sec,0.05,-0.77835,0.0001,yes,-4.801788,0.0,yes
1,google_revenue_usd,0.05,-0.00417,0.4763,no,-0.040565,0.0,yes


In [44]:
# ---------- (2) Slice effects by category and query length ----------
# We'll compute post-period difference between AI vs non-AI for each slice (ATT within post).
# • Slice Effects
                                                                                                            
#   - post_subset = df[df["post"]==1] filters to the post-period.
#   - Group by the requested slicer columns plus ai_overview (groupby(group_cols + ["ai_overview"])) and take 
#     the mean of ycol.                                                                                       
#   - unstack("ai_overview") pivots so each row has separate columns for AI/Non-AI mean.                      
#   - Renames those columns, computes ATT (post: ai - non_ai) as the difference, and resets the index for a   
#     tidy DataFrame.                                                                                         
#   - slice_cat_* / slice_len_* just run that helper for time and revenue outcomes split by category or query-    length bucket.                                                                                          
                                                                                                            
#   So you get post-period, within-slice average treatment effects (AI − non-AI) without any modeling—just    
#   direct differences.          

def slice_effects(group_cols, ycol):
    post_subset = df[df["post"]==1]
    g = post_subset.groupby(group_cols + ["ai_overview"])[ycol].mean().unstack("ai_overview")
    g = g.rename(columns={False:"mean_non_ai", True:"mean_ai"})
    g["ATT (post: ai - non_ai)"] = g["mean_ai"] - g["mean_non_ai"]
    return g.reset_index()

slice_cat_time = slice_effects(["category"], "time_spent_search_sec")
slice_len_time = slice_effects(["query_len_bucket"], "time_spent_search_sec")
slice_cat_rev  = slice_effects(["category"], "google_revenue_usd")
slice_len_rev  = slice_effects(["query_len_bucket"], "google_revenue_usd")

# ---------- (3) Simple treatment-effect model (T-learner proxy for causal forest) ----------
# Features (X): category, long_tail, query_len_words, weekday, ads_viewed (pre-exposure proxy), etc.
# We exclude ai_overview from features for the control model, and include standard context.
model_features = ["category","long_tail","query_len_words","cpc_usd","ads_viewed","post"]

# Train two models for y1 (treated) and y0 (control) on post period to avoid distribution shift across time
post_data = df[df["post"]==1].copy()

#   T-Learner Block                                                                                           
                                                                                                            
#   - Uses only post-period rows (post_data) to avoid time drift.                                             
#   - Splits into treated (ai_overview==1) and control (==0).                                                 
#   - Builds two identical pipelines: one-hot encoding for category, then a RandomForestRegressor (200 trees, 
#     parallel).                                                                                              
#   - Fits one model (pipe_t) on treated data and another (pipe_c) on control data.                           
#   - Predicts both potential outcomes (mu1 if treated, mu0 if control) for every post-period observation by  
#     feeding the same features through both models.                                                          
#   - Individual treatment effect estimate ITE_hat is mu1 - mu0.                                              
#   - Evaluates in-sample fit via R² on treated and control subsets for a sanity check.                       
#   - Returns a DataFrame with the actual outcome, the two predicted potential outcomes, the individual       
#     effect, and some key descriptors (category, query length).
#   - summarize_ite then groups those ITEs to report mean/median/percentiles by category and query-length     
#     bucket.

#   Finally, display_dataframe_to_user shows all the tables (DiD summary, slice ATT tables, T-learner
#   summaries), and results collects a few headline numbers.
# ---------- (3) Simple treatment-effect model (T-learner proxy for causal forest) ----------             
  # Features (X): category, long_tail, query_len_words, weekday, ads_viewed (pre-exposure proxy), etc.      
model_features = ["category","long_tail","query_len_words","cpc_usd","ads_viewed","post"]                 
                                                                                                            
  # Train two models for y1 (treated) and y0 (control) on post period to avoid distribution shift across    
#  time                                                                                                      
post_data = df[df["post"]==1].copy()                                                                      
                                                                                                            
def build_t_learner(ycol):                                                                                
      treat = post_data[post_data["ai_overview"]==1].copy()
      ctrl  = post_data[post_data["ai_overview"]==0].copy()                                                 
      preproc = ColumnTransformer([                                                                         
          ("cat", OneHotEncoder(handle_unknown="ignore"), ["category"])                                     
      ], remainder="passthrough")                                                                           
      rf_t = RandomForestRegressor(n_estimators=200, random_state=42, n_jobs=-1)                            
      rf_c = RandomForestRegressor(n_estimators=200, random_state=42, n_jobs=-1)                            
      pipe_t = Pipeline([("prep", preproc), ("rf", rf_t)])                                                  
      pipe_c = Pipeline([("prep", preproc), ("rf", rf_c)])                                                  
      X_t, y_t = treat[model_features], treat[ycol]                                                         
      X_c, y_c = ctrl[model_features],  ctrl[ycol]                                                          
      pipe_t.fit(X_t, y_t)
      pipe_c.fit(X_c, y_c)
      X_all = post_data[model_features]                                                                     
      mu1 = pipe_t.predict(X_all)
      mu0 = pipe_c.predict(X_all)                                                                           
      ite = mu1 - mu0
      r2_t = r2_score(y_t, pipe_t.predict(X_t))                                                             
      r2_c = r2_score(y_c, pipe_c.predict(X_c))                                                             
      out = post_data[["category","query_len_bucket","ai_overview"]].copy()                                 
      out["y_actual"] = post_data[ycol].values                                                              
      out["mu1_hat"] = mu1                                                                                  
      out["mu0_hat"] = mu0                                                                                  
      out["ITE_hat"] = ite                                                                                  
      return out, {"r2_treated": r2_t, "r2_control": r2_c}                                                  
                                                                                                            
t_time_df, t_time_metrics = build_t_learner("time_spent_search_sec")                                      
t_rev_df,  t_rev_metrics  = build_t_learner("google_revenue_usd")                                         
                                                                                                            
  # Aggregate ITEs for interpretability                                                                     
def summarize_ite(ite_df, alpha=0.05):                                                                    
      group_cols = ["category", "query_len_bucket"]                                                         
      summaries = []                                                                                        
      for keys, grp in ite_df.groupby(group_cols):                                                          
          values = grp["ITE_hat"].dropna()                                                                  
          n_obs = int(values.shape[0])                                                                      
          mean_ite = values.mean() if n_obs else float("nan")                                               
          median_ite = values.median() if n_obs else float("nan")                                           
          p25_ite = float(np.percentile(values, 25)) if n_obs else float("nan")                             
          p75_ite = float(np.percentile(values, 75)) if n_obs else float("nan")                             
          if n_obs > 1:                                                                                     
              std_ite = float(values.std(ddof=1))                                                           
          elif n_obs == 1:                                                                                  
              std_ite = 0.0                                                                                 
          else:                                                                                             
              std_ite = float("nan")                                                                        
          se_ite = std_ite / np.sqrt(n_obs) if n_obs > 0 else float("nan")
          if n_obs > 1 and se_ite != 0:                                                                     
              t_stat = mean_ite / se_ite                                                                    
              p_value = 2 * stats.t.sf(np.abs(t_stat), df=n_obs - 1)                                        
          else:                                                                                             
              t_stat = float("nan")                                                                         
              p_value = float("nan")                                                                        
          summaries.append({                                                                                
              "category": keys[0],
              "query_len_bucket": keys[1],                                                                  
              "mean_ITE": mean_ite,                                                                         
              "median_ITE": median_ite,                                                                     
              "p25_ITE": p25_ite,                                                                           
              "p75_ITE": p75_ite,                                                                           
              "std_ITE": std_ite,
              "se_ITE": se_ite,                                                                             
              "t_stat": t_stat,                                                                             
              "p_value": p_value,                                                                           
              "significant": "yes" if (not pd.isna(p_value) and p_value < alpha) else "no",                 
              "n_obs": n_obs,                                                                               
          })                                                                                                
      summary_df = pd.DataFrame(summaries)                                                                  
      if not summary_df.empty:                                                                              
          summary_df = summary_df.sort_values(group_cols).reset_index(drop=True)                            
          for col in ["mean_ITE", "median_ITE", "p25_ITE", "p75_ITE", "std_ITE", "se_ITE", "t_stat",        
  "p_value"]:     
              summary_df[col] = summary_df[col].round(4)                                                    
      return summary_df                                                                                     
                                                                                                            
ite_time_summary = summarize_ite(t_time_df)                                                               
ite_rev_summary  = summarize_ite(t_rev_df)                                                                
                                                                                                            
  # Also provide quick textual summaries in a Pandas table                                                  
results_summary = []                                                                                      
results_summary.append({"metric": "DiD_time_coef_post", "value": float(did_time["coef_post"])})           
results_summary.append({"metric": "DiD_time_ATT_ai", "value": float(did_time["coef_treated_ai (ATT)"])})  
results_summary.append({"metric": "DiD_rev_coef_post", "value": float(did_rev["coef_post"])})             
results_summary.append({"metric": "DiD_rev_ATT_ai", "value": float(did_rev["coef_treated_ai (ATT)"])})    
results_summary.append({"metric": "T-learner R2 (time)", "value": t_time_metrics})                        
results_summary.append({"metric": "T-learner R2 (revenue)", "value": t_rev_metrics})                      
results_summary.append({"metric": "ITE_time_significant_slices",                                          
                          "value": int((ite_time_summary["significant"] == "yes").sum())})
results_summary.append({"metric": "ITE_time_total_slices",                                                
                          "value": int(len(ite_time_summary))})                                             
results_summary.append({"metric": "ITE_time_significant_groups",                                          
                          "value": (                                                                        
                              ite_time_summary[ite_time_summary["significant"] == "yes"]                    
                              [["category", "query_len_bucket"]].to_dict("records")                         
                          )})                                                                               
results_summary.append({"metric": "ITE_rev_significant_slices",                                           
                          "value": int((ite_rev_summary["significant"] == "yes").sum())})                   
results_summary.append({"metric": "ITE_rev_total_slices",                                                 
                          "value": int(len(ite_rev_summary))})                                              
results_summary.append({"metric": "ITE_rev_significant_groups",
                          "value": (                                                                        
                              ite_rev_summary[ite_rev_summary["significant"] == "yes"]
                              [["category", "query_len_bucket"]].to_dict("records")                         
                          )})                                                                               
                                                                                                            
results_df = pd.DataFrame(results_summary).sort_values("metric").reset_index(drop=True)                   
                                                                                                            
print("Results summary:")                                                                                 
print(results_df.to_string(index=False))                                                                  
results_df                                                                                                
results_summary.append({"metric": "ITE_rev_total_slices",
                          "value": int(len(ite_rev_summary))})
results_summary.append({"metric": "ITE_rev_significant_groups",
                          "value": (
                              ite_rev_summary[ite_rev_summary["significant"] == "yes"]
                              [["category", "query_len_bucket"]].to_dict("records")
                          )})

results_df = pd.DataFrame(results_summary).sort_values("metric").reset_index(drop=True)

print("Results summary:")
print(results_df.to_string(index=False))
results_df



Results summary:
                     metric                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            

Unnamed: 0,metric,value
0,DiD_rev_ATT_ai,-0.040565
1,DiD_rev_coef_post,-0.00417
2,DiD_time_ATT_ai,-4.801788
3,DiD_time_coef_post,-0.77835
4,ITE_rev_significant_groups,"[{'category': 'Entertainment', 'query_len_buck..."
5,ITE_rev_significant_groups,"[{'category': 'Entertainment', 'query_len_buck..."
6,ITE_rev_significant_slices,19
7,ITE_rev_total_slices,20
8,ITE_rev_total_slices,20
9,ITE_time_significant_groups,"[{'category': 'Entertainment', 'query_len_buck..."


In [None]:
# Results Summary Explained
                                                                                                            
#   - DiD_rev_ATT_ai  -0.040565: After controlling for group and time effects, AI overviews cut average post- 
#     period revenue per query by about $0.04 relative to the non-AI trend (ATT).           
#                   
#   - DiD_rev_coef_post  -0.00417: Even without AI exposure, revenue dropped by roughly $0.004 between pre and
#     post periods— a common time shock affecting everyone.                                                   

#   - DiD_time_ATT_ai  -4.801788: AI overviews shortened time-on-search by ~4.8 seconds versus the non-AI     
#     counterfactual— a large DiD effect.                                                                     

#   - DiD_time_coef_post  -0.77835: Across the board, search sessions became about 0.78 seconds shorter post  
#     period, regardless of AI exposure.                                                                      

#   - ITE_rev_significant_groups …: Outlines each (category, query_len_bucket) slice where the T-learner      
#     estimated revenue uplift is statistically significant (19 entries, e.g., Entertainment/short).          

#   - ITE_rev_significant_slices  19: Confirms the count of significant revenue slices—19 out of the total    
#     evaluated.                                                                                              

#   - ITE_rev_total_slices  20: Indicates there were 20 category×query-length slices with enough data; 19     
#     showed significant revenue effects (the repetition in the table is just due to the verbose summary).    

#   - ITE_time_significant_groups …: Lists the slices with significant time-on-search effects (again 19       
#     entries, parallel to the revenue list).                                                                 

#   - ITE_time_significant_slices  19: Shows 19 slices had significant time effects.

#   - ITE_time_total_slices  20: Out of 20 slices checked, 19 were significant for time.

#   - T-learner R2 (revenue) {r2_treated≈0.12, r2_control≈0.15}: The random-forest models explain about 12%   
#     and 15% of revenue variance for the treated and control groups respectively—moderate explanatory power. 

#   - T-learner R2 (time) {r2_treated≈0.09, r2_control≈0.09}: The time-on-search models explain ~9% of        
#     variance; predictive strength is low, so individual effect estimates should be treated with caution.    
#     counterfactual— a large DiD effect.

# Interpretation Notes:
#   Revenue: the models explain ~12% of variation for AI queries and ~15% for non-AI. That’s a modest     
    # but useful signal—features like category, CPC, and ads viewed do capture some revenue dynamics, so the  
    # slice-level treatment effects you’re seeing likely reflect real differences, even if there’s plenty of  
    # noise left unexplained.
# Time on search: the models explain only ~9% of variance for either group. User-level dwell time is    
    # harder to predict from the available features, so the individualized ITE estimates are less reliable.   
    # Treat the aggregate “significant slices” as directional guidance rather than precise numbers, and       
    # consider collecting richer behavioral features if you need stronger causal granularity.
