In [None]:
import json
import os
from datetime import datetime
from getpass import getpass

import google.generativeai as genai
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import shap
from econml.solutions.causal_analysis import CausalAnalysis
from lightgbm import LGBMClassifier
from reportlab.lib.pagesizes import A4
from reportlab.pdfgen import canvas
from sklearn.model_selection import train_test_split, GridSearchCV

In [None]:
# Load the raw dataset; the first CSV column becomes the row index.
DATA_PATH = "dataset.csv"
df = pd.read_csv(DATA_PATH, index_col=0)

In [None]:
# Quick look at the first rows to sanity-check the load.
df.head(n=5)

In [None]:
# Remove the "client_id" column before modelling (presumably a row identifier
# rather than a feature; confirm against the data dictionary).
# errors="ignore" keeps this cell re-runnable: a second execution, when the
# column is already gone, is a no-op instead of a KeyError.
df = df.drop(columns=["client_id"], errors="ignore")

In [None]:
# 4.1. Train a predictive model to identify the most important features

TARGET_COL = "loan_conversion_next30d"  # name of the target column; change to your own

X = df.drop(columns=[TARGET_COL])
y = df[TARGET_COL]

# Stratified 80/20 split so both parts keep the same target rate.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0, stratify=y)

# Small grid over the two hyper-parameters tuned here.
param_grid = {
    "learning_rate": [0.1, 0.05, 0.01],
    "max_depth": [3, 5, 10]}

clf = LGBMClassifier(
    n_estimators=300,
    objective="binary",
    random_state=42,
    n_jobs=-1)

# NOTE(review): both the estimator and the search request n_jobs=-1; confirm
# this does not oversubscribe CPU cores on the machine running the notebook.
search = GridSearchCV(clf, param_grid, n_jobs=-1, cv=3, scoring="roc_auc")
search.fit(x_train, y_train)

best_model = search.best_estimator_
best_params = search.best_params_

# Check the tuned model on the hold-out split before its feature ranking is
# used downstream. `search.score` applies the same "roc_auc" scorer to the
# refitted best estimator.
test_auc = search.score(x_test, y_test)
print(f"Best params: {best_params}")
print(f"CV ROC AUC: {search.best_score_:.4f} | hold-out ROC AUC: {test_auc:.4f}")

In [None]:
# Explain the tuned model on the training rows with SHAP's tree explainer.
# The additivity check is switched off, as in the original cell.
explainer = shap.TreeExplainer(best_model)
shap_exp = explainer(x_train, check_additivity=False)

# SHAP summary plot of the per-row attributions against the feature values.
shap.summary_plot(shap_exp.values, features=x_train)

In [None]:
# Global importance of a feature = mean absolute SHAP value over all rows.
shap_values = np.asarray(shap_exp.values)
if shap_values.ndim == 3:
    # Depending on the installed shap version, a binary classifier may yield
    # one slice per class: (rows, features, classes). Keep the last (positive)
    # class so the importance vector stays one value per feature.
    shap_values = shap_values[..., -1]
vals = np.abs(shap_values).mean(axis=0)

# One row per feature, most important first.
fi_shap = (
    pd.DataFrame({
        "feature": x_train.columns,
        "importance": vals
    })
    .sort_values("importance", ascending=False)
    .reset_index(drop=True))

In [None]:
# Keep the names of the `top_k` highest-ranked features (fi_shap is already
# sorted by importance, descending).
top_k = 5
top_features = fi_shap["feature"].iloc[:top_k].tolist()

In [None]:
# 4.2. Train a causal model to estimate causal relationships between the
# selected features and the target

# Upper bound on the number of rows passed to the causal model.
MAX_CAUSAL_ROWS = 50000

if len(df) > MAX_CAUSAL_ROWS:
    # Reproducible random subsample when the dataset exceeds the cap.
    df_causal = df.sample(n=MAX_CAUSAL_ROWS, random_state=42)
else:
    # Small enough: work on an independent copy of the full frame.
    df_causal = df.copy()

In [None]:
# Design matrix restricted to the selected features, and the target as ints.
# NOTE(review): only the top features are passed in, so no other column of the
# dataset is available to the causal model; confirm this is intended.
X_c = df_causal.loc[:, top_features].copy()
y_c = df_causal[TARGET_COL].astype(int).to_numpy()

# Columns stored as object/category dtype are declared categorical.
categorical = [
    name
    for name, dtype in X_c.dtypes.items()
    if str(dtype) in ("object", "category")
]

# Every selected feature is analysed as a treatment in turn.
ca = CausalAnalysis(
    feature_inds=top_features,
    categorical=categorical,
    heterogeneity_inds=None,
    classification=True,
    nuisance_models="automl",
    heterogeneity_model="forest",
    n_jobs=-1,
    random_state=123,
)
ca.fit(X_c, y_c)

In [None]:
# Global causal effect of each analysed feature; alpha=0.05 sets the
# significance level used for the reported confidence bounds.
CI_ALPHA = 0.05
global_summ = ca.global_causal_effect(alpha=CI_ALPHA)
global_summ

In [None]:
def errorbar(res: pd.DataFrame):
    """Draw point estimates with asymmetric confidence-interval bars.

    Parameters
    ----------
    res : pd.DataFrame
        Must provide the columns ``point``, ``ci_lower``, ``ci_upper`` and
        ``p_value``. Tick labels come from the index; for a MultiIndex only
        the first level is used.

    Each label is suffixed with stars according to its p-value:
    ``***`` below 1e-6, ``**`` below 1e-3, ``*`` below 1e-2, none otherwise.
    The figure is shown and then closed; nothing is returned.
    """
    if isinstance(res.index, pd.MultiIndex):
        names = res.index.get_level_values(0)
    else:
        names = res.index

    # Distances from the point estimate down to / up to the interval bounds.
    below = res["point"] - res["ci_lower"]
    above = res["ci_upper"] - res["point"]

    def starred(name, p_value):
        """Append the significance stars that match ``p_value``."""
        if p_value < 1e-6:
            return f"{name}***"
        if p_value < 1e-3:
            return f"{name}**"
        if p_value < 1e-2:
            return f"{name}*"
        return f"{name}"

    labels = [starred(name, p_value) for name, p_value in zip(names, res["p_value"])]
    positions = np.arange(len(labels))

    plt.figure(figsize=(15, 5))
    plt.errorbar(
        positions,
        res["point"].values,
        yerr=[below.values, above.values],
        fmt="o", capsize=5, capthick=1, barsabove=True,
    )
    plt.xticks(np.arange(len(labels)), labels, rotation=45, ha="right")
    plt.title("Direct Causal Effect of Each Feature (95% CI)")
    # Dashed zero line: intervals crossing it include "no effect".
    plt.axhline(0, color="r", linestyle="--", alpha=0.5)
    plt.ylabel("Average Treatment Effect")
    plt.tight_layout()
    plt.show()
    plt.close()


In [None]:
# Plot the global causal effects with their confidence intervals.
errorbar(res=global_summ)

In [None]:
# Collect a human-written description for every causal feature; the
# descriptions are passed to the LLM as column metadata.
# Note: input() makes this cell interactive, so it needs a live kernel.
column_descriptions = {}
for col in X_c.columns:
    # Strip stray whitespace so the JSON sent to the model stays clean.
    desc = input(f"Write the description for column '{col}': ").strip()
    column_descriptions[col] = desc

In [None]:
# Prompt for the LLM. It embeds the SHAP importance table, the causal summary,
# the column descriptions and the target name.
# The causal summary keeps its feature identifiers in the index, so the index
# is turned into regular columns before rendering; otherwise the table would
# reach the model without feature names.
prompt = f"""
You are a Senior Product Data Scientist and Experimentation Expert.

Your task:
Generate a complete, high-quality **A/B Test Design Document**, using ONLY the provided data and following strict experimentation best practices.

===========================================================
INPUT DATA (USE FOR ALL JUSTIFICATIONS)
===========================================================

1) **Feature Importance (mean |SHAP| value from a LightGBM model, highest first):**
{fi_shap.to_markdown(index=False)}

2) **Causal Analysis Summary (feature → effect on target):**
{global_summ.reset_index().to_markdown(index=False)}

3) **Column Descriptions (JSON Metadata):**
{json.dumps(column_descriptions, ensure_ascii=False, indent=2)}

4) **Target metric used in causal analysis:**
{TARGET_COL}

===========================================================
OUTPUT REQUIREMENTS
===========================================================

Write a concise, professional **A/B Test Design Document** (max 1 page).
Target audience: product managers, data analysts, and executives.

THE DOCUMENT MUST CONTAIN THE FOLLOWING SECTIONS (MANDATORY):

### 1. Objective
Define the business objective and why improving the target metric matters.
Use insights from causal analysis and feature importance to justify relevance.
The target metric {TARGET_COL} must be influenced not directly, but through changes to the user features with the highest detectable causal effect.

### 2. Hypothesis
Write a clear, falsifiable, directional hypothesis.
It must be directly grounded in:
- feature importance (drivers of the target)
- causal effects (significant causal contributors)

### 3. Experiment Design (Control vs Treatment)
Describe:
- what exactly changes in Treatment
- why this change can influence the target (via features/causal drivers)
- how users are exposed
- expected behavioral mechanism

### 4. Target Audience / Eligibility Rules
Define inclusion/exclusion based on metadata.
Explain *why this specific audience* is appropriate.

### 5. Metrics
Use column descriptions + model outputs to pick:
- **Primary metric** (must be the target or direct proxy)
- **Secondary metrics** (diagnostic or leading indicators)
- **Expected direction** ↑ or ↓
- **Business interpretation**

Metrics must align with causal results.

### 6. Success Criteria
Provide **clear, measurable** criteria.
If numeric threshold unavailable, define directional criteria with rationale.

### 7. Duration & Sample Size
Provide:
- estimation logic (simplified reasoning is enough)
- expected runtime (days or weeks)
- minimal detectable effect assumptions (conceptual, not calculated)

### 8. Guardrails
Add 2–3 guardrails relating to:
- conversion quality
- risk of negative behavioral shifts
- operational constraints  
Explain *why each guardrail matters*.

### 9. Risks & Mitigations
Provide 3–5 realistic risks.
Each risk must have a corresponding mitigation strategy.

===========================================================
STYLE RULES (STRICT)
===========================================================

- Use Markdown formatting.
- Be concise and avoid generic boilerplate.
- Every claim must be grounded in **importance, causal impact, or feature metadata**.
- Do NOT invent variables or metrics not present in the input.
- Write in a real-world, product-focused tone (no academic verbosity).
- The result must be production-ready, as if included in a real A/B test briefing.

Begin now.
"""

In [None]:
# Select the model and input an API key
LLM_MODEL = "models/gemini-2.5-flash"
model = genai.GenerativeModel(LLM_MODEL)

os.environ["GOOGLE_API_KEY"] = "___" # input your API here
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

In [None]:
# Ask the model for the design document and save it as Markdown.
OUT_MD = "./ab_test_design.md"
try:
    response = model.generate_content(prompt)
    text = getattr(response, "text", "") or ""
    if not text.strip():
        # An empty reply is a failure: do not write a placeholder document
        # and do not report success.
        raise RuntimeError("the model returned no text; nothing was written")

    with open(OUT_MD, "w", encoding="utf-8") as f:
        f.write(text)

    print(f"[Gemini] ✅ Markdown saved to {OUT_MD}")

except Exception as e:
    # Best-effort cell: report the failure (API, empty reply or file error)
    # without aborting the rest of the notebook.
    print(f"[Gemini] ❌ Error: {e}")