In [None]:
# =============================================================
#  FREE HACKATHON CONJOINT - GPT DATA COLLECTION
#  Generates synthetic respondent data comparing hackathon designs
# =============================================================
!pip -q install openai==0.28.1 pandas

import os, random, json, pandas as pd, openai

# ‚îÄ‚îÄ API Configuration ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# Read the key from the OPENAI_API_KEY environment variable when it is set so
# a real secret never has to be pasted into (and saved with) the notebook; the
# original placeholder stays as the fallback value.
openai.api_key = os.environ.get("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY")

# ‚îÄ‚îÄ Attribute Definitions ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# Levels ordered weakest -> strongest (first level = baseline in analysis)
# Conjoint design for the free hackathon study: attribute name -> list of
# levels. The list order matters elsewhere in this notebook: the analysis cell
# drops the dummy column of each attribute's FIRST level (reference level), and
# the `strong` pool below is made of each attribute's LAST level.
ATTRIBUTES = {
    "Rewards_Resources": [
        "Recognition only",
        "API credits only (quantified, usable post-event)",
        "Cash prizes only (~$20k total prizes)",
        "Cash + API credits (~$20k total prizes; clear usage terms)"
    ],
    "Peer_Quality_Prestige": [
        "Local university hub / general public",
        "Cross-school and international entrants",
        "Flagship hubs (e.g., MIT/Stanford) + named judges"
    ],
    "Challenge_Structure": [
        "Open innovation + one headline partner challenge",
        "2‚Äì3 curated tracks (pre-published briefs; selection ‚â•1 week pre-event)",
        "Single themed track with explicit rubric"
    ],
    "Sprint_Format": [
        "One-week async with daily stand-ups",
        "3‚Äì5 day evening sprint",
        "24-hour live sprint"
    ],
    "Venture_Track": [
        "No formal venture path",
        "Finalists receive demo-day invite",
        "6-week accelerator (top cohort benefits, mentorship, investor intros)"
    ],
    "Network_Career_Access": [
        "No career access",
        "Curated: company talks + optional 1:1 mentor slots",
        "Structured: 1:1 mentor per team + recruiter r√©sum√© drop",
        "Guaranteed interviews with partner companies"
    ]
}

# Echo the design so the levels used for this run are visible in the output.
print(json.dumps(ATTRIBUTES, indent=2))

# ‚îÄ‚îÄ Generate Choice Tasks ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# Experiment size: number of A-vs-B comparisons to generate, and how many
# synthetic answers are requested for each comparison.
NUM_TASKS = 300
N_RESPONSES = 3

# "Strong" pool per attribute: a one-element list holding the attribute's
# final level in ATTRIBUTES.
strong = {attr: [levels[-1]] for attr, levels in ATTRIBUTES.items()}

# "Weak" pool per attribute: every level that is not in the strong pool,
# kept in the order it is listed in ATTRIBUTES.
weak = {
    attr: [level for level in levels if level not in strong[attr]]
    for attr, levels in ATTRIBUTES.items()
}

def mediocre_profile():
    """Draw one hackathon profile that leans toward the weaker levels.

    For every attribute a uniform draw selects the weak pool when it falls
    below 0.7 and the strong pool otherwise; one level is then sampled
    uniformly from the selected pool.

    Returns:
        dict mapping each attribute name in ATTRIBUTES to the sampled level.
    """
    return {
        attr: random.choice(weak[attr] if random.random() < 0.7 else strong[attr])
        for attr in ATTRIBUTES
    }

# Task table keyed by 1-based task id; each task holds two independently
# drawn profiles (A is drawn before B).
CHOICE_TASK_PROFILES = dict(
    (task_number, {"A": mediocre_profile(), "B": mediocre_profile()})
    for task_number in range(1, NUM_TASKS + 1)
)

print(f"\n‚úì Generated {NUM_TASKS} choice tasks")

# ‚îÄ‚îÄ Define Prompt Template ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
def build_prompt(pair):
    """Render a pair of profiles as the user message sent to the model.

    Args:
        pair: two-item sequence of profile dicts, option A first.

    Returns:
        A string with one line per option (its "attribute: level" entries
        joined by "; "), a blank line, and the closing question.
    """
    option_a, option_b = pair

    def describe(profile):
        # "attribute: level" fragments in the dict's own iteration order.
        return "; ".join(f"{attr}: {level}" for attr, level in profile.items())

    lines = [
        f"Option A ‚Äì {describe(option_a)}",
        f"Option B ‚Äì {describe(option_b)}",
        "",
        "Which hackathon would you prefer? Reply with either 'Option A' or 'Option B'.",
    ]
    return "\n".join(lines)

# System message sent with every request: sets the respondent persona and
# the expected reply format.
SYSTEM_MSG = dict(
    role="system",
    content=" ".join([
        "You're a pragmatic CS student evaluating hackathons.",
        "Compare the two options and pick the one that offers better value for your time and career.",
        "Consider factors like career impact, learning opportunities, and resources.",
        "Reply with just: Option A or Option B.",
    ]),
)

# ‚îÄ‚îÄ Query GPT-3.5 for Choices ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# Ask the model N_RESPONSES times per task and record which option it picked.
#
# A reply is only recorded when it names exactly one option. Replies that name
# neither option, or both, are skipped and counted instead of being recorded
# as "A", which would inflate A's share in the collected data.
# NOTE(review): option order is never shuffled, so any position preference of
# the model is carried into the data as well.
collected_observations = []
skipped_replies = 0  # replies that could not be mapped to exactly one option

for task_id in range(1, NUM_TASKS + 1):
    task = CHOICE_TASK_PROFILES[task_id]
    pair = (task["A"], task["B"])
    prompt = build_prompt(pair)

    if task_id <= 3:
        print(f"\n--- Task {task_id} Preview ---\n{prompt[:180]}...\n")

    for r in range(N_RESPONSES):
        try:
            resp = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[SYSTEM_MSG, {"role": "user", "content": prompt}],
                temperature=0.6,
                max_tokens=10,
            )
            raw_text = resp["choices"][0]["message"]["content"].strip()
        except Exception as e:
            # Best effort: report the failed request and carry on with the rest.
            print(f"Error Task {task_id}, Resp {r}: {e}")
            continue

        text = raw_text.lower()
        names_a = "option a" in text or text == "a"
        names_b = "option b" in text or text == "b"
        if names_a == names_b:
            # Neither option, or both, were mentioned: not a usable answer.
            skipped_replies += 1
            print(f"Skipped Task {task_id}, Resp {r}: unparseable reply {raw_text!r}")
            continue
        choice = "A" if names_a else "B"

        observation = {
            "task": task_id,
            "resp": r,
            "choice": choice,
            "raw_text": raw_text,
        }
        # Attach the attribute levels of the chosen alternative.
        observation.update(task[choice])
        collected_observations.append(observation)

print(f"\n‚úì Collected {len(collected_observations)} observations")
print(f"Skipped {skipped_replies} unparseable replies")

# ‚îÄ‚îÄ Convert to Long Format ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# Each observation becomes 2 rows (one for A, one for B)
# Long format: two rows per collected answer (alternative A and alternative
# B); `chosen` is 1 on the row of the alternative that was picked, else 0.
long_rows = [
    {
        "task": obs["task"],
        "resp": obs["resp"],
        "alt": alt,
        "chosen": 1 if alt == obs["choice"] else 0,
        **CHOICE_TASK_PROFILES[obs["task"]][alt],
    }
    for obs in collected_observations
    for alt in ("A", "B")
]

df_long = pd.DataFrame(long_rows)

expected_rows = len(collected_observations) * 2
picks_a = df_long.loc[df_long["alt"] == "A", "chosen"].sum()
picks_b = df_long.loc[df_long["alt"] == "B", "chosen"].sum()
print(f"\nRows: {len(df_long)} (should be {expected_rows})")
print(f"Choices: A={picks_a}, B={picks_b}")

# Write the table to disk and show a short preview.
output_path = "/content/free_hackathon_conjoint_long.csv"
df_long.to_csv(output_path, index=False)
print(f"\n‚úì File written: {output_path}")

print("\nFirst 8 rows:")
display(df_long.head(8))

{
  "Rewards_Resources": [
    "Recognition only",
    "API credits only (quantified, usable post-event)",
    "Cash prizes only (~$20k total prizes)",
    "Cash + API credits (~$20k total prizes; clear usage terms)"
  ],
  "Peer_Quality_Prestige": [
    "Local university hub / general public",
    "Cross-school and international entrants",
    "Flagship hubs (e.g., MIT/Stanford) + named judges"
  ],
  "Challenge_Structure": [
    "Open innovation + one headline partner challenge",
    "2\u20133 curated tracks (pre-published briefs; selection \u22651 week pre-event)",
    "Single themed track with explicit rubric"
  ],
  "Sprint_Format": [
    "One-week async with daily stand-ups",
    "3\u20135 day evening sprint",
    "24-hour live sprint"
  ],
  "Venture_Track": [
    "No formal venture path",
    "Finalists receive demo-day invite",
    "6-week accelerator (top cohort benefits, mentorship, investor intros)"
  ],
  "Network_Career_Access": [
    "No career access",
    "Curated: com

Unnamed: 0,task,resp,alt,chosen,Rewards_Resources,Peer_Quality_Prestige,Challenge_Structure,Sprint_Format,Venture_Track,Network_Career_Access
0,1,0,A,1,Cash + API credits (~$20k total prizes; clear ...,"Flagship hubs (e.g., MIT/Stanford) + named judges",Open innovation + one headline partner challenge,24-hour live sprint,"6-week accelerator (top cohort benefits, mento...",Curated: company talks + optional 1:1 mentor s...
1,1,0,B,0,Cash + API credits (~$20k total prizes; clear ...,Local university hub / general public,2‚Äì3 curated tracks (pre-published briefs; sele...,3‚Äì5 day evening sprint,No formal venture path,Guaranteed interviews with partner companies
2,1,1,A,1,Cash + API credits (~$20k total prizes; clear ...,"Flagship hubs (e.g., MIT/Stanford) + named judges",Open innovation + one headline partner challenge,24-hour live sprint,"6-week accelerator (top cohort benefits, mento...",Curated: company talks + optional 1:1 mentor s...
3,1,1,B,0,Cash + API credits (~$20k total prizes; clear ...,Local university hub / general public,2‚Äì3 curated tracks (pre-published briefs; sele...,3‚Äì5 day evening sprint,No formal venture path,Guaranteed interviews with partner companies
4,1,2,A,1,Cash + API credits (~$20k total prizes; clear ...,"Flagship hubs (e.g., MIT/Stanford) + named judges",Open innovation + one headline partner challenge,24-hour live sprint,"6-week accelerator (top cohort benefits, mento...",Curated: company talks + optional 1:1 mentor s...
5,1,2,B,0,Cash + API credits (~$20k total prizes; clear ...,Local university hub / general public,2‚Äì3 curated tracks (pre-published briefs; sele...,3‚Äì5 day evening sprint,No formal venture path,Guaranteed interviews with partner companies
6,2,0,A,1,Recognition only,"Flagship hubs (e.g., MIT/Stanford) + named judges",2‚Äì3 curated tracks (pre-published briefs; sele...,One-week async with daily stand-ups,Finalists receive demo-day invite,Curated: company talks + optional 1:1 mentor s...
7,2,0,B,0,Recognition only,Cross-school and international entrants,2‚Äì3 curated tracks (pre-published briefs; sele...,One-week async with daily stand-ups,No formal venture path,No career access


In [None]:
# =============================================================
#  FREE HACKATHON PREFERENCE ANALYSIS
#  Discrete choice model with regularized logistic regression
# =============================================================
import pandas as pd, numpy as np, statsmodels.api as sm

# ‚îÄ‚îÄ Load Data ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
df = pd.read_csv("/content/free_hackathon_conjoint_long.csv")

# Per-alternative pick counts; each chosen == 1 row is one recorded answer.
picks_a = df.loc[df["alt"] == "A", "chosen"].sum()
picks_b = df.loc[df["alt"] == "B", "chosen"].sum()

print("=" * 60, "DATA SUMMARY", "=" * 60, sep="\n")
print(f"Total rows: {len(df)}")
print(f"Choices: A={picks_a}, B={picks_b}")
print(f"Total observations: {df['chosen'].sum()}\n")

# ‚îÄ‚îÄ One-Hot Encode (Force First Level as Baseline) ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
attr_cols = list(ATTRIBUTES)

# One block of dummy columns per attribute. The column belonging to the
# attribute's first listed level is removed when it exists, which makes that
# level the reference category.
dummy_dfs = [
    pd.get_dummies(df[attr], prefix=attr, dtype=float).drop(
        columns=[f"{attr}_{ATTRIBUTES[attr][0]}"], errors="ignore"
    )
    for attr in attr_cols
]

# Side-by-side dummy blocks plus an intercept column, all as floats.
X = sm.add_constant(pd.concat(dummy_dfs, axis=1)).astype(float)
y = df["chosen"]

print(f"Feature matrix: {X.shape[0]} rows √ó {X.shape[1]} columns")

# ‚îÄ‚îÄ Fit Regularized Logistic Regression ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
print("\n" + "="*60)
print("FITTING REGULARIZED LOGISTIC REGRESSION")
print("="*60)

# statsmodels' Logit.fit_regularized implements an L1 penalty only
# (method="l1"). The elastic-net keywords `L1_wt` and `refit` belong to other
# estimators' fit_regularized and are not used by this solver, so they were
# removed instead of suggesting a ridge fit that never took place. The
# reported coefficients are therefore L1-penalised estimates.
# For an actual L2 penalty see the scikit-learn cell (penalty="l2").
logit = sm.Logit(y, X).fit_regularized(
    method="l1",
    alpha=0.3,    # L1 penalty weight; a scalar applies to every column, `const` included
    disp=False
)

print(logit.summary())

# ‚îÄ‚îÄ Extract Utilities ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# Coefficients labelled by design-matrix column; the intercept is pulled out
# and the remaining entries are treated as part-worth utilities.
util = pd.Series(logit.params, index=X.columns)
intercept = util["const"] if "const" in util.index else 0
util = util.drop("const", errors="ignore")

print("\n" + "=" * 60, "PART-WORTH UTILITIES (All Features)", "=" * 60, sep="\n")
print(f"Intercept: {intercept:.4f}\n")

# Two-column table sorted from the highest to the lowest utility.
util_df = pd.DataFrame({"Feature": util.index, "Utility": util.to_numpy()})
util_df = util_df.sort_values("Utility", ascending=False).reset_index(drop=True)

print(util_df.to_string(index=False))

# ‚îÄ‚îÄ Top and Bottom Features ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# Ten largest and ten smallest utilities, printed as two ranked lists.
top10 = util.sort_values(ascending=False).head(10)
bottom10 = util.sort_values(ascending=True).head(10)

for heading, ranked in (
    ("TOP 10 MOST VALUED FEATURES", top10),
    ("TOP 10 LEAST VALUED FEATURES", bottom10),
):
    print("\n" + "=" * 60, heading, "=" * 60, sep="\n")
    for rank, (feature, value) in enumerate(ranked.items(), start=1):
        print(f"{rank:2d}. {feature:55s} {value:+.4f}")

# ‚îÄ‚îÄ Utilities by Attribute ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
print("\n" + "=" * 60, "UTILITIES BY ATTRIBUTE", "=" * 60, sep="\n")

# Utilities smaller than this in magnitude are reported as "no effect".
ZERO_THRESHOLD = 0.0001

for attr, all_levels in ATTRIBUTES.items():
    print(f"\n{attr}:")

    # The first listed level is the reference and is shown with utility 0.
    baseline, *other_levels = all_levels
    print(f"  ‚Ä¢ {baseline:50s}  0.0000 (baseline)")

    for level in other_levels:
        feature_name = f"{attr}_{level}"
        if feature_name not in util.index:
            # No coefficient under this name: nothing is printed for the level.
            continue
        value = util[feature_name]
        if abs(value) >= ZERO_THRESHOLD:
            shown = f"{value:+.4f}"
        else:
            shown = "~0.0000 (no effect)"
        print(f"  ‚Ä¢ {level:50s} {shown}")

# ‚îÄ‚îÄ Key Insights ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
print("\n" + "=" * 60, "KEY INSIGHTS", "=" * 60, sep="\n")

# Only utilities whose magnitude exceeds ZERO_THRESHOLD take part in the summary.
util_nonzero = util[util.abs() > ZERO_THRESHOLD]

if not util_nonzero.empty:
    most_valued = util_nonzero.idxmax()
    least_valued = util_nonzero.idxmin()
    utility_range = util_nonzero.max() - util_nonzero.min()
    mean_abs_utility = util_nonzero.abs().mean()

    print(f"\nüèÜ MOST VALUED: {most_valued}")
    print(f"   Utility: {util[most_valued]:+.4f}")

    print(f"\n‚ùå LEAST VALUED: {least_valued}")
    print(f"   Utility: {util[least_valued]:+.4f}")

    print(f"\nüìä Range: {utility_range:.4f}")
    print(f"üìä Mean absolute utility: {mean_abs_utility:.4f}")
    print(f"üìä Non-zero features: {len(util_nonzero)}/{len(util)}")

# ‚îÄ‚îÄ Save Results ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# Write the sorted utilities table, then print a short reading guide.
util_df.to_csv('/content/free_hackathon_utilities.csv', index=False)
print("\n‚úì Utilities saved to: /content/free_hackathon_utilities.csv")

print("\n" + "=" * 60, "INTERPRETATION", "=" * 60, sep="\n")
for note in (
    "‚úì Each attribute has one baseline level (utility = 0)",
    "‚úì Baselines are the FIRST level listed in ATTRIBUTES dict",
    "‚úì Other levels show utility relative to baseline",
    "‚úì Positive = better than baseline | Negative = worse",
    "‚úì Compare utilities WITHIN attributes for preferences",
):
    print(note)

DATA SUMMARY
Total rows: 1800
Choices: A=539, B=361
Total observations: 900

Feature matrix: 1800 rows √ó 15 columns

FITTING REGULARIZED LOGISTIC REGRESSION
                           Logit Regression Results                           
Dep. Variable:                 chosen   No. Observations:                 1800
Model:                          Logit   Df Residuals:                     1785
Method:                           MLE   Df Model:                           14
Date:                Mon, 10 Nov 2025   Pseudo R-squ.:                  0.2549
Time:                        21:16:43   Log-Likelihood:                -929.58
converged:                       True   LL-Null:                       -1247.7
Covariance Type:            nonrobust   LLR p-value:                1.061e-126
                                                                                                 coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------

In [None]:
#############################################################################################################################################################################################################################################################################################################################################################################
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#############################################################################################################################################################################################################################################################################################################################################################################



In [None]:
# =============================================================
#  PAID ADD-ON CONJOINT - GPT DATA COLLECTION
#  Generates synthetic respondent data for paid hackathon add-ons
# =============================================================
!pip -q install openai==0.28.1 pandas

import os, random, json, pandas as pd, openai, numpy as np

# ‚îÄ‚îÄ API Configuration ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# Read the key from the OPENAI_API_KEY environment variable when it is set so
# a real secret never has to be pasted into (and saved with) the notebook; the
# original placeholder stays as the fallback value.
openai.api_key = os.environ.get("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY")

# ‚îÄ‚îÄ Attribute Definitions ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# Paid add-on design: five Yes/No features followed by the price levels.
ATTRIBUTES = {
    **{
        feature: ["Yes", "No"]
        for feature in (
            "Demand_Learning",
            "Live_Classes",
            "Mentoring_1on1",
            "Exclusive_Workshops",
            "Exclusive_Events",
        )
    },
    "Price": [f"${amount}" for amount in (0, 9, 19, 29, 39, 49)],
}

print("Add-on attribute levels:\n", json.dumps(ATTRIBUTES, indent=2))

# ‚îÄ‚îÄ Generate Choice Tasks ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# Size of the synthetic experiment: NUM_TASKS choice tasks are generated and
# each task is sent to the model N_RESPONSES times.
NUM_TASKS = 300
N_RESPONSES = 3

def random_profile():
    """Draw a random add-on package that offers at least one feature.

    Every attribute in ATTRIBUTES (price included) gets a uniformly sampled
    level. If all non-price attributes came out as "No", one of them is
    picked at random and switched to "Yes".

    Returns:
        dict mapping each attribute name to its sampled level.
    """
    prof = {k: random.choice(v) for k, v in ATTRIBUTES.items()}
    # Select the feature attributes by name rather than by slicing off the
    # last key, so the repair step can never overwrite "Price" when the dict
    # order changes, and a price-only design does not raise.
    services = [k for k in ATTRIBUTES if k != "Price"]
    if services and all(prof[srv] == "No" for srv in services):
        prof[random.choice(services)] = "Yes"
    return prof

# Task table keyed by 1-based task id. Within a task the two packages never
# share a price: B's price is redrawn until it differs from A's.
CHOICE_TASK_PROFILES = {}
price_levels = ATTRIBUTES["Price"]
for task_number in range(1, NUM_TASKS + 1):
    first = random_profile()
    second = random_profile()
    while second["Price"] == first["Price"]:
        second["Price"] = random.choice(price_levels)
    CHOICE_TASK_PROFILES[task_number] = {"A": first, "B": second}

print(f"\n‚úì Generated {NUM_TASKS} choice tasks")

# ‚îÄ‚îÄ Define Prompt Template ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
def build_prompt(pair):
    """Render two add-on packages plus the opt-out as the user message.

    Args:
        pair: two-item sequence of package dicts, option A first.

    Returns:
        A string listing option A, option B and the fixed "no add-on" option
        C on separate lines, followed by a blank line and the question.
    """
    first, second = pair
    rendered = [
        "; ".join(f"{name}: {level}" for name, level in option.items())
        for option in (first, second)
    ]
    return "\n".join([
        f"Option A ‚Äì {rendered[0]}",
        f"Option B ‚Äì {rendered[1]}",
        "Option C ‚Äì I would not buy any add-on. I would rather participate in a free hackathon without these add-ons.",
        "",
        "Which do you choose? Respond with Option A, Option B, or Option C.",
    ])

# System message sent with every request: a budget-conscious student persona
# that is told to fall back on Option C when value for money is low.
SYSTEM_MSG = dict(
    role="system",
    content=" ".join([
        "You are an AI-savvy computer science student on a tight budget.",
        "You only pay for an add-on if the included features clearly justify the price.",
        "If value for money is low, choose Option C (no add-on).",
        "Roughly 70% of the time you skip (Option C).",
        "Be consistent with your logic ‚Äî prioritize free or high-value bundles.",
    ]),
)

# ‚îÄ‚îÄ Query GPT-3.5 for Choices ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# Ask the model N_RESPONSES times per task and record its pick (A, B or the
# opt-out C).
#
# A reply is only recorded when it names exactly one of the three options.
# Anything else is skipped and counted instead of being recorded as an
# opt-out; previously a reply such as "I choose Option A" was stored as "C"
# because it did not start with the option label.
collected_observations = []
skipped_replies = 0  # replies that could not be mapped to exactly one option

for task_id, pair in CHOICE_TASK_PROFILES.items():
    prompt = build_prompt((pair["A"], pair["B"]))

    for r in range(N_RESPONSES):
        try:
            reply = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[SYSTEM_MSG, {"role": "user", "content": prompt}],
                temperature=0.2,
                max_tokens=10,
            )["choices"][0]["message"]["content"].strip().lower()
        except Exception as e:
            # Best effort: report the failed request and carry on with the rest.
            print(f"Error Task {task_id}, Resp {r}: {e}")
            continue

        mentioned = [label for label in ("a", "b", "c") if f"option {label}" in reply]
        if len(mentioned) != 1:
            skipped_replies += 1
            print(f"Skipped Task {task_id}, Resp {r}: unparseable reply {reply!r}")
            continue
        choice = mentioned[0].upper()

        obs = {"task": task_id, "resp": r, "choice": choice}

        # Attach the chosen package's levels, or "None" everywhere for Option C.
        if choice in ("A", "B"):
            obs.update(pair[choice])
        else:
            obs.update({k: "None" for k in ATTRIBUTES})

        collected_observations.append(obs)

print(f"\n‚úì Collected {len(collected_observations)} observations")
print(f"Skipped {skipped_replies} unparseable replies")

# ‚îÄ‚îÄ Save Wide Format ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# Wide format: one row per collected answer, with a constant chosen = 1 flag.
df_wide = pd.DataFrame(collected_observations).assign(chosen=1)
df_wide.to_csv("/content/paid_addon_conjoint_wide.csv", index=False)

row_count = len(df_wide)
print(f"‚úì File written: /content/paid_addon_conjoint_wide.csv | Rows: {row_count}")

display(df_wide.head())

Add-on attribute levels:
 {
  "Demand_Learning": [
    "Yes",
    "No"
  ],
  "Live_Classes": [
    "Yes",
    "No"
  ],
  "Mentoring_1on1": [
    "Yes",
    "No"
  ],
  "Exclusive_Workshops": [
    "Yes",
    "No"
  ],
  "Exclusive_Events": [
    "Yes",
    "No"
  ],
  "Price": [
    "$0",
    "$9",
    "$19",
    "$29",
    "$39",
    "$49"
  ]
}

‚úì Generated 300 choice tasks

‚úì Collected 900 observations
‚úì File written: /content/paid_addon_conjoint_wide.csv | Rows: 900


Unnamed: 0,task,resp,choice,Demand_Learning,Live_Classes,Mentoring_1on1,Exclusive_Workshops,Exclusive_Events,Price,chosen
0,1,0,A,Yes,No,Yes,No,Yes,$39,1
1,1,1,A,Yes,No,Yes,No,Yes,$39,1
2,1,2,A,Yes,No,Yes,No,Yes,$39,1
3,2,0,A,Yes,Yes,Yes,Yes,No,$0,1
4,2,1,A,Yes,Yes,Yes,Yes,No,$0,1


In [None]:
# =============================================================
#  PAID ADD-ON ANALYSIS - RIDGE LOGIT WITH WILLINGNESS TO PAY
#  Converts to long format, fits discrete choice model, calculates WTP
# =============================================================
import pandas as pd, numpy as np, statsmodels.api as sm, sklearn
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

# ‚îÄ‚îÄ Expand to Long Format ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# Each observation becomes 2 rows (A and B). CHOICE_TASK_PROFILES stores no
# profile for the opt-out, so an Option C answer gets no row of its own: it
# shows up as a task/response where both the A row and the B row have
# chosen = 0.
long_rows = []
for rec in collected_observations:
    t, r, pick = rec["task"], rec["resp"], rec["choice"]

    for alt, prof in CHOICE_TASK_PROFILES[t].items():
        long_rows.append({
            "task": t, "resp": r, "alt": alt,
            "chosen": int(alt == pick),
            **prof
        })

df = pd.DataFrame(long_rows)

print(f"Rows: {len(df)} | Chosen: {df['chosen'].sum()}, Not chosen: {(df['chosen']==0).sum()}")

# Make the number of opt-out answers visible, since they are only implicit
# in the long table.
n_opt_out = sum(rec["choice"] == "C" for rec in collected_observations)
print(f"Opt-out (Option C) answers, represented only as two unchosen rows: {n_opt_out}")

df.to_csv("/content/paid_addon_conjoint_long.csv", index=False)
print("‚úì Saved: /content/paid_addon_conjoint_long.csv")

# ‚îÄ‚îÄ Prepare Features ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
bin_cols = [
    "Demand_Learning",
    "Live_Classes",
    "Mentoring_1on1",
    "Exclusive_Workshops",
    "Exclusive_Events",
]

# Any "None" marker in a feature column is treated as "No"; values are then
# stored as plain strings.
for c in bin_cols:
    df[c] = df[c].replace("None", "No").astype(str)

# Dollar label -> numeric price, e.g. "$19" -> 19.0.
price_lookup = {f"${amount}": amount for amount in (0, 9, 19, 29, 39, 49)}
df["Price_num"] = df["Price"].map(price_lookup).astype(float)

# ‚îÄ‚îÄ One-Hot Encode ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
attr_cols = bin_cols + ["Price_num"]
ohe_kw = dict(drop="first", handle_unknown="ignore")

# Request dense output under whichever keyword this scikit-learn accepts
# (`sparse_output` in newer releases, `sparse` in older ones). The constructor
# is probed directly because comparing version strings is lexicographic and
# mis-orders releases such as "1.10" versus "1.4".
try:
    OneHotEncoder(sparse_output=False)
except TypeError:
    ohe_kw["sparse"] = False
else:
    ohe_kw["sparse_output"] = False

# One-hot encode the Yes/No columns (first category dropped) and pass the
# numeric price through unchanged as the last column.
enc = ColumnTransformer(
    [("bin", OneHotEncoder(**ohe_kw), bin_cols)],
    remainder="passthrough"
)

X_enc = enc.fit_transform(df[attr_cols])
feat_names = list(enc.named_transformers_["bin"].get_feature_names_out(bin_cols)) + ["Price_num"]
X = sm.add_constant(pd.DataFrame(X_enc, columns=feat_names))
y = df["chosen"]

print(f"\nFeature matrix: {X.shape[0]} rows √ó {X.shape[1]} features")

# ‚îÄ‚îÄ Fit Ridge Logistic Regression ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
print("\n" + "="*60)
print("FITTING REGULARIZED LOGISTIC REGRESSION")
print("="*60)

# statsmodels' Logit.fit_regularized implements an L1 penalty only
# (method="l1"). The elastic-net keywords `L1_wt` and `refit` belong to other
# estimators' fit_regularized and are not used by this solver, so no ridge
# fit and no unpenalised refit ever happened. They were removed, and the
# coefficients below are the L1-penalised estimates.
logit = sm.Logit(y, X).fit_regularized(
    method="l1",
    alpha=0.02,   # weak L1 penalty; a scalar applies to every column, `const` included
)

print(logit.summary())

# Coefficients labelled by design-matrix column, intercept excluded.
util = pd.Series(logit.params, index=X.columns).drop("const")

print("\n" + "="*60)
print("FEATURE UTILITIES")
print("="*60)
print(util.sort_values(ascending=False))

# ‚îÄ‚îÄ Calculate Willingness to Pay ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
print("\n" + "="*60)
print("WILLINGNESS TO PAY CALCULATION")
print("="*60)

beta_price = util.get("Price_num", np.nan)

if pd.isna(beta_price) or np.isclose(beta_price, 0):
    print("‚ö†Ô∏è  Price coefficient near zero, trying lighter regularization...")
    # Same L1-penalised fit with a ten times smaller penalty weight. (The
    # elastic-net keywords `L1_wt`/`refit` are not used by this solver.)
    logit = sm.Logit(y, X).fit_regularized(method="l1", alpha=0.002)
    util = pd.Series(logit.params, index=X.columns).drop("const")
    beta_price = util.get("Price_num", np.nan)

if pd.isna(beta_price) or np.isclose(beta_price, 0):
    print("‚ùå Price slope not identified. Cannot compute WTP.")
elif beta_price > 0:
    # A positive price slope means higher prices raise choice probability;
    # dividing by its magnitude would give dollar figures with no meaning.
    print(f"Price coefficient is positive ({beta_price:.4f}); WTP is not meaningful and is skipped.")
else:
    # WTP = -utility / price coefficient; with a negative slope this equals
    # utility / |price coefficient|.
    wtp = (util.drop("Price_num") / -beta_price).sort_values(ascending=False).round(2)

    print(f"\nPrice coefficient: {beta_price:.4f}")
    print("\nImplied Willingness to Pay ($):")
    print(wtp)

    # Save the WTP table.
    wtp_df = pd.DataFrame({
        'Feature': wtp.index,
        'WTP_USD': wtp.values
    })
    wtp_df.to_csv('/content/paid_addon_wtp.csv', index=False)
    print(f"\n‚úì WTP estimates saved to: /content/paid_addon_wtp.csv")

# ‚îÄ‚îÄ Save Utilities ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# Utilities as a two-column table, highest first, written to CSV.
util_df = pd.DataFrame({"Feature": util.index, "Utility": util.to_numpy()})
util_df = util_df.sort_values("Utility", ascending=False)

util_df.to_csv('/content/paid_addon_utilities.csv', index=False)
print("‚úì Utilities saved to: /content/paid_addon_utilities.csv")

Rows: 1800 | Chosen: 838, Not chosen: 962
‚úì Saved: /content/paid_addon_conjoint_long.csv

Feature matrix: 1800 rows √ó 7 features

FITTING REGULARIZED LOGISTIC REGRESSION
Optimization terminated successfully    (Exit mode 0)
            Current function value: 0.6434714322439482
            Iterations: 92
            Function evaluations: 94
            Gradient evaluations: 92
                           Logit Regression Results                           
Dep. Variable:                 chosen   No. Observations:                 1800
Model:                          Logit   Df Residuals:                     1793
Method:                           MLE   Df Model:                            6
Date:                Mon, 10 Nov 2025   Pseudo R-squ.:                 0.06851
Time:                        21:23:19   Log-Likelihood:                -1158.2
converged:                       True   LL-Null:                       -1243.4
Covariance Type:            nonrobust   LLR p-value:            

In [None]:
# =============================================================
#  PAID ADD-ON ANALYSIS - SKLEARN ALTERNATIVE
#  Uses sklearn pipeline for comparison with statsmodels approach
# =============================================================
import numpy as np, pandas as pd, sklearn
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

# ‚îÄ‚îÄ Prepare Data ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
y = df["chosen"].astype(int)

binary = [
    "Demand_Learning",
    "Live_Classes",
    "Mentoring_1on1",
    "Exclusive_Workshops",
    "Exclusive_Events",
]

# Any "None" marker in a feature column is treated as "No".
for col in binary:
    df[col] = df[col].replace("None", "No")

# Dollar label -> numeric price.
price_map = dict(zip(("$0", "$9", "$19", "$29", "$39", "$49"), (0, 9, 19, 29, 39, 49)))
df["Price_num"] = df["Price"].map(price_map).astype(float)

# ‚îÄ‚îÄ Build sklearn Pipeline ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
ohe_kwargs = dict(drop="first", handle_unknown="ignore")

# Request dense output under whichever keyword this scikit-learn accepts
# (`sparse_output` in newer releases, `sparse` in older ones). The constructor
# is probed directly because comparing version strings is lexicographic and
# mis-orders releases such as "1.10" versus "1.4".
try:
    OneHotEncoder(sparse_output=False)
except TypeError:
    ohe_kwargs["sparse"] = False
else:
    ohe_kwargs["sparse_output"] = False

# One-hot encode the Yes/No columns (first category dropped); remaining
# columns are passed through unchanged.
enc = ColumnTransformer(
    transformers=[("bin", OneHotEncoder(**ohe_kwargs), binary)],
    remainder="passthrough"
)

# L2-penalised logistic regression.
logit = LogisticRegression(
    solver="lbfgs",
    penalty="l2",
    C=1.0,
    max_iter=5000,
    class_weight=None
)

pipe = Pipeline([
    ("enc", enc),
    ("clf", logit)
])

# ‚îÄ‚îÄ Fit Model ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
X = df[binary + ["Price_num"]]
pipe.fit(X, y)
print("‚úî sklearn logit fitted successfully")

# Coefficient labels: encoded feature names followed by the passthrough
# price column.
fitted_encoder = pipe.named_steps["enc"].named_transformers_["bin"]
bin_names = fitted_encoder.get_feature_names_out(binary)
feat_names = [*bin_names, "Price_num"]
coefs = pipe.named_steps["clf"].coef_[0]
util = pd.Series(coefs, index=feat_names).sort_values(ascending=False)

print("\n" + "=" * 60, "FEATURE UTILITIES (sklearn)", "=" * 60, sep="\n")
print(util)

# ‚îÄ‚îÄ Calculate WTP ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
beta_price = util.loc["Price_num"]

if np.isclose(beta_price, 0):
    print("\n‚ö†Ô∏è Price slope ~ 0; cannot compute WTP")
elif beta_price > 0:
    # A positive price slope means higher prices raise choice probability;
    # dividing by its magnitude would give dollar figures with no meaning.
    print(f"\nPrice coefficient is positive ({beta_price:.4f}); WTP is not meaningful and is skipped.")
else:
    # WTP = -utility / price coefficient; with a negative slope this equals
    # utility / |price coefficient|.
    wtp = (util.drop("Price_num") / -beta_price).sort_values(ascending=False)
    print("\n" + "="*60)
    print("WILLINGNESS TO PAY (sklearn)")
    print("="*60)
    print(wtp.round(2))

    # Save the sklearn WTP table.
    wtp_sklearn_df = pd.DataFrame({
        'Feature': wtp.index,
        'WTP_USD': wtp.values.round(2)
    })
    wtp_sklearn_df.to_csv('/content/paid_addon_wtp_sklearn.csv', index=False)
    print(f"\n‚úì sklearn WTP estimates saved to: /content/paid_addon_wtp_sklearn.csv")

‚úî sklearn logit fitted successfully

FEATURE UTILITIES (sklearn)
Exclusive_Events_Yes       0.574923
Demand_Learning_Yes        0.487242
Exclusive_Workshops_Yes    0.455807
Mentoring_1on1_Yes         0.391720
Price_num                 -0.028250
Live_Classes_Yes          -0.037517
dtype: float64

WILLINGNESS TO PAY (sklearn)
Exclusive_Events_Yes       20.35
Demand_Learning_Yes        17.25
Exclusive_Workshops_Yes    16.13
Mentoring_1on1_Yes         13.87
Live_Classes_Yes           -1.33
dtype: float64

‚úì sklearn WTP estimates saved to: /content/paid_addon_wtp_sklearn.csv
