In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import statsmodels.formula.api as smf

In [None]:
no = pd.read_csv('no_discount.csv')
all = pd.read_csv('all_discount.csv')
half = pd.read_csv('half_discount.csv')


In [None]:
# Map user features to number
dataframes = {'all': all, 'half': half, 'no': no}
feature_cols = ['user_feature_1', 'user_feature_2', 'user_feature_3']
category_maps = {}

# 1. Build mapping for each feature across all dataframes
for col in feature_cols:
    # Collect unique values from all dataframes for this column
    unique_vals = pd.concat([df[col] for df in dataframes.values()]).unique()
    cat_map = {cat: i+1 for i, cat in enumerate(sorted(unique_vals))}
    category_maps[col] = cat_map

# 2. Apply the mapping to each dataframe
for df in dataframes.values():
    for col in feature_cols:
        df[col + '_num'] = df[col].map(category_maps[col])

# 3. Show mapping for each feature
for col in feature_cols:
    print(f"\nMapping for {col}:")
    for cat, idx in category_maps[col].items():
        print(f"{idx}: {cat}")


Mapping for user_feature_1:
1: Female
2: Male
3: Non-binary / third gender
4: Prefer not to say

Mapping for user_feature_2:
1: 1-2 years ago
2: 2-3 years ago
3: 3-4 yeas ago
4: Currently, I don't have a phone
5: Less than 1 year ago
6: More than 4 years ago

Mapping for user_feature_3:
1: Brand reputation
2: Design and appearance
3: Good price
4: Product quality


In [4]:
# Map choice to number (no discount)
# Define the mapping for 'choice'
choice_map = {
    "Do not purchase; save the $30 or use it for other expenses ($30 is roughly enough to cover two meals at a fast-food restaurant in the U.S.)": 0,
    "Color: Black; Style: Solid color; Weight: 2.66 pounds; Price: $24 (MSRP: $24)": 1,
    "Color: Black; Style: Gradient color; Weight: 2.66 pounds; Price: $26 (MSRP: $26)": 2,
    "Color: Dark blue; Style: Solid color; Weight: 2.61 pounds; Price: $25 (MSRP: $25)": 3,
    "Color: Dark blue; Style: Gradient color; Weight: 2.64 pounds; Price: $25 (MSRP: $25)": 4,
    "Color: Light blue; Style: Gradient color; Weight: 2.68 pounds; Price: $26 (MSRP: $26)": 5,
    "Color: White; Style: Solid color; Weight: 2.68 pounds; Price: $27 (MSRP: $27)": 6
}

# Apply the mapping
no['choice_num'] = no['choice'].map(choice_map)

In [5]:
# Map choice to number (all discount)
# Define the mapping for 'choice'
choice_map = {
    "Do not purchase; save the $30 or use it for other expenses ($30 is roughly enough to cover two meals at a fast-food restaurant in the U.S.)": 0,
    "Color: Black; Style: Solid color; Weight: 2.66 pounds; Price: $4.8 (MSRP: $24)": 1,
    "Color: Black; Style: Gradient color; Weight: 2.66 pounds; Price: $5.2 (MSRP: $26)": 2,
    "Color: Dark blue; Style: Solid color; Weight: 2.61 pounds; Price: $5 (MSRP: $25)": 3,
    "Color: Dark blue; Style: Gradient color; Weight: 2.64 pounds; Price: $5 (MSRP: $25)": 4,
    "Color: Light blue; Style: Gradient color; Weight: 2.68 pounds; Price: $5.2 (MSRP: $26)": 5,
    "Color: White; Style: Solid color; Weight: 2.68 pounds; Price: $5.4 (MSRP: $27)": 6
}

# Apply the mapping
all['choice_num'] = all['choice'].map(choice_map)

In [None]:
# Normalization: user features
# Define the columns to scale
columns_to_scale = ['user_feature_1_num', 'user_feature_2_num', 'user_feature_3_num']

# Initialize the scaler
scaler = MinMaxScaler()

# Fit and transform the selected columns
half[columns_to_scale] = scaler.fit_transform(half[columns_to_scale])
all[columns_to_scale] = scaler.fit_transform(all[columns_to_scale])
no[columns_to_scale] = scaler.fit_transform(no[columns_to_scale])

In [7]:
no['if_purchase'] = no['choice_num'].apply(lambda x: 1 if x != 0 else x)
all['if_purchase'] = all['choice_num'].apply(lambda x: 1 if x != 0 else x)

In [8]:
no['if_treated'] = 0
all['if_treated'] = 1

In [9]:
merge = pd.concat([no, all], axis=0)

In [10]:
# Fit the OLS model
model = smf.ols('if_purchase ~ if_treated + user_feature_1_num + user_feature_2_num + user_feature_3_num', data=merge)
results = model.fit()

# Print the regression results
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:            if_purchase   R-squared:                       0.030
Model:                            OLS   Adj. R-squared:                  0.023
Method:                 Least Squares   F-statistic:                     4.337
Date:                Mon, 15 Dec 2025   Prob (F-statistic):            0.00183
Time:                        15:12:40   Log-Likelihood:                -397.31
No. Observations:                 569   AIC:                             804.6
Df Residuals:                     564   BIC:                             826.3
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
Intercept              0.3494      0

In [11]:
# Calculate relative effect size
# Relative effect size = treatment effect / baseline (control group) purchase rate

# Get the coefficient for if_treated (treatment effect)
treatment_effect = results.params['if_treated']

# Get the intercept (baseline purchase rate for control group)
baseline_rate = results.params['Intercept']

# Calculate relative effect size
relative_effect_size = treatment_effect / baseline_rate

print("=" * 60)
print("Relative Effect Size")
print("=" * 60)
print(f"Baseline purchase rate (control): {baseline_rate:.4f}")
print(f"Treatment effect: {treatment_effect:.4f}")
print(f"Relative effect size: {relative_effect_size:.4f} ({relative_effect_size*100:.2f}%)")
print("=" * 60)
print(f"\nInterpretation: The discount treatment increases purchase rate by {relative_effect_size*100:.2f}% relative to the control group baseline.")


Relative Effect Size
Baseline purchase rate (control): 0.3494
Treatment effect: 0.1517
Relative effect size: 0.4342 (43.42%)

Interpretation: The discount treatment increases purchase rate by 43.42% relative to the control group baseline.
