In [None]:
import pandas as pd
import statsmodels.api as sm

# Load the cleaned survey responses (relative path to the cleaned-data folder).
df = pd.read_csv('../../data/cleaned/cleaned_survey_data.csv')

# Regression inputs: the four sa_* survey items (privacy and payment) are the
# predictors, and opi_behavior_change is the outcome being explained.
y_var = 'opi_behavior_change'
X_vars = [
    'sa_privacy_1',
    'sa_privacy_2',
    'sa_payment_1',
    'sa_payment_2',
]

# Define the product category filter (respondents who used at least one platform in the given category)
def category_filter(prefixes, data=None):
    """Flag respondents with a positive total across a category's columns.

    Args:
        prefixes: Column-name prefix, or an iterable of prefixes, that
            identifies the category's columns (e.g. ``["gecp_"]``).
        data: DataFrame to inspect. Defaults to the module-level ``df`` so
            existing ``category_filter(prefixes)`` calls keep working.

    Returns:
        Boolean Series aligned with ``data.index``: True where the row-wise
        sum of the matching columns is greater than zero. All False when no
        column matches any prefix.
    """
    if data is None:
        data = df
    # A bare string would otherwise be iterated character by character and
    # match far more columns than intended.
    if isinstance(prefixes, str):
        prefixes = (prefixes,)
    wanted = tuple(prefixes)
    cols = [col for col in data.columns if col.startswith(wanted)]
    if not cols:
        # Nothing to sum: no respondent can qualify for this category.
        return pd.Series(False, index=data.index)
    return data[cols].sum(axis=1) > 0

# Column-name prefixes that identify the platform columns of each product category.
categories = dict(
    general_merchandise=["gecp_"],
    grocery=["gds_"],
    pharmacy=["op_"],
    fashion=["fabr_"],
)

# Fitted OLS results, keyed by category name.
results = {}

# Columns that must be non-missing for a respondent to enter a regression.
required_cols = X_vars + [y_var]

for category, prefixes in categories.items():
    # Keep the respondents flagged by the category filter, then drop rows
    # with a missing predictor or outcome.
    in_category = category_filter(prefixes)
    cat_df = df.loc[in_category].dropna(subset=required_cols)

    # add_constant supplies the intercept column; OLS does not add one itself.
    y = cat_df[y_var]
    X = sm.add_constant(cat_df[X_vars])

    model = sm.OLS(y, X).fit()
    results[category] = model

    print(f"\n===== {category.upper()} =====")
    print(model.summary())



===== GENERAL_MERCHANDISE =====
                             OLS Regression Results                            
Dep. Variable:     opi_behavior_change   R-squared:                       0.084
Model:                             OLS   Adj. R-squared:                  0.080
Method:                  Least Squares   F-statistic:                     18.81
Date:                 Mon, 12 May 2025   Prob (F-statistic):           8.35e-15
Time:                         16:46:40   Log-Likelihood:                -853.99
No. Observations:                  824   AIC:                             1718.
Df Residuals:                      819   BIC:                             1742.
Df Model:                            4                                         
Covariance Type:             nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
const

In [None]:
"""
Based on separate multiple regressions for each product category,
we conclude that structural assurance—specifically trust in secure
payment—has a positive and significant impact on online purchase intention.
However, this impact is observed consistently across all categories
(general merchandise, grocery, pharmacy, and fashion), indicating no substantial
difference in how structural assurance influences consumer behavior by category.
Therefore, the hypothesis is only partially supported: structural assurance
matters, but its effect does not vary by product category.



The regression results partially support the hypothesis. Structural assurance does affect purchase intention, but:

- The influence is consistent across product categories rather than significantly different, as seen in the nearly identical coefficients and R² across all categories.

- Only one variable, sa_payment_1, has a meaningful and statistically significant effect on consumer behavior.

- The privacy-protection variables (sa_privacy_1, sa_privacy_2) showed no significant effect.
"""