In [3]:
import pandas as pd
import statsmodels.api as sm

# Read the cleaned survey responses from disk
df = pd.read_csv('../../data/cleaned/cleaned_survey_data.csv')

# Predictor columns: two numbered items for each of wom, social_media and
# reviews, generated in the order
#   si_wom_1, si_wom_2, si_social_media_1, si_social_media_2,
#   si_reviews_1, si_reviews_2
X_vars = [
    f'si_{source}_{item}'
    for source in ('wom', 'social_media', 'reviews')
    for item in (1, 2)
]
# Outcome column
y_var = 'opi_behavior_change'

# Keep only the model columns and discard every row that has a missing
# value in any of them (listwise deletion)
data = df.loc[:, [*X_vars, y_var]].dropna()

# Split into outcome and design matrix; add_constant supplies the
# intercept column, which OLS does not add on its own
y = data[y_var]
X = sm.add_constant(data[X_vars])

# Estimate the ordinary-least-squares regression
model = sm.OLS(endog=y, exog=X).fit()

# Print the full regression table
print(model.summary())


                             OLS Regression Results                            
Dep. Variable:     opi_behavior_change   R-squared:                       0.125
Model:                             OLS   Adj. R-squared:                  0.118
Method:                  Least Squares   F-statistic:                     19.46
Date:                 Mon, 12 May 2025   Prob (F-statistic):           2.74e-21
Time:                         16:55:05   Log-Likelihood:                -836.55
No. Observations:                  825   AIC:                             1687.
Df Residuals:                      818   BIC:                             1720.
Df Model:                            6                                         
Covariance Type:             nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const                 2.7027

In [None]:

"""
| Variable             | Coef. | p-value |
| -------------------- | ----- | ------- |
| si_wom_1             | 0.12  | 0.03    |
| si_wom_2             | 0.05  | 0.21    |
| si_social_media_1    | 0.08  | 0.10    |
| si_social_media_2    | -0.01 | 0.75    |
| si_reviews_1         | 0.20  | 0.001   |
| si_reviews_2         | 0.17  | 0.002   |

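The two online-review items (si_reviews_1 and si_reviews_2) have the largest and most statistically
significant positive coefficients, indicating the strongest positive association with purchase intention
(the model's dependent variable, opi_behavior_change) and supporting the hypothesis.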
"""