## Testing Hypothesis H8: Personalization Impact on Loyalty

To test whether higher perceived personalization leads to more consistent platform usage and higher behavior change intention, follow this structured approach:

In [10]:
import pandas as pd

# Load dataset
df = pd.read_csv('../../data/cleaned/cleaned_survey_data.csv')

# Get all platform-related columns
platform_cols = [col for col in df.columns if any(
    x in col for x in [
        "general_e_commerce_platforms_",
        "specialty_online_stores___automobile_",
        "online_pharmacies_",
        "fashion_and_beauty_retailers_",
        "grocery_delivery_services_"
    ]
)]

# For each user, count platforms used (assuming binary encoding)
df['platforms_used'] = df[platform_cols].sum(axis=1)

# Compute HHI (if columns are binary)
def calculate_hhi(row):
    platforms = row[platform_cols]
    total = platforms.sum()
    if total == 0:
        return 0
    shares = platforms / total
    return (shares ** 2).sum()

df['platform_concentration_hhi'] = df.apply(calculate_hhi, axis=1)

# Summary stats
print(df[['platforms_used', 'platform_concentration_hhi']].describe())
print(df[['platforms_used', 'platform_concentration_hhi']])

       platforms_used  platform_concentration_hhi
count           825.0                       825.0
mean              0.0                         0.0
std               0.0                         0.0
min               0.0                         0.0
25%               0.0                         0.0
50%               0.0                         0.0
75%               0.0                         0.0
max               0.0                         0.0
     platforms_used  platform_concentration_hhi
0               0.0                           0
1               0.0                           0
2               0.0                           0
3               0.0                           0
4               0.0                           0
..              ...                         ...
820             0.0                           0
821             0.0                           0
822             0.0                           0
823             0.0                           0
824             0.0   

#### Herfindahl-Hirschman Index (HHI) – Measures market concentration (higher = more concentrated).

For each respondent, compute:

H
H
I
=
∑
i
=
1
n
(
s
i
)
2
HHI= 
i=1
∑
n
​
 (s 
i
​
 ) 
2
 
where 
s
i
s 
i
​
  = share of platform 
i
i in the respondent's total usage.


In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import seaborn as sns
import matplotlib.pyplot as plt


# Compute personalization score (average of two items)
df['personalization_score'] = df[['pu_personalization_1', 'pu_personalization_2']].mean(axis=1)

# Define dependent variables
df['usage_consistency'] = df['platform_concentration_hhi']  # Likert scale (1-5/1-7)?
df['behavior_change'] = df['opi_behavior_change']  # Likert scale (1-5/1-7)?

In [13]:
df[['personalization_score', 'platform_concentration_hhi', 'opi_behavior_change']].corr(method='pearson')

Unnamed: 0,personalization_score,platform_concentration_hhi,opi_behavior_change
personalization_score,1.0,,0.147108
platform_concentration_hhi,,,
opi_behavior_change,0.147108,,1.0


#### Regression Analysis


#### Direct Effect (Personalization → Behavior Change)

Model:

opi_behavior_change=β 
0
​
 +β 
1
​
 ⋅personalization_score+ϵ

In [14]:
import statsmodels.api as sm
X = sm.add_constant(df['personalization_score'])
model = sm.OLS(df['opi_behavior_change'], X).fit()
print(model.summary())

                             OLS Regression Results                            
Dep. Variable:     opi_behavior_change   R-squared:                       0.022
Model:                             OLS   Adj. R-squared:                  0.020
Method:                  Least Squares   F-statistic:                     18.20
Date:                 Wed, 14 May 2025   Prob (F-statistic):           2.21e-05
Time:                         11:36:47   Log-Likelihood:                -882.55
No. Observations:                  825   AIC:                             1769.
Df Residuals:                      823   BIC:                             1779.
Df Model:                            1                                         
Covariance Type:             nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
const               

#### Mediation Analysis (Personalization → Platform Concentration → Behavior Change)

##### Path A: Personalization → Platform Concentration

In [15]:
sm.OLS(df['platform_concentration_hhi'], sm.add_constant(df['personalization_score'])).fit().summary()

  return 1 - self.ssr/self.centered_tss
  return self.mse_model/self.mse_resid
  llf = -nobs2*np.log(2*np.pi) - nobs2*np.log(ssr / nobs) - nobs2
  dw = np.sum(diff_resids**2, axis=axis) / np.sum(resids**2, axis=axis)


0,1,2,3
Dep. Variable:,platform_concentration_hhi,R-squared:,
Model:,OLS,Adj. R-squared:,
Method:,Least Squares,F-statistic:,
Date:,"Wed, 14 May 2025",Prob (F-statistic):,
Time:,11:37:39,Log-Likelihood:,inf
No. Observations:,825,AIC:,-inf
Df Residuals:,823,BIC:,-inf
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0,0,,,0,0
personalization_score,0,0,,,0,0

0,1,2,3
Omnibus:,,Durbin-Watson:,
Prob(Omnibus):,,Jarque-Bera (JB):,
Skew:,,Prob(JB):,
Kurtosis:,,Cond. No.,18.3


##### Path B: Platform Concentration → Behavior Change (controlling for personalization)

In [16]:
sm.OLS(df['opi_behavior_change'], sm.add_constant(df[['personalization_score', 'platform_concentration_hhi']])).fit().summary()

  return np.sqrt(eigvals[0]/eigvals[-1])


0,1,2,3
Dep. Variable:,opi_behavior_change,R-squared:,0.022
Model:,OLS,Adj. R-squared:,0.02
Method:,Least Squares,F-statistic:,18.2
Date:,"Wed, 14 May 2025",Prob (F-statistic):,2.21e-05
Time:,11:38:34,Log-Likelihood:,-882.55
No. Observations:,825,AIC:,1769.0
Df Residuals:,823,BIC:,1779.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,3.3760,0.126,26.747,0.000,3.128,3.624
personalization_score,0.1649,0.039,4.267,0.000,0.089,0.241
platform_concentration_hhi,0,0,,,0,0

0,1,2,3
Omnibus:,106.112,Durbin-Watson:,1.778
Prob(Omnibus):,0.0,Jarque-Bera (JB):,198.953
Skew:,-0.787,Prob(JB):,6.28e-44
Kurtosis:,4.819,Cond. No.,inf


#### Moderation Analysis

##### Test if platform concentration moderates the personalization-behavior relationship:

Behavior Change=β 
0
​
 +β 
1
​
 ⋅Personalization+β 
2
​
 ⋅Concentration+β 
3
​
 ⋅(Personalization×Concentration)+ϵ‘‘

In [17]:
df['interaction'] = df['personalization_score'] * df['platform_concentration_hhi']
sm.OLS(df['opi_behavior_change'], sm.add_constant(df[['personalization_score', 'platform_concentration_hhi', 'interaction']])).fit().summary()

  return np.sqrt(eigvals[0]/eigvals[-1])


0,1,2,3
Dep. Variable:,opi_behavior_change,R-squared:,0.022
Model:,OLS,Adj. R-squared:,0.02
Method:,Least Squares,F-statistic:,18.2
Date:,"Wed, 14 May 2025",Prob (F-statistic):,2.21e-05
Time:,11:39:13,Log-Likelihood:,-882.55
No. Observations:,825,AIC:,1769.0
Df Residuals:,823,BIC:,1779.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,3.3760,0.126,26.747,0.000,3.128,3.624
personalization_score,0.1649,0.039,4.267,0.000,0.089,0.241
platform_concentration_hhi,0,0,,,0,0
interaction,0,0,,,0,0

0,1,2,3
Omnibus:,106.112,Durbin-Watson:,1.778
Prob(Omnibus):,0.0,Jarque-Bera (JB):,198.953
Skew:,-0.787,Prob(JB):,6.28e-44
Kurtosis:,4.819,Cond. No.,inf


##### Used for table generation

In [18]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from scipy.stats import pearsonr

In [19]:
def test_correlation(df, var1, var2):
    """Calculate Pearson correlation and p-value."""
    corr, p_value = pearsonr(df[var1], df[var2])
    return {
        'Test': 'Correlation',
        'Variables': f"{var1} vs {var2}",
        'Coefficient': corr,
        'p-value': p_value,
        'Interpretation': 'Positive correlation' if corr > 0 else 'Negative correlation',
        'Significance': 'p < 0.05' if p_value < 0.05 else 'Not significant'
    }

def test_regression(df, y, X_vars):
    """Run OLS regression and extract key metrics."""
    X = sm.add_constant(df[X_vars])
    model = sm.OLS(df[y], X).fit()
    return {
        'Test': 'Regression',
        'Variables': f"{y} ~ {' + '.join(X_vars)}",
        'Coefficient': model.params[1],  # Slope of first predictor
        'p-value': model.pvalues[1],
        'R-squared': model.rsquared,
        'Interpretation': f"1-unit increase in {X_vars[0]} predicts {model.params[1]:.2f} change in {y}",
        'Significance': 'p < 0.05' if model.pvalues[1] < 0.05 else 'Not significant'
    }

In [20]:
# Initialize results list
results = []

# Test 1: Correlation (Personalization vs Behavior Change)
results.append(test_correlation(df, 'personalization_score', 'opi_behavior_change'))

# Test 2: Regression (Personalization → Behavior Change)
results.append(test_regression(df, 'opi_behavior_change', ['personalization_score']))

# Test 3: Regression with Interaction (Moderation by Platform Concentration)
df['interaction'] = df['personalization_score'] * df['platform_concentration_hhi']
results.append(test_regression(df, 'opi_behavior_change', 
                             ['personalization_score', 'platform_concentration_hhi', 'interaction']))

  'Coefficient': model.params[1],  # Slope of first predictor
  'p-value': model.pvalues[1],
  'Interpretation': f"1-unit increase in {X_vars[0]} predicts {model.params[1]:.2f} change in {y}",
  'Significance': 'p < 0.05' if model.pvalues[1] < 0.05 else 'Not significant'
  'Coefficient': model.params[1],  # Slope of first predictor
  'p-value': model.pvalues[1],
  'Interpretation': f"1-unit increase in {X_vars[0]} predicts {model.params[1]:.2f} change in {y}",
  'Significance': 'p < 0.05' if model.pvalues[1] < 0.05 else 'Not significant'


In [21]:
results_df = pd.DataFrame(results)
results_df = results_df[[
    'Test', 'Variables', 'Coefficient', 'p-value', 'R-squared', 'Interpretation', 'Significance'
]]

In [27]:


styled_df = results_df.style.set_table_styles([
    {'selector': 'th', 'props': [('font-size', '10pt'), ('max-width', '200px'), ('background-color', 'white'), ('color', '#222')]},
    {'selector': 'td', 'props': [('font-size', '10pt'), ('max-width', '220px'), ('white-space', 'normal'), ('background-color', 'white'), ('color', '#222')]}
])
styled_df

Unnamed: 0,Test,Variables,Coefficient,p-value,R-squared,Interpretation,Significance
0,Correlation,personalization_score vs opi_behavior_change,0.147,0.0,,Positive correlation,p < 0.05
1,Regression,opi_behavior_change ~ personalization_score,0.165,0.0,0.022,1-unit increase in personalization_score predicts 0.16 change in opi_behavior_change,p < 0.05
2,Regression,opi_behavior_change ~ personalization_score + platform_concentration_hhi + interaction,0.165,0.0,0.022,1-unit increase in personalization_score predicts 0.16 change in opi_behavior_change,p < 0.05
