In [1]:
import pandas as pd
import numpy as np
from scipy.stats import chi2_contingency

In [2]:
df = pd.read_csv("data.csv")
#df = df.dropna()

In [3]:
# List of campaign response columns
campaign_cols = ['AcceptedCmp1', 'AcceptedCmp2', 'AcceptedCmp3', 'AcceptedCmp4', 'AcceptedCmp5','Response']


In [4]:
# Create a contingency table of country and campaign responses for each campaign column
contingency_tables = {}
for col in campaign_cols:
    contingency_tables[col] = pd.crosstab(df['Country'], df[col])

In [5]:
# Perform the chi-squared test for each campaign column
results = []
for col in campaign_cols:
    table = contingency_tables[col]
    chi2_stat, p_val, dof, expected = chi2_contingency(table)
    results.append({'Campaign': col, 'Chi-squared statistic': chi2_stat, 'p-value': p_val})


In [6]:
# Create dataframes for contingency tables and results
contingency_tables_df = pd.concat(contingency_tables.values(), keys=contingency_tables.keys())
results_df = pd.DataFrame(results)

In [7]:
contingency_tables_df

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1
Unnamed: 0_level_1,Country,Unnamed: 2_level_1,Unnamed: 3_level_1
AcceptedCmp1,AUS,153,7
AcceptedCmp1,CA,250,18
AcceptedCmp1,GER,113,7
AcceptedCmp1,IND,141,7
AcceptedCmp1,ME,3,0
AcceptedCmp1,SA,317,20
AcceptedCmp1,SP,1017,78
AcceptedCmp1,US,102,7
AcceptedCmp2,AUS,160,0
AcceptedCmp2,CA,262,6


In [8]:
results_df

Unnamed: 0,Campaign,Chi-squared statistic,p-value
0,AcceptedCmp1,3.160705,0.869748
1,AcceptedCmp2,5.613257,0.58556
2,AcceptedCmp3,5.182668,0.637682
3,AcceptedCmp4,7.257259,0.402595
4,AcceptedCmp5,5.764426,0.56751
5,Response,12.917241,0.074151


In [9]:
import statsmodels.api as sm

# Create a binary response variable for campaign success
df['CampaignSuccess'] = df['AcceptedCmp1'].apply(lambda x: 1 if x==1 else 0)

# Select variables to include in the logistic regression model
X = df[['Country', 'Year_Birth', 'Education', 'Marital_Status', 'Kidhome', 'Teenhome',
        'Recency', 'NumDealsPurchases', 'NumWebPurchases', 'NumCatalogPurchases',
        'NumStorePurchases', 'NumWebVisitsMonth']]

# Create dummy variables for categorical variables
X = pd.get_dummies(X, columns=['Country', 'Education', 'Marital_Status'], drop_first=True)

# Fit the logistic regression model
logit_model = sm.Logit(df['CampaignSuccess'], X)
logit_results = logit_model.fit()

# Print the summary of the logistic regression model
print(logit_results.summary())


         Current function value: 0.179068
         Iterations: 35
                           Logit Regression Results                           
Dep. Variable:        CampaignSuccess   No. Observations:                 2240
Model:                          Logit   Df Residuals:                     2213
Method:                           MLE   Df Model:                           26
Date:                Mon, 20 Mar 2023   Pseudo R-squ.:                  0.2495
Time:                        22:15:11   Log-Likelihood:                -401.11
converged:                      False   LL-Null:                       -534.47
Covariance Type:            nonrobust   LLR p-value:                 8.821e-42
                              coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------------------
Year_Birth                 -0.0011      0.001     -1.382      0.167      -0.003       0.000
Kidhome                   

