In [21]:
import pandas as pd
import numpy as np
from scipy.stats import chi2_contingency

In [22]:
df = pd.read_csv("data.csv")
df = df.dropna()

In [23]:
# List of campaign response columns
campaign_cols = ['AcceptedCmp1', 'AcceptedCmp2', 'AcceptedCmp3', 'AcceptedCmp4', 'AcceptedCmp5','Response']


In [25]:
# Create a contingency table of country and campaign responses for each campaign column
contingency_tables = {}
for col in campaign_cols:
    contingency_tables[col] = pd.crosstab(df['Country'], df[col])

In [26]:
# Perform the chi-squared test for each campaign column
results = []
for col in campaign_cols:
    table = contingency_tables[col]
    chi2_stat, p_val, dof, expected = chi2_contingency(table)
    results.append({'Campaign': col, 'Chi-squared statistic': chi2_stat, 'p-value': p_val})


In [27]:
# Create dataframes for contingency tables and results
contingency_tables_df = pd.concat(contingency_tables.values(), keys=contingency_tables.keys())
results_df = pd.DataFrame(results)

In [28]:
contingency_tables_df

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1
Unnamed: 0_level_1,Country,Unnamed: 2_level_1,Unnamed: 3_level_1
AcceptedCmp1,AUS,140,7
AcceptedCmp1,CA,248,18
AcceptedCmp1,GER,109,7
AcceptedCmp1,IND,140,7
AcceptedCmp1,ME,3,0
AcceptedCmp1,SA,317,20
AcceptedCmp1,SP,1017,76
AcceptedCmp1,US,100,7
AcceptedCmp2,AUS,147,0
AcceptedCmp2,CA,260,6


In [29]:
results_df

Unnamed: 0,Campaign,Chi-squared statistic,p-value
0,AcceptedCmp1,2.288906,0.942137
1,AcceptedCmp2,5.43559,0.606964
2,AcceptedCmp3,4.902441,0.671868
3,AcceptedCmp4,6.491628,0.483654
4,AcceptedCmp5,5.614269,0.585439
5,Response,12.521193,0.08467


In [42]:
import statsmodels.api as sm

# Create a binary response variable for campaign success
df['CampaignSuccess'] = df['AcceptedCmp1'].apply(lambda x: 1 if x==1 else 0)

# Select variables to include in the logistic regression model
X = df[['Country', 'Year_Birth', 'Education', 'Marital_Status', 'Kidhome', 'Teenhome',
        'Recency', 'NumDealsPurchases', 'NumWebPurchases', 'NumCatalogPurchases',
        'NumStorePurchases', 'NumWebVisitsMonth']]

# Create dummy variables for categorical variables
X = pd.get_dummies(X, columns=['Country', 'Education', 'Marital_Status'], drop_first=True)

# Fit the logistic regression model
logit_model = sm.Logit(df['CampaignSuccess'], X)
logit_results = logit_model.fit()

# Print the summary of the logistic regression model
print(logit_results.summary())


         Current function value: 0.177141
         Iterations: 35
                           Logit Regression Results                           
Dep. Variable:        CampaignSuccess   No. Observations:                 2216
Model:                          Logit   Df Residuals:                     2189
Method:                           MLE   Df Model:                           26
Date:                Fri, 17 Mar 2023   Pseudo R-squ.:                  0.2559
Time:                        20:59:04   Log-Likelihood:                -392.55
converged:                      False   LL-Null:                       -527.51
Covariance Type:            nonrobust   LLR p-value:                 2.025e-42
                              coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------------------
Year_Birth                 -0.0011      0.001     -1.295      0.195      -0.003       0.001
Kidhome                   

