In [2]:
import pandas as pd
from scipy.stats import shapiro


# Load the data set from the CSV file that sits next to the notebook.
csv_path = 'Political Interest.csv'
df = pd.read_csv(csv_path)

# Preview the first rows to confirm the file was parsed as expected.
df.head()

Unnamed: 0,gender,education_level,political_interest
0,1,1,38.0
1,1,1,39.0
2,1,1,35.0
3,1,1,38.0
4,1,1,41.0


# Normality test

In [8]:
from IPython.display import display


# Shapiro-Wilk test run separately on the political_interest values of every
# gender x education_level combination. Results are collected in a dict keyed
# by (gender, education_level).
gender_levels = df['gender'].unique()
education_levels = df['education_level'].unique()

normality = {}
for gender_code in gender_levels:
    in_gender = df['gender'] == gender_code
    for education_code in education_levels:
        in_cell = in_gender & (df['education_level'] == education_code)
        cell_scores = df.loc[in_cell, 'political_interest']
        w_stat, p_val = shapiro(cell_scores)
        # Store built-in int/float values rather than the scalar types
        # returned by pandas/SciPy.
        normality[(int(gender_code), int(education_code))] = {
            'Statistic': float(w_stat),
            'p-value': float(p_val),
        }

display(normality)



{(1, 1): {'Statistic': 0.9813390134795488, 'p-value': 0.9708070387442351},
 (1, 2): {'Statistic': 0.9565019530188729, 'p-value': 0.7610940646763964},
 (1, 3): {'Statistic': 0.9153413250787927, 'p-value': 0.31973071050675683},
 (2, 1): {'Statistic': 0.9629531035675938, 'p-value': 0.8189494017694237},
 (2, 2): {'Statistic': 0.9629531035675938, 'p-value': 0.8189494017694237},
 (2, 3): {'Statistic': 0.9499896853336705, 'p-value': 0.6683785084587048}}

# Levene's test

In [15]:
from scipy.stats import levene


# Levene's test for equality of variances: one sample of political_interest
# per gender x education_level combination, all passed to a single test.
group_levene = [
    df.loc[(df['gender'] == gender_code) & (df['education_level'] == education_code),
           'political_interest']
    for gender_code in df['gender'].unique()
    for education_code in df['education_level'].unique()
]

levene_stat, levene_p = levene(*group_levene)
# Convert to built-in floats and keep the result; a bare
# `float(a), float(b)` expression statement would be evaluated and discarded.
levene_stat, levene_p = float(levene_stat), float(levene_p)
print(f"Statistic: {levene_stat} p-value: {levene_p}")

Statistic: 2.20536094868572 p-value: 0.06764955900365917


# Two-way ANOVA

In [18]:
import statsmodels.api as sm 
from statsmodels.formula.api import ols 
# Two-way ANOVA on political_interest. Both factors are wrapped in C() so they
# are treated as categorical, and `*` expands to the two main effects plus the
# gender x education_level interaction.
anova_formula = 'political_interest ~ C(gender) * C(education_level)'
model = ols(anova_formula, data=df).fit()

# typ=2 requests Type II sums of squares from anova_lm.
anova_table = sm.stats.anova_lm(model, typ=2)

anova_table

Unnamed: 0,sum_sq,df,F,PR(>F)
C(gender),10.704737,1.0,0.744533,0.3921748
C(education_level),5409.958966,2.0,188.136131,1.553704e-24
C(gender):C(education_level),210.337661,2.0,7.314679,0.001587744
Residual,747.644444,52.0,,


# Post hoc tests (Tukey HSD)

In [6]:
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Tukey HSD pairwise comparison of political_interest between education
# levels. Only education_level is supplied as the grouping variable, so gender
# is not distinguished in this comparison.
tukey_education = pairwise_tukeyhsd(
    endog=df['political_interest'],
    groups=df['education_level'],
)

print(tukey_education.summary())



Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower   upper  reject
----------------------------------------------------
     1      2   5.2368 0.0009  1.9571  8.5166   True
     1      3  22.4711    0.0 19.2326 25.7095   True
     2      3  17.2342    0.0 13.9957 20.4727   True
----------------------------------------------------


In [7]:
# Build one label per row of the form "<gender>_<education_level>" so every
# gender x education_level combination becomes its own group.
gender_label = df['gender'].astype(str)
education_label = df['education_level'].astype(str)
df['gender_education'] = gender_label + "_" + education_label

# Tukey HSD pairwise comparison of political_interest between all combined
# gender/education groups.
tukey_interaction = pairwise_tukeyhsd(
    endog=df['political_interest'],
    groups=df['gender_education'],
)

print(tukey_interaction.summary())


Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower   upper  reject
----------------------------------------------------
   1_1    1_2      5.5 0.0371  0.2116 10.7884   True
   1_1    1_3  26.6556    0.0  21.501 31.8101   True
   1_1    2_1   2.1556 0.8165  -2.999  7.3101  False
   1_1    2_2   7.1556 0.0019   2.001 12.3101   True
   1_1    2_3  20.5556    0.0  15.401 25.7101   True
   1_2    1_3  21.1556    0.0  16.001 26.3101   True
   1_2    2_1  -3.3444 0.4021  -8.499  1.8101  False
   1_2    2_2   1.6556 0.9312  -3.499  6.8101  False
   1_2    2_3  15.0556    0.0   9.901 20.2101   True
   1_3    2_1    -24.5    0.0 -29.517 -19.483   True
   1_3    2_2    -19.5    0.0 -24.517 -14.483   True
   1_3    2_3     -6.1 0.0089 -11.117  -1.083   True
   2_1    2_2      5.0 0.0513  -0.017  10.017  False
   2_1    2_3     18.4    0.0  13.383  23.417   True
   2_2    2_3     13.4    0.0   8.383  18.417   True
----------------------------------------------