In [11]:
import pandas as pd
from scipy.stats import chi2_contingency

df = pd.read_spss("dataset.sav")  

demographic_vars = ['gender', 'sexual_id', 'race_NIH', 'ethnicity' ]

alpha = 0.05

for var in demographic_vars:
    print(f"Testing relationship between {var} and experiencing sexual violence:\n")
    
    # Creating a contingency table
    contingency_table = pd.crosstab(df[var], df['SESV63_any'])
    
    # Perform the chi-square test
    chi2, p, dof, expected = chi2_contingency(contingency_table)
    
    print(f"Chi-square statistic: {chi2}")
    print(f"P-value: {p}")
    print(f"Degrees of freedom: {dof}\n")
    print("Expected frequencies if no relationship:")
    print(expected)
    print("\n" + "="*50 + "\n")
    
    if p < alpha:
        print("\nReject the null hypothesis - There is a significant association between the variables.")
    else:
        print("\nFail to reject the null hypothesis - There is no significant association between the variables.")
    
    print("\n" + "="*50 + "\n")

Testing relationship between gender and experiencing sexual violence:

Chi-square statistic: 818.3527313853451
P-value: 5.513766551360789e-07
Degrees of freedom: 630

Expected frequencies if no relationship:
[[5.16425121e+00 1.30434783e-01 1.93236715e-01 2.70531401e-01
  3.09178744e-01 1.49758454e-01 2.89855072e-01 1.44927536e-01
  1.59420290e-01 8.69565217e-02 1.44927536e-01 9.66183575e-02
  1.93236715e-01 7.72946860e-02 1.30434783e-01 2.36714976e-01
  7.24637681e-02 7.72946860e-02 6.28019324e-02 7.24637681e-02
  8.21256039e-02 9.66183575e-02 3.38164251e-02 3.86473430e-02
  8.69565217e-02 3.38164251e-02 3.86473430e-02 8.21256039e-02
  2.89855072e-02 2.41545894e-02 5.79710145e-02 2.89855072e-02
  2.41545894e-02 7.24637681e-02 5.31400966e-02 1.44927536e-02
  9.17874396e-02 9.66183575e-03 2.89855072e-02 4.34782609e-02
  2.89855072e-02 9.66183575e-03 1.01449275e-01 3.86473430e-02
  1.44927536e-02 3.86473430e-02 2.89855072e-02 1.44927536e-02
  3.86473430e-02 9.66183575e-03 9.66183575e-03 1