In [1]:
import pandas as pd

# The U.S. National Epidemiological Survey on Alcohol and Related Conditions (NESARC) is a survey
# designed to determine the magnitude of alcohol use and psychiatric disorders in the U.S. population. 
# It is a representative sample of the non-institutionalized population 18 years and older.
#
# Selected variables for the test (that is, are major depression and alcohol abuse/dependence
# independent of each other, or associated?)
# MAJORDEP12 - MAJOR DEPRESSION IN THE LAST 12 MONTHS (NON-HIERARCHICAL) (
#     0 - No,
#     1 - Yes)
# ALCABDEP12DX - ALCOHOL ABUSE/DEPENDENCE IN THE LAST 12 MONTHS (
#     0 - No alcohol diagnosis,
#     1 - Alcohol abuse only,
#     2 - Alcohol dependence only,
#     3 - Alcohol abuse and dependence)

# Restrict the load to the two variables under test
cols = [
    'MAJORDEP12',
    'ALCABDEP12DX',
]

# Read only those columns from the NESARC extract
# (low_memory=False turns off pandas' chunked, low-memory parsing mode)
df = pd.read_csv(
    'nesarc.csv',
    usecols=cols,
    low_memory=False,
)

In [2]:
# Preview the first 5 rows to confirm both columns were loaded
print(df.head(n=5))

   MAJORDEP12  ALCABDEP12DX
0           0             0
1           0             0
2           0             0
3           0             0
4           0             0


In [3]:
# Cross-tabulate observed counts: depression status in rows,
# alcohol diagnosis category in columns
contab = pd.crosstab(index=df['MAJORDEP12'], columns=df['ALCABDEP12DX'])
print(contab)

ALCABDEP12DX      0     1    2    3
MAJORDEP12                         
0             36816  1652  443  697
1              2950   191  110  234


In [4]:
# Express every cell as a share of its column total, i.e. the proportion
# with / without major depression inside each alcohol-diagnosis group
import numpy as np

colsum = contab.sum(axis='index')
colpct = contab.div(colsum, axis='columns')
print(np.round(colpct, 3))

ALCABDEP12DX      0      1      2      3
MAJORDEP12                              
0             0.926  0.896  0.801  0.749
1             0.074  0.104  0.199  0.251


In [5]:
# Chi-square test of independence on the full 2x4 table of observed counts
from scipy import stats

# chi2_contingency returns the test statistic, its p-value, the degrees of
# freedom and the table of frequencies expected under independence
chi2, p_val, dof, exp = stats.chi2_contingency(contab)

# print(...) with a single argument behaves identically on Python 2 and 3,
# matching the print calls used in the earlier cells.
# NOTE: rounding to 2 decimals displays a very small p-value as 0.0.
print('Chi-square statistic: {0}'.format(round(chi2, 2)))
print('p-value: {0}'.format(round(p_val, 2)))
print('Degrees of freedom: {0}'.format(dof))
print('Expected frequencies:\n{0}'.format(np.around(exp, decimals=0)))

Chi-square statistic: 504.42
p-value: 0.0
Degrees of freedom: 3
Expected frequencies:
[[ 36550.   1694.    508.    856.]
 [  3216.    149.     45.     75.]]


In [6]:
# Conduct post hoc tests for all paired comparisons using the Bonferroni-adjusted
# significance level: 0.05 / 6 comparisons ~= 0.008
# Show the post hoc test for group 0 and 1
recode = {0: 0, 1: 1}

# Categories missing from `recode` map to NaN and are therefore left out of the crosstab.
# Use item assignment: `df.COMP = ...` would only set a Python attribute on the
# DataFrame object instead of creating a real column.
df['COMP'] = df['ALCABDEP12DX'].map(recode)

contab = pd.crosstab(df['MAJORDEP12'], df['COMP'])

colsum = contab.sum(axis=0)
colpct = contab/colsum

# For a 2x2 table (1 degree of freedom) scipy applies Yates' continuity
# correction by default
chi2, p_val, dof, exp = stats.chi2_contingency(contab)

print('Chi-square statistic: {0}'.format(round(chi2, 2)))
print('p-value: {0}'.format(round(p_val, 6)))
print('Degrees of freedom: {0}'.format(dof))
print('Expected frequencies:\n{0}'.format(np.around(exp, decimals=0)))

Chi-square statistic: 21.47
p-value: 4e-06
Degrees of freedom: 1
Expected frequencies:
[[ 36764.   1704.]
 [  3002.    139.]]


In [7]:
# Show the post hoc test for group 2 and 3
# (judge the p-value against the Bonferroni-adjusted level 0.05 / 6 ~= 0.008)
recode = {2: 2, 3: 3}

# Categories missing from `recode` map to NaN and are therefore left out of the crosstab.
# Use item assignment and bracket access: `df.COMP = ...` would only set a Python
# attribute on the DataFrame object, and a leftover attribute of that name would
# shadow the column on `df.COMP` reads.
df['COMP'] = df['ALCABDEP12DX'].map(recode)

contab = pd.crosstab(df['MAJORDEP12'], df['COMP'])

colsum = contab.sum(axis=0)
colpct = contab/colsum

# For a 2x2 table (1 degree of freedom) scipy applies Yates' continuity
# correction by default
chi2, p_val, dof, exp = stats.chi2_contingency(contab)

print('Chi-square statistic: {0}'.format(round(chi2, 2)))
print('p-value: {0}'.format(round(p_val, 2)))
print('Degrees of freedom: {0}'.format(dof))
print('Expected frequencies:\n{0}'.format(np.around(exp, decimals=0)))

Chi-square statistic: 5.06
p-value: 0.02
Degrees of freedom: 1
Expected frequencies:
[[ 425.  715.]
 [ 128.  216.]]


In [8]:
# Model interpretation for the chi-square test results
#
# The test yielded a chi-square statistic of 504.42 (3 degrees of freedom) and a p-value that
# rounds to 0, which is less than the alpha level of 0.05. According to these results we can
# reject the null hypothesis and say that major depression and alcohol abuse/dependence status
# are significantly associated.
#
# Model interpretation for the post hoc chi-square test results
#
# Post hoc test results showed a significant difference in 5 of the 6 paired comparisons.
# The exception: the prevalence of major depression was statistically similar between the group
# with alcohol dependence only and the group with both alcohol abuse and dependence.