In [1]:
import pandas as pd
import numpy as np
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
%matplotlib inline
import matplotlib.pyplot as plt
import statsmodels.api as sms

In [2]:
url = 'nhanes_2015_2016.csv'
da = pd.read_csv(url)

In [4]:
da["SMQ020x"] = da.SMQ020.replace({1: "Yes", 2: "No", 7: np.nan, 9: np.nan})

In [5]:
da["RIAGENDRx"] = da.RIAGENDR.replace({1: "Male", 2: "Female"})

In [6]:
da.RIAGENDRx

0         Male
1         Male
2         Male
3       Female
4       Female
         ...  
5730    Female
5731      Male
5732    Female
5733      Male
5734    Female
Name: RIAGENDRx, Length: 5735, dtype: object

In [9]:
dx = da[["SMQ020x", "RIAGENDRx"]].dropna()

In [12]:
pd.crosstab(dx.SMQ020x, dx.RIAGENDRx)

RIAGENDRx,Female,Male
SMQ020x,Unnamed: 1_level_1,Unnamed: 2_level_1
No,2066,1340
Yes,906,1413


In [15]:
dx['SMQ020x'] = dx.SMQ020x.replace({'Yes': 1, 'No': 0})

dz = dx.groupby("RIAGENDRx").agg({"SMQ020x": [np.mean, np.size ]})
dz.columns = ['Proportion', 'Total n']

In [16]:
dz

Unnamed: 0_level_0,Proportion,Total n
RIAGENDRx,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,0.304845,2972
Male,0.513258,2753


### Difference of Two Population Proportions

In [17]:
p = .304845
n = 2972
se_female = np.sqrt(p * (1-p)/n)
se_female

0.00844415041930423

In [18]:
p = .513258
n = 2753
se_male = np.sqrt(p*(1-p)/n)
se_male

0.009526078787008965

In [19]:
se_diff = np.sqrt(se_female**2 + se_male**2)
se_diff

0.012729880335656654

In [20]:
d = .304845 - .513258
lcb = d - 1.96 * se_diff
ucb = d + 1.96 * se_diff 
(lcb, ucb)

(-0.23336356545788706, -0.18346243454211297)

In [21]:
da['BMXBMI'].head()

0    27.8
1    30.8
2    28.8
3    42.4
4    20.3
Name: BMXBMI, dtype: float64

In [22]:
da.groupby('RIAGENDRx').agg({"BMXBMI": [np.mean, np.std, np.size]})

Unnamed: 0_level_0,BMXBMI,BMXBMI,BMXBMI
Unnamed: 0_level_1,mean,std,size
RIAGENDRx,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Female,29.939946,7.753319,2976
Male,28.778072,6.252568,2759


In [23]:
sem_female = 7.753319 / np.sqrt(2976)
sem_male = 6.252568 / np.sqrt(2759)
(sem_female, sem_male)

(0.14212523289878048, 0.11903716451870151)

In [24]:
sem_diff = np.sqrt(sem_female**2 + sem_male**2)
sem_diff

0.18538993598139303

In [25]:
d = 29.939946 - 28.778072

In [26]:
lcb = d - 1.96 * sem_diff 
ucb = d + 1.96 * sem_diff 
(lcb, ucb)

(0.798509725476467, 1.5252382745235278)

In [27]:
da

Unnamed: 0,SEQN,ALQ101,ALQ110,ALQ130,SMQ020,RIAGENDR,RIDAGEYR,RIDRETH1,DMDCITZN,DMDEDUC2,...,BMXWT,BMXHT,BMXBMI,BMXLEG,BMXARML,BMXARMC,BMXWAIST,HIQ210,SMQ020x,RIAGENDRx
0,83732,1.0,,1.0,1,1,62,3,1.0,5.0,...,94.8,184.5,27.8,43.3,43.6,35.9,101.1,2.0,Yes,Male
1,83733,1.0,,6.0,1,1,53,3,2.0,3.0,...,90.4,171.4,30.8,38.0,40.0,33.2,107.9,,Yes,Male
2,83734,1.0,,,1,1,78,3,1.0,3.0,...,83.4,170.1,28.8,35.6,37.0,31.0,116.5,2.0,Yes,Male
3,83735,2.0,1.0,1.0,2,2,56,3,1.0,5.0,...,109.8,160.9,42.4,38.5,37.7,38.3,110.1,2.0,No,Female
4,83736,2.0,1.0,1.0,2,2,42,4,1.0,4.0,...,55.2,164.9,20.3,37.4,36.0,27.2,80.4,2.0,No,Female
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5730,93695,2.0,2.0,,1,2,76,3,1.0,3.0,...,59.1,165.8,21.5,38.2,37.0,29.5,95.0,2.0,Yes,Female
5731,93696,2.0,2.0,,2,1,26,3,1.0,5.0,...,112.1,182.2,33.8,43.4,41.8,42.3,110.2,2.0,No,Male
5732,93697,1.0,,1.0,1,2,80,3,1.0,4.0,...,71.7,152.2,31.0,31.3,37.5,28.8,,2.0,Yes,Female
5733,93700,,,,1,1,35,3,2.0,1.0,...,78.2,173.3,26.0,40.3,37.5,30.6,98.9,2.0,Yes,Male


In [28]:
da.columns

Index(['SEQN', 'ALQ101', 'ALQ110', 'ALQ130', 'SMQ020', 'RIAGENDR', 'RIDAGEYR',
       'RIDRETH1', 'DMDCITZN', 'DMDEDUC2', 'DMDMARTL', 'DMDHHSIZ', 'WTINT2YR',
       'SDMVPSU', 'SDMVSTRA', 'INDFMPIR', 'BPXSY1', 'BPXDI1', 'BPXSY2',
       'BPXDI2', 'BMXWT', 'BMXHT', 'BMXBMI', 'BMXLEG', 'BMXARML', 'BMXARMC',
       'BMXWAIST', 'HIQ210', 'SMQ020x', 'RIAGENDRx'],
      dtype='object')

In [29]:
nap = pd.read_csv("nap_no_nap.csv")

In [30]:
nap.head()

Unnamed: 0,id,sex,age (months),dlmo time,days napped,napping,nap lights outl time,nap sleep onset,nap midsleep,nap sleep offset,...,sleep onset latency,night midsleep time,night wake time,night sleep duration,night time in bed,24 h sleep duration,bedtime phase difference,sleep onset phase difference,midsleep phase difference,wake time phase difference
0,1,female,33.7,19.24,0,0,,,,,...,0.23,1.92,7.17,629.4,643.0,629.4,-1.21,-1.44,6.68,11.93
1,2,female,31.5,18.27,0,0,,,,,...,0.25,1.09,6.69,672.4,700.4,672.4,-0.96,-1.21,6.82,12.42
2,3,male,31.9,19.14,0,0,,,,,...,0.45,1.29,6.53,628.8,682.6,628.8,-0.46,-0.91,6.15,11.39
3,4,female,31.6,19.69,0,0,,,,,...,0.05,1.89,8.28,766.6,784.0,766.6,0.23,0.19,6.2,12.59
4,5,female,33.0,19.52,0,0,,,,,...,0.45,1.3,6.95,678.0,718.0,678.0,0.31,-0.13,5.78,11.43


In [35]:
nap.describe()

Unnamed: 0,id,age (months),dlmo time,days napped,napping,nap lights outl time,nap sleep onset,nap midsleep,nap sleep offset,nap wake time,...,sleep onset latency,night midsleep time,night wake time,night sleep duration,night time in bed,24 h sleep duration,bedtime phase difference,sleep onset phase difference,midsleep phase difference,wake time phase difference
count,20.0,20.0,20.0,20.0,20.0,15.0,15.0,15.0,15.0,15.0,...,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0
mean,10.5,34.185,19.6375,2.7,0.75,13.508,13.862667,14.716667,15.572,15.648,...,0.483,1.8045,7.0005,623.464,669.0865,700.409,-0.4835,-0.9735,6.1695,11.363
std,5.91608,2.029331,0.703419,1.894591,0.444262,0.763556,0.745735,0.773154,0.831773,0.660186,...,0.275052,0.502944,0.543889,54.908707,46.340104,45.592934,0.578994,0.588408,0.463641,0.71446
min,1.0,30.0,18.22,0.0,0.0,12.47,12.56,13.3,14.05,14.25,...,0.05,1.09,6.01,526.8,582.4,628.8,-1.73,-2.03,5.07,9.85
25%,5.75,32.975,19.27,0.75,0.75,12.9,13.425,14.305,15.09,15.265,...,0.28,1.33,6.65,599.9625,651.1,662.2375,-0.8825,-1.2675,5.92,11.06
50%,10.5,33.75,19.685,3.0,1.0,13.18,13.87,14.49,15.46,15.78,...,0.45,1.905,7.06,618.6,662.1,706.065,-0.425,-0.995,6.155,11.41
75%,15.25,36.225,19.995,4.0,1.0,13.995,14.45,15.36,16.05,16.15,...,0.645,2.1375,7.2175,651.15,696.2,724.1,-0.04,-0.8275,6.3475,11.945
max,20.0,37.1,21.06,5.0,1.0,14.75,15.03,15.92,17.01,16.6,...,1.19,2.92,8.28,766.6,784.0,782.3,0.33,0.19,7.05,12.59


In [52]:
nap.groupby("napping").agg({"night bedtime":[np.mean, np.std]})

Unnamed: 0_level_0,night bedtime,night bedtime
Unnamed: 0_level_1,mean,std
napping,Unnamed: 1_level_2,Unnamed: 2_level_2
0,19.59,0.507592
1,20.304,0.591062


In [50]:
nap[nap.napping == 1]["night bedtime"].mean()
nap[nap.napping == 0]["night bedtime"].mean()

19.590000000000003

In [49]:
ucb = 20.304 + se * 2.144
lcb = 20.304 - se * 2.144
(lcb, ucb)

(19.976800832143795, 20.631199167856202)

In [53]:
se2 = 0.507592 / np.sqrt(5)

In [54]:
ucb = 19.590 + se2 * 2.776
lcb = 19.590 - se2 * 2.776
(lcb, ucb)

(18.959842327613167, 20.220157672386833)