A data user's guide for [inputs/NISPUF17.csv](inputs/NISPUF17.csv) is available at [inputs/NIS-PUF17-DUG.pdf](inputs/NIS-PUF17-DUG.pdf).

In [2]:
import pandas as pd
import scipy.stats as stats

# Mothers' Education Level
Compute the proportion of children in the dataset whose mother's education level (`EDUC1`) is less than high school (<12), high school (12), more than high school but not a college graduate (>12), or a college degree.

In [19]:
# Load the reduced NIS-PUF17 extract that every later cell reads from `df`.
#
# Provenance: the reduced file was exported once from an earlier, wider frame,
# keeping only the saved index column plus EDUC1, CBF_01, P_NUMVRC, SEX and
# HAD_CPOX. The one-off export snippet is not needed to re-run the notebook.
df = pd.read_csv(
    filepath_or_buffer='inputs/NISPUF17_reduced.csv',
)

# Display the frame (pandas truncates the rendering to the first/last rows).
df

Unnamed: 0.1,Unnamed: 0,EDUC1,CBF_01,P_NUMVRC,SEX,HAD_CPOX
0,1.0,4.0,1.0,,1.0,2.0
1,2.0,3.0,2.0,,1.0,2.0
2,3.0,3.0,2.0,,2.0,2.0
3,4.0,4.0,2.0,1.0,2.0,2.0
4,5.0,1.0,1.0,0.0,2.0,2.0
...,...,...,...,...,...,...
28460,28461.0,3.0,2.0,,2.0,2.0
28461,28462.0,2.0,1.0,,2.0,2.0
28462,28463.0,3.0,1.0,,2.0,2.0
28463,28464.0,2.0,2.0,,2.0,2.0


In [4]:
# Denominator is every row of the frame, including rows whose EDUC1 is missing.
edu = df['EDUC1']
total = len(df)

# One entry per EDUC1 code, in code order: rows with that code / all rows.
{
    label: int((edu == code).sum()) / total
    for code, label in (
        (1, 'less than high school'),
        (2, 'high school'),
        (3, 'more than high school but not college'),
        (4, 'college'),
    )
}

{'less than high school': 0.10202002459160373,
 'high school': 0.172352011241876,
 'more than high school but not college': 0.24588090637625154,
 'college': 0.47974705779026877}

# Average Influenza Doses
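Explore the relationship between being fed breast milk as a child and receiving seasonal influenza vaccinations from a healthcare provider, by comparing the average number of influenza doses (`P_NUMFLU`) between the two `CBF_01` groups.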

In [5]:
# Mean number of influenza doses (P_NUMFLU) for each CBF_01 group.
#
# The reduced CSV shown in this notebook has no P_NUMFLU column, so indexing
# `df` directly raises KeyError on a fresh Restart & Run All. When the column
# is absent, read just the two needed columns from the full survey file.
# NOTE(review): assumes inputs/NISPUF17.csv is present and contains
# P_NUMFLU -- confirm before relying on the fallback.
flu_columns = ['CBF_01', 'P_NUMFLU']
if 'P_NUMFLU' in df.columns:
    flu_df = df[flu_columns]
else:
    flu_df = pd.read_csv('inputs/NISPUF17.csv', usecols=flu_columns)

# .mean() skips missing P_NUMFLU values, as in the original computation.
infl1 = flu_df.loc[flu_df['CBF_01'] == 1, 'P_NUMFLU'].mean()
infl2 = flu_df.loc[flu_df['CBF_01'] == 2, 'P_NUMFLU'].mean()
(infl1, infl2)

(np.float64(1.8799187420058687), np.float64(1.5963945918878317))

# Gender Vaccine Effectiveness
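Link vaccine effectiveness to the sex of the child: among children with at least one recorded vaccine dose (`P_NUMVRC > 0`), compute for each sex the ratio of `HAD_CPOX == 1` rows to `HAD_CPOX == 2` rows.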

In [6]:
def sex(x):
    """Split frame ``x`` into a pair: its ``SEX == 1`` rows, then its ``SEX == 2`` rows."""
    return x[x['SEX'] == 1], x[x['SEX'] == 2]


def ratio(x):
    """Return the number of ``HAD_CPOX == 1`` rows divided by the number of ``HAD_CPOX == 2`` rows.

    Raises ZeroDivisionError when ``x`` has no ``HAD_CPOX == 2`` rows.
    """
    had_count = len(x[x['HAD_CPOX'] == 1])
    had_not_count = len(x[x['HAD_CPOX'] == 2])
    return had_count / had_not_count


# Only rows with a positive dose count; a missing P_NUMVRC fails the comparison.
vaccinated = df[df['P_NUMVRC'] > 0]
males, females = sex(vaccinated)

{'male': ratio(males), 'female': ratio(females)}

{'male': 0.009675583380762664, 'female': 0.0077918259335489565}

# Chickenpox Correlation
Calculate the Pearson correlation between the number of vaccine doses received (`P_NUMVRC`) and the chickenpox indicator (`HAD_CPOX`), as a measure of whether vaccination is associated with preventing the disease.

In [7]:
# Keep rows where HAD_CPOX is 1 or 2 and P_NUMVRC is present and non-negative
# (a missing P_NUMVRC fails the >= 0 comparison, so those rows drop out).
ldf = df.loc[
    (df["P_NUMVRC"] >= 0) & df["HAD_CPOX"].isin([1, 2])
]

# Pearson correlation coefficient between the two columns; p-value is not shown.
stats.pearsonr(
    ldf["HAD_CPOX"],
    ldf["P_NUMVRC"],
).correlation

np.float64(0.07044873460147985)