# NHANES Data Exploration

CC-1089

Data source: the CDC/NHANES 2017-March 2020 Examination data:
https://wwwn.cdc.gov/nchs/nhanes/search/datapage.aspx?Component=Examination&Cycle=2017-2020

Attempts at visualizing health data from NHANES.

The first dataset examined is anthropometric, i.e. body measurements. These are physical measurements of lengths, circumferences, and other anatomical aspects of the human body.

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Body Measure Data

In [None]:
# Map the variable codes used in the body-measures file to readable labels.
bmnames = {
    'SEQN': 'Respondent sequence number',
    'BMDSTATS': 'Body Measures Component Status Code',
    'BMXWT': 'Weight (kg)',
    'BMIWT': 'Weight Comment',
    'BMXRECUM': 'Recumbent Length (cm)',
    'BMIRECUM': 'Recumbent Length Comment',
    'BMXHEAD': 'Head Circumference (cm)',
    'BMIHEAD': 'Head Circumference Comment',
    'BMXHT': 'Standing Height (cm)',
    'BMIHT': 'Standing Height Comment',
    'BMXBMI': r'Body Mass Index ($kg/m^2$)',
    'BMDBMIC': 'BMI Category - Children/Youth',
    'BMXLEG': 'Upper Leg Length (cm)',
    'BMILEG': 'Upper Leg Length Comment',
    'BMXARML': 'Upper Arm Length (cm)',
    'BMIARML': 'Upper Arm Length Comment',
    'BMXARMC': 'Arm Circumference (cm)',
    'BMIARMC': 'Arm Circumference Comment',
    'BMXWAIST': 'Waist Circumference (cm)',
    'BMIWAIST': 'Waist Circumference Comment',
    'BMXHIP': 'Hip Circumference (cm)',
    'BMIHIP': 'Hip Circumference Comment',
}

# Load the XPT file and relabel its columns in one expression. The renamed frame
# is assigned instead of mutated with inplace=True, which keeps this cell in the
# same style as the blood-pressure loading cell further down.
bmdata = pd.read_sas('data/P_BMX.XPT').rename(columns=bmnames)
bmdata

In [None]:
# Per-feature tally of missing entries (out of 14300 observations)
bmdata.isna().sum()

In [None]:
# Keep only features that are missing in fewer than 20% of observations.
# NOTE: dropna(axis=1, thresh=k) keeps every column with at least k non-null
# values, so thresh=int(0.2*len(bmdata)) would retain columns that are up to 80%
# missing. Filtering on the per-column missing fraction states the rule directly.
max_missing_fraction = 0.2
bm_missing_fraction = bmdata.isna().mean()
bmdata2 = bmdata.loc[:, bm_missing_fraction < max_missing_fraction]

# Remove the respondent identifier and the component status code so that only
# the measurement columns remain for the analysis below.
bmdata2 = bmdata2.drop(columns=['Respondent sequence number', 'Body Measures Component Status Code'])
bmdata2

In [None]:
# Pairwise Pearson correlation matrix across all retained features
bmdata2.corr(method='pearson')

In [None]:
# Correlation heatmap for the body-measure features
bmcorr = bmdata2.corr()

# Applies the seaborn theme and an 8x8 inch default figure size.
sns.set(rc={'figure.figsize': (8, 8)})

# Colour scale is pinned to the full correlation range [-1, 1]; each cell is
# square and annotated with its coefficient.
ax = sns.heatmap(bmcorr, vmin=-1, vmax=1, cmap='Blues', square=True, annot=True)

# Slant the x tick labels so the long feature names do not overlap.
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
ax.set_title('Heatmap of correlations of Body Measure data', fontsize=16)
plt.show()

# Blood Pressure Data

In [None]:
# Map the variable codes used in the blood-pressure file to readable labels.
bpnames = {
    'SEQN': 'Respondent sequence number',
    'BPAOARM': 'Arm selected - oscillometric',
    'BPAOCSZ': 'Coded cuff size - oscillometric',
    'BPXOSY1': 'Systolic - 1st oscillometric reading',
    'BPXODI1': 'Diastolic - 1st oscillometric reading',
    'BPXOSY2': 'Systolic - 2nd oscillometric reading',
    'BPXODI2': 'Diastolic - 2nd oscillometric reading',
    'BPXOSY3': 'Systolic - 3rd oscillometric reading',
    'BPXODI3': 'Diastolic - 3rd oscillometric reading',
    'BPXOPLS1': 'Pulse - 1st oscillometric reading',
    'BPXOPLS2': 'Pulse - 2nd oscillometric reading',
    'BPXOPLS3': 'Pulse - 3rd oscillometric reading',
}

# Read the XPT file and relabel its columns in a single chained expression.
bpdata = pd.read_sas('data/P_BPXO.XPT').rename(columns=bpnames)
bpdata

In [None]:
# Work on a copy without the respondent identifier and the arm-selection column.
bp_excluded_columns = ['Respondent sequence number', 'Arm selected - oscillometric']
bpdata2 = bpdata.drop(columns=bp_excluded_columns)
bpdata2

In [None]:
# Column-wise mean of every remaining feature
bpdata2.mean(axis=0)

In [None]:
# Pairwise Pearson correlation matrix of the blood-pressure features
bpdata2.corr(method='pearson')

In [None]:
# Correlation heatmap for the blood-pressure features
bpcorr = bpdata2.corr()

# Applies the seaborn theme and an 8x8 inch default figure size.
sns.set(rc={'figure.figsize': (8, 8)})

# Colour scale is pinned to the full correlation range [-1, 1]; each cell is
# square and annotated with its coefficient.
ax2 = sns.heatmap(bpcorr, vmin=-1, vmax=1, cmap='Blues', square=True, annot=True)

# Slant the x tick labels so the long feature names do not overlap.
ax2.set_xticklabels(ax2.get_xticklabels(), rotation=45, ha='right')
ax2.set_title('Heatmap of correlations of Blood Pressure data', fontsize=16)
plt.show()

In [None]:
# Histogram of blood pressure values
systolic_cols = [
    'Systolic - 1st oscillometric reading',
    'Systolic - 2nd oscillometric reading',
    'Systolic - 3rd oscillometric reading',
]

# Compute one set of bin edges from all three readings together. Letting each
# series pick its own 100 bins gives different bin widths per series, which makes
# the bar heights incomparable between readings.
systolic_values = bpdata2[systolic_cols].to_numpy(dtype=float).ravel()
systolic_bins = np.histogram_bin_edges(systolic_values[~np.isnan(systolic_values)], bins=100)

fig, ax = plt.subplots()
for col in systolic_cols:
    # Missing readings are dropped explicitly; semi-transparent bars keep the
    # earlier readings visible instead of being hidden by the later ones.
    ax.hist(bpdata2[col].dropna(), bins=systolic_bins, alpha=0.5, label=col)
ax.set_title('Distribution of sequential systolic BP readings', fontsize=16)
ax.set_xlabel('Systolic blood pressure (mmHg)')
ax.set_ylabel('Count')

# Optionally add reference points for BP thresholds [1]:
ax.axvline(x=120, color='k', linestyle='--', label='Healthy/At-risk')
ax.axvline(x=135, color='k', linestyle='-', label='At-risk/High')

ax.legend()
plt.show()

In [None]:
# Histogram of diastolic blood pressure values
diastolic_cols = [
    'Diastolic - 1st oscillometric reading',
    'Diastolic - 2nd oscillometric reading',
    'Diastolic - 3rd oscillometric reading',
]

# Compute one set of bin edges from all three readings together. Letting each
# series pick its own 100 bins gives different bin widths per series, which makes
# the bar heights incomparable between readings.
diastolic_values = bpdata2[diastolic_cols].to_numpy(dtype=float).ravel()
diastolic_bins = np.histogram_bin_edges(diastolic_values[~np.isnan(diastolic_values)], bins=100)

fig, ax = plt.subplots()
for col in diastolic_cols:
    # Missing readings are dropped explicitly; semi-transparent bars keep the
    # earlier readings visible instead of being hidden by the later ones.
    ax.hist(bpdata2[col].dropna(), bins=diastolic_bins, alpha=0.5, label=col)
ax.set_title('Distribution of sequential diastolic BP readings', fontsize=16)
ax.set_xlabel('Diastolic blood pressure (mmHg)')
ax.set_ylabel('Count')

# Optionally add reference points for BP thresholds [1]:
ax.axvline(x=80, color='k', linestyle='--', label='Healthy/At-risk')
ax.axvline(x=85, color='k', linestyle='-', label='At-risk/High')

ax.legend()
plt.show()

In [None]:
# Histogram of pulse values
pulse_cols = [
    'Pulse - 1st oscillometric reading',
    'Pulse - 2nd oscillometric reading',
    'Pulse - 3rd oscillometric reading',
]

# Compute one set of bin edges from all three readings together. Letting each
# series pick its own 100 bins gives different bin widths per series, which makes
# the bar heights incomparable between readings.
pulse_values = bpdata2[pulse_cols].to_numpy(dtype=float).ravel()
pulse_bins = np.histogram_bin_edges(pulse_values[~np.isnan(pulse_values)], bins=100)

fig, ax = plt.subplots()
for col in pulse_cols:
    # Missing readings are dropped explicitly; semi-transparent bars keep the
    # earlier readings visible instead of being hidden by the later ones.
    ax.hist(bpdata2[col].dropna(), bins=pulse_bins, alpha=0.5, label=col)
ax.set_title('Distribution of sequential HR readings', fontsize=16)
ax.set_xlabel('Pulse (beats per minute)')
ax.set_ylabel('Count')
ax.legend()
plt.show()

## References
1. https://www.heartandstroke.ca/-/media/pdf-files/canada/health-information-catalogue/en-managing-your-blood-pressure.ashx