# Setup and Imports

These cells will import necessary libraries and configure the notebook's visual style.

In [1]:
# Efficient math and data management
import numpy as np
import pandas as pd

# You may import useful modules and functions from the Python Standard Library.
import os
from functools import reduce  

# Visualization libraries
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Inline figures. Can swap comments to use interactive figures. Use inline figures for assignment submission.
%matplotlib inline
# %matplotlib notebook

In [3]:
# Set seaborn visual style
sns.set()
sns.set_context('talk')
plt.rcParams["patch.force_edgecolor"] = False  # Turn off histogram borders

# Load Data

Load the combined data file.

In [11]:
#Kidney questionnaire (https://wwwn.cdc.gov/Nchs/Nhanes/2017-2018/P_KIQ_U.htm)
kidney_data = pd.read_sas('P_KIQ_U.XPT')

#Demographic data (https://wwwn.cdc.gov/Nchs/Nhanes/2017-2018/P_DEMO.htm)
dem_data = pd.read_sas('P_DEMO.XPT')

#Standard biochemistry profile
bio_pro = pd.read_sas('P_BIOPRO.XPT')

#Blood metal data (https://wwwn.cdc.gov/Nchs/Nhanes/2017-2018/P_PBCD.htm)
metal_data = pd.read_sas('P_PBCD.XPT')

#Albumin Creatin (https://wwwn.cdc.gov/Nchs/Nhanes/2017-2018/P_ALB_CR.htm)
acr_data = pd.read_sas('P_ALB_CR.XPT')
print(acr_data)

           SEQN  URXUMA  URXUMS      URDUMALC  URXUCR   URXCRS      URDUCRLC  \
0      109264.0     NaN     NaN           NaN     NaN      NaN           NaN   
1      109266.0     5.5     5.5  5.397605e-79    36.0   3182.4  5.397605e-79   
2      109270.0     4.0     4.0  5.397605e-79   165.0  14586.0  5.397605e-79   
3      109271.0     2.4     2.4  5.397605e-79    32.0   2828.8  5.397605e-79   
4      109273.0     4.9     4.9  5.397605e-79   121.0  10696.4  5.397605e-79   
...         ...     ...     ...           ...     ...      ...           ...   
13022  124817.0    20.1    20.1  5.397605e-79    63.0   5569.2  5.397605e-79   
13023  124818.0    12.8    12.8  5.397605e-79   370.0  32708.0  5.397605e-79   
13024  124820.0    10.8    10.8  5.397605e-79    68.0   6011.2  5.397605e-79   
13025  124821.0     6.1     6.1  5.397605e-79   191.0  16884.4  5.397605e-79   
13026  124822.0     4.0     4.0  5.397605e-79   100.0   8840.0  5.397605e-79   

       URDACT  
0         NaN  
1      

In [None]:
def gfr(seq_num):
    user_dem_data = dem_data[dem_data['SEQN'] == seq_num]
    race = user_dem_data['RIDRETH1']
    gender = user_dem_data['RIAGENDR']
    age = user_dem_data['RIDAGEYR']
    sc = bio_pro[bio_pro['SEQN'] == seq_num]['LBXSCR']
    k = 0.7 if gender == 1 else 0.9
    a = -0.329 if gender == 1 else -0.411
    t = 1.159 if race == 4 else 1
    s = 1.018 if gender == 1 else 1
    return 141 * (min(sc/k, 1)**a) * (max(sc/k, 1)**-1.209) * (0.993**age) * s * t


In [5]:
#Count the nuber of non-nan values in the dataset and divide by length of dataset
non_null_kidney = kidney_data['KIQ022'][pd.notnull(kidney_data['KIQ022'])]
with_kidney_disease = non_null_kidney[non_null_kidney == 1]
print(len(with_kidney_disease))
# percent_tested = num_tested / len(data)
# print(f'{round(percent_tested * 100,4)}%')

383


In [6]:
non_null_lead = metal_data['LBXBPB'][pd.notnull(metal_data['LBXBPB'])]
with_high_lead = non_null_lead[non_null_lead >= 5]
print(len(with_high_lead))

124


In [7]:
non_null_mercury = metal_data['LBXTHG'][pd.notnull(metal_data['LBXTHG'])]
with_high_mercury = non_null_mercury[non_null_mercury >= 5]
print(len(with_high_mercury))

398


In [8]:
non_null_manganese = metal_data['LBXBMN'][pd.notnull(metal_data['LBXBMN'])]
with_high_manganese = non_null_manganese[non_null_manganese > 15]
print(len(with_high_manganese))

1051


In [9]:
num_both = np.count_nonzero((non_null_kidney == 1) & ((non_null_lead >= 5) | (non_null_manganese > 15)))
print(num_both)

20


In [10]:
non_null_acr = acr_data['URDACT'][pd.notnull(acr_data['URDACT'])]
with_high_acr = non_null_acr[non_null_acr > 30]
print(len(with_high_acr))

1656
