# Import modules
The CDC NHANES data sets used here are described on the [NHANES questionnaire data page](https://wwwn.cdc.gov/nchs/nhanes/search/datapage.aspx?Component=Questionnaire&CycleBeginYear=2015).

In [1]:
import glob
import os
import pdb

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import VarianceThreshold, SelectFromModel, SelectKBest, chi2

import nhanes as nhanes

%matplotlib notebook

## Settings

In [2]:
# Root directory of the local NHANES download, passed to nhanes.Dataset.
# Set the NHANES_DATA_PATH environment variable to point at your own copy;
# when it is unset, the original hard-coded location is used so existing
# setups keep working unchanged.
DATA_PATH = os.environ.get(
    'NHANES_DATA_PATH',
    'C:/Users/allen/Documents/Git-Repos/Opportunistic/CDC/NHANES/',
)
# Label of the dataset this notebook targets.
DATASET = 'cancer'

### Note:
The code below loads the selected dataset into two variables: `dataset_features` and `dataset_targets`.

Each available dataset is defined explicitly in `nhanes.py`.

In [3]:
# Build the NHANES dataset wrapper rooted at DATA_PATH and run the cancer
# loader (see nhanes.py for what load_cancer() reads and how it fills
# ds.features / ds.targets).
ds = nhanes.Dataset(DATA_PATH)
ds.load_cancer()

# Notebook-level handles to the loaded features and targets.
dataset_features = ds.features
dataset_targets = ds.targets

# Problem dimensions: feature count is the second axis of the feature
# array; the number of target classes is fixed at 2.
n_fe = dataset_features.shape[1]
n_classes = 2

Processing: Dietary\DSQIDS_G.XPT                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        

Processing: Laboratory\SSVARI_B.XPT                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     

Processing: Laboratory\SSVARI_C.XPT                                                                                Processing: Laboratory\TB_G.XPT                                                                                Processing: Laboratory\TCHOL_D.XPT                                                                                Processing: Laboratory\TCHOL_E.XPT                                                                                Processing: Laboratory\TCHOL_F.XPT                                                                                Processing: Laboratory\TCHOL_G.XPT                                                                                Processing: Laboratory\TCHOL_H.XPT                                                                                Processing: Laboratory\TCHOL_I.XPT                                                                                Processing: Laboratory\TELO_A.XPT                                        

Processing: Laboratory\TST_G.XPT                                                                                Processing: Laboratory\UAM_E.XPT                                                                                Processing: Laboratory\UASS_G.XPT                                                                                Processing: Laboratory\UASS_H.XPT                                                                                Processing: Laboratory\UAS_D.XPT                                                                                Processing: Laboratory\UAS_E.XPT                                                                                Processing: Laboratory\UAS_F.XPT                                                                                Processing: Laboratory\UAS_G.XPT                                                                                Processing: Laboratory\UAS_H.XPT                                                     

                                                                                Processing: Laboratory\WPIN_D.XPT                                                                                Processing: Laboratory\ALB_CR_D.XPT                                                                                Processing: Laboratory\ALB_CR_E.XPT                                                                                Processing: Laboratory\ALB_CR_F.XPT                                                                                Processing: Laboratory\ALB_CR_G.XPT                                                                                Processing: Laboratory\ALB_CR_H.XPT                                                                                Processing: Laboratory\ALB_CR_I.XPT                                                                                Processing: Laboratory\ALDUST_D.XPT                                                                  

Processing: Laboratory\UIO_D.XPT                                                                                Processing: Laboratory\UIO_E.XPT                                                                                Processing: Laboratory\UIO_F.XPT                                                                                Processing: Laboratory\UIO_G.XPT                                                                                Processing: Laboratory\UIO_H.XPT                                                                                Processing: Laboratory\UMS_H.XPT                                                                                Processing: Laboratory\UM_H.XPT                                                                                Processing: Laboratory\UPHOPM_E.XPT                                                                                Processing: Laboratory\UPHOPM_F.XPT                                                 

Processing: Laboratory\SSCMVG_A.XPT                                                                                Processing: Laboratory\SSCMV_A.XPT                                                                                Processing: Laboratory\SSCMV_B.XPT                                                                                Processing: Laboratory\SSCMV_C.XPT                                                                                Processing: Laboratory\SSCYST_A.XPT                                                                                Processing: Laboratory\SSCYST_B.XPT                                                                                Processing: Laboratory\SSEBV_C.XPT                                                                                Processing: Laboratory\SSEBV_D.XPT                                                                                Processing: Laboratory\SSEBV_E.XPT                                  

                                                                                Processing: Laboratory\UHMS_G.XPT                                                                                Processing: Laboratory\UHM_D.XPT                                                                                Processing: Laboratory\UHM_E.XPT                                                                                Processing: Laboratory\UHM_F.XPT                                                                                Processing: Laboratory\UHM_G.XPT                                                                                Processing: Laboratory\UIO_D.XPT                                                                                Processing: Laboratory\UIO_E.XPT                                                                                Processing: Laboratory\UIO_F.XPT                                                                                Proce

Processing: Laboratory\UHM_G.XPT                                                                                Processing: Laboratory\UIO_D.XPT                                                                                Processing: Laboratory\UIO_E.XPT                                                                                Processing: Laboratory\UIO_F.XPT                                                                                Processing: Laboratory\UIO_G.XPT                                                                                Processing: Laboratory\UIO_H.XPT                                                                                Processing: Laboratory\UMS_H.XPT                                                                                Processing: Laboratory\UM_H.XPT                                                                                Processing: Laboratory\UPHOPM_E.XPT                                                    

                                                                                Processing: Laboratory\UHM_F.XPT                                                                                Processing: Laboratory\UHM_G.XPT                                                                                Processing: Laboratory\UIO_D.XPT                                                                                Processing: Laboratory\UIO_E.XPT                                                                                Processing: Laboratory\UIO_F.XPT                                                                                Processing: Laboratory\UIO_G.XPT                                                                                Processing: Laboratory\UIO_H.XPT                                                                                Processing: Laboratory\UMS_H.XPT                                                                                Proces

Processing: Laboratory\ALB_CR_D.XPT                                                                                Processing: Laboratory\ALB_CR_E.XPT                                                                                Processing: Laboratory\ALB_CR_F.XPT                                                                                Processing: Laboratory\ALB_CR_G.XPT                                                                                Processing: Laboratory\ALB_CR_H.XPT                                                                                Processing: Laboratory\ALB_CR_I.XPT                                                                                Processing: Laboratory\ALDUST_D.XPT                                                                                Processing: Laboratory\AL_IGE_D.XPT                                                                                Processing: Laboratory\AMDGYD_D.XPT                           

                                                                                Processing: Laboratory\HCAA_H.XPT                                                                                Processing: Laboratory\HCY_D.XPT                                                                                Processing: Laboratory\HDL_D.XPT                                                                                Processing: Laboratory\HDL_E.XPT                                                                                Processing: Laboratory\HDL_F.XPT                                                                                Processing: Laboratory\HDL_G.XPT                                                                                Processing: Laboratory\HDL_H.XPT                                                                                Processing: Laboratory\HDL_I.XPT                                                                                Proce

Processing: Laboratory\VOCWB_D.XPT                                                                                Processing: Laboratory\VOCWB_E.XPT                                                                                Processing: Laboratory\VOCWB_F.XPT                                                                                Processing: Laboratory\VOCWB_G.XPT                                                                                Processing: Laboratory\VOCWB_H.XPT                                                                                Processing: Laboratory\VOC_D.XPT                                                                                Processing: Laboratory\VOC_E.XPT                                                                                Processing: Laboratory\VOC_F.XPT                                                                                Processing: Laboratory\WPIN_D.XPT                                            

                                                                                Processing: Laboratory\UPHOPM_E.XPT                                                                                Processing: Laboratory\UPHOPM_F.XPT                                                                                Processing: Laboratory\UPP_D.XPT                                                                                Processing: Laboratory\UPP_E.XPT                                                                                Processing: Laboratory\UTASS_H.XPT                                                                                Processing: Laboratory\UTAS_H.XPT                                                                                Processing: Laboratory\UVOCS_G.XPT                                                                                Processing: Laboratory\UVOC_G.XPT                                                                            

Processing: Laboratory\UIO_G.XPT                                                                                Processing: Laboratory\UIO_H.XPT                                                                                Processing: Laboratory\UMS_H.XPT                                                                                Processing: Laboratory\UM_H.XPT                                                                                Processing: Laboratory\UPHOPM_E.XPT                                                                                Processing: Laboratory\UPHOPM_F.XPT                                                                                Processing: Laboratory\UPP_D.XPT                                                                                Processing: Laboratory\UPP_E.XPT                                                                                Processing: Laboratory\UTASS_H.XPT                                               

Processing: Questionnaire\AGQ_D.XPT                                                                                Processing: Questionnaire\ALQ.XPT                                                                                Processing: Questionnaire\ALQY_F.XPT                                                                                Processing: Questionnaire\ALQ_B.XPT                                                                                Processing: Questionnaire\ALQ_C.XPT                                                                                Processing: Questionnaire\ALQ_D.XPT                                                                                Processing: Questionnaire\ALQ_E.XPT                                                                                Processing: Questionnaire\ALQ_F.XPT                                                                                Processing: Questionnaire\ALQ_G.XPT                            

                                                                                Processing: Questionnaire\HOQ_B.XPT                                                                                Processing: Questionnaire\HOQ_C.XPT                                                                                Processing: Questionnaire\HOQ_D.XPT                                                                                Processing: Questionnaire\HOQ_E.XPT                                                                                Processing: Questionnaire\HOQ_F.XPT                                                                                Processing: Questionnaire\HOQ_G.XPT                                                                                Processing: Questionnaire\HOQ_H.XPT                                                                                Processing: Questionnaire\HOQ_I.XPT                                                                

Processing: Questionnaire\SXQ_G.XPT                                                                                Processing: Questionnaire\SXQ_H.XPT                                                                                Processing: Questionnaire\SXQ_I.XPT                                                                                Processing: Questionnaire\TBQ.XPT                                                                                Processing: Questionnaire\TBQ_G.XPT                                                                                Processing: Questionnaire\VIQ.XPT                                                                                Processing: Questionnaire\VIQ_B.XPT                                                                                Processing: Questionnaire\VIQ_C.XPT                                                                                Processing: Questionnaire\VIQ_D.XPT                               

                                                                                Processing: Questionnaire\INQ_F.XPT                                                                                Processing: Questionnaire\INQ_G.XPT                                                                                Processing: Questionnaire\INQ_H.XPT                                                                                Processing: Questionnaire\INQ_I.XPT                                                                                Processing: Questionnaire\KIQ.XPT                                                                                Processing: Questionnaire\KIQ_P_B.XPT                                                                                Processing: Questionnaire\KIQ_P_C.XPT                                                                                Processing: Questionnaire\KIQ_P_D.XPT                                                            

                                                                                Processing: Questionnaire\ACQ.XPT                                                                                Processing: Questionnaire\ACQ_B.XPT                                                                                Processing: Questionnaire\ACQ_C.XPT                                                                                Processing: Questionnaire\ACQ_D.XPT                                                                                Processing: Questionnaire\ACQ_E.XPT                                                                                Processing: Questionnaire\ACQ_F.XPT                                                                                Processing: Questionnaire\ACQ_G.XPT                                                                                Processing: Questionnaire\ACQ_H.XPT                                                                  

Processing: Questionnaire\ACQ.XPT                                                                                Processing: Questionnaire\ACQ_B.XPT                                                                                Processing: Questionnaire\ACQ_C.XPT                                                                                Processing: Questionnaire\ACQ_D.XPT                                                                                Processing: Questionnaire\ACQ_E.XPT                                                                                Processing: Questionnaire\ACQ_F.XPT                                                                                Processing: Questionnaire\ACQ_G.XPT                                                                                Processing: Questionnaire\ACQ_H.XPT                                                                                Processing: Questionnaire\ACQ_I.XPT                             

                                                                                Processing: Questionnaire\INQ_G.XPT                                                                                Processing: Questionnaire\INQ_H.XPT                                                                                Processing: Questionnaire\INQ_I.XPT                                                                                Processing: Questionnaire\KIQ.XPT                                                                                Processing: Questionnaire\KIQ_P_B.XPT                                                                                Processing: Questionnaire\KIQ_P_C.XPT                                                                                Processing: Questionnaire\KIQ_P_D.XPT                                                                                Processing: Questionnaire\KIQ_P_E.XPT                                                          

                                                                                Processing: Questionnaire\KIQ.XPT                                                                                Processing: Questionnaire\KIQ_P_B.XPT                                                                                Processing: Questionnaire\KIQ_P_C.XPT                                                                                Processing: Questionnaire\KIQ_P_D.XPT                                                                                Processing: Questionnaire\KIQ_P_E.XPT                                                                                Processing: Questionnaire\KIQ_U_B.XPT                                                                                Processing: Questionnaire\KIQ_U_C.XPT                                                                                Processing: Questionnaire\KIQ_U_D.XPT                                                    

                                                                                Processing: Questionnaire\OCQ_G.XPT                                                                                Processing: Questionnaire\OCQ_H.XPT                                                                                Processing: Questionnaire\OHQ.XPT                                                                                Processing: Questionnaire\OHQ_B.XPT                                                                                Processing: Questionnaire\OHQ_C.XPT                                                                                Processing: Questionnaire\OHQ_D.XPT                                                                                Processing: Questionnaire\OHQ_E.XPT                                                                                Processing: Questionnaire\OHQ_F.XPT                                                                  

                                                                                Processing: Questionnaire\AGQ_D.XPT                                                                                Processing: Questionnaire\ALQ.XPT                                                                                Processing: Questionnaire\ALQY_F.XPT                                                                                Processing: Questionnaire\ALQ_B.XPT                                                                                Processing: Questionnaire\ALQ_C.XPT                                                                                Processing: Questionnaire\ALQ_D.XPT                                                                                Processing: Questionnaire\ALQ_E.XPT                                                                                Processing: Questionnaire\ALQ_F.XPT                                                                 

Processing: Questionnaire\ECQ_H.XPT                                                                                Processing: Questionnaire\ECQ_I.XPT                                                                                Processing: Questionnaire\FSQ.XPT                                                                                Processing: Questionnaire\FSQ_B.XPT                                                                                Processing: Questionnaire\FSQ_C.XPT                                                                                Processing: Questionnaire\FSQ_D.XPT                                                                                Processing: Questionnaire\FSQ_E.XPT                                                                                Processing: Questionnaire\FSQ_F.XPT                                                                                Processing: Questionnaire\FSQ_G.XPT                             

                                                                                Processing: Questionnaire\SMQ_C.XPT                                                                                Processing: Questionnaire\SMQ_D.XPT                                                                                Processing: Questionnaire\SMQ_E.XPT                                                                                Processing: Questionnaire\SMQ_F.XPT                                                                                Processing: Questionnaire\SMQ_G.XPT                                                                                Processing: Questionnaire\SMQ_H.XPT                                                                                Processing: Questionnaire\SMQ_I.XPT                                                                                Processing: Questionnaire\SSQ.XPT                                                                  

Processing: Questionnaire\ACQ.XPT                                                                                Processing: Questionnaire\ACQ_B.XPT                                                                                Processing: Questionnaire\ACQ_C.XPT                                                                                Processing: Questionnaire\ACQ_D.XPT                                                                                Processing: Questionnaire\ACQ_E.XPT                                                                                Processing: Questionnaire\ACQ_F.XPT                                                                                Processing: Questionnaire\ACQ_G.XPT                                                                                Processing: Questionnaire\ACQ_H.XPT                                                                                Processing: Questionnaire\ACQ_I.XPT                             

Processing: Questionnaire\PAQ_G.XPT                                                                                Processing: Questionnaire\PAQ_H.XPT                                                                                Processing: Questionnaire\PAQ_I.XPT                                                                                Processing: Questionnaire\PFQ.XPT                                                                                Processing: Questionnaire\PFQ_B.XPT                                                                                Processing: Questionnaire\PFQ_C.XPT                                                                                Processing: Questionnaire\PFQ_D.XPT                                                                                Processing: Questionnaire\PFQ_E.XPT                                                                                Processing: Questionnaire\PFQ_F.XPT                             

Processing: Questionnaire\PFQ_D.XPT                                                                                Processing: Questionnaire\PFQ_E.XPT                                                                                Processing: Questionnaire\PFQ_F.XPT                                                                                Processing: Questionnaire\PFQ_G.XPT                                                                                Processing: Questionnaire\PFQ_H.XPT                                                                                Processing: Questionnaire\PFQ_I.XPT                                                                                Processing: Questionnaire\PSQ_C.XPT                                                                                Processing: Questionnaire\PSQ_D.XPT                                                                                Processing: Questionnaire\PUQ.XPT                             

                                                                                Processing: Questionnaire\FSQ_D.XPT                                                                                Processing: Questionnaire\FSQ_E.XPT                                                                                Processing: Questionnaire\FSQ_F.XPT                                                                                Processing: Questionnaire\FSQ_G.XPT                                                                                Processing: Questionnaire\FSQ_H.XPT                                                                                Processing: Questionnaire\HCQ_B.XPT                                                                                Processing: Questionnaire\HCQ_C.XPT                                                                                Processing: Questionnaire\HCQ_D.XPT                                                                

Processing: Questionnaire\RXQANA_C.XPT                                                                                Processing: Questionnaire\RXQASA_G.XPT                                                                                Processing: Questionnaire\RXQASA_H.XPT                                                                                Processing: Questionnaire\RXQ_ANA.XPT                                                                                Processing: Questionnaire\RXQ_RX.XPT                                                                                Processing: Questionnaire\RXQ_RX_B.XPT                                                                                Processing: Questionnaire\RXQ_RX_C.XPT                                                                                Processing: Questionnaire\RXQ_RX_D.XPT                                                                                Processing: Questionnaire\RXQ_RX_E.XPT    

                                                                                Processing: Examination\BMX_F.XPT                                                                                Processing: Examination\BMX_G.XPT                                                                                Processing: Examination\BMX_H.XPT                                                                                Processing: Examination\BMX_I.XPT                                                                                Processing: Examination\BPX.XPT                                                                                Processing: Examination\BPX_B.XPT                                                                                Processing: Examination\BPX_C.XPT                                                                                Processing: Examination\BPX_D.XPT                                                                                

                                                                                Processing: Examination\ARX_F.XPT                                                                                Processing: Examination\AUX1.XPT                                                                                Processing: Examination\AUXAR.XPT                                                                                Processing: Examination\AUXAR_B.XPT                                                                                Processing: Examination\AUXAR_C.XPT                                                                                Processing: Examination\AUXAR_D.XPT                                                                                Processing: Examination\AUXAR_E.XPT                                                                                Processing: Examination\AUXAR_F.XPT                                                                       

                                                                                Processing: Laboratory\L10AM_C.XPT                                                                                Processing: Laboratory\L10_2_B.XPT                                                                                Processing: Laboratory\L10_B.XPT                                                                                Processing: Laboratory\L10_C.XPT                                                                                Processing: Laboratory\L11PSA_B.XPT                                                                                Processing: Laboratory\L11PSA_C.XPT                                                                                Processing: Laboratory\L11P_2_B.XPT                                                                                Processing: Laboratory\L11_2_B.XPT                                                                         

                                                                                Processing: Questionnaire\HIQ_F.XPT                                                                                Processing: Questionnaire\HIQ_G.XPT                                                                                Processing: Questionnaire\HIQ_H.XPT                                                                                Processing: Questionnaire\HIQ_I.XPT                                                                                Processing: Questionnaire\HOQ.XPT                                                                                Processing: Questionnaire\HOQ_B.XPT                                                                                Processing: Questionnaire\HOQ_C.XPT                                                                                Processing: Questionnaire\HOQ_D.XPT                                                                  

Processing: Questionnaire\PAQ_E.XPT                                                                                Processing: Questionnaire\PAQ_F.XPT                                                                                Processing: Questionnaire\PAQ_G.XPT                                                                                Processing: Questionnaire\PAQ_H.XPT                                                                                Processing: Questionnaire\PAQ_I.XPT                                                                                Processing: Questionnaire\PFQ.XPT                                                                                Processing: Questionnaire\PFQ_B.XPT                                                                                Processing: Questionnaire\PFQ_C.XPT                                                                                Processing: Questionnaire\PFQ_D.XPT                              

Processing: Questionnaire\ALQ.XPT                                                                                Processing: Questionnaire\ALQY_F.XPT                                                                                Processing: Questionnaire\ALQ_B.XPT                                                                                Processing: Questionnaire\ALQ_C.XPT                                                                                Processing: Questionnaire\ALQ_D.XPT                                                                                Processing: Questionnaire\ALQ_E.XPT                                                                                Processing: Questionnaire\ALQ_F.XPT                                                                                Processing: Questionnaire\ALQ_G.XPT                                                                                Processing: Questionnaire\ALQ_H.XPT                            

                                                                                Processing: Questionnaire\PUQ.XPT                                                                                Processing: Questionnaire\PUQMEC_D.XPT                                                                                Processing: Questionnaire\PUQMEC_E.XPT                                                                                Processing: Questionnaire\PUQMEC_F.XPT                                                                                Processing: Questionnaire\PUQMEC_G.XPT                                                                                Processing: Questionnaire\PUQMEC_H.XPT                                                                                Processing: Questionnaire\PUQ_B.XPT                                                                                Processing: Questionnaire\PUQ_C.XPT                                                   

                                                                                Processing: Questionnaire\MPQ_B.XPT                                                                                Processing: Questionnaire\MPQ_C.XPT                                                                                Processing: Questionnaire\OCQ.XPT                                                                                Processing: Questionnaire\OCQ_B.XPT                                                                                Processing: Questionnaire\OCQ_C.XPT                                                                                Processing: Questionnaire\OCQ_D.XPT                                                                                Processing: Questionnaire\OCQ_E.XPT                                                                                Processing: Questionnaire\OCQ_F.XPT                                                                  

## Preprocessing of Data

In [12]:
# Work on a copy so the feature array loaded above is not touched by this cell.
dataset_features_sel = dataset_features.copy()
# Drop constant features: VarianceThreshold() with its default threshold (0.0)
# removes only the columns whose variance is zero. The number of surviving
# columns is printed below rather than hard-coded in a comment.
sel = VarianceThreshold()
out_features = sel.fit_transform(dataset_features_sel)
# Report the shape of the array that was actually transformed.
print("Original Shape: %s, New Shape: %s" % (dataset_features_sel.shape, out_features.shape))


Original Shape: (49509, 119), New Shape: (49509, 60)


## Train/Test Separation

In [5]:
# Seed NumPy's global RNG so the shuffle below, and the later np.random draws
# made by get_batch, are repeatable across Restart & Run All.
np.random.seed(0)
# Shuffle the rows once; features and targets are indexed with the same
# permutation so every row keeps its label.
# NOTE: this rebinds dataset_features / dataset_targets. Re-running only this
# cell permutes the already-shuffled targets a second time while the features
# are re-derived from out_features, which breaks their alignment -- re-run the
# notebook from the data-loading cell instead.
perm = np.random.permutation(dataset_targets.shape[0])
dataset_features = out_features[perm]
dataset_targets = dataset_targets[perm]

def get_batch(n_size, phase):
    """Draw a random, class-balanced batch from one split of the dataset.

    Rows of the module-level ``dataset_features`` / ``dataset_targets`` arrays
    are split by position: the first 15% form the test split, the next 15%
    the validation split and the remaining rows the training split.

    Args:
        n_size: Requested batch size. Every class contributes at most
            ``n_size // n_classes`` rows, where ``n_classes`` is
            ``dataset_targets.max() + 1``. The batch is therefore smaller than
            ``n_size`` when ``n_size`` is not a multiple of ``n_classes`` or
            when a class has too few rows in the chosen split.
        phase: One of ``'train'``, ``'validation'`` or ``'test'``.

    Returns:
        A ``(features, targets)`` tuple holding the selected rows in random
        order. Both are empty when no row is selected.

    Raises:
        NotImplementedError: If ``phase`` is not one of the supported names.
    """
    n_samples = dataset_features.shape[0]
    n_classes = int(dataset_targets.max() + 1)

    # Positional split boundaries: [0, 15%) test, [15%, 30%) validation,
    # [30%, 100%) train.
    test_end = int(n_samples * 0.15)
    validation_end = int(n_samples * 0.30)
    if phase == 'test':
        inds_sel = np.arange(0, test_end, 1)
    elif phase == 'validation':
        inds_sel = np.arange(test_end, validation_end, 1)
    elif phase == 'train':
        inds_sel = np.arange(validation_end, n_samples, 1)
    else:
        raise NotImplementedError("unsupported phase: %r" % (phase,))

    # Randomise the order inside the split so that the per-class slice taken
    # below is a random sample rather than always the first rows.
    inds_sel = np.random.permutation(inds_sel)
    per_class = n_size // n_classes
    batch_inds = []
    for cl in range(n_classes):
        inds_cl = inds_sel[dataset_targets[inds_sel] == cl]
        batch_inds.extend(inds_cl[:per_class])

    # Force an integer dtype: an empty list would otherwise become a float
    # array, which NumPy refuses to use as an index.
    batch_inds = np.random.permutation(np.asarray(batch_inds, dtype=np.intp))

    return dataset_features[batch_inds], dataset_targets[batch_inds]
    
# Draw one batch from the training split (up to 5000 rows) and one from the
# test split (up to 1000 rows); the validation split is not sampled in this cell.
features_trn, targets_trn = get_batch(5000, 'train')
features_tst, targets_tst = get_batch(1000, 'test')

## Classification

In [6]:
# Baseline classifiers. Each one is fitted on the same training batch and
# scored on the same test batch; the short tag is used in the printed label.
classifiers = [
    ('RFC', RandomForestClassifier(n_estimators=100)),
    ('SVC', SVC(gamma='auto')),
    ('LR', LogisticRegression(solver='lbfgs', max_iter=200)),
]

for name, clf in classifiers:
    clf.fit(features_trn, targets_trn)
    preds_tst = clf.predict(features_tst)
    # Accuracy: fraction of test rows whose predicted label equals the target.
    accu = np.mean(preds_tst == targets_tst)
    print('accu_tst_' + name, accu)


accu_tst_RFC 0.743
accu_tst_SVC 0.734
accu_tst_LR 0.738


### Accuracies from baseline: 
#### Cancer (ds.load_cancer()):
* accu_tst_RFC 0.758
* accu_tst_SVC 0.759
* accu_tst_LR 0.768

#### Arthritis (ds.load_arthiritis()):
* accu_tst_RFC 0.753
* accu_tst_SVC 0.754
* accu_tst_LR 0.773