**Dietary Data: Cleaning**

This Jupyter notebook compiles the dietary data from the respective csv files and produces a comprehensive macronutrient analysis

In [53]:
#Import all necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob #this library is used to read in multiple file names
import re
from collections import defaultdict

In [54]:
#Use one ./ if running from run_notebooks.py
#Use two ../if running directly from Jupyter Notebooks
#file_beg = '../NHANES-Downloader/data/csv_data/'

file_beg = '../NHANES-Downloader/data/csv_data/'

In [55]:
#Import all the files
files1 = glob.glob(file_beg+'1999-2000/*/*.csv')
files2 = glob.glob(file_beg+'2001-2002/*/*.csv')
files3 = glob.glob(file_beg+'2003-2004/*/*.csv')
files4 = glob.glob(file_beg+'2005-2006/*/*.csv')
files5 = glob.glob(file_beg+'2007-2008/*/*.csv')
files6 = glob.glob(file_beg+'2009-2010/*/*.csv')
files7 = glob.glob(file_beg+'2011-2012/*/*.csv')
files8 = glob.glob(file_beg+'2013-2014/*/*.csv')
files9 = glob.glob(file_beg+'2015-2016/*/*.csv')
files10 = glob.glob(file_beg+'2017-2018/*/*.csv')

In [56]:
#Add files into a list
file_list = [files1, files2, files3, files4, files5, 
          files6, files7, files8, files9, files10]

In [57]:
#Add files into a list of sorted dictionaries
file_list_dict = []
for x in file_list:
    x.sort()
    file_list_dict.append(dict(enumerate(x)))

In [58]:
drxtot_indx = [3,3,2,2,2,2,2,2,2,3]

In [59]:
dfs = defaultdict(int)
for i in range(0,len(drxtot_indx)):
    dfs[i] = pd.read_csv(file_list_dict[i][drxtot_indx[i]])

## Important Variables

In [60]:
#1999-2000
var_names = ['SEQN', 'WTDR4YR', 'DRXTKCAL', 'DRXTPROT', 'DRXTCARB', 'DRXTTFAT', 'DRXTSFAT', 'DRXTMFAT', 'DRXTPFAT', 'DRXTCHOL', 'DRXTFIBE', 'DRXTVARE', 'DRXTVB1', 'DRXTVB2', 'DRXTVB6', 'DRXTVB12', 'DRXTVC', 'DRXTCALC', 'DRXTPHOS', 'DRXTMAGN', 'DRXTIRON', 'DRXTZINC', 'DRXTCOPP', 'DRDTSODI', 'DRXTPOTA', 'DRXTCAFF', 'DRXTALCO', 'DRD320GW']

#2001-2002
var_names1 = ['SEQN', 'WTDR4YR', 'DRXTKCAL', 'DRXTPROT', 'DRXTCARB', 'DRXTTFAT', 'DRXTSFAT', 'DRXTMFAT', 'DRXTPFAT', 'DRXTCHOL', 'DRXTFIBE', 'DRXTVARA', 'DRXTVB1', 'DRXTVB2', 'DRXTVB6', 'DRXTVB12', 'DRXTVC', 'DRXTCALC', 'DRXTPHOS', 'DRXTMAGN', 'DRXTIRON', 'DRXTZINC', 'DRXTCOPP', 'DRDTSODI', 'DRXTPOTA', 'DRXTCAFF', 'DRXTALCO', 'DRD320GW']

#2003-2004
var_names3 = ['SEQN', 'WTDRD1', 'DR1TKCAL', 'DR1TPROT', 'DR1TCARB', 'DR1TTFAT', 'DR1TSFAT', 'DR1TMFAT', 'DR1TPFAT', 'DR1TCHOL', 'DR1TFIBE', 'DR1TVARA', 'DR1TVB1', 'DR1TVB2', 'DR1TVB6', 'DR1TVB12', 'DR1TVC', 'DR1TCALC', 'DR1TPHOS', 'DR1TMAGN', 'DR1TIRON', 'DR1TZINC', 'DR1TCOPP', 'DR1TSODI', 'DR1TPOTA', 'DR1TCAFF', 'DR1TALCO', 'DR1_320']

#2005-2018
var_names4 = ['SEQN', 'WTDRD1', 'DR1TKCAL', 'DR1TPROT', 'DR1TCARB', 'DR1TTFAT', 'DR1TSFAT', 'DR1TMFAT', 'DR1TPFAT', 'DR1TCHOL', 'DR1TFIBE', 'DR1TVARA', 'DR1TVB1', 'DR1TVB2', 'DR1TVB6', 'DR1TVB12', 'DR1TVC', 'DR1TCALC', 'DR1TPHOS', 'DR1TMAGN', 'DR1TIRON', 'DR1TZINC', 'DR1TCOPP', 'DR1TSODI', 'DR1TPOTA', 'DR1TCAFF', 'DR1TALCO', 'DR1_320Z']

descr = ['SEQN', 'Weight Day One', 'Energy KCal', 'Energy (Protein)', 'Energy (Carbohydrates)', 'Energy (Total Fat)','Total polyunsaturated fatty acids (gm)', 'Cholesterol (mg)','Dietary fiber (gm)', 'Vitamin A, RAE (mcg)', 'Thiamin (Vitamin B1) (mg)', 'Riboflavin (Vitamin B2) (mg)', 'Vitamin B6 (mg)', 'Vitamin B12 (mcg)', 'Vitamin C (mg)', 'Calcium (mg)', 'Phosphorus (mg)', 'Magnesium (mg)', 'Iron (mg)', 'Zinc (mg)', 'Copper (mg)', 'Sodium (mg)', 'Potassium (mg)', 'Caffeine (mg)', 'Alcohol (gm)', 'Total plain water drank yesterday (gm)']

In [61]:
#To display all columns in Jupyter Notebooks
pd.set_option('display.max_columns', 500)

## Functions: Recategorize values, Count Values, Drop Rows

In [62]:
#Recategorize function
def recategorize(df, name, replace_dict):
    df[name].replace(
    to_replace=replace_dict,
    inplace=True
)
    
#Count values function
def count_vals(df, name):
    df_count = df.groupby(name)['SEQN'].nunique()
    print(df_count,"\n\n","NaN: ", df[name].isnull().sum())
    
#Drop rows that include certain values
def drop_rows(df, name, val_list):
    df.drop(df[df[name].isin(val_list)].index, inplace=True)

## Make a copy of the dataframes

In [63]:
df0 = dfs[0][var_names].copy() #1999-2000
df1 = dfs[1][var_names1].copy() #2001-2002
df2 = dfs[2][var_names3].copy() #2003-2004
df3 = dfs[3][var_names4].copy() #2005-2006
df4 = dfs[4][var_names4].copy() #2007-2008
df5 = dfs[5][var_names4].copy() #2009-2010
df6 = dfs[6][var_names4].copy() #2011-2012
df7 = dfs[7][var_names4].copy() #2013-2014
df8 = dfs[8][var_names4].copy() #2015-2016
df9 = dfs[9][var_names4].copy() #2017-2018

## Weight for 18-years: 1999 - 2018

In [64]:
#Create column for weight
def reweight(df, col_name, year_wt, weight):
    df[col_name] = df[year_wt]*weight

In [65]:
reweight(df0, 'DRX18YR', 'WTDR4YR', 2/9)
reweight(df1, 'DRX18YR', 'WTDR4YR', 2/9)
reweight(df2, 'DRX18YR', 'WTDRD1', 1/9)
reweight(df3, 'DRX18YR', 'WTDRD1', 1/9)
reweight(df4, 'DRX18YR', 'WTDRD1', 1/9)
reweight(df5, 'DRX18YR', 'WTDRD1', 1/9)
reweight(df6, 'DRX18YR', 'WTDRD1', 1/9)
reweight(df7, 'DRX18YR', 'WTDRD1', 1/9)
reweight(df8, 'DRX18YR', 'WTDRD1', 1/9)
reweight(df9, 'DRX18YR', 'WTDRD1', 1/9)

## Remove 2-yr and 4-yr weights

In [66]:
#Drop columns
def drop_cols(df, cols):
    df.drop(cols, axis=1, inplace=True)

In [67]:
drop_cols(df0,['WTDR4YR'])
drop_cols(df1,['WTDR4YR'])

In [68]:
drop_cols(df2,['WTDRD1'])
drop_cols(df3,['WTDRD1'])
drop_cols(df4,['WTDRD1'])
drop_cols(df5,['WTDRD1'])
drop_cols(df6,['WTDRD1'])
drop_cols(df7,['WTDRD1'])
drop_cols(df8,['WTDRD1'])
drop_cols(df9,['WTDRD1'])

In [69]:
df0[:5]

Unnamed: 0,SEQN,DRXTKCAL,DRXTPROT,DRXTCARB,DRXTTFAT,DRXTSFAT,DRXTMFAT,DRXTPFAT,DRXTCHOL,DRXTFIBE,DRXTVARE,DRXTVB1,DRXTVB2,DRXTVB6,DRXTVB12,DRXTVC,DRXTCALC,DRXTPHOS,DRXTMAGN,DRXTIRON,DRXTZINC,DRXTCOPP,DRDTSODI,DRXTPOTA,DRXTCAFF,DRXTALCO,DRD320GW,DRX18YR
0,1.0,1358.88,31.96,250.36,27.24,9.4,9.0,6.4,46.55,7.41,604.33,1.5,1.55,1.43,2.42,220.64,541.3,503.75,126.8,12.18,4.69,0.41,1621.35,1387.7,5.397605e-79,5.397605e-79,243.38,1348.028592
1,2.0,2463.0,123.16,350.37,71.95,24.34,26.48,11.94,313.95,36.99,923.91,2.11,3.25,2.9,8.68,119.12,925.37,1974.57,502.25,37.29,41.61,2.08,5710.03,4672.48,530.45,5.397605e-79,5.397605e-79,3315.985398
2,3.0,1517.69,40.19,233.63,49.94,15.61,19.57,11.84,86.22,11.16,885.72,1.21,1.43,1.37,1.59,57.95,415.27,674.82,172.58,9.78,5.65,0.85,1676.51,1487.16,35.4,5.397605e-79,531.0,3525.877253
3,4.0,1474.93,56.16,191.03,56.2,27.54,20.13,4.67,194.78,5.45,337.56,0.96,2.23,0.95,4.25,65.85,1315.39,1266.8,204.1,5.79,6.72,0.52,1277.31,2245.42,5.397605e-79,5.397605e-79,1652.0,715.133313
4,5.0,2658.14,97.13,253.98,114.52,36.0,60.55,11.35,180.57,17.28,1298.44,2.62,3.05,2.92,6.66,112.19,1626.38,1811.55,367.72,27.22,10.17,1.95,3756.36,3743.15,5.397605e-79,34.56,1298.0,13105.246918


## Recategorize values

In [70]:
#Nothing to recategorize

## Rename columns 1999 - 2018

In [71]:
#New column names
col_names = ['SEQN', 'DRXTKCAL', 'DRXTPROT', 'DRXTCARB', 'DRXTTFAT', 'DRXTSFAT', 'DRXTMFAT', 'DRXTPFAT', 'DRXTCHOL', 'DRXTFIBE', 'DRXTVARE', 'DRXTVB1', 'DRXTVB2', 'DRXTVB6', 'DRXTVB12', 'DRXTVC', 'DRXTCALC', 'DRXTPHOS', 'DRXTMAGN', 'DRXTIRON', 'DRXTZINC', 'DRXTCOPP', 'DRDTSODI', 'DRXTPOTA', 'DRXTCAFF', 'DRXTALCO', 'DRD320GW', 'DRX18YR']

In [72]:
df0.columns = col_names
df1.columns = col_names
df2.columns = col_names
df3.columns = col_names
df4.columns = col_names
df5.columns = col_names
df6.columns = col_names
df7.columns = col_names
df8.columns = col_names
df9.columns = col_names

## Append years 1999 - 2018

In [73]:
years = ["1999-2000","2001-2002","2003-2004","2005-2006","2007-2008", 
        "2009-2010","2011-2012","2013-2014","2015-2016","2017-2018"]

In [74]:
frames = [df0, df1, df2, df3, df4, df5, df6, df7, df8, df9]

In [75]:
#Add years as a column
for i, df in enumerate(frames):
    df["Year"] = years[i]

In [76]:
result_1999_2018 = pd.concat(frames, keys = years)

In [77]:
result_1999_2018_cleaned = result_1999_2018.copy()

In [78]:
len(result_1999_2018)

95418

## Count values for each column

In [79]:
len(col_names)

28

In [80]:
len(result_1999_2018)

95418

In [81]:
result_1999_2018_cleaned[:5]

Unnamed: 0,Unnamed: 1,SEQN,DRXTKCAL,DRXTPROT,DRXTCARB,DRXTTFAT,DRXTSFAT,DRXTMFAT,DRXTPFAT,DRXTCHOL,DRXTFIBE,DRXTVARE,DRXTVB1,DRXTVB2,DRXTVB6,DRXTVB12,DRXTVC,DRXTCALC,DRXTPHOS,DRXTMAGN,DRXTIRON,DRXTZINC,DRXTCOPP,DRDTSODI,DRXTPOTA,DRXTCAFF,DRXTALCO,DRD320GW,DRX18YR,Year
1999-2000,0,1.0,1358.88,31.96,250.36,27.24,9.4,9.0,6.4,46.55,7.41,604.33,1.5,1.55,1.43,2.42,220.64,541.3,503.75,126.8,12.18,4.69,0.41,1621.35,1387.7,5.397605e-79,5.397605e-79,243.38,1348.028592,1999-2000
1999-2000,1,2.0,2463.0,123.16,350.37,71.95,24.34,26.48,11.94,313.95,36.99,923.91,2.11,3.25,2.9,8.68,119.12,925.37,1974.57,502.25,37.29,41.61,2.08,5710.03,4672.48,530.45,5.397605e-79,5.397605e-79,3315.985398,1999-2000
1999-2000,2,3.0,1517.69,40.19,233.63,49.94,15.61,19.57,11.84,86.22,11.16,885.72,1.21,1.43,1.37,1.59,57.95,415.27,674.82,172.58,9.78,5.65,0.85,1676.51,1487.16,35.4,5.397605e-79,531.0,3525.877253,1999-2000
1999-2000,3,4.0,1474.93,56.16,191.03,56.2,27.54,20.13,4.67,194.78,5.45,337.56,0.96,2.23,0.95,4.25,65.85,1315.39,1266.8,204.1,5.79,6.72,0.52,1277.31,2245.42,5.397605e-79,5.397605e-79,1652.0,715.133313,1999-2000
1999-2000,4,5.0,2658.14,97.13,253.98,114.52,36.0,60.55,11.35,180.57,17.28,1298.44,2.62,3.05,2.92,6.66,112.19,1626.38,1811.55,367.72,27.22,10.17,1.95,3756.36,3743.15,5.397605e-79,34.56,1298.0,13105.246918,1999-2000


## Remove correlated features

In [82]:
cols_remove = ['DRXTSFAT', 'DRXTMFAT', 'DRXTPFAT']
result_1999_2018_cleaned = result_1999_2018_cleaned[result_1999_2018_cleaned.columns.difference(cols_remove)]

In [83]:
result_1999_2018_cleaned.head()

Unnamed: 0,Unnamed: 1,DRD320GW,DRDTSODI,DRX18YR,DRXTALCO,DRXTCAFF,DRXTCALC,DRXTCARB,DRXTCHOL,DRXTCOPP,DRXTFIBE,DRXTIRON,DRXTKCAL,DRXTMAGN,DRXTPHOS,DRXTPOTA,DRXTPROT,DRXTTFAT,DRXTVARE,DRXTVB1,DRXTVB12,DRXTVB2,DRXTVB6,DRXTVC,DRXTZINC,SEQN,Year
1999-2000,0,243.38,1621.35,1348.028592,5.397605e-79,5.397605e-79,541.3,250.36,46.55,0.41,7.41,12.18,1358.88,126.8,503.75,1387.7,31.96,27.24,604.33,1.5,2.42,1.55,1.43,220.64,4.69,1.0,1999-2000
1999-2000,1,5.397605e-79,5710.03,3315.985398,5.397605e-79,530.45,925.37,350.37,313.95,2.08,36.99,37.29,2463.0,502.25,1974.57,4672.48,123.16,71.95,923.91,2.11,8.68,3.25,2.9,119.12,41.61,2.0,1999-2000
1999-2000,2,531.0,1676.51,3525.877253,5.397605e-79,35.4,415.27,233.63,86.22,0.85,11.16,9.78,1517.69,172.58,674.82,1487.16,40.19,49.94,885.72,1.21,1.59,1.43,1.37,57.95,5.65,3.0,1999-2000
1999-2000,3,1652.0,1277.31,715.133313,5.397605e-79,5.397605e-79,1315.39,191.03,194.78,0.52,5.45,5.79,1474.93,204.1,1266.8,2245.42,56.16,56.2,337.56,0.96,4.25,2.23,0.95,65.85,6.72,4.0,1999-2000
1999-2000,4,1298.0,3756.36,13105.246918,34.56,5.397605e-79,1626.38,253.98,180.57,1.95,17.28,27.22,2658.14,367.72,1811.55,3743.15,97.13,114.52,1298.44,2.62,6.66,3.05,2.92,112.19,10.17,5.0,1999-2000


## Remove rows with missing values:

In [84]:
col_names = list(result_1999_2018_cleaned.columns)

In [85]:
for i in range (0, len(col_names)):
    drop_rows(result_1999_2018_cleaned, col_names[i], [np.nan])

In [86]:
len(result_1999_2018_cleaned)

86464

In [87]:
result_1999_2018_cleaned.isnull().values.any()

False

## See percentage of removed values

In [88]:
before = len(result_1999_2018)
before

95418

In [89]:
after = len(result_1999_2018_cleaned)
after

86464

In [90]:
(before-after)/after

0.10355754996299038

In [91]:
result_1999_2018_cleaned

Unnamed: 0,Unnamed: 1,DRD320GW,DRDTSODI,DRX18YR,DRXTALCO,DRXTCAFF,DRXTCALC,DRXTCARB,DRXTCHOL,DRXTCOPP,DRXTFIBE,DRXTIRON,DRXTKCAL,DRXTMAGN,DRXTPHOS,DRXTPOTA,DRXTPROT,DRXTTFAT,DRXTVARE,DRXTVB1,DRXTVB12,DRXTVB2,DRXTVB6,DRXTVC,DRXTZINC,SEQN,Year
1999-2000,0,2.433800e+02,1621.35,1348.028592,5.397605e-79,5.397605e-79,541.30,250.36,46.55,0.410,7.41,12.18,1358.88,126.80,503.75,1387.70,31.96,27.24,604.33,1.500,2.42,1.550,1.430,220.64,4.69,1.0,1999-2000
1999-2000,1,5.397605e-79,5710.03,3315.985398,5.397605e-79,5.304500e+02,925.37,350.37,313.95,2.080,36.99,37.29,2463.00,502.25,1974.57,4672.48,123.16,71.95,923.91,2.110,8.68,3.250,2.900,119.12,41.61,2.0,1999-2000
1999-2000,2,5.310000e+02,1676.51,3525.877253,5.397605e-79,3.540000e+01,415.27,233.63,86.22,0.850,11.16,9.78,1517.69,172.58,674.82,1487.16,40.19,49.94,885.72,1.210,1.59,1.430,1.370,57.95,5.65,3.0,1999-2000
1999-2000,3,1.652000e+03,1277.31,715.133313,5.397605e-79,5.397605e-79,1315.39,191.03,194.78,0.520,5.45,5.79,1474.93,204.10,1266.80,2245.42,56.16,56.20,337.56,0.960,4.25,2.230,0.950,65.85,6.72,4.0,1999-2000
1999-2000,4,1.298000e+03,3756.36,13105.246918,3.456000e+01,5.397605e-79,1626.38,253.98,180.57,1.950,17.28,27.22,2658.14,367.72,1811.55,3743.15,97.13,114.52,1298.44,2.620,6.66,3.050,2.920,112.19,10.17,5.0,1999-2000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-2018,8699,1.267500e+03,2242.00,1534.772798,5.397605e-79,1.300000e+01,831.00,139.92,56.00,1.088,28.70,11.31,1080.00,397.00,1083.00,2702.00,52.59,38.68,1324.00,1.191,1.93,1.190,1.170,89.00,7.57,102952.0,2017-2018
2017-2018,8700,2.896130e+03,6123.00,5625.817867,5.397605e-79,1.130000e+02,1574.00,333.41,306.00,2.380,57.00,28.11,3072.00,686.00,2613.00,5289.00,188.17,114.96,743.00,2.474,6.30,2.899,4.136,221.40,29.20,102953.0,2017-2018
2017-2018,8701,2.700000e+03,2993.00,1231.251836,5.397605e-79,5.397605e-79,946.00,218.10,361.00,0.768,9.50,20.55,1757.00,205.00,1085.00,1424.00,72.70,66.58,743.00,2.134,7.34,2.284,2.811,55.50,7.71,102954.0,2017-2018
2017-2018,8702,1.014000e+03,955.00,3058.872668,5.397605e-79,1.400000e+01,951.00,192.73,116.00,0.326,10.20,15.72,1256.00,146.00,798.00,1302.00,29.62,44.11,1024.00,1.613,7.79,2.478,1.880,46.40,8.02,102955.0,2017-2018


## MongoDB Insertion

In [92]:
#Import MongoClient
from pymongo import MongoClient

In [93]:
#Create a MongoClient to run the MongoDB instance
client = MongoClient("localhost", 27017)

In [94]:
client

MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True)

In [95]:
#Creating a database
db = client['NHANES']
db

Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'NHANES')

In [96]:
db.list_collection_names()

['bmx',
 'alq',
 'smqfam',
 'smq',
 'bpq',
 'diq',
 'whq',
 'demo',
 'huq',
 'bpx',
 'mcq_h',
 'tchol',
 'paq',
 'descr',
 'drxtot',
 'hiq',
 'demo_p',
 'mcq_a',
 'mcq_b',
 'rdq',
 'mcq_c']

In [97]:
#Creating a collection
drxtot = db.drxtot

In [98]:
#If collections exist, then drop
if 'drxtot' in db.list_collection_names():
    drxtot.drop()
    db.list_collection_names()

## Inputting into DB

In [99]:
result_1999_2018_cleaned.rename(columns= {'SEQN':'_id'}, inplace=True)

In [100]:
result_1999_2018_cleaned[:3]

Unnamed: 0,Unnamed: 1,DRD320GW,DRDTSODI,DRX18YR,DRXTALCO,DRXTCAFF,DRXTCALC,DRXTCARB,DRXTCHOL,DRXTCOPP,DRXTFIBE,DRXTIRON,DRXTKCAL,DRXTMAGN,DRXTPHOS,DRXTPOTA,DRXTPROT,DRXTTFAT,DRXTVARE,DRXTVB1,DRXTVB12,DRXTVB2,DRXTVB6,DRXTVC,DRXTZINC,_id,Year
1999-2000,0,243.38,1621.35,1348.028592,5.397605e-79,5.397605e-79,541.3,250.36,46.55,0.41,7.41,12.18,1358.88,126.8,503.75,1387.7,31.96,27.24,604.33,1.5,2.42,1.55,1.43,220.64,4.69,1.0,1999-2000
1999-2000,1,5.397605e-79,5710.03,3315.985398,5.397605e-79,530.45,925.37,350.37,313.95,2.08,36.99,37.29,2463.0,502.25,1974.57,4672.48,123.16,71.95,923.91,2.11,8.68,3.25,2.9,119.12,41.61,2.0,1999-2000
1999-2000,2,531.0,1676.51,3525.877253,5.397605e-79,35.4,415.27,233.63,86.22,0.85,11.16,9.78,1517.69,172.58,674.82,1487.16,40.19,49.94,885.72,1.21,1.59,1.43,1.37,57.95,5.65,3.0,1999-2000


In [101]:
drxtot_dict = result_1999_2018_cleaned.to_dict(orient='records')

In [102]:
drxtot_dict[0]

{'DRD320GW': 243.38,
 'DRDTSODI': 1621.3500000000004,
 'DRX18YR': 1348.028591862023,
 'DRXTALCO': 5.397605346934028e-79,
 'DRXTCAFF': 5.397605346934028e-79,
 'DRXTCALC': 541.3,
 'DRXTCARB': 250.36,
 'DRXTCHOL': 46.55,
 'DRXTCOPP': 0.41,
 'DRXTFIBE': 7.41,
 'DRXTIRON': 12.18,
 'DRXTKCAL': 1358.88,
 'DRXTMAGN': 126.8,
 'DRXTPHOS': 503.75,
 'DRXTPOTA': 1387.7,
 'DRXTPROT': 31.96,
 'DRXTTFAT': 27.24,
 'DRXTVARE': 604.33,
 'DRXTVB1': 1.5,
 'DRXTVB12': 2.42,
 'DRXTVB2': 1.55,
 'DRXTVB6': 1.43,
 'DRXTVC': 220.64,
 'DRXTZINC': 4.69,
 '_id': 1.0,
 'Year': '1999-2000'}

In [103]:
#Insert collection
drxtot.insert_many(drxtot_dict)

<pymongo.results.InsertManyResult at 0x11bd605b0>

In [104]:
db.list_collection_names()

['bmx',
 'alq',
 'smqfam',
 'smq',
 'bpq',
 'diq',
 'whq',
 'demo',
 'huq',
 'bpx',
 'drxtot',
 'mcq_h',
 'tchol',
 'paq',
 'descr',
 'hiq',
 'demo_p',
 'mcq_a',
 'mcq_b',
 'rdq',
 'mcq_c']