In [1]:
#Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob
import re
from collections import defaultdict

# Description of Collections

In [2]:
# Maps each NHANES collection name to a human-readable description.
# Insertion order is kept as-is because it determines the row order of the
# DataFrame built from this mapping.
descr_dict = {
    'alq': 'Alcohol Use',
    'bmx': 'Body Measures',
    'bpq': 'Blood Pressure',
    'bpx': 'Blood Pressure - Measures',
    'demo': 'Demographics',
    'demo_p': 'Demographics for Vis',
    'diq': 'Diabetes',
    'drxtot': 'Dietary',
    'hiq': 'Health Insurance',
    'huq': 'Hospital Utilization',
    'mcq_a': 'Asthma',
    'mcq_h': 'Heart Disease',
    'paq': 'Physical Activity',
    'smq': 'Smoking',
    'smqfam': 'Household Smoking',
    'tchol': 'Cholesterol',
    'whq': 'Weight History',
    'mcq_c': 'Cancer',
    'mcq_b': 'Bronchitis',
    'rdq': 'Cough',
}

## MongoDB Insertion

In [3]:
# PyMongo's client class is the entry point for talking to a MongoDB server.
from pymongo import MongoClient

# Open a client against the MongoDB instance on this machine (default port).
client = MongoClient(host="localhost", port=27017)

In [4]:
# Get a handle on the NHANES database and echo it as the cell output.
db = client["NHANES"]
db

Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'NHANES')

In [5]:
# List the collections currently present in the NHANES database.
db.list_collection_names()

['mcq_h',
 'smq',
 'smqfam',
 'bpx',
 'descr',
 'mcq_b',
 'hiq',
 'demo',
 'bpq',
 'drxtot',
 'huq',
 'tchol',
 'mcq_a',
 'mcq_c',
 'demo_p',
 'paq',
 'whq',
 'alq',
 'rdq',
 'diq',
 'bmx']

In [6]:
# Get a handle on the `descr` collection. This only builds a reference;
# MongoDB creates collections lazily, on the first write.
descr = db["descr"]

In [7]:
# Drop any existing `descr` collection so the insert further down starts from
# an empty collection; re-running the notebook would otherwise try to insert
# documents whose `_id` values already exist.
if 'descr' in db.list_collection_names():
    descr.drop()

# Show the collections after the drop. This must be the last top-level
# expression of the cell: when nested inside the `if`, its value was discarded
# and nothing was displayed.
db.list_collection_names()

## Create the new collection to insert into the database

In [8]:
# Build a two-column frame from the (collection name, description) pairs.
# The collection name is stored under `_id` so it serves as the primary key
# of each document once the rows are inserted into MongoDB.
descr_df = pd.DataFrame(
    list(descr_dict.items()),
    columns=['_id', 'Description'],
)
descr_df

Unnamed: 0,_id,Description
0,alq,Alcohol Use
1,bmx,Body Measures
2,bpq,Blood Pressure
3,bpx,Blood Pressure - Measures
4,demo,Demographics
5,demo_p,Demographics for Vis
6,diq,Diabetes
7,drxtot,Dietary
8,hiq,Health Insurance
9,huq,Hospital Utilization


In [9]:
# Turn the frame into a list of one-dict-per-row records, the shape expected
# by `insert_many`.
# NOTE(review): this rebinds `descr_dict` from the name -> description mapping
# to a list of records, so the DataFrame-building cell above cannot be re-run
# after this one without first re-running the cell that defines the mapping.
descr_dict = descr_df.to_dict('records')
descr_dict

[{'Description': 'Alcohol Use', '_id': 'alq'},
 {'Description': 'Body Measures', '_id': 'bmx'},
 {'Description': 'Blood Pressure', '_id': 'bpq'},
 {'Description': 'Blood Pressure - Measures', '_id': 'bpx'},
 {'Description': 'Demographics', '_id': 'demo'},
 {'Description': 'Demographics for Vis', '_id': 'demo_p'},
 {'Description': 'Diabetes', '_id': 'diq'},
 {'Description': 'Dietary', '_id': 'drxtot'},
 {'Description': 'Health Insurance', '_id': 'hiq'},
 {'Description': 'Hospital Utilization', '_id': 'huq'},
 {'Description': 'Asthma', '_id': 'mcq_a'},
 {'Description': 'Heart Disease', '_id': 'mcq_h'},
 {'Description': 'Physical Activity', '_id': 'paq'},
 {'Description': 'Smoking', '_id': 'smq'},
 {'Description': 'Household Smoking', '_id': 'smqfam'},
 {'Description': 'Cholesterol', '_id': 'tchol'},
 {'Description': 'Weight History', '_id': 'whq'},
 {'Description': 'Cancer', '_id': 'mcq_c'},
 {'Description': 'Bronchitis', '_id': 'mcq_b'},
 {'Description': 'Cough', '_id': 'rdq'}]

In [10]:
# Peek at the first record to confirm the document shape before inserting.
descr_dict[0]

{'Description': 'Alcohol Use', '_id': 'alq'}

In [11]:
# Insert all description records into the `descr` collection in one call.
# Each record carries its own `_id` (the collection name).
descr.insert_many(descr_dict)

<pymongo.results.InsertManyResult at 0x11c377308>

In [12]:
# List the collections again to confirm that `descr` is present after the insert.
db.list_collection_names()

['mcq_h',
 'smq',
 'smqfam',
 'bpx',
 'descr',
 'mcq_b',
 'hiq',
 'demo',
 'bpq',
 'drxtot',
 'huq',
 'tchol',
 'mcq_a',
 'mcq_c',
 'demo_p',
 'paq',
 'whq',
 'alq',
 'rdq',
 'diq',
 'bmx']