In [71]:
#Dependencies

import pandas as pd
import numpy as np

In [72]:
#Loads each survey table from the Data directory into its own DataFrame

#These five files are read with pandas' default encoding
(demographicDf, dietDf, examinationDf, labsDf, questionnaireDf) = (
    pd.read_csv(csvPath)
    for csvPath in (
        "Data/demographic.csv",
        "Data/diet.csv",
        "Data/examination.csv",
        "Data/labs.csv",
        "Data/questionnaire.csv",
    )
)

#medications.csv is decoded as ISO-8859-1
#NOTE(review): presumably it contains bytes that are not valid UTF-8 - confirm
medicationsDf = pd.read_csv("Data/medications.csv", encoding="ISO-8859-1")

In [73]:
#Drops conditions in medicationsDf that appear less than 100 times
#value_counts(dropna=False) also counts missing descriptions, so rows whose
#RXDRSD1 is null are kept as long as there are at least 100 of them

medValues = medicationsDf["RXDRSD1"].value_counts(dropna=False)
frequentConditions = medValues[medValues >= 100].index

#.copy() makes the filtered frame independent of the frame it was selected from,
#so later column assignments on medicationsDf do not raise SettingWithCopyWarning
medicationsDf = medicationsDf[medicationsDf["RXDRSD1"].isin(frequentConditions)].copy()

In [74]:
#Fills every missing value in the RXDRSD1 column with the string "Healthy"
#NOTE(review): presumably a missing description means no condition was reported - confirm

conditionDescriptions = medicationsDf["RXDRSD1"]
medicationsDf["RXDRSD1"] = conditionDescriptions.fillna(value="Healthy")


In [75]:
#Gives the coded medicationsDf columns descriptive names
#Codes listed here that are not columns of medicationsDf are ignored by rename
#NOTE(review): "Teken" in the first name looks like a typo for "Taken"; it is
#kept unchanged because other cells may refer to the column by this exact name

medicationColumnNames = {
    "RXDUSE": "Medications_Teken_Last_30_Days",
    "RXDDRUG": "Generic_Drug_Name",
    "RXDDRGID": "Generic_Drug_Code",
    "RXQSEEN": "Was_Prescription_Container_Seen_By_Interviewer",
    "RXDDAYS": "Duration_Taken_Medications",
    "RXDRSC1": "ICD_10_CM_Code_1",
    "RXDRSC2": "ICD_10_CM_Code_2",
    "RXDRSC3": "ICD_10_CM_Code_3",
    "RXDRSD1": "ICD_10_CM_Code_1_Description",
    "RXDRSD2": "ICD_10_CM_Code_2_Description",
    "RXDRSD3": "ICD_10_CM_Code_3_Description",
    "RXDCOUNT": "The_Number_Of_Prescription_Medicines_Reported",
}
medicationsDf = medicationsDf.rename(columns=medicationColumnNames)

In [76]:
#Gives the coded labsDf columns descriptive names
#Every code ending in .x or .y is mapped to the same name "ERROR", so labsDf
#ends up with several columns called "ERROR" when those codes are present
#NOTE(review): the .x/.y suffixes look like leftovers from merging tables that
#shared column names - confirm
#Codes listed here that are not columns of labsDf are ignored by rename

labColumnNames = {
    "URXUMA": "Albumin, urine (ug/mL)",
    "URXUMS": "Albumin, urine (mg/L)",
    "URXUCR.x": "ERROR",
    "URXCRS": "Creatinine, urine (umol/L)",
    "URDACT": "Albumin creatinine ratio (mg/g)",
    "WTSAF2YR.x": "ERROR",
    "LBXAPB": "Apolipoprotein (B) (mg/dL)",
    "LBDAPBSI": "Apolipoprotein (B) (g/L)",
    "LBXSAL": "Albumin (g/dL)",
    "LBDSALSI": "Albumin (g/L)",
    "LBXSAPSI": "Alkaline phosphatase (IU/L)",
    "LBXSASSI": "Aspartate aminotransferase AST (IU/L)",
    "LBXSATSI": "Alanine aminotransferase ALT (IU/L)",
    "LBXSBU": "Blood urea nitrogen (mg/dL)",
    "LBDSBUSI": "Blood urea nitrogen (mmol/L)",
    "LBXSC3SI": "Bicarbonate (mmol/L)",
    "LBXSCA": "Total calcium (mg/dL)",
    "LBDSCASI": "Total calcium (mmol/L)",
    "LBXSCH": "Cholesterol (mg/dL)",
    "LBDSCHSI": "Cholesterol (mmol/L)",
    "LBXSCK": "Creatine Phosphokinase(CPK) (IU/L)",
    "LBXSCLSI": "Chloride (mmol/L)",
    "LBXSCR": "Creatinine (mg/dL)",
    "LBDSCRSI": "Creatinine (umol/L)",
    "LBXSGB": "Globulin (g/dL)",
    "LBDSGBSI": "Globulin (g/L)",
    "LBXSGL": "Glucose, refrigerated serum (mg/dL)",
    "LBDSGLSI": "Glucose, refrigerated serum (mmol/L)",
    "LBXSGTSI": "Gamma glutamyl transferase (U/L)",
    "LBXSIR": "Iron, refrigerated serum (ug/dL)",
    "LBDSIRSI": "Iron, refrigerated serum (umol/L)",
    "LBXSKSI": "Potassium (mmol/L)",
    "LBXSLDSI": "Lactate dehydrogenase (U/L)",
    "LBXSNASI": "Sodium (mmol/L)",
    "LBXSOSSI": "Osmolality (mmol/Kg)",
    "LBXSPH": "Phosphorus (mg/dL)",
    "LBDSPHSI": "Phosphorus (mmol/L)",
    "LBXSTB": "Total bilirubin (mg/dL)",
    "LBDSTBSI": "Total bilirubin (umol/L)",
    "LBXSTP": "Total protein (g/dL)",
    "LBDSTPSI": "Total protein (g/L)",
    "LBXSTR": "Triglycerides, refrigerated (mg/dL)",
    "LBDSTRSI": "Triglycerides, refrigerated (mmol/L)",
    "LBXSUA": "Uric acid (mg/dL)",
    "LBDSUASI": "Uric acid (umol/L)",
    "LBXWBCSI": "White blood cell count (1000 cells/uL)",
    "LBXLYPCT": "Lymphocyte percent (%)",
    "LBXMOPCT": "Monocyte percent (%)",
    "LBXNEPCT": "Segmented neutrophils percent (%)",
    "LBXEOPCT": "Eosinophils percent (%)",
    "LBXBAPCT": "Basophils percent (%)",
    "LBDLYMNO": "Lymphocyte number (1000 cells/uL)",
    "LBDMONO": "Monocyte number (1000 cells/uL)",
    "LBDNENO": "Segmented neutrophils num (1000 cell/uL)",
    "LBDEONO": "Eosinophils number (1000 cells/uL)",
    "LBDBANO": "Basophils number (1000 cells/uL)",
    "LBXRBCSI": "Red blood cell count (million cells/uL)",
    "LBXHGB": "Hemoglobin (g/dL)",
    "LBXHCT": "Hematocrit (%)",
    "LBXMCVSI": "Mean cell volume (fL)",
    "LBXMCHSI": "Mean cell hemoglobin (pg)",
    "LBXMC": "Mean cell hemoglobin concentration (g/dL)",
    "LBXRDW": "Red cell distribution width (%)",
    "LBXPLTSI": "Platelet count (1000 cells/uL)",
    "LBXMPSI": "Mean platelet volume (fL)",
    "URXUCL": "Chlamydia, urine",
    "WTSA2YR.x": "ERROR",
    "LBXSCU": "Serum Copper (ug/dL)",
    "LBDSCUSI": "Serum Copper (umol/L)",
    "LBXSSE": "Serum Selenium (ug/L)",
    "LBDSSESI": "Serum Selenium (umol/L)",
    "LBXSZN": "Serum Zinc (ug/dL)",
    "LBDSZNSI": "Serum Zinc (umol/L)",
    "URXUCR.y": "ERROR",
    "WTSB2YR.x": "ERROR",
    "URXBP3": "Urinary 2-Hydroxy-4-metoxybenzophenone (Benzophenone-3)",
    "URDBP3LC": "Urinary 2-Hydroxy-4-metoxybenzophenone (Benzophenone-3) comment code",
    "URXBPH": "Urinary Bisphenol A (ng/mL)",
    "URDBPHLC": "Urinary Bisphenol A comment",
    "URXBPF": "Urinary 4,4' dihydroxydiphenylmethane",
    "URDBPFLC": "Urinary 4,4' dihydroxydiphenylmethane comment code",
    "URXBPS": "Urinary 4,4'sulfonyldiphenol",
    "URDBPSLC": "Urinary 4,4' sulfonyldiphenol comment code",
    "URXTLC": "Urinary Triclocarban (ng/mL)",
    "URDTLCLC": "Urinary Triclocarban comment",
    "URXTRS": "Urinary 2,4,4'-Trichloro-2'-hydroxyphenyl ether (Triclosan)",
    "URDTRSLC": "Urinary 2,4,4'-Trichloro-2'-hydroxyphenyl ether (Triclosan) comment code",
    "URXBUP": "Butyl paraben (ng/ml)",
    "URDBUPLC": "Butyl paraben comment",
    "URXEPB": "Ethyl paraben (ng/ml)",
    "URDEPBLC": "Ethyl paraben comment",
    "URXMPB": "Methyl paraben (ng/ml)",
    "URDMPBLC": "Methyl paraben comment",
    "URXPPB": "Propyl paraben (ng/ml)",
    "URDPPBLC": "Propyl paraben comment",
    "URX14D": "2,5-dichlorophenol (ug/L)",
    "URD14DLC": "2,5-dichlorophenol comment",
    "URXDCB": "2,4-dichlorophenol (ug/L)",
    "URDDCBLC": "2,4-dichlorophenol comment",
    "URXUCR": "Urinary creatinine (mg/dL)",
    "PHQ020": "Coffee or tea with cream or sugar? [Include milk or non-dairy creamers.]",
    "PHACOFHR": "The time (in hours) since the examinee last drank coffee or tea with cream or sugar.",
    "PHACOFMN": "The time (in minutes) since the examinee last drank coffee or tea with cream or sugar.",
    "PHQ030": "Alcohol, such as beer, wine, or liquor?",
    "PHAALCHR": "The time (in hours) the examinee last drank alcohol such as beer, wine, or liquor.",
    "PHAALCMN": "The time (in minutes) the examinee last drank alcohol such as beer, wine, or liquor.",
    "PHQ040": "Gum, breath mints, lozenges or cough drops, or other cough or cold remedies?",
    "PHAGUMHR": "The time (in hours) the examinee last had gum, breath mints, lozenges or cough drops, or other cough or cold remedies.",
    "PHAGUMMN": "The time (in minutes) the examinee last had gum, breath mints, lozenges or cough drops, or other cough or cold remedies.",
    "PHQ050": "Antacids, laxatives, or anti-diarrheals?",
    "PHAANTHR": "The time (in hours) the examinee last had antacids, laxatives or anti-diarrheals.",
    "PHAANTMN": "The time (in minutes) the examinee last had antacids, laxatives or anti-diarrheals.",
    "PHQ060": "Dietary supplements such as vitamins and minerals? [Include multivitamins and single nutrient supplements.]",
    "PHASUPHR": "The time (in hours) the examinee last had dietary supplements such as vitamins and minerals.",
    "PHASUPMN": "The time (in minutes) the examinee last had dietary supplements such as vitamins and minerals.",
    "PHAFSTHR.x": "ERROR",
    "PHAFSTMN.x": "ERROR",
    "PHDSESN": "Session in which SP was examined",
    "LBDPFL": "Fluoride, plasma (umol/L) average 2 values",
    "LBDWFL": "Fluoride, water (mg/L) average 2 values",
    "LBDHDD": "Direct HDL-Cholesterol (mg/dL)",
    "LBDHDDSI": "Direct HDL-Cholesterol (mmol/L)",
    "LBXHA": "Hepatitis A antibody",
    "LBXHBS": "Hepatitis B Surface Antibody",
    "LBXHBC": "Hepatitis B core antibody",
    "LBDHBG": "Hepatitis B surface antigen",
    "LBDHD": "Hepatitis D (anti-HDV)",
    "LBXHCR": "Hepatitis C RNA (HCV-RNA)",
    "LBXHCG": "Hepatitis C genotype",
    "LBDHEG": "Hepatitis E IgG (anti-HEV)",
    "LBDHEM": "Hepatitis E IgM (anti-HEV)",
    "LBXHE1": "Herpes Simplex Virus Type 1",
    "LBXHE2": "Herpes Simplex Virus Type 2",
    "LBXGH": "Glycohemoglobin (%)",
    "LBDHI": "HIV antibody test result",
    "ORXGH": "HPV High Globulin Band result",
    "ORXGL": "HPV Low Globulin Band result",
    "ORXH06": "HPV Type 6",
    "ORXH11": "HPV Type 11",
    "ORXH16": "HPV Type 16",
    "ORXH18": "HPV Type 18",
    "ORXH26": "HPV Type 26",
    "ORXH31": "HPV Type 31",
    "ORXH33": "HPV Type 33",
    "ORXH35": "HPV Type 35",
    "ORXH39": "HPV Type 39",
    "ORXH40": "HPV Type 40",
    "ORXH42": "HPV Type 42",
    "ORXH45": "HPV Type 45",
    "ORXH51": "HPV Type 51",
    "ORXH52": "HPV Type 52",
    "ORXH53": "HPV Type 53",
    "ORXH54": "HPV Type 54",
    "ORXH55": "HPV Type 55",
    "ORXH56": "HPV Type 56",
    "ORXH58": "HPV Type 58",
    "ORXH59": "HPV Type 59",
    "ORXH61": "HPV Type 61",
    "ORXH62": "HPV Type 62",
    "ORXH64": "HPV Type 64",
    "ORXH66": "HPV Type 66",
    "ORXH67": "HPV Type 67",
    "ORXH68": "HPV Type 68",
    "ORXH69": "HPV Type 69",
    "ORXH70": "HPV Type 70",
    "ORXH71": "HPV Type 71",
    "ORXH72": "HPV Type 72",
    "ORXH73": "HPV Type 73",
    "ORXH81": "HPV Type 81",
    "ORXH82": "HPV Type 82",
    "ORXH83": "HPV Type 83",
    "ORXH84": "HPV Type 84",
    "ORXHPC": "HPV CP 6108",
    "ORXHPI": "HPV Type IS39",
    "ORXHPV": "Oral HPV Result",
    "LBDRPCR.x": "ERROR",
    "LBDRHP.x": "ERROR",
    "LBDRLP.x": "ERROR",
    "LBDR06.x": "ERROR",
    "LBDR11.x": "ERROR",
    "LBDR16.x": "ERROR",
    "LBDR18.x": "ERROR",
    "LBDR26.x": "ERROR",
    "LBDR31.x": "ERROR",
    "LBDR33.x": "ERROR",
    "LBDR35.x": "ERROR",
    "LBDR39.x": "ERROR",
    "LBDR40.x": "ERROR",
    "LBDR42.x": "ERROR",
    "LBDR45.x": "ERROR",
    "LBDR51.x": "ERROR",
    "LBDR52.x": "ERROR",
    "LBDR53.x": "ERROR",
    "LBDR54.x": "ERROR",
    "LBDR55.x": "ERROR",
    "LBDR56.x": "ERROR",
    "LBDR58.x": "ERROR",
    "LBDR59.x": "ERROR",
    "LBDR61.x": "ERROR",
    "LBDR62.x": "ERROR",
    "LBDR64.x": "ERROR",
    "LBDR66.x": "ERROR",
    "LBDR67.x": "ERROR",
    "LBDR68.x": "ERROR",
    "LBDR69.x": "ERROR",
    "LBDR70.x": "ERROR",
    "LBDR71.x": "ERROR",
    "LBDR72.x": "ERROR",
    "LBDR73.x": "ERROR",
    "LBDR81.x": "ERROR",
    "LBDR82.x": "ERROR",
    "LBDR83.x": "ERROR",
    "LBDR84.x": "ERROR",
    "LBDR89.x": "ERROR",
    "LBDRPI.x": "ERROR",
    "LBXHP2C": "Cobas HPV Swab High Risk",
    "LBDRPCR.y": "ERROR",
    "LBDRHP.y": "ERROR",
    "LBDRLP.y": "ERROR",
    "LBDR06.y": "ERROR",
    "LBDR11.y": "ERROR",
    "LBDR16.y": "ERROR",
    "LBDR18.y": "ERROR",
    "LBDR26.y": "ERROR",
    "LBDR31.y": "ERROR",
    "LBDR33.y": "ERROR",
    "LBDR35.y": "ERROR",
    "LBDR39.y": "ERROR",
    "LBDR40.y": "ERROR",
    "LBDR42.y": "ERROR",
    "LBDR45.y": "ERROR",
    "LBDR51.y": "ERROR",
    "LBDR52.y": "ERROR",
    "LBDR53.y": "ERROR",
    "LBDR54.y": "ERROR",
    "LBDR55.y": "ERROR",
    "LBDR56.y": "ERROR",
    "LBDR58.y": "ERROR",
    "LBDR59.y": "ERROR",
    "LBDR61.y": "ERROR",
    "LBDR62.y": "ERROR",
    "LBDR64.y": "ERROR",
    "LBDR66.y": "ERROR",
    "LBDR67.y": "ERROR",
    "LBDR68.y": "ERROR",
    "LBDR69.y": "ERROR",
    "LBDR70.y": "ERROR",
    "LBDR71.y": "ERROR",
    "LBDR72.y": "ERROR",
    "LBDR73.y": "ERROR",
    "LBDR81.y": "ERROR",
    "LBDR82.y": "ERROR",
    "LBDR83.y": "ERROR",
    "LBDR84.y": "ERROR",
    "LBDR89.y": "ERROR",
    "LBDRPI.y": "ERROR",
    "WTSAF2YR.y": "ERROR",
    "LBXIN": "Insulin (uU/mL)",
    "LBDINSI": "Insulin (pmol/L)",
    "PHAFSTHR.y": "ERROR",
    "PHAFSTMN.y": "ERROR",
    "URXUIO": "Iodine, urine (ug/L)",
    "WTSAF2YR": "Fasting Subsample 2 Year MEC Weight",
    "LBXTR": "Triglyceride (mg/dL)",
    "LBDTRSI": "Triglyceride (mmol/L)",
    "LBDLDL": "LDL-cholesterol (mg/dL)",
    "LBDLDLSI": "LDL-cholesterol (mmol/L)",
    "WTSH2YR.x": "ERROR",
    "LBXIHG": "Inorganic mercury, blood (ug/L)",
    "LBDIHGSI": "Mercury, inorganic (umol/L )",
    "LBDIHGLC": "Mercury, inorganic comment code",
    "LBXBGE": "Mercury, ethyl (ug/L)",
    "LBDBGELC": "Mercury, ethyl comment code",
    "LBXBGM": "Mercury, methyl (ug/L)",
    "LBDBGMLC": "Mercury, methyl comment code",
    "WTSOG2YR": "OGTT Subsample 2 Year MEC Weight",
    "LBXGLT": "Two Hour Glucose(OGTT) (mg/dL)",
    "LBDGLTSI": "Two Hour Glucose(OGTT) (mmol/L)",
    "GTDSCMMN": "Glucose challenge Administer Time in minutes",
    "GTDDR1MN": "Time from fast glucose & challenge(min)",
    "GTDBL2MN": "Time from fasting glucose & OGTT (min)",
    "GTDDR2MN": "Time from glucose challenge & OGTT(min)",
    "GTXDRANK": "Amount of glucose challenge drank",
    "PHAFSTHR": "Total length of 'food fast', hours",
    "PHAFSTMN": "Total length of 'food fast', minutes",
    "GTDCODE": "Incomplete OGTT Comment Code",
    "WTSA2YR.y": "ERROR",
    "URXP01": "1-Hydroxynaphthalene (ng/L)",
    "URDP01LC": "1-Hydroxynaphthalene Comment Code",
    "URXP02": "2-Hydroxynaphthalene (ng/L)",
    "URDP02LC": "2-Hydroxynaphthalene Comment Code",
    "URXP03": "3-Hydroxyfluorene (ng/L)",
    "URDP03LC": "3-Hydroxyfluorene Comment Code",
    "URXP04": "2-Hydroxyfluorene (ng/L)",
    "URDP04LC": "2-Hydroxyfluorene Comment Code",
    "URXP06": "1-Hydroxyphenanthrene (ng/L)",
    "URDP06LC": "1-Hydroxyphenanthrene Comment Code",
    "URXP10": "1-Hydroxypyrene (ng/L)",
    "URDP10LC": "1-Hydroxypyrene Comment Code",
    "URXP25": "2-Hydroxyphenanthrene (ng/L) & 3-Hydroxyphenanthrene (ng/L)",
    "URDP25LC": "2-Hydroxyphenanthrene (ng/L) & 3-Hydroxyphenanthrene Comment Code",
    "WTSA2YR": "Subsample A weights",
    "URXUP8": "Urinary perchlorate (ng/mL)",
    "URDUP8LC": "Urinary Perchlorate comment code",
    "URXNO3": "Urinary nitrate (ng/mL)",
    "URDNO3LC": "Urinary nitrate comment code",
    "URXSCN": "Urinary thiocyanate (ng/mL)",
    "URDSCNLC": "Urinary thiocyanate comment code",
    "WTSB2YR.y": "ERROR",
    "LBXPFDE": "Pefluorodecanoic acid (ug/L)",
    "LBDPFDEL": "Pefluorodecanoic acid comment code",
    "LBXPFHS": "Perfluorohexane sulfonic acid (ug/L)",
    "LBDPFHSL": "Perfluorohexane sulfonic acid comment",
    "LBXMPAH": "2-(N-Methyl-perfluorooctane sulfonamido) acetic acid (ug/L)",
    "LBDMPAHL": "2-(N-Methyl-perfluorooctane sulfonamido) acetic acid comment code",
    "LBXPFBS": "Perfluorobutane sulfonic acid (ug/L)",
    "LBDPFBSL": "Perfluorobutane sulfonic acid comment code",
    "LBXPFHP": "Perfluoroheptanoic acid (ug/L)",
    "LBDPFHPL": "Perfluoroheptanoic acid comment code",
    "LBXPFNA": "Perfluorononanoic acid (ug/L)",
    "LBDPFNAL": "Perfluorononanoic acid comment code",
    "LBXPFUA": "Perfluoroundecanoic acid (ug/L)",
    "LBDPFUAL": "Perfluoroundecanoic acid comment code",
    "LBXPFDO": "Perflurododecanoic acid (ug/L)",
    "LBDPFDOL": "Perflurododecanoic acid comment code",
    "WTSB2YR": "Subsample B weights",
    "URXCNP": "Mono(carboxynonyl) Phthalate (ng/mL)",
    "URDCNPLC": "Mono(carboxynonyl) phthalate comment code",
    "URXCOP": "Mono(carboxyoctyl) Phthalate (ng/mL)",
    "URDCOPLC": "Mono(carboxyoctyl) phthalate comment code",
    "URXECP": "Mono-2-ethyl-5-carboxypentyl phthalate (ng/mL)",
    "URDECPLC": "Mono-2-ethyl-5-carboxypentyl phthalate comment code",
    "URXMBP": "Mono-n-butyl phthalate (ng/mL)",
    "URDMBPLC": "Mono-n-butyl phthalate comment code",
    "URXMC1": "Mono-(3-carboxypropyl) phthalate (ng/mL)",
    "URDMC1LC": "Mono-(3-carboxypropyl) phthalate comment code",
    "URXMEP": "Mono-ethyl phthalate (ng/mL)",
    "URDMEPLC": "Mono-ethyl phthalate comment code",
    "URXMHH": "Mono-(2-ethyl-5-hydroxyhexyl) phthalate (ng/mL)",
    "URDMHHLC": "Mono-(2-ethyl-5-hydroxyhexyl) phthalate comment code",
    "URXMHNC": "Cyclohexane 1,2-dicarboxylic acid monohydroxy isononyl ester (ng/mL)",
    "URDMCHLC": "Cyclohexane 1,2-dicarboxylic acid monohydroxy isononyl ester comment code",
    "URXMHP": "Mono-(2-ethyl)-hexyl phthalate (ng/mL)",
    "URDMHPLC": "Mono-(2-ethyl)-hexyl phthalate comment code",
    "URXMIB": "Mono-isobutyl phthalate (ng/mL)",
    "URDMIBLC": "Mono-isobutyl phthalate comment code",
    "URXMNP": "Mono-isononyl phthalate (ng/mL)",
    "URDMNPLC": "Mono-isononyl phthalate comment code",
    "URXMOH": "Mono-(2-ethyl-5-oxohexyl) phthalate (ng/mL)",
    "URDMOHLC": "Mono-(2-ethyl-5-oxohexyl) phthalate comment code",
    "URXMZP": "Mono-benzyl phthalate (ng/mL)",
    "URDMZPLC": "Mono-benzyl phthalate comment code",
    "LBXTC": "Total Cholesterol( mg/dL)",
    "LBDTCSI": "Total Cholesterol( mmol/L)",
    "LBXTTG": "Tissue transglutaminase(IgA-TTG)",
    "LBXEMA": "Endomyseal antibody (IgA EMA)",
    "WTSH2YR.y": "ERROR",
    "LBXBPB": "Blood lead (ug/dL)",
    "LBDBPBSI": "Blood lead (umol/L)",
    "LBDBPBLC": "Blood lead comment code",
    "LBXBCD": "Blood cadmium (ug/L)",
    "LBDBCDSI": "Blood cadmium (umol/L)",
    "LBDBCDLC": "Blood cadmium comment code",
    "LBXTHG": "Blood mercury, total (ug/L)",
    "LBDTHGSI": "Blood mercury, total (umol/L)",
    "LBDTHGLC": "Blood mercury, total comment code",
    "LBXBSE": "Blood selenium(ug/L)",
    "LBDBSESI": "Blood selenium (umol/L)",
    "LBDBSELC": "Blood selenium comment code",
    "LBXBMN": "Blood manganese (ug/L)",
    "LBDBMNSI": "Blood manganese (umol/L)",
    "LBDBMNLC": "Blood manganese comment code",
    "URXUTRI": "Trichomonas, urine",
    "URXUAS3": "Urinary Arsenous acid (ug/L)",
    "URDUA3LC": "Urinary Arsenous acid comment code",
    "URXUAS5": "Urinary Arsenic acid (ug/L)",
    "URDUA5LC": "Urinary Arsenic acid comment code",
    "URXUAB": "Urinary Arsenobetaine (ug/L)",
    "URDUABLC": "Urinary Arsenobetaine comment code",
    "URXUAC": "Urinary Arsenocholine (ug/L)",
    "URDUACLC": "Urinary Arsenocholine comment code",
    "URXUDMA": "Urinary Dimethylarsinic acid (ug/L)",
    "URDUDALC": "Urinary Dimethylarsinic acid comment code",
    "URXUMMA": "Urinary Monomethylacrsonic acid (ug/L)",
    "URDUMMAL": "Urinary Monomethylacrsonic acid comment code",
    "URXVOL1": "The volume of urine collection #1 (mL)",
    "URDFLOW1": "Urine #1 Flow Rate (mL/min)",
    "URXVOL2": "The volume of urine collection #2 (mL)",
    "URDFLOW2": "Urine #2 Flow Rate (mL/min)",
    "URXVOL3": "The volume of urine collection #3 (mL)",
    "URDFLOW3": "Urine #3 Flow Rate (mL/min)",
    "URXUHG": "Mercury, urine (ug/L)",
    "URDUHGLC": "Mercury, urine comment code",
    "URXUBA": "Barium, urine (ug/L)",
    "URDUBALC": "Urinary Barium comment code",
    "URXUCD": "Cadmium, urine (ug/L)",
    "URDUCDLC": "Urinary Cadmium comment code",
    "URXUCO": "Cobalt, urine (ug/L)",
    "URDUCOLC": "Urinary Cobalt (ug/L) comment code",
    "URXUCS": "Cesium, urine (ug/L)",
    "URDUCSLC": "Urinary Cesium comment code",
    "URXUMO": "Molybdenum, urine (ug/L)",
    "URDUMOLC": "Urinary Molybdenum comment code",
    "URXUMN": "Manganese, urine (ug/L)",
    "URDUMNLC": "Urinary Mn comment code",
    "URXUPB": "Lead, urine (ug/L)",
    "URDUPBLC": "Urinary Lead comment code",
    "URXUSB": "Antimony, urine (ug/L)",
    "URDUSBLC": "Urinary Antimony comment code",
    "URXUSN": "Tin, urine (ug/L)",
    "URDUSNLC": "Tin comment code",
    "URXUSR": "Strontium, urine (ug/L)",
    "URDUSRLC": "Strontium comment code",
    "URXUTL": "Thallium, urine (ug/L)",
    "URDUTLLC": "Urinary Thallium comment code",
    "URXUTU": "Tungsten, urine (ug/L)",
    "URDUTULC": "Urinary Tungsten comment code",
    "URXUUR": "Uranium, urinary (ug/L)",
    "URDUURLC": "Urinary Uranium comment code",
    "URXPREG": "Pregnancy test result",
    "URXUAS": "Urinary arsenic, total (ug/L)",
    "LBDB12": "Vitamin B12(pg/mL)",
    "LBDB12SI": "Vitamin B12 (pmol/L)",
}
labsDf = labsDf.rename(columns=labColumnNames)

In [77]:
#Builds labsDf1 from labsDf, keeping SEQN and the descriptively named lab columns
#Columns named "ERROR" are not in the list, so they are left out of labsDf1
#Selecting a name that is not a column of labsDf raises a KeyError

labColumnsToKeep = [
    "SEQN",
    "Albumin, urine (ug/mL)",
    "Albumin, urine (mg/L)",
    "Creatinine, urine (umol/L)",
    "Albumin creatinine ratio (mg/g)",
    "Apolipoprotein (B) (mg/dL)",
    "Apolipoprotein (B) (g/L)",
    "Albumin (g/dL)",
    "Albumin (g/L)",
    "Alkaline phosphatase (IU/L)",
    "Aspartate aminotransferase AST (IU/L)",
    "Alanine aminotransferase ALT (IU/L)",
    "Blood urea nitrogen (mg/dL)",
    "Blood urea nitrogen (mmol/L)",
    "Bicarbonate (mmol/L)",
    "Total calcium (mg/dL)",
    "Total calcium (mmol/L)",
    "Cholesterol (mg/dL)",
    "Cholesterol (mmol/L)",
    "Creatine Phosphokinase(CPK) (IU/L)",
    "Chloride (mmol/L)",
    "Creatinine (mg/dL)",
    "Creatinine (umol/L)",
    "Globulin (g/dL)",
    "Globulin (g/L)",
    "Glucose, refrigerated serum (mg/dL)",
    "Glucose, refrigerated serum (mmol/L)",
    "Gamma glutamyl transferase (U/L)",
    "Iron, refrigerated serum (ug/dL)",
    "Iron, refrigerated serum (umol/L)",
    "Potassium (mmol/L)",
    "Lactate dehydrogenase (U/L)",
    "Sodium (mmol/L)",
    "Osmolality (mmol/Kg)",
    "Phosphorus (mg/dL)",
    "Phosphorus (mmol/L)",
    "Total bilirubin (mg/dL)",
    "Total bilirubin (umol/L)",
    "Total protein (g/dL)",
    "Total protein (g/L)",
    "Triglycerides, refrigerated (mg/dL)",
    "Triglycerides, refrigerated (mmol/L)",
    "Uric acid (mg/dL)",
    "Uric acid (umol/L)",
    "White blood cell count (1000 cells/uL)",
    "Lymphocyte percent (%)",
    "Monocyte percent (%)",
    "Segmented neutrophils percent (%)",
    "Eosinophils percent (%)",
    "Basophils percent (%)",
    "Lymphocyte number (1000 cells/uL)",
    "Monocyte number (1000 cells/uL)",
    "Segmented neutrophils num (1000 cell/uL)",
    "Eosinophils number (1000 cells/uL)",
    "Basophils number (1000 cells/uL)",
    "Red blood cell count (million cells/uL)",
    "Hemoglobin (g/dL)",
    "Hematocrit (%)",
    "Mean cell volume (fL)",
    "Mean cell hemoglobin (pg)",
    "Mean cell hemoglobin concentration (g/dL)",
    "Red cell distribution width (%)",
    "Platelet count (1000 cells/uL)",
    "Mean platelet volume (fL)",
    "Chlamydia, urine",
    "Serum Copper (ug/dL)",
    "Serum Copper (umol/L)",
    "Serum Selenium (ug/L)",
    "Serum Selenium (umol/L)",
    "Serum Zinc (ug/dL)",
    "Serum Zinc (umol/L)",
    "Urinary 2-Hydroxy-4-metoxybenzophenone (Benzophenone-3)",
    "Urinary 2-Hydroxy-4-metoxybenzophenone (Benzophenone-3) comment code",
    "Urinary Bisphenol A (ng/mL)",
    "Urinary Bisphenol A comment",
    "Urinary 4,4' dihydroxydiphenylmethane",
    "Urinary 4,4' dihydroxydiphenylmethane comment code",
    "Urinary 4,4'sulfonyldiphenol",
    "Urinary 4,4' sulfonyldiphenol comment code",
    "Urinary Triclocarban (ng/mL)",
    "Urinary Triclocarban comment",
    "Urinary 2,4,4'-Trichloro-2'-hydroxyphenyl ether (Triclosan)",
    "Urinary 2,4,4'-Trichloro-2'-hydroxyphenyl ether (Triclosan) comment code",
    "Butyl paraben (ng/ml)",
    "Butyl paraben comment",
    "Ethyl paraben (ng/ml)",
    "Ethyl paraben comment",
    "Methyl paraben (ng/ml)",
    "Methyl paraben comment",
    "Propyl paraben (ng/ml)",
    "Propyl paraben comment",
    "2,5-dichlorophenol (ug/L)",
    "2,5-dichlorophenol comment",
    "2,4-dichlorophenol (ug/L)",
    "2,4-dichlorophenol comment",
    "Urinary creatinine (mg/dL)",
    "Coffee or tea with cream or sugar? [Include milk or non-dairy creamers.]",
    "The time (in hours) since the examinee last drank coffee or tea with cream or sugar.",
    "The time (in minutes) since the examinee last drank coffee or tea with cream or sugar.",
    "Alcohol, such as beer, wine, or liquor?",
    "The time (in hours) the examinee last drank alcohol such as beer, wine, or liquor.",
    "The time (in minutes) the examinee last drank alcohol such as beer, wine, or liquor.",
    "Gum, breath mints, lozenges or cough drops, or other cough or cold remedies?",
    "The time (in hours) the examinee last had gum, breath mints, lozenges or cough drops, or other cough or cold remedies.",
    "The time (in minutes) the examinee last had gum, breath mints, lozenges or cough drops, or other cough or cold remedies.",
    "Antacids, laxatives, or anti-diarrheals?",
    "The time (in hours) the examinee last had antacids, laxatives or anti-diarrheals.",
    "The time (in minutes) the examinee last had antacids, laxatives or anti-diarrheals.",
    "Dietary supplements such as vitamins and minerals? [Include multivitamins and single nutrient supplements.]",
    "The time (in hours) the examinee last had dietary supplements such as vitamins and minerals.",
    "The time (in minutes) the examinee last had dietary supplements such as vitamins and minerals.",
    "Session in which SP was examined",
    "Fluoride, plasma (umol/L) average 2 values",
    "Fluoride, water (mg/L) average 2 values",
    "Direct HDL-Cholesterol (mg/dL)",
    "Direct HDL-Cholesterol (mmol/L)",
    "Hepatitis A antibody",
    "Hepatitis B Surface Antibody",
    "Hepatitis B core antibody",
    "Hepatitis B surface antigen",
    "Hepatitis D (anti-HDV)",
    "Hepatitis C RNA (HCV-RNA)",
    "Hepatitis C genotype",
    "Hepatitis E IgG (anti-HEV)",
    "Hepatitis E IgM (anti-HEV)",
    "Herpes Simplex Virus Type 1",
    "Herpes Simplex Virus Type 2",
    "Glycohemoglobin (%)",
    "HIV antibody test result",
    "HPV High Globulin Band result",
    "HPV Low Globulin Band result",
    "HPV Type 6",
    "HPV Type 11",
    "HPV Type 16",
    "HPV Type 18",
    "HPV Type 26",
    "HPV Type 31",
    "HPV Type 33",
    "HPV Type 35",
    "HPV Type 39",
    "HPV Type 40",
    "HPV Type 42",
    "HPV Type 45",
    "HPV Type 51",
    "HPV Type 52",
    "HPV Type 53",
    "HPV Type 54",
    "HPV Type 55",
    "HPV Type 56",
    "HPV Type 58",
    "HPV Type 59",
    "HPV Type 61",
    "HPV Type 62",
    "HPV Type 64",
    "HPV Type 66",
    "HPV Type 67",
    "HPV Type 68",
    "HPV Type 69",
    "HPV Type 70",
    "HPV Type 71",
    "HPV Type 72",
    "HPV Type 73",
    "HPV Type 81",
    "HPV Type 82",
    "HPV Type 83",
    "HPV Type 84",
    "HPV CP 6108",
    "HPV Type IS39",
    "Oral HPV Result",
    "Cobas HPV Swab High Risk",
    "Insulin (uU/mL)",
    "Insulin (pmol/L)",
    "Iodine, urine (ug/L)",
    "Fasting Subsample 2 Year MEC Weight",
    "Triglyceride (mg/dL)",
    "Triglyceride (mmol/L)",
    "LDL-cholesterol (mg/dL)",
    "LDL-cholesterol (mmol/L)",
    "Inorganic mercury, blood (ug/L)",
    "Mercury, inorganic (umol/L )",
    "Mercury, inorganic comment code",
    "Mercury, ethyl (ug/L)",
    "Mercury, ethyl comment code",
    "Mercury, methyl (ug/L)",
    "Mercury, methyl comment code",
    "OGTT Subsample 2 Year MEC Weight",
    "Two Hour Glucose(OGTT) (mg/dL)",
    "Two Hour Glucose(OGTT) (mmol/L)",
    "Glucose challenge Administer Time in minutes",
    "Time from fast glucose & challenge(min)",
    "Time from fasting glucose & OGTT (min)",
    "Time from glucose challenge & OGTT(min)",
    "Amount of glucose challenge drank",
    "Total length of 'food fast', hours",
    "Total length of 'food fast', minutes",
    "Incomplete OGTT Comment Code",
    "1-Hydroxynaphthalene (ng/L)",
    "1-Hydroxynaphthalene Comment Code",
    "2-Hydroxynaphthalene (ng/L)",
    "2-Hydroxynaphthalene Comment Code",
    "3-Hydroxyfluorene (ng/L)",
    "3-Hydroxyfluorene Comment Code",
    "2-Hydroxyfluorene (ng/L)",
    "2-Hydroxyfluorene Comment Code",
    "1-Hydroxyphenanthrene (ng/L)",
    "1-Hydroxyphenanthrene Comment Code",
    "1-Hydroxypyrene (ng/L)",
    "1-Hydroxypyrene Comment Code",
    "2-Hydroxyphenanthrene (ng/L) & 3-Hydroxyphenanthrene (ng/L)",
    "2-Hydroxyphenanthrene (ng/L) & 3-Hydroxyphenanthrene Comment Code",
    "Subsample A weights",
    "Urinary perchlorate (ng/mL)",
    "Urinary Perchlorate comment code",
    "Urinary nitrate (ng/mL)",
    "Urinary nitrate comment code",
    "Urinary thiocyanate (ng/mL)",
    "Urinary thiocyanate comment code",
    "Pefluorodecanoic acid (ug/L)",
    "Pefluorodecanoic acid comment code",
    "Perfluorohexane sulfonic acid (ug/L)",
    "Perfluorohexane sulfonic acid comment",
    "2-(N-Methyl-perfluorooctane sulfonamido) acetic acid (ug/L)",
    "2-(N-Methyl-perfluorooctane sulfonamido) acetic acid comment code",
    "Perfluorobutane sulfonic acid (ug/L)",
    "Perfluorobutane sulfonic acid comment code",
    "Perfluoroheptanoic acid (ug/L)",
    "Perfluoroheptanoic acid comment code",
    "Perfluorononanoic acid (ug/L)",
    "Perfluorononanoic acid comment code",
    "Perfluoroundecanoic acid (ug/L)",
    "Perfluoroundecanoic acid comment code",
    "Perflurododecanoic acid (ug/L)",
    "Perflurododecanoic acid comment code",
    "Subsample B weights",
    "Mono(carboxynonyl) Phthalate (ng/mL)",
    "Mono(carboxynonyl) phthalate comment code",
    "Mono(carboxyoctyl) Phthalate (ng/mL)",
    "Mono(carboxyoctyl) phthalate comment code",
    "Mono-2-ethyl-5-carboxypentyl phthalate (ng/mL)",
    "Mono-2-ethyl-5-carboxypentyl phthalate comment code",
    "Mono-n-butyl phthalate (ng/mL)",
    "Mono-n-butyl phthalate comment code",
    "Mono-(3-carboxypropyl) phthalate (ng/mL)",
    "Mono-(3-carboxypropyl) phthalate comment code",
    "Mono-ethyl phthalate (ng/mL)",
    "Mono-ethyl phthalate comment code",
    "Mono-(2-ethyl-5-hydroxyhexyl) phthalate (ng/mL)",
    "Mono-(2-ethyl-5-hydroxyhexyl) phthalate comment code",
    "Cyclohexane 1,2-dicarboxylic acid monohydroxy isononyl ester (ng/mL)",
    "Cyclohexane 1,2-dicarboxylic acid monohydroxy isononyl ester comment code",
    "Mono-(2-ethyl)-hexyl phthalate (ng/mL)",
    "Mono-(2-ethyl)-hexyl phthalate comment code",
    "Mono-isobutyl phthalate (ng/mL)",
    "Mono-isobutyl phthalate comment code",
    "Mono-isononyl phthalate (ng/mL)",
    "Mono-isononyl phthalate comment code",
    "Mono-(2-ethyl-5-oxohexyl) phthalate (ng/mL)",
    "Mono-(2-ethyl-5-oxohexyl) phthalate comment code",
    "Mono-benzyl phthalate (ng/mL)",
    "Mono-benzyl phthalate comment code",
    "Total Cholesterol( mg/dL)",
    "Total Cholesterol( mmol/L)",
    "Tissue transglutaminase(IgA-TTG)",
    "Endomyseal antibody (IgA EMA)",
    "Blood lead (ug/dL)",
    "Blood lead (umol/L)",
    "Blood lead comment code",
    "Blood cadmium (ug/L)",
    "Blood cadmium (umol/L)",
    "Blood cadmium comment code",
    "Blood mercury, total (ug/L)",
    "Blood mercury, total (umol/L)",
    "Blood mercury, total comment code",
    "Blood selenium(ug/L)",
    "Blood selenium (umol/L)",
    "Blood selenium comment code",
    "Blood manganese (ug/L)",
    "Blood manganese (umol/L)",
    "Blood manganese comment code",
    "Trichomonas, urine",
    "Urinary Arsenous acid (ug/L)",
    "Urinary Arsenous acid comment code",
    "Urinary Arsenic acid (ug/L)",
    "Urinary Arsenic acid comment code",
    "Urinary Arsenobetaine (ug/L)",
    "Urinary Arsenobetaine comment code",
    "Urinary Arsenocholine (ug/L)",
    "Urinary Arsenocholine comment code",
    "Urinary Dimethylarsinic acid (ug/L)",
    "Urinary Dimethylarsinic acid comment code",
    "Urinary Monomethylacrsonic acid (ug/L)",
    "Urinary Monomethylacrsonic acid comment code",
    "The volume of urine collection #1 (mL)",
    "Urine #1 Flow Rate (mL/min)",
    "The volume of urine collection #2 (mL)",
    "Urine #2 Flow Rate (mL/min)",
    "The volume of urine collection #3 (mL)",
    "Urine #3 Flow Rate (mL/min)",
    "Mercury, urine (ug/L)",
    "Mercury, urine comment code",
    "Barium, urine (ug/L)",
    "Urinary Barium comment code",
    "Cadmium, urine (ug/L)",
    "Urinary Cadmium comment code",
    "Cobalt, urine (ug/L)",
    "Urinary Cobalt (ug/L) comment code",
    "Cesium, urine (ug/L)",
    "Urinary Cesium comment code",
    "Molybdenum, urine (ug/L)",
    "Urinary Molybdenum comment code",
    "Manganese, urine (ug/L)",
    "Urinary Mn comment code",
    "Lead, urine (ug/L)",
    "Urinary Lead comment code",
    "Antimony, urine (ug/L)",
    "Urinary Antimony comment code",
    "Tin, urine (ug/L)",
    "Tin comment code",
    "Strontium, urine (ug/L)",
    "Strontium comment code",
    "Thallium, urine (ug/L)",
    "Urinary Thallium comment code",
    "Tungsten, urine (ug/L)",
    "Urinary Tungsten comment code",
    "Uranium, urinary (ug/L)",
    "Urinary Uranium comment code",
    "Pregnancy test result",
    "Urinary arsenic, total (ug/L)",
    "Vitamin B12(pg/mL)",
    "Vitamin B12 (pmol/L)",
]
labsDf1 = labsDf[labColumnsToKeep]


In [78]:
#Renames the columns in demographicDf

demographicDf = demographicDf.rename(columns=\
                                    {
"SDDSRVYR":"Data release cycle",
"RIDSTATR":"Interview and examination status of the participant.",
"RIAGENDR":"Gender of the participant.",
"RIDAGEYR":"Age in years of the participant at the time of screening. Individuals 80 and over are topcoded at 80 years of age.",
"RIDAGEMN":"Age in months of the participant at the time of screening. Reported for persons aged 24 months or younger at the time of exam (or screening if not examined).",
"RIDRETH1":"Recode of reported race and Hispanic origin information",
"RIDRETH3":"Recode of reported race and Hispanic origin information, with Non-Hispanic Asian Category",
"RIDEXMON":"Six month time period when the examination was performed - two categories: November 1 through April 30, May 1 through October 31.",
"RIDEXAGM":"Age in months of the participant at the time of examination. Reported for persons aged 19 years or younger at the time of examination.",
"DMQMILIZ":"{Have you/Has SP} ever served on active duty in the U.S. Armed Forces, military Reserves, or National Guard? (Active duty does not include training for the Reserves or National Guard, but does include activation, for service in the U.S. or in a foreign country, in support of military or humanitarian operations.)",
"DMQADFC":"Did {you/SP} ever serve in a foreign country during a time of armed conflict or on a humanitarian or peace-keeping mission? (This would include National Guard or reserve or active duty monitoring or conducting peace keeping operations in Bosnia and Kosovo, in the Sinai between Egypt and Israel, or in response to the 2004 tsunami or Haiti in 2010.)",
"DMDBORN4":"In what country {were you/was SP} born?",
"DMDCITZN":"{Are you/Is SP} a citizen of the United States? [Information about citizenship is being collected by the U.S. Public Health Service to perform health related research. Providing this information is voluntary and is collected under the authority of the Public Health Service Act. There will be no effect on pending immigration or citizenship petitions.]",
"DMDYRSUS":"Length of time the participant has been in the US.",
"DMDEDUC3":"What is the highest grade or level of school {you have/SP has} completed or the highest degree {you have/s/he has} received?",
"DMDEDUC2":"What is the highest grade or level of school {you have/SP has} completed or the highest degree {you have/s/he has} received?",
"DMDMARTL":"Marital status",
"RIDEXPRG":"Pregnancy status for females between 20 and 44 years of age at the time of MEC exam.",
"SIALANG":"Language of the Sample Person Interview Instrument",
"SIAPROXY":"Was a Proxy respondent used in conducting the Sample Person (SP) interview?",
"SIAINTRP":"Was an interpreter used to conduct the Sample Person (SP) interview?",
"FIALANG":"Language of the Family Interview Instrument",
"FIAPROXY":"Was a Proxy respondent used in conducting the Family Interview?",
"FIAINTRP":"Was an interpreter used to conduct the Family interview?",
"MIALANG":"Language of the MEC CAPI Interview Instrument",
"MIAPROXY":"Was a Proxy respondent used in conducting the MEC CAPI Interview?",
"MIAINTRP":"Was an interpreter used to conduct the MEC CAPI interview?",
"AIALANGA":"Language of the MEC ACASI Interview Instrument",
"DMDHHSIZ":"Total number of people in the Household",
"DMDFMSIZ":"Total number of people in the Family",
"DMDHHSZA":"Number of children aged 5 years or younger in the household",
"DMDHHSZB":"Number of children aged 6-17 years old in the household",
"DMDHHSZE":"Number of adults aged 60 years or older in the household",
"DMDHRGND":"HH reference person's gender",
"DMDHRAGE":"HH reference person's age in years",
"DMDHRBR4":"HH reference person's country of birth",
"DMDHREDU":"HH reference person's education level",
"DMDHRMAR":"HH reference person's marital status",
"DMDHSEDU":"HH reference person's spouse's education level",
"WTINT2YR":"Full sample 2 year interview weight.",
"WTMEC2YR":"Full sample 2 year MEC exam weight.",
"SDMVPSU":"Masked variance unit pseudo-PSU variable for variance estimation",
"SDMVSTRA":"Masked variance unit pseudo-stratum variable for variance estimation",
"INDHHIN2":"Total household income (reported as a range value in dollars)",
"INDFMIN2":"Total family income (reported as a range value in dollars)",
"INDFMPIR":"A ratio of family income to poverty guidelines."})

In [79]:
# Create a new dataframe with selected columns from the demographicDF 
demographicDf1 = demographicDf[["SEQN","Gender of the participant.", "Age in years of the participant at the time of screening. Individuals 80 and over are topcoded at 80 years of age." ,"Marital status" ,"Pregnancy status for females between 20 and 44 years of age at the time of MEC exam." , "Did {you/SP} ever serve in a foreign country during a time of armed conflict or on a humanitarian or peace-keeping mission? (This would include National Guard or reserve or active duty monitoring or conducting peace keeping operations in Bosnia and Kosovo, in the Sinai between Egypt and Israel, or in response to the 2004 tsunami or Haiti in 2010.)" , "What is the highest grade or level of school {you have/SP has} completed or the highest degree {you have/s/he has} received?" , "What is the highest grade or level of school {you have/SP has} completed or the highest degree {you have/s/he has} received?"]]

In [80]:
#Renames the columns in demographicDf1
demographicDf1 = demographicDf1.rename(columns=\
                                    {"Gender of the participant.": "Gender",\
                                      "Age in years of the participant at the time of screening. Individuals 80 and over are topcoded at 80 years of age.": "Age",\
                                      "Pregnancy status for females between 20 and 44 years of age at the time of MEC exam.": "Pregnancy_Status", \
                                      "Did {you/SP} ever serve in a foreign country during a time of armed conflict or on a humanitarian or peace-keeping mission? (This would include National Guard or reserve or active duty monitoring or conducting peace keeping operations in Bosnia and Kosovo, in the Sinai between Egypt and Israel, or in response to the 2004 tsunami or Haiti in 2010.)": "Served",\
                                      "What is the highest grade or level of school {you have/SP has} completed or the highest degree {you have/s/he has} received?": "Education1",\
                                      "What is the highest grade or level of school {you have/SP has} completed or the highest degree {you have/s/he has} received?": "Education2"
                                     
                                    })

In [92]:
#Outer-joins the lab columns of labsDf1 onto medicationsDf, matching on SEQN.
#labsDf1 is re-indexed by SEQN so the join can look its rows up by that key.
joinedDf = medicationsDf.join(
    other=labsDf1.set_index(keys='SEQN'),
    on='SEQN',
    how='outer',
)

In [93]:
#Outer-joins the demographic columns of demographicDf1 onto joinedDf,
#matching on SEQN in the same way as the previous join.
joinedDf2 = joinedDf.join(
    other=demographicDf1.set_index(keys='SEQN'),
    on='SEQN',
    how='outer',
)

In [94]:
#Keeps one row per SEQN: when a SEQN occurs more than once, every occurrence
#except the last one (in the current row order) is masked out.
joinedDf2 = joinedDf2[~joinedDf2.duplicated(subset='SEQN', keep='last')]

In [95]:
#Writes the combined dataframe to Combined_data.csv (path is relative to the
#working directory; the dataframe index is written as the first column).
joinedDf2.to_csv(path_or_buf="Combined_data.csv")