In [1]:
##Latest notebook visualizing results for Distribution Paper by Julia Geller
##Updated:  07/02/2022

# Required to access the database: Django refuses synchronous ORM calls when
# it detects a running event loop (as inside a Jupyter kernel) unless this
# flag is set, so it is assigned before any model is queried.
import os
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"


import sys
import numpy
# Raise the summarization threshold to the largest int so printed NumPy arrays
# are shown in full instead of being abbreviated.
numpy.set_printoptions(threshold=sys.maxsize)

# Data analysis tools
import pandas as pd
import numpy as np
import seaborn as sns

# Models available in our application
from datasets.models import RawFlower, RawUNM, RawDAR, RawNHANES_BIO
from django.contrib.auth.models import User
from datasets.models import RawDictionary


from datasets.models import RawNEU
import statsmodels.api as sm
import statsmodels.formula.api as smf
import statsmodels

from api import adapters

In [2]:
## Cohort aliases: the labels written into each frame's 'CohortType' column
## (and used as the public name of each cohort) further down in this cell.
(NEU_alias,
 DAR_alias,
 UNM_alias,
 NHANES_alias) = ('PROTECT', 'NHBCS', 'Navajo', 'NHANES')
## Get the data

## NEU (Cohort 1)
df_NEU = adapters.neu.get_dataframe_orig()
# Keep visit 2 only. The explicit .copy() makes this an independent frame, so
# the CohortType assignment below does not write into a filtered slice
# (which triggers pandas' SettingWithCopyWarning).
df_NEU = df_NEU[df_NEU['TimePeriod'] == 2].copy()

# Merge the covariates while CohortType still holds the value returned by the
# adapter: CohortType is one of the join keys, so relabeling must come after.
NEU_covars = adapters.neu.get_dataframe_covars()
df_NEU_covars = NEU_covars.merge(df_NEU, on=['PIN_Patient', 'CohortType', 'TimePeriod'])
df_NEU['CohortType'] = NEU_alias

# Below-LOD frame: same visit-2 restriction, then the same relabel.
df_NEU_blod = adapters.neu.get_dataframe_BLOD()
df_NEU_blod = df_NEU_blod[df_NEU_blod['TimePeriod'] == 2].copy()
df_NEU_blod['CohortType'] = NEU_alias


##DAR (Cohort 2)

df_DAR = adapters.dar.get_dataframe()

df_DAR_blod = adapters.dar.get_dataframe_BLOD()
df_DAR_blod['CohortType'] = DAR_alias

# The DAR covariate merge is currently disabled. If it is re-enabled, keep it
# ahead of the CohortType relabel below, since CohortType is a join key:
# DAR_covars = adapters.dar.get_dataframe_covars()
# df_DAR_covars = DAR_covars.merge(df_DAR, on = ['PIN_Patient','CohortType','TimePeriod']) #Merge the covariates
df_DAR['CohortType'] = DAR_alias




## UNM
df_UNM = adapters.unm.get_dataframe_orig()

df_UNM_blod = adapters.unm.get_dataframe_BLOD()
df_UNM_blod['CohortType'] = UNM_alias

# Merge the covariates while CohortType still holds the value returned by the
# adapter: CohortType is one of the join keys, so relabeling must come after.
UNM_covars = adapters.unm.get_dataframe_covars()
df_UNM_covars = UNM_covars.merge(df_UNM, on=['PIN_Patient', 'CohortType', 'TimePeriod'])
df_UNM['CohortType'] = UNM_alias

## NHANES
# Load the measurements and clear the name attached to the column axis.
df_NHANES = adapters.nhanes.get_dataframe_orig().rename_axis(None, axis=1)

df_NHANES_blod = adapters.nhanes.get_dataframe_orig_blod()

# Covariates are loaded, but merging them into df_NHANES is left disabled.
NHANES_covars = adapters.nhanes.get_dataframe_covars()
##df_NHANES_covars = NHANES_covars.merge(df_NHANES, on = ['PIN_Patient','CohortType','TimePeriod'])

# Relabel both NHANES frames with the cohort alias.
for nhanes_frame in (df_NHANES, df_NHANES_blod):
    nhanes_frame['CohortType'] = NHANES_alias



In [3]:
# Pull every RawUNM row into a DataFrame. These filters remain disabled:
#   .exclude(Creat_Corr_Result__lt=-1000)
#   .exclude(Creat_Corr_Result__isnull=True)
unm_rows = RawUNM.objects.values()
df_unm_lod = pd.DataFrame.from_records(unm_rows)

# Rows flagged imputed == 1 get LOD = Result * sqrt(2); every other row gets NaN.
# NOTE(review): presumably imputed results were stored as LOD / sqrt(2) - confirm.
imputed_mask = df_unm_lod['imputed'] == 1
df_unm_lod['LOD'] = np.where(imputed_mask, df_unm_lod['Result'] * np.sqrt(2), np.nan)

# Reduce to the distinct (LOD, Analyte) pairs that actually carry an LOD.
has_lod = df_unm_lod['LOD'].notna()
df_unm_lod = df_unm_lod.loc[has_lod, ['LOD', 'Analyte']].drop_duplicates()

In [4]:
# Pull every RawNEU row into a DataFrame. These filters remain disabled:
#   .exclude(Creat_Corr_Result__lt=-1000)
#   .exclude(Creat_Corr_Result__isnull=True)
neu_rows = RawNEU.objects.values()
df_neu_lod = pd.DataFrame.from_records(neu_rows)

# RawNEU already has an 'LOD' column: keep the distinct (LOD, Analyte) pairs
# where it is present. The full frame stays available as df_neu_lod.
neu_has_lod = df_neu_lod['LOD'].notna()
df_neu_lod2 = df_neu_lod.loc[neu_has_lod, ['LOD', 'Analyte']].drop_duplicates()



In [18]:
# Pull every RawDAR row into a DataFrame. These filters remain disabled:
#   .exclude(Creat_Corr_Result__lt=-1000)
#   .exclude(Creat_Corr_Result__isnull=True)
dar_rows = RawDAR.objects.values()
df_dar_lod = pd.DataFrame.from_records(dar_rows)

# Keep only the columns whose name contains 'IDL', then reshape wide -> long:
# one row per distinct (variable, value) pair.
idl_column_names = [column for column in df_dar_lod.columns if 'IDL' in column]
df_dar_idl = df_dar_lod[idl_column_names]
df_dar_lod_melted = pd.melt(df_dar_idl).drop_duplicates()

# 'U...' analyte code -> short analyte name.
map_analyte = {
    'UIAS': 'iAs', 'UASB': 'AsB', 'UAS3': 'AsIII', 'UAS5': 'AsV',
    'UDMA': 'DMA', 'UMMA': 'MMA',
    'UBA': 'Ba', 'UAG': 'Ag', 'UAL': 'Al', 'UAS': 'As', 'UBE': 'Be',
    'UCA': 'Ca', 'UCD': 'Cd', 'UCO': 'Co', 'UCR': 'Cr', 'UCS': 'Cs',
    'UCU': 'Cu', 'UFE': 'Fe', 'UHG': 'Hg', 'UPO': 'K',
    'UMG': 'Mg', 'UMN': 'Mn', 'UMO': 'Mo', 'UNI': 'Ni', 'UPP': 'P',
    'UPB': 'Pb',
    'USB': 'Sb', 'USE': 'Se', 'USI': 'Si', 'USN': 'Sn',
    'USR': 'Sr', 'UTL': 'Tl',
    'UUR': 'U', 'UTU': 'W', 'UZN': 'Zn', 'UVA': 'V',
}
# Reverse lookup: short analyte name -> 'U...' analyte code.
map_analyte_inv = dict(zip(map_analyte.values(), map_analyte.keys()))

# Each melted 'variable' is an IDL column name; with '_IDL' removed it must be
# a key of map_analyte_inv (an unknown name raises KeyError), and the matching
# 'U...' code becomes the Analyte.
df_dar_lod_melted['Analyte'] = [
    map_analyte_inv[column_name.replace('_IDL', '')]
    for column_name in df_dar_lod_melted['variable']
]
# The melted 'value' (the IDL entry) is exposed under the common 'LOD' name.
df_dar_lod_melted['LOD'] = df_dar_lod_melted['value']

In [19]:
# RawNHANES_LLOD is not among the models imported in the first cell, so this
# cell fails with NameError on a fresh kernel unless it is imported here.
# NOTE(review): assumed to live in datasets.models next to the other Raw* models - confirm.
from datasets.models import RawNHANES_LLOD

# Pull every RawNHANES_LLOD row into a DataFrame. These filters remain disabled:
#   .exclude(Creat_Corr_Result__lt=-1000)
#   .exclude(Creat_Corr_Result__isnull=True)
df_NHANES_lod = pd.DataFrame.from_records(RawNHANES_LLOD.objects.values())

# Expose the 'Value' column under the common 'LOD' name used by the other cohorts.
df_NHANES_lod['LOD'] = df_NHANES_lod['Value']

In [7]:
# Let pandas render up to 500 rows before truncating a displayed frame.
pd.options.display.max_rows = 500

In [21]:
# Tag every LOD table with the cohort it came from.
for cohort_label, lod_frame in (
    ('NEU', df_neu_lod2),
    ('UNM', df_unm_lod),
    ('NHANES', df_NHANES_lod),
    ('DAR', df_dar_lod_melted),
):
    lod_frame['Cohort'] = cohort_label

In [23]:
# Columns (and their order) shared by every per-cohort LOD table.
keep_cols = [
    'Cohort',
    'Analyte',
    'LOD',
]

In [28]:
# Restrict each cohort's LOD table to the shared columns, in a fixed order:
# NEU, UNM, NHANES, DAR.
all_dfs = [
    lod_frame[keep_cols]
    for lod_frame in (df_neu_lod2, df_unm_lod, df_NHANES_lod, df_dar_lod_melted)
]

# Sanity check: print each table's columns. A plain loop is used so the cell
# does not build and display a throwaway list of print()'s None results.
for lod_table in all_dfs:
    print(lod_table.columns)

Index(['Cohort', 'Analyte', 'LOD'], dtype='object')
Index(['Cohort', 'Analyte', 'LOD'], dtype='object')
Index(['Cohort', 'Analyte', 'LOD'], dtype='object')
Index(['Cohort', 'Analyte', 'LOD'], dtype='object')


[None, None, None, None]

In [33]:
# Stack the per-cohort LOD tables and write them to a single CSV; the row
# index is not written.
all_lods = pd.concat(all_dfs)
all_lods.to_csv('all_LODS_by_cohort.csv', index=False)