In [1]:
import os
import copy
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
from scipy.stats import multivariate_normal

from microsim.population import NHANESDirectSamplePopulation, build_people_using_nhanes_for_sampling
from microsim.gender import NHANESGender
from microsim.smoking_status import SmokingStatus
from microsim.race_ethnicity import NHANESRaceEthnicity
from microsim.treatment import DefaultTreatmentsType
from microsim.risk_factor import StaticRiskFactorsType, DynamicRiskFactorsType

# Root of the local MICROSIM folder. When the MICROSIM_DIR environment variable is set it
# overrides the machine-specific default below, so the notebook can run on other machines.
microsimDir = os.environ.get(
    "MICROSIM_DIR",
    "/Users/deligkaris.1/OneDrive - The Ohio State University Wexner Medical Center/MICROSIM")
# Work from the microsim code directory.
os.chdir(microsimDir+"/CODE/microsim")

# Number of people passed to the population constructor / people builder below.
popSize = 500000

In [None]:
# Switch between the two ways of building the starting population.
useDirectSample = True

if useDirectSample:
    # Default: let the population class build popSize people itself.
    # NOTE(review): 1999 is presumably the NHANES year to sample from -- confirm.
    pop = NHANESDirectSamplePopulation(popSize, 1999)
else:
    # Alternative: create a minimal population and replace its people with ones built
    # from the imputed NHANES dataframe.
    pop = NHANESDirectSamplePopulation(2, 1999)
    # Dataset location matches the read_stata call used later in this notebook
    # (the data folder lives under CODE/microsim, the directory chdir'ed into above).
    nhanesDf = pd.read_stata(microsimDir + "/CODE/microsim/microsim/data/fullyImputedDataset.dta")
    pop._people = build_people_using_nhanes_for_sampling(nhanesDf,popSize)

In [None]:
# Advance the population using a single worker.
# NOTE(review): 100 is presumably the number of simulated years/waves and None the
# (absent) treatment strategy -- confirm against the advance() signature.
pop.advance(100, None, nWorkers=1)

In [None]:
# Names of the person attributes to export, grouped by how they are stored on a person.
srfList = [riskFactor.value for riskFactor in (StaticRiskFactorsType.GENDER,
                                               StaticRiskFactorsType.SMOKING_STATUS,
                                               StaticRiskFactorsType.RACE_ETHNICITY)]

dtList = [treatment.value for treatment in (DefaultTreatmentsType.STATIN,)]

drfList = [riskFactor.value for riskFactor in (DynamicRiskFactorsType.AGE,
                                               DynamicRiskFactorsType.SBP,
                                               DynamicRiskFactorsType.DBP,
                                               DynamicRiskFactorsType.BMI)]

# Keys are the ones get_all_person_years_as_df looks up.
attrDict = dict(staticRiskFactors=srfList,
                dynamicRiskFactors=drfList,
                defaultTreatments=dtList)

In [None]:
def get_all_person_years_as_df(pop, attrDict):
    """Return a dataframe with one row per recorded entry of every person in pop._people.

    The attribute dictionary dictates what information will be returned in the dataframe.
    It must provide three lists of attribute names (each is read from a person as "_" + name):
      - "staticRiskFactors": single values, repeated person._waveCompleted+1 times,
      - "dynamicRiskFactors" and "defaultTreatments": per-person sequences, one element per row.
    Columns are always ordered static, dynamic, treatments, so the labels match the data
    regardless of the key order inside attrDict. For each person the number of rows is that
    of the shortest of these sequences. An empty population gives an empty dataframe that
    still carries the requested columns.
    TO DO: define a default attrDict if the user wants everything to be returned."""

    srfList = attrDict["staticRiskFactors"]
    drfList = attrDict["dynamicRiskFactors"]
    dtList = attrDict["defaultTreatments"]
    # Build the labels in the same order the values are assembled below.
    columnNames = [*srfList, *drfList, *dtList]

    # Collect all rows first and build a single dataframe at the end; creating one
    # dataframe per person and concatenating them is far slower for large populations.
    rows = []
    for person in pop._people:
        nEntries = person._waveCompleted + 1
        personColumns = [[getattr(person, "_" + attr)] * nEntries for attr in srfList]
        personColumns.extend(getattr(person, "_" + attr) for attr in drfList)
        personColumns.extend(getattr(person, "_" + attr) for attr in dtList)
        # Transpose the per-attribute sequences into per-entry rows.
        rows.extend(zip(*personColumns))
    return pd.DataFrame(rows, columns=columnNames)

In [None]:
# Flatten the population into one dataframe holding the attributes listed in attrDict.
df = get_all_person_years_as_df(pop, attrDict)

In [None]:
# Preview the first rows of the exported dataframe.
df.head()

In [None]:
# Save the dataframe as CSV (without the index column) under the notebooks data folder.
df.to_csv(microsimDir+"/NOTEBOOKS/DATA/nhanes-normality-test.csv", index=False)

In [None]:
# TODO: the helper functions defined in the following cells may need to be moved into the microsim code base.

In [None]:
def get_last_attribute(person, attr):
    """Return the most recent value of the attribute named attr on person.

    Attributes are read from the person as "_" + attr. Names that are values of
    DynamicRiskFactorsType or DefaultTreatmentsType are indexed with [-1] (their last
    entry is returned); names that are values of StaticRiskFactorsType are returned as
    stored.

    Raises:
        RuntimeError: if attr is not a value of any of the three enumerations.
    """
    historyAttrs = [member.value for member in DynamicRiskFactorsType]
    historyAttrs += [member.value for member in DefaultTreatmentsType]
    if attr in historyAttrs:
        return getattr(person, "_" + attr)[-1]

    staticAttrs = [member.value for member in StaticRiskFactorsType]
    if attr in staticAttrs:
        return getattr(person, "_" + attr)

    raise RuntimeError(f"Unknown Person object attribute: {attr}")

In [None]:
def filter_people(people, lambdaFilter=lambda x: True):
    """Return, as a pandas Series, the elements of people for which lambdaFilter is truthy.

    With the default filter every element is kept, in the original order.
    """
    kept = [person for person in people if lambdaFilter(person)]
    return pd.Series(kept)

In [None]:
def filter_people_dict(people, filterDict):
    """Keep the people whose latest attribute values equal every entry of filterDict.

    filterDict maps attribute names (as accepted by get_last_attribute) to the required
    value. The filters are applied one after the other, in the dictionary's order, each
    one on the survivors of the previous one. With an empty filterDict, people is
    returned unchanged.
    """
    remaining = people
    for attrName, wantedValue in filterDict.items():
        def has_wanted_value(person, attrName=attrName, wantedValue=wantedValue):
            return get_last_attribute(person, attrName) == wantedValue
        remaining = filter_people(remaining, has_wanted_value)
    return remaining

In [None]:
def get_pop_filter(gender, race, smoking, statin, anyPhysicalActivity):
    """Build the attribute-name -> required-value dictionary describing one subgroup.

    The keys are the enum values naming gender, race/ethnicity, smoking status,
    any physical activity and statin use, inserted in that order.
    """
    popFilter = {}
    popFilter[StaticRiskFactorsType.GENDER.value] = gender
    popFilter[StaticRiskFactorsType.RACE_ETHNICITY.value] = race
    popFilter[StaticRiskFactorsType.SMOKING_STATUS.value] = smoking
    popFilter[DynamicRiskFactorsType.ANY_PHYSICAL_ACTIVITY.value] = anyPhysicalActivity
    popFilter[DefaultTreatmentsType.STATIN.value] = statin
    return popFilter

In [None]:
# Pair each age with the corresponding SBP value: one (age, sbp) row per person.
# NOTE(review): ageList and sbpList are only assigned inside the subgroup loop in a later
# cell, so on a fresh kernel run top-to-bottom this cell raises NameError; when it does run
# it uses whatever the last executed loop iteration left behind.
data = np.array(list(zip(ageList,sbpList)))

In [None]:
# Column-wise sample mean of the observations.
mu = np.mean(data, axis=0)
# np.cov expects one variable per row, hence the transpose of the row-per-person array.
covariance = np.cov(data.T)

In [None]:
# Multivariate normal distribution parameterised by the sample mean and covariance.
dist = multivariate_normal(mu, covariance)

In [None]:
# Evaluate the density of the distribution at the first observation.
# NOTE(review): this cell originally held only the incomplete expression `dist.`, which is a
# SyntaxError and stops Restart & Run All; evaluating pdf at data[0] is a guess at the
# intended exploration -- confirm or delete the cell.
dist.pdf(data[0])

In [None]:
# Inspect the first observation of the data array.
data[0]

In [None]:
# Print, for every subgroup (gender x race/ethnicity x smoking status x statin use x any
# physical activity), the number of people in it and the number of distinct person names.

# The filters only read person attributes, so one copy of the population is shared by all
# subgroups instead of copying the whole population again on every iteration.
peopleSnapshot = list(pop.get_people_copy())

#need to include asian
raceList = [r for r in NHANESRaceEthnicity if r!=NHANESRaceEthnicity.MEXICAN_AMERICAN]

#need only male and female
for gender in NHANESGender:
    for race in raceList:
        #need only never and ever
        for smoking in [SmokingStatus.NEVER, SmokingStatus.FORMER]:
            for statin in [True, False]:
                for anyPhysicalActivity in [True, False]:
                    filterDict = get_pop_filter(gender, race, smoking, statin, anyPhysicalActivity)
                    filteredPeople = filter_people_dict(peopleSnapshot, filterDict)
                    names = [person._name for person in filteredPeople]
                    nameSet = set(names)
                    # Latest values per person in the subgroup. These stay at notebook scope:
                    # the cell building `data` reads ageList and sbpList.
                    ageList = [person._age[-1] for person in filteredPeople]
                    sbpList = [person._sbp[-1] for person in filteredPeople]
                    dbpList = [person._dbp[-1] for person in filteredPeople]
                    bmiList = [person._bmi[-1] for person in filteredPeople]
                    #data = np.array(list(zip(ageList,sbpList)))
                    #mu = np.mean(data, axis=0)
                    #covariance = np.cov(data.T)
                    #dist = multivariate_normal()
                    #dist.fit(data)
                    #plt.hist2d(dbpList, bmiList, bins=8)
                    #plt.show()
                    print(gender.value, race.value, smoking.value, statin, anyPhysicalActivity, filteredPeople.shape[0], len(nameSet))

In [6]:
# Switch the working directory to the MICROSIM root and load the NHANES dataset (Stata file).
os.chdir(microsimDir)
nhanesDf = pd.read_stata(microsimDir + "/CODE/microsim/microsim/data/fullyImputedDataset.dta")

In [7]:
# Frequency of each distinct value in the antiHypertensive column.
# NOTE(review): the values presumably are the number of antihypertensive medications -- confirm.
nhanesDf["antiHypertensive"].value_counts()

0.0    39933
1.0    10984
2.0     5239
3.0     2133
4.0      715
5.0      169
6.0       26
7.0        5
Name: antiHypertensive, dtype: int64

In [None]:
# Filter for one subgroup; keyword arguments make explicit which flag is which.
filterDict = get_pop_filter(gender=NHANESGender.MALE,
                            race=NHANESRaceEthnicity.NON_HISPANIC_WHITE,
                            smoking=SmokingStatus.NEVER,
                            statin=False,
                            anyPhysicalActivity=True)

In [None]:
# Keep only the people whose latest attribute values match every entry of filterDict.
filteredPeople = filter_people_dict(pop._people, filterDict)

In [None]:
# Size of the filtered subgroup.
filteredPeople.shape