Skip to content

Depression play

Asif Tamuri edited this page Jun 14, 2018 · 8 revisions

This is a standalone implementation of the depression module in order to explore different approaches.

from datetime import datetime

from pandas import Timestamp as Date
from pandas.tseries.offsets import DateOffset
import pandas as pd

import numpy as np

population_size = 10000  # number of persons in the dummy population


def get_population_props():
    """Return a dummy population DataFrame ready for the depression module.

    The frame has one row per person (index named ``person``, running from 0
    to ``population_size - 1``) and the columns read and written by the
    depression step. Sex, date of birth and the two chronic-condition flags
    are drawn at random from the module-level ``rng``; everybody starts out
    not depressed, never depressed and not pregnant.

    Columns are created with concrete dtypes (bool, datetime64, float) rather
    than being filled into an all-NaN object frame, so that boolean operators
    such as ``~df['female']`` behave as logical operations.

    Returns:
        pandas.DataFrame: the freshly initialised population.
    """
    properties = ['is_depressed', 'date_init_depression', 'date_depression_resolved',
                  'ever_depressed', 'prob_3m_resol_depression', 'is_pregnant', 'female',
                  'male', 'date_of_birth', 'has_hyptension', 'has_chronic_back_pain']
    n = population_size
    index = pd.RangeIndex(n, name='person')

    female = rng.choice([True, False], replace=True, size=n)

    # numpy's randint excludes the upper bound: years are 1950..2009, months
    # 1..12 and days 1..28 (28 keeps every year/month combination valid).
    birth_parts = pd.DataFrame({
        'year': rng.randint(1950, 2010, size=n),
        'month': rng.randint(1, 13, size=n),
        'day': rng.randint(1, 29, size=n),
    })
    date_of_birth = pd.to_datetime(birth_parts).values

    has_hypertension = rng.choice([True, False], replace=True, size=n, p=[0.1, 0.9])
    has_chronic_back_pain = rng.choice([True, False], replace=True, size=n, p=[0.1, 0.9])

    # Date columns start empty (NaT) but already carry a datetime dtype.
    no_date = pd.Series(pd.NaT, index=index, dtype='datetime64[ns]')

    return pd.DataFrame({
        'is_depressed': False,
        'date_init_depression': no_date,
        'date_depression_resolved': no_date,
        'ever_depressed': False,
        'prob_3m_resol_depression': np.nan,
        'is_pregnant': False,
        'female': female,
        'male': ~female,
        'date_of_birth': date_of_birth,
        'has_hyptension': has_hypertension,
        'has_chronic_back_pain': has_chronic_back_pain,
    }, index=index, columns=properties)

# Parameters of the depression module, looked up by name in the step function.
params = dict(
    # baseline probability of onset per 3-month step
    base_3m_prob_depression=0.0001,
    # relative-risk multipliers applied on top of the baseline
    rr_depression_low_ses=3,
    rr_depression_chron_cond=1.25,
    rr_depression_pregnancy=3,
    rr_depression_female=1.5,
    rr_depression_prev_episode=50,
    rr_depression_age_15_20=1,
    rr_depression_age_60plus=3,
    # candidate per-step resolution probabilities, one is drawn per new episode
    depression_resolution_rates=[0.2, 0.3, 0.5, 0.7, 0.95],
    # multiplier on the resolution probability for persons with chronic conditions
    rr_resol_depress_chron_cond=0.75,
)

def single_step(df, now):
    """A single step of the depression module"""
    depressed = (df.is_depressed).copy()

    ago_15yr = now - DateOffset(years=15)
    ago_20yr = now - DateOffset(years=20)
    ago_60yr = now - DateOffset(years=60)

    # calculate the effective probability of depression for not-depressed persons
    effective_prob_depression = pd.Series(params['base_3m_prob_depression'], index=df[~df.is_depressed].index)
    effective_prob_depression.loc[df.is_pregnant] *= params['rr_depression_pregnancy']
    effective_prob_depression.loc[~df.ever_depressed] *= params['rr_depression_prev_episode']
    effective_prob_depression.loc[df.date_of_birth.between(ago_20yr, ago_15yr)] *= params['rr_depression_age_15_20']
    effective_prob_depression.loc[(df.date_of_birth > ago_60yr)] *= params['rr_depression_age_60plus']
    effective_prob_depression.loc[df.female] *= params['rr_depression_female']
    effective_prob_depression.loc[df.has_hyptension & df.has_chronic_back_pain] *= params['rr_depression_chron_cond']

    # this is only for not_depressed in population
    is_newly_depressed = effective_prob_depression > rng.rand(len(effective_prob_depression))
    newly_depressed = is_newly_depressed[is_newly_depressed == True].index
    print('\tnewly depressed:', len(newly_depressed))
    df.loc[newly_depressed, 'is_depressed'] = True
    df.loc[newly_depressed, 'ever_depressed'] = True
    df.loc[newly_depressed, 'data_init_depression'] = now
    df.loc[newly_depressed, 'data_depression_resolved'] = None
    df.loc[newly_depressed, 'prob_3m_resol_depression'] = rng.choice(params['depression_resolution_rates'], size=len(newly_depressed))

    # continuation or resolution of depression
    effective_prob_recover = pd.Series(df.loc[:, 'prob_3m_resol_depression'], index=df[depressed].index)
    effective_prob_recover[df.has_hyptension & df.has_chronic_back_pain] *= params['rr_resol_depress_chron_cond']
    is_resol_depression = effective_prob_recover > rng.rand(len(effective_prob_recover))
    resolved_depress = is_resol_depression[is_resol_depression == True].index
    df.loc[resolved_depress, 'is_depressed'] = False
    df.loc[resolved_depress, 'date_depression_resolved'] = now
    df.loc[resolved_depress, 'date_init_depression'] = None
    print('\tno longer depressed', len(resolved_depress))
    print('\ttotal depressed', sum(df.is_depressed))

# Drive the depression module through three consecutive 3-month steps,
# printing the timestamp of each step before it is applied.
rng = np.random.RandomState(seed=123456789)  # fixed seed for repeatable draws
population = get_population_props()
now = pd.to_datetime('today')
for timestep in range(3):
    print(now)
    single_step(population, now)
    now += pd.DateOffset(months=3)

Output should look like the following (dates will vary; the counts shown come from one seeded run and depend on the random stream, so they may differ between library versions):

2018-06-14 11:00:27.515949
        newly depressed: 158
        no longer depressed 0
        total depressed 158
2018-09-14 11:00:27.515949
        newly depressed: 175
        no longer depressed 82
        total depressed 251
2018-12-14 11:00:27.515949
        newly depressed: 175
        no longer depressed 128
        total depressed 298