In [1]:
# Load IPython's autoreload extension and select mode 2, which re-imports
# modules automatically before each cell is executed.
%load_ext autoreload
%autoreload 2

In [2]:
cd ~/demres

/Users/zurfarosa/demres


In [3]:
import os
import sys

# Put the parent of the current working directory on the import path (once).
# NOTE(review): presumably this is what lets the `demres` package imported
# below be found - confirm against the project layout.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.tools.tools import add_constant
import pylab as pl
from datetime import date, timedelta

import demres
from demres.common.constants import entry_type
from demres.demins.constants import Study_Design as sd
from demres.common import codelists
from demres.common.helper_functions import *
from demres.demins.statistical_functions import *

In [4]:
# Remove the cap on the number of columns pandas renders for a DataFrame.
pd.options.display.max_columns = None

## Specify dementia subtype

In [5]:
# Dementia subtype used to pick the input file.
# Options: 'alzheimers', 'vascular', 'all_dementia'
subtype = "all_dementia"

## Specify exposure window

In [6]:
# Exposure-window label used to pick the input file.
# Options: '12_to_7', '10_to_5', '8_to_3'
window = "10_to_5"

## Load relevant dataframe and create intercept

In [7]:
# Load the processed patient feature table for the chosen subtype and window,
# parsing the three date columns into datetimes.
# `infer_datetime_format` is intentionally not passed: it is deprecated since
# pandas 2.0 (format inference is now the default) and only affected parsing
# speed, not the parsed values.
pt_features = pd.read_csv(
    'data/pt_data/processed_data/pt_features_demins_' + subtype + '_' + window + '.csv',
    delimiter=',',
    parse_dates=['index_date', 'data_end', 'data_start'],
)

In [19]:
# Show only the ten rows with the largest `benzo_and_z_drugs_100_pdds` values
# rather than rendering the entire sorted frame.
pt_features.sort_values(by='benzo_and_z_drugs_100_pdds', ascending=False).head(10)

Unnamed: 0,patid,yob,pracid,female,index_date,isCase,final dementia medcode,data_end,data_start,matchid,reason_for_removal,age_at_index_date,insomnia,stroke,non_stroke_vascular_disease,hypertension,diabetes,mental_illness_non_smi,mental_illness_smi,sleep_apnoea,chronic_pulmonary_disease,epilepsy,mood_stabilisers_100_pdds,benzo_and_z_drugs_100_pdds,other_sedatives_100_pdds,antipsychotics_100_pdds,depot_antipsychotics_100_pdds,antidepressants_100_pdds,non_insomnia_GP_consultations,benzo_and_z_drugs_any,insomnia_any,insomnia_count:0,insomnia_count:1_5,insomnia_count:above_5,non_insomnia_GP_consultations:0,non_insomnia_GP_consultations:1_10,non_insomnia_GP_consultations:11_100,non_insomnia_GP_consultations:101_1000,non_insomnia_GP_consultations:above_1000,age_at_index_date:65-69,age_at_index_date:70-74,age_at_index_date:75-79,age_at_index_date:80-84,age_at_index_date:85-89,age_at_index_date:90-99,age_at_index_date:above_99,antidepressant_pdds:00000,antidepressant_pdds:00001_10,antidepressant_pdds:00011_100,antidepressant_pdds:00101_1000,antidepressant_pdds:01001_10000,antidepressant_pdds:10000_and_above,antidepressant_pdds:10000_and_above.1,antipsychotic_pdds:00000,antipsychotic_pdds:00001_10,antipsychotic_pdds:00011_100,antipsychotic_pdds:00101_1000,antipsychotic_pdds:01001_10000,antipsychotic_pdds:10000_and_above,antipsychotic_pdds:10000_and_above.1,depot_antipsychotic_pdds:00000,depot_antipsychotic_pdds:00001_10,depot_antipsychotic_pdds:00011_100,depot_antipsychotic_pdds:00101_1000,depot_antipsychotic_pdds:01001_10000,depot_antipsychotic_pdds:10000_and_above,depot_antipsychotic_pdds:10000_and_above.1,other_sedative_pdds:00000,other_sedative_pdds:00001_10,other_sedative_pdds:00011_100,other_sedative_pdds:00101_1000,other_sedative_pdds:01001_10000,other_sedative_pdds:10000_and_above,other_sedative_pdds:10000_and_above.1,benzo_and_z_drug_pdds:00000,benzo_and_z_drug_pdds:00001_10,benzo_and_z_drug_pdds:00011_100,benzo_and_z_drug_pdds:00101_1000,benzo_and_z_drug_pdds:01001
_10000,benzo_and_z_drug_pdds:10000_and_above,benzo_and_z_drug_pdds:10000_and_above.1,mood_stabiliser_pdds:00000,mood_stabiliser_pdds:00001_10,mood_stabiliser_pdds:00011_100,mood_stabiliser_pdds:00101_1000,mood_stabiliser_pdds:01001_10000,mood_stabiliser_pdds:10000_and_above,mood_stabiliser_pdds:10000_and_above.1,intercept
26433,20376223,22,223,1,2006-10-18,True,4693.0,2010-03-30,1992-06-30,34115,,84,7,0,1,1,0,1,1,0,0,0,0.000000,0.762973,3.323259,0.995541,0.000000,3.210739,191,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,,0.0,0.0,0.0,1.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,1.0,0.0,,0.0,0.0,0.0,1.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0
3780,7874206,15,206,1,2003-06-11,False,,2005-10-13,1993-03-31,60507,,88,0,0,0,0,0,1,0,0,0,0,6.591439,0.663852,0.000000,0.116023,0.000000,3.312281,177,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,,0.0,0.0,0.0,1.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,1.0,0.0,0.0,,0.0,0.0,0.0,0.0,1.0,0.0,,0.0,1.0
22464,8687191,27,191,1,2002-11-28,False,,2007-04-19,1991-01-31,12342,,75,5,1,1,0,0,1,0,0,1,0,0.000000,0.652575,0.000000,0.232046,0.000000,2.367624,196,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,,0.0,0.0,0.0,1.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,1.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0
25044,11222223,28,223,1,2008-10-08,True,8634.0,2009-02-05,1992-01-31,33993,,80,2,1,1,1,0,1,0,0,1,0,0.000000,0.589066,0.000000,0.116023,0.000000,2.108387,332,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,,0.0,0.0,0.0,1.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,1.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0
4083,13565005,24,5,0,2008-01-05,False,,2010-06-30,1997-11-21,84109,,84,0,0,0,0,0,1,0,0,0,0,0.000000,0.576549,0.000000,0.000000,0.000000,0.000000,55,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,1.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0
25998,11106223,24,223,1,2006-04-24,True,1350.0,2010-07-16,1993-01-31,33990,,82,0,0,0,0,0,1,0,0,0,1,3.295719,0.571706,0.201268,0.000000,0.000000,0.422395,143,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,1.0,0.0,0.0,,0.0,0.0,0.0,1.0,0.0,0.0,,0.0,0.0,0.0,0.0,1.0,0.0,,0.0,1.0
7428,5396004,8,4,1,2002-12-18,False,,2005-12-29,1992-09-30,91142,,94,0,0,0,0,0,0,0,0,0,0,0.000000,0.568812,0.000000,0.000000,0.000000,0.000000,92,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,1.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0
30397,1822269,9,269,1,2002-07-23,True,1916.0,2010-06-07,1987-01-31,41388,,93,0,0,1,1,0,1,1,0,1,0,0.000000,0.568812,0.000000,0.188563,0.000000,0.000000,118,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,1.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,1.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0
11672,2226251,20,251,1,2007-05-27,True,6578.0,2009-09-17,1995-10-31,38371,,87,0,0,1,0,0,1,0,0,0,0,4.642933,0.547904,0.473829,0.671703,0.000000,6.186451,212,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,,0.0,0.0,0.0,1.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,1.0,0.0,0.0,,0.0,0.0,0.0,1.0,0.0,0.0,,0.0,0.0,0.0,0.0,1.0,0.0,,0.0,1.0
895,1165322,20,322,1,2003-10-22,True,7323.0,2003-10-29,1992-01-31,49026,,83,0,1,1,1,0,1,0,0,1,0,0.000000,0.539009,0.000000,0.000000,0.000000,0.658551,220,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,1.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,,0.0,1.0


In [9]:
# Inspect the column names of the loaded feature table.
pt_features.columns

Index(['patid', 'yob', 'pracid', 'female', 'index_date', 'isCase',
       'final dementia medcode', 'data_end', 'data_start', 'matchid',
       'reason_for_removal', 'age_at_index_date', 'insomnia', 'stroke',
       'non_stroke_vascular_disease', 'hypertension', 'diabetes',
       'mental_illness_non_smi', 'mental_illness_smi', 'sleep_apnoea',
       'chronic_pulmonary_disease', 'epilepsy', 'mood_stabilisers_100_pdds',
       'benzo_and_z_drugs_100_pdds', 'other_sedatives_100_pdds',
       'antipsychotics_100_pdds', 'depot_antipsychotics_100_pdds',
       'antidepressants_100_pdds', 'non_insomnia_GP_consultations',
       'benzo_and_z_drugs_any', 'insomnia_any', 'insomnia_count:0',
       'insomnia_count:1_5', 'insomnia_count:above_5',
       'non_insomnia_GP_consultations:0', 'non_insomnia_GP_consultations:1_10',
       'non_insomnia_GP_consultations:11_100',
       'non_insomnia_GP_consultations:101_1000',
       'non_insomnia_GP_consultations:above_1000', 'age_at_index_date:65-69',


In [10]:
# Add a constant column of 1.0 named 'intercept' (re-running this cell is harmless).
# NOTE(review): presumably consumed as the model's constant term by the
# regression helpers - confirm against demres.demins.statistical_functions.
pt_features['intercept'] = 1.0

In [11]:
# pt_features.sort_values(by='insomnia_count<=10',ascending=False)

In [12]:
# Covariates offered to every model below, in addition to each model's own
# main variable(s).
columns_always_for_inclusion = [
    'female',
    'non_insomnia_GP_consultations',
    'stroke',
    'non_stroke_vascular_disease',
    'hypertension',
    'diabetes',
    'mental_illness_non_smi',
    'mental_illness_smi',
    'sleep_apnoea',
    'chronic_pulmonary_disease',
    'epilepsy',
    'mood_stabilisers_100_pdds',
    'other_sedatives_100_pdds',
    'antidepressants_100_pdds',
    'depot_antipsychotics_100_pdds',
    'antipsychotics_100_pdds',
    'benzo_and_z_drugs_100_pdds',
]

## Insomnia consultations

### insomnia yes/no

In [13]:
# Main variable for this model: the yes/no insomnia indicator column.
cols_for_inclusion_here = ['insomnia_any']

# Restrict to the wanted covariates while keeping the dataframe's own column order.
training_cols = pt_features.columns[
    pt_features.columns.isin(cols_for_inclusion_here + columns_always_for_inclusion)
].tolist()

# Run the project's covariate-selection routine; it prints its progress and
# returns a summary table plus the combined univariate/multivariate results.
summary_table, univariate_and_multivariate_results = purposefully_select_covariates(
    pt_features,
    covariates=training_cols,
    main_variables=cols_for_inclusion_here,
)

female  being retained as mean > 0
stroke  being retained as mean > 0
non_stroke_vascular_disease  being retained as mean > 0
hypertension  being retained as mean > 0
diabetes  being retained as mean > 0
mental_illness_non_smi  being retained as mean > 0
mental_illness_smi  being retained as mean > 0
sleep_apnoea  being retained as mean > 0
chronic_pulmonary_disease  being retained as mean > 0
epilepsy  being retained as mean > 0
mood_stabilisers_100_pdds  being retained as mean > 0
benzo_and_z_drugs_100_pdds  being retained as mean > 0
other_sedatives_100_pdds  being retained as mean > 0
antipsychotics_100_pdds  being retained as mean > 0
depot_antipsychotics_100_pdds  being retained as mean > 0
antidepressants_100_pdds  being retained as mean > 0
non_insomnia_GP_consultations  being retained as mean > 0
insomnia_any  being retained as mean > 0


*Stage 1*
Univariate results
                               Univariate OR  p value  [0.025  0.975]
antidepressants_100_pdds               1.

In [14]:
# Display the results table returned by purposefully_select_covariates in the previous cell.
univariate_and_multivariate_results

Unnamed: 0,Univariate OR,p value,[0.025,0.975],Multivariate OR,p value.1,[0.025.1,0.975].1
antidepressants_100_pdds,1.371,0.0,1.304,1.441,1.128,0.0,1.071,1.188
chronic_pulmonary_disease,1.373,0.0,1.304,1.445,1.168,0.0,1.101,1.238
diabetes,1.386,0.0,1.289,1.49,1.09,0.035,1.006,1.182
epilepsy,1.745,0.0,1.441,2.114,1.288,0.014,1.053,1.575
female,1.0,1.0,0.973,1.028,0.678,0.0,0.651,0.707
hypertension,1.081,0.0,1.046,1.118,0.875,0.0,0.835,0.917
insomnia_any,1.583,0.0,1.441,1.74,1.181,0.001,1.068,1.306
mental_illness_non_smi,1.581,0.0,1.516,1.649,1.487,0.0,1.41,1.568
mood_stabilisers_100_pdds,1.058,0.0,1.038,1.078,1.014,0.134,0.996,1.034
non_insomnia_GP_consultations,1.002,0.0,1.001,1.002,1.002,0.0,1.002,1.002


### insomnia (continuous variable)

In [15]:
# Main variables for this model: the `insomnia` column (used as a continuous
# variable) together with `benzo_and_z_drugs_100_pdds`.
cols_for_inclusion_here = ['insomnia', 'benzo_and_z_drugs_100_pdds']

# Restrict to the wanted covariates while keeping the dataframe's own column order.
training_cols = pt_features.columns[
    pt_features.columns.isin(cols_for_inclusion_here + columns_always_for_inclusion)
].tolist()

# Run the project's covariate-selection routine; it prints its progress and
# returns a summary table plus the combined univariate/multivariate results.
summary_table, univariate_and_multivariate_results = purposefully_select_covariates(
    pt_features,
    covariates=training_cols,
    main_variables=cols_for_inclusion_here,
)

female  being retained as mean > 0
insomnia  being retained as mean > 0
stroke  being retained as mean > 0
non_stroke_vascular_disease  being retained as mean > 0
hypertension  being retained as mean > 0
diabetes  being retained as mean > 0
mental_illness_non_smi  being retained as mean > 0
mental_illness_smi  being retained as mean > 0
sleep_apnoea  being retained as mean > 0
chronic_pulmonary_disease  being retained as mean > 0
epilepsy  being retained as mean > 0
mood_stabilisers_100_pdds  being retained as mean > 0
benzo_and_z_drugs_100_pdds  being retained as mean > 0
other_sedatives_100_pdds  being retained as mean > 0
antipsychotics_100_pdds  being retained as mean > 0
depot_antipsychotics_100_pdds  being retained as mean > 0
antidepressants_100_pdds  being retained as mean > 0
non_insomnia_GP_consultations  being retained as mean > 0


*Stage 1*
Univariate results
                               Univariate OR  p value  [0.025  0.975]
antidepressants_100_pdds               1.371 

In [16]:
# Display the results table returned by purposefully_select_covariates in the previous cell.
univariate_and_multivariate_results

Unnamed: 0,Univariate OR,p value,[0.025,0.975],Multivariate OR,p value.1,[0.025.1,0.975].1
antidepressants_100_pdds,1.371,0.0,1.304,1.441,1.132,0.0,1.074,1.194
benzo_and_z_drugs_100_pdds,6.751,0.0,4.428,10.293,0.893,0.647,0.549,1.452
chronic_pulmonary_disease,1.373,0.0,1.304,1.445,1.168,0.0,1.101,1.238
diabetes,1.386,0.0,1.289,1.49,1.088,0.04,1.004,1.18
epilepsy,1.745,0.0,1.441,2.114,1.291,0.013,1.056,1.579
female,1.0,1.0,0.973,1.028,0.679,0.0,0.652,0.708
hypertension,1.081,0.0,1.046,1.118,0.874,0.0,0.834,0.916
insomnia,1.179,0.0,1.13,1.231,1.068,0.001,1.027,1.111
mental_illness_non_smi,1.581,0.0,1.516,1.649,1.491,0.0,1.414,1.573
mood_stabilisers_100_pdds,1.058,0.0,1.038,1.078,1.014,0.143,0.995,1.033


### insomnia quantiles

In [17]:
# Main variables for this model: the two non-zero insomnia-count indicator
# columns together with `benzo_and_z_drugs_100_pdds`.
# NOTE(review): `insomnia_count:0` is left out, presumably as the reference
# category - confirm.
cols_for_inclusion_here = [
    'insomnia_count:1_5',
    'insomnia_count:above_5',
    'benzo_and_z_drugs_100_pdds',
]

# Restrict to the wanted covariates while keeping the dataframe's own column order.
training_cols = pt_features.columns[
    pt_features.columns.isin(cols_for_inclusion_here + columns_always_for_inclusion)
].tolist()

# Run the project's covariate-selection routine; it prints its progress and
# returns a summary table plus the combined univariate/multivariate results.
summary_table, univariate_and_multivariate_results = purposefully_select_covariates(
    pt_features,
    covariates=training_cols,
    main_variables=cols_for_inclusion_here,
)

female  being retained as mean > 0
stroke  being retained as mean > 0
non_stroke_vascular_disease  being retained as mean > 0
hypertension  being retained as mean > 0
diabetes  being retained as mean > 0
mental_illness_non_smi  being retained as mean > 0
mental_illness_smi  being retained as mean > 0
sleep_apnoea  being retained as mean > 0
chronic_pulmonary_disease  being retained as mean > 0
epilepsy  being retained as mean > 0
mood_stabilisers_100_pdds  being retained as mean > 0
benzo_and_z_drugs_100_pdds  being retained as mean > 0
other_sedatives_100_pdds  being retained as mean > 0
antipsychotics_100_pdds  being retained as mean > 0
depot_antipsychotics_100_pdds  being retained as mean > 0
antidepressants_100_pdds  being retained as mean > 0
non_insomnia_GP_consultations  being retained as mean > 0
insomnia_count:1_5  being retained as mean > 0
insomnia_count:above_5  being retained as mean > 0


*Stage 1*
Univariate results
                               Univariate OR  p value 

In [18]:
# Display the results table returned by purposefully_select_covariates in the previous cell.
univariate_and_multivariate_results

Unnamed: 0,Univariate OR,p value,[0.025,0.975],Multivariate OR,p value.1,[0.025.1,0.975].1
antidepressants_100_pdds,1.371,0.0,1.304,1.441,1.131,0.0,1.073,1.193
benzo_and_z_drugs_100_pdds,6.751,0.0,4.428,10.293,0.851,0.522,0.52,1.394
chronic_pulmonary_disease,1.373,0.0,1.304,1.445,1.167,0.0,1.101,1.238
diabetes,1.386,0.0,1.289,1.49,1.089,0.038,1.005,1.181
epilepsy,1.745,0.0,1.441,2.114,1.288,0.014,1.053,1.575
female,1.0,1.0,0.973,1.028,0.679,0.0,0.651,0.708
hypertension,1.081,0.0,1.046,1.118,0.874,0.0,0.835,0.916
insomnia_count:1_5,1.55,0.0,1.408,1.706,1.174,0.003,1.055,1.305
insomnia_count:above_5,2.667,0.0,1.615,4.403,1.803,0.025,1.077,3.018
mental_illness_non_smi,1.581,0.0,1.516,1.649,1.489,0.0,1.411,1.57
