In [20]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [21]:
cd ~/demres

/Users/zurfarosa/demres


In [22]:
import os
import sys

# Put the parent of the current working directory on sys.path (only once, so
# re-running the cell does not add duplicates).
# NOTE(review): presumably this is what makes `import demres` below resolve —
# confirm against the project layout.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.tools.tools import add_constant
import pylab as pl
from datetime import date, timedelta

import demres
from demres.common.constants import entry_type
from demres.demins.constants import Study_Design as sd
from demres.common import codelists
from demres.common.helper_functions import *
from demres.demins.statistical_functions import *

In [23]:
# Show every column when a DataFrame is rendered (None removes the column limit).
pd.options.display.max_columns = None

## Specify dementia subtype

In [24]:
# Selects which subtype's feature file is loaded below.
# Options: 'alzheimers', 'vascular', 'all_dementia'
subtype = 'all_dementia'

## Specify exposure window

In [25]:
# Exposure window; it forms part of the feature file name loaded below.
# Options: '12_to_7', '10_to_5', '8_to_3'
window = '10_to_5'

## Load relevant dataframe and create intercept

In [26]:
# Load the feature table for the chosen subtype and exposure window. The path is
# relative, so it depends on the working directory set near the top of the notebook.
# NOTE(review): `infer_datetime_format` is reported as deprecated in pandas 2.x —
# confirm against the installed pandas version before removing it.
pt_features = pd.read_csv(
    'data/pt_data/processed_data/pt_features_demins_' + subtype + '_' + window + '.csv',
    delimiter=',',
    parse_dates=['index_date', 'data_end', 'data_start'],
    infer_datetime_format=True,
)

In [27]:
# List the loaded column names; the covariate lists further down are built from these.
pt_features.columns

Index(['patid', 'yob', 'pracid', 'male', 'index_date', 'isCase',
       'final dementia medcode', 'data_start', 'data_end', 'matchid',
       'age_at_index_date', 'mood_stabilisers_100_pdds',
       'benzo_and_z_drugs_100_pdds', 'benzo_and_z_drugs<1096',
       'benzo_and_z_drugs>1096', 'benzo_and_z_drugs_never_used',
       'other_sedatives_100_pdds', 'fgas_100_pdds', 'sgas_100_pdds',
       'sga_depots_100_pdds', 'fga_depots_100_pdds',
       'antidepressants_100_pdds', 'non_insomnia_GP_consultations', 'insomnia',
       'insomnia_consultations', 'stroke', 'intellectual_disability',
       'CHD_heart_failure_and_peripheral_vascular_disease', 'hypertension',
       'diabetes', 'clin_sig_alcohol_use', 'mental_illness_non_smi',
       'mental_illness_smi', 'sleep_apnoea', 'current_smoker',
       'chronic_pulmonary_disease', 'CKD', 'epilepsy', 'COPD', 'asthma',
       '100_non_insomnia_GP_consultations'],
      dtype='object')

In [28]:
# Add a constant column of 1.0 named 'intercept'. It is not in the exclusion
# lists below, so it ends up among the candidate covariates.
pt_features = pt_features.assign(intercept=1.0)

In [30]:
# Preview only the first rows instead of rendering the whole frame; this keeps
# the saved notebook small and limits how many patient-level rows are stored in it.
pt_features.head(10)

Unnamed: 0,patid,yob,pracid,male,index_date,isCase,final dementia medcode,data_start,data_end,matchid,age_at_index_date,mood_stabilisers_100_pdds,benzo_and_z_drugs_100_pdds,benzo_and_z_drugs<1096,benzo_and_z_drugs>1096,benzo_and_z_drugs_never_used,other_sedatives_100_pdds,fgas_100_pdds,sgas_100_pdds,sga_depots_100_pdds,fga_depots_100_pdds,antidepressants_100_pdds,non_insomnia_GP_consultations,insomnia,insomnia_consultations,stroke,intellectual_disability,CHD_heart_failure_and_peripheral_vascular_disease,hypertension,diabetes,clin_sig_alcohol_use,mental_illness_non_smi,mental_illness_smi,sleep_apnoea,current_smoker,chronic_pulmonary_disease,CKD,epilepsy,COPD,asthma,100_non_insomnia_GP_consultations,intercept
0,6807236,30,236,1,2006-11-06,True,26270.0,1996-11-05,2006-11-17,35945,76,0.000000,0.000000,0,0,1,0.000000,0.000000,0.000000,0.0,0.0,0.000000,7,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1.0
1,2275105,27,105,0,2006-04-25,True,1917.0,1996-04-26,2006-05-09,14685,79,0.000000,0.000000,0,0,1,0.000000,0.000000,0.000000,0.0,0.0,0.000000,53,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,1.0
2,12440330,25,330,1,2010-03-30,True,4693.0,2000-03-22,2010-04-09,50156,85,0.000000,0.903928,1,0,0,0.000000,0.142054,0.000000,0.0,0.0,1.467862,170,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,17,1.0
3,16055443,27,443,1,2010-12-20,True,1350.0,2000-12-15,2011-01-13,65609,83,0.000000,0.000000,0,0,1,0.000000,0.000000,0.000000,0.0,0.0,0.000000,215,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,21,1.0
4,2994148,19,148,1,2007-02-15,True,1916.0,1997-01-20,2007-02-21,21680,88,0.000000,11.299105,0,1,0,0.000000,0.000000,0.000000,0.0,0.0,0.000000,58,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,1.0
5,6726262,33,262,0,2007-02-23,True,9509.0,1997-02-09,2007-03-23,40437,74,0.000000,0.000000,0,0,1,0.000000,0.000000,0.000000,0.0,0.0,0.000000,240,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,24,1.0
6,706017,25,17,1,2008-05-22,True,4693.0,1998-04-18,2008-06-05,2566,83,0.000000,0.000000,0,0,1,0.000000,0.000000,0.000000,0.0,0.0,0.000000,215,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,21,1.0
7,14778043,21,43,0,2006-01-31,True,1350.0,1996-01-05,2006-02-23,6184,85,0.000000,0.000000,0,0,1,0.000000,0.000000,0.000000,0.0,0.0,0.000000,24,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2,1.0
8,1451179,31,179,0,2006-09-20,True,1350.0,1996-08-09,2006-09-29,26924,75,0.000000,0.000000,0,0,1,0.000000,0.000000,0.000000,0.0,0.0,0.000000,107,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,10,1.0
9,6140152,31,152,0,2010-04-09,True,6578.0,2000-04-03,2010-05-24,22083,79,0.000000,0.000000,0,0,1,0.000000,0.000000,0.000000,0.0,0.0,0.404582,177,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17,1.0


## Insomnia consultations

In [36]:
# Columns kept out of the candidate covariate list for the insomnia-consultation
# model. 'non_insomnia_GP_consultations' and 'chronic_pulmonary_disease' are not
# listed (they were commented out of this list), so they remain candidates.
columns_not_for_inclusion = [
    'patid', 'yob', 'pracid', 'index_date', 'isCase',
    'final dementia medcode', 'data_start', 'data_end', 'matchid',
    'insomnia',
    'epilepsy',
    'CKD',
    'COPD', 'asthma',
    'benzo_and_z_drugs_100_pdds', 'benzo_and_z_drugs<1096',
    'benzo_and_z_drugs>1096', 'benzo_and_z_drugs_never_used',
]

# Keep the frame's column order for everything that is not excluded ...
training_cols = [name for name in pt_features.columns if name not in columns_not_for_inclusion]
# ... then move the variable of interest to the front of the list.
training_cols.remove('insomnia_consultations')
training_cols.insert(0, 'insomnia_consultations')

In [37]:
# Spot-check a few random rows. A fixed random_state makes the displayed rows
# the same on every Restart & Run All.
pt_features.sample(4, random_state=0)

Unnamed: 0,patid,yob,pracid,male,index_date,isCase,final dementia medcode,data_start,data_end,matchid,age_at_index_date,mood_stabilisers_100_pdds,benzo_and_z_drugs_100_pdds,benzo_and_z_drugs<1096,benzo_and_z_drugs>1096,benzo_and_z_drugs_never_used,other_sedatives_100_pdds,fgas_100_pdds,sgas_100_pdds,sga_depots_100_pdds,fga_depots_100_pdds,antidepressants_100_pdds,non_insomnia_GP_consultations,insomnia,insomnia_consultations,stroke,intellectual_disability,CHD_heart_failure_and_peripheral_vascular_disease,hypertension,diabetes,clin_sig_alcohol_use,mental_illness_non_smi,mental_illness_smi,sleep_apnoea,current_smoker,chronic_pulmonary_disease,CKD,epilepsy,COPD,asthma,100_non_insomnia_GP_consultations,intercept
9720,11023110,32,110,0,2009-05-05,True,1350.0,1997-01-22,2013-05-14,15361,77,0.0,0.0,0,0,1,0.0,0.0,0.0,0.0,0.0,0.0,163,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,0,16,1.0
3758,425071,20,71,0,2010-05-14,False,,1999-12-09,2013-05-13,54825,90,0.0,0.0,0,0,1,0.0,0.0,0.0,0.0,0.0,0.0,124,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,12,1.0
3316,5717484,29,484,1,2010-01-01,False,,1999-11-04,2013-03-18,70997,81,0.0,0.0,0,0,1,0.0,0.0,0.0,0.0,0.0,0.0,259,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,25,1.0
6599,1527251,34,251,1,2010-01-07,True,4693.0,1999-01-27,2013-04-26,38354,76,0.0,0.0,0,0,1,0.0,0.0,0.0,0.0,0.0,0.0,26,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1.0


In [38]:
# Run the covariate-selection routine for the insomnia-consultation model.
# main_variable names the column that the cell above moves to the front of
# training_cols. 'insomnia_counts' was passed here before, but no such column
# exists in pt_features.
# NOTE(review): how purposefully_select_covariates uses main_variable is not
# visible in this notebook — re-run and confirm the results are unchanged or
# as intended.
summary_table, result = purposefully_select_covariates(
    pt_features,
    covariates=training_cols,
    main_variable='insomnia_consultations',
)

*Stage 1*
Univariate results
                                                   odds_ratio  p_value
insomnia_consultations                                 1.0696    0.004
male                                                   1.0000    1.000
age_at_index_date                                      1.0000    1.000
mood_stabilisers_100_pdds                              1.0381    0.000
other_sedatives_100_pdds                               1.0164    0.323
fgas_100_pdds                                          1.0403    0.011
sgas_100_pdds                                          1.1117    0.108
antidepressants_100_pdds                               1.0274    0.000
non_insomnia_GP_consultations                          1.0007    0.000
stroke                                                 1.2699    0.000
CHD_heart_failure_and_peripheral_vascular_disease      1.1441    0.001
hypertension                                           0.9559    0.122
diabetes                                        

In [39]:
# First value returned by purposefully_select_covariates for the insomnia model.
summary_table

0,1,2,3
Dep. Variable:,isCase,No. Observations:,10592.0
Model:,Logit,Df Residuals:,10582.0
Method:,MLE,Df Model:,9.0
Date:,"Thu, 18 May 2017",Pseudo R-squ.:,0.01398
Time:,21:39:44,Log-Likelihood:,-7239.2
converged:,True,LL-Null:,-7341.8
,,LLR p-value:,2.5619999999999998e-39


In [40]:
# Second value returned by purposefully_select_covariates for the insomnia model.
result

Unnamed: 0,OR,coef,p,[0.025,0.975]
insomnia_consultations,1.036,0.036,0.154,-0.013,0.085
mood_stabilisers_100_pdds,1.028,0.028,0.007,0.008,0.048
antidepressants_100_pdds,1.012,0.012,0.015,0.002,0.022
non_insomnia_GP_consultations,1.002,0.002,0.0,0.001,0.002
stroke,1.204,0.186,0.011,0.043,0.328
hypertension,0.828,-0.189,0.0,-0.27,-0.108
diabetes,1.145,0.135,0.055,-0.003,0.273
mental_illness_non_smi,1.433,0.36,0.0,0.271,0.448
current_smoker,0.895,-0.111,0.027,-0.21,-0.012
intercept,0.793,-0.231,0.0,-0.305,-0.158


## Benzo and z-drug PDDs

In [16]:
# Columns kept out of the candidate covariate list for the benzo and z-drug PDD
# model. 'chronic_pulmonary_disease' and 'benzo_and_z_drugs_100_pdds' are not
# listed (they were commented out of this list), so they remain candidates.
columns_not_for_inclusion = [
    'patid', 'yob', 'pracid', 'index_date', 'isCase',
    'final dementia medcode', 'data_start', 'data_end', 'matchid',
    'insomnia', 'insomnia_consultations',
    'epilepsy',
    'CKD',
    'COPD', 'asthma',
    'benzo_and_z_drugs<1096',
    'benzo_and_z_drugs>1096', 'benzo_and_z_drugs_never_used',
]

# Keep the frame's column order for everything that is not excluded ...
training_cols = [name for name in pt_features.columns if name not in columns_not_for_inclusion]
# ... then move the variable of interest to the front of the list.
training_cols.remove('benzo_and_z_drugs_100_pdds')
training_cols.insert(0, 'benzo_and_z_drugs_100_pdds')
# NOTE(review): three commented-out re-orderings for 'benzo_and_z_drugs_*pdds'
# columns were removed from this cell; those column names do not appear in the
# pt_features.columns output shown earlier in the notebook.

In [17]:
# Run the covariate-selection routine for the benzo and z-drug PDD model;
# main_variable names the column placed first in training_cols by the cell above.
summary_table, result = purposefully_select_covariates(
    pt_features,
    covariates=training_cols,
    main_variable='benzo_and_z_drugs_100_pdds',
)

*Stage 1*
Univariate results
                                                   odds_ratio  p_value
benzo_and_z_drugs_100_pdds                             1.0147    0.004
male                                                   1.0000    1.000
age_at_index_date                                      1.0000    1.000
mood_stabilisers_100_pdds                              1.0381    0.000
other_sedatives_100_pdds                               1.0164    0.323
fgas_100_pdds                                          1.0403    0.011
sgas_100_pdds                                          1.1117    0.108
antidepressants_100_pdds                               1.0274    0.000
non_insomnia_GP_consultations                          1.0007    0.000
stroke                                                 1.2699    0.000
CHD_heart_failure_and_peripheral_vascular_disease      1.1441    0.001
hypertension                                           0.9559    0.122
diabetes                                        

In [18]:
# First value returned by purposefully_select_covariates for the benzo/z-drug model.
summary_table

0,1,2,3
Dep. Variable:,isCase,No. Observations:,10592.0
Model:,Logit,Df Residuals:,10582.0
Method:,MLE,Df Model:,9.0
Date:,"Thu, 18 May 2017",Pseudo R-squ.:,0.01387
Time:,21:03:29,Log-Likelihood:,-7240.0
converged:,True,LL-Null:,-7341.8
,,LLR p-value:,5.584e-39


In [19]:
# Second value returned by purposefully_select_covariates for the benzo/z-drug model.
result

Unnamed: 0,OR,coef,p,[0.025,0.975]
benzo_and_z_drugs_100_pdds,1.004,0.004,0.502,-0.007,0.014
mood_stabilisers_100_pdds,1.028,0.028,0.007,0.008,0.048
antidepressants_100_pdds,1.012,0.012,0.02,0.002,0.022
non_insomnia_GP_consultations,1.002,0.002,0.0,0.001,0.002
stroke,1.202,0.184,0.011,0.042,0.327
hypertension,0.827,-0.19,0.0,-0.271,-0.109
diabetes,1.14,0.131,0.062,-0.007,0.269
mental_illness_non_smi,1.436,0.362,0.0,0.273,0.451
current_smoker,0.896,-0.109,0.03,-0.208,-0.011
intercept,0.796,-0.228,0.0,-0.302,-0.155
