In [2]:
from logs import logDecorator as lD 
import jsonref, pprint
from lib.databaseIO import pgIO

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly
from ipywidgets import interact, interact_manual
import pickle
import statistics

# 1. Load 
## Get Data for all patients that meet the filter criteria 

In [3]:
# Load the pre-built visits table from the intermediate data folder.
# NOTE(security): pickle can execute arbitrary code while loading, so only
# unpickle files produced by this project's own pipeline.
# The `with` block closes the file handle as soon as loading is finished.
with open('../data/intermediate/visits_data_ALL.pkl', 'rb') as visits_file:
    visits_data = pickle.load(visits_file)

print(f'There are {len(visits_data.PatientID.unique())} patients in the cohort of interest.')
visits_data.head(10)

There are 10054 patients in the MDD cohort of interest.


Unnamed: 0,PatientID,VisitID,Days,VisitType,CGI,Medication,Dose,Regimen,Diagnosis,DSMNo
0,7223,188297,448,Outpatient,1,bupropion,100.0,2.0,Amphetamine Abuse,305.70
1,7223,188300,399,Outpatient,1,bupropion,100.0,2.0,Depressive Disorder NOS,311
2,7223,188308,374,Discharge summary,4,bupropion,100.0,2.0,Depressive Disorder NOS,311
3,7223,188309,374,Outpatient,4,bupropion,100.0,2.0,Depressive Disorder NOS,311
4,7223,188311,294,Outpatient,1,bupropion,100.0,2.0,Amphetamine Abuse,305.70
5,7223,188313,292,Discharge summary,4,bupropion,100.0,2.0,Amphetamine Abuse,305.70
6,7223,188321,282,Inpatient,1,bupropion,100.0,2.0,Depressive Disorder NOS,311
7,9209,241510,568,Discharge summary,3,citalopram,20.0,1.0,No Diagnosis on Axis II,V71.09
8,9209,241510,568,Discharge summary,3,tramadol,50.0,1.0,"Major Depressive Disorder, Recurrent, Unspecified",296.30
9,9209,241511,568,Outpatient,3,citalopram,20.0,1.0,"Major Depressive Disorder, Recurrent, Unspecified",296.30


## Get visits related to MDD Only
Since the full dataset includes comorbidities, we keep only the visits whose DSM number (`DSMNo`) falls in our diagnosis group of interest.

In [23]:
# Read the disorder lookup table and pull out the regular expression stored
# for major depressive disorder.
diagnosis_dsmno = pd.read_csv('../data/raw_data/disorders_dsmno.csv')
is_mdd_row = diagnosis_dsmno['disorders'] == 'major depressive disorder'
# .item() only succeeds when exactly one row matches, so a missing or
# duplicated entry in the lookup table fails loudly here.
dsm_regex = diagnosis_dsmno.loc[is_mdd_row, 'regex'].item()

In [43]:
# Keep only the rows whose DSM number matches the MDD pattern.
# na=False treats rows with a missing DSMNo as "no match"; without it the mask
# would contain NaN and boolean indexing would raise.
is_mdd_visit = visits_data['DSMNo'].str.contains(dsm_regex, regex=True, na=False)
mdd_visits_data = visits_data.loc[is_mdd_visit]
# Patients with at least one matching row; used as the options of the
# interactive plots below.
patient_list = mdd_visits_data.PatientID.unique()

In [44]:
# Report how many patients and rows remain after the MDD filter.
mdd_summary = f'After filtering for MDD patients, we have {len(mdd_visits_data.PatientID.unique())} patients with {len(mdd_visits_data)} visits'
print(mdd_summary)

After filtering for MDD patients, we have 8258 patients with 291287 visits


In [49]:
def getPatientData(pid, data):
    """Return the rows of ``data`` that belong to one patient.

    Parameters
    ----------
    pid : value compared against ``data.PatientID``
    data : pandas.DataFrame with a ``PatientID`` column

    Returns
    -------
    pandas.DataFrame
        The matching rows without the ``PatientID`` column, ordered by
        ``Days`` (ascending) when that column exists. ``data`` itself is
        not modified.
    """
    belongs_to_patient = data.PatientID == pid
    patientData = data.loc[belongs_to_patient].drop(columns='PatientID')
    if 'Days' not in patientData.columns:
        # Nothing to order by; hand the rows back in their original order.
        return patientData
    return patientData.sort_values('Days')

def getComparativePopulation(pid, data):
    """Return the rows of every patient except ``pid``.

    Parameters
    ----------
    pid : value compared against ``data.PatientID``; its rows are excluded
    data : pandas.DataFrame with a ``PatientID`` column

    Returns
    -------
    pandas.DataFrame
        The remaining rows, all columns kept, ordered by ``PatientID`` and
        then by ``Days`` when that column exists. ``data`` itself is not
        modified.
    """
    cpData = data.loc[data.PatientID != pid]
    # Sort the rows (default axis=0). With axis=1 pandas looks the keys up in
    # the row index instead of the columns and raises KeyError.
    sort_keys = [col for col in ('PatientID', 'Days') if col in cpData.columns]
    return cpData.sort_values(sort_keys)

# Plots

## Individual Patient's CGI over Time
**TODO:** let the user enter the patient ID in a textbox instead of picking it from a dropdown.

In [92]:
@interact
def getCGIoverTime(pid=patient_list):
    """Plot one patient's CGI score against days.

    ``interact`` turns ``patient_list`` into a selection widget and calls
    this function with the chosen patient ID. The patient's rows are taken
    from the full ``visits_data`` table, not only the MDD-coded rows.

    Parameters
    ----------
    pid : patient ID selected in the widget
    """
    patient_data = getPatientData(pid, visits_data)
    # A day can have several rows, so plot one point per day: the mean CGI.
    daily_cgi = patient_data.groupby('Days')['CGI'].mean()

    _, ax = plt.subplots()
    ax.plot(daily_cgi.index, daily_cgi.values, label='Severity', marker='x', color='blue')

    pid_daysmax = patient_data.Days.max()
    # Dashed grey marker at the end of every complete 365-day period.
    for yr in range(pid_daysmax // 365):
        ax.axvline(x=(yr + 1) * 365, linestyle='--', c='grey')
    # Fixed y-range so plots of different patients are directly comparable.
    ax.axis([-10, pid_daysmax + 10, -0.5, 7.5])
    ax.set_xlabel('Days')
    ax.set_ylabel('CGI')
    ax.set_title(f'CGI Severity of Patient {pid} over time')
    ax.legend()
    plt.show()

interactive(children=(Dropdown(description='pid', options=(9209, 20777, 23082, 25197, 25895, 31219, 33302, 485…

In [100]:
# Build the frame explicitly so this cell also works on a fresh kernel: the
# only other module-level assignment of `patient_data` is further down the
# notebook. Patient 9209 is the one whose visits are displayed here.
patient_data = getPatientData(9209, visits_data)
patient_data

Unnamed: 0,VisitID,Days,VisitType,CGI,Medication,Dose,Regimen,Diagnosis,DSMNo
86,241641,6,Outpatient,4,tramadol,50.0,1.0,Alcohol Dependence,303.90
85,241641,6,Outpatient,4,citalopram,20.0,1.0,"Major Depressive Disorder, Recurrent, Unspecified",296.30
83,241637,8,Outpatient,4,citalopram,20.0,1.0,"Major Depressive Disorder, Recurrent, Unspecified",296.30
84,241637,8,Outpatient,4,tramadol,50.0,1.0,"Major Depressive Disorder, Recurrent, Unspecified",296.30
81,241613,48,Outpatient,4,citalopram,20.0,1.0,Alcohol Dependence,303.90
...,...,...,...,...,...,...,...,...,...
11,241520,414,Outpatient,4,citalopram,20.0,1.0,Alcohol Dependence,303.90
10,241512,565,Outpatient,3,citalopram,20.0,1.0,Opioid Dependence,304.00
9,241511,568,Outpatient,3,citalopram,20.0,1.0,"Major Depressive Disorder, Recurrent, Unspecified",296.30
8,241510,568,Discharge summary,3,tramadol,50.0,1.0,"Major Depressive Disorder, Recurrent, Unspecified",296.30


## Individual Patient's Change in CGI over time
**TODO:** consider smoothing the change with a rolling window?

In [90]:
@interact
def getCGIChangeoverTime(pid=patient_list):
    """Plot the day-to-day change in one patient's CGI score.

    ``interact`` turns ``patient_list`` into a selection widget and calls
    this function with the chosen patient ID. The patient's rows are taken
    from the full ``visits_data`` table.

    Parameters
    ----------
    pid : patient ID selected in the widget
    """
    patient_data = getPatientData(pid, visits_data)
    # A day can have several rows; collapse them to one mean CGI per day so
    # the change does not depend on the arbitrary order of same-day rows.
    daily_cgi = patient_data.groupby('Days')['CGI'].mean()
    # Negative = improvement (CGI decreased)
    # Positive = worsening (CGI increased)
    cgi_change = daily_cgi.diff()

    _, ax = plt.subplots()
    ax.plot(cgi_change.index, cgi_change.values, marker='x')
    # Reference line for "no change".
    ax.axhline(y=0, linestyle='--', c='grey')
    ax.set_xlabel('Days')
    ax.set_ylabel('Change in CGI since previous recorded day')
    ax.set_title(f'Change in CGI Severity of Patient {pid} over time')
    plt.show()

interactive(children=(Dropdown(description='pid', options=(9209, 20777, 23082, 25197, 25895, 31219, 33302, 485…

# Question:
## Which visits to include?
Should we use only the visits related to the diagnosis of interest, or also include every other recorded CGI score?

Because my main interest is **effect of the drug on their CGI score**

In [71]:
# Example patient: rows that survive the MDD filter.
mdd_visits_data[mdd_visits_data['PatientID'] == 31219]

Unnamed: 0,PatientID,VisitID,Days,VisitType,CGI,Medication,Dose,Regimen,Diagnosis,DSMNo
1090,31219,755134,117,Inpatient,5,sertraline,50.0,1.0,"Major Depressive Disorder, Single Episode, Mod...",296.22
1092,31219,755136,106,Inpatient,1,sertraline,50.0,1.0,"Major Depressive Disorder, Single Episode, Mod...",296.22


In [72]:
# Same example patient: every row in the full table, for comparison.
visits_data[visits_data['PatientID'] == 31219]

Unnamed: 0,PatientID,VisitID,Days,VisitType,CGI,Medication,Dose,Regimen,Diagnosis,DSMNo
1090,31219,755134,117,Inpatient,5,sertraline,50.0,1.0,"Major Depressive Disorder, Single Episode, Mod...",296.22
1091,31219,755135,115,Inpatient,1,sertraline,50.0,1.0,No Diagnosis on Axis II,V71.09
1092,31219,755136,106,Inpatient,1,sertraline,50.0,1.0,"Major Depressive Disorder, Single Episode, Mod...",296.22
1093,31219,755137,104,Inpatient,1,sertraline,50.0,1.0,Eating Disorder NOS,307.50
1094,31219,755138,103,Inpatient,1,sertraline,50.0,1.0,Eating Disorder NOS,307.50
1095,31219,755139,102,Inpatient,5,sertraline,50.0,1.0,Eating Disorder NOS,307.50
1096,31219,755140,101,Inpatient,3,sertraline,50.0,1.0,Partner Relational Problem,V61.10
1097,31219,755141,101,Inpatient,3,sertraline,50.0,1.0,Eating Disorder NOS,307.50
1098,31219,755142,98,Inpatient,3,sertraline,50.0,1.0,Partner Relational Problem,V61.10
1099,31219,755146,3,Outpatient,5,sertraline,50.0,1.0,Eating Disorder NOS,307.50


## How do we determine how long a patient has been taking a given medication?

In [109]:
# For one example patient, list the days on which each medication appears.
pid = 9209
patient_data = getPatientData(pid, visits_data)
days_by_medication = patient_data.groupby('Medication')['Days'].apply(list)
days_by_medication

Medication
citalopram    [6, 8, 48, 50, 53, 55, 57, 60, 62, 64, 69, 71,...
tramadol      [6, 8, 48, 50, 53, 55, 57, 60, 62, 64, 69, 71,...
Name: Days, dtype: object