------
# **Dementia Patients -- Analysis and Prediction**
### ***Author : Akhilesh Vyas***
### ****Date : August, 2019****



# ***Result Plots***

- <a href='#00'>0. Setup </a>
    - <a href='#00.1'>0.1. Load libraries </a>
    - <a href='#00.2'>0.2. Define paths </a>

- <a href='#01'>1. Data Preparation </a>  
    - <a href='#01.1'>1.1. Read Data </a> 
    - <a href='#01.2'>1.2. Prepare data  </a>
    - <a href='#01.3'>1.3. Prepare target </a>
    - <a href='#01.4'>1.4. Removing Unwanted Features </a>
    
- <a href='#02'>2. Data Analysis</a> 
    - <a href='#02.1'>2.1. Feature </a> 
    - <a href='#02.2'>2.2. Target </a> 
    
- <a href='#03'>3. Data Preparation and Vector Transformation</a>

- <a href='#04'>4. Analysis and Imputing Missing Values </a>

- <a href='#05'>5. Feature Analysis</a> 
    - <a href='#05.1'>5.1. Correlation Matrix</a>
    - <a href='#05.2'>5.2. Feature and target </a>
    - <a href='#05.3'>5.3. Feature Selection Models </a>
    
- <a href='#06'>6. Machine Learning - Classification Model</a> 

# <a id='00'>0. Setup </a>

# <a id='00.1'>0.1 Load libraries </a>

Loading Libraries

In [1]:
import sys
sys.path.insert(1, '../preprocessing/')
import numpy as np
import pickle
import scipy.stats as spstats
import matplotlib.pyplot as plt
import seaborn as sns
import pandas_profiling
from sklearn.datasets.base import Bunch
#from data_transformation_cls import FeatureTransform
from ast import literal_eval
import plotly.figure_factory as ff
import plotly.offline as py
import plotly.graph_objects as go

import pandas as pd
# Show every column and every row, and never truncate cell text, when a frame is displayed.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
# None is the documented "no limit" value; the former -1 sentinel is deprecated since pandas 1.0.
pd.set_option('display.max_colwidth', None)

from ordered_set import OrderedSet

%matplotlib inline



# <a id='00.2'>0.2 Define paths </a>

In [None]:
# Directories used by the cells below, written relative to the working directory:
#   data_path   - prefix for the CSV/Excel inputs read and the CSV outputs written
#   result_path - prefix for generated reports
#   optima_path - prefix for the variable-guide Excel workbook
data_path = '../../../datalcdem/data/optima/dementia_18July/class_fast_normal_slow_api_inputs/'
result_path = '../../../datalcdem/data/optima/dementia_18July/class_fast_normal_slow_api_inputs/results/'
optima_path = '../../../datalcdem/data/optima/optima_excel/'

# <a id='01'>1. Data Preparation </a> 

## <a id='01.1'>1.1. Read Data</a>

In [None]:
# Preparation of features from the raw data.
#
# NOTE: everything between the triple quotes below is a plain string literal,
# i.e. disabled code kept for reference. It is evaluated as a string and
# discarded, so none of the reads, merges or writes inside it are executed.

# Patient Comorbidities data
'''patient_com_raw_df = pd.read_csv(data_path + 'optima_patients_comorbidities.csv').groupby(by=['patient_id', 'EPISODE_DATE'], as_index=False).agg(lambda x: x.tolist())[['patient_id', 'EPISODE_DATE', 'Comorbidity_cui']]
display(patient_com_raw_df.head(5))
patient_com_raw_df['EPISODE_DATE'] = pd.to_datetime(patient_com_raw_df['EPISODE_DATE'])


# Patient Treatment data
patient_treat_raw_df = pd.read_csv(data_path + 'optima_patients_treatments.csv').groupby(by=['patient_id', 'EPISODE_DATE'], as_index=False).agg(lambda x: x.tolist())[['patient_id', 'EPISODE_DATE', 'Medication_cui']]
display(patient_treat_raw_df.head(5))
patient_treat_raw_df['EPISODE_DATE'] = pd.to_datetime(patient_treat_raw_df['EPISODE_DATE'])

# Join Patient Treatment and Comorbidities data
patient_com_treat_raw_df = pd.merge(patient_com_raw_df, patient_treat_raw_df,on=['patient_id', 'EPISODE_DATE'], how='outer')
patient_com_treat_raw_df.sort_values(by=['patient_id', 'EPISODE_DATE'],axis=0, inplace=True, ascending=True)
patient_com_treat_raw_df.reset_index(drop=True, inplace=True)
patient_com_treat_raw_df.head(5)


#Saving data
patient_com_treat_raw_df.to_csv(data_path + 'patient_com_treat_episode_df.csv', index=False)'''

# Extracting selected features from Raw data
def rename_columns(col_list):
    """Map raw column labels to short, underscore-separated names.

    ``'GLOBAL_PATIENT_DB_ID'`` becomes ``'patient_id'``. For every other
    label, the first known category prefix contained in it (checked in the
    order listed below) is removed, and all remaining spaces are replaced
    by underscores.

    Returns a dict ``{original_label: new_label}`` suitable for
    ``DataFrame.rename(columns=...)``.
    """
    category_prefixes = (
        'CAMDEX SCORES: ',
        'CAMDEX ADMINISTRATION 1-12: ',
        'DIAGNOSIS 334-351: ',
        'OPTIMA DIAGNOSES V 2010: ',
        'PM INFORMATION: ',
    )
    renamed = {}
    for label in col_list:
        if label == 'GLOBAL_PATIENT_DB_ID':
            renamed[label] = 'patient_id'
            continue
        stripped = label
        for prefix in category_prefixes:
            # Containment test (not startswith): the prefix may occur anywhere.
            if prefix in label:
                stripped = label.replace(prefix, '')
                break
        renamed[label] = stripped.replace(' ', '_')
    return renamed

# Build the list of requested variables from the variable guide: each entry is
# "<Sub Category ><Variable Label>" with '+' in the label replaced by ':'.
sel_col_df = pd.read_excel(optima_path+'Variable_Guide_Highlighted_Fields_.xlsx')
display(sel_col_df.head(5))
sel_cols = [i+j.replace('+', ':')for i,j in zip(sel_col_df['Sub Category '].tolist(), sel_col_df['Variable Label'].tolist())]
rem_cols= ['OPTIMA DIAGNOSES V 2010: OTHER SYSTEMIC ILLNESS: COMMENT'] # Missing column in the dataset
sel_cols = sorted(set(sel_cols)-set(rem_cols))
print (sel_cols)

# Fixed key columns first, then the guide-selected ones; OrderedSet removes
# repeats while keeping first-seen order.
columns_selected = list(OrderedSet(['GLOBAL_PATIENT_DB_ID', 'EPISODE_DATE', 'Age At Episode', 'CAMDEX SCORES: MINI MENTAL SCORE', 
                    'OPTIMA DIAGNOSES V 2010: PETERSEN MCI', 'OPTIMA DIAGNOSES V 2010: PETERSEN MCI TYPE',
                    'DIAGNOSIS 334-351: PRIMARY PSYCHIATRIC DIAGNOSES', 'OPTIMA DIAGNOSES V 2010: AD (NINCDS-ADSDA)'] + sel_cols))

df_datarequest = pd.read_excel(data_path+'Data_Request_Jan_2019_final.xlsx')
display(df_datarequest.head(1))
# .copy() detaches the selection from df_datarequest, so the rename below and
# later column assignments act on an independent frame instead of a slice
# (avoids SettingWithCopyWarning and its ambiguous semantics).
df_datarequest_features = df_datarequest[columns_selected].copy()
display(df_datarequest_features.columns)

columns_renamed = rename_columns(df_datarequest_features.columns.tolist())
df_datarequest_features = df_datarequest_features.rename(columns=columns_renamed)

# df_datarequest_features.drop(columns=['Age_At_Episode', 'PETERSEN_MCI_TYPE'], inplace=True)
display(df_datarequest_features.head(5))

# Disabled: drop rows whose MMSE value is outside the 0..30 range.
# df_datarequest_features = df_datarequest_features[(df_datarequest_features['MINI_MENTAL_SCORE']<=30) & (df_datarequest_features['MINI_MENTAL_SCORE']>=0)]

# Disabled: merge of the treatment/comorbidity table with the selected features.
#patient_com_treat_raw_df['EPISODE_DATE'] = pd.to_datetime(patient_com_treat_raw_df['EPISODE_DATE'])
#patient_com_treat_fea_raw_df = pd.merge(patient_com_treat_raw_df,df_datarequest_features,on=['patient_id', 'EPISODE_DATE'], how='left')
#patient_com_treat_fea_raw_df.sort_values(by=['patient_id', 'EPISODE_DATE'],axis=0, inplace=True, ascending=True)
#patient_com_treat_fea_raw_df.reset_index(inplace=True, drop=True)
#display(patient_com_treat_fea_raw_df.head(5))
# With the merge disabled, the working frame is simply another name for the
# selected-features frame (an alias, not a copy).
patient_com_treat_fea_raw_df = df_datarequest_features # Need to be changed ------------------------

# Disabled: fill missing MMSE values with the patient's own mean.

#patient_com_treat_fea_raw_df['MINI_MENTAL_SCORE']\
#                                        = patient_com_treat_fea_raw_df.groupby(by=['patient_id'])['MINI_MENTAL_SCORE'].transform(lambda x: x.fillna(x.mean()))
display(patient_com_treat_fea_raw_df.head(5))

# Category bands applied below: 24 < Normal <= 30, 19 <= Mild <= 24,
# 14 <= Moderate <= 18, 0 <= Severe <= 13. The column starts as all-NaN.
patient_com_treat_fea_raw_df['MINI_MENTAL_SCORE_CATEGORY']=np.nan

def change_minimentalscore_to_category(df):
    """Label each row's MMSE score with a severity band, in place.

    Reads ``df['MINI_MENTAL_SCORE']`` and writes
    ``df['MINI_MENTAL_SCORE_CATEGORY']``:

    * ``'Normal'``   for 24 <  score <= 30
    * ``'Mild'``     for 19 <= score <= 24
    * ``'Moderate'`` for 14 <= score <= 18
    * ``'Severe'``   for  0 <= score <= 13

    Rows matching no band (missing scores, scores outside 0..30, or values
    strictly between two bands such as 18.5) are left untouched.

    Returns the same DataFrame object that was passed in.
    """
    score = df['MINI_MENTAL_SCORE']
    bands = (
        ('Normal', score.gt(24) & score.le(30)),
        ('Mild', score.ge(19) & score.le(24)),
        ('Moderate', score.ge(14) & score.le(18)),
        ('Severe', score.ge(0) & score.le(13)),
    )
    for label, in_band in bands:
        df.loc[in_band, 'MINI_MENTAL_SCORE_CATEGORY'] = label

    return df

# Fill MINI_MENTAL_SCORE_CATEGORY from MINI_MENTAL_SCORE (the function edits
# the frame in place and returns it).
patient_com_treat_fea_raw_df = change_minimentalscore_to_category(patient_com_treat_fea_raw_df)

# Save the per-episode table (one row per episode, not yet expanded into columns).
patient_com_treat_fea_raw_df.to_csv(data_path + 'patient_com_treat_fea_episode_raw_without_expand_df.csv', index=False)

# Set line number for treatment line
def setLineNumber(lst):
    """Return, for each element of ``lst``, how many times it has occurred so far.

    The first occurrence of a value gets 1, the second 2, and so on, so
    ``[a, a, b, a]`` yields ``[1, 2, 1, 3]``. The result has the same length
    and order as ``lst``. Elements must be hashable.
    """
    seen_counts = {}
    line_numbers = []

    for item in lst:
        seen_counts[item] = seen_counts.get(item, 0) + 1
        line_numbers.append(seen_counts[item])

    return line_numbers

# Running per-patient row counter (1 for a patient's first row, 2 for the next, ...),
# computed in the frame's current row order.
patient_com_treat_fea_raw_df['lineNumber'] = setLineNumber(patient_com_treat_fea_raw_df['patient_id'].tolist())
display(patient_com_treat_fea_raw_df.head(5))

# Extend episode data into columns
def extend_episode_data(df):
    """Spread each patient's episodes across columns (one output row per patient).

    ``df`` must contain a ``patient_id`` column. A ``lineNumber`` column
    (1, 2, 3, ... per patient, in current row order) is written to ``df``
    in place. In the result, a patient's first episode keeps the original
    column names and the columns of the k-th later episode carry the
    suffix ``_k``; patients with fewer episodes get NaN there.

    Returns a new DataFrame with a fresh 0..n-1 index (an empty DataFrame
    if no rows were produced).
    """
    # Running episode counter per patient. cumcount follows the actual row
    # order, so it does not depend on the index labels of df (the previous
    # index-aligned df.update silently mis-assigned numbers on a re-indexed
    # or filtered frame).
    df['lineNumber'] = df.groupby('patient_id').cumcount() + 1
    print ('\n----------------After creating line-number for each patients------------------')
    display(df.head(20))

    # One frame per episode number.
    r = df['lineNumber'].max()
    print ('Max line:',r)
    l = [df[df['lineNumber']==i] for i in range(1, int(r+1))]
    print('Number of Dfs to merge: ',len(l))

    pieces = []
    tmp_id = []  # patients already emitted
    for i, df_l in enumerate(l):
        df_l = df_l[~df_l['patient_id'].isin(tmp_id)]
        if df_l.empty:
            continue
        for j, df_ll in enumerate(l[i+1:]):
            # Left join keeps every patient of df_l; overlapping column names
            # from the later episode get the suffix '_<j+1>'.
            df_l = df_l.join(df_ll.set_index('patient_id'), on='patient_id', rsuffix='_'+str(j+1))
        tmp_id = tmp_id + df_l['patient_id'].tolist()
        pieces.append(df_l)

    # The return used to sit inside the loop (ending it after the first pass)
    # and relied on DataFrame.append, which pandas 2.0 removed.
    if not pieces:
        return pd.DataFrame()
    return pd.concat(pieces, ignore_index=True, sort=False)
    


# Drop repeated (patient, episode date) rows and put each patient's episodes in date order.
patient_com_treat_fea_raw_df = patient_com_treat_fea_raw_df.drop_duplicates(subset=['patient_id', 'EPISODE_DATE'])
patient_com_treat_fea_raw_df = patient_com_treat_fea_raw_df.sort_values(by=['patient_id', 'EPISODE_DATE'])
# Number the episodes only now: numbering before the de-duplication and sort
# left gaps for dropped rows and could disagree with the date order.
patient_com_treat_fea_raw_df['lineNumber'] = setLineNumber(patient_com_treat_fea_raw_df['patient_id'].tolist())
columns = patient_com_treat_fea_raw_df.columns.tolist()
# Reorder: first two columns, the last column, columns 2-3, the second-to-last
# column, then the rest. (Presumably last = lineNumber and second-to-last =
# MINI_MENTAL_SCORE_CATEGORY, given the order they were added above.)
patient_com_treat_fea_raw_df = patient_com_treat_fea_raw_df[columns[0:2]+columns[-1:]
                                                            +columns[2:4]+columns[-2:-1]
                                                            +columns[4:-2]]
# Expand patient 
#patient_com_treat_fea_raw_df = extend_episode_data(patient_com_treat_fea_raw_df)
display(patient_com_treat_fea_raw_df.head(2))


#Saving extended episode of each patients
#patient_com_treat_fea_raw_df.to_csv(data_path + 'patient_com_treat_fea_episode_raw_df.csv', index=False)



In [None]:
display(patient_com_treat_fea_raw_df.describe(include='all'))
# info() prints its summary itself and returns None, so wrapping it in
# display() only added a stray "None" to the output.
patient_com_treat_fea_raw_df.info()

# Number of missing values in every row (one entry per row, in row order),
# counted in one vectorised pass instead of a Python loop over iloc.
tmp_l = patient_com_treat_fea_raw_df.isnull().sum(axis=1).tolist()

# Distribution of "missing values per row".
plt.hist(tmp_l)
plt.show()

In [None]:
# Build a profiling report of the whole table and write it as HTML under result_path.
# NOTE(review): profile_report is presumably the DataFrame method registered by the
# pandas_profiling import at the top of the notebook - confirm.
profile = patient_com_treat_fea_raw_df.profile_report(title='Dementia Profiling Report', style={'full_width':True})
# profile = patient_com_treat_fea_raw_df.profile_report(title='Dementia Profiling Report')
profile.to_file(output_file= result_path + "dementia_data_profiling_report_output_all_patients_notasked.html")

In [None]:
# Show score and MCI flag for episodes flagged PETERSEN_MCI == 1 whose MMSE category
# is anything other than 'Normal' (this includes rows with no category at all).
patient_com_treat_fea_raw_df[(patient_com_treat_fea_raw_df['MINI_MENTAL_SCORE_CATEGORY']!='Normal') & (patient_com_treat_fea_raw_df['PETERSEN_MCI']==1.0)][['MINI_MENTAL_SCORE','PETERSEN_MCI']]

In [None]:
# Years elapsed since the first row of each patient's group (x.iloc[0]).
# Divide by one day first and by 365.25 afterwards: np.timedelta64(1, 'D')*365.25
# is truncated by NumPy to a whole 365-day timedelta, which silently used a
# 365-day year instead of the intended 365.25.
patient_com_treat_fea_raw_df['durations(years)'] = patient_com_treat_fea_raw_df.groupby(by=['patient_id'])['EPISODE_DATE'].transform(lambda x: (x - x.iloc[0])/np.timedelta64(1, 'D')/365.25)
# Copy of the MMSE category in which 'Normal' episodes flagged PETERSEN_MCI == 1
# are relabelled 'NormalMCI'.
patient_com_treat_fea_raw_df['MINI_MENTAL_SCORE_CATEGORY(+MCI)'] = patient_com_treat_fea_raw_df['MINI_MENTAL_SCORE_CATEGORY']
patient_com_treat_fea_raw_df.loc[(patient_com_treat_fea_raw_df['MINI_MENTAL_SCORE_CATEGORY']=='Normal') & 
                                                  (patient_com_treat_fea_raw_df['PETERSEN_MCI']==1), 'MINI_MENTAL_SCORE_CATEGORY(+MCI)'] = 'NormalMCI'

# Not the last expression of the cell, so this selection is evaluated but not shown.
patient_com_treat_fea_raw_df[patient_com_treat_fea_raw_df['MINI_MENTAL_SCORE_CATEGORY(+MCI)']=='NormalMCI']

patient_com_treat_fea_raw_df.head(10)

In [None]:
# patients - time taken for changing state
# Per-patient differences between each episode and the previous one
# (x - x.shift(+1)); a patient's first episode therefore gets NaN.
#   State_Update(years) - years between consecutive episodes
#   MMSE_Change         - change in the raw MMSE score
patient_com_treat_fea_raw_df['State_Update(years)'] = patient_com_treat_fea_raw_df.groupby(by=['patient_id'])['durations(years)'].transform(lambda x: x - x.shift(+1))
patient_com_treat_fea_raw_df['MMSE_Change'] = patient_com_treat_fea_raw_df.groupby(by=['patient_id'])['MINI_MENTAL_SCORE'].transform(lambda x: x - x.shift(+1))
# Category "hops": categories are mapped to ordinal numbers (Normal=1 ... Severe=5,
# or ... Severe=4 without the MCI level) and differenced the same way.
# Note that replace() is applied to the whole frame, not only to the category column.
patient_com_treat_fea_raw_df['MMS_CAT_HOP'] = patient_com_treat_fea_raw_df.replace(['Normal', 'NormalMCI','Mild', 'Moderate', 'Severe'],[1.0,2.0,3.0,4.0,5.0]).groupby(by=['patient_id'])['MINI_MENTAL_SCORE_CATEGORY(+MCI)'].transform(lambda x: x - x.shift(+1))
patient_com_treat_fea_raw_df['MMS_CAT_HOP_NOMCI'] = patient_com_treat_fea_raw_df.replace(['Normal','Mild', 'Moderate', 'Severe'],[1.0,2.0,3.0,4.0]).groupby(by=['patient_id'])['MINI_MENTAL_SCORE_CATEGORY'].transform(lambda x: x - x.shift(+1))
# A hop of -2 or lower (the category number fell by two or more levels since the
# previous episode) is flagged 'YES', any other hop 'NO'. Rows whose hop is NaN
# match neither condition and keep a missing flag.
patient_com_treat_fea_raw_df.loc[patient_com_treat_fea_raw_df['MMS_CAT_HOP']<=-2, 'Misdiagnosed'] = 'YES'
patient_com_treat_fea_raw_df.loc[patient_com_treat_fea_raw_df['MMS_CAT_HOP']>-2, 'Misdiagnosed'] = 'NO'
patient_com_treat_fea_raw_df.loc[patient_com_treat_fea_raw_df['MMS_CAT_HOP_NOMCI']<=-2, 'Misdiagnosed_NOMCI'] = 'YES'
patient_com_treat_fea_raw_df.loc[patient_com_treat_fea_raw_df['MMS_CAT_HOP_NOMCI']>-2, 'Misdiagnosed_NOMCI'] = 'NO'
display(patient_com_treat_fea_raw_df.tail(10))


In [None]:
# Total follow-up per patient: the sum of the gaps between consecutive episodes,
# shown as a 5-bin histogram.
patient_total_duration = patient_com_treat_fea_raw_df.groupby(by=['patient_id'])['State_Update(years)'].agg(lambda x: x.sum()).reset_index(name='Total_Duration')
patient_total_duration.hist(column=['Total_Duration'], bins=5)

In [None]:
# For Knowledge Graph: attach the previous episode's category and date to every row
# (shifted by one within each patient, so a patient's first row gets a missing value).
patient_com_treat_fea_raw_df['MINI_MENTAL_SCORE_CATEGORY(+MCI)_PREV'] = (
    patient_com_treat_fea_raw_df
    .groupby(by=['patient_id'])['MINI_MENTAL_SCORE_CATEGORY(+MCI)']
    .transform(lambda x: x.shift(+1))
)
patient_com_treat_fea_raw_df['EPISODE_DATE_PREV'] = (
    patient_com_treat_fea_raw_df
    .groupby(by=['patient_id'])['EPISODE_DATE']
    .transform(lambda x: x.shift(+1))
)

# Episodes flagged as misdiagnosed, with the start/end date and category of the transition.
patient_epst_epend_misdiagnosed = patient_com_treat_fea_raw_df[
    patient_com_treat_fea_raw_df['Misdiagnosed']=='YES'
][[
    'patient_id', 'EPISODE_DATE_PREV', 'EPISODE_DATE',
    'MINI_MENTAL_SCORE_CATEGORY(+MCI)_PREV', 'MINI_MENTAL_SCORE_CATEGORY(+MCI)',
    'State_Update(years)', 'lineNumber',
]]
display(patient_epst_epend_misdiagnosed.tail(10))
patient_epst_epend_misdiagnosed.to_csv(data_path+'patient_epst_epend_misdiagnosed.csv', index=False)

# Per patient: summed length of the flagged transitions.
patient_misdiagnosed_totaltime = (
    patient_epst_epend_misdiagnosed
    .groupby(by=['patient_id'])['State_Update(years)']
    .agg(lambda x: x.sum())
    .reset_index(name='Total_Misdiagnosed_Time')
)
display(patient_misdiagnosed_totaltime.head(10))
patient_misdiagnosed_totaltime.to_csv(data_path+'patient_misdiagnosed_totaltime.csv', index=False)
# Spot check of a single patient; evaluated but not shown (not the cell's last expression).
patient_com_treat_fea_raw_df[patient_com_treat_fea_raw_df['patient_id']==8735]

#AD Patients: episodes whose AD_STATUS is 1 or 2
patient_ad = patient_com_treat_fea_raw_df[
    (patient_com_treat_fea_raw_df['AD_STATUS']==1) | (patient_com_treat_fea_raw_df['AD_STATUS']==2)
][['patient_id', 'EPISODE_DATE_PREV', 'EPISODE_DATE', 'AD_STATUS']]
patient_ad.to_csv(data_path+'patient_ad.csv', index=False)

In [None]:
# Plot Number of years for changing state -- Misdiagnosed - With MCI
import plotly.express as px

# One scatter matrix per change measure (category hops first, then raw MMSE change),
# each shown inline and exported as PNG and as a standalone HTML page.
for change_col, png_name, html_name in (
        ('MMS_CAT_HOP', "plot_mmshop_stateupdate_misdiagnosed.png", 'plot_mmshop_stateupdate_misdiagnosed.html'),
        ('MMSE_Change', "plot_mmschange_stateupdate_misdiagnosed.png", 'plot_mmschange_stateupdate_misdiagnosed.html')):
    tmp_df = patient_com_treat_fea_raw_df[['State_Update(years)', change_col, 'Misdiagnosed']].dropna(how='any')
    fig = px.scatter_matrix(
        tmp_df,
        dimensions=["State_Update(years)", change_col],
        color="Misdiagnosed",
        symbol="Misdiagnosed",
    )
    fig.show()
    fig.write_image(png_name, width=800, height=400, scale=3.0)
    py.plot(fig, filename=html_name)

# Histogram of the MMSE change, coloured by the misdiagnosis flag.
tmp_df = patient_com_treat_fea_raw_df[['State_Update(years)', 'MMSE_Change', 'Misdiagnosed']].dropna(how='any')
fig = px.histogram(
    tmp_df,
    x="MMSE_Change",
    color="Misdiagnosed",
    marginal="rug",  # can be `box`, `violin`
    hover_data=tmp_df.columns,
)
fig.show()
fig.write_image("plot_mmschange_stateupdate_misdiagnosed_hist.png", width=800, height=400, scale=3.0)
py.plot(fig, filename='plot_mmschange_stateupdate_misdiagnosed_hist.html')

In [None]:
# Plot Number of years for changing state -- Misdiagnosed - With NO MCI
import plotly.express as px

# One scatter matrix per change measure (category hops first, then raw MMSE change),
# each shown inline and exported as PNG and as a standalone HTML page.
# The first PNG was previously written as "..._nommci.png"; it now uses the same
# "nomci" stem as its HTML twin and the other files of this cell.
for change_col, png_name, html_name in (
        ('MMS_CAT_HOP_NOMCI', "plot_mmshop_stateupdate_misdiagnosed_nomci.png", 'plot_mmshop_stateupdate_misdiagnosed_nomci.html'),
        ('MMSE_Change', "plot_mmschange_stateupdate_misdiagnosed_nomci.png", 'plot_mmschange_stateupdate_misdiagnosed_nomci.html')):
    tmp_df = patient_com_treat_fea_raw_df[['State_Update(years)', change_col, 'Misdiagnosed_NOMCI']].dropna(how='any')
    fig = px.scatter_matrix(
        tmp_df,
        dimensions=["State_Update(years)", change_col],
        color="Misdiagnosed_NOMCI",
        symbol="Misdiagnosed_NOMCI",
    )
    fig.show()
    fig.write_image(png_name, width=800, height=400, scale=3.0)
    py.plot(fig, filename=html_name)

# Histogram of the MMSE change, coloured by the no-MCI misdiagnosis flag.
tmp_df = patient_com_treat_fea_raw_df[['State_Update(years)', 'MMSE_Change', 'Misdiagnosed_NOMCI']].dropna(how='any')
fig = px.histogram(
    tmp_df,
    x="MMSE_Change",
    color="Misdiagnosed_NOMCI",
    marginal="rug",  # can be `box`, `violin`
    hover_data=tmp_df.columns,
)
fig.show()
fig.write_image("plot_mmschange_stateupdate_misdiagnosed_nomci_hist.png", width=800, height=400, scale=3.0)
py.plot(fig, filename='plot_mmschange_stateupdate_misdiagnosed_nomci_hist.html')

In [None]:
# Bucket the gap between consecutive episodes by floor-dividing the years by 2.
patient_com_treat_fea_raw_df['State_Update(0,2,4,6,8,10,12)'] =  patient_com_treat_fea_raw_df['State_Update(years)']//2
# NOTE(review): this groupby is never aggregated or assigned, so the line has no effect.
patient_com_treat_fea_raw_df[['State_Update(0,2,4,6,8,10,12)', 'MMS_CAT_HOP']].groupby(by=['MMS_CAT_HOP', 'State_Update(0,2,4,6,8,10,12)'])
# patient_cat_dur_id_pivot = patient_dur_mci_id.pivot(index='patient_id', columns='lineNumber', values='MINI_MENTAL_SCORE_CATEGORY(+MCI)')
patient_com_treat_fea_raw_df.head(5)

In [None]:
# Wide table of misdiagnosis flags: one row per patient, one column per elapsed
# duration. Flags are encoded NO=0 / YES=100 and gaps between a patient's known
# values are filled by linear interpolation along the row.
patient_dur_mci_id = patient_com_treat_fea_raw_df[['patient_id', 'Misdiagnosed', 'durations(years)']].dropna()
patient_cat_dur_id_pivot = patient_dur_mci_id.pivot(index='patient_id', columns='durations(years)', values='Misdiagnosed')
patient_cat_dur_id_pivot.replace(['NO', 'YES'], [0, 100], inplace=True)
patient_cat_dur_id_pivot.interpolate(method='linear', axis=1, limit_area='inside', inplace=True)
patient_cat_dur_id_pivot.sort_values(by=list(patient_cat_dur_id_pivot.columns.values), inplace=True, ascending=True)
display(patient_cat_dur_id_pivot.head(5))

import datetime
import numpy as np
import plotly.graph_objs as go

# Pass 1 draws every patient (HTML only); pass 2 draws a fixed 40-patient sample (PNG + HTML).
for use_sample, bar_cfg, fig_w, fig_h, tick_px, html_name in (
        (False, {"len": 0.3, "y": 0.8}, 1300, 6000, 6, 'plot_misdiagnos_duration_patientid_heatmap.html'),
        (True, {"len": 0.5, "y": 0.5}, 700, 500, 7, 'plot_misdiagnos_duration_patientid_heatmap_sample.html')):
    if use_sample:
        # Taking only few patients for the plots
        patient_cat_dur_id_pivot_sam = patient_cat_dur_id_pivot.sample(n=40, random_state=5)
        shown = patient_cat_dur_id_pivot_sam
    else:
        shown = patient_cat_dur_id_pivot
    np.random.seed(1)

    patient_ids = [f"P_ID:{i!s}" for i in shown.index.values]
    line_patients = shown.columns.values
    mms_values = shown.values

    colorbar = dict(bar_cfg, title="Misdiagnosed: NO(0), YES(100)", titleside='right')
    y_axis = {"title": "Patient_ids", "tickvals": patient_ids, "ticktext": patient_ids,
              "tickfont": {"size": tick_px}, "tickangle": -10}
    fig = go.Figure(
        data=go.Heatmap(z=mms_values, x=line_patients, y=patient_ids,
                        colorscale='Viridis', colorbar=colorbar, showscale=True),
        layout=go.Layout(width=fig_w, height=fig_h, title='Patient misdiagnosed during years of treatment',
                         xaxis_nticks=37, xaxis={"title": "Duration(year)"}, yaxis=y_axis))

    #fig.show()
    if use_sample:
        fig.write_image("plot_misdiagnos_duration_patientid_heatmap_sample.png", width=600, height=500, scale=3.0)
    py.plot(fig, filename=html_name)


In [None]:
# Wide table of misdiagnosis flags: one row per patient, one column per episode
# number. Flags are encoded NO=0 / YES=100 and gaps between a patient's known
# values are filled by linear interpolation along the row.
patient_dur_mci_id = patient_com_treat_fea_raw_df[['patient_id', 'Misdiagnosed', 'lineNumber']].dropna()
patient_cat_dur_id_pivot = patient_dur_mci_id.pivot(index='patient_id', columns='lineNumber', values='Misdiagnosed')
patient_cat_dur_id_pivot.replace(['NO', 'YES'], [0, 100], inplace=True)
patient_cat_dur_id_pivot.interpolate(method='linear', axis=1, limit_area='inside', inplace=True)
patient_cat_dur_id_pivot.sort_values(by=list(patient_cat_dur_id_pivot.columns.values), inplace=True, ascending=True)
display(patient_cat_dur_id_pivot.head(5))

import datetime
import numpy as np
import plotly.graph_objs as go

# Pass 1 draws every patient (HTML only); pass 2 draws a fixed 40-patient sample (PNG + HTML).
for use_sample, bar_cfg, fig_w, fig_h, tick_px, html_name in (
        (False, {"len": 0.3, "y": 0.8}, 1300, 6000, 6, 'plot_misdiagnos_episode_patientid_heatmap.html'),
        (True, {"len": 0.5, "y": 0.5}, 700, 500, 8, 'plot_misdiagnos_episode_patientid_heatmap_sample.html')):
    if use_sample:
        # Taking only few patients for the plots
        patient_cat_dur_id_pivot_sam = patient_cat_dur_id_pivot.sample(n=40, random_state=5)
        shown = patient_cat_dur_id_pivot_sam
    else:
        shown = patient_cat_dur_id_pivot
    np.random.seed(1)

    patient_ids = [f"P_ID:{i!s}" for i in shown.index.values]
    line_patients = shown.columns.values
    mms_values = shown.values

    colorbar = dict(bar_cfg, title="Misdiagnosed: NO(0), YES(100)", titleside='right')
    y_axis = {"title": "Patient_ids", "tickvals": patient_ids, "ticktext": patient_ids,
              "tickfont": {"size": tick_px}, "tickangle": -10}
    fig = go.Figure(
        data=go.Heatmap(z=mms_values, x=line_patients, y=patient_ids,
                        colorscale='Viridis', colorbar=colorbar, showscale=True),
        layout=go.Layout(width=fig_w, height=fig_h, title='Patient misdiagnosed during episodes',
                         xaxis_nticks=37, xaxis={"title": "Episode Number"}, yaxis=y_axis))

    #fig.show()
    if use_sample:
        fig.write_image("plot_misdiagnos_episode_patientid_heatmap_sample.png", width=600, height=600, scale=3.0)
    py.plot(fig, filename=html_name)


In [None]:
# Wide table of MMSE categories: one row per patient, one column per episode
# number. Categories are encoded Normal=20 ... Severe=100 and gaps between a
# patient's known values are filled by linear interpolation along the row.
patient_dur_mci_id = patient_com_treat_fea_raw_df[['patient_id', 'MINI_MENTAL_SCORE_CATEGORY(+MCI)', 'lineNumber']]
patient_cat_dur_id_pivot = patient_dur_mci_id.pivot(index='patient_id', columns='lineNumber', values='MINI_MENTAL_SCORE_CATEGORY(+MCI)')
#patient_cat_dur_id_pivot.fillna(0, inplace=True)
patient_cat_dur_id_pivot.replace(['Normal', 'NormalMCI','Mild', 'Moderate', 'Severe'], [20, 40, 60, 80, 100], inplace=True)
patient_cat_dur_id_pivot.interpolate(method='linear', axis=1, limit_area='inside', inplace=True)
patient_cat_dur_id_pivot.sort_values(by=list(patient_cat_dur_id_pivot.columns.values), inplace=True, ascending=False)
display(patient_cat_dur_id_pivot.head(5))

import datetime
import numpy as np
import plotly.graph_objs as go

# Pass 1 draws every patient (HTML only); pass 2 draws a fixed 40-patient sample (PNG + HTML).
for use_sample, colorbar, fig_w, fig_h, tick_px, html_name in (
        (False,
         {"len": 0.3, "y": 0.80, "title": "MINI_MENTAL_SCORE_Category_Scale: Normal(20),NormalMCI(40),Mild(60),Moderate(80),Severe(100))", 'titleside': 'right'},
         1300, 6000, 6, 'plot_mmscat_episode_patientid_heatmap.html'),
        (True,
         {"len": 0.7, "y": 0.50, "title": "MMS_Scale: Normal(20),NormalMCI(40),Mild(60),Moderate(80),Severe(100)", 'titleside': 'right', "titlefont": {"size": 10}},
         700, 500, 7, 'plot_mmscat_episode_patientid_heatmap_sample.html')):
    if use_sample:
        # Taking only few patients for the plots
        patient_cat_dur_id_pivot_sam = patient_cat_dur_id_pivot.sample(n=40, random_state=5)
        shown = patient_cat_dur_id_pivot_sam
    else:
        shown = patient_cat_dur_id_pivot
    np.random.seed(1)

    patient_ids = [f"P_ID:{i!s}" for i in shown.index.values]
    line_patients = shown.columns.values
    mms_values = shown.values

    y_axis = {"title": "Patient_ids", "tickvals": patient_ids, "ticktext": patient_ids,
              "tickfont": {"size": tick_px}, "tickangle": -10}
    fig = go.Figure(
        data=go.Heatmap(z=mms_values, x=line_patients, y=patient_ids,
                        colorscale='Viridis', colorbar=colorbar, showscale=True),
        layout=go.Layout(width=fig_w, height=fig_h, title='MMSE progression of Patients',
                         xaxis_nticks=37, xaxis={"title": "Episode Number"}, yaxis=y_axis))

    #fig.show()
    if use_sample:
        # File name fixed from "...heatmapp_sample.png" so the PNG matches its HTML twin.
        fig.write_image("plot_mmscat_episode_patientid_heatmap_sample.png", width=600, height=600, scale=3.0)
    py.plot(fig, filename=html_name)




In [None]:
# Wide table of MMSE categories: one row per patient, one column per elapsed
# duration (rounded to 4 decimals). Categories are encoded Normal=20 ... Severe=100
# and gaps between a patient's known values are filled by linear interpolation.
patient_com_treat_fea_raw_df['durations_round(years)'] = patient_com_treat_fea_raw_df['durations(years)'].round(4)
patient_dur_mci_id = patient_com_treat_fea_raw_df[['patient_id', 'MINI_MENTAL_SCORE_CATEGORY(+MCI)', 'durations_round(years)']]
# patient_dur_mci_id.drop_duplicates(subset=['patient_id', 'durations_round(years)'],inplace=True)
patient_cat_dur_id_pivot = patient_dur_mci_id.pivot(index='patient_id', columns='durations_round(years)', values='MINI_MENTAL_SCORE_CATEGORY(+MCI)')
#patient_cat_dur_id_pivot.fillna(0, inplace=True)
patient_cat_dur_id_pivot.replace(['Normal', 'NormalMCI','Mild', 'Moderate', 'Severe'], [20, 40, 60, 80, 100], inplace=True)
patient_cat_dur_id_pivot.interpolate(method='linear', axis=1, limit_area='inside', inplace=True)
patient_cat_dur_id_pivot.sort_values(by=list(patient_cat_dur_id_pivot.columns.values), inplace=True, ascending=False)
# Second name for the same frame (no column reordering is applied any more).
patient_cat_dur_id_pivot_col = patient_cat_dur_id_pivot
display(patient_cat_dur_id_pivot_col.iloc[400:420])

import datetime
import numpy as np
import plotly.graph_objs as go

# Pass 1 draws every patient (HTML only); pass 2 draws a fixed 40-patient sample (PNG + HTML).
for use_sample, colorbar, fig_w, fig_h, n_ticks, x_title, tick_px, html_name in (
        (False,
         {"len": 0.1, "y": 0.80, "title": "MINI_MENTAL_SCORE_Category_Scale_(N/A(0),Normal(20),NormalMCI(40),Mild(60),Moderate(80),Severe(100))", 'titleside': 'right'},
         1300, 6000, 48, "Duration(years)", 6, 'plot_mmscat_duration_patientid_heatmap.html'),
        (True,
         {"len": 0.7, "y": 0.50, "title": "MMS_Scale: Normal(20),NormalMCI(40),Mild(60),Moderate(80),Severe(100)", 'titleside': 'right', "titlefont": {"size": 10}},
         700, 500, 37, "Duration (years)", 7, 'plot_mmscat_duration_patientid_heatmap_sample.html')):
    if use_sample:
        # Taking only few patients for the plots
        patient_cat_dur_id_pivot_sam = patient_cat_dur_id_pivot.sample(n=40, random_state=5)
        shown = patient_cat_dur_id_pivot_sam
        # The global NumPy seed is reset only before the sample plot in this cell.
        np.random.seed(1)
    else:
        shown = patient_cat_dur_id_pivot_col

    patient_ids = [f"P_ID:{i!s}" for i in shown.index.values]
    duration_patients = shown.columns.values
    mms_values = shown.values

    y_axis = {"title": "Patient_ids", "tickvals": patient_ids, "ticktext": patient_ids,
              "tickfont": {"size": tick_px}, "tickangle": -10}
    fig = go.Figure(
        data=go.Heatmap(z=mms_values, x=duration_patients, y=patient_ids,
                        colorscale='Viridis', colorbar=colorbar, showscale=True),
        layout=go.Layout(width=fig_w, height=fig_h, title='MMSE progression of Patients',
                         xaxis_nticks=n_ticks, xaxis={"title": x_title}, yaxis=y_axis))

    #fig.show()
    if use_sample:
        fig.write_image("plot_mmscat_duration_patientid_heatmap_sample.png", width=600, height=600, scale=3.0)
    py.plot(fig, filename=html_name)


In [None]:
# Build a patient x duration table of raw MINI_MENTAL_SCORE values for the heatmaps below.

# Round each visit's duration to 4 decimals; the rounded value is the pivot's column key.
patient_com_treat_fea_raw_df['durations_round(years)']= round(patient_com_treat_fea_raw_df['durations(years)'], 4)
#patient_com_treat_fea_raw_df['durations_round(years)'] = patient_com_treat_fea_raw_df['durations(years)']
# Keep only the three columns the pivot needs.
patient_dur_mci_id = patient_com_treat_fea_raw_df[['patient_id', 'MINI_MENTAL_SCORE', 'durations_round(years)']]
#patient_dur_mci_id['MINI_MENTAL_SCORE'] = patient_dur_mci_id['MINI_MENTAL_SCORE']*3
# patient_dur_mci_id.drop_duplicates(subset=['patient_id', 'durations_round(years)'],inplace=True)
# display(patient_dur_mci_id[patient_dur_mci_id.duplicated(subset=['patient_id', 'durations_round(years)'])]['patient_id'].unique().shape)
# One row per patient_id, one column per rounded duration, cells hold MINI_MENTAL_SCORE.
# NOTE(review): DataFrame.pivot raises on duplicate (patient_id, duration) pairs and the
# de-duplication above is disabled -- confirm the data contains none.
patient_cat_dur_id_pivot = patient_dur_mci_id.pivot(index='patient_id', columns='durations_round(years)', values='MINI_MENTAL_SCORE')
#patient_cat_dur_id_pivot.fillna(0, inplace=True)
#patient_cat_dur_id_pivot.fillna(method='bfill' , inplace=True, axis=1)
#patient_cat_dur_id_pivot.replace(['Normal', 'NormalMCI','Mild', 'Moderate', 'Severe'],[20,40,60,80,100],inplace=True)
# Linear interpolation along each row; limit_area='inside' only fills gaps that have
# valid values on both sides, so leading and trailing NaNs stay NaN.
patient_cat_dur_id_pivot.interpolate(method='linear', axis=1, limit_area='inside',  inplace=True)
# Order rows descending, comparing the duration columns from left to right.
patient_cat_dur_id_pivot.sort_values(by=[i for i in patient_cat_dur_id_pivot.columns.values], inplace=True, ascending=False)
# Alias of the same object (not a copy); the column re-ordering variant is disabled.
patient_cat_dur_id_pivot_col = patient_cat_dur_id_pivot # patient_cat_dur_id_pivot[patient_cat_dur_id_pivot.columns.tolist()[-2:]+patient_cat_dur_id_pivot.columns.tolist()[0:-2]]
# Preview rows 400-419 of the sorted table.
display(patient_cat_dur_id_pivot_col.iloc[400:420])

import datetime
import numpy as np
import plotly.graph_objs as go

# Heatmap inputs taken from the pivot: z matrix, x labels (columns), y labels (prefixed index).
mms_values = patient_cat_dur_id_pivot_col.values
duration_patients = patient_cat_dur_id_pivot_col.columns.values
patient_ids = ["P_ID:" + index_label for index_label in map(str, patient_cat_dur_id_pivot_col.index.values)]

fig = go.Figure(
    data=go.Heatmap(
        x=duration_patients,
        y=patient_ids,
        z=mms_values,
        colorscale='Viridis',
        showscale=True,
        colorbar=dict(
            len=0.1,
            y=0.80,
            title="MMS_Scale: MINI_MENTAL_SCORE (MMSE(X))",
            titleside='right',
        ),
    ),
    layout=go.Layout(
        title='MMSE score progression of Patients',
        width=1300,
        height=6000,
        xaxis_nticks=48,
        xaxis=dict(title="Duration(years)"),
        yaxis=dict(
            title="Patient_ids",
            tickvals=patient_ids,
            ticktext=patient_ids,
            tickfont=dict(size=6),
            tickangle=-10,
        ),
    ),
)

# Written to an HTML file through plotly.offline; inline display stays disabled.
py.plot(fig, filename='plot_mmsscore_duration_patientid_heatmap.html')


# Same heatmap on a fixed sample of 40 rows (random_state=5) so the figure stays readable.
patient_cat_dur_id_pivot_sam = patient_cat_dur_id_pivot.sample(random_state=5, n=40)

import datetime
import numpy as np
import plotly.graph_objs as go

# Reseeds NumPy's global RNG; nothing below in this block draws from it.
np.random.seed(1)

# Heatmap inputs: z matrix, x labels (pivot columns), y labels (prefixed pivot index).
mms_values = patient_cat_dur_id_pivot_sam.values
duration_patients = patient_cat_dur_id_pivot_sam.columns.values
patient_ids = ["P_ID:" + index_label for index_label in map(str, patient_cat_dur_id_pivot_sam.index.values)]

fig = go.Figure(
    data=go.Heatmap(
        x=duration_patients,
        y=patient_ids,
        z=mms_values,
        colorscale='Viridis',
        showscale=True,
        colorbar=dict(
            len=0.7,
            y=0.50,
            title="MMS_Scale: MINI_MENTAL_SCORE (MMSE(X))",
            titleside='right',
            titlefont=dict(size=10),
        ),
    ),
    layout=go.Layout(
        title='MMSE score progression of Patients',
        width=700,
        height=500,
        xaxis_nticks=37,
        xaxis=dict(title="Duration (years)"),
        yaxis=dict(
            title="Patient_ids",
            tickvals=patient_ids,
            ticktext=patient_ids,
            tickfont=dict(size=7),
            tickangle=-10,
        ),
    ),
)

# Export: static PNG through write_image, then an HTML file through plotly.offline.
fig.write_image("plot_mmscore_duration_patientid_heatmap_sample.png", width=600, height=600, scale=3.0)
py.plot(fig, filename='plot_mmsscore_duration_patientid_heatmap_sample.html')


In [None]:
def get_patient_state(l):
    """Label a sequence of values as constant or non-constant.

    Args:
        l: indexable sequence of hashable values (for example a list of
            category codes, possibly containing NaN).

    Returns:
        'C_<first value>' when every entry is the same value, where a
        sequence made up only of NaN counts as constant ('C_nan');
        'NC_' otherwise, including for an empty sequence.
    """
    # NaN never compares equal to itself, so a plain set() keeps one entry per
    # NaN object and an all-missing sequence would look non-constant.
    # Fold every NaN into a single marker before counting distinct values.
    nan_marker = object()
    distinct = {nan_marker if value != value else value for value in l}
    if len(distinct) == 1:
        return 'C_' + str(l[0])
    return 'NC_'

def _consecutive_differences(values):
    """Return the list of values[i+1] - values[i] for each adjacent pair."""
    return [later - earlier for earlier, later in zip(values, values[1:])]


def get_patient_st1_st2_min(l):
    """Return the smallest step between consecutive entries of l.

    Args:
        l: sliceable sequence of numbers.

    Returns:
        min over values[i+1] - values[i], or None when l has fewer than two
        entries (no step exists).
    """
    # NOTE(review): NaN entries propagate into the differences and make
    # min() depend on their position -- confirm inputs are NaN-free.
    steps = _consecutive_differences(l)
    return min(steps) if steps else None


def get_patient_st1_st2_max(l):
    """Return the largest step between consecutive entries of l.

    Args:
        l: sliceable sequence of numbers.

    Returns:
        max over values[i+1] - values[i], or None when l has fewer than two
        entries (no step exists).
    """
    # NOTE(review): NaN entries propagate into the differences and make
    # max() depend on their position -- confirm inputs are NaN-free.
    steps = _consecutive_differences(l)
    return max(steps) if steps else None

    
# Encode the five MMSE category labels as 1.0-5.0 wherever they occur in the frame,
# then collapse every column into one Python list per patient_id.
patient_com_treat_fea_raw_agg_list_df = (
    patient_com_treat_fea_raw_df
    .replace(['Normal', 'NormalMCI', 'Mild', 'Moderate', 'Severe'], [1.0, 2.0, 3.0, 4.0, 5.0])
    .groupby(by=['patient_id'])
    .agg(lambda column: column.tolist())
)

# Derive three per-patient summaries from the encoded category lists, in this order.
for _summary_column, _summarise in (
    ('STATE_CONSTANT', get_patient_state),
    ('ST1_ST2_MIN', get_patient_st1_st2_min),
    ('ST1_ST2_MAX', get_patient_st1_st2_max),
):
    patient_com_treat_fea_raw_agg_list_df[_summary_column] = (
        patient_com_treat_fea_raw_agg_list_df['MINI_MENTAL_SCORE_CATEGORY(+MCI)'].apply(_summarise)
    )

display(patient_com_treat_fea_raw_agg_list_df.head(15))

In [None]:
# Non Constant Patients
# Build a patient x duration table of MMSE categories (scaled 20-100) for the
# patients that survive the STATE_CONSTANT filter below.

# Round each visit's duration to 4 decimals; the rounded value is the pivot's column key.
patient_com_treat_fea_raw_df['durations_round(years)']= round(patient_com_treat_fea_raw_df['durations(years)'], 4)
#patient_com_treat_fea_raw_df['durations_round(years)'] = patient_com_treat_fea_raw_df['durations(years)']

# Drop patients whose STATE_CONSTANT is 'C_1.0', 'C_5.0' or 'C_nan'; every other
# patient is kept, including those constant at any other code.
filter_df = patient_com_treat_fea_raw_agg_list_df[~(patient_com_treat_fea_raw_agg_list_df['STATE_CONSTANT'].isin(['C_1.0', 'C_5.0', 'C_nan']))]
# Row counts: largest step >= 2, smallest step <= -2, and all kept patients.
display(filter_df[filter_df['ST1_ST2_MAX']>=2].shape)
display(filter_df[filter_df['ST1_ST2_MIN']<=-2].shape)
display(filter_df.shape)
#display(patient_com_treat_fea_raw_df[filter_df.index])
# Visit rows of the kept patients (filter_df is indexed by patient_id), three columns only.
patient_dur_mci_id = patient_com_treat_fea_raw_df[patient_com_treat_fea_raw_df['patient_id'].isin(filter_df.index)][['patient_id', 'MINI_MENTAL_SCORE_CATEGORY(+MCI)', 'durations_round(years)']]
display(patient_dur_mci_id.shape)
#patient_dur_mci_id.drop_duplicates(subset=['patient_id', 'durations_round(years)'],inplace=True)
#display(patient_dur_mci_id[patient_dur_mci_id.duplicated(subset=['patient_id', 'durations_round(years)'])]['patient_id'].unique().shape)
# One row per patient_id, one column per rounded duration.
# NOTE(review): DataFrame.pivot raises on duplicate (patient_id, duration) pairs and the
# de-duplication above is disabled -- confirm the data contains none.
patient_cat_dur_id_pivot = patient_dur_mci_id.pivot(index='patient_id', columns='durations_round(years)', values='MINI_MENTAL_SCORE_CATEGORY(+MCI)')
display(patient_cat_dur_id_pivot.shape)
#patient_cat_dur_id_pivot.fillna(0, inplace=True)
#patient_cat_dur_id_pivot.fillna(method='bfill' , inplace=True, axis=1)
# Map the category labels onto the 20-100 scale used by the colour bar.
patient_cat_dur_id_pivot.replace(['Normal', 'NormalMCI','Mild', 'Moderate', 'Severe'],[20.0,40.0,60.0,80.0,100.0],inplace=True)
#patient_cat_dur_id_pivot = patient_cat_dur_id_pivot.astype('float64')
# Linear interpolation along each row; limit_area='inside' only fills gaps that have
# valid values on both sides.
# NOTE(review): this presumably relies on replace() leaving numeric columns -- confirm the dtype.
patient_cat_dur_id_pivot.interpolate(method='linear', axis=1, limit_area='inside',  inplace=True)
# Order rows descending, comparing the duration columns from left to right.
patient_cat_dur_id_pivot.sort_values(by=[i for i in patient_cat_dur_id_pivot.columns.values], inplace=True, ascending=False)
# Remove patients whose row is entirely NaN.
patient_cat_dur_id_pivot.dropna(axis=0, how='all', inplace=True)
# patient_cat_dur_id_pivot[-1]=0.0
# patient_cat_dur_id_pivot[-2]=50.0
# Alias of the same object (not a copy); the column re-ordering variant is disabled.
patient_cat_dur_id_pivot_col = patient_cat_dur_id_pivot # [patient_cat_dur_id_pivot.columns.tolist()[-2:]+patient_cat_dur_id_pivot.columns.tolist()[0:-2]]
# Preview rows 400-419 of the sorted table.
display(patient_cat_dur_id_pivot_col.iloc[400:420])

import datetime
import numpy as np
import plotly.graph_objs as go

# Heatmap inputs taken from the pivot: z matrix, x labels (columns), y labels (prefixed index).
mms_values = patient_cat_dur_id_pivot_col.values
duration_patients = patient_cat_dur_id_pivot_col.columns.values
patient_ids = ["P_ID:" + index_label for index_label in map(str, patient_cat_dur_id_pivot_col.index.values)]

fig = go.Figure(
    data=go.Heatmap(
        x=duration_patients,
        y=patient_ids,
        z=mms_values,
        colorscale='Viridis',
        showscale=True,
        colorbar=dict(
            len=0.1,
            y=0.80,
            title="MINI_MENTAL_SCORE_Category_Scale_(N/A(0),Normal(20),NormalMCI(40),Mild(60),Moderate(80),Severe(100))",
            titleside='right',
        ),
    ),
    layout=go.Layout(
        title='MMSE progression of Patients',
        width=1300,
        height=4000,
        xaxis_nticks=48,
        xaxis=dict(title="Duration(years)"),
        yaxis=dict(
            title="Patient_ids",
            tickvals=patient_ids,
            ticktext=patient_ids,
            tickfont=dict(size=6),
            tickangle=-10,
        ),
    ),
)

# Written to an HTML file through plotly.offline; inline display stays disabled.
py.plot(fig, filename='plot_mmscat_duration_patientid_noconstant_heatmap.html')


# Same heatmap on a fixed sample of 40 rows (random_state=5) so the figure stays readable.
patient_cat_dur_id_pivot_sam = patient_cat_dur_id_pivot.sample(random_state=5, n=40)

import datetime
import numpy as np
import plotly.graph_objs as go

# Reseeds NumPy's global RNG; nothing below in this block draws from it.
np.random.seed(1)

# Heatmap inputs: z matrix, x labels (pivot columns), y labels (prefixed pivot index).
mms_values = patient_cat_dur_id_pivot_sam.values
duration_patients = patient_cat_dur_id_pivot_sam.columns.values
patient_ids = ["P_ID:" + index_label for index_label in map(str, patient_cat_dur_id_pivot_sam.index.values)]

fig = go.Figure(
    data=go.Heatmap(
        x=duration_patients,
        y=patient_ids,
        z=mms_values,
        colorscale='Viridis',
        showscale=True,
        colorbar=dict(
            len=0.7,
            y=0.50,
            title="MMS_Scale: Normal(20),NormalMCI(40),Mild(60),Moderate(80),Severe(100)",
            titleside='right',
            titlefont=dict(size=10),
        ),
    ),
    layout=go.Layout(
        title='MMSE progression of Patients',
        width=700,
        height=500,
        xaxis_nticks=37,
        xaxis=dict(title="Duration (years)"),
        yaxis=dict(
            title="Patient_ids",
            tickvals=patient_ids,
            ticktext=patient_ids,
            tickfont=dict(size=7),
            tickangle=-10,
        ),
    ),
)

# Export: static PNG through write_image, then an HTML file through plotly.offline.
fig.write_image("plot_mmscat_duration_patientid_noconstant_heatmap_sample.png", width=600, height=600, scale=3.0)
py.plot(fig, filename='plot_mmscat_duration_patientid_noconstant_heatmap_sample.html')



In [None]:
# Categorising patients based on their progression:
# build a patient x duration table of MMSE categories on the 20-100 scale.

# Round each visit's duration to 3 decimals; the rounded value is the pivot's column key.
patient_com_treat_fea_raw_df['durations_round(years)'] = patient_com_treat_fea_raw_df['durations(years)'].round(3)
#patient_com_treat_fea_raw_df['durations_round(years)'] = patient_com_treat_fea_raw_df['durations(years)']

# De-duplicate while building the frame instead of calling drop_duplicates(inplace=True)
# on a column slice, which raises SettingWithCopyWarning. The first row of each
# (patient_id, duration) pair is kept, exactly as before.
patient_dur_mci_id = (
    patient_com_treat_fea_raw_df[['patient_id', 'MINI_MENTAL_SCORE_CATEGORY(+MCI)', 'durations_round(years)']]
    .drop_duplicates(subset=['patient_id', 'durations_round(years)'])
)
# Sanity check: number of patients that still have duplicated pairs.
display(patient_dur_mci_id[patient_dur_mci_id.duplicated(subset=['patient_id', 'durations_round(years)'])]['patient_id'].unique().shape)

# One row per patient_id, one column per rounded duration.
patient_cat_dur_id_pivot = patient_dur_mci_id.pivot(index='patient_id', columns='durations_round(years)', values='MINI_MENTAL_SCORE_CATEGORY(+MCI)')
#patient_cat_dur_id_pivot.fillna(0, inplace=True)
# Back-fill along each row: a missing cell takes the next valid value to its right.
# bfill() is the non-deprecated spelling of fillna(method='bfill').
patient_cat_dur_id_pivot.bfill(axis=1, inplace=True)
# Map the category labels onto the 20-100 scale.
patient_cat_dur_id_pivot.replace(['Normal', 'NormalMCI','Mild', 'Moderate', 'Severe'],[20,40,60,80,100],inplace=True)
# Order rows descending, comparing the duration columns from left to right.
patient_cat_dur_id_pivot.sort_values(by=list(patient_cat_dur_id_pivot.columns.values), inplace=True, ascending=False)
# Preview rows 400-419 of the sorted table.
display(patient_cat_dur_id_pivot.iloc[400:420])

# Disabled heatmap code, kept verbatim as an unused string literal.
'''import datetime
import numpy as np
import plotly.graph_objs as go
np.random.seed(1)

patient_ids = ["P_ID:"+str(i) for i in patient_cat_dur_id_pivot.index.values]

duration_patients = patient_cat_dur_id_pivot.columns.values

mms_values = patient_cat_dur_id_pivot.values

fig = go.Figure(data=go.Heatmap(
        z=mms_values,
        x=duration_patients,
        y=patient_ids,
        colorscale='Viridis', 
        colorbar={"len":0.3, "y":0.80, "title":"MINI_MENTAL_SCORE_Category_Scale_(N/A(0),Normal(20),NormalMCI(40),Mild(60),Moderate(80),Severe(100))", 'titleside':'right'},
        showscale=True),
        layout=go.Layout(width=1300, height=6000, title='MMSE progression of Patients',xaxis_nticks=37,
                         xaxis={"title": "Duration(years)"},
                         yaxis={"title": "Patient_ids", "tickvals":patient_ids, "ticktext":patient_ids,"tickfont": {"size": 6}, "tickangle": -10}))


fig.show()

py.plot(fig,filename='plot_mms_cat_patients_train_heatmap.html')'''

In [None]:
# Keep the columns of patient_dur_mci_id whose first-row value equals 'Normal'.
# NOTE(review): in the cells visible here patient_dur_mci_id is a long-format frame
# (patient_id / category / rounded duration), so at most its category column can
# match -- confirm that a pivoted frame was not intended.
_first_row_is_normal = (patient_dur_mci_id.iloc[0:1, 0:] == 'Normal').values.flatten()
patient_dur_mci_id_normal = patient_dur_mci_id[patient_dur_mci_id.columns[_first_row_is_normal]]

# One trace per selected column, plotting its non-missing values against their index.
_normal_traces = []
for _normal_col in patient_dur_mci_id_normal.columns:
    _observed = patient_dur_mci_id_normal[_normal_col].dropna()
    _normal_traces.append({
        'x': _observed.index,
        'y': _observed,
        'name': _normal_col,
    })
fig = go.Figure(_normal_traces)

# Written to an HTML file through plotly.offline.
py.plot(fig, filename='plot_mms_cat_frst_last_duration_normal.html')

In [None]:
# Histogram of "<first category>_<last category>" per patient.

# px is used below but is not imported in the visible part of the notebook;
# importing it here keeps the cell runnable on its own.
import plotly.express as px

# patient_com_treat_fea_raw_df.drop(columns=['mms_cat_containing_normal_mci_frst', 'mms_cat_containing_normal_mci_lst'], inplace=True)

# Broadcast the category of the first and of the last row of each patient's group
# onto every row of that patient.
patient_com_treat_fea_raw_df['mms_cat_containing_normal_mci_frst'] = (
    patient_com_treat_fea_raw_df.groupby(by=['patient_id'])['mms_cat_containing_normal_mci']
    .transform(lambda x: x.iloc[0])
)
patient_com_treat_fea_raw_df['mms_cat_containing_normal_mci_lst'] = (
    patient_com_treat_fea_raw_df.groupby(by=['patient_id'])['mms_cat_containing_normal_mci']
    .transform(lambda x: x.iloc[-1])
)

# Join the two labels row by row. .iloc is used because each row is a Series
# indexed by column name, and integer lookup by position via x[0] is deprecated.
patient_com_treat_fea_raw_df['mms_cat_frst_last'] = (
    patient_com_treat_fea_raw_df[['mms_cat_containing_normal_mci_frst', 'mms_cat_containing_normal_mci_lst']]
    .agg(lambda x: str(x.iloc[0]) + '_' + str(x.iloc[1]), axis=1)
)

# One row per distinct (patient_id, transition, first category), skipping rows where
# both ends are missing ('nan_nan'). A single .loc selection followed by a chained
# sort avoids mutating an intermediate slice in place.
data_mms_cat_frst_last = (
    patient_com_treat_fea_raw_df
    .loc[
        patient_com_treat_fea_raw_df.mms_cat_frst_last != 'nan_nan',
        ['patient_id', 'mms_cat_frst_last', 'mms_cat_containing_normal_mci_frst'],
    ]
    .drop_duplicates()
    .sort_values('mms_cat_frst_last')
)

fig = px.histogram(data_mms_cat_frst_last, x="mms_cat_frst_last")
fig.show()
py.plot(fig, filename='plot_mms_cat_frst_last.html')

In [None]:

# Sunburst of first -> last category transitions.
# l: node labels = root, every transition label, every first-category label.
l = (
    ['Categories']
    + data_mms_cat_frst_last.mms_cat_frst_last.unique().tolist()
    + data_mms_cat_frst_last.mms_cat_containing_normal_mci_frst.unique().tolist()
)
# p: parent of each node = '' for the root, the first category (text before the
# first '_') for a transition, and the root for a first-category node.
p = (
    ['']
    + [i.split('_')[0] for i in data_mms_cat_frst_last.mms_cat_frst_last.unique()]
    + ['Categories'] * len(data_mms_cat_frst_last.mms_cat_containing_normal_mci_frst.unique())
)
mms_cat_frst_last_count = dict(data_mms_cat_frst_last.mms_cat_frst_last.value_counts())
mms_cat_containing_normal_mci_frst_count = dict(data_mms_cat_frst_last.mms_cat_containing_normal_mci_frst.value_counts())
mms_cat_fl_dict_total = {**mms_cat_frst_last_count, **mms_cat_containing_normal_mci_frst_count}
# v: one value per label, in label order. The root gets the total; a label without
# a count (value_counts drops NaN while unique keeps it) gets 0 instead of being
# skipped, so v can never fall out of step with l and p.
v = [sum(mms_cat_frst_last_count.values())] + [mms_cat_fl_dict_total.get(i, 0) for i in l[1:]]
#v = [(i,i/sum(mms_cat_frst_last_count.values())*100) for i in v ]
print (l, len(l))
print (p, len(p))
print (v, len(v))

fig = go.Figure(go.Sunburst(
    labels=l,
    parents=p,
    values=v,
    # v[0] is the overall total, so the percentage is relative to all patients counted.
    hovertext=[(str(j)+' '+'Count: '+str(i), 'Percentage: '+str(i/v[0]*100)) for i, j in zip(v, l)],
    hoverinfo="text",
    branchvalues="total",
))

fig.update_layout(title=go.layout.Title(text='Figure'), margin=dict(t=100, l=0, r=100, b=0))
py.plot(fig, filename='plot_mms_cat_frst_last_sunbur.html')
fig.show()

In [None]:
# Plot MMSE category against episode number (lineNumber), one trace per patient.
# The ordered categorical dtype fixes the category order Normal < Mild < Moderate < Severe.
from pandas.api.types import CategoricalDtype
cat_type = CategoricalDtype(categories=['Normal', 'Mild', 'Moderate', 'Severe'], ordered=True)

# .copy() gives an independent frame, so the column assignment and in-place sort
# below do not act on a slice of patient_com_treat_fea_raw_df (SettingWithCopyWarning).
patient_cat_line_id = patient_com_treat_fea_raw_df[['patient_id', 'MINI_MENTAL_SCORE_CATEGORY', 'lineNumber']].copy()
display(patient_cat_line_id.head(5))
patient_cat_line_id['MINI_MENTAL_SCORE_CATEGORY'] = patient_cat_line_id['MINI_MENTAL_SCORE_CATEGORY'].astype(cat_type)
patient_cat_line_id.sort_values(by=['MINI_MENTAL_SCORE_CATEGORY'], inplace=True)

# One row per lineNumber, one column per patient_id.
patient_cat_line_id_pivot = patient_cat_line_id.pivot(index='lineNumber', columns='patient_id', values='MINI_MENTAL_SCORE_CATEGORY')
py.iplot(
    [
        {
            'x': patient_cat_line_id_pivot.index,
            'y': patient_cat_line_id_pivot[col],
            'name': col,
        }
        for col in patient_cat_line_id_pivot.columns
    ],
    filename='./patient_cat_line_id_pivot',
)

In [None]:
# Plot MMSE category against elapsed time, one trace per patient.

# Elapsed years relative to the first row of each patient's group.
# Divide by one day first and by 365.25 second: np.timedelta64(1, 'D') * 365.25
# truncates to 365 days because timedelta64 holds an integer count, which made the
# previous divisor a 365-day year.
# NOTE(review): if 'durations(years)' is computed the same way in an earlier cell,
# apply the same correction there so all plots share one scale.
patient_com_treat_fea_raw_df['durations(years)'] = (
    patient_com_treat_fea_raw_df.groupby(by=['patient_id'])['EPISODE_DATE']
    .transform(lambda x: (x - x.iloc[0]) / np.timedelta64(1, 'D') / 365.25)
)

# .copy() gives an independent frame, so the column assignment and in-place sort
# below do not act on a slice of patient_com_treat_fea_raw_df (SettingWithCopyWarning).
patient_cat_dur_id = patient_com_treat_fea_raw_df[['patient_id', 'MINI_MENTAL_SCORE_CATEGORY', 'durations(years)']].copy()
display(patient_cat_dur_id.head(5))
patient_cat_dur_id['MINI_MENTAL_SCORE_CATEGORY'] = patient_cat_dur_id['MINI_MENTAL_SCORE_CATEGORY'].astype(cat_type)
patient_cat_dur_id.sort_values(by=['MINI_MENTAL_SCORE_CATEGORY'], inplace=True)

# One row per duration, one column per patient_id.
patient_cat_dur_id_pivot = patient_cat_dur_id.pivot(index='durations(years)', columns='patient_id', values='MINI_MENTAL_SCORE_CATEGORY')

# Each trace keeps only the durations at which that patient has a value.
py.iplot(
    [
        {
            'x': patient_cat_dur_id_pivot[col].dropna().index,
            'y': patient_cat_dur_id_pivot[col].dropna(),
            'name': col,
        }
        for col in patient_cat_dur_id_pivot.columns
    ],
    filename='./patient_cat_dur_id_pivot',
)


# import math 
# max_val  = math.ceil(max(patient_com_treat_fea_raw_df['durations(years)']))
# pd.cut(patient_com_treat_fea_raw_df['durations(years)'], bins=[-1,1,3,7, max_val], labels=[1,3,7, max_val]) #labels=['20s', '30s', '40s']

In [None]:
# Elapsed years relative to the first row of each patient's group.
# Divide by one day first and by 365.25 second: np.timedelta64(1, 'D') * 365.25
# truncates to 365 days because timedelta64 holds an integer count, which made the
# previous divisor a 365-day year.
# NOTE(review): if 'durations(years)' is computed the same way in an earlier cell,
# apply the same correction there so all plots share one scale.
patient_com_treat_fea_raw_df['durations(years)'] = (
    patient_com_treat_fea_raw_df.groupby(by=['patient_id'])['EPISODE_DATE']
    .transform(lambda x: (x - x.iloc[0]) / np.timedelta64(1, 'D') / 365.25)
)
# Category found in the first row of each patient's group, repeated on all of that patient's rows.
patient_com_treat_fea_raw_df['initial_state'] = (
    patient_com_treat_fea_raw_df.groupby(by=['patient_id'])['MINI_MENTAL_SCORE_CATEGORY']
    .transform(lambda x: x.iloc[0])
)
display(patient_com_treat_fea_raw_df.head(5))

# select patient based on intial state
def plot_duration_mmse_category(df):
    """Plot MMSE category against duration with one trace per patient.

    Args:
        df: DataFrame with the columns 'patient_id',
            'MINI_MENTAL_SCORE_CATEGORY' and 'durations(years)'. It is not
            modified.

    Side effects:
        Displays the first five selected rows and renders the figure with
        plotly.offline's iplot. Reads the notebook-level ``cat_type`` dtype.
    """
    # .copy() gives an independent frame, so the assignment and in-place sort below
    # never touch a slice of the caller's data (SettingWithCopyWarning).
    patient_cat_dur_id = df[['patient_id', 'MINI_MENTAL_SCORE_CATEGORY', 'durations(years)']].copy()
    display(patient_cat_dur_id.head(5))
    patient_cat_dur_id['MINI_MENTAL_SCORE_CATEGORY'] = patient_cat_dur_id['MINI_MENTAL_SCORE_CATEGORY'].astype(cat_type)
    patient_cat_dur_id.sort_values(by=['MINI_MENTAL_SCORE_CATEGORY'], inplace=True)

    # One row per duration, one column per patient_id.
    patient_cat_dur_id_pivot = patient_cat_dur_id.pivot(index='durations(years)', columns='patient_id', values='MINI_MENTAL_SCORE_CATEGORY')

    # Each trace keeps only the durations at which that patient has a value;
    # dropna() is evaluated once per patient.
    traces = []
    for col in patient_cat_dur_id_pivot.columns:
        observed = patient_cat_dur_id_pivot[col].dropna()
        traces.append({'x': observed.index, 'y': observed, 'name': col})
    py.iplot(traces, filename='./patient_cat_dur_id_pivot')



# One plot per initial state, in this order: Normal, Mild, Moderate.
for _initial_state in ('Normal', 'Mild', 'Moderate'):
    plot_duration_mmse_category(
        patient_com_treat_fea_raw_df[patient_com_treat_fea_raw_df['initial_state'] == _initial_state]
    )
