# **Dementia Patients -- Analysis and Prediction**
### ***Author : Akhilesh Vyas***
### ****Date : May, 2020****

- <a href='#00'>0. Setup </a>
    - <a href='#00.1'>0.1. Load libraries </a>
    - <a href='#00.2'>0.2. Define paths </a>

- <a href='#01'>1. Data Cleaning and Preprocessing </a>  
    
- <a href='#02'>2. Deep Neural Network Model</a>

- <a href='#03'>3. Result Analysis</a> 

# <a id='00'>0. Setup </a>

## <a id='#00.1'>0.1. Load libraries </a>

In [5]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)  
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', -1)

import torch
from torch.jit import script, trace
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

import csv
import random
import re
import os
import datetime
import unicodedata
import codecs
from io import open
import itertools
import math
import time

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker


from sklearn.model_selection import train_test_split
from sklearn.model_selection import ParameterGrid

import pickle

import plotly.graph_objs as go
import plotly.offline as py
from plotly.subplots import make_subplots
np.random.seed(1)

  """


## <a id='#00.2'>0.2. Define paths </a>

In [6]:
data_path = '../../../tib_data/'

# <a id='#01'>1. Data Cleaning and Preprocessing </a>

In [7]:
patient_df = pd.read_csv(data_path+'patient_df_rnn.csv')

#print (patient_df.isna().sum()/patient_df.shape[0])
patient_df = patient_df.loc[:, patient_df.isna().sum()/patient_df.shape[0]< 0.4]

patient_df['patient_index'] = patient_df.index+1
patient_ids = patient_df['patient_id']
patient_eps = patient_df['EPISODE_DATE']

patient_df = patient_df.drop(columns=['EPISODE_DATE', 'MINI_MENTAL_SCORE_PRE'])
patient_df = patient_df.groupby(by='patient_id').transform(lambda x: x.interpolate(method='ffill'))

patient_df['EPISODE_DATE'] = patient_eps
patient_df['patient_id'] = patient_ids

patient_df = patient_df.loc[:, patient_df.isna().sum()/patient_df.shape[0]< 0.2]

patient_df = patient_df.iloc[:, [-1,-2,-3]+ [i for i in range(0, len(patient_df.columns)-3)]]

patient_df = patient_df.fillna(-1)
patient_df.head(5)

Unnamed: 0,patient_id,EPISODE_DATE,patient_index,MINI_MENTAL_SCORE,ANXIETY/PHOBIC,CERBRO-VASCULAR_DISEASE_PRESENT,DEPRESSIVE_ILLNESS,DIAGNOSTIC_CODE,EST_OF_SEVERITY_OF_DEPRESSION,EST_SEVERITY_OF_DEMENTIA,PRIMARY_PSYCHIATRIC_DIAGNOSES,PETERSEN_MCI,PRESENT_STATE_13-46:_(016)_AGE_LEFT_SCHOOL,PRESENT_STATE_13-46:_(017)_YEARS_IN_FURTHER_EDUCATION,HISTORY_PATIENT_74-119:_(076)_HISTORY_OF_STROKE,PRESENT_STATE_13-46:_(036)_FEELING_DEPRESSED,INTERVIEWER_OBS_188-212:_(196)_DEPRESSED_MOOD,SEVERITY_OF_DEPRESSION,HISTORY_PATIENT_74-119:_(077)_HISTORY_OF_HEAD_INJURY,PRESENT_STATE_47-73:_(058)_MEMORY_PROBLEM,PRESENT_STATE_47-73:_(062)_DURATION_OF_MEMORY_PROBLEMS,PRESENT_STATE_47-73:_(063)_ONSET_OF_MEMORY_PROBLEMS,PRESENT_STATE_47-73:_(064)_CHANGE_IN_MEMORY_PROBLEMS,CAMCOG_REMOTE_MEMORY_SCORE,CAMCOG_RECENT_MEMORY_SCORE,CAMCOG_LEARNING_MEMORY_SCORE,COGNITIVE_EXAM_120-161:_COGNITIVE_EXAM_120-161,COGNITIVE_EXAM_120-161:_(120)_IDENTIFIES_DAY_OF_WEEK,COGNITIVE_EXAM_120-161:_(121)_IDENTIFIES_DATE,COGNITIVE_EXAM_120-161:_(122)_IDENTIFIES_MONTH,COGNITIVE_EXAM_120-161:_(123)_IDENTIFIES_YEAR,COGNITIVE_EXAM_120-161:_(124)_IDENTIFIES_SEASON,COGNITIVE_EXAM_120-161:_(125)_IDENTIFIES_COUNTY,COGNITIVE_EXAM_120-161:_(126)_IDENTIFIES_TOWN,COGNITIVE_EXAM_120-161:_(127)_IDENTIFIES_STREETS/COUNTRY,COGNITIVE_EXAM_120-161:_(128)_IDENTIFIES_FLOOR,COGNITIVE_EXAM_120-161:_(129)_IDENTIFIES_PRESENT_PLACE,COGNITIVE_EXAM_120-161:_(130)_COMPREHENDS_NOD,COGNITIVE_EXAM_120-161:_(131)_COMPREHENDS_TOUCH,COGNITIVE_EXAM_120-161:_(132)_COMPREHENDS_LOOK,COGNITIVE_EXAM_120-161:_(133)_COMPREHENDS_TAP,COGNITIVE_EXAM_120-161:_(134)_COMPREHENDS_HOTEL,COGNITIVE_EXAM_120-161:_(135)_COMPREHENDS_VILLAGE,COGNITIVE_EXAM_120-161:_(136)_COMPREHENDS_RADIO,COGNITIVE_EXAM_120-161:_(137)_IDENTIFIES_OBJECTS,COGNITIVE_EXAM_120-161:_(137)_IDENTIFIES_OBJECTS:_PENCIL,COGNITIVE_EXAM_120-161:_(137)_IDENTIFIES_OBJECTS:_WATCH,COGNITIVE_EXAM_120-161:_(138)_NAMES_PICTURES,COGNITIVE_EXAM_120-161:_(138)_NAMES_PICTURES:_SHOE,COGNITIVE_EXAM_120-161:_(138)_NAMES_PICTURES:_TYPEWRITER,COGNITIVE_EXAM_120-161:_(138)_NAMES_PICTURES:_SCALES,COGNITIVE_EXAM_120-161:_(138)_NAMES_PICTURES:_SUITCASE,COGNITIVE_EXAM_120-161:_(138)_NAMES_PICTURES:_BAROMETER,COGNITIVE_EXAM_120-161:_(138)_NAMES_PICTURES:_LAMP,COGNITIVE_EXAM_120-161:_(139)_NUMBER_OF_ANIMALS_LISTED,COGNITIVE_EXAM_120-161:_(139)_NUMBER_OF_ANIMALS_LISTED:_SCORE,COGNITIVE_EXAM_120-161:_(140)_DEFINES_HAMMER,COGNITIVE_EXAM_120-161:_(144)_REPETITION,COGNITIVE_EXAM_120-161:_(146)_RECALLS_OBJECTS,COGNITIVE_EXAM_120-161:_(146)_RECALLS_OBJECTS:_SHOE,COGNITIVE_EXAM_120-161:_(146)_RECALLS_OBJECTS:_TYPEWRITER,COGNITIVE_EXAM_120-161:_(146)_RECALLS_OBJECTS:_SCALES,COGNITIVE_EXAM_120-161:_(146)_RECALLS_OBJECTS:_SUITCASE,COGNITIVE_EXAM_120-161:_(146)_RECALLS_OBJECTS:_BAROMETER,COGNITIVE_EXAM_120-161:_(146)_RECALLS_OBJECTS:_LAMP,COGNITIVE_EXAM_120-161:_(147)_RECOGNISES_PICTURES:_SHOE,COGNITIVE_EXAM_120-161:_(147)_RECOGNISES_PICTURES:_SCALES,COGNITIVE_EXAM_120-161:_(147)_RECOGNISES_PICTURES:_BAROMETER,COGNITIVE_EXAM_120-161:_(148)_REMEMBERS_WW1_DATE,COGNITIVE_EXAM_120-161:_(149)_REMEMBERS_WW2_DATE,COGNITIVE_EXAM_120-161:_(150)_REMEMBERS_HITLER,COGNITIVE_EXAM_120-161:_(151)_REMEMBERS_STALIN,COGNITIVE_EXAM_120-161:_(152)_REMEMBERS_MAE_WEST,COGNITIVE_EXAM_120-161:_(153)_REMEMBERS_LINDBERGH,COGNITIVE_EXAM_120-161:_(154)_KNOWS_MONARCH,COGNITIVE_EXAM_120-161:_(155)_KNOWS_HEIR_TO_THRONE,COGNITIVE_EXAM_120-161:_(156)_KNOWS_PRIME_MINISTER,COGNITIVE_EXAM_120-161:_(157)_KNOWS_RECENT_NEWS_ITEM,COGNITIVE_EXAM_120-161:_(158)_REGISTERS_OBJECTS,COGNITIVE_EXAM_120-161:_(158)_REGISTERS_OBJECTS_1:_APPLE,COGNITIVE_EXAM_120-161:_(158)_REGISTERS_OBJECTS_3:_PENNY,COGNITIVE_EXAM_120-161:_(158)_REGISTERS_OBJECTS:_REPEATS,COGNITIVE_EXAM_120-161:_(159)_COUNTING_BACKWARDS,COGNITIVE_EXAM_120-161:_(161)_RECALLS_OBJECTS,COGNITIVE_EXAM_120-161:_(161)_RECALLS_OBJECTS_1:_APPLE,COGNITIVE_EXAM_120-161:_(161)_RECALLS_OBJECTS_2:_TABLE,COGNITIVE_EXAM_120-161:_(161)_RECALLS_OBJECTS_3:_PENNY,COGNITIVE_EXAM_162-187:_COGNITIVE_EXAM_162-187,COGNITIVE_EXAM_162-187:_(162)_READING_COMPREHENSION_1,COGNITIVE_EXAM_162-187:_(163)_READING_COMPREHENSION_2,COGNITIVE_EXAM_162-187:_(164)_DRAWS_PENTAGON,COGNITIVE_EXAM_162-187:_(165)_DRAWS_SPIRAL,COGNITIVE_EXAM_162-187:_(166)_DRAWS_HOUSE,COGNITIVE_EXAM_162-187:_(167)_CLOCK_DRAWING,COGNITIVE_EXAM_162-187:_(167)_CLOCK_DRAWING:_CIRCLE,COGNITIVE_EXAM_162-187:_(167)_CLOCK_DRAWING:_NUMBERS,COGNITIVE_EXAM_162-187:_(167)_CLOCK_DRAWING:_TIME,COGNITIVE_EXAM_162-187:_(168)_WRITES_A_SENTENCE,COGNITIVE_EXAM_162-187:_(169)_PRAXIS_-_PAPER,COGNITIVE_EXAM_162-187:_(169)_PRAXIS_-_PAPER:_RIGHT_HAND,COGNITIVE_EXAM_162-187:_(169)_PRAXIS_-_PAPER:_FOLDS,COGNITIVE_EXAM_162-187:_(169)_PRAXIS_-_PAPER:_ON_LAP,COGNITIVE_EXAM_162-187:_(170)_PRAXIS_-_ENVELOPE,COGNITIVE_EXAM_162-187:_(171)_DICTATION,COGNITIVE_EXAM_162-187:_(172)_MIME_-_WAVE,COGNITIVE_EXAM_162-187:_(173)_MIME_-_SCISSORS,COGNITIVE_EXAM_162-187:_(174)_MIME_-_BRUSHING_TEETH,COGNITIVE_EXAM_162-187:_(175)_IDENTIFIES_COIN,COGNITIVE_EXAM_162-187:_(176)_ADDS_UP_MONEY,COGNITIVE_EXAM_162-187:_(177)_SUBTRACTS_MONEY,COGNITIVE_EXAM_162-187:_(178)_RECALLS_ADDRESS,COGNITIVE_EXAM_162-187:_(178)_RECALLS_ADDRESS:_JOHN,COGNITIVE_EXAM_162-187:_(178)_RECALLS_ADDRESS:_BROWN,COGNITIVE_EXAM_162-187:_(178)_RECALLS_ADDRESS:_D42,COGNITIVE_EXAM_162-187:_(178)_RECALLS_ADDRESS:_WEST,COGNITIVE_EXAM_162-187:_(178)_RECALLS_ADDRESS:_BEDFORD,COGNITIVE_EXAM_162-187:_(179)_SIMILARITIES_-_FRUIT,COGNITIVE_EXAM_162-187:_(180)_SIMILARITIES_-_CLOTHING,COGNITIVE_EXAM_162-187:_(181)_SIMILARITIES_-_FURNITURE,COGNITIVE_EXAM_162-187:_(182)_SIMILARITIES_-_LIFE,COGNITIVE_EXAM_162-187:_(183)_RECOGNISES_FAMOUS_PEOPLE,COGNITIVE_EXAM_162-187:_(184)_RECOGNISES_OBJECTS,COGNITIVE_EXAM_162-187:_(184)_RECOGNISES_OBJECTS:_SPECTACLES,COGNITIVE_EXAM_162-187:_(184)_RECOGNISES_OBJECTS:_SHOE,COGNITIVE_EXAM_162-187:_(184)_RECOGNISES_OBJECTS:_PURSE,COGNITIVE_EXAM_162-187:_(184)_RECOGNISES_OBJECTS:_CUP,COGNITIVE_EXAM_162-187:_(184)_RECOGNISES_OBJECTS:_TELEPHONE,COGNITIVE_EXAM_162-187:_(184)_RECOGNISES_OBJECTS:_PIPE,COGNITIVE_EXAM_162-187:_(185)_RECOGNISE_PERSON,COGNITIVE_EXAM_162-187:_(187)_PATIENT,COGNITIVE_EXAM_162-187:_HANDED,COGNITIVE_IMPAIRMENT,PRESENT_STATE_47-73:_(050)_PHYSICAL_SYMPTOMS,PRESENT_STATE_47-73:_(057)_PHYSICAL_PROBLEMS,PHYSICAL_EXAM_213-234:_PHYSICAL_EXAM_213-234,PHYSICAL_EXAM_213-234:_(213)_BLOOD_PRESSURE,PHYSICAL_EXAM_213-234:_(213)_BLOOD_PRESSURE:_SYSTOLIC,PHYSICAL_EXAM_213-234:_(213)_BLOOD_PRESSURE:_DIASTOLIC,PHYSICAL_EXAM_213-234:_(215)_TENDON_REFLEXES,PHYSICAL_EXAM_213-234:_(216)_PLANTAR_REFLEXES,PHYSICAL_EXAM_213-234:_(217)_HEMIPARESIS,PHYSICAL_EXAM_213-234:_(218)_GAIT,PHYSICAL_EXAM_213-234:_(219)_MOBILITY,PHYSICAL_EXAM_213-234:_(220)_DEAFNESS,PHYSICAL_EXAM_213-234:_(221)_VISUAL_DEFECT,PHYSICAL_EXAM_213-234:_(222)_TREMOR,PHYSICAL_EXAM_213-234:_(223)_MANUAL_DIFFICULTY,PHYSICAL_EXAM_213-234:_(224)_ABNORMAL_EYE_MOVEMENTS,PHYSICAL_EXAM_213-234:_(225)_SHORTNESS_OF_BREATH,PHYSICAL_EXAM_213-234:_(226)_FULL_BLOOD_COUNT,PHYSICAL_EXAM_213-234:_(227)_B12_OR_FOLATE,PHYSICAL_EXAM_213-234:_(228)_THYROID_FUNCTION_TESTS,PHYSICAL_EXAM_213-234:_(229)_UREA_AND_ELECTROLYTES,PHYSICAL_EXAM_213-234:_(230)_SKULL_XRAY_OR_SPECT_SCAN,PHYSICAL_EXAM_213-234:_(231)_LIVER_FUNCTION_TESTS,PHYSICAL_EXAM_213-234:_(232)_CT_OR_MRI_SCAN,PHYSICAL_EXAM_213-234:_(233)_VDRL,PHYSICAL_EXAM_213-234:_(234)_CAUSES_OF_DEMENTIA_EXCLUDED,PHYSICAL_EXAM_213-234:_SUBJECT_ON_MEDICATION,PRESENT_STATE_47-73:_(049)_ANXIOUS,PRESENT_STATE_47-73:_(051)_ANXIOUS_SITUATIONS,INTERVIEWER_OBS_188-212:_(195)_ANXIOUS_OR_FEARFUL,smoker,age,gender_Female,gender_Male,apoe_E2E2,apoe_E2E3,apoe_E2E4,apoe_E3E3,apoe_E3E4,apoe_E4E4,apoe_TBD,apoe__E3E3,durations(years)
0,1,1998-01-13,1,30,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,9.0,9.0,6,4,14,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,6.0,1.0,1.0,1.0,1.0,1.0,1.0,15.0,4.0,1.0,1.0,3.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,3.0,1.0,1.0,0.0,2.0,3.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,3.0,1.0,1.0,1.0,1.0,3.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0,2.0,2.0,1.0,1.0,5.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,5.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,40.0,9.0,0.0,0.0,0.0,2.0,0.0,138.0,84.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,1.0,1.0,1.0,0.0,1.0,71.0,0,1,0,0,0,1,0,0,0,0,0.0
1,1,2000-01-20,2,30,1.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,18.0,2.0,0.0,0.0,0.0,9.0,1.0,1.0,888.0,8.0,1.0,6,4,14,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,6.0,1.0,1.0,1.0,1.0,1.0,1.0,18.0,4.0,1.0,1.0,3.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,3.0,1.0,1.0,0.0,2.0,3.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,4.0,1.0,1.0,1.0,1.0,3.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,5.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,6.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,25.0,9.0,0.0,0.0,0.0,0.0,9.0,138.0,84.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,1.0,1.0,0.0,0.0,1.0,73.019178,0,1,0,0,0,1,0,0,0,0,2.019178
2,1,2002-02-21,3,30,1.0,9.0,0.0,0.0,0.0,0.0,9.0,0.0,18.0,2.0,0.0,0.0,0.0,9.0,1.0,1.0,888.0,8.0,1.0,-1,-1,-1,3.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,2.0,1.0,1.0,6.0,9.0,9.0,9.0,9.0,9.0,9.0,18.0,4.0,9.0,1.0,3.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,3.0,1.0,1.0,0.0,9.0,3.0,1.0,1.0,1.0,3.0,1.0,9.0,1.0,9.0,9.0,4.0,9.0,9.0,9.0,1.0,3.0,1.0,1.0,1.0,9.0,9.0,9.0,9.0,9.0,1.0,9.0,9.0,5.0,9.0,9.0,9.0,9.0,9.0,2.0,2.0,2.0,2.0,2.0,6.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,25.0,9.0,0.0,0.0,0.0,0.0,9.0,138.0,84.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,1.0,1.0,0.0,0.0,1.0,75.109589,0,1,0,0,0,1,0,0,0,0,4.109589
3,1,2002-10-28,4,29,1.0,9.0,0.0,0.0,0.0,0.0,9.0,0.0,18.0,2.0,0.0,0.0,0.0,9.0,1.0,1.0,888.0,8.0,1.0,-1,-1,-1,3.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,2.0,1.0,1.0,6.0,9.0,9.0,9.0,9.0,9.0,9.0,18.0,4.0,9.0,1.0,3.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,3.0,1.0,1.0,0.0,9.0,3.0,1.0,1.0,1.0,3.0,1.0,9.0,1.0,9.0,9.0,4.0,9.0,9.0,9.0,1.0,3.0,1.0,1.0,1.0,9.0,9.0,9.0,9.0,9.0,1.0,9.0,9.0,5.0,9.0,9.0,9.0,9.0,9.0,2.0,2.0,2.0,2.0,2.0,6.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,25.0,9.0,0.0,0.0,0.0,0.0,9.0,138.0,84.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,1.0,1.0,0.0,0.0,1.0,75.791781,0,1,0,0,0,1,0,0,0,0,4.791781
4,1,2004-06-22,5,30,1.0,9.0,0.0,0.0,0.0,0.0,9.0,0.0,18.0,2.0,0.0,0.0,0.0,9.0,1.0,1.0,888.0,8.0,1.0,-1,-1,-1,3.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,2.0,1.0,1.0,6.0,9.0,9.0,9.0,9.0,9.0,9.0,18.0,4.0,9.0,1.0,3.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,3.0,1.0,1.0,0.0,9.0,3.0,1.0,1.0,1.0,3.0,1.0,9.0,1.0,9.0,9.0,4.0,9.0,9.0,9.0,1.0,3.0,1.0,1.0,1.0,9.0,9.0,9.0,9.0,9.0,1.0,9.0,9.0,5.0,9.0,9.0,9.0,9.0,9.0,2.0,2.0,2.0,2.0,2.0,6.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,25.0,9.0,0.0,0.0,0.0,2.0,9.0,138.0,84.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,0.0,1.0,0.0,0.0,1.0,77.443836,0,1,0,0,0,1,0,0,0,0,6.443836


In [8]:
# pairs 
patient_indx_MMS = patient_df.groupby(['patient_id'])['patient_index', 'MINI_MENTAL_SCORE'].agg(lambda x : x.tolist())
patient_indx_MMS['count'] = patient_indx_MMS['patient_index'].apply(lambda x: len(x))



Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



# <a id='#02'>2. Deep Neural Network Model</a>

In [9]:
MAX_LENGTH = 10
USE_CUDA = torch.cuda.is_available()
device = torch.device("cuda" if USE_CUDA else "cpu")
print (device)
PAD_token = 6512

cpu


In [10]:
pid_list = patient_indx_MMS['patient_index'].values
mmse_list = patient_indx_MMS['MINI_MENTAL_SCORE'].values
pairs = [[pid, mmse] if len(pid) <= 10 else [pid[0:10], mmse[0:10]] for pid, mmse  in zip(pid_list,mmse_list)]
pairs[0:10]

def tensorsFromPair(pair):
    input_tensor = torch.tensor(pair[0], dtype=torch.long, device=device).view(-1, 1)
    target_tensor = torch.tensor(pair[1], dtype=torch.float, device=device).view(-1, 1)
    return (input_tensor, target_tensor)

def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))


def showPlot(points1, points2, file_name_suf):
    plt.figure()
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    loc = ticker.MultipleLocator(base=20)
    ax.yaxis.set_major_locator(loc)
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.plot(points1, label='train')
    plt.plot(points2, label='val')
    ax.legend()
    plt.savefig('./result/'+'train_val_batch'+file_name_suf+'.png')
    plt.close()
    

def zeroPadding(l, fillvalue=PAD_token):
    return list(itertools.zip_longest(*l, fillvalue=fillvalue))

def binaryMatrix(l, value=PAD_token):
    m = []
    for i, seq in enumerate(l):
        m.append([])
        for token in seq:
            if token == PAD_token:
                m[i].append(0)
            else:
                m[i].append(1)
    return m

def outputVar(indexes_batch):
    max_target_len = max([len(indexes) for indexes in indexes_batch])
    padList = zeroPadding(indexes_batch)
    mask = binaryMatrix(padList)
    mask = torch.BoolTensor(mask)
    padVar = torch.LongTensor(padList)
    return padVar, mask, max_target_len

# Returns padded input sequence tensor and lengths
def inputVar(indexes_batch):
    lengths = torch.tensor([len(indexes) for indexes in indexes_batch])
    padList = zeroPadding(indexes_batch)
    padVar = torch.LongTensor(padList)
    return padVar, lengths


# Returns all items for a given batch of pairs
def batch2TrainData(pair_batch):
    #print (pair_batch)
    pair_batch.sort(key=lambda x: len(x[0]), reverse=True)
    input_batch, output_batch = [], []
    for pair in pair_batch:
        input_batch.append(pair[0])
        output_batch.append(pair[1])
    inp, lengths = inputVar(input_batch)
    output, mask, max_target_len = outputVar(output_batch)
    return inp, lengths, output, mask, max_target_len

def maskLoss(criterion,inp, target, mask):
    nTotal = mask.sum()
    #diff2 = (torch.flatten(inp) - torch.flatten(target)) ** 2.0 * torch.flatten(mask)
    #print (inp.shape, target.shape, mask.shape)
    #diff2 = torch.abs((torch.flatten(inp) - torch.flatten(target))) * torch.flatten(mask)
    #loss1 = torch.sum(diff2) / torch.sum(mask)
    loss  = criterion(inp, target.view(-1,1)).squeeze(1).masked_select(mask).mean()
    #print (loss1, loss)
    return loss, nTotal.item()


def readpicklefile(file):
    with open(file, 'rb') as f:
        file_obj = pickle.load(f)
    return file_obj


In [None]:
class LSTMModel(nn.Module):
    def __init__(self, hidden_size, output_size, embedding_weight, dropout=0.0):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        #self.dropout = dropout
        self.embedding = nn.Embedding.from_pretrained(embedding_weight)
        #self.embedding_dropout = nn.Dropout(dropout)
        self.lstm = nn.LSTM(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input)
        #output = self.embedding_dropout(output)
        # print(output.dtype, hidden.dtype)
        # output = F.relu(output) # May be not converging due to this 
        # print(output.dtype, hidden.dtype)
        outputs, hidden = self.lstm(embedded, hidden)
        #print (outputs.shape)
        output = self.out(outputs[0])
        return output, hidden

    def initHidden(self, batch_size):
        return (torch.zeros(1, batch_size, self.hidden_size, device=device), torch.zeros(1, batch_size, self.hidden_size, device=device))

In [None]:
def train(input_variable, lengths, target_variable, mask,  max_target_len, lstm_model,
                    lstm_model_optimizer, criterion, batch_size, clip, max_length=MAX_LENGTH):
    lstm_model_optimizer.zero_grad()
    
     # Set device options
    input_variable = input_variable.to(device)
    lengths = lengths.to(device)
    target_variable = target_variable.to(device)
    mask = mask.to(device)
    
    # Initialize variables
    loss = 0
    print_losses = []
    n_totals = 0
    
    # hidden state
    lstm_model_hidden = lstm_model.initHidden(batch_size)

    for di in range(max_target_len):
        # print (input_tensor[di].dtype, lstm_model_hidden.dtype)
        lstm_model_output, lstm_model_hidden  = lstm_model(
            input_variable[di].view(1, -1), lstm_model_hidden)
        #print (lstm_model_output.dtype, target_tensor[di].dtype)
        #print (lstm_model_output, target_variable[di], mask[di])
        # print (lstm_model_output.shape, target_variable[di].shape, mask[di].shape)
        
        mask_loss, nTotal = maskLoss(criterion,lstm_model_output, target_variable[di], mask[di])
        #print (mask_loss, nTotal)
        loss += mask_loss
        print_losses.append(mask_loss.item() * nTotal)
        n_totals += nTotal
        # loss += criterion(lstm_model_output[0], target_tensor[di], mask[di])
        #print (loss.requires_grad)
        #print ('train:',loss.dtype)
    
    loss.backward()
    
    _ = nn.utils.clip_grad_norm_(lstm_model.parameters(), clip)
    
    lstm_model_optimizer.step()

    return sum(print_losses)/ n_totals  # need to be changed

def evalu(input_variable, lengths, target_variable, mask,  max_target_len, lstm_model,
                criterion, batch_size=1, max_length=MAX_LENGTH):
    with torch.no_grad():
        # Set device options
        input_variable = input_variable.to(device)
        lengths = lengths.to(device)
        target_variable = target_variable.to(device)
        mask = mask.to(device)
        
        # Initialize variables
        lstm_model_hidden = lstm_model.initHidden(batch_size)
        loss = 0
        print_losses = []
        n_totals = 0
        
        # Prediction
        output = []
        o_act = []
        o_pre = []
        
        for di in range(max_target_len):
            # print (input_tensor[di].dtype, lstm_model_hidden.dtype)
            lstm_model_output, lstm_model_hidden  = lstm_model(
                input_variable[di].view(1, -1), lstm_model_hidden)
            #print (lstm_model_output.dtype, target_tensor[di].dtype)
            #print (lstm_model_output, target_variable[di], mask[di])
            # print (lstm_model_output.shape, target_variable[di].shape, mask[di].shape)

            mask_loss, nTotal = maskLoss(criterion,lstm_model_output, target_variable[di], mask[di])
            #print (mask_loss, nTotal)
            loss += mask_loss
            print_losses.append(mask_loss.item() * nTotal)
            n_totals += nTotal
            
            o_pre.append(target_variable[di].view(1, -1).flatten().cpu().numpy().tolist())
            o_act.append(lstm_model_output.flatten().cpu().numpy().tolist())
        o_act = np.array(o_act).flatten().tolist()
        o_pre = np.array(o_pre).flatten().tolist()
        output.append((o_act,o_pre))

    return (sum(print_losses)/ n_totals, output)

In [None]:
def trainIters(train_pairs, val_pairs, lstm_model, n_iters, print_every, plot_every, batch_size, clip, learning_rate, optimser):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every
    
    plot_loss_val = []

    lstm_model_optimizer = optimser(lstm_model.parameters(), lr=learning_rate)
    
    
    # need to define loss function
    # criterion = nn.MSELoss()
    # criterion = nn.SmoothL1Loss
    criterion = nn.L1Loss(reduce=False)
    

    for iter in range(1, n_iters+1):
        
        batches = batch2TrainData([random.choice(train_pairs) for _ in range(batch_size)])
        input_variable, lengths, target_variable, mask, max_target_len = batches
    
        '''print (input_variable)
        print (lengths)
        print (mask)
        print (max_target_len)
        #print (target_variable)'''
        
        loss = train(input_variable, lengths, target_variable, mask,  max_target_len, lstm_model,
                    lstm_model_optimizer, criterion, batch_size, clip)
        
        #print ('trainIters:',loss.dtype)
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            total_val_loss = 0
            lstm_model.eval()
            output_total = []
            val_size = len(val_pairs)
            
            for i in range(0, val_size):
                batches_val = batch2TrainData([val_pairs[i]])
                input_variable_val, lengths_val, target_variable_val, mask_val, max_target_len_val = batches_val
                loss1, output = evalu(input_variable_val, lengths_val, target_variable_val, mask_val,  max_target_len_val, lstm_model,
                criterion)
                total_val_loss+=loss1
                output_total.append(output)
            
            print_val_loss_avg = total_val_loss / val_size
            
            print('%s (%d %d%%) %.4f %.4f' % (timeSince(start, iter / n_iters),
                                         iter, iter / n_iters * 100, print_loss_avg, print_val_loss_avg))
            
            plot_loss_val.append(print_val_loss_avg)
            
            lstm_model.train()      

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0
            
            
    return (plot_losses, plot_loss_val, output_total)

In [12]:
def listgen(pkkk):
    lnth = len(pkkk)
    i = 0
    while i < lnth:
        yield pkkk[i]
        i +=1

def act_prd_list_func(file_o):
    act_prd_l=[]
    for i,j in enumerate(listgen(file_o)):
        act_prd_l.append(list(j[0]))
    return act_prd_l


def diff_act_prd_func(output_total, file_name_suf):
    with open('./result/'+'output_total_batch'+file_name_suf+'.pkl', 'wb') as f:
        pickle.dump(output_total, f)
        
    act_l = []
    prd_l = []
    for i,j in enumerate(listgen(output_total)):
        for k,l in zip(j[0][0], j[0][1]):
            act_l.append(k)
            prd_l.append(l)
    return [math.fabs(i-j) for i, j in zip(act_l, prd_l)]

def plot_hist(diff_act_prd, file_name_suf):
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.hist(diff_act_prd, label='Absolute difference between True and Predicted MMSE Value')
    ax.set_title('Histogram absolute difference between True and Predicted MMSE Value')
    plt.ylabel('count')
    plt.xlabel('value')
    fig.savefig('./result/'+'abs_pre_true_batch'+file_name_suf+'.jpeg')
    
    

hidden_size = 171
output_size = 1
token_size = (1,171)
epoch = 40

#token_emb_init, clip, batch_size, learning rate,
param_grid = {'batch_size':[16], 'clip':[10], 'learning_rate':[0.001], #clip can be [5,10]
              'optimser':[optim.Adam], 'pad_token_init':[torch.zeros]}



for params in list(ParameterGrid(param_grid)):
    
    train_pairs, val_pairs = train_test_split(pairs, test_size=0.2, random_state=42)
    
    print ('Dataset Size:', len(train_pairs), len(val_pairs))
    
    with open('./result/'+'train'+'.pkl', 'wb') as f:
        pickle.dump(train_pairs, f)
        
    with open('./result/'+'test'+'.pkl', 'wb') as f:
        pickle.dump(val_pairs, f)
        
    break
    
    print (params)
    pad_token_init = params['pad_token_init'](token_size, )
    batch_size = params['batch_size']
    clip = params['clip']
    learning_rate = params['learning_rate']
    optimser = params['optimser']
    
    # embedding Matrix
    embedding_weight = torch.cat((torch.from_numpy(patient_df.iloc[:, 4:].values).float(), pad_token_init), dim=0)
    # embedding_weight = F.normalize(torch.from_numpy(patient_df.iloc[:, 4:].values).float(), p=2, dim=1, eps=1e-12, out=None)
    print(embedding_weight.shape)

    lstm_model = LSTMModel(hidden_size, output_size, embedding_weight, dropout=0.0).to(device)
    #trainIters(lstm_model, 3, print_every=1)
    plot_losses, plot_loss_val, output_total = trainIters(train_pairs, val_pairs, lstm_model, int(len(train_pairs)/batch_size)*epoch, 
                                                          int(len(train_pairs)/batch_size), int(len(train_pairs)/batch_size), 
                                                          batch_size, clip, learning_rate, optimser)
    
    #plot_losses, plot_loss_val, output_total = trainIters(train_pairs, val_pairs, lstm_model, 1, 
    #                                                      1, 1, 
    #                                                      batch_size, clip, learning_rate, optimser)
    
    file_name_suf = '_{}_{}_{}_{}_{}'.format(params['pad_token_init'].__name__, batch_size, clip, learning_rate, optimser)
    diff_act_prd = diff_act_prd_func(output_total, file_name_suf)
    act_prd_list_func(output_total, file_name_suf)
    plot_hist(diff_act_prd, file_name_suf)
    showPlot(plot_losses, plot_loss_val, file_name_suf)
    print ('diff_act_prd', len(diff_act_prd))
    

Dataset Size: 822 206


# <a id='#03'>3. Result Analysis</a>

### Plotting Graph, Patientid, Duration and Predicted MMSE diferences.

In [127]:
# getting Predicted MMSE values and indexes from test data
test_file_l = readpicklefile('./result/test.pkl')
output_file_ob = readpicklefile('./result/result_08_06_MAE/output_total_batch_zeros_16_10_0.001_<class \'torch.optim.adam.Adam\'>.pkl')
act_prd_l = act_prd_list_func(output_file_ob)

patient_index_l = [j for i in test_file_l for j in i[0]]
act_mmse_l = [j for i in test_file_l for j in i[1]]
pred_mmse_l = [j for i in act_prd_l for j in i[0]]
act_mmse_l_l = [j for i in act_prd_l for j in i[1]]
print (patient_index_l[0:20])
print (act_mmse_l[0:20])
print (pred_mmse_l[0:20]) 
print (act_mmse_l_l[0:20])

test_df = pd.DataFrame(data={'patient_index':patient_index_l, 'MINI_MENTAL_SCORE_PRED':pred_mmse_l})
print (test_df.head(10), test_df.shape)


patient_df_id_in_mm_du = patient_df[['patient_id', 'patient_index', 'MINI_MENTAL_SCORE', 'durations(years)']]
print (patient_df_id_in_mm_du.head(10))

test_df_id_in_mm_du = pd.merge(patient_df_id_in_mm_du, test_df, on=['patient_index'])
test_df_id_in_mm_du['True_Minus_Pred_MMSE']=round(test_df_id_in_mm_du['MINI_MENTAL_SCORE']-test_df_id_in_mm_du['MINI_MENTAL_SCORE_PRED'],2)
test_df_id_in_mm_du['durations(years)']=round(test_df_id_in_mm_du['durations(years)'],2)
p
print(test_df_id_in_mm_du.head(10), test_df_id_in_mm_du.shape)

[2826, 2827, 3523, 3524, 3525, 3526, 3527, 2558, 2559, 2560, 693, 694, 695, 696, 697, 698, 2809, 2789, 2790, 2791]
[19, 19, 15, 15, 6, 12, 8, 29, 27, 30, 24, 22, 24, 25, 22, 23, 16, 16, 6, 3]
[23.709699630737305, 24.34981918334961, 20.649702072143555, 19.9171142578125, 16.923812866210938, 15.171982765197754, 12.018853187561035, 24.69800567626953, 28.36417007446289, 23.593219757080078, 22.705934524536133, 22.71784210205078, 19.79880142211914, 19.079936981201172, 18.891691207885742, 16.19797706604004, 26.465221405029297, 4.199371337890625, 3.019397497177124, 2.0817935466766357]
[19, 19, 15, 15, 6, 12, 8, 29, 27, 30, 24, 22, 24, 25, 22, 23, 16, 16, 6, 3]
   patient_index  MINI_MENTAL_SCORE_PRED
0  2826           23.709700             
1  2827           24.349819             
2  3523           20.649702             
3  3524           19.917114             
4  3525           16.923813             
5  3526           15.171983             
6  3527           12.018853             
7  2558     

NameError: name 'p' is not defined

In [128]:
test_df_id_in_mm_du_pivot = test_df_id_in_mm_du.pivot(index='patient_id', columns='durations(years)', values='True_Minus_Pred_MMSE')
test_df_id_in_mm_du_pivot.interpolate(method='bfill', axis=0, limit_area='inside', inplace=True)
patient_ids = ["P_ID:"+str(i) for i in test_df_id_in_mm_du_pivot.index.values]

line_patients = test_df_id_in_mm_du_pivot.columns.values

mms_values = test_df_id_in_mm_du_pivot.values

fig = go.Figure(data=go.Heatmap(
        z=mms_values,
        x=line_patients,
        y=patient_ids,
        colorscale='Viridis', 
        colorbar={"len":0.3, "y":0.8, "title":"True(-)Predicted MMSE", 'titleside':'right'},
        showscale=True),
        layout=go.Layout(width=1200, height=1500, title='Patient True and Predicted MMMSE value difference ',xaxis_nticks=30,
                         xaxis={"title": "Duration(year)"},
                         yaxis={"title": "Patient_ids", "tickvals":patient_ids, "ticktext":patient_ids,"tickfont": {"size": 6}, "tickangle": -10}))


#fig.show()

py.plot(fig,filename='plot_MMSE_difference.html')

'plot_MMSE_difference.html'

In [136]:
test_df_id_in_mm_du_grby_pat_id = test_df_id_in_mm_du.groupby(by=['patient_id'])['durations(years)', 'MINI_MENTAL_SCORE', 'MINI_MENTAL_SCORE_PRED', 'True_Minus_Pred_MMSE']\
                       .aggregate(lambda x : x.tolist())

test_df_id_in_mm_du_grby_pat_id['count_episode'] = test_df_id_in_mm_du_grby_pat_id['durations(years)'].apply(lambda x: len(x))
test_df_id_in_mm_du_grby_pat_id['True_Minus_Pred_MMSE_max'] = test_df_id_in_mm_du_grby_pat_id['True_Minus_Pred_MMSE'].apply(lambda x: max(x))
test_df_id_in_mm_du_grby_pat_id.head(5)


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



Unnamed: 0_level_0,durations(years),MINI_MENTAL_SCORE,MINI_MENTAL_SCORE_PRED,True_Minus_Pred_MMSE,count_episode,True_Minus_Pred_MMSE_max
patient_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
16,"[0.0, 1.08, 2.19, 3.15, 3.97, 5.27]","[28, 25, 28, 25, 24, 26]","[24.568830490112305, 29.621824264526367, 29.491886138916016, 29.240150451660156, 29.17577362060547, 26.16006851196289]","[3.43, -4.62, -1.49, -4.24, -5.18, -0.16]",6,3.43
19,"[0.0, 0.5, 0.97, 1.54, 2.13, 2.65, 3.12, 3.64, 4.16]","[12, 14, 15, 5, 7, 6, 0, 0, 0]","[22.46804428100586, 19.070417404174805, 13.2672700881958, 9.009045600891113, 5.489011287689209, 4.158153533935547, 3.831190347671509, -0.07018500566482544, 1.3400731086730957]","[-10.47, -5.07, 1.73, -4.01, 1.51, 1.84, -3.83, 0.07, -1.34]",9,1.84
60,"[0.0, 0.78, 1.15, 1.65, 2.3, 2.59, 3.42]","[13, 15, 20, 13, 17, 13, 10]","[13.984481811523438, 13.641024589538574, 12.154195785522461, 12.043076515197754, 10.432819366455078, 8.622781753540039, 6.379458427429199]","[-0.98, 1.36, 7.85, 0.96, 6.57, 4.38, 3.62]",7,7.85
171,"[0.0, 1.18, 2.18, 3.12, 4.15, 5.14, 6.18]","[28, 30, 26, 29, 25, 27, 21]","[24.65878677368164, 28.350662231445312, 30.207979202270508, 30.12527847290039, 28.767669677734375, 28.402015686035156, 26.994075775146484]","[3.34, 1.65, -4.21, -1.13, -3.77, -1.4, -5.99]",7,3.34
220,"[0.0, 1.06, 1.23, 1.98, 2.58, 3.99, 5.01, 6.04]","[29, 27, 29, 30, 28, 30, 29, 28]","[28.3316707611084, 30.302600860595703, 29.842884063720703, 29.603294372558594, 29.592130661010742, 29.556106567382812, 29.117385864257812, 27.62526512145996]","[0.67, -3.3, -0.84, 0.4, -1.59, 0.44, -0.12, 0.37]",8,0.67


In [170]:
test_df_id_in_mm_du_grby_pat_id.loc[8703]

durations(years)            [0.0, 1.04]                            
MINI_MENTAL_SCORE           [22, 22]                               
MINI_MENTAL_SCORE_PRED      [22.809377670288086, 24.09530258178711]
True_Minus_Pred_MMSE        [-0.81, -2.1]                          
count_episode               2                                      
True_Minus_Pred_MMSE_max   -0.81                                   
Name: 8703, dtype: object

In [177]:
durations_years_list = test_df_id_in_mm_du_grby_pat_id['durations(years)'].values.tolist()
true_minus_pred_mmse_list = test_df_id_in_mm_du_grby_pat_id['True_Minus_Pred_MMSE'].values.tolist()
mmse_list = test_df_id_in_mm_du_grby_pat_id['MINI_MENTAL_SCORE'].values.tolist()
mmse_pre_list = test_df_id_in_mm_du_grby_pat_id['MINI_MENTAL_SCORE_PRED'].values.tolist()

patient_id_unique = test_df_id_in_mm_du['patient_id'].unique()


def create_bubble_tarce(x, y, marker_size, zx, name):
    trace = go.Scatter(
    x=x, y=y,
    mode='markers',
    marker_size=[abs(i)*2 for i in marker_size], 
    name='pid: ' + str(name),
    hovertext=['MMSE_Diff:'+str(i)+'<br>'+'MMSE_PRE:'+str(j) for i,j in zip(marker_size, zx)])
    return trace

def create_bar_trace(x, y, name):
    trace = go.Bar(x=x, y=y, width=0.08, name='pid: ' + str(name))
    return trace
    
    
def plot_stacked_bar(total_pat, rows, cols, width, height, title_text, x_title, y_title, X, Y):
    
    fig = make_subplots(rows=rows, cols=cols, x_title=x_title, y_title=y_title)
    traces = [create_bar_trace(X[i],Y[i], patient_id_unique[i]) for i in range(total_pat)]
    rows_cols_list = [(i,j) for i in range(1,rows+1) for j in range(1,cols+1)]
    [fig.append_trace(trace, rows_cols_list[k][0], rows_cols_list[k][1]) for k, trace in enumerate(traces)]
    fig.update_layout(height=height, width=width, title_text=title_text)
    py.plot(fig,filename='mmse_diff_bar.html')
    
def plot_stacked_bubble(total_pat, rows, cols, width, height, title_text, x_title, y_title, X, Y, Z, Zx):
    
    fig = make_subplots(rows=rows, cols=cols, x_title=x_title, y_title=y_title)
    traces = [create_bubble_tarce(X[i], Y[i], Z[i], Zx[i], patient_id_unique[i]) for i in range(total_pat)]
    rows_cols_list = [(i,j) for i in range(1,rows+1) for j in range(1,cols+1)]
    [fig.append_trace(trace, rows_cols_list[k][0], rows_cols_list[k][1]) for k, trace in enumerate(traces)]
    fig.update_traces(mode='markers')
    fig.update_layout(height=height, width=width, title_text=title_text)
    py.plot(fig,filename='mmse_diff_bubble.html')

total_pat = len(test_df_id_in_mm_du.patient_id.unique())
col = 6
rows = int(total_pat/col) if total_pat%col==0 else int(total_pat/col+1)



#plot_stacked_bar(total_pat, rows, col, 1000, 5000, "Side By Side MMSE_diff Subplots for Patients", 
#                 'Duration(years)', 'MMSE_Difference(True-Predicted)', durations_years_list, true_minus_pred_mmse_list)

plot_stacked_bubble(total_pat, rows, col, 1200, 4000, "Side By Side Bubble Subplots for Patients", 
                 'DURATION(years)', 'TRUE_MMSE_SCORE', durations_years_list, mmse_list, true_minus_pred_mmse_list, mmse_pre_list)

In [179]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Create Subplots
fig = make_subplots(rows=2, cols=2)

fig.add_trace(go.Scatter(x=[2, 6], y=[1,1]), row=1, col=1)
fig.add_trace(go.Bar(x=[1,2,3], y=[4,5,6]), row=1, col=2)
fig.add_trace(go.Scatter(x=[10,20], y=[40,50]), row=2, col=1)
fig.add_trace(go.Bar(x=[11,13,15], y=[8,11,20]), row=2, col=2)

# Add shapes
'''fig.update_layout(
    shapes=[
        dict(type="line", xref="x1", yref="y1",
            x0=3, y0=0.5, x1=5, y1=0.8, line_width=3),
        dict(type="rect", xref="x2", yref='y2',
             x0=4, y0=2, x1=5, y1=6),
        dict(type="rect", xref="x3", yref="y3",
             x0=10, y0=20, x1=15, y1=30),
        dict(type="circle", xref="x4", yref="y4",
             x0=5, y0=12, x1=10, y1=18)])'''
py.plot(fig,filename='test.html')

'test.html'