In [None]:
### Tools and Packages
##Basics
import pandas as pd
import numpy as np
import sys, random
import math
try:
    import cPickle as pickle
except:
    import pickle
import string
import re
import os
import time
from tqdm import tqdm

## ML and Stats 
from sklearn import datasets, linear_model
from sklearn.linear_model import LinearRegression
import sklearn.metrics as m
import sklearn.linear_model  as lm
import lifelines#.estimation import KaplanMeierFitter
from sklearn.metrics import roc_auc_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.tree import export_graphviz
import statsmodels.formula.api as sm
import patsy
from scipy import stats
from termcolor import colored


## Visualization
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
%matplotlib inline
import plotly as py
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
import plotly.tools as tls
import plotly.graph_objs as go
from plotly.graph_objs import *
from IPython.display import HTML

## DL Framework
import torch
import torch.nn as nn
from torch.nn import Parameter
import torch.nn.functional as F
from torch.autograd import Variable
from torch import optim

### GPU enabling and device allocation
# use_cuda holds whatever PyTorch reports for CUDA availability; it is checked
# later in the notebook before the loaded models are moved with .cuda().
use_cuda = torch.cuda.is_available()
#torch.cuda.set_device(1) ## uncomment to select a specific GPU (here: device index 1)

#use_cuda=False ## uncomment to explicitly disable GPU use even when CUDA is available

from importlib import reload

### import pytorch ehr files
#import sys
#sys.path.insert(0, '../ehr_pytorch')

import pytorch_ehr_3.models as model 
from pytorch_ehr_3.EHRDataloader import EHRdataloader
from pytorch_ehr_3.EHRDataloader import EHRdataFromLoadedPickles as EHRDataset
import pytorch_ehr_3.utils as ut 
from pytorch_ehr_3.EHREmb import EHREmbeddings


### Data Preparation

In [None]:
### Run the preprocessing script as a shell command. Read the header of
### data_preprocess_v5.py for the authoritative description of its arguments.
### Five positional arguments are passed below; judging only by the placeholder
### file names they are: the tab-delimited data file, the tab-delimited label file,
### the medical-code-to-token mapping (.types) file, the output path prefix, and a
### final 'NA' value (meaning not visible here -- confirm in the script header).
### The output prefix 'output_folder/file_prefix' is the same prefix used by the
### pickle files read in the data-loading cell of this notebook.

!python data_preprocess_v5.py data_tab_delimited.txt label_tab_delimited.txt medicalcode_mapping_to_token_file.types output_folder/file_prefix NA


### Data Loading

In [None]:
### Load data
def _load_pickle(path):
    """Unpickle the file at ``path`` and return the stored object.

    The file is opened inside a ``with`` block so the handle is closed as soon
    as loading finishes (the previous inline ``open(...)`` calls were never
    closed). ``encoding='bytes'`` is kept from the original calls.

    NOTE(security): unpickling can execute arbitrary code; only load files
    produced by your own preprocessing run.
    """
    with open(path, 'rb') as fh:
        return pickle.load(fh, encoding='bytes')


# Train / validation / test splits written under the preprocessing output prefix.
train_sl = _load_pickle('output_folder/file_prefix.combined.train')
valid_sl = _load_pickle('output_folder/file_prefix.combined.valid')
test_sl = _load_pickle('output_folder/file_prefix.combined.test')

# types_d is the mapping stored in the .types file; types_d_rev swaps its keys and values.
types_d = _load_pickle('output_folder/file_prefix.types')
types_d_rev = dict(zip(types_d.values(), types_d.keys()))

# One more than the largest value in types_d; used later as the number of
# embedding rows the fine-tuned models need.
new_input_size = max(types_d_rev.keys()) + 1


### Pretrained Models Loading

In [None]:
### Load our models
## Depending on the PyTorch version, unpickling a whole model with torch.load may fail.
## In that case the model is instantiated and its parameters are populated from the
## saved state dictionary instead.
PRETRAINED_DIR = 'CRWD_Pretrained_Models'
PRETRAINED_VOCAB_SIZE = 123641  # input size passed to EHR_RNN for the pretrained models


def _load_pretrained(name, surv=False):
    """Return the pretrained model stored under ``name``.

    First tries to unpickle the complete model from
    ``<PRETRAINED_DIR>/<name>.pth``. If that raises, a fresh ``model.EHR_RNN``
    is built with the hyper-parameters below and filled from
    ``<PRETRAINED_DIR>/state_dicts/<name>.st``.

    Parameters
    ----------
    name : str
        File stem of the checkpoint, e.g. ``'CovRNN_iMort_v552'``.
    surv : bool
        When True, ``surv=True`` is forwarded to ``EHR_RNN`` (used for the
        two survival models); otherwise the argument is not passed at all.

    Tensors are mapped to the CPU while loading so checkpoints saved on a GPU
    can be read on any machine; the models are moved to the GPU further down
    when ``use_cuda`` is set.
    """
    try:
        return torch.load('{}/{}.pth'.format(PRETRAINED_DIR, name), map_location='cpu')
    except Exception as err:  # not a bare except: Ctrl-C / SystemExit still propagate
        print('Could not load {}.pth directly ({}: {}); rebuilding from its state dict.'.format(
            name, type(err).__name__, err))

    rnn_kwargs = dict(embed_dim=64, hidden_size=64, n_layers=1, dropout_r=0.,
                      cell_type='GRU', bii=False, time=True)
    if surv:
        rnn_kwargs['surv'] = True
    net = model.EHR_RNN([PRETRAINED_VOCAB_SIZE], **rnn_kwargs)
    net.load_state_dict(
        torch.load('{}/state_dicts/{}.st'.format(PRETRAINED_DIR, name), map_location='cpu'))
    return net


def _extend_embedding(net, vocab_size):
    """Grow ``net.embed.weight`` to ``vocab_size`` rows by appending zero rows.

    The rows already present are kept unchanged, so every model retains its
    own pretrained embeddings. The zero rows are created with the dtype and
    device of the existing weight so ``torch.cat`` cannot fail on a mismatch.
    Nothing is changed when the embedding already has at least ``vocab_size`` rows.
    """
    old_weight = net.embed.weight
    n_new_rows = vocab_size - old_weight.shape[0]
    if n_new_rows <= 0:
        return net
    zero_rows = torch.zeros(n_new_rows, old_weight.shape[1],
                            dtype=old_weight.dtype, device=old_weight.device)
    net.embed.weight = nn.Parameter(torch.cat((old_weight.detach(), zero_rows)))
    return net


mort_model = _load_pretrained('CovRNN_iMort_v552')
vent_model = _load_pretrained('CovRNN_mVent_v552')
plos_model = _load_pretrained('CovRNN_pLOS_v552')
mort_surv_model = _load_pretrained('CovRNN_iMort_Surv_v552', surv=True)
vent_surv_model = _load_pretrained('CovRNN_mVent_Surv_v552', surv=True)

_pretrained_models = (mort_model, vent_model, plos_model, mort_surv_model, vent_surv_model)

#### For fine-tuning: new vocabulary is added to the embedding layer of every model,
#### so each embedding matrix is extended to new_input_size rows.
for _net in _pretrained_models:
    _extend_embedding(_net, new_input_size)

if use_cuda:
    for _net in _pretrained_models:
        _net.cuda()


### Model Fine-tuning

In [None]:
import fine_tune_utils as ft

In [None]:
# Build the mini-batch loaders for the three splits (batch size 128, pack-padded mode).
[train_mbs, valid_mbs, test_mbs] = ft.load_mbs_var([train_sl, valid_sl, test_sl], packpadmode=True, bs=128)


def _finetune(run_fn, net, task):
    """Fine-tune ``net`` with ``run_fn`` and return whatever ``run_fn`` returns.

    ``task`` is passed through as the ``task`` keyword and also names the two
    output files: ``CovRNN_<task>_finetuned.pth`` and ``CovRNN_<task>_finetuned.st``.
    The test loader is passed wrapped in a list, as in the original calls.
    """
    out_prefix = 'CovRNN_{}_finetuned'.format(task)
    return run_fn(net, train_mbs, valid_mbs, [test_mbs],
                  out_prefix + '.pth', out_prefix + '.st',
                  wmodel='RNN', packpadmode=True, task=task)


# NOTE(review): run_dl_model / run_dl_model_surv are not imported or defined anywhere in
# this notebook, so the unqualified calls raised NameError on a fresh kernel. They are
# taken from fine_tune_utils (imported as ft), like load_mbs_var and pt_predictions --
# confirm that the module exposes them under these names.
(train_auc_allep_m, valid_auc_allep_m, test_auc_allep_m,
 RNN_f_label_m, RNN_f_score_m) = _finetune(ft.run_dl_model, mort_model, 'mort')

(train_auc_allep_v, valid_auc_allep_v, test_auc_allep_v,
 RNN_f_label_v, RNN_f_score_v) = _finetune(ft.run_dl_model, vent_model, 'vent')

(train_auc_allep_p, valid_auc_allep_p, test_auc_allep_p,
 RNN_f_label_p, RNN_f_score_p) = _finetune(ft.run_dl_model, plos_model, 'plos')

(train_auc_allep_ms, valid_auc_allep_ms, test_auc_allep_ms,
 RNN_f_label_ms, RNN_f_score_ms) = _finetune(ft.run_dl_model_surv, mort_surv_model, 'mort_surv')

(train_auc_allep_vs, valid_auc_allep_vs, test_auc_allep_vs,
 RNN_f_label_vs, RNN_f_score_vs) = _finetune(ft.run_dl_model_surv, vent_surv_model, 'vent_surv')


In [None]:
## The RNN_f_label_x / RNN_f_score_x values produced during fine-tuning can already serve
## as true labels and predicted scores; alternatively, reload the saved fine-tuned models
## and run pt_predictions on the data, as done here.
def _load_for_inference(path):
    """Load the object saved at ``path`` with torch.load, call ``.eval()`` on it, return it."""
    net = torch.load(path)
    net.eval()  # assumes an nn.Module, for which eval() switches to evaluation mode
    return net


mort_model_finetuned = _load_for_inference('CovRNN_mort_finetuned.pth')
vent_model_finetuned = _load_for_inference('CovRNN_vent_finetuned.pth')
plos_model_finetuned = _load_for_inference('CovRNN_plos_finetuned.pth')
mort_surv_model_finetuned = _load_for_inference('CovRNN_mort_surv_finetuned.pth')
vent_surv_model_finetuned = _load_for_inference('CovRNN_vent_surv_finetuned.pth')

# Argument order after the data: mortality, mortality-survival, ventilation,
# ventilation-survival, prolonged-LOS model.
newData_preds_afterFinetune = ft.pt_predictions(
    test_sl,
    mort_model_finetuned,
    mort_surv_model_finetuned,
    vent_model_finetuned,
    vent_surv_model_finetuned,
    plos_model_finetuned,
)

In [None]:
## Display the predictions returned by ft.pt_predictions (the cell's last expression is rendered as its output)
newData_preds_afterFinetune

In [None]:
## Write the predictions to newData_preds_afterFinetune_v1.csv; index=False is passed so that
## (assuming a pandas DataFrame) the row index is not written as a column
newData_preds_afterFinetune.to_csv('newData_preds_afterFinetune_v1.csv',index=False)
