The first 3 cells are for Colab use only: get access to drive, unzip the repo and install packages.

In [None]:
## Uncomment if using google colab
#from google.colab import drive
#drive.mount('/content/drive')

In [None]:
## Download zip file from Github and unzip in google colab
#import zipfile
#with zipfile.ZipFile("CovRNN-main.zip","r") as zip_ref:
#    zip_ref.extractall("CovRNN-test")

#%cd drive/My Drive/Colab Notebooks/CovRNN-test/CovRNN-main/Pretrained_Models_usage

In [None]:
## Install required packages on colab
#! pip install lifelines
#! pip install statsmodels

In [None]:
### Tools and Packages
##Basics
import pandas as pd
import numpy as np
import sys, random
import math
# Prefer the C-accelerated cPickle where it exists (Python 2); on Python 3 the
# module is absent and the standard pickle module is used instead.
# Only ImportError is caught so unrelated failures are not silently hidden.
try:
    import cPickle as pickle
except ImportError:
    import pickle
import string
import re
import os
import time
from tqdm import tqdm

## ML and Stats 
from sklearn import datasets, linear_model
from sklearn.linear_model import LinearRegression
import sklearn.metrics as m
import sklearn.linear_model  as lm
import lifelines
from sklearn.metrics import roc_auc_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.tree import export_graphviz
import statsmodels.formula.api as sm
import patsy
from scipy import stats
from termcolor import colored


## Visualization
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
%matplotlib inline
import plotly as py
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
import plotly.tools as tls
import plotly.graph_objs as go
from plotly.graph_objs import *
from IPython.display import HTML

## DL Framework
import torch
import torch.nn as nn
from torch.nn import Parameter
import torch.nn.functional as F
from torch.autograd import Variable
from torch import optim

### GPU enabling and device allocation
# use_cuda is True when PyTorch reports an available CUDA device. It is read
# later in this notebook to decide whether the models are moved to the GPU and
# whether outputs are copied back to the CPU before conversion to NumPy.
use_cuda = torch.cuda.is_available()
#torch.cuda.set_device(1) ## uncomment if you need to select a specific GPU

#use_cuda=False ## uncomment if you explicitly do not want to use the GPU

from importlib import reload

### import pytorch ehr files
#import sys
#sys.path.insert(0, '../ehr_pytorch')

import pytorch_ehr_3.models as model 
from pytorch_ehr_3.EHRDataloader import EHRdataloader
from pytorch_ehr_3.EHRDataloader import EHRdataFromLoadedPickles as EHRDataset
import pytorch_ehr_3.utils as ut 
from pytorch_ehr_3.EHREmb import EHREmbeddings


### Data Preparation

In [None]:
### Read the header of data_preprocess_v5.py (the script invoked below) for more information

!python data_preprocess_v5.py sample_data.txt sample_label.txt CRWD_Pretrained_Models/lr_inhosp_outcome_pred_v1.types output_withoutFT/file_prefix nosplit


### Data Loading

In [None]:
### Load data
# NOTE: unpickling can execute arbitrary code, so only load files that were
# produced by your own run of the preprocessing step.
# The context manager guarantees the file handle is closed once loading ends.
with open('output_withoutFT/file_prefix.combined.all', 'rb') as combined_file:
    test_sl = pickle.load(combined_file, encoding='bytes')
# Last expression of the cell: displays how many records were loaded.
len(test_sl)

In [None]:
#### Patients carrying medical codes that were not seen during pretraining can
## cause errors at prediction time, so they are excluded from the test set here.
##
## For every record, the last element (pt[-1]) is scanned: the entry whose
## second field holds the largest value is selected, and the record is kept only
## when that largest value is below 123642 (the same number passed to
## model.EHR_RNN in the model-loading cell).
## Presumably pt[-1] is the list of visits and visit[1] the code indices of a
## visit -- confirm against the preprocessing script.

test_sl_n = [
    pt
    for pt in test_sl
    if max(max(pt[-1], key=lambda visit: max(visit[1]))[1]) < 123642
]

### Make sure that you use the filtered test_sl_n instead of test_sl in the cells below

In [None]:
### Load our models
## Depending on the PyTorch version, loading the fully pickled models directly
## with torch.load may fail. In that case every model is instantiated from
## model.EHR_RNN and its parameters are populated from the saved state dictionary.

# On a machine without CUDA, remap any CUDA-saved tensors to the CPU while
# loading; with CUDA available torch.load keeps its default placement (None).
load_location = None if use_cuda else torch.device('cpu')


def _covrnn_from_state_dict(state_path):
    """Instantiate an EHR_RNN with the CovRNN settings and load weights from ``state_path``."""
    net = model.EHR_RNN([123642], embed_dim=64, hidden_size=64, n_layers=1, dropout_r=0., cell_type='GRU', bii=False, time=True, surv=True)
    net.load_state_dict(torch.load(state_path, map_location=load_location))
    return net


try:
    mort_model = torch.load('CRWD_Pretrained_Models/CovRNN_iMort_v552.pth', map_location=load_location)
    vent_model = torch.load('CRWD_Pretrained_Models/CovRNN_mVent_v552.pth', map_location=load_location)
    plos_model = torch.load('CRWD_Pretrained_Models/CovRNN_pLOS_v552.pth', map_location=load_location)
    mort_surv_model = torch.load('CRWD_Pretrained_Models/CovRNN_iMort_Surv_v552.pth', map_location=load_location)
    vent_surv_model = torch.load('CRWD_Pretrained_Models/CovRNN_mVent_Surv_v552.pth', map_location=load_location)
except Exception as load_error:
    # Catch Exception rather than using a bare except so that a kernel
    # interrupt is not swallowed, and report why the fallback path is taken.
    print('Direct model loading failed ({!r}); rebuilding the models from their state dictionaries.'.format(load_error))
    mort_model = _covrnn_from_state_dict('CRWD_Pretrained_Models/state_dicts/CovRNN_iMort_v552.st')
    vent_model = _covrnn_from_state_dict('CRWD_Pretrained_Models/state_dicts/CovRNN_mVent_v552.st')
    plos_model = _covrnn_from_state_dict('CRWD_Pretrained_Models/state_dicts/CovRNN_pLOS_v552.st')
    mort_surv_model = _covrnn_from_state_dict('CRWD_Pretrained_Models/state_dicts/CovRNN_iMort_Surv_v552.st')
    vent_surv_model = _covrnn_from_state_dict('CRWD_Pretrained_Models/state_dicts/CovRNN_mVent_Surv_v552.st')

# Move every model to the GPU when one is used, then switch it to evaluation mode.
for covrnn in (mort_model, vent_model, plos_model, vent_surv_model, mort_surv_model):
    if use_cuda:
        covrnn.cuda()
    covrnn.eval()

def pt_predictions(test_set):
    """Run the five loaded models on every record of ``test_set``.

    Each record is wrapped on its own in an ``EHRDataset`` and fed through an
    ``EHRdataloader`` with ``batch_size=1``, so records are scored one at a time
    with gradient tracking disabled.

    Parameters
    ----------
    test_set : iterable
        Records in the format accepted by ``EHRDataset``; ``pt[0]`` is stored
        in the ``pt`` column of the result.

    Returns
    -------
    pandas.DataFrame
        One row per record with the columns ``pt``, ``mort_label``,
        ``mort_tte``, ``mort_prob``, ``mort_logHF``, ``vent_label``,
        ``vent_tte``, ``vent_prob``, ``vent_logHF``, ``plos_label`` and
        ``plos_prob``. An empty ``test_set`` yields an empty frame that still
        carries these columns.
    """
    columns = ['pt', 'mort_label', 'mort_tte', 'mort_prob', 'mort_logHF',
               'vent_label', 'vent_tte', 'vent_prob', 'vent_logHF',
               'plos_label', 'plos_prob']
    pt_preds = []
    with torch.no_grad():
        for pt in test_set:
            pt_id = pt[0]
            pt_ds = EHRDataset([pt], sort=True, model='RNN')
            pt_m = list(EHRdataloader(pt_ds, batch_size=1, packPadMode=True, multilbl=True))
            x1, label, seq_len, time_diff = pt_m[0]

            # .cpu() returns the tensor unchanged when it already lives on the
            # CPU, so a single code path serves both GPU and CPU runs.
            label = label.cpu().squeeze().numpy()
            mort_score = mort_model(x1, seq_len, time_diff).cpu().numpy()
            mort_surv_score = mort_surv_model(x1, seq_len, time_diff).cpu().numpy()
            vent_score = vent_model(x1, seq_len, time_diff).cpu().numpy()
            vent_surv_score = vent_surv_model(x1, seq_len, time_diff).cpu().numpy()
            plos_score = plos_model(x1, seq_len, time_diff).cpu().numpy()

            # Label positions 0-3 and 5 are used; position 4 is not read here.
            # NOTE(review): confirm this matches the label layout written by
            # the preprocessing script.
            pt_preds.append([pt_id, label[0], label[1], mort_score, mort_surv_score,
                             label[2], label[3], vent_score, vent_surv_score,
                             label[5], plos_score])

    # Passing the names to the constructor (instead of assigning .columns
    # afterwards) keeps an empty input from raising a length-mismatch error.
    return pd.DataFrame(pt_preds, columns=columns)


In [None]:
# Score the filtered records (test_sl_n) with pt_predictions and keep the
# resulting DataFrame for display and export.
newData_preds=pt_predictions(test_sl_n)

In [None]:
# Bare expression as the last line of the cell so the notebook renders the table.
newData_preds

In [None]:
# Write the predictions to newData_preds_v1.csv (relative path);
# index=False leaves the DataFrame row index out of the file.
newData_preds.to_csv('newData_preds_v1.csv',index=False)
