In [None]:
import warnings
with warnings.catch_warnings():
    warnings.filterwarnings("ignore",category=DeprecationWarning)
    warnings.filterwarnings("ignore",category=FutureWarning)
    warnings.filterwarnings("ignore",category=UserWarning)
    import sklearn
    import h5py
    import tensorflow.keras

import numpy as np    
np.random.seed(1337) # for reproducibility

import tensorflow as tf 

import nltk
nltk.download('punkt') 
import codecs
import jellyfish
from tensorflow.keras import backend as K
from tensorflow.keras.preprocessing.text import text_to_word_sequence
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Layer
from nltk import tokenize
from attention import AttLayer

# Set parameters:
max_features = 150000           # Vocabulary cap: only word indices strictly below this value are written into X_test
maxlen = 27                     # Maximum number of tokens kept per segment (size of the last axis of X_test)
maxsents = 211                  # Number of '<>'-separated segments read from each input line (size of the middle axis of X_test)

# Defining the loss function as the combination of the binary cross-entropy (BCE) with the log-cosh Tversky loss (LCTL):
def LCTL(y_true, y_pred, beta):
    """Log-cosh Tversky loss.

    Both tensors are flattened (batch axis kept) and the targets are rounded,
    then a Tversky-style ratio is reduced over the last axis.

    Args:
        y_true: Target tensor; rounded to the nearest integer before use.
        y_pred: Predicted scores, same layout as ``y_true``.
        beta: Weight of the false-positive term; false negatives are weighted
            by ``1 - beta``.

    Returns:
        Tensor holding ``log(cosh(1 - ratio))``, reduced over the last axis of
        the flattened inputs.
    """
    targets = tf.math.round(tf.compat.v1.layers.flatten(y_true))
    scores = tf.compat.v1.layers.flatten(y_pred)

    true_pos = targets * scores
    false_pos = (1 - targets) * scores
    false_neg = targets * (1 - scores)

    overlap = tf.reduce_sum(true_pos, axis=-1)
    weighted_total = tf.reduce_sum(
        true_pos + beta * false_pos + (1 - beta) * false_neg, axis=-1
    )

    # The +1 on both sides keeps the ratio defined when a sample has no positives.
    tversky_loss = 1 - (overlap + 1) / (weighted_total + 1)

    # log(cosh(x)) written out with exponentials.
    return K.log((K.exp(tversky_loss) + K.exp(-tversky_loss)) / 2.0)

# Shared binary cross-entropy loss object, created with Keras' default settings.
BCE = tf.keras.losses.BinaryCrossentropy()

def loss_function(alpha, beta):
    """Build the combined BCE / log-cosh Tversky loss.

    Args:
        alpha: Weight of the binary cross-entropy term; the LCTL term is
            weighted by ``1 - alpha``.
        beta: Forwarded unchanged to ``LCTL``.

    Returns:
        A callable ``loss(y_true, y_pred)`` suitable for Keras.
    """
    # The inner function is deliberately named ``loss``: the model is reloaded
    # below with a custom_objects entry under that same name.
    def loss(y_true, y_pred):
        bce_term = alpha * BCE(y_true, y_pred)
        tversky_term = (1 - alpha) * LCTL(y_true, y_pred, beta)
        return bce_term + tversky_term

    return loss

# Load dictionary
# Used below as a token -> integer index mapping (word_index.get / word_index.items()).
# NOTE(review): allow_pickle=True unpickles the file content, which can execute
# arbitrary code -- only load files from a trusted source.
word_index = np.load('DICT.npy', allow_pickle=True).item()

# Load the label encoder object. It is not used as a plain dictionary: the output
# cell calls le.inverse_transform([...]) to map output column indices back to labels.
# NOTE(review): same pickle caveat as above.
le = np.load('LABEL_ENCODER.npy', allow_pickle=True).item()

print('Load model...')
# custom_objects supplies the custom AttLayer layer and the closure returned by
# loss_function (whose inner function is named "loss") so Keras can rebuild the model.
# NOTE(review): alpha=0.9 / beta=0.5 are presumably the training-time values -- confirm.
model = load_model('model.h5', custom_objects = {"AttLayer": AttLayer, "loss": loss_function(alpha=0.9, beta=0.5)})

In [None]:
print('Loading data...')

# One document per line. The context manager closes the file handle as soon as
# the lines are in memory (a bare codecs.open() in a comprehension leaks it).
with codecs.open('example_test.txt', encoding="utf-8") as test_file:
    texts_test = [line.rstrip('\n') for line in test_file]

# Discharge summaries: ds[m][i] is the m-th '<>'-separated segment of document i.
# Each line is split exactly once. Lines with fewer than maxsents segments are
# padded with empty strings (their slots simply stay all-zero in X_test) instead
# of raising IndexError; segments beyond maxsents are ignored.
split_lines = [line.split('<>') for line in texts_test]
ds = [
    [parts[m] if m < len(parts) else '' for parts in split_lines]
    for m in range(maxsents)
]

print('Computing Testing Set...')

# Shape: (documents, segments per document, tokens per segment); 0 means "no token".
X_test = np.zeros((len(texts_test), maxsents, maxlen), dtype='int32')

# jellyfish renamed jaro_winkler to jaro_winkler_similarity; use whichever name
# the installed version provides.
try:
    string_similarity = jellyfish.jaro_winkler_similarity
except AttributeError:
    string_similarity = jellyfish.jaro_winkler

# Out-of-vocabulary token -> index of its most similar vocabulary entry.
# Each lookup scans the whole vocabulary, so every distinct token is resolved once.
nearest_index_cache = {}

print('Loading discharge summaries...')

for m, part in enumerate(ds):
    for i, segment in enumerate(part):
        k = 0  # next free token position in X_test[i, m]
        for sent in tokenize.sent_tokenize(segment):
            if k >= maxlen:
                break  # segment is full; nothing further can be written
            for word in text_to_word_sequence(sent):
                if k >= maxlen:
                    break
                index = word_index.get(word)
                if index is None:
                    # Unknown token: fall back to the vocabulary entry with the
                    # highest Jaro-Winkler similarity (ties go to the larger index,
                    # as with max() over (score, index) tuples).
                    if word not in nearest_index_cache:
                        nearest_index_cache[word] = max(
                            (string_similarity(known, word), known_index)
                            for known, known_index in word_index.items()
                        )[1]
                    index = nearest_index_cache[word]
                # Indices at or above the vocabulary cap are dropped without
                # consuming a position.
                if index < max_features:
                    X_test[i, m, k] = index
                    k = k + 1

In [None]:
# Sanity check: X_test is allocated as (number of documents, maxsents, maxlen).
print('X_test shape:', X_test.shape)

In [22]:
# Persist the encoded test tensor so the (slow) encoding step does not have to be repeated.
np.save('X_test.npy', X_test)

In [None]:
print('Predicting...')

# Run the model over the encoded documents, three documents per batch.
# NOTE(review): presumably one score per label column -- confirm against the model's output layer.
prediction_matrix = model.predict(X_test, batch_size=3)

# Hard 0/1 decisions. np.round rounds halves to the nearest even value, so a score
# of exactly 0.5 becomes 0 -- consistent with the strict "> 0.5" test in the output cell.
y_pred = np.round(prediction_matrix)

In [24]:
# Persist both the raw model outputs and their rounded counterpart.
np.save('prediction_matrix.npy', prediction_matrix)
np.save('y_pred.npy', y_pred)

In [None]:
print('Writing output...')

# pred[i] lists the decoded labels of every output column whose score for
# document i is strictly above 0.5.
pred = []

for scores in prediction_matrix:
    positive_columns = [j for j, score in enumerate(scores) if score > 0.5]
    # One inverse_transform call per document maps the column indices back to
    # labels; the call is skipped when no column fired.
    labels = list(le.inverse_transform(positive_columns)) if positive_columns else []
    pred.append(labels)

# One line per document, labels separated by single spaces (empty line when no
# label was predicted). The rows differ in length, so np.savetxt cannot be used:
# it first converts the ragged list to an array, which raises ValueError on
# NumPy >= 1.24 and wrote Python list reprs on older versions.
# NOTE(review): the file name is kept for downstream compatibility even though
# the content is plain text, not NumPy's binary .npy format.
with open('predictions.npy', 'w', encoding='utf-8') as out_file:
    for labels in pred:
        out_file.write(' '.join(str(label) for label in labels) + '\n')