In [1]:
import numpy as np  
import pandas as pd 
import re           
from bs4 import BeautifulSoup 
from keras.preprocessing.text import Tokenizer 
from keras.preprocessing.sequence import pad_sequences
from nltk.corpus import stopwords   
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense, Concatenate, TimeDistributed, Bidirectional
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
import warnings
pd.set_option("display.max_colwidth", 200)
warnings.filterwarnings("ignore")

Using TensorFlow backend.


# Read Data

In [2]:
# Load the report table from a CSV in the working directory; later cells read
# its FINDINGS (model input) and IMPRESSION (target summary) columns.
data=pd.read_csv("hot_topics_data_100K.csv")

# Only select the first 1000 to try the model

In [3]:
# Keep only the first 1000 rows (by position) so the pipeline runs quickly.
data=data.iloc[:1000,]

# Preprocess data

In [4]:
# Keep one row per distinct FINDINGS text, then discard every row that still
# has a missing value in any column.
data = data.drop_duplicates(subset=['FINDINGS']).dropna(axis=0)

In [5]:
data.shape

(993, 3)

In [6]:
#Lookup table mapping English contractions to their expanded forms; the cleaning functions below use it to expand contractions before non-letter characters are replaced.
contraction_mapping = {"ain't": "is not", "aren't": "are not","can't": "cannot", "'cause": "because", "could've": "could have", "couldn't": "could not",

                           "didn't": "did not", "doesn't": "does not", "don't": "do not", "hadn't": "had not", "hasn't": "has not", "haven't": "have not",

                           "he'd": "he would","he'll": "he will", "he's": "he is", "how'd": "how did", "how'd'y": "how do you", "how'll": "how will", "how's": "how is",

                           "I'd": "I would", "I'd've": "I would have", "I'll": "I will", "I'll've": "I will have","I'm": "I am", "I've": "I have", "i'd": "i would",

                           "i'd've": "i would have", "i'll": "i will",  "i'll've": "i will have","i'm": "i am", "i've": "i have", "isn't": "is not", "it'd": "it would",

                           "it'd've": "it would have", "it'll": "it will", "it'll've": "it will have","it's": "it is", "let's": "let us", "ma'am": "madam",

                           "mayn't": "may not", "might've": "might have","mightn't": "might not","mightn't've": "might not have", "must've": "must have",

                           "mustn't": "must not", "mustn't've": "must not have", "needn't": "need not", "needn't've": "need not have","o'clock": "of the clock",

                           "oughtn't": "ought not", "oughtn't've": "ought not have", "shan't": "shall not", "sha'n't": "shall not", "shan't've": "shall not have",

                           "she'd": "she would", "she'd've": "she would have", "she'll": "she will", "she'll've": "she will have", "she's": "she is",

                           "should've": "should have", "shouldn't": "should not", "shouldn't've": "should not have", "so've": "so have","so's": "so as",

                           "this's": "this is","that'd": "that would", "that'd've": "that would have", "that's": "that is", "there'd": "there would",

                           "there'd've": "there would have", "there's": "there is", "here's": "here is","they'd": "they would", "they'd've": "they would have",

                           "they'll": "they will", "they'll've": "they will have", "they're": "they are", "they've": "they have", "to've": "to have",

                           "wasn't": "was not", "we'd": "we would", "we'd've": "we would have", "we'll": "we will", "we'll've": "we will have", "we're": "we are",

                           "we've": "we have", "weren't": "were not", "what'll": "what will", "what'll've": "what will have", "what're": "what are",

                           "what's": "what is", "what've": "what have", "when's": "when is", "when've": "when have", "where'd": "where did", "where's": "where is",

                           "where've": "where have", "who'll": "who will", "who'll've": "who will have", "who's": "who is", "who've": "who have",

                           "why's": "why is", "why've": "why have", "will've": "will have", "won't": "will not", "won't've": "will not have",

                           "would've": "would have", "wouldn't": "would not", "wouldn't've": "would not have", "y'all": "you all",

                           "y'all'd": "you all would","y'all'd've": "you all would have","y'all're": "you all are","y'all've": "you all have",

                           "you'd": "you would", "you'd've": "you would have", "you'll": "you will", "you'll've": "you will have",

                           "you're": "you are", "you've": "you have"}

In [7]:
# English stop-word set from NLTK; text_cleaner drops these words.
# NOTE(review): presumably requires the NLTK 'stopwords' corpus to be downloaded beforehand — confirm.
stop_words = set(stopwords.words('english')) 
def text_cleaner(text):
    """Normalise one source text into a space-separated string of keywords.

    Steps, in order: lower-case, strip markup via BeautifulSoup, remove
    parenthesised spans and double quotes, expand contractions found in
    ``contraction_mapping``, drop possessive ``'s``, replace every non-letter
    with a space, then keep only words that are not in ``stop_words`` and are
    at least three characters long.
    """
    lowered = BeautifulSoup(text.lower(), "lxml").text
    without_parens = re.sub(r'\([^)]*\)', '', lowered)
    unquoted = re.sub('"', '', without_parens)
    # Contractions are matched on single-space-separated pieces only.
    expanded = ' '.join(contraction_mapping.get(piece, piece) for piece in unquoted.split(" "))
    without_possessive = re.sub(r"'s\b", "", expanded)
    letters_only = re.sub("[^a-zA-Z]", " ", without_possessive)
    kept_words = [
        word
        for word in letters_only.split()
        if word not in stop_words and len(word) >= 3  # drop stop words and short words
    ]
    return " ".join(kept_words)

# Clean every FINDINGS entry, preserving row order.
cleaned_text = [text_cleaner(findings) for findings in data['FINDINGS']]

In [8]:
def summary_cleaner(text):
    """Normalise one summary into lower-case words, each followed by a space.

    Double quotes are removed, contractions found in ``contraction_mapping``
    are expanded, possessive ``'s`` is dropped, every non-letter becomes a
    space, and single-character words are discarded. Stop words are kept.
    The result ends with a trailing space, or is ``''`` when no word survives.
    """
    unquoted = re.sub('"', '', text)
    # Contractions are matched before lower-casing, on single-space-separated pieces.
    expanded = ' '.join(contraction_mapping.get(piece, piece) for piece in unquoted.split(" "))
    without_possessive = re.sub(r"'s\b", "", expanded)
    letters_only = re.sub("[^a-zA-Z]", " ", without_possessive).lower()
    return ''.join(word + ' ' for word in letters_only.split() if len(word) > 1)

# Clean every IMPRESSION entry, preserving row order.
cleaned_summary = [summary_cleaner(impression) for impression in data['IMPRESSION']]

data['cleaned_text'] = cleaned_text
data['cleaned_summary'] = cleaned_summary

# Summaries that are empty after cleaning become NaN so that dropna() removes
# those rows. The result is assigned back rather than calling
# replace(..., inplace=True) on the column selection: an in-place call on a
# selected column does not update `data` when pandas Copy-on-Write is active,
# which would silently leave the empty summaries in place.
data['cleaned_summary'] = data['cleaned_summary'].replace('', np.nan)
data = data.dropna(axis=0)

In [9]:
# Wrap every cleaned summary in the start ('sostok') and end ('eostok') marker
# tokens that the decoder uses to begin and stop generation.
data['cleaned_summary'] = 'sostok ' + data['cleaned_summary'] + ' eostok'

In [10]:
# Inspect how many words the cleaned findings and the cleaned summaries
# contain, to get an overall idea of the length distributions.
import matplotlib.pyplot as plt

# Word count per row (whitespace-separated tokens).
text_word_count = [len(findings.split()) for findings in data['cleaned_text']]
summary_word_count = [len(summary.split()) for summary in data['cleaned_summary']]

length_df = pd.DataFrame({'text': text_word_count, 'summary': summary_word_count})
length_df.hist(bins=30)
plt.show()

<Figure size 640x480 with 2 Axes>

In [11]:
# Maximum sequence lengths (in tokens), chosen by eye from the histogram above.
# They are used as the pad_sequences `maxlen` for the findings text and for the
# summaries respectively.
max_len_text=250 
max_len_summary=80

In [12]:
from sklearn.model_selection import train_test_split
# Hold out 10% of the rows for validation; random_state=0 makes the shuffled
# split repeatable.
x_tr,x_val,y_tr,y_val=train_test_split(data['cleaned_text'],data['cleaned_summary'],test_size=0.1,random_state=0,shuffle=True) 


In [13]:
# Prepare a tokenizer for the source (findings) text, fitted on the training
# split only so the validation vocabulary does not leak into it.
x_tokenizer = Tokenizer()
x_tokenizer.fit_on_texts(list(x_tr))

# Convert texts into integer sequences. x_tr / x_val are overwritten here, so
# this cell is not safe to run twice without re-running the split above.
x_tr    =   x_tokenizer.texts_to_sequences(x_tr) 
x_val   =   x_tokenizer.texts_to_sequences(x_val)

# Zero-pad at the end up to max_len_text.
# NOTE(review): pad_sequences truncates from the front by default, so texts
# longer than max_len_text lose their beginning — confirm this is intended.
x_tr    =   pad_sequences(x_tr,  maxlen=max_len_text, padding='post') 
x_val   =   pad_sequences(x_val, maxlen=max_len_text, padding='post')

# Vocabulary size for the encoder Embedding layer; +1 leaves room for the
# padding index 0.
x_voc_size   =  len(x_tokenizer.word_index) +1

In [14]:
# Prepare a tokenizer for the summaries, fitted on the training split only.
y_tokenizer = Tokenizer()
y_tokenizer.fit_on_texts(list(y_tr))

# Convert summaries into integer sequences. y_tr / y_val are overwritten here,
# so this cell is not safe to run twice without re-running the split above.
y_tr = y_tokenizer.texts_to_sequences(y_tr)
y_val = y_tokenizer.texts_to_sequences(y_val)

# Zero-pad at the end up to max_len_summary, and truncate at the end as well.
# pad_sequences defaults to truncating='pre', which would cut the leading
# 'sostok' off every summary longer than max_len_summary; the decoder would
# then be trained on inputs that never start with the token used to start
# decoding at inference time.
y_tr = pad_sequences(y_tr, maxlen=max_len_summary, padding='post', truncating='post')
y_val = pad_sequences(y_val, maxlen=max_len_summary, padding='post', truncating='post')

# Vocabulary size for the decoder Embedding / output layer; +1 leaves room for
# the padding index 0.
y_voc_size = len(y_tokenizer.word_index) + 1

In [15]:
# Build the training-time encoder-decoder (seq2seq) model.
#
# Input, LSTM, Embedding, Dense, TimeDistributed and Model are already imported
# in the first cell, so they are not imported again here. Every layer below
# comes from tensorflow.keras, so the backend is taken from the same package
# rather than from standalone `keras`.
from tensorflow.keras import backend as K

K.clear_session()

latent_dim = 500      # size of every LSTM's hidden state
embedding_dim = 200   # size of the learned word embeddings
# The encoder input length must equal the length x_tr / x_val were padded to,
# so reuse that constant instead of repeating the literal.
max_text_len = max_len_text

# ---- Encoder: embedding followed by three stacked LSTMs ----
encoder_inputs = Input(shape=(max_text_len,))

enc_emb = Embedding(x_voc_size, embedding_dim, trainable=True)(encoder_inputs)

# Encoder LSTM 1
encoder_lstm1 = LSTM(latent_dim, return_sequences=True, return_state=True)
encoder_output1, state_h1, state_c1 = encoder_lstm1(enc_emb)

# Encoder LSTM 2
encoder_lstm2 = LSTM(latent_dim, return_sequences=True, return_state=True)
encoder_output2, state_h2, state_c2 = encoder_lstm2(encoder_output1)

# Encoder LSTM 3 — only its final states (state_h, state_c) feed the decoder.
encoder_lstm3 = LSTM(latent_dim, return_state=True, return_sequences=True)
encoder_outputs, state_h, state_c = encoder_lstm3(encoder_output2)

# ---- Decoder: initialised with the last encoder LSTM's final states ----
decoder_inputs = Input(shape=(None,))

# The embedding layer object is kept so the inference model can reuse it.
dec_emb_layer = Embedding(y_voc_size, embedding_dim, trainable=True)
dec_emb = dec_emb_layer(decoder_inputs)

decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True, dropout=0.4, recurrent_dropout=0.2)
# Despite their names, the two extra return values are this LSTM's hidden and
# cell states (return_state=True); the names are kept unchanged in case other
# cells refer to them.
decoder_outputs, decoder_fwd_state, decoder_back_state = decoder_lstm(dec_emb, initial_state=[state_h, state_c])

# Per-timestep softmax over the summary vocabulary.
decoder_dense = TimeDistributed(Dense(y_voc_size, activation='softmax'))
decoder_outputs = decoder_dense(decoder_outputs)

# Define the model: (source tokens, decoder input tokens) -> next-token distribution.
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 250)]        0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, 250, 200)     774200      input_1[0][0]                    
__________________________________________________________________________________________________
lstm (LSTM)                     [(None, 250, 500), ( 1402000     embedding[0][0]                  
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, None)]       0                                            
______________________________________________________________________________________________

In [16]:
# The sparse variant of cross-entropy takes integer token ids as targets, which
# is what the training cell feeds in (no one-hot encoding of the summaries).
model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy')

In [17]:
# Stop training early once the validation loss has stopped improving
# (patience of 2 epochs); verbose=1 reports when that happens.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1,patience=2)

In [18]:
# Teacher forcing: the decoder input is the summary without its last position
# (y[:, :-1]) and the target is the same summary shifted left by one position
# (y[:, 1:]), reshaped to carry a trailing axis of size 1.
history=model.fit([x_tr,y_tr[:,:-1]], y_tr.reshape(y_tr.shape[0],y_tr.shape[1], 1)[:,1:], callbacks=[es],epochs=25,batch_size=64, validation_data=([x_val,y_val[:,:-1]], y_val.reshape(y_val.shape[0],y_val.shape[1], 1)[:,1:]))

Train on 893 samples, validate on 100 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [19]:
# Lookup tables used when decoding predictions and printing examples:
# index -> word for summaries and for source text, and word -> index for summaries.
reverse_target_word_index=y_tokenizer.index_word 
reverse_source_word_index=x_tokenizer.index_word 
target_word_index=y_tokenizer.word_index

In [20]:
# Inference-time encoder: maps a padded source sequence to the last encoder
# LSTM's full output sequence plus its final hidden and cell states.
encoder_model = Model(inputs=encoder_inputs,outputs=[encoder_outputs, state_h, state_c])

# Decoder setup
# The tensors below hold the decoder states from the previous time step.
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
# NOTE(review): this input is listed among decoder_model's inputs but is never
# connected to any layer in this cell, so the encoder output sequence passed by
# decode_sequence has no effect on the prediction — confirm whether an
# attention layer was meant to consume it.
decoder_hidden_state_input = Input(shape=(max_text_len,latent_dim))

# Get the embeddings of the decoder sequence (reuses the trained embedding layer)
dec_emb2= dec_emb_layer(decoder_inputs) 
# To predict the next word in the sequence, set the initial states to the states from the previous time step.
# NOTE(review): state_h2 / state_c2 rebind the names of the second encoder
# LSTM's states from the model-building cell.
decoder_outputs2, state_h2, state_c2 = decoder_lstm(dec_emb2, initial_state=[decoder_state_input_h, decoder_state_input_c])

# A dense softmax layer to generate prob dist. over the target vocabulary
decoder_outputs2 = decoder_dense(decoder_outputs2) 

# Final decoder model: (tokens, encoder outputs, h, c) -> (token distribution, new h, new c)
decoder_model = Model(
    [decoder_inputs] + [decoder_hidden_state_input, decoder_state_input_h, decoder_state_input_c],
    [decoder_outputs2] + [state_h2, state_c2])

In [21]:
def decode_sequence(input_seq, max_summary_len=None):
    """Greedily decode a summary for one source sequence.

    The encoder is run once; the decoder is then called one token at a time,
    each call receiving the previously chosen token and the previous LSTM
    states, and the arg-max token is taken at every step.

    Args:
        input_seq: Encoder input, passed unchanged to ``encoder_model.predict``
            (the display loop in this notebook passes one padded row reshaped
            to ``(1, max_text_len)``).
        max_summary_len: Length budget for the summary; decoding stops once
            ``max_summary_len - 1`` words have been produced. Defaults to the
            notebook-level ``max_len_summary`` instead of a hard-coded 80.

    Returns:
        The decoded words, each preceded by one space (so a non-empty result
        starts with a space). ``'eostok'`` is never part of the result.
    """
    if max_summary_len is None:
        max_summary_len = max_len_summary
    word_limit = max_summary_len - 1

    # Encode the input as state vectors.
    e_out, e_h, e_c = encoder_model.predict(input_seq)

    # Length-1 target sequence holding the start token.
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = target_word_index['sostok']

    decoded_words = []
    while True:
        output_tokens, h, c = decoder_model.predict([target_seq] + [e_out, e_h, e_c])

        # Greedy choice of the next token.
        sampled_token_index = int(np.argmax(output_tokens[0, -1, :]))
        # Index 0 is the padding id and has no entry in the index->word table;
        # the model can predict it because padded targets are part of
        # training. Treat it like the end token instead of raising KeyError.
        sampled_token = reverse_target_word_index.get(sampled_token_index)
        if sampled_token is None or sampled_token == 'eostok':
            break

        decoded_words.append(sampled_token)
        if len(decoded_words) >= word_limit:
            break

        # Feed the chosen token and the new states into the next step.
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = sampled_token_index
        e_h, e_c = h, c

    return ''.join(' ' + word for word in decoded_words)

In [45]:
# Scratch check: run the encoder on training row 4 to inspect its outputs.
# NOTE(review): the execution count (45) is out of sequence.
e_out, e_h, e_c=encoder_model.predict(x_tr[4].reshape(1,max_text_len))

In [46]:
# Scratch check: display the input list that decode_sequence hands to decoder_model.predict.
# NOTE(review): target_seq is only assigned in a cell further down the file
# (execution count 43), so this cell fails on a fresh Restart & Run All.
[target_seq] + [e_out, e_h, e_c]

[array([[3.]]),
 array([[[-0.00215942, -0.0012729 ,  0.00221718, ...,  0.00210001,
           0.00251522,  0.00107004],
         [-0.01443351,  0.0016368 ,  0.00877004, ...,  0.00874334,
           0.00795149,  0.00175459],
         [-0.05629646,  0.01900551,  0.02334377, ...,  0.02191937,
           0.01630341,  0.00449389],
         ...,
         [ 0.00186641,  0.97853106, -0.85139674, ..., -0.2945873 ,
          -0.9760944 ,  0.00651355],
         [ 0.00186641,  0.97853106, -0.85139674, ..., -0.29458734,
          -0.9760944 ,  0.00651355],
         [ 0.00186641,  0.97853106, -0.85139674, ..., -0.29458734,
          -0.9760944 ,  0.00651355]]], dtype=float32),
 array([[ 1.86641444e-03,  9.78531063e-01, -8.51396739e-01,
         -2.01415271e-03,  4.48722579e-03, -2.72983313e-03,
          2.16037151e-03,  2.98018940e-03,  3.09461087e-01,
          2.52694613e-03,  7.95588493e-01,  6.55769836e-04,
          4.05759364e-02,  1.35561591e-03, -2.04600804e-02,
         -3.26213002e-01, -9

In [43]:
# Scratch check: a single decoder step starting from the start token.
# NOTE(review): the execution count (43) is out of sequence; the cell relies on
# e_out, e_h and e_c left over from the encoder scratch cell.
# Generate empty target sequence of length 1.
target_seq = np.zeros((1,1))

# Populate the first word of target sequence with the start word.
target_seq[0, 0] = target_word_index['sostok']
decoder_model.predict([target_seq] + [e_out, e_h, e_c])

[array([[[1.21241119e-05, 2.02287876e-04, 6.38994155e-03, ...,
          6.80696758e-06, 4.98498048e-05, 1.12586295e-05]]], dtype=float32),
 array([[ 1.85633644e-01,  3.15331697e-01, -3.73987615e-01,
         -6.36181176e-01,  5.21464348e-01, -8.46351013e-02,
          9.87593889e-01,  9.95265663e-01,  7.82894433e-01,
          5.18841147e-01,  4.34813023e-01,  4.76337522e-01,
         -3.35504174e-01,  7.22437680e-01, -4.99901026e-01,
         -6.42068207e-01, -9.93036449e-01,  2.85810679e-01,
          3.44508708e-01, -5.25260150e-01, -3.95626456e-01,
         -1.05348960e-01,  5.30796885e-01,  6.93649650e-01,
          1.00242466e-01,  4.93848294e-01,  5.95312953e-01,
          1.67777747e-01, -4.83431727e-01, -9.06609297e-01,
          1.00781068e-01,  9.76369977e-01, -4.24500108e-01,
         -8.60313237e-01,  4.90910918e-01, -3.20214987e-01,
         -1.37936041e-01, -6.87173367e-01, -9.37492788e-01,
         -7.81402215e-02, -4.52462167e-01,  4.22538161e-01,
          3.17284197

In [22]:
def seq2summary(input_seq):
    """Turn a sequence of summary token ids back into text.

    Padding (0) and the 'sostok' / 'eostok' marker ids are skipped; every
    remaining id is looked up in ``reverse_target_word_index`` and followed by
    a single space, so a non-empty result ends with a space.
    """
    pieces = []
    for token_id in input_seq:
        if (token_id == 0
                or token_id == target_word_index['sostok']
                or token_id == target_word_index['eostok']):
            continue
        pieces.append(reverse_target_word_index[token_id] + ' ')
    return ''.join(pieces)

def seq2text(input_seq):
    """Turn a sequence of source token ids back into text.

    Padding ids (0) are skipped; every other id is looked up in
    ``reverse_source_word_index`` and followed by a single space.
    """
    return ''.join(
        reverse_source_word_index[token_id] + ' '
        for token_id in input_seq
        if token_id != 0
    )

In [23]:
# Show source text, reference summary and model output for the first 100
# training rows.
for i in range(100):
    source_row = x_tr[i]
    reference_row = y_tr[i]
    print("Review:", seq2text(source_row))
    print("Original summary:", seq2summary(reference_row))
    print("Predicted summary:", decode_sequence(source_row.reshape(1, max_text_len)))
    print("\n")

Review: abdomen lung bases significant abnormality noted liver biliary tract liver cirrhotic morphology large heterogeneous hepatic mass segment mass effect adjacent vasculature poorly defined difficult accurately measure least dimension right left main portal veins opacified compatible thrombosis questionable enhancement portal vein thrombus raising question tumor infiltration vessels main portal vein smv splenic veins remain patent scattered additional subcentimeter hepatic hypodensities nonspecific prominent gastrohepatic lymph nodes identified spleen significant abnormality notedpancreas significant abnormality notedadrenal glands significant abnormality notedkidneys ureters significant abnormality notedretroperitoneum lymph nodes small prominent lymph nodes seen porta hepatis region bowel mesentery focal wall thickening cecum proximal ascending colon moderate surrounding fluid inflammation focus adjacent air appears extra luminal suggestive contained focal microperforation scatter

Predicted summary:  no evidence of metastatic disease


Review: chest lungs pleura several calcified granulomas present significantly changed compared prior mediastinum hila stable thyroid nodule atherosclerosis present aortic arch coronary arteries chest wall right central venous access port terminates right atrium stable breast nodules described previously correlate mammogram abdomen liver biliary tract biliary cysts unchanged sludge versus stones gallbladder noted spleen significant abnormality noted pancreas significant abnormality noted adrenal glands significant abnormality noted kidneys ureters subcentimeter hypoattenuating foci present kidneys bilaterally small characterize may represent cysts retroperitoneum lymph nodes extensive atherosclerosis present aorta branches stable ivc filter cava remains atretic filter bowel mesentery significant abnormality notedbones soft tissues significant abnormality notedother prominent collateral vessels present abdominal wall pelvis noted pr

Predicted summary:  no evidence of metastatic disease


Review: csf spaces appropriate patient stated age midline shift mild degree periventricular subcortical punctate hyperintense white matter lesions present identified flair images unchanged interval new lesions posterior fossa lesions patient status post suboccipital craniotomy chiari unchanged appearancemild gyral asymmetry parietal lobes stable may simply represent developmental asymmetry redemonstration small focus susceptibility effect right centrum semi ovale associated central flair signal hyperintensity unchanged appearance consistent developmental venous anomaly may associated small cavernous malformationno abnormal mass lesions appreciated intracranially intracranial hemorrhage identified edema identified within brain parenchyma normal vascular flow voids present distal carotid vertebral arteries basilar artery proximal anterior middle posterior cerebral arteries well internal cerebral veins superior sagittal sinus visuali

Predicted summary:  no evidence of metastatic disease


Review: study limited due lack contrast abdomen lung bases dependent atelectasis lung bases cardiomegaly liver biliary tract significant abnormality notedspleen significant abnormality notedpancreas significant abnormality notedadrenal glands significant abnormality notedkidneys ureters bilateral numerous hypodense focal lesions kidneys especially right kidney lesions measure fluid density lack contrast limits optimal evaluation lesions retroperitoneum lymph nodes significant abnormality noted bowel mesentery significant abnormality noted bones soft tissues significant abnormality notedother significant abnormality notedpelvis prostate seminal vesicles significant abnormality notedbladder significant abnormality notedlymph nodes significant abnormality notedbowel mesentery significant abnormality notedbones soft tissues significant abnormality notedother significant abnormality noted 
Original summary: limited study due to lack of

Predicted summary:  no evidence of metastatic disease


Review: sternotomy fixation hardware mediastinal clips pacer leads unchanged mediastinal drains removed cardiomediastinal silhouette stable lung volumes low small left pleural effusion seen linear interstitial airspace opacities 
Original summary: interval removal of mediastinal drains otherwise accounting for inflation stable examination with left pleural effusion bilateral edema and atelectasis 
Predicted summary:  no evidence of metastatic disease


Review: left tube tip stomach stable cardiomediastinal silhouette minimal basilar scarring discoid atelectasis similar prior exam new pulmonary opacities identified pleural effusions 
Original summary: dobbhoff tube with its tip in the stomach stable cardiopulmonary appearance without acute abnormality no specific evidence of infection 
Predicted summary:  no evidence of metastatic disease


Review: since prior exam lateral third ventricles significantly increased size fourth ventri

Predicted summary:  no evidence of metastatic disease


Review: minimal right apical pneumothorax lung margin millimeters chest wall mild right lower lung opacity compatible atelectasis small effusion pacemaker leads expected locations 
Original summary: minimal right pneumothorax 
Predicted summary:  no evidence of metastatic disease


Review: chest lungs pleura scattered micronodules calcified compatible previous infection small focal scars bilaterally suspicious nodules mediastinum hila mildly enlarged right hilar subcarinal lymph nodes unchanged likely benign pericardial effusion coronary artery calcification moderate chest wall degenerative disease spine abdomen absence enteric contrast material limits sensitivity abdominal pathology liver biliary tract multiple hepatic cysts spleen significant abnormality noted adrenal glands partially calcified slightly nodular right adrenal gland unchanged kidneys ureters right renal exophytic cyst pancreas significant abnormality noted retrope

Predicted summary:  no evidence of metastatic disease


Review: chest lungs pleura status post left lower lobectomy evidence tumor recurrence resection margin stable right left costophrenic angle solid nodules new suspicious pulmonary mass nodule scattered pleural parenchymal scarring noted moderate severe upper lobe predominant centrilobular paraseptal emphysema basilar predominant subpleural reticulation consistent interstitial fibrosis appearing similar prior study pleural effusions mediastinum hila subcentimeter left thyroid lobe nodule moderate atherosclerotic disease thoracic aorta left common carotid artery narrowing least stable prominent mediastinal hilar lymph nodes hilar lymphadenopathy normal cardiac size without pericardial effusion coronary artery calcification moderate chest wall degenerative changes spine axillary lymphadenopathy abdomen absence enteric contrast material limits sensitivity abdominal pathology liver biliary tract stable hepatic hypodensities likely benig

Predicted summary:  no evidence of metastatic disease


Review: foot radiographic abnormality moderate pes planus deformity observed lateral weightbearing projection ankle minimal questionable soft tissue swelling please correlate physical exam additional new abnormalities minimal degenerative changes ankle specifically talotibial articulation acute abnormality malalignment calcaneus additional new radiographic abnormality 
Original summary: moderate pes planus alignment deformity with minimal degenerative changes are seen involving the ankle see detail provided 
Predicted summary:  no evidence of metastatic disease


Review: ventricles sulci cisterns symmetric unremarkable gray white matter differentiation normal mass effect edema midline shift intra extra axial fluid collection acute hemorrhage osseous structures unremarkable paranasal sinuses mastoid air cells clear 
Original summary: no acute intracranial abnormality or skull fracture no evidence of otitis media or mastoiditis 
Pre

Predicted summary:  no evidence of metastatic disease


Review: chest motion artifact lung bases somewhat limits evaluation lungs pleura trace left pleural effusion intraluminal debris trachea small peripheral wedge shaped groundglass opacity right upper lobe adjacent major fissure may represent aspiration although infarct also consideration bilateral lower lobe bronchial impaction associated basilar atelectasis aspiration mild paraseptal centrilobular emphysema right apical bulla calcified micronodules consistent healed granulomatous disease noncalcified micronodules unchanged likely also post inflammatory mediastinum hila multichamber cardiomegaly interval insertion lvad fluid collection around drive line exits left anterior abdominal wall pericardial effusion icd leads terminating right atrial appendage coronary sinus right ventricle severe coronary calcification postsurgical findings cabg mild enlarged mediastinal lymph nodes likely reactive chest wall bilateral gynecomastia median

Predicted summary:  no evidence of metastatic disease


Review: chest lungs pleura plaque seen waning patchy opacities consistent recurrent aspiration evidence metastases calcified granulomata noted mediastinum hila left tracheobronchial lymph node unchanged image series scattered lymph nodes nonenlarged heart size pericardium unremarkable calcified mediastinal hilar lymph nodes prior granulomatous disease coronary artery calcification severe chest wall hypertrophic degenerative abnormalities affect thoracic spine abdomen absence enteric contrast material limits sensitivity abdominal pathology liver biliary tract significant abnormality noted spleen significant abnormality noted adrenal glands significant abnormality noted kidneys ureters significant abnormality noted pancreas significant abnormality noted retroperitoneum lymph nodes significant abnormality noted bowel mesentery absence enteric contrast material limits sensitivity pathology gross abnormalities noted bones soft tissues 

Predicted summary:  no evidence of metastatic disease


Review: evidence acute ischemic hemorrhagic lesion patchy low attenuations bilateral periventricular white matter centrum semiovale indicating non specific small vessel disease change since prior exam ventricles sulci cisterns symmetric unremarkable mass effect edema midline shift intra extra axial fluid collection acute hemorrhage osseous structures unremarkable paranasal sinuses mastoid air cells clear 
Original summary: no evidence of acute ischemic or hemorrhagic lesion non specific small vessel ischemic disease no change since prior exam 
Predicted summary:  no evidence of metastatic disease


Review: targeted left ultrasound performed palpable area concern left outer breast large simple fluid collection seen site patient concern compatible postoperative seroma suspicious solid mass identified 
Original summary: large simple fluid collection compatible with seroma no suspicious solid mass the patient should return for annual 

In [59]:
x_

Unnamed: 0.1,Unnamed: 0,FINDINGS,IMPRESSION,cleaned_text,cleaned_summary
0,651769,There is diffusion restriction present involving the right parietal lobe including superior and inferior parietal lobules as well as some of the adjacent right temporal lobe at the posterior aspec...,Subacute infarction involving the right posterior parietal and angular artery territories as well as part of the callosomarginal artery territory.Is a hemorrhagic focus present along the right orb...,diffusion restriction present involving right parietal lobe including superior inferior parietal lobules well adjacent right temporal lobe posterior aspect right superior temporal gyrus posterior ...,_START_ subacute infarction involving the right posterior parietal and angular artery territories as well as part of the callosomarginal artery territory is hemorrhagic focus present along the rig...
1,679548,"Over the interval, previously demonstrated moderate effacement of the basal cisterns has progressed. As before, there is a large suprasellar meningioma demonstrating heterogeneous attenuation with...","1.Over the interval, previously demonstrated moderate effacement of the basal cisterns has progressed. 2.Stable hyperdense foci within a large suprasellar from presumed acute intratumoral hemorrha...",interval previously demonstrated moderate effacement basal cisterns progressed large suprasellar meningioma demonstrating heterogeneous attenuation areas hyperdensity presumed acute hemorrhage alo...,_START_ over the interval previously demonstrated moderate effacement of the basal cisterns has progressed stable hyperdense foci within large suprasellar from presumed acute intratumoral hemorrha...
2,572558,PULMONARY ARTERIES: Diagnostic quality examination for PE -- no evidence of pulmonary embolism.DVT: Nondiagnostic scan for DVT due to poor opacification of the deep venous system of the lower extr...,"1.No evidence of pulmonary embolism.2.Centrilobular emphysema with bilateral lower lobe segmental bronchi mucoid impaction, compatible with aspiration.3.Nondiagnostic scan for DVT due to poor opac...",pulmonary arteries diagnostic quality examination evidence pulmonary embolism dvt nondiagnostic scan dvt due poor opacification deep venous system lower extremities lungs pleura mild centrilobular...,_START_ no evidence of pulmonary embolism centrilobular emphysema with bilateral lower lobe segmental bronchi mucoid impaction compatible with aspiration nondiagnostic scan for dvt due to poor opa...
3,344548,"Enteric tube seen curled on itself with tip extending in a retrograde fashion into distal esophagus, repositioning recommended. Remainder of exam without significant change including nonobstructiv...","No change in appearance of enteric tube as above, repositioning again recommended. Remainder of exam unchanged.",enteric tube seen curled tip extending retrograde fashion distal esophagus repositioning recommended remainder exam without significant change including nonobstructive bowel gas pattern nodular fo...,_START_ no change in appearance of enteric tube as above repositioning again recommended remainder of exam unchanged _END_
4,656772,No intracranial hemorrhage is identified. No intracranial mass or evidence of mass-effect. No midline shift or herniation. Gray-white differentiation is maintained. There is moderate degree of glo...,1. No evidence of intracranial hemorrhage or mass effect. 2. Advanced chronic small vessel ischemic disease with chronic appearing bilateral basal ganglia infarcts. 3. Please note CT is insensitiv...,intracranial hemorrhage identified intracranial mass evidence mass effect midline shift herniation gray white differentiation maintained moderate degree global parenchymal volume loss appears prom...,_START_ no evidence of intracranial hemorrhage or mass effect advanced chronic small vessel ischemic disease with chronic appearing bilateral basal ganglia infarcts please note ct is insensitive f...
...,...,...,...,...,...
995,323196,"ABDOMEN:LUNG BASES: Moderate cardiomegaly. Small hiatal hernia seen.LIVER, BILIARY TRACT: No significant abnormality notedSPLEEN: No significant abnormality notedPANCREAS: No significant abnormali...","1. Small hiatal hernia.2. Moderate cardiomegaly.3. No evidence for obstruction, inflammation, or ileus.",abdomen lung bases moderate cardiomegaly small hiatal hernia seen liver biliary tract significant abnormality notedspleen significant abnormality notedpancreas significant abnormality notedadrenal...,_START_ small hiatal hernia moderate cardiomegaly no evidence for obstruction inflammation or ileus _END_
996,203283,This is a lucency within the right first molar which represents a cavity. There is a rounded opacity projecting over the right maxillary sinus which likely represents a mucous retention cyst.,Cavity of the right first maxillary molar. Other findings as above.,lucency within right first molar represents cavity rounded opacity projecting right maxillary sinus likely represents mucous retention cyst,_START_ cavity of the right first maxillary molar other findings as above _END_
997,750051,Right breast ultrasound reidentified the following three target lesions for biopsy. -The first lesion to be biopsied is an irregular hypoechoic mass with internal punctate echogenic foci located i...,(1) Successful ultrasound-guided core biopsy of the right breast 6:00 lesion and hydromark clip placement. Pathology is pending at this time.(2) Successful ultrasound-guided core biopsy of the rig...,right breast ultrasound reidentified following three target lesions biopsy first lesion biopsied irregular hypoechoic mass internal punctate echogenic foci located right breast location measuring ...,_START_ successful ultrasound guided core biopsy of the right breast lesion and hydromark clip placement pathology is pending at this time successful ultrasound guided core biopsy of the right int...
998,192115,"Extensive airspace opacity mainly in the posterior right upper lobe, and superior segment of the right lower lobe, highly compatible with acute aspiration.Mild opacity in the superior segment of t...","Extensive aspiration pneumonia, predominantly in the right lung.",extensive airspace opacity mainly posterior right upper lobe superior segment right lower lobe highly compatible acute aspiration mild opacity superior segment left lower lobe unremarkable cardiac...,_START_ extensive aspiration pneumonia predominantly in the right lung _END_


In [267]:
# NOTE(review): stale cell — its execution count (267) is out of sequence and
# the recorded output below describes "model_8", whose inputs have shapes
# (None, None, 250) and (None, None, 81), not the model built earlier here.
model.summary()

Model: "model_8"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_14 (InputLayer)           [(None, None, 250)]  0                                            
__________________________________________________________________________________________________
input_15 (InputLayer)           [(None, None, 81)]   0                                            
__________________________________________________________________________________________________
lstm_8 (LSTM)                   [(None, 500), (None, 1502000     input_14[0][0]                   
__________________________________________________________________________________________________
lstm_9 (LSTM)                   [(None, None, 500),  1164000     input_15[0][0]                   
                                                                 lstm_8[0][1]               

In [265]:
# NOTE(review): stale cell — its execution count (265) is out of sequence, and it
# compiles with 'categorical_crossentropy' whereas the earlier compile cell uses
# 'sparse_categorical_crossentropy' and the training cell feeds integer token
# ids as targets. Confirm whether this cell should be removed.
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
