# MIMIC W2V Samples
We have successfully trained a word2vec (w2v) model on MIMIC-III text. In this notebook we look at some example queries against the trained model.

In [1]:
import time
import sqlite3
import pandas as pd
from gensim.models import Word2Vec

In [3]:
# Load the cleaned note text for every admission that has at least one of the
# five ICD-9 diagnosis codes listed in the query below.
print('\n===== Load Data =====')
time0 = time.time()

# The inner SELECT DISTINCT yields one row per matching hadm_id, so a note is
# returned once even when its admission carries several of the listed codes.
sql = (
    "SELECT a.text_cleaned as text "
    "FROM notes_cleaned a "
    "INNER JOIN (SELECT DISTINCT hadm_id "
    "FROM diagnoses_icd "
    "WHERE icd9_code in ('4019', '42731', '4280', '51881', '5849')) b ON "
    "a.hadm_id = b.hadm_id"
    ";"
)

conn = sqlite3.connect('../database/mimic.db')
try:
    df = pd.read_sql_query(sql, conn)
finally:
    # Always release the database handle, even if the query raises; the
    # original cell left the connection open for the life of the kernel.
    conn.close()

time_elapsed = time.time() - time0
task = 'Loading data'
print('{} complete.    Total elapsed time: {}'.format(task, time_elapsed))


===== Load Data =====
Loading data complete.    Total elapsed time: 280.5883173942566


In [2]:
# Restore the previously trained Word2Vec model from disk, then print the
# shape of its word-vector array as a quick sanity check.
model_path = '../model/w2v_top5_diag.model'
model = Word2Vec.load(model_path)
print(model.wv.vectors.shape)

(321973, 50)


In [57]:
# Ten nearest neighbours of 'surgery' in the embedding space.
# Query through model.wv: calling most_similar directly on the Word2Vec object
# is deprecated (it emits a DeprecationWarning) and is removed in gensim 4.
model.wv.most_similar('surgery', topn=10)

  """Entry point for launching an IPython kernel.


[('sugery', 0.8311212062835693),
 ('surgey', 0.7585265636444092),
 ('surgery-', 0.7437612414360046),
 ('Surgery', 0.7404195070266724),
 ('surger', 0.6999047994613647),
 ('catheterization', 0.6837339401245117),
 ('catherization', 0.6793178915977478),
 ('revascularization', 0.677638828754425),
 ('surgeon', 0.6676056981086731),
 ('debridment', 0.6624088883399963)]

In [56]:
# Nearest neighbours of the combined 'surgery' + 'procedure' query.
# Query through model.wv: calling most_similar directly on the Word2Vec object
# is deprecated (it emits a DeprecationWarning) and is removed in gensim 4.
# No negative terms are used, so the empty `negative` list is omitted.
model.wv.most_similar(positive=['surgery', 'procedure'])

  """Entry point for launching an IPython kernel.


[('operation', 0.843580961227417),
 ('proceedure', 0.7427452802658081),
 ('revascularization', 0.737432062625885),
 ('sugery', 0.7370858192443848),
 ('procedures', 0.7348600625991821),
 ('surgey', 0.7267059087753296),
 ('catheterization', 0.7070623636245728),
 ('embolization', 0.6908447742462158),
 ('scope', 0.6849787831306458),
 ('catherization', 0.6755216121673584)]

In [6]:
# Display the text stored in the first row of df.
first_note = df['text'][0]
print(first_note)

Admission Date:                Discharge Date:   

Date of Birth:               Sex:   F

Service: SURGERY

Allergies:
Patient recorded as having No Known Allergies to Drugs

Attending:
Chief Complaint:
headache and neck stiffness

Major Surgical or Invasive Procedure:
central line placed, arterial line placed

History of Present Illness:
54 year old female with recent diagnosis of ulcerative colitis
on 6-mercaptopurine, prednisone 40-60 mg daily, who presents
with a new onset of headache and neck stiffness. The patient is
in distress, rigoring and has aphasia and only limited history
is obtained. She reports that she was awaken 1AM the morning of
 with a headache which she describes as bandlike. She
states that headaches are unusual for her. She denies photo- or
phonophobia. She did have neck stiffness. On arrival to the ED
at 5:33PM, she was afebrile with a temp of 96.5, however she
later spiked with temp to 104.4 (rectal), HR 91, BP 112/54, RR
24, O2 sat 100 %. Head CT was done and 

In [12]:
# Ten nearest neighbours of the token 'Fluoxetine' (lookups are case-sensitive).
# Query through model.wv: calling most_similar directly on the Word2Vec object
# is deprecated (it emits a DeprecationWarning) and is removed in gensim 4.
model.wv.most_similar('Fluoxetine', topn=10)

  """Entry point for launching an IPython kernel.


[('Citalopram', 0.9135635495185852),
 ('Duloxetine', 0.9038496017456055),
 ('Omeprazole', 0.8965630531311035),
 ('Pravastatin', 0.8799841403961182),
 ('Quinapril', 0.8669581413269043),
 ('Simvastatin', 0.8654094934463501),
 ('Stavudine', 0.8622251749038696),
 ('Nexium', 0.8546934127807617),
 ('Paroxetine', 0.8492091298103333),
 ('Lovastatin', 0.8454515337944031)]