## Vamos a utilizar un método muy popular para visualizar representaciones vectoriales: t-SNE

In [8]:
import numpy as np
from sklearn.manifold import TSNE
import plotly.offline as plt
import plotly.graph_objs as go
import pickle
from gensim.models.keyedvectors import KeyedVectors
# NOTE: `plt` here is plotly.offline (see the imports above), not matplotlib.
# Initialise plotly's notebook mode before any figure is displayed.
# NOTE(review): connected=True presumably loads plotly.js from the CDN instead
# of embedding it in the notebook — confirm against the plotly docs.
plt.init_notebook_mode(connected=True)

# Load pre-trained word vectors stored in the binary word2vec format.
# The path is relative, so the .bin file must be in the working directory.
# NOTE(review): file name suggests PubMed-trained vectors with window size 2 — confirm.
embeddings = KeyedVectors.load_word2vec_format('PubMed-shuffle-win-2.bin', binary=True)


In [7]:
def get_coordinates(model, words):
    """Project the embedding vectors of *words* into 3-D with t-SNE.

    Args:
        model: Object supporting ``model[word]`` lookup that returns a 1-D
            numeric vector (e.g. gensim ``KeyedVectors``). All returned
            vectors must have the same length; any dimensionality works.
        words: Iterable of words to look up in *model*.

    Returns:
        Tuple ``(x_coords, y_coords, z_coords)`` of 1-D arrays holding one
        coordinate per word, in the same order as *words*.

    Raises:
        ValueError: If fewer than two words are supplied (t-SNE needs a
            perplexity strictly below the number of samples).
    """
    # Look up every vector first, then stack once: avoids both the hard-coded
    # 200-dimension assumption and the repeated array copies of np.append.
    vectors = [model[w] for w in words]
    n_samples = len(vectors)
    if n_samples < 2:
        raise ValueError(
            "get_coordinates needs at least 2 words, got %d" % n_samples
        )

    stacked = np.asarray(vectors)
    # Keep at least float32 precision without down-casting float64 vectors.
    arr = stacked.astype(np.result_type(stacked.dtype, np.float32), copy=False)

    # scikit-learn requires perplexity < n_samples; its default (30) would be
    # rejected for small word lists, so clamp it just below the sample count.
    perplexity = min(30.0, float(n_samples - 1))
    tsne = TSNE(n_components=3, random_state=0, early_exaggeration=1,
                perplexity=perplexity)

    # Kept for backward compatibility: globally disables scientific notation
    # when NumPy arrays are printed later in the notebook.
    np.set_printoptions(suppress=True)

    Y = tsne.fit_transform(arr)
    return Y[:, 0], Y[:, 1], Y[:, 2]


# Two groups of words to compare: drug names and diagnosis terms.
medset = [
    'pantoprazole',
    'atorvastatin',
    'albuterol',
    'melatonin',
]
diagset = [
    'inflammation',
    'hypertrophy',
    'dysfunction',
    'paresthesia',
]

# Number of drug entries; the coordinate arrays are sliced at this index below.
medl = len(medset)

# A single call over the concatenated list (drugs first, then diagnoses),
# so both groups are embedded together and keep their input order.
x, y, z = get_coordinates(embeddings, medset + diagset)

def _labelled_trace(xs, ys, zs, labels, color):
    """Build a 3-D scatter trace of text-labelled markers filled with *color*."""
    outline = dict(color='rgba(217, 217, 217, 0.14)', width=1)
    marker = dict(
        color=color,
        size=12,
        symbol='circle',
        line=outline,
        opacity=0.9,
    )
    return go.Scatter3d(
        x=xs,
        y=ys,
        z=zs,
        mode='markers+text',
        text=labels,
        textposition='bottom center',
        hoverinfo='text',
        marker=marker,
    )


# The first `medl` coordinates belong to the drugs (dark markers); the
# remaining ones belong to the diagnoses (light grey markers).
plot1 = _labelled_trace(x[:medl], y[:medl], z[:medl], medset, 'rgb(12, 12, 12)')
plot2 = _labelled_trace(x[medl:], y[medl:], z[medl:], diagset, 'rgb(180, 180, 180)')


# Combine both traces into one titled figure and display it in the notebook.
layout = go.Layout(title='Meds & Diagnoses')
fig = go.Figure(data=[plot1, plot2], layout=layout)
plt.iplot(fig)