In [None]:
# RUN THE FIRST TIME YOU RUN THIS NOTEBOOK
!pip install plotly==4.5.0
!pip install scikit-learn==0.22.1
!pip install git+https://github.ibm.com/Kalyan-Dutia/encoders

# Visualise Embeddings
A tool to let you visualise sentence embeddings of all utterances in your training set. 

The resulting plot gives you **visual clues as to how an intent classifier understands and separates utterances**.

In [None]:
# ADD FILENAME HERE (path relative to the data folder, i.e. config.data_dir)
# The file must be a CSV without a header row; its two columns are read
# as `utterance` and `intent`, in that order.
filename = "workspace_training/puppy_questions.csv"

In [None]:
# external
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import plotly.express as px
from encoders import encoders

import sys
import os

# Put the parent directory on the import path (presumably so the project-level
# `config` module imported below can be found - confirm against repo layout).
# Guarded so that re-running this cell does not pile up duplicate entries.
if '..' not in sys.path:
    sys.path.append('..')

# internal
import config

# Show the full text of every cell in DataFrame output instead of truncating it.
# `None` is the supported "no limit" value; the legacy -1 has been deprecated
# since pandas 1.0 (later releases reject it).
pd.set_option('display.max_colwidth', None)

In [None]:
# Read the header-less training CSV from the data folder, naming its two
# columns explicitly.
df = pd.read_csv(
    os.path.join(config.data_dir, filename),
    names=['utterance', 'intent'],
    header=None,
)

# Preview the first rows as a quick sanity check of the load.
df.head()

In [None]:
# Seed for T-SNE. The algorithm is stochastic, so without a fixed seed the
# layout changes on every run and plots cannot be compared between sessions.
TSNE_RANDOM_STATE = 42

# get embeddings
# NOTE(review): assumes `encoders` yields exactly one embedding per input
# utterance, in input order - the column assignment below relies on that.
embeddings = encoders(df['utterance'].tolist())
df['embedding'] = list(embeddings)

# project to 2D using T-SNE
# notes on using T-SNE effectively: https://distill.pub/2016/misread-tsne/
tsne = TSNE(n_components=2, random_state=TSNE_RANDOM_STATE)
embeddings_2d = tsne.fit_transform(df['embedding'].tolist())

# Keep the two projected coordinates next to each utterance for plotting.
df['X0'] = embeddings_2d[:, 0]
df['X1'] = embeddings_2d[:, 1]

In [None]:
def plot_embeddings_pex(df):
    """Show a Plotly Express scatter plot of the 2D-projected utterances.

    Args:
        df: table passed straight to ``px.scatter``; it must provide the
            columns ``X0`` and ``X1`` (point coordinates), ``intent``
            (point colour) and ``utterance`` (shown on hover).

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    # Tick labels and zero lines are switched off on both axes.
    axis_settings = dict(showticklabels=False, zeroline=False)

    fig = px.scatter(
        df,
        x='X0',
        y='X1',
        color='intent',
        hover_data=['utterance'],
        color_discrete_sequence=px.colors.qualitative.Dark24,
    )
    for update_axis in (fig.update_xaxes, fig.update_yaxes):
        update_axis(**axis_settings)
    fig.show()
    
# Render the scatter plot for `df`; the function reads its X0, X1, intent and
# utterance columns.
plot_embeddings_pex(df)