In [24]:
import numpy as np
import pandas as pd
import torch
import sys
import pickle

from sklearn.manifold import TSNE
import plotly.graph_objects as go

# Loading Model and Vocabulary

In [25]:
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# SECURITY: torch.load unpickles the checkpoint, which can execute arbitrary code;
# only load files from a trusted source.
# NOTE(review): the loaded object is used as a full module below (model.parameters()).
# Recent PyTorch releases default to weights_only=True, which rejects fully pickled
# modules -- confirm against the installed torch version.
model = torch.load("result_model/cbow_network.pt", map_location=device)

# SECURITY: pickle.load carries the same arbitrary-code-execution risk on untrusted files.
# NOTE(review): encoding="latin1" is presumably needed because the pickle was written
# by Python 2 or contains raw numpy bytes -- TODO confirm.
with open('./data/training_data.pkl', 'rb') as file:
    data = pickle.load(file, encoding="latin1")

# Vocabulary object stored under the 'chords_vocabulary' key of the training data.
vocab = data['chords_vocabulary']





# Getting Embeddings

In [26]:
# The first parameter tensor of the model is taken as the embedding matrix
# and converted to a NumPy array on the host.
first_param = next(iter(model.parameters()))
embeddings = first_param.detach().cpu().numpy()

# Row-wise Euclidean norm, kept as a column vector so it broadcasts over each row.
norms = np.square(embeddings).sum(axis=1, keepdims=True) ** 0.5
embeddings_norm = embeddings / norms
embeddings_norm.shape

(298, 25)

# Visualization with t-SNE

In [27]:
# Wrap the embedding matrix in a DataFrame for t-SNE.
# NOTE(review): this uses the raw `embeddings`, not the unit-length `embeddings_norm`
# computed in the previous cell -- confirm that is intended.
embeddings_df = pd.DataFrame(embeddings)

# Project to 2-D with t-SNE. The algorithm is stochastic, so the seed is fixed
# to make the layout reproducible across Restart & Run All.
tsne = TSNE(n_components=2, random_state=42)
embeddings_df_trans = pd.DataFrame(tsne.fit_transform(embeddings_df))

# Label each row with its chord's note suffixes, sorted and concatenated.
# A list (not a one-shot `map` iterator) so the labels can be reused later.
# NOTE(review): assumes vocab.indexes_to_chords iterates in embedding-row order -- TODO confirm.
chord_suffixes = [
    ''.join(sorted(chord.note_suffixes))
    for chord in vocab.indexes_to_chords.values()
]
embeddings_df_trans.index = chord_suffixes

embeddings_df_trans.head()

Unnamed: 0,0,1
A,-0.015762,-14.448978
AA#,-12.021301,-2.630247
AA#B,-8.510359,-0.943555
AA#C,5.172655,-4.65792
AA#C#,-9.296827,13.21204


In [28]:
# Text-only scatter: each index label is drawn at its 2-D coordinates
# (column 0 on the x axis, column 1 on the y axis).
chord_labels = go.Scatter(
    x=embeddings_df_trans[0],
    y=embeddings_df_trans[1],
    text=embeddings_df_trans.index,
    mode="text",
    textposition="middle center",
)

fig = go.Figure()
fig.add_trace(chord_labels)