In [127]:
import numpy as np
import pandas as pd
import torch
import sys
import pickle

from sklearn.manifold import TSNE
import plotly.graph_objects as go

from src.models.scale import Scale
import src.visualization.scale_color_filter as scf

# Loading Model and Vocabulary

In [128]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE(security): torch.load unpickles the checkpoint, which can execute
# arbitrary code -- only load model files that come from a trusted source.
# map_location moves the stored tensors onto `device` while loading.
model = torch.load("result_model/cbow_network.pt", map_location=device)

# NOTE(security): pickle.load has the same arbitrary-code-execution risk;
# keep this file under your own control.
# encoding="latin1" is kept from the original notebook -- presumably needed
# because the pickle contains Python 2 / legacy byte strings (TODO confirm).
with open('./data/training_data.pkl', 'rb') as file:
    data = pickle.load(file, encoding="latin1")

# Vocabulary object used below to translate between chord suffixes and
# embedding row indexes.
vocab = data['chords_vocabulary']





# Getting Embeddings

In [129]:
# The model's first parameter tensor is used as the embedding matrix;
# move it to host memory and detach it from autograd before converting.
embeddings = list(model.parameters())[0].cpu().detach().numpy()

# Row-wise L2 normalisation: divide every embedding by its Euclidean length.
# keepdims=True keeps the norms as a column so the division broadcasts per row.
norms = np.sum(embeddings ** 2, axis=1, keepdims=True) ** 0.5
embeddings_norm = embeddings / norms

# Sanity check: both matrices must have the same shape.
print(f'embeddings shape: {embeddings.shape}')
print(f'embeddings norms shape: {embeddings_norm.shape}')

embeddings shape: (298, 25)
embeddings norms shape: (298, 25)


# Visualization with t-SNE

In [169]:
# t-SNE is stochastic; a fixed seed keeps the 2-D layout identical across
# kernel restarts so the figure below is reproducible.
TSNE_SEED = 42

# Wrap the raw (un-normalised) embedding matrix: one row per vocabulary entry.
embeddings_df = pd.DataFrame(embeddings)

# Project the embeddings down to two dimensions for plotting.
tsne = TSNE(n_components=2, random_state=TSNE_SEED)
embeddings_df_trans = pd.DataFrame(tsne.fit_transform(embeddings_df))

# Label every projected point with its chord suffix.
# NOTE(review): assumes the key order of vocab.suffixes_to_indexes matches the
# row order of the embedding matrix -- confirm against the vocabulary class.
chord_suffixes = np.array(list(vocab.suffixes_to_indexes.keys()))
embeddings_df_trans.index = chord_suffixes

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,15,16,17,18,19,20,21,22,23,24
A,0.598284,0.033928,0.828478,0.425911,0.051963,-0.560773,-0.009697,0.569291,-0.47307,-0.368943,...,0.213646,-0.529174,-0.2158,0.302438,0.244629,-0.17089,0.465255,-0.27665,0.826157,-0.396281
AA#,-0.469162,1.212833,-1.042399,1.189924,-0.970144,0.005587,0.385951,0.592552,0.138509,-1.677441,...,-0.714791,-0.610175,-0.019497,0.047153,0.450328,-0.675545,0.677474,1.756028,0.244865,-0.10268
AA#B,-0.306258,2.044793,-0.723958,-0.416823,-1.430263,-0.885552,-0.999008,-0.466763,-0.720835,0.355847,...,-0.560078,0.896476,-0.234412,0.152262,0.70393,-0.242927,-0.06065,0.464694,-0.209719,-0.149951
AA#C,0.978991,-0.04899,0.073358,-0.551217,0.085184,-0.284838,1.144518,0.295441,-0.778304,0.100828,...,0.52544,0.617921,-1.779949,0.117646,0.55213,-0.029832,-0.671777,0.678286,0.149106,-0.303883
AA#C#,0.309307,-0.984279,0.868472,0.138445,-0.372418,-0.600469,-0.61638,0.689277,-0.13856,1.016573,...,-1.041945,0.101959,0.591907,0.963561,1.437131,-1.9636,0.482133,0.364495,1.301444,-0.340515


In [170]:
# Reference scale used by the colour filters below.
scale = Scale('C', 'major')

# Every chord starts out grey; the filter calls are then applied in sequence,
# each receiving the colour list produced by the previous one.
# NOTE(review): presumably each filter recolours the matching entries and
# returns the updated list -- verify in src.visualization.scale_color_filter.
colors = ['lightgrey'] * len(chord_suffixes)
colors = scf.get_scale_all_triads_filter(colors, chord_suffixes, scale, 'blue')
colors = scf.get_scale_chord_degress(colors, chord_suffixes, 0, 'major', 'green')
colors = scf.get_scale_main_triads_filter(colors, chord_suffixes, scale, 'red')

# Draw each chord as a text label placed at its t-SNE coordinates,
# coloured according to the list computed above.
text_trace = go.Scatter(
    x=embeddings_df_trans[0],
    y=embeddings_df_trans[1],
    mode="text",
    text=embeddings_df_trans.index,
    textposition="middle center",
    textfont={"color": colors},
)

fig = go.Figure()
# add_trace returns the figure, so as the cell's last expression it renders it.
fig.add_trace(text_trace)

# Similar chords

In [171]:
def get_top_similar(chord_suffix: str, topN: int = 10) -> dict:
    """Return the chords whose embeddings are most similar to a given chord.

    Similarity is the dot product between rows of the module-level
    ``embeddings_norm`` matrix; since those rows are L2-normalised this is
    the cosine similarity.

    Args:
        chord_suffix: Key into ``vocab.suffixes_to_indexes`` identifying the
            query chord (e.g. ``"CEG"``).
        topN: Maximum number of neighbours to return.

    Returns:
        A dict mapping each neighbour's note suffixes (sorted and joined into
        one string) to its similarity score, inserted in descending order of
        similarity. The query chord itself is never included. An empty dict
        is returned when the chord maps to index 0, which is treated as
        "out of vocabulary".

    Note:
        Whether an unknown ``chord_suffix`` raises or maps to index 0 depends
        on the type of ``vocab.suffixes_to_indexes`` -- not visible here.
    """
    chord_id = vocab.suffixes_to_indexes[chord_suffix]

    if chord_id == 0:
        print("Out of vocabulary chord")
        # Return an empty mapping (not None) so callers can always iterate
        # over the result with .items().
        return {}

    # Dot product of the query row with every row of the embedding matrix.
    similarities = embeddings_norm @ embeddings_norm[chord_id]

    # Rank all chords from most to least similar. A stable sort makes the
    # order of tied scores deterministic.
    ranked_ids = np.argsort(-similarities, kind="stable")

    # Drop the query chord by index instead of assuming it is ranked first:
    # a chord with an identical (or numerically tied) embedding could sort
    # ahead of it, in which case slicing off position 0 would discard a real
    # neighbour and report the query chord as similar to itself.
    topN_ids = ranked_ids[ranked_ids != chord_id][:max(topN, 0)]

    topN_dict = {}
    for sim_chord_id in topN_ids:
        sim_chord = vocab.indexes_to_chords[sim_chord_id]
        # Canonical label: the chord's note suffixes in sorted order.
        sim_chord_suffix = ''.join(sorted(sim_chord.note_suffixes))
        topN_dict[sim_chord_suffix] = similarities[sim_chord_id]

    return topN_dict

In [172]:
# Show the nearest neighbours of the C major triad (C-E-G), one per line,
# with the similarity score rounded to three decimals.
similar_to_ceg = get_top_similar("CEG")
for neighbour, score in similar_to_ceg.items():
    print("{}: {:.3f}".format(neighbour, score))

ACE: 0.707
CDG: 0.631
BDG: 0.626
ACF: 0.566
ACG: 0.518
BEG: 0.513
CDE: 0.513
BCG: 0.509
CEF: 0.500
A#DF: 0.466
