In [11]:
import warnings
warnings.filterwarnings("ignore")
import pathlib
import os
from src.midi_processing import get_song_vector, DOC2VEC_MODELS_PATHS
import numpy as np
from sklearn.manifold import TSNE
import joblib
from tqdm import tqdm
import multiprocessing
import matplotlib.pyplot as plt

# Load one serialized model per musical component, keyed by component name.
# The files live under DOC2VEC_MODELS_PATHS (presumably Doc2Vec models, going by
# that constant's name) and are named '<component>_model.jblib'.
# NOTE: joblib.load is pickle-based, so only load model files from a trusted source.
models = dict(
    (component, joblib.load(os.path.join(DOC2VEC_MODELS_PATHS, f'{component}_model.jblib')))
    for component in ('drums', 'melody', 'harmony')
)

In [2]:
# Directory containing the MIDI files to embed (relative to the working directory).
midi_files = pathlib.Path('Data/midi_files/')
# Path.glob yields entries in arbitrary, filesystem-dependent order. Sort them so the
# processing order -- and therefore the row order of everything derived from this
# list -- is the same on every run and every machine.
all_midi_paths = sorted(midi_files.glob('*'))

In [36]:
# Embed every MIDI file with get_song_vector. Files that cannot be processed are
# reported and skipped (deliberate best-effort: one bad file must not abort the run).
# songs_vectors[i] always belongs to song_names[i]: both lists are appended to only
# after the embedding call has succeeded.
songs_vectors = []
song_names = []

# all_midi_paths is a list, so tqdm infers the total from len() on its own.
for midi_file in tqdm(all_midi_paths):
    try:
        song_vector = get_song_vector(str(midi_file), models)
    except Exception as e:
        # Include the exception type: some failures carry an empty message, which
        # would otherwise be reported as a blank line.
        # tqdm.write keeps the messages from breaking up the progress bar.
        tqdm.write(f"{type(e).__name__}: {e}")
        tqdm.write(f"Invalid song: {midi_file.name}")
    else:
        songs_vectors.append(song_vector)
        song_names.append(midi_file.name)

  3%|██▌                                                                                                | 16/625 [00:04<02:33,  3.96it/s]

data byte must be in range 0..127
Invalid song: Aaron_Neville_-_Tell_It_Like_It_Is.mid


 20%|███████████████████▎                                                                              | 123/625 [00:43<02:14,  3.73it/s]

Could not decode key with 1 flats and mode 255
Invalid song: Beastie_Boys_-_Girls.mid


 27%|██████████████████████████                                                                        | 166/625 [00:58<02:09,  3.54it/s]

data byte must be in range 0..127
Invalid song: Billy_Joel_-_Movin'_Out.mid


 27%|██████████████████████████▊                                                                       | 171/625 [00:59<01:48,  4.19it/s]

data byte must be in range 0..127
Invalid song: Billy_Joel_-_Pressure.mid


 39%|██████████████████████████████████████                                                            | 243/625 [01:23<01:47,  3.56it/s]


Invalid song: Brian_McKnight_-_On_The_Down_Low.mid


 58%|████████████████████████████████████████████████████████▌                                         | 361/625 [02:02<01:10,  3.74it/s]

Could not decode key with 4 flats and mode 255
Invalid song: Dan_Fogelberg_-_Leader_of_the_Band.mid


 59%|█████████████████████████████████████████████████████████▌                                        | 367/625 [02:03<01:15,  3.44it/s]

Could not decode key with 16 sharps and mode 1
Invalid song: David_Bowie_-_Lazarus.mid


100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 625/625 [03:35<00:00,  3.79it/s]


In [39]:
# Stack the per-song vectors into a single NumPy array for scikit-learn
# (assumes every song vector has the same length -- TODO confirm in get_song_vector).
songs_vectors = np.array(songs_vectors)

# Project the song vectors to 2-D for plotting. t-SNE is stochastic, so the seed is
# fixed to make the resulting layout reproducible across runs.
tsne = TSNE(n_components=2, verbose=3, random_state=42)
vectors_transformed = tsne.fit_transform(songs_vectors)

In [59]:
from bokeh.plotting import figure, output_file, save
from bokeh.models import ColumnDataSource, HoverTool

# Write the plot to a standalone static HTML file.
output_file("melody_scatter.html")

# One row per successfully embedded song: its 2-D t-SNE coordinates plus its file name,
# which the hover tooltip displays.
data_source = ColumnDataSource(data=dict(
    x=vectors_transformed[:, 0],
    y=vectors_transformed[:, 1],
    song_name=song_names,
))
hover = HoverTool(tooltips=[
    ("song_name", "@song_name"),
])

# NOTE(review): the title says "Melody", but the vectors come from get_song_vector
# called with the drums/melody/harmony models -- confirm the title is accurate.
p = figure(sizing_mode='stretch_both', title="Melody embedding TSNE visualization")
p.add_tools(hover)

# Draw one marker per song. scatter() defaults to circle markers; it replaces
# circle(size=...), which is deprecated in recent Bokeh releases.
p.scatter(source=data_source, x='x', y='y', size=10, color="navy", alpha=0.5)

# Save the figure to the HTML file configured above (this does not open a browser).
save(p)

'C:\\Users\\Eli\\Workspace\\Deep-Learning-Intro\\assignment3\\melody_scatter.html'