In [8]:
import glob
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from gensim.models import FastText
from sklearn.manifold import TSNE

from note_reader import get_notes_as_pitch_classes
from wv_utils import angle_between

In [2]:
# Use a Times font family for every matplotlib figure in this notebook.
plt.rcParams["font.family"] = ["Times"]

# Placeholder token; presumably marks unknown pitches -- confirm against note_reader.
UNK_TAG = "<UNK>"

# glob returns matches in a filesystem-dependent order, so sort them to feed
# the files to the loader in the same order on every machine and every run.
file_list = sorted(glob.glob("data/txt/*.txt"))

In [3]:
# Read the nihavent pieces from the collected text files.
# `docs` is what the embedding model is later trained on; `counts` holds the
# per-pitch tallies (presumably a collections.Counter -- confirm in note_reader).
docs, counts = get_notes_as_pitch_classes(
    file_list,
    makam="nihavent",
)

In [6]:
# Show every pitch-class token with its tally, most frequent first.
counts.most_common()

[('D', 10493),
 ('G', 8833),
 ('A#4', 7567),
 ('A', 7324),
 ('C', 6747),
 ('D#4', 5662),
 ('F', 3273),
 ('F#4', 2651),
 ('C#4', 442),
 ('B', 306),
 ('D#8', 304),
 ('E', 241),
 ('A#8', 132),
 ('D#5', 96),
 ('G#4', 71),
 ('F#5', 67),
 ('C#5', 32),
 ('G#5', 16),
 ('A#5', 11)]

In [9]:
# Train 8-dimensional FastText embeddings on the pitch-class sequences.
# gensim only yields a repeatable model when the seed is fixed AND training is
# restricted to one worker thread; the default (workers=3) lets OS thread
# scheduling reorder the updates, so the vectors differ from run to run.
# (gensim's docs additionally mention PYTHONHASHSEED for full determinism
# across interpreter launches.)
ft_model = FastText(
    sentences=docs,
    vector_size=8,
    epochs=50,
    window=5,
    seed=1,  # same value gensim uses by default, stated explicitly
    workers=1,
)
wv = ft_model.wv

# Analogy-style query: pitches similar to C and D but dissimilar to G.
# The expected top hit is D#4.
wv.most_similar(positive=["C", "D"], negative=["G"])  # D#4

[('D#4', 0.4183763265609741),
 ('B', 0.35199740529060364),
 ('A#4', 0.24067234992980957),
 ('C#5', 0.21032927930355072),
 ('F', 0.11454134434461594),
 ('C#4', 0.043371450155973434),
 ('E', -0.015814026817679405),
 ('G#4', -0.04347018897533417),
 ('A#5', -0.04624444618821144),
 ('F#5', -0.07450830191373825)]

In [10]:
# Angle between every ordered pair of pitch-class embeddings, rounded to two
# decimals. The unit is whatever `angle_between` returns (see wv_utils).
pitches = list(wv.key_to_index)
angles_dict = {
    current: {
        pitch: round(angle_between(wv[current], wv[pitch]), 2)
        for pitch in pitches
    }
    for current in pitches
}

# Outer keys become the columns of the table and inner keys its rows
# (default orient="columns" of DataFrame.from_dict).
df_nihavent = pd.DataFrame.from_dict(angles_dict)

# `to_excel` raises OSError when the parent directory does not exist, which is
# the case on a fresh checkout, so create it before writing.
results_path = Path("results/angles_nihavent.xlsx")
results_path.parent.mkdir(parents=True, exist_ok=True)
df_nihavent.to_excel(results_path)