In [1]:
from extraer_notas import jams_a_vec,scale_modes   
import pickle
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd


In [2]:
# Cargar nombres de archivos
files = [file for file in os.listdir('./data/annotation') if file.endswith('.jams')]
len(files)

360

In [4]:
# convertir a one hot
pitch_vecs, dur_vecs, offset_vecs, chord_vecs,possible_scales = jams_a_vec('./data/annotation/'+files[0])
len(pitch_vecs[0]), len(dur_vecs[0]), len(offset_vecs[0]), len(chord_vecs[0]),len(possible_scales[0])

(128, 14, 53, 12, 5)

In [6]:


def calculate_entropies(file_name,show_plot=False,save_plot=None,generated=False):
    if generated:
        gen_np = np.load('./data/generated/'+file_name)
        pitch_vecs = gen_np[:,0:128]
        dur_vecs = gen_np[:,128:142]
        offset_vecs = gen_np[:,142:195]

    # convertir a one hot
    else:
        pitch_vecs, dur_vecs, offset_vecs, _,_ = jams_a_vec('./data/annotation/'+file_name)
    #count notes 
    # print(len(pitch_vecs[0]), len(dur_vecs[0]), len(offset_vecs[0]))
    count_pitch  = np.array(pitch_vecs).sum(axis=0)
    
    #calculate entropy pitch
    count_pitch_normalized = count_pitch/sum(count_pitch)
    entropy_pitch = -sum(count_pitch_normalized*np.log2(count_pitch_normalized,where=count_pitch_normalized!=0,out=np.zeros_like(count_pitch_normalized)))
    #lo mismo para la duración
    count_dur  = np.array(dur_vecs).sum(axis=0)
    count_dur_normalized = count_dur/sum(count_dur)
    entropy_dur = -sum(count_dur_normalized*np.log2(count_dur_normalized,where=count_dur_normalized!=0,out=np.zeros_like(count_dur_normalized)))
    # lo mismo para el offset
    count_offset  = np.array(offset_vecs).sum(axis=0)
    count_offset_normalized = count_offset/sum(count_offset)
    entropy_offset = -sum(count_offset_normalized*np.log2(count_offset_normalized,where=count_offset_normalized!=0,out=np.zeros_like(count_offset_normalized)))
    if show_plot or save_plot:
        fig, axs = plt.subplots(3, 1, figsize=(8, 10))
        axs[0].bar(list(range(1,len(count_pitch)+1)), count_pitch)
        axs[0].set_xlabel('Note')
        axs[1].bar(list(range(len(count_dur))), count_dur)
        axs[1].set_xlabel('Duration')
        axs[2].bar(list(range(len(count_offset))), count_offset)
        axs[2].set_xlabel('Offset')
        fig.text(0.06, 0.5, 'Counts', ha='center', va='center', rotation='vertical')
        if save_plot:
            plt.savefig(save_plot)
        if show_plot:
            plt.show()
        else:
            plt.close()
    return entropy_pitch,entropy_dur,entropy_offset

In [7]:

data = [ ]
for file in files:
    pitch,duration,offset = calculate_entropies(file,show_plot=False,save_plot='./data/metrics/plots/'+file[:-5]+'.png')
    data.append({'file': file, 'entropy_pitch': pitch, 'entropy_duration': duration, 'entropy_offset': offset})
    
entropies = pd.DataFrame(data)

entropies.to_csv('./data/metrics/entropies.csv', index=False)

print(entropies[['entropy_pitch', 'entropy_duration', 'entropy_offset']].mean())

entropy_pitch       3.556122
entropy_duration    2.799534
entropy_offset      2.895642
dtype: float64


In [8]:
entropies = pd.read_csv('./data/metrics/entropies.csv')
#calculate std
print(entropies[['entropy_pitch', 'entropy_duration', 'entropy_offset']].std())

entropy_pitch       0.439438
entropy_duration    0.308698
entropy_offset      0.603527
dtype: float64


In [9]:
def midi_to_name(vect):
    octava_separada = [vect[i:i +12] for i in range(0, len(vect),12)]
    octava_separada[-1] = np.insert(octava_separada[-1],-1,[0,0,0,0])[:12]
    note_any_octave = np.array(np.ufunc.reduce(np.logical_or, octava_separada)).astype(int)
    return note_any_octave

#check harmonic coherence
def harmonic_coherence(pitch_vecs,possible_scales):
    matches = []
    for i in range(len(pitch_vecs)):
        note = midi_to_name(pitch_vecs[i])
        this_note_possible_scales = np.array(possible_scales[i])
        match = np.dot(this_note_possible_scales,note).sum()/len(this_note_possible_scales)
        matches.append(match)
    percentage = np.array(matches).mean()
    return percentage

In [10]:
harmonic_coherences = []
for file in files:
    pitch_vecs, dur_vecs, offset_vecs, chord_vecs,possible_scales = jams_a_vec('./data/annotation/'+file)
    harmonic_coherences.append(harmonic_coherence(pitch_vecs,possible_scales))
hc = pd.DataFrame({'file': files, 'harmonic_coherence': harmonic_coherences})
hc.to_csv('./data/metrics/harmonic_coherence.csv', index=False)
print(hc['harmonic_coherence'].mean())

0.864992465757785


In [11]:
hc = pd.read_csv('./data/metrics/harmonic_coherence.csv')
print(hc['harmonic_coherence'].std())

0.08184772531588434


In [12]:
#transform all jam files to vec
all_file_list = []
for file in files:
    pitch_vecs, dur_vecs, offset_vecs, chord_vecs,possible_scales = jams_a_vec('./data/annotation/'+file)
    all_file_list.append([pitch_vecs, dur_vecs, offset_vecs, chord_vecs,possible_scales])
#save all files
import pickle
with open('./data/metrics/all_files.pkl', 'wb') as f:
    pickle.dump(all_file_list, f)


In [13]:
all_file_list = pickle.load(open('./data/metrics/all_files.pkl', 'rb'))

In [14]:
def relative_distances(pitch_vecs):
    distances = []
    for i in range(0,len(pitch_vecs)-1):
        note_1 = pitch_vecs[i]
        note_2 = pitch_vecs[i+1]
        distance = abs(np.where(note_1==1)[0]  - np.where(note_2==1)[0])
        distances.append(distance[0])
    distances.append(0)
    return distances
# relative_distances_ = relative_distances(pitch_vecs)

In [15]:
from extraer_notas import longest_contiguous_common_subsequence
def common_subsequence(jams1,jams2):
    pitch_vecs1, dur_vecs1, _,_,_= jams1
    pitch_vecs2, dur_vecs2, _,_,_= jams2
    relative_distances1 = relative_distances(pitch_vecs1)
    relative_distances2 = relative_distances(pitch_vecs2)
    array1 = [[relative_distances1[i],dur_vecs1[i].astype(int).tolist()] for i in range(len(relative_distances1))]
    array2 = [[relative_distances2[i],dur_vecs2[i].astype(int).tolist()] for i in range(len(relative_distances2))]
    if longest_contiguous_common_subsequence(array1,array2):
        return len(longest_contiguous_common_subsequence(array1,array2))
    else:
        return 0
# common_subsequence(all_file_list[1],all_file_list[1])


In [16]:
matrix = np.zeros((len(all_file_list),len(all_file_list)))
for i in range(len(all_file_list)):
    matrix[i,i] = 0
    for j in range(i+1,len(all_file_list)):
        matrix[i,j] = common_subsequence(all_file_list[i],all_file_list[j])
        matrix[j,i] = matrix[i,j]
        # print(files[i],files[j],common_subsequence(all_file_list[i],all_file_list[j]))


In [17]:
np.save('./data/metrics/common_subsequence.npy', matrix)

In [18]:
common_subsequence_file = np.load('./data/metrics/common_subsequence.npy')


In [19]:
#average common subsequence per file
average_common_subsequence = common_subsequence_file.mean(axis=1)
acs = pd.DataFrame({'file': files, 'average_common_subsequence': average_common_subsequence,'std': common_subsequence_file.std(axis=1)})
acs.to_csv('./data/metrics/average_common_subsequence.csv', index=False)

In [20]:
acs.sample(5)

Unnamed: 0,file,average_common_subsequence,std
102,01_Rock2-85-F_comp.jams,2.038889,0.944951
52,00_SS2-107-Ab_comp.jams,1.788889,0.557663
285,04_Rock3-117-Bb_solo.jams,1.955556,0.580443
172,02_SS2-107-Ab_comp.jams,2.325,0.990195
243,04_BN1-147-Gb_solo.jams,1.780556,0.54687


In [21]:
acs['std'].mean()

0.6824011947579548

In [22]:
acs = pd.read_csv('./data/metrics/average_common_subsequence.csv')
print(acs['average_common_subsequence'].mean())
print(acs['average_common_subsequence'].std())


1.884151234567901
0.21893433874155213


In [23]:
# only_solos = [file for file in files if 'solo' in file]
# matrix = np.zeros((len(only_solos),len(only_solos)))
# for i in range(len(only_solos)):
#    for j in range(i+1,len(only_solos)):
#        matrix[i][j] = common_subsequence('./data/annotation/'+only_solos[i],'./data/annotation/'+only_solos[j])

# matrix.save('./data/metrics/common_subsequence.npy')
jams1 = jams_a_vec('./data/annotation/'+files[0])
jams2 = jams_a_vec('./data/annotation/'+files[1])

In [24]:
common_subsequence(jams1,jams2)


1

# generated

In [68]:
generated_files = [file for file in os.listdir('./data/generated') if file.endswith('.npy')]
len(generated_files)

100

In [69]:
gen_file = np.load('./data/generated/'+generated_files[0])

In [70]:
gen_file.shape

(120, 219)

In [71]:
pitch_vecs, dur_vecs, offset_vecs, chord_vecs,next_chord = gen_file[:,:128],gen_file[:,128:142],gen_file[:,142:195],gen_file[:,195:207],gen_file[:,207:]

In [72]:
pitch_vecs.shape, dur_vecs.shape, offset_vecs.shape, chord_vecs.shape,next_chord.shape

((120, 128), (120, 14), (120, 53), (120, 12), (120, 12))

In [73]:
data = []
for file in generated_files:
    pitch,duration,offset=calculate_entropies(file,show_plot=False,save_plot='./data/metrics/generated/plots/'+file[:-4]+'.png',generated=True)
    data.append({'file': file, 'entropy_pitch': pitch, 'entropy_duration': duration, 'entropy_offset': offset})
data = pd.DataFrame(data)
data.to_csv('./data/metrics/generated/entropies.csv', index=False)
print(data[['entropy_pitch', 'entropy_duration', 'entropy_offset']].mean())
print(data[['entropy_pitch', 'entropy_duration', 'entropy_offset']].std())


entropy_pitch       3.278294
entropy_duration    2.867671
entropy_offset      3.129005
dtype: float64
entropy_pitch       0.436090
entropy_duration    0.242717
entropy_offset      0.529673
dtype: float64


In [74]:
def possible_scales_from_vec(vec):
    chord = np.where(vec == 1)[0]
    type_of_chord = chord-chord[0]
    possible_scales = []
    for key in scale_modes.keys():
        if set(type_of_chord) <= set(scale_modes[key]):
            possible_scales.append(key)
    possible_scale_vecs = []
    for scale in possible_scales:
        scale_vec = np.zeros(12)
        scale_idx = scale_modes[scale]
        for idx in scale_idx:
            scale_vec[(chord[0] + idx) % 12] = 1
        possible_scale_vecs.append(scale_vec)
    return possible_scale_vecs

In [75]:
# coherencia 
harmonic_coherences = []
for file in generated_files:
    gen_np = np.load('./data/generated/'+file)
    pitch_vecs, dur_vecs, offset_vecs, chord_vecs,next_chord_vec = gen_np[:,:128],gen_np[:,128:142],gen_np[:,142:195],gen_np[:,195:207],gen_np[:,207:]
    possible_scales =[possible_scales_from_vec(vec) for vec in chord_vecs]
    harmonic_coherences.append(harmonic_coherence(pitch_vecs,possible_scales))
    hc = pd.DataFrame({'file': file, 'harmonic_coherence': harmonic_coherences})
hc.to_csv('./data/metrics/harmonic_coherence.csv', index=False)
print(hc['harmonic_coherence'].mean())
print(hc['harmonic_coherence'].std())

0.8615361111111111
0.053794238313759735


In [76]:
# common subsecuence
matrix = np.zeros((len(generated_files),len(all_file_list)))
for i in range(len(generated_files)):
    file_1 = np.load('./data/generated/'+generated_files[i])
    for j in range(len(all_file_list)):
        file_2 = all_file_list[j]
        matrix[i,j] = common_subsequence([file_1[:,:128],file_1[:,128:142],file_1[:,142:195],file_1[:,195:207],file_1[:,207:]],file_2)
np.save('./data/metrics/generated/common_subsequence.npy', matrix)
print(matrix.mean(axis=1))
print(matrix.std(axis=1))

[1.97777778 1.95       2.01666667 1.875      1.91388889 1.90833333
 2.03333333 1.98611111 2.10555556 2.01111111 2.13055556 2.06388889
 2.14722222 2.00277778 1.92777778 2.00555556 1.88333333 1.82777778
 2.06388889 1.975      1.94166667 1.94166667 1.95277778 2.10833333
 1.98055556 1.70277778 1.83333333 1.71944444 2.03055556 2.08888889
 1.86666667 1.94166667 2.00833333 1.96666667 1.99722222 1.95555556
 1.96111111 2.03055556 2.15833333 2.03333333 2.21666667 1.97777778
 1.97222222 1.92777778 2.06666667 1.83333333 2.00277778 2.03055556
 1.75555556 2.04444444 2.03055556 2.00833333 1.89444444 1.875
 1.825      2.05277778 1.95277778 1.95555556 1.71666667 1.72777778
 2.1        2.06388889 2.04722222 2.12222222 1.725      1.87222222
 2.08611111 1.92777778 2.06111111 1.93055556 1.94444444 1.88611111
 1.69166667 1.925      2.07222222 1.94444444 1.98888889 2.01944444
 1.93611111 2.04722222 1.93611111 2.10555556 2.         2.12777778
 1.88055556 2.06944444 2.10555556 1.96111111 2.00277778 1.98333333


In [77]:
print(matrix.mean(axis=1).mean())
print(matrix.mean(axis=1).std())

1.9695277777777775
0.10623571436279251


In [78]:
np.array(all_file_list[0][0]).sum(axis=0).shape

(128,)

# divergencia de kl


In [64]:
# calculate the kl divergence
def kl_divergence(P, Q):
    epsilon = 0.00001
    # Convert inputs to numpy arrays
    P = np.asarray(P, dtype=np.float64)
    Q = np.asarray(Q, dtype=np.float64)
    P = P+epsilon
    Q = Q+epsilon
    # Compute the KL divergence
    kl_div = np.sum(np.where(P != 0, P * np.log2(P / Q), 0))
    
    return kl_div
kl_divergences_pitch = np.zeros((len(generated_files),len(all_file_list)))
kl_divergences_dur = np.zeros((len(generated_files),len(all_file_list)))
kl_divergences_offset = np.zeros((len(generated_files),len(all_file_list)))

for i in range(len(generated_files)):
    gen_np = np.load('./data/generated/'+generated_files[i])
    pitch_vecs, dur_vecs, offset_vecs, _,__dict__ = gen_np[:,:128],gen_np[:,128:142],gen_np[:,142:195],gen_np[:,195:207],gen_np[:,207:]
    pitch_vecs_counts_normalized = np.array(pitch_vecs).sum(axis=0)/sum(np.array(pitch_vecs).sum(axis=0))
    dur_vecs_counts_normalized = np.array(dur_vecs).sum(axis=0)/sum(np.array(dur_vecs).sum(axis=0))
    offset_vecs_counts_normalized = np.array(offset_vecs).sum(axis=0)/sum(np.array(offset_vecs).sum(axis=0))
    for j in range(len(all_file_list)):
        pitch_vecs_org, dur_vecs_org, offset_vecs_org, _,_ = all_file_list[j]
        pitch_vecs_org_counts_normalized = np.array(pitch_vecs_org).sum(axis=0)/sum(np.array(pitch_vecs_org).sum(axis=0))
        dur_vecs_org_counts_normalized = np.array(dur_vecs_org).sum(axis=0)/sum(np.array(dur_vecs_org).sum(axis=0))
        offset_vecs_org_counts_normalized = np.array(offset_vecs_org).sum(axis=0)/sum(np.array(offset_vecs_org).sum(axis=0))
        kl_divergences_pitch[i,j] = kl_divergence(pitch_vecs_org_counts_normalized,pitch_vecs_counts_normalized)
        kl_divergences_dur[i,j] = kl_divergence(dur_vecs_org_counts_normalized,dur_vecs_counts_normalized)
        kl_divergences_offset[i,j] = kl_divergence(offset_vecs_org_counts_normalized,offset_vecs_counts_normalized)
        
# print(kl_divergences_pitch.mean(axis=1))
# print(kl_divergences_pitch.std(axis=1))

# print(kl_divergences_dur.mean(axis=1))
# print(kl_divergences_dur.std(axis=1))

# print(kl_divergences_offset.mean(axis=1))
# print(kl_divergences_offset.std(axis=1))

print(kl_divergences_pitch.mean(axis=1).mean())
print(kl_divergences_dur.mean(axis=1).mean())
print(kl_divergences_offset.mean(axis=1).mean())
print(kl_divergences_pitch.mean(axis=1).std())
print(kl_divergences_dur.mean(axis=1).std())
print(kl_divergences_offset.mean(axis=1).std())


6.240699024927538
1.3822571816779348
3.3342948217464157
1.3702818670695018
0.7182674569193387
1.3433411903153627


In [66]:
np.save('./data/metrics/generated/kl_divergences_pitch.npy', kl_divergences_pitch)
np.save('./data/metrics/generated/kl_divergences_dur.npy', kl_divergences_dur)
np.save('./data/metrics/generated/kl_divergences_offset.npy', kl_divergences_offset)