# Data loading

In [1]:
import pickle
from collections import Counter

import numpy as np
import pandas as pd
import plotly.express as px

# Load the pickled training data from disk.
# NOTE(security): pickle.load can execute arbitrary code while unpickling, so
# only run this cell against a training_data.pkl that comes from a trusted source.
# encoding="latin1" controls how 8-bit strings pickled by Python 2 are decoded;
# presumably the file was produced by legacy code -- TODO confirm.
with open('./data/training_data.pkl', 'rb') as file:
    data = pickle.load(file, encoding="latin1")

# The chord vocabulary is stored in the same pickle under 'chords_vocabulary'.
vocabulary = data['chords_vocabulary']

# Get chord statistics

In [2]:
# Flatten every training song into a single list of chord indices.
all_chords = [chord for song in data['train'] for chord in song]

# Occurrences per chord index. Counter keeps keys in first-seen order; it is
# converted back to a plain dict so that looking up a missing key still raises
# KeyError, exactly as before.
chord_counts_indices = dict(Counter(all_chords))

# Occurrences per chord name, where the name is the chord's note suffixes
# sorted and concatenated.
chord_counts = Counter()

for chord_index, count in chord_counts_indices.items():
    chord = vocabulary.indexes_to_chords[chord_index]
    chord_notes = ''.join(sorted(chord.note_suffixes))

    # Accumulate rather than assign: if two chord indices produce the same
    # name, their counts are summed instead of the later one silently
    # overwriting the earlier one.
    chord_counts[chord_notes] += count

# Plain dict ordered from most to least frequent (ties keep first-seen order,
# because sorted() is stable).
chord_counts = dict(sorted(chord_counts.items(), key=lambda item: item[1], reverse=True))

# Visualize data

In [3]:
# Two-column table (chord name, occurrence count) built from chord_counts,
# preserving the dict's iteration order.
chord_counts_df = pd.DataFrame(chord_counts.items(), columns=['chord', 'count'])

# Materialise the counts once instead of re-evaluating .values() for each statistic.
count_values = list(chord_counts.values())

print(f'min value: {min(count_values)}')
# The mean is truncated to an integer for display only.
print(f'mean value: {int(sum(count_values) / len(count_values))}')
print(f'max value: {max(count_values)}')

# Number of distinct chord names counted vs. number of entries in the vocabulary.
print(f'{chord_counts_df.shape[0]}/{len(vocabulary.indexes_to_chords)} total chords')

# Preview the first rows. This must be the last expression in the cell:
# a bare expression anywhere else is evaluated and discarded without being shown.
chord_counts_df.head()


min value: 63
mean value: 2183
max value: 29554
298/298 total chords


In [4]:
# Bar chart with one bar per chord name; bar height and colour both encode the
# occurrence count. A title and readable axis labels are set so the figure can
# be understood without reading the surrounding cells.
fig = px.bar(
    chord_counts_df,
    x='chord',
    y='count',
    color='count',
    title='Chord occurrences in the training set',
    labels={'chord': 'Chord', 'count': 'Occurrences'},
)
fig.show()