In [None]:
# make a data generator that yields the features and labels
# we first need to cut the audio into frames
# then get the labels to fit those frames


In [None]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell runs, so edits to the project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2


In [None]:
# cut audio into frames
import os

import numpy as np
import pandas as pd
import random


from pitch_tracker.utils.constants import (HOP_LENGTH, N_FFT, N_MELS,
                                           PICKING_FRAME_SIZE,
                                           PICKING_FRAME_STEP,
                                           SAMPLE_RATE,
                                           STEP_FRAME, STEP_TIME,
                                           N_CLASS, PRE_MIDI_START)
from pitch_tracker.utils.audio import load_audio_mono
from pitch_tracker.utils import files
from pitch_tracker.utils import dataset


In [None]:
# Smoke test on a single track: load the audio once, run both feature
# extractors on the same signal and report the shape of each result.
audio_path = '../medleydb/medleydb/data/Audio/Phoenix_ScotchMorris/Phoenix_ScotchMorris_MIX.wav'
signal, sample_rate = load_audio_mono(audio_path, SAMPLE_RATE, keep_channel_dim=True)

mel_features = dataset.extract_melspectrogram_feature(
    signal,
    N_FFT,
    HOP_LENGTH,
    N_MELS,
    SAMPLE_RATE,
    backend='librosa',
)
stft_feature = dataset.extract_stft_feature(signal, N_FFT, HOP_LENGTH)

# One shape per line: mel-spectrogram first, STFT second.
print(mel_features.shape, stft_feature.shape, sep='\n')


In [None]:
# Read every label file found in label_dir and list the shape of each
# loaded label, one "<key> <shape>" line per entry.
label_dir = '../content/gen_label/2560/Melody2_midi/'
csv_path = '../content/gen_label/2560/Melody2_midi/AClassicEducation_NightOwl.csv'

label_dict = dataset.create_label_dict_from_dir(label_dir)
for track_name in label_dict:
    print(track_name, label_dict[track_name].shape)


In [None]:
# Collect the dataset paths for the labels in label_dir, then preview the
# first five rows of their transposed table form. The table is only built
# for display, so no name is kept for it.
dataset_paths = dataset.create_dataset_path_dict(label_dir)
display(pd.DataFrame(dataset_paths).T.head(5))


In [None]:
# Build the feature/label generator over every entry of dataset_paths,
# using the shared audio and framing constants.
feature_label_gen = dataset.create_feature_label_generator(
    dataset_path_dict=dataset_paths,
    # audio / spectrogram settings
    sample_rate=SAMPLE_RATE,
    n_fft=N_FFT,
    hop_length=HOP_LENGTH,
    n_mels=N_MELS,
    n_class=N_CLASS,
    # framing settings
    picking_frame_size=PICKING_FRAME_SIZE,
    picking_frame_step=PICKING_FRAME_STEP,
    step_frame=STEP_FRAME,
    step_time=STEP_TIME,
    # filtering thresholds
    dist_threshold=0.1,
    empty_threshold=0.3,
)

# Consume the generator and write its output below output_dir.
output_dir = '../content/pickled_database/'
passed_songs = dataset.write_feature_label_to_disk_by_frame(
    feature_label_gen,
    output_dir,
    categorize_by_subdir=True,
)

# Show every key of dataset_paths that is missing from passed_songs.
display(list(filter(lambda song: song not in passed_songs, dataset_paths)))

In [None]:
# Pick one of the files below output_dir at random and load it. Each
# pickle unpacks into a (feature, label) pair, where label is indexed by
# 'onset', 'duration' and 'pitch'.
pickled_paths = [path for path in files.list_file_paths_in_dir(output_dir)]
sample_pickled_path = random.choice(pickled_paths)
feature, label = files.load_pickle(sample_pickled_path)

label_onsets: np.ndarray = label['onset']
label_durations: np.ndarray = label['duration']
label_pitches: np.ndarray = label['pitch']

# Report which file was sampled, followed by the shape of the feature and
# of each label array (one item per line).
print(
    sample_pickled_path,
    feature.shape,
    label_onsets.shape,
    label_durations.shape,
    label_pitches.shape,
    sep='\n',
)


In [None]:


# The pitch label is one-hot encoded along axis 2.
# Decode it back to pitch numbers:
# - argmax returns the index of the hot class of every step in a single
#   vectorised call (no per-row Python call as with apply_along_axis)
# - indices above 0 are shifted by PRE_MIDI_START to get the pitch;
#   index 0 is left as 0 by the mask
# A row without any hot class also decodes to 0 instead of raising, and a
# row with several hot classes decodes to the first of them.
label_pitches = np.argmax(label['pitch'], axis=2)
label_pitches[label_pitches > 0] += PRE_MIDI_START
print(label_pitches.shape)

In [None]:
# Note: currently, each frame overlaps the next frame by 50%
def merge_frame(batch, picking_frame_step=PICKING_FRAME_STEP):
    """Stitch overlapping frames back into non-overlapping rows.

    Keeps the first ``picking_frame_step`` entries along axis 1 of every
    frame in ``batch`` and appends, as an additional row, whatever the last
    frame holds beyond that point.

    Args:
        batch: array indexed as ``[frame, step, ...]``.
        picking_frame_step: number of leading entries kept from each frame.

    Returns:
        A new array. For a non-empty ``batch`` it has one more row than
        ``batch``. The tail of the last frame must have the same shape as a
        kept head (true when frames overlap by half), otherwise the
        concatenation raises ``ValueError``.
    """
    heads = batch[:, :picking_frame_step]
    last_tail = batch[-1:, picking_frame_step:]
    return np.concatenate((heads, last_tail), axis=0)



In [None]:
# Apply merge_frame to each of the three label arrays.
# NOTE: the results are bound to the same names, so re-running this cell
# merges the already merged arrays a second time.
label_onsets, label_durations, label_pitches = map(
    merge_frame,
    (label_onsets, label_durations, label_pitches),
)

In [None]:
# Collapse every label array to one dimension. Onsets and durations are
# additionally cast to int; pitches keep their dtype.
label_onsets = label_onsets.astype(int).flatten()
label_durations = label_durations.astype(int).flatten()
label_pitches = label_pitches.flatten()

In [None]:
# Print the labels as a fixed-width table (five left-aligned columns, each
# 10 characters wide): a 1-based row id, the time of the step
# (STEP_TIME * 0-based index, 3 decimals), then onset, duration and pitch.
print(('{:<10}' * 5).format('id', 'time', 'onset', 'duration', 'pitch'))
for i, (onset, duration, pitch) in enumerate(
        zip(label_onsets, label_durations, label_pitches)):
    print(f'{i+1:<10}{STEP_TIME * i:<10,.3f}{onset:<10}{duration:<10}{pitch:<10}')

In [None]:
# Export the labels of the sampled pickle to a CSV file, one row per
# label step. (pandas and files are already imported in the import cell
# at the top of the notebook.)

# Time of every label step: step index multiplied by STEP_TIME.
f_time = np.arange(start=0, stop=label_onsets.shape[0], dtype=np.float32) * STEP_TIME

# The CSV is named after the sampled pickle file.
csv_dir = '../content/gen_csv_label/'
csv_name = files.get_file_name(sample_pickled_path)
csv_path = os.path.join(csv_dir, csv_name + '.csv')

os.makedirs(csv_dir, exist_ok=True)

# Build the frame column by column so that every column keeps its own
# dtype. Stacking the four arrays into a single object array turns every
# column into dtype `object`, and `float_format` is only applied to float
# columns, so the time column would be written unrounded.
df = pd.DataFrame({
    'time': f_time,
    'onset': label_onsets,
    'duration': label_durations,
    'pitch': label_pitches,
})
df.to_csv(csv_path, float_format='%.3f')

In [None]:
# Load one specific pickled frame and show the feature shape together with
# the number of entries in its label.
# The path uses forward slashes like every other path in this notebook:
# they work on all platforms, whereas backslashes in a normal string
# literal are Windows-only and form invalid escape sequences (\c, \p, \L).
sample_path = '../content/pickled_database/LizNelson_Rainfall/LizNelson_Rainfall_0.pkl'
feature, label = files.load_pickle(sample_path)
feature.shape, len(label)

In [None]:
# Build a fresh feature/label generator (the previous one was handed to
# write_feature_label_to_disk_by_frame and has presumably been consumed).
feature_label_gen = dataset.create_feature_label_generator(
    dataset_path_dict=dataset_paths,
    sample_rate=SAMPLE_RATE,
    n_fft=N_FFT,
    n_mels=N_MELS,
    n_class=N_CLASS,
    hop_length=HOP_LENGTH,
    picking_frame_step=PICKING_FRAME_STEP,
    picking_frame_size=PICKING_FRAME_SIZE,
    step_frame=STEP_FRAME,
    step_time=STEP_TIME,
    dist_threshold=0.1,
    empty_threshold=0.3,
)

# Denominator contributed by each frame.
# NOTE(review): 210 looks like the number of label steps per frame —
# confirm and derive it from the framing constants instead.
per_frame_total = 210

# For every song, print how many duration entries are non-zero and how many
# pitch entries outside column 0 are non-zero, each against the total.
for label_name, feature_label_pairs in feature_label_gen:
    # Frames are counted explicitly: relying on the enumerate index after
    # the loop reuses a stale value (or fails) when a song yields no frame.
    n_frames = 0
    n_durs_pos = 0
    n_pitches = 0
    for feature, (onsets, durations, pitches) in feature_label_pairs:
        n_frames += 1
        n_durs_pos += np.count_nonzero(durations)
        # column 0 of the pitch label is excluded from the count
        n_pitches += np.count_nonzero(pitches[:, 1:])
    n_total = per_frame_total * n_frames
    print(f'{label_name}: {n_durs_pos}/{n_total} \t {n_pitches}/{n_total}')