In [3]:
import numpy as np
import medleydb as mdb
from medleydb import download
import librosa
import os
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
def get_hcqt_params():
    """Return the fixed analysis settings shared by the HCQT helpers.

    Returns
    -------
    tuple
        ``(bins_per_octave, n_octaves, harmonics, sr, fmin, hop_length)``,
        in that order. ``harmonics`` is a newly created list on every call.
    """
    return (
        120,                 # bins_per_octave
        5,                   # n_octaves
        [1, 2, 3, 4, 5, 6],  # harmonics
        22050,               # sr
        32.7,                # fmin
        128,                 # hop_length
    )

In [5]:
def compute_hcqt(audio_fpath):
    """Compute a normalised log-magnitude harmonic CQT for an audio file.

    One constant-Q transform is computed per harmonic ``h`` returned by
    ``get_hcqt_params``, each starting at ``fmin * h``. The transforms are
    stacked along a new leading axis, converted to a log scale
    (``20 * log10(|cqt| + 1e-4)``), shifted so the minimum is 0 and, unless
    the result is constant, divided by the maximum so the peak is 1.

    Parameters
    ----------
    audio_fpath : str
        Path of the audio file, handed to ``librosa.load``.

    Returns
    -------
    np.ndarray
        Array of shape ``(n_harmonics, n_bins, n_frames)``.
    """
    bins_per_octave, n_octaves, harmonics, sr, f_min, hop_length = get_hcqt_params()
    y, fs = librosa.load(audio_fpath, sr=sr)

    n_bins = bins_per_octave * n_octaves
    cqt_list = [
        librosa.cqt(
            y, sr=fs, hop_length=hop_length, fmin=f_min * float(h),
            n_bins=n_bins, bins_per_octave=bins_per_octave
        )
        for h in harmonics
    ]

    # Trim every harmonic to the shortest frame count so they stack into one
    # rectangular array. The earlier implementation popped from cqt_list while
    # enumerating it, which skipped every other entry and silently dropped
    # half of the harmonics whenever the shapes differed.
    min_time = min(cqt.shape[1] for cqt in cqt_list)
    cqt_list = [cqt[:, :min_time] for cqt in cqt_list]

    log_hcqt = 20.0 * np.log10(np.abs(np.array(cqt_list)) + 0.0001)
    log_hcqt = log_hcqt - np.min(log_hcqt)

    # A constant input leaves every value at 0 after the shift; dividing by
    # the (zero) peak would fill the result with NaNs.
    peak = np.max(log_hcqt)
    if peak > 0:
        log_hcqt = log_hcqt / peak
    return log_hcqt

In [6]:
def get_freq_grid():
    """Return the CQT bin frequencies for the base (un-multiplied) ``fmin``.

    The grid has ``bins_per_octave * n_octaves`` entries, using the values
    from ``get_hcqt_params``.

    Returns
    -------
    np.ndarray
        Output of ``librosa.cqt_frequencies`` for those settings.
    """
    bins_per_octave, n_octaves, harmonics, sr, f_min, hop_length = get_hcqt_params()
    # fmin is passed by keyword: newer librosa releases make it keyword-only,
    # and the keyword form is accepted by older releases as well.
    freq_grid = librosa.cqt_frequencies(
        bins_per_octave * n_octaves, fmin=f_min, bins_per_octave=bins_per_octave
    )
    return freq_grid

def get_time_grid(n_time_frames):
    """Return the time stamp of frames ``0 .. n_time_frames - 1``.

    Parameters
    ----------
    n_time_frames : int
        Number of frames to generate time stamps for.

    Returns
    -------
    np.ndarray
        Output of ``librosa.core.frames_to_time`` using the sample rate and
        hop length from ``get_hcqt_params``.
    """
    _, _, _, sample_rate, _, hop = get_hcqt_params()
    frame_indices = range(n_time_frames)
    return librosa.core.frames_to_time(
        frame_indices, sr=sample_rate, hop_length=hop)

In [7]:
def grid_to_bins(grid, start_bin_val, end_bin_val):
    """Turn a grid of centre values into a sequence of bin edges.

    The interior edges are the midpoints between neighbouring grid values;
    ``start_bin_val`` and ``end_bin_val`` are used as the outermost edges.

    Parameters
    ----------
    grid : np.ndarray
        1-D array of grid values.
    start_bin_val, end_bin_val : float
        First and last edge of the returned sequence.

    Returns
    -------
    np.ndarray
        Array with ``len(grid) + 1`` edges.
    """
    lower_neighbours = grid[:-1]
    upper_neighbours = grid[1:]
    midpoints = (upper_neighbours + lower_neighbours) / 2.0
    edges = ([start_bin_val], midpoints, [end_bin_val])
    return np.concatenate(edges)

In [8]:
def create_annotation_target(freq_grid, time_grid, annotation_times, annotation_freqs):
    """Rasterise (time, frequency) annotations onto a binary grid.

    Each annotation is assigned to the grid cell whose bin (as built by
    ``grid_to_bins`` with a lower edge of 0.0) contains it, and that cell is
    set to 1. Annotations falling outside the grid on either side are clamped
    to the nearest edge cell.

    NOTE(review): a frequency of exactly 0.0 lands in the lowest frequency
    bin. If the annotation source uses 0 to mean "unvoiced", callers need to
    filter those rows out before calling this function.

    Parameters
    ----------
    freq_grid : np.ndarray
        1-D array of frequency grid values.
    time_grid : np.ndarray
        1-D array of time grid values.
    annotation_times, annotation_freqs : array-like
        Equal-length sequences with the time and frequency of each annotation.

    Returns
    -------
    np.ndarray
        Float array of shape ``(len(freq_grid), len(time_grid))`` holding 1
        at annotated cells and 0 elsewhere.
    """
    n_freqs = len(freq_grid)
    n_times = len(time_grid)

    time_bins = grid_to_bins(time_grid, 0.0, time_grid[-1])
    freq_bins = grid_to_bins(freq_grid, 0.0, freq_grid[-1])

    annot_time_idx = np.digitize(annotation_times, time_bins) - 1
    annot_freq_idx = np.digitize(annotation_freqs, freq_bins) - 1

    # Clamp on both sides. Values at or beyond the last edge digitize to an
    # index one past the grid, and values below the first edge become -1,
    # which would otherwise wrap around to the last row/column.
    annot_time_idx = np.clip(annot_time_idx, 0, n_times - 1)
    annot_freq_idx = np.clip(annot_freq_idx, 0, n_freqs - 1)

    annotation_target = np.zeros((n_freqs, n_times))
    annotation_target[annot_freq_idx, annot_time_idx] = 1

    return annotation_target

In [9]:
def get_all_pitch_annotations(mtrack):
    """Collect the pitch annotations of every stem of a multitrack.

    For each stem, ``pitch_annotation`` is used when it is not ``None``;
    otherwise ``pitch_estimate_pyin`` is used. Stems for which both are
    ``None`` (or whose annotation is empty) are skipped.

    Parameters
    ----------
    mtrack : object
        Object exposing ``stems`` as a mapping whose values have
        ``pitch_annotation`` and ``pitch_estimate_pyin`` attributes. Each
        annotation is a sequence of rows whose first entry is appended to the
        times and second entry to the frequencies.

    Returns
    -------
    (np.ndarray, np.ndarray)
        Concatenated times and frequencies over all contributing stems. Both
        arrays are empty when no stem contributes anything.
    """
    annot_times = []
    annot_freqs = []
    for stem in mtrack.stems.values():
        annot = stem.pitch_annotation
        if annot is None:
            # Read the fallback only when it is needed (presumably a lazily
            # loaded property in medleydb — TODO confirm).
            annot = stem.pitch_estimate_pyin
        if annot is None:
            continue

        annot = np.array(annot).T
        if annot.size == 0:
            # An empty annotation has no rows to index.
            continue
        annot_times.append(annot[0])
        annot_freqs.append(annot[1])

    # np.concatenate raises on an empty list, so handle "nothing found" here.
    if not annot_times:
        return np.array([]), np.array([])

    return np.concatenate(annot_times), np.concatenate(annot_freqs)

In [10]:
def plot_annot_target(annot_target, hcqt, annot_times, annot_freqs):
    """Show a three-panel figure comparing the input and its target.

    From top to bottom the panels are: ``hcqt`` as an image, ``annot_target``
    as an image, and ``annot_freqs`` plotted against ``annot_times`` as
    pixel markers.

    Parameters
    ----------
    annot_target : array-like
        2-D array drawn in the second panel.
    hcqt : array-like
        2-D array drawn in the first panel.
    annot_times, annot_freqs : array-like
        Coordinates of the points drawn in the third panel.
    """
    plt.figure(figsize=(15, 30))

    # The two image panels are drawn the same way, one after the other.
    for panel, image in enumerate((hcqt, annot_target), start=1):
        plt.subplot(3, 1, panel)
        plt.imshow(image, origin='lower')
        plt.axis('auto')
        plt.axis('tight')

    plt.subplot(3, 1, 3)
    plt.plot(annot_times, annot_freqs, ',')
    plt.axis('tight')

    plt.show()

In [11]:
def get_input_output_pairs(mtrack, plot=True):
    """Build the HCQT input and annotation target for a multitrack's mix.

    Parameters
    ----------
    mtrack : object
        Multitrack exposing ``mix_path`` plus whatever
        ``get_all_pitch_annotations`` reads from it.
    plot : bool, optional
        When true (the default, matching the previous always-plot behaviour)
        the target is shown next to the first harmonic of the HCQT. Pass
        ``False`` for batch processing.

    Returns
    -------
    (np.ndarray, np.ndarray)
        The HCQT from ``compute_hcqt`` and the target from
        ``create_annotation_target``.
    """
    hcqt = compute_hcqt(mtrack.mix_path)

    freq_grid = get_freq_grid()
    time_grid = get_time_grid(len(hcqt[0][0]))

    annot_times, annot_freqs = get_all_pitch_annotations(mtrack)

    annot_target = create_annotation_target(
        freq_grid, time_grid, annot_times, annot_freqs
    )
    if plot:
        plot_annot_target(annot_target, hcqt[0], annot_times, annot_freqs)
    return hcqt, annot_target

In [12]:
def get_input_output_pairs_solo_pitch(audio_path, annot_times, annot_freqs, plot=False):
    """Build the HCQT input and annotation target for one audio file.

    Parameters
    ----------
    audio_path : str
        Audio file handed to ``compute_hcqt``.
    annot_times, annot_freqs : array-like
        Time and frequency of each pitch annotation.
    plot : bool, optional
        When true, show the target next to the first harmonic of the HCQT.

    Returns
    -------
    tuple
        ``(hcqt, annot_target, freq_grid, time_grid)``.
    """
    hcqt = compute_hcqt(audio_path)
    n_frames = len(hcqt[0][0])

    freq_grid = get_freq_grid()
    time_grid = get_time_grid(n_frames)
    annot_target = create_annotation_target(
        freq_grid, time_grid, annot_times, annot_freqs
    )
    outputs = (hcqt, annot_target, freq_grid, time_grid)

    if plot:
        plot_annot_target(annot_target, hcqt[0], annot_times, annot_freqs)

    return outputs

In [23]:
#mtracks = mdb.load_all_multitracks(dataset_version=['V1'])
#for x in mtracks:
#    print(x.track_id)

# Try the pipeline on a single excerpt: read its reference annotation file
# (two whitespace-separated numbers per line: time, frequency) and build the
# input/target pair.
audio_path = "/mnt/Stuff/Acads/UGP/data/adc2004_full_set/daisy1.wav"
annot_path = "/mnt/Stuff/Acads/UGP/data/adc2004_full_set/daisy1REF.txt"
annot_times = []
annot_freqs = []

# The context manager closes the file even if parsing fails part-way.
with open(annot_path) as f:
    for line in f:
        fields = line.split()
        if len(fields) < 2:
            # Skip blank or truncated lines instead of raising IndexError.
            continue
        annot_times.append(float(fields[0]))
        annot_freqs.append(float(fields[1]))

get_input_output_pairs_solo_pitch(audio_path, annot_times, annot_freqs)

(array([[[0.48329002, 0.48314534, 0.48320726, ..., 0.38850927,
          0.38852674, 0.38811538],
         [0.4642061 , 0.46419373, 0.46419162, ..., 0.51539157,
          0.51539096, 0.51539526],
         [0.50871164, 0.50878623, 0.5087506 , ..., 0.58393584,
          0.58393606, 0.58390607],
         ...,
         [0.4331477 , 0.43374381, 0.43534111, ..., 0.48008947,
          0.48354996, 0.48448608],
         [0.4672642 , 0.46532477, 0.45958645, ..., 0.47840018,
          0.48024777, 0.48067241],
         [0.53252684, 0.53185026, 0.52994783, ..., 0.45842706,
          0.46239014, 0.46344327]],
 
        [[0.48115214, 0.48135063, 0.48234765, ..., 0.56905525,
          0.56902212, 0.56887331],
         [0.27455808, 0.27480473, 0.27557202, ..., 0.44499416,
          0.44471649, 0.44465459],
         [0.32605897, 0.32532481, 0.32159192, ..., 0.32727775,
          0.32477605, 0.32269241],
         ...,
         [0.34111956, 0.35414147, 0.3775698 , ..., 0.44630405,
          0.46451535, 0.

In [17]:
save_dir = "../output2/training_data/"

failed_tracks = [
    'ChrisJacoby_BoothShotLincoln',
    'HezekiahJones_BorrowedHeart',
    'Handel_TornamiAVagheggiar',
    'JoelHelander_Definition',
    'JoelHelander_ExcessiveResistancetoChange',
    'JoelHelander_IntheAtticBedroom'
]

tracks = ["daisy","pop","midi","jazz","opera_fem","opera_male"]
#tracks = ["pop"]

#mtracks = mdb.load_all_multitracks(dataset_version=['V1'])
audio_dir = "/mnt/Stuff/Acads/UGP/data/adc2004_full_set/"

# For every category and index 1..5, read "<category><i>REF.txt" and compute
# the input/target pair from "<category><i>.wav". Combinations without a
# readable reference file are skipped.
for track in tracks:
    for i in range(1, 6):
        audio_path = audio_dir + track + str(i) + ".wav"
        annot_path = audio_dir + track + str(i) + "REF.txt"

        try:
            f = open(annot_path)
        except OSError:
            # Only a failure to open the file means "no such excerpt"; other
            # errors are no longer swallowed as they were by the bare except.
            continue

        annot_times = []
        annot_freqs = []
        # Close the handle once parsing is done (it used to be leaked).
        with f:
            for line in f:
                fields = line.split()
                if len(fields) < 2:
                    # Skip blank or truncated lines instead of raising IndexError.
                    continue
                annot_times.append(float(fields[0]))
                annot_freqs.append(float(fields[1]))

        save_path = os.path.join(save_dir, "{}.npz".format(track + str(i)))

        data_in, data_out, freq, time = get_input_output_pairs_solo_pitch(audio_path, annot_times, annot_freqs)
        print(data_in.shape)
        print(data_out.shape)
        #np.savez(save_path, data_in=data_in, data_out=data_out, freq=freq, time=time)
        print(track + str(i))
            


(6, 600, 3633)
(600, 3633)
daisy1
(6, 600, 3790)
(600, 3790)
daisy2
(6, 600, 2934)
(600, 2934)
daisy3
(6, 600, 2549)
(600, 2549)
daisy4
(6, 600, 3912)
(600, 3912)
pop1
(6, 600, 3350)
(600, 3350)
pop2
(6, 600, 3448)
(600, 3448)
pop3
(6, 600, 3731)
(600, 3731)
pop4
(6, 600, 3314)
(600, 3314)
midi1
(6, 600, 2863)
(600, 2863)
midi2
(6, 600, 4376)
(600, 4376)
midi3
(6, 600, 3311)
(600, 3311)
midi4
(6, 600, 2880)
(600, 2880)
jazz1
(6, 600, 2663)
(600, 2663)
jazz2
(6, 600, 2556)
(600, 2556)
jazz3
(6, 600, 1733)
(600, 1733)
jazz4
(6, 600, 2776)
(600, 2776)
opera_fem2
(6, 600, 2397)
(600, 2397)
opera_fem4
(6, 600, 3446)
(600, 3446)
opera_male3
(6, 600, 3821)
(600, 3821)
opera_male5


In [None]:
 # Disabled code kept as a string literal (nothing in it is executed). It is
 # the body of a per-multitrack loop: it refers to `mtrack`, `failed_tracks`
 # and `save_dir`, and uses `continue`, so it only makes sense inside a
 # `for mtrack in ...:` loop. It would save one .npz per predominant stem.
 '''if mtrack.track_id in failed_tracks:
        continue
    
    print(mtrack.track_id)
    
    stem = mtrack.predominant_stem
    if stem is None:
        continue

    data = stem.pitch_annotation
    save_path = os.path.join(
        save_dir,
        "{}_STEM_{}.npz".format(mtrack.track_id, stem.stem_idx)
    )

    if data is not None:
        print("    > Stem {} {}".format(stem.stem_idx, stem.instrument))
        annot = np.array(data).T
    else:
        continue

    if os.path.exists(save_path):
        print("Path exists")
        one_stem_done = True
        continue

    if not os.path.exists(stem.audio_path):
        pass
        #print("        >downloading stem...")
        #download.download_stem(mtrack, stem.stem_idx)
        #print("         done!")

    try:
        data_in, data_out, freq, time = get_input_output_pairs_solo_pitch(
            stem.audio_path, annot[0], annot[1]
        )

        np.savez(save_path, data_in=data_in, data_out=data_out, freq=freq, time=time)
    except:
        print("    > Something failed :(")'''
    

In [None]:
# Build the input/target pair for one example multitrack, using every stem's
# pitch annotations (see get_input_output_pairs).
mtrack = mdb.MultiTrack("MusicDelta_Beatles")
data_input, data_target = get_input_output_pairs(mtrack=mtrack)