In [5]:
# This package is not pip-installed, so the parent directory is added to sys.path by hand
# to let the package import below resolve.
import os
import sys

# os.pardir is the platform's parent-directory token (".."), resolved against the current working directory
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    # Only append once, so re-running this cell does not pile up duplicate entries
    sys.path.append(module_path)

# Import the class from data_preparation.py
from model_development.data_preparation import data_preparation

In [8]:
# Instantiate the class. A few parameters need to be specified here:
#   directory_path - path to the root directory of the dataset (this class assumes the GMD / E-GMD dataset)
#   dataset        - type of data used; either "gmd" or "egmd" is accepted for now
#   sample_ratio   - fraction of the dataset to use when creating the training/test/eval dataset
#   diff_threshold - filters out midi/audio pairs whose duration difference is greater than this value (in seconds)
#
# For demo purposes, we sample 1% of the dataset to create the training data. The dataset is too big
# to upload to GitHub, so please download it to your computer and update the directory_path parameter.
data_container = data_preparation(
    directory_path=r'',
    dataset='egmd',
    sample_ratio=0.01,
    diff_threshold=1,
)

Filtering out the midi/audio pair that has a duration difference > 1 second


  0%|          | 0/455 [00:00<?, ?it/s]

In [9]:
# Once the class is instantiated, a few objects are available immediately.
# First, midi_wav_map, which contains the file names of the track pairs in your sample
data_container.midi_wav_map

Unnamed: 0,track_id,midi_filename,audio_filename,duration,wav_length,diff
0,7278,drummer1/session1/149_latin-brazilian-baiao_95...,drummer1/session1/149_latin-brazilian-baiao_95...,2.526327,2.526327,0.000000e+00
1,21474,drummer1/session3/9_rock_135_beat_4-4_26.midi,drummer1/session3/9_rock_135_beat_4-4_26.wav,197.546735,197.546735,2.842171e-14
2,44801,drummer7/session2/8_rock_95_beat_4-4_56.midi,drummer7/session2/8_rock_95_beat_4-4_56.wav,108.260113,108.260113,0.000000e+00
3,43370,drummer4/session1/5_latin-brazilian_184_beat_4...,drummer4/session1/5_latin-brazilian_184_beat_4...,38.543515,38.543515,7.105427e-15
4,20031,drummer1/session2/74_punk_144_fill_4-4_54.midi,drummer1/session2/74_punk_144_fill_4-4_54.wav,1.644263,1.644263,0.000000e+00
...,...,...,...,...,...,...
450,5919,drummer1/session1/113_funk_95_fill_4-4_41.midi,drummer1/session1/113_funk_95_fill_4-4_41.wav,5.052630,5.052630,0.000000e+00
451,4264,drummer7/session3/24_hiphop_67_beat_4-4_16.midi,drummer7/session3/24_hiphop_67_beat_4-4_16.wav,96.577596,96.577596,0.000000e+00
452,2918,drummer5/session2/4_reggae_141_beat_4-4_55.midi,drummer5/session2/4_reggae_141_beat_4-4_55.wav,206.830680,206.830680,0.000000e+00
453,15440,drummer1/session2/125_afrocuban-bembe_122_fill...,drummer1/session2/125_afrocuban-bembe_122_fill...,1.967234,1.967234,0.000000e+00


In [10]:
# Also midi_note_map, which contains the MIDI note mapping information
data_container.midi_note_map

{36: 'KD',
 38: 'SD',
 40: 'SD',
 37: 'SD',
 48: 'TT',
 50: 'TT',
 45: 'TT',
 47: 'TT',
 43: 'TT',
 58: 'TT',
 46: 'HH',
 26: 'HH',
 42: 'HH',
 22: 'HH',
 44: 'HH',
 49: 'CC',
 57: 'CC',
 55: 'CC',
 52: 'CC',
 51: 'RC',
 59: 'RC',
 53: 'RC',
 39: 'CB',
 54: 'CB',
 56: 'CB'}

In [11]:
# To create the training data, simply call create_audio_set.
# A few parameters to be aware of:
#
# pad_before controls the padding added to the beginning of each clip. The default setting is 0.02 seconds.
#
# pad_after controls the padding added to the end of each clip. The default setting is 0.02 seconds.
#
# fix_length controls the total length of each extracted clip; it accepts a value in seconds. If it is not None,
# the function ignores the pad_after parameter, because fix_length already adds padding to the end of each clip.
#
# batching controls the batching implementation. Only set this to True if you are processing >10% of the egmd
# dataset. The egmd data has a size of ~110Gb, so there is no way to hold it all in your computer's memory
# unless you have a very powerful machine.
# By default, it divides the dataset into 50 batches and creates 50 pkl files. If batching is True, the function
# also does the train/test split automatically, so you will see a set of 50 training pkl files,
# 50 val pkl files, etc.
#
# dir_path controls the directory where the output pkl files are stored.
data_container.create_audio_set(
    pad_before=0.02,
    pad_after=0,
    fix_length=0.2,
    batching=False,
    dir_path='',
)

Generating Dataset


  0%|          | 0/455 [00:00<?, ?it/s]

  0%|          | 0/119968 [00:00<?, ?it/s]

Done!


In [12]:
# If you are not batching, the full dataframe is saved to the notes_collection attribute of the class.
# The output dataframe contains the training input "audio_wav" in numpy array format, along with the label.
# A copy of the df is also saved (as a .pkl file) in the root directory (if dir is not specified).
data_container.notes_collection

Unnamed: 0,label,start,end,track_id,audio_wav,sampling_rate
0,KD,0.01,0.11,7278,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",44100
1,TT,0.15,0.25,7278,"[0.010437012, 0.010375977, 0.010284424, 0.0101...",44100
2,TT,0.29,0.39,7278,"[-0.009155273, -0.009185791, -0.009155273, -0....",44100
3,TT,0.45,0.55,7278,"[0.0010070801, 0.0010681152, 0.0011291504, 0.0...",44100
4,KD,0.46,0.56,7278,"[-0.0052490234, -0.0053100586, -0.0054016113, ...",44100
...,...,...,...,...,...,...
119963,SD,215.72,215.79,9370,"[-0.005340576, -0.007232666, -0.0064697266, -0...",44100
119964,HH,215.96,216.03,9370,"[0.0011901855, 0.0007019043, 0.00021362305, 0....",44100
119965,TT,216.13,216.20,9370,"[-9.1552734e-05, -6.1035156e-05, -9.1552734e-0...",44100
119966,TT,216.44,216.51,9370,"[-0.02154541, -0.021636963, -0.021728516, -0.0...",44100


In [None]:
# If you are processing the full dataset with batching enabled, you will need to read the pickle files yourself and combine them for the training phase