In [21]:
import os
import re
import matplotlib.pyplot as plt

#for loading and visualizing audio files
import librosa
import librosa.display
import numpy as np

In [19]:
dataset_folder = '../datasets/maestro-v3.0.0'
folder_data = os.listdir(dataset_folder)
valid_folder_data = [folder for folder in folder_data if re.match(r'\d{4}', folder)]
print(valid_folder_data)

['2013', '2014', '2015', '2008', '2006', '2009', '2017', '2018', '2011', '2004']


In [7]:
def get_general_data(folder_year):
  data_path = '../datasets/maestro-v3.0.0/' + folder_year
  general_data = os.listdir(data_path)
  return general_data

In [8]:
def get_wav_files(general_data):
  audio_files = []
  for file_name in general_data:
    if file_name.endswith('.wav'):
      audio_files.append(file_name)
  print(audio_files)
  print('Original size: ', len(general_data))
  print('Filtered size: ', len(audio_files))

  return audio_files


In [9]:
def remove_potential_duplicates(data):
  return list(set(data))

In [22]:
def create_folder_if_none_exists(folder_name):
  if not os.path.exists(folder_name):
    os.makedirs(folder_name)

In [23]:
def generate_spectograms_for_audio_files(folder_data, folder_year):
  print('-> Working on folder: ', folder_year)
  count = 1
  for file_name in folder_data:
    x, sr = librosa.load('../datasets/maestro-v3.0.0/'+folder_year+'/'+file_name)
    plt.figure(figsize=(14, 5))
    X = librosa.stft(x)
    sgram_mag, _ = librosa.magphase(X)
    mel_scale_sgram = librosa.feature.melspectrogram(S=sgram_mag, sr=sr)
    mel_sgram = librosa.amplitude_to_db(mel_scale_sgram, ref=np.max)
    librosa.display.specshow(mel_sgram, sr=sr, x_axis=None, y_axis=None)
    plt.axis('off')
    plt.tight_layout(pad=0)
    name_for_save = file_name.replace('.wav', '')+'.png'
    destination_folder = 'spectrograms/'+folder_year
    create_folder_if_none_exists(destination_folder)
    plt.savefig(destination_folder+'/'+name_for_save)
    print('Processed file # '+str(count)+': ', name_for_save)
    count += 1
    plt.clf()
    plt.close()

In [24]:
for folder_year in valid_folder_data:
  general_data = get_general_data(folder_year)
  audio_files_only = get_wav_files(general_data)
  audio_files_only = remove_potential_duplicates(audio_files_only)
  generate_spectograms_for_audio_files(audio_files_only, folder_year)

['ORIG-MIDI_03_7_6_13_Group__MID--AUDIO_09_R1_2013_wav--1.wav', 'ORIG-MIDI_02_7_6_13_Group__MID--AUDIO_05_R1_2013_wav--1.wav', 'ORIG-MIDI_01_7_8_13_Group__MID--AUDIO_07_R2_2013_wav--2.wav', 'ORIG-MIDI_01_7_6_13_Group__MID--AUDIO_02_R1_2013_wav--2.wav', 'ORIG-MIDI_02_7_6_13_Group__MID--AUDIO_08_R1_2013_wav--1.wav', 'ORIG-MIDI_02_7_6_13_Group__MID--AUDIO_06_R1_2013_wav--4.wav', 'ORIG-MIDI_03_7_6_13_Group__MID--AUDIO_09_R1_2013_wav--2.wav', 'ORIG-MIDI_02_7_6_13_Group__MID--AUDIO_05_R1_2013_wav--2.wav', 'ORIG-MIDI_01_7_8_13_Group__MID--AUDIO_07_R2_2013_wav--1.wav', 'ORIG-MIDI_02_7_6_13_Group__MID--AUDIO_08_R1_2013_wav--2.wav', 'ORIG-MIDI_01_7_6_13_Group__MID--AUDIO_02_R1_2013_wav--1.wav', 'ORIG-MIDI_01_7_6_13_Group__MID--AUDIO_01_R1_2013_wav--4.wav', 'ORIG-MIDI_02_7_6_13_Group__MID--AUDIO_08_R1_2013_wav--3.wav', 'ORIG-MIDI_01_7_7_13_Group__MID--AUDIO_14_R1_2013_wav--4.wav', 'ORIG-MIDI_02_7_6_13_Group__MID--AUDIO_05_R1_2013_wav--3.wav', 'ORIG-MIDI_03_7_6_13_Group__MID--AUDIO_09_R1_2013_wav-