In [4]:
# Mount Google Drive inside the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')
# Change into the notebook folder; the relative paths used in the cells below are resolved from here.
%cd /content/drive/MyDrive/Colab Notebooks/
%pwd

Mounted at /content/drive
/content/drive/MyDrive/Colab Notebooks


'/content/drive/MyDrive/Colab Notebooks'

In [10]:
!pip install colab-env -qU
import json
import os
import math
import librosa
import numpy as np
from sklearn.preprocessing import LabelEncoder
import utils

  Building wheel for colab-env (setup.py) ... [?25l[?25hdone


Here we set the path to the audio files and choose a file name for the output. The code then walks through all audio files, splits each song into segments and extracts MFCC features for every segment. It is recommended to run this in several batches (a few folders at a time), as Colab is prone to crashing.

In [None]:
# Root folder whose sub-folders are scanned for audio files.
DATASET_PATH = "gognin"
# File the extracted features are written to.
JSON_PATH = "Moppur_031_060.json"
# Sampling rate handed to librosa.load, in Hz.
SAMPLE_RATE = 22050
TRACK_DURATION = 30 # measured in seconds
# Number of samples in one full-length track at SAMPLE_RATE.
SAMPLES_PER_TRACK = SAMPLE_RATE * TRACK_DURATION


def save_mfcc(dataset_path, json_path, num_mfcc=20, n_fft=2048, hop_length=512, num_segments=5,
              min_samples=650000):
    """Extract per-segment MFCCs for every audio file below ``dataset_path`` and save them as JSON.

    Every sufficiently long track is cut into ``num_segments`` consecutive slices of
    ``SAMPLES_PER_TRACK / num_segments`` samples. A slice is stored only when its MFCC
    matrix has exactly the expected number of frames, so all stored matrices share one shape.

    Args:
        dataset_path: Root folder. Audio files are read from its sub-folders only; files
            lying directly in the root are ignored.
        json_path: File the result dictionary is written to (overwritten if present).
        num_mfcc: Number of MFCC coefficients per frame.
        n_fft: FFT window size passed to librosa.
        hop_length: Hop between successive frames, in samples.
        num_segments: Number of slices each track is divided into.
        min_samples: Tracks with this many samples or fewer (after loading at
            ``SAMPLE_RATE``) are skipped as too short.

    Returns:
        None. The JSON file holds four parallel lists, one entry per stored segment:
        "trackId" (file name without extension and leading zeros), "labels" (index of
        the folder in the directory walk, minus one), "segment" (slice index) and
        "mfcc" (frames x coefficients, as nested lists).
    """
    # dictionary to store track ids, labels, segment indices and MFCCs
    data = {
        "trackId": [],
        "labels": [],
        "segment": [],
        "mfcc": []
    }
    audio_extensions = ('.wav', '.aif', '.aiff', '.flac', '.mp3', '.m4a')
    samples_per_segment = int(SAMPLES_PER_TRACK / num_segments)
    num_mfcc_vectors_per_segment = math.ceil(samples_per_segment / hop_length)

    # os.walk yields the root exactly as os.fspath() returns it, so compare by value
    # against that string (identity comparison on strings is not reliable).
    root = os.fspath(dataset_path)

    # loop through all sub-folders
    for i, (dirpath, dirnames, filenames) in enumerate(os.walk(dataset_path)):
        # Sorting in place makes the traversal order, and therefore the folder-index
        # labels, independent of the order the filesystem happens to report.
        dirnames.sort()
        filenames.sort()

        # files lying directly in the root folder are not part of the dataset
        if dirpath == root:
            continue

        for f in filenames:
            if not f.endswith(audio_extensions):
                continue

            # load audio file
            file_path = os.path.join(dirpath, f)
            signal, sample_rate = librosa.load(file_path, sr=SAMPLE_RATE)

            # skip clips that are too short to be a full-length track
            if signal.shape[0] <= min_samples:
                continue

            # Drop the extension first, then the zero padding; an all-zero name
            # becomes "0" instead of an empty string.
            stem = f.rsplit('.', 1)[0]
            trackId = stem.lstrip('0') or '0'

            # process all segments of audio file
            for d in range(num_segments):
                # calculate start and finish sample for current segment
                start = samples_per_segment * d
                finish = start + samples_per_segment

                # extract mfcc; audio and sample rate must be passed by keyword,
                # recent librosa releases no longer accept them positionally
                mfcc = librosa.feature.mfcc(y=signal[start:finish], sr=sample_rate,
                                            n_mfcc=num_mfcc, n_fft=n_fft, hop_length=hop_length)
                mfcc = mfcc.T

                # store only mfcc feature with expected number of vectors
                if len(mfcc) == num_mfcc_vectors_per_segment:
                    data["mfcc"].append(mfcc.tolist())
                    data["labels"].append(i-1)
                    data["trackId"].append(trackId)
                    data["segment"].append(d)
                    print("{}, segment:{}".format(file_path, d+1))

    # save MFCCs to json file
    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)
         
# Run the extraction for the configured folder, overriding the default of 5 segments per track with 10.
if __name__ == "__main__":
    save_mfcc(DATASET_PATH, JSON_PATH, num_segments=10)

Loading the data from the JSON files into NumPy arrays.

In [2]:
def load_data(dataset_path):
  """Read a feature JSON file and return its MFCCs and track ids as NumPy arrays.

  Args:
    dataset_path: path of a JSON file containing the keys 'mfcc' and 'trackId'.

  Returns:
    (inputs, targets): the 'mfcc' entries and the 'trackId' entries, each converted
    with np.array. The track ids are what later cells use to look up the genre.
  """
  with open(dataset_path, "r") as json_file:
    contents = json.load(json_file)
  return np.array(contents['mfcc']), np.array(contents['trackId'])

In [6]:
# First batch of extracted features and their track ids.
inputs1, targets1 = load_data("Moppur_000_030.json") # folders 000-030

In [7]:
# Second batch of extracted features and their track ids.
inputs2, targets2 = load_data("trjatiuogeinn.json") # folders 031-090

In [8]:
# Third batch of extracted features and their track ids.
inputs3, targets3 = load_data("niutiuogeinn.json") # folders 091-155

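Building the training, validation and test sets from the inputs and targets, making sure that every segment of a song ends up in the split (and with the genre label) that the metadata assigns to that song's track id.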

In [11]:
# Read the metadata tables with the imported utils module.
tracks = utils.load('Data/fma_metadata/tracks.csv')
genres = utils.load('Data/fma_metadata/genres.csv')

# Row masks shared by the three selections below.
split_name = tracks['set', 'split']
in_small_subset = tracks['set', 'subset'] == 'small'

# Top-level genre per track of the 'small' subset, one selection per split.
train = tracks[(split_name == 'training') & in_small_subset]['track', 'genre_top']
val = tracks[(split_name == 'validation') & in_small_subset]['track', 'genre_top']
test = tracks[(split_name == 'test') & in_small_subset]['track', 'genre_top']

train.shape, val.shape, test.shape

((6400,), (800,), (800,))

In [19]:
# Fresh accumulators that build_data fills: one label list and one feature list per split.
y_train, y_test, y_val = [], [], []
X_train_list, X_test_list, X_val_list = [], [], []

def build_data(inputs,targets):
  """Distribute segments over the train / validation / test accumulators.

  For each row the track id in ``targets`` is converted to int and looked up in the
  index of ``val``, then ``test``; anything found in neither is treated as training
  data. The row of ``inputs`` is appended to the matching feature list and the genre
  read from the matching series is appended to the matching label list.

  Args:
    inputs: array whose first axis enumerates the segments.
    targets: track id of every segment, convertible with int().

  Returns:
    None; the module-level lists are extended in place.
  """
  for row in range(inputs.shape[0]):
    track_id = int(targets[row])

    # Pick the destination lists and the genre lookup for this track.
    if track_id in val.index:
      features_out, labels_out, genre_lookup = X_val_list, y_val, val
    elif track_id in test.index:
      features_out, labels_out, genre_lookup = X_test_list, y_test, test
    else:
      features_out, labels_out, genre_lookup = X_train_list, y_train, train

    features_out.append(inputs[row])
    labels_out.append(genre_lookup[track_id])

# Feed the three loaded batches through build_data, in the order they were loaded.
for batch_inputs, batch_targets in ((inputs1, targets1),
                                    (inputs2, targets2),
                                    (inputs3, targets3)):
  build_data(batch_inputs, batch_targets)

# Turn the accumulated feature lists into arrays (the label lists stay plain lists).
X_train, X_test, X_val = [
  np.array(segments) for segments in (X_train_list, X_test_list, X_val_list)
]

In [29]:
# Show the array shapes next to the label counts; each feature array should hold as many segments as its label list.
X_train.shape,X_test.shape,X_val.shape, len(y_train), len(y_test), len(y_val)

((28218, 259, 20), (3564, 259, 20), (3544, 259, 20), 28218, 3564, 3544)

Saving the NumPy arrays to files so that the data does not have to be rebuilt every time, as that takes a lot of time.

In [22]:
# Write every split to its own file in the current working directory.
# NOTE(review): the loading cell reads these files from 'time_dependant_fylki/' -
# presumably they are moved there by hand; confirm.
for file_name, values in (
    ('x_train_file', X_train),
    ('x_test_file', X_test),
    ('x_val_file', X_val),
    ('y_train_file', y_train),
    ('y_test_file', y_test),
    ('y_val_file', y_val),
):
  np.save(file_name, values)

Code to load arrays from files

In [None]:
# Restore all six arrays from the files saved earlier, in one unpacking.
X_train, X_test, X_val, y_train, y_test, y_val = [
  np.load(array_path) for array_path in (
    'time_dependant_fylki/x_train_file.npy',
    'time_dependant_fylki/x_test_file.npy',
    'time_dependant_fylki/x_val_file.npy',
    'time_dependant_fylki/y_train_file.npy',
    'time_dependant_fylki/y_test_file.npy',
    'time_dependant_fylki/y_val_file.npy',
  )
]