In [1]:
from google.colab import drive
drive.mount('/content/gdrive')
%cd gdrive/MyDrive/EE698V/

Mounted at /content/gdrive
/content/gdrive/MyDrive/EE698V


In [2]:
# Notebook-wide settings for audio loading and STFT slicing.
sampling_rate = 44100    # sample rate in Hz
min_duration = 0.5       # in seconds
frames = 50              # STFT columns per slice (50 hops of 10 ms = 0.5 s)
row_len = 1024 // 2 + 1  # STFT rows: 1 + n_fft/2, with n_fft = 1024
NUM_DATA = 5             # number of training samples (not referenced in the cells shown here)

In [3]:
import numpy as np
import pandas as pd

import librosa
from keras.utils.np_utils import to_categorical

In [4]:
def wav2feat(wavfile):
    """Convert one audio file into a list of fixed-size magnitude-STFT slices.

    The file is loaded as mono at ``sampling_rate`` Hz and transformed with a
    1024-point STFT (20 ms Hann window, 10 ms hop); only the magnitude is kept.
    The spectrogram is then brought to exactly ``row_len`` rows (random crop
    when it has more rows, zero padding at a random offset when it has fewer)
    and cut into consecutive, non-overlapping blocks of ``frames`` columns.
    Columns left over after the last full block are discarded.

    Depends on the notebook-level settings ``sampling_rate``, ``row_len`` and
    ``frames``.

    Parameters
    ----------
    wavfile : str
        Path of the audio file to load.

    Returns
    -------
    list of numpy.ndarray
        One array of shape ``(row_len, frames)`` per slice. An empty list is
        returned when the spectrogram has fewer than ``frames`` columns.
    """
    # Use the configured sample rate; no separate ``Fs`` global exists.
    x, _ = librosa.load(wavfile, sr=sampling_rate, mono=True)
    hop = int(0.01 * sampling_rate)  # 10ms
    win = int(0.02 * sampling_rate)  # 20ms
    X = librosa.stft(x, n_fft=1024, hop_length=hop, win_length=win,
                     window='hann', center=True, pad_mode="reflect")
    X = np.abs(X)

    # Too short to yield even a single slice.
    if X.shape[1] < frames:
        return []

    n_rows = X.shape[0]
    if n_rows > row_len:
        # Keep a randomly positioned band of row_len rows.
        offset = np.random.randint(n_rows - row_len)
        X = X[offset:offset + row_len, :]
    elif n_rows < row_len:
        # Zero-pad above and below, placing the data at a random row offset.
        missing = row_len - n_rows
        offset = np.random.randint(missing)
        X = np.pad(X, ((offset, missing - offset), (0, 0)), "constant")

    # Split along time into full blocks of `frames` columns.
    n_slices = X.shape[1] // frames
    return [X[:, k * frames:(k + 1) * frames] for k in range(n_slices)]

def prepare_data(df, data_dir):
    """Build the feature array and one-hot targets for every file listed in ``df``.

    Each file named in ``df["slice_file_name"]`` is read from ``data_dir`` and
    passed to ``wav2feat``; every slice it returns becomes one sample, labelled
    with the ``class`` value from the same row of ``df``. Progress is printed
    every 200 files.

    Depends on the notebook-level names ``wav2feat``, ``label_idx`` and
    ``n_classes``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with the columns ``slice_file_name`` and ``class``.
    data_dir : str
        Directory that contains the audio files.

    Returns
    -------
    X : numpy.ndarray
        Stacked slices with a trailing singleton axis added to each slice.
    Y : numpy.ndarray
        One-hot encoded class indices with ``n_classes`` columns.

    Raises
    ------
    ValueError
        If no file produced any slice, so there is nothing to stack.

    Notes
    -----
    The arrays are also written with ``np.save`` to the fixed paths
    ``train_data/train_slices`` and ``train_data/slices_target``; these paths
    do not follow ``data_dir``.
    """
    print("Number of training samples processed: ")
    X = []
    labels = []
    # Read the label from the same row of `df` as the file name, so the
    # function is correct for any frame passed in, not only the global `train`.
    for i, (fname, label) in enumerate(zip(df["slice_file_name"], df["class"])):
        fpath = data_dir + "/" + fname
        for stft in wav2feat(fpath):
            X.append(np.expand_dims(stft, axis=-1))
            labels.append(label)

        if i != 0 and i % 200 == 0:
            print(i, end=".. ")
    print(df.shape[0], end=".. ")
    print("Done!")

    if not X:
        # np.stack would fail on an empty list with a less helpful message.
        raise ValueError("No STFT slices were extracted from the files listed in df")

    X = np.stack(X)
    labels = pd.Series(labels)
    Y = labels.apply(lambda x: label_idx[x])
    Y = to_categorical(Y, num_classes=n_classes)

    np.save("train_data/train_slices", X)
    np.save("train_data/slices_target", Y)
    return X, Y

In [5]:
# Annotation table: each row names one audio file and its class.
train = pd.read_csv("labels_train.csv")

# Distinct class names, and the integer index assigned to each of them.
LABELS = list(train["class"].unique())
n_classes = len(LABELS)
label_idx = dict(zip(LABELS, range(n_classes)))

# Extract the STFT slices and one-hot targets for the whole table.
X, Y = prepare_data(train, "train_data")

Number of training samples processed: 
200.. 400.. 600.. 800.. 1000.. 1200.. 1400.. 1600.. 1761.. Done!


In [6]:
# Show the shapes of the feature array and of the target array side by side.
print(*(arr.shape for arr in (X, Y)))

(12488, 513, 50, 1) (12488, 10)
