In [1]:
from google.colab import drive
drive.mount('/content/gdrive')
%cd gdrive/MyDrive/EE698V/

Mounted at /content/gdrive
/content/gdrive/MyDrive/EE698V


In [2]:
# Spectrogram geometry. The target shape is derived from the audio/STFT
# parameters instead of being hard-coded, so the numbers stay consistent.
sampling_rate = 44100                  # Hz; default rate that wav2feat loads audio at
audio_duration = 4                     # seconds of audio represented by one example
n_fft = 1024                           # FFT size; must match the n_fft given to librosa.stft
hop_len = int(0.01 * sampling_rate)    # 10 ms hop between STFT frames (441 samples)

# audio_length = sampling_rate * audio_duration
row_len = 1 + n_fft // 2                                    # spectrogram rows (frequency bins): 513
col_len = 1 + (sampling_rate * audio_duration) // hop_len   # spectrogram columns (time frames): 401

In [3]:
import numpy as np
import pandas as pd

import librosa
from keras.utils.np_utils import to_categorical

In [4]:
def _fit_axis(arr, target_len, axis):
    """Randomly crop or zero-pad `arr` along `axis` to exactly `target_len` entries."""
    current_len = arr.shape[axis]
    slack = abs(current_len - target_len)
    if slack == 0:
        return arr

    # randint's upper bound is exclusive; the +1 makes the last valid
    # placement (offset == slack) reachable as well.
    offset = np.random.randint(slack + 1)

    if current_len > target_len:
        # Too long: keep a random window of target_len entries.
        window = [slice(None)] * arr.ndim
        window[axis] = slice(offset, offset + target_len)
        return arr[tuple(window)]

    # Too short: place the data at a random position inside a zero canvas.
    pad_width = [(0, 0)] * arr.ndim
    pad_width[axis] = (offset, slack - offset)
    return np.pad(arr, pad_width, "constant")


def wav2feat(wavfile, Fs = sampling_rate):
    """Load an audio file and return its fixed-size STFT magnitude.

    The file is loaded as mono at `Fs` Hz and transformed with a 1024-point
    STFT (20 ms Hann window, 10 ms hop). The magnitude spectrogram is then
    randomly cropped or zero-padded along each axis so the result always has
    shape (row_len, col_len). The placement is random, so repeated calls on
    the same file can differ unless NumPy's global RNG is seeded.

    Parameters
    ----------
    wavfile : path of the audio file to load.
    Fs : sampling rate in Hz used for loading (default: `sampling_rate`).

    Returns
    -------
    numpy.ndarray of shape (row_len, col_len) holding |STFT|.
    """
    x, _ = librosa.load(wavfile, sr = Fs, mono = True)
    hop = int(0.01*Fs) # 10ms
    win = int(0.02*Fs) # 20ms
    X = librosa.stft(x, n_fft = 1024, hop_length = hop, win_length = win, window = 'hann', center = True, pad_mode = "reflect")
    X = np.abs(X)

    X = _fit_axis(X, row_len, axis = 0) # frequency bins
    X = _fit_axis(X, col_len, axis = 1) # time frames

    return X

def prepare_data(df, data_dir):
    """Build the input tensor and one-hot targets for every clip listed in `df`.

    Parameters
    ----------
    df : pandas.DataFrame with a "slice_file_name" column (file names
        relative to `data_dir`) and a "label_idx" column (integer class ids).
    data_dir : directory that contains the audio files.

    Returns
    -------
    X : numpy.ndarray of shape (len(df), row_len, col_len, 1) with one STFT
        magnitude per clip.
    Y : one-hot encoded labels produced by `to_categorical` with
        `n_classes` classes (`n_classes` is read from the notebook globals).

    Side effects
    ------------
    Prints progress and saves X / Y to "train_data/input_data.npy" and
    "train_data/input_target.npy".
    """
    print("Number of training samples processed: ")
    n_samples = df.shape[0]
    X = np.empty(shape = (n_samples, row_len, col_len, 1))
    for i, fname in enumerate(df["slice_file_name"]):
        fpath = data_dir + "/" + fname
        # Add a trailing channel axis so each example is (row_len, col_len, 1).
        X[i, ] = np.expand_dims(wav2feat(fpath), axis = -1)

        if(i != 0 and i%200 == 0):
            print(i, end = ".. ")
    print(n_samples, end = ".. ")
    print("Done!")

    # Take the labels from the frame that was passed in, not from the global
    # `train`, so X and Y always describe the same rows.
    Y = to_categorical(df["label_idx"], num_classes = n_classes)
    np.save("train_data/input_data", X)
    np.save("train_data/input_target", Y)
    return X, Y

In [5]:
# Load the training annotations and attach an integer id to every class name.
train = pd.read_csv("labels_train.csv")

# Class names in order of first appearance; a name's position is its id.
LABELS = list(pd.unique(train["class"]))
label_idx = dict(zip(LABELS, range(len(LABELS))))
train["label_idx"] = train["class"].map(label_idx)

n_classes = len(LABELS)

In [6]:
# Extract spectrogram features and one-hot targets for the whole training set.
X_train, Y_train = prepare_data(df = train, data_dir = "train_data")

Number of training samples processed: 
200.. 400.. 600.. 800.. 1000.. 1200.. 1400.. 1600.. 1761.. Done!
