In [None]:
from google.colab import drive
drive.mount('/content/gdrive')
%cd gdrive/MyDrive/EE698V/

In [None]:
NUM = 10 # Total number of saved models whose predictions are combined
sampling_rate = 44100 # Sampling rate used in the col_len formula below (presumably Hz - confirm)
row_len = 513 # Number of rows (axis 0) every input array is cropped/padded to: 1 + n_fft/2
col_len = 401 # Number of columns (axis 1) every input array is cropped/padded to: 1 + (sampling_rate*audio_duration)/(0.01*sampling_rate); 0.01*sampling_rate = hop
# audio_length = sampling_rate*audio_duration
# audio_duration = 4 seconds

outname = "Task1.csv" # Output CSV, written without a header row; its columns are "File" and "Class"

In [None]:
import numpy as np
import pandas as pd

from tensorflow import config, distribute
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization
from keras.callbacks import LearningRateScheduler, ModelCheckpoint, EarlyStopping

In [None]:
# Discover the GPUs visible to TensorFlow and echo them for the reader.
gpus = config.list_physical_devices('GPU')
print(gpus)

# Exactly one GPU: pin everything to it. Any other count (including zero)
# goes through MirroredStrategy.
strategy = (
    distribute.OneDeviceStrategy(device="/gpu:0")
    if len(gpus) == 1
    else distribute.MirroredStrategy()
)

# Turn on the graph optimizer's automatic mixed-precision rewrite.
config.optimizer.set_experimental_options({"auto_mixed_precision": True})

In [None]:
def _fit_axis(X, axis, target_len):
    """Randomly crop or zero-pad ``X`` along ``axis`` so its length is ``target_len``."""
    current_len = X.shape[axis]
    if current_len == target_len:
        return X

    # Number of positions the crop window (or the padded array) can slide by.
    # randint's upper bound is exclusive, hence the +1: without it the last
    # valid placement could never be drawn.
    max_offset = abs(current_len - target_len)
    offset = np.random.randint(max_offset + 1)

    if current_len > target_len:
        # Too long: keep a random contiguous window of target_len entries.
        window = [slice(None)] * X.ndim
        window[axis] = slice(offset, offset + target_len)
        return X[tuple(window)]

    # Too short: split the missing length randomly between both sides and
    # fill it with zeros (np.pad's default constant value).
    pad_width = [(0, 0)] * X.ndim
    pad_width[axis] = (offset, max_offset - offset)
    return np.pad(X, pad_width, "constant")


def dim_correction(npfile):
    """Load a saved array and force it to shape ``(row_len, col_len)``.

    Each axis is handled independently, axis 0 first: an axis longer than its
    target is cropped to a randomly positioned window, a shorter one is
    zero-padded with the padding split randomly between both sides, and an
    axis that already matches is left alone. Offsets come from NumPy's global
    random state, so the result is only reproducible if the caller seeds it.

    Parameters
    ----------
    npfile : str or path-like
        Path to a ``.npy`` file readable by ``np.load``.

    Returns
    -------
    numpy.ndarray
        Array whose first two dimensions are ``(row_len, col_len)``.
    """
    X = np.load(npfile)
    X = _fit_axis(X, 0, row_len)
    X = _fit_axis(X, 1, col_len)
    return X

def prepare_data(df, data_dir):
    """Load every file listed in ``df["File"]`` into one 4-D array.

    For each entry the file ``<data_dir>/<name>.npy`` is passed through
    ``dim_correction`` and stored with a trailing singleton axis appended.
    A progress counter is printed every 200 files, followed by the total.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a "File" column holding file names without extension.
    data_dir : str
        Directory containing the ``.npy`` files.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(df), row_len, col_len, 1)``.
    """
    print("Number of training samples processed: ")
    n_files = df.shape[0]
    batch = np.empty(shape = (n_files, row_len, col_len, 1))

    for idx, stem in enumerate(df["File"]):
        stft = dim_correction("/".join((data_dir, stem)) + ".npy")
        # Append the trailing singleton axis expected by the batch array.
        batch[idx] = stft[..., np.newaxis]

        # Progress marker on every 200th file, skipping the very first one.
        if idx and not idx % 200:
            print(idx, end = ".. ")

    print(n_files, end = ".. ")
    print("Done!")

    return batch

def build_model():
    """Assemble and compile the convolutional classifier.

    The network takes inputs of shape ``(row_len, col_len, 1)`` and ends in a
    softmax over ``n_classes`` outputs (``n_classes`` is read from the
    notebook's globals when the function is called). Layers are created in
    the same order as before so previously saved weights still line up.

    Returns
    -------
    keras.models.Sequential
        Model compiled with the Adam optimizer, categorical cross-entropy
        loss and an accuracy metric.
    """
    # Three strided conv stages, each followed by batch normalisation; the
    # first two are additionally down-sampled by max pooling.
    feature_extractor = [
        Conv2D(32, kernel_size = 7, strides = 2, activation = 'relu', input_shape = (row_len, col_len, 1)),
        BatchNormalization(),
        MaxPool2D(pool_size = 4),
        Conv2D(64, kernel_size = 5, strides = 2, activation = 'relu'),
        BatchNormalization(),
        MaxPool2D(pool_size = 4),
        Conv2D(128, kernel_size = 3, strides = 2, activation = 'relu'),
        BatchNormalization(),
    ]

    # Dense head with dropout before and after the hidden layer.
    classifier_head = [
        Flatten(),
        Dropout(0.4),
        Dense(32, activation = 'relu'),
        Dropout(0.4),
        Dense(n_classes, activation = 'softmax'),
    ]

    model = Sequential(feature_extractor + classifier_head)
    model.compile(optimizer = "adam", loss = "categorical_crossentropy", metrics = ["accuracy"])

    return model

In [None]:
# Training labels (used only for the class vocabulary) and the list of files
# to classify; the latter has no header row.
train = pd.read_csv("labels_train.csv")
test = pd.read_csv("predict_test.csv", header = None)
test.columns = ["File", "Class"]
# Independent copy: the submission frame has its "Class" column overwritten
# later, and that must not silently modify `test` through an alias.
sub = test.copy()

# Class names in order of first appearance in the training labels. Vote indices
# are mapped back to names through this list, so the order presumably has to
# match the one used when the saved models were trained - confirm.
LABELS = list(train['class'].unique())
label_idx = {label: i for i, label in enumerate(LABELS)}
train["label_idx"] = train["class"].apply(lambda x : label_idx[x])

n_classes = len(LABELS)

X_test = prepare_data(test, "test_data")

# Stored normalisation statistics: entry 0 is used as the mean, entry 1 as the
# standard deviation (presumably computed on the training set - confirm).
normalize = np.load("models/normalize.npy")
MEAN = normalize[0]
STD = normalize[1]
X_test = (X_test - MEAN)/STD

Number of training samples processed: 
10.. Done!


In [None]:
# Majority-vote ensemble: each of the NUM saved models casts one vote per test
# sample for its highest-probability class.
votes = np.zeros((test.shape[0], n_classes))
sample_rows = np.arange(votes.shape[0])

# One architecture instance is built once; only its weights are swapped per model.
with strategy.scope():
    model = build_model()

for num in range(NUM):
    # Weight files are numbered from 1: models/best_1.h5 ... models/best_<NUM>.h5
    model.load_weights("models/best_%d.h5"%(num + 1))
    predictions = model.predict(X_test, batch_size = 32, verbose = 0)

    # The argmax is taken once per model and all votes are added in a single
    # vectorised update. Each row index occurs exactly once, so `+=` is safe.
    votes[sample_rows, predictions.argmax(axis = 1)] += 1

# np.argmax returns the first maximum, so ties go to the class that comes
# earlier in LABELS.
result = np.array(LABELS)[np.argmax(votes, axis = 1)]
sub["Class"] = result.astype('str')
sub.to_csv(outname, header = None, index = False)

In [None]:
# Show the File -> Class table held in `sub`, the frame that is written to the output CSV.
print(sub)

     File             Class
0  a00001          dog_bark
1  a00002          drilling
2  a00003          drilling
3  a00004      street_music
4  a00005        jackhammer
5  a00006          dog_bark
6  a00007  children_playing
7  a00008          drilling
8  a00009     engine_idling
9  a00010      street_music
