# Baseline 0.8177
## @最大的梦想家

# File structure
- data/
    - train/
    - test/
    - submission.csv
- baseline.ipynb

In [1]:
import numpy as np
import librosa as lb
import pandas as pd

import os

In [2]:
NUM_CLASSES = 30
SR = 16_000

# Preprocessing

In [3]:
def get_melspec(x, sr, n_mels=64):
    melspec = lb.feature.melspectrogram(x, sr=sr, n_mels=n_mels)
    lb.power_to_db(melspec).astype(np.float32)
    return melspec

In [4]:
from tqdm import tqdm
from sklearn.utils import shuffle

def crop_or_pad(y, length):
    if len(y) < length:
        y = np.concatenate([y, np.zeros(length - len(y))])
    elif len(y) > length:
        y = y[: length]
    return y

def preprocess_train():
    labels = os.listdir('data/train')
    x, y = [], []

    for i, label in enumerate(labels):
        label_dir = f'data/train/{label}'
        for wav_file in tqdm(os.listdir(label_dir)):
            wav_path = label_dir + f'/{wav_file}'
            wav, sr = lb.load(wav_path, sr=SR)
            wav = crop_or_pad(wav, 1*SR)
            melspec = get_melspec(wav, sr)
            x.append(melspec)
            y.append(i)

    x, y = np.r_[x], np.r_[y]
    x, y = shuffle(x, y, random_state=7)

    return x.astype(np.float32), y.astype(np.int64)

In [5]:
def preprocess_test():
    x, keys = [], []

    for wav_file in tqdm(os.listdir('data/test')):
        wav_path = f'data/test/{wav_file}'
        wav, sr = lb.load(wav_path, sr=SR)
        wav = crop_or_pad(wav, 1*SR)
        melspec = get_melspec(wav, sr)
        x.append(melspec)
        keys.append(wav_file)

    x = np.r_[x]

    return x.astype(np.float32), keys

In [6]:
x_train, y_train = preprocess_train()
x_train.shape, y_train.shape

100%|██████████| 1537/1537 [00:04<00:00, 359.49it/s]
100%|██████████| 1573/1573 [00:04<00:00, 360.40it/s]
100%|██████████| 1567/1567 [00:04<00:00, 357.05it/s]
100%|██████████| 1566/1566 [00:04<00:00, 355.83it/s]
100%|██████████| 2106/2106 [00:05<00:00, 353.40it/s]
100%|██████████| 2095/2095 [00:05<00:00, 357.87it/s]
100%|██████████| 2086/2086 [00:05<00:00, 358.85it/s]
100%|██████████| 2119/2119 [00:05<00:00, 354.92it/s]
100%|██████████| 2121/2121 [00:05<00:00, 358.90it/s]
100%|██████████| 1562/1562 [00:04<00:00, 358.62it/s]
100%|██████████| 1600/1600 [00:04<00:00, 360.79it/s]
100%|██████████| 2086/2086 [00:05<00:00, 355.53it/s]
100%|██████████| 1584/1584 [00:04<00:00, 356.61it/s]
100%|██████████| 2105/2105 [00:05<00:00, 358.17it/s]
100%|██████████| 2123/2123 [00:05<00:00, 359.57it/s]
100%|██████████| 2095/2095 [00:05<00:00, 357.33it/s]
100%|██████████| 2121/2121 [00:05<00:00, 356.38it/s]
100%|██████████| 2122/2122 [00:05<00:00, 357.88it/s]
100%|██████████| 2108/2108 [00:05<00:00, 356.7

((57886, 64, 32), (57886,))

# Training

In [7]:
import keras4torch as k4t
import torch
import torch.nn as nn

In [12]:
model = nn.Sequential(
    k4t.layers.Conv1d(128, 5), nn.ReLU(),
    k4t.layers.BatchNorm1d(),
    k4t.layers.Conv1d(64, 5), nn.ReLU(),
    nn.MaxPool1d(2),
    k4t.layers.Conv1d(64, 4), nn.ReLU(),
    nn.AdaptiveAvgPool1d(2),
    nn.Flatten(),
    k4t.layers.Linear(16), nn.ReLU(),
    k4t.layers.Linear(NUM_CLASSES)
)

model = k4t.Model(model).build(input_shape=[64, 32])

model.compile(optimizer='adam', loss='ce_loss', metrics=['acc'])

model.count_params()

101390

In [13]:
history = model.fit(x_train, y_train,
        epochs=32,
        batch_size=64,
        validation_split=0.2,
        callbacks=[k4t.callbacks.ModelCheckpoint('best.pt', monitor='val_acc')]
)

Train on 46309 samples, validate on 11577 samples:
Epoch 1/32 - 4.1s - loss: 2.8121 - acc: 0.1754 - val_loss: 2.2991 - val_acc: 0.3421 - lr: 1e-03
Epoch 2/32 - 2.8s - loss: 1.8458 - acc: 0.4461 - val_loss: 1.7178 - val_acc: 0.5312 - lr: 1e-03
Epoch 3/32 - 2.8s - loss: 1.4595 - acc: 0.5655 - val_loss: 1.5178 - val_acc: 0.5844 - lr: 1e-03
Epoch 4/32 - 2.9s - loss: 1.2346 - acc: 0.6347 - val_loss: 1.2641 - val_acc: 0.6563 - lr: 1e-03
Epoch 5/32 - 2.8s - loss: 1.0808 - acc: 0.6781 - val_loss: 1.1072 - val_acc: 0.6946 - lr: 1e-03
Epoch 6/32 - 2.8s - loss: 0.9690 - acc: 0.7113 - val_loss: 1.0050 - val_acc: 0.7245 - lr: 1e-03
Epoch 7/32 - 2.7s - loss: 0.8852 - acc: 0.7343 - val_loss: 0.9039 - val_acc: 0.7493 - lr: 1e-03
Epoch 8/32 - 2.7s - loss: 0.8107 - acc: 0.7570 - val_loss: 0.8433 - val_acc: 0.7607 - lr: 1e-03
Epoch 9/32 - 2.8s - loss: 0.7525 - acc: 0.7730 - val_loss: 0.8436 - val_acc: 0.7655 - lr: 1e-03
Epoch 10/32 - 2.7s - loss: 0.7108 - acc: 0.7875 - val_loss: 0.8267 - val_acc: 0.7678 

# Inference

In [14]:
x_test, keys = preprocess_test()
x_test.shape

100%|██████████| 6835/6835 [00:19<00:00, 351.41it/s]


(6835, 64, 32)

In [15]:
model.load_weights('best.pt')

y_pred = model.predict(x_test, activation=lambda x:x.argmax(-1))

labels = os.listdir('data/train')

submission = pd.read_csv('data/submission.csv')

mapping = {k:labels[v] for k,v in zip(keys, y_pred)}

submission['label'] = [mapping[k] for k in submission['file_name']]

submission.to_csv('baseline_submission.csv', index=None)