In [10]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

import pandas as pd
import numpy as np

from dataset import _label_dict_
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold
from sklearn.neural_network import MLPClassifier

from tqdm.notebook import tqdm

---
---
# Prepare data

In [3]:
def true_filename(fn):
    return '.'.join(fn.split('.')[:2])

@np.vectorize
def genre(true_fn):
    return true_fn.split('.')[0]

In [74]:
data = pd.read_csv("../res/features_30_sec.csv")

In [75]:
data["filename"] = data["filename"].apply(true_filename)
data

Unnamed: 0,filename,length,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,...,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,label
0,blues.00000,661794,0.350088,0.088757,0.130228,0.002827,1784.165850,129774.064525,2002.449060,85882.761315,...,52.420910,-1.690215,36.524071,-0.408979,41.597103,-2.303523,55.062923,1.221291,46.936035,blues
1,blues.00001,661794,0.340914,0.094980,0.095948,0.002373,1530.176679,375850.073649,2039.036516,213843.755497,...,55.356403,-0.731125,60.314529,0.295073,48.120598,-0.283518,51.106190,0.531217,45.786282,blues
2,blues.00002,661794,0.363637,0.085275,0.175570,0.002746,1552.811865,156467.643368,1747.702312,76254.192257,...,40.598766,-7.729093,47.639427,-1.816407,52.382141,-3.439720,46.639660,-2.231258,30.573025,blues
3,blues.00003,661794,0.404785,0.093999,0.141093,0.006346,1070.106615,184355.942417,1596.412872,166441.494769,...,44.427753,-3.319597,50.206673,0.636965,37.319130,-0.619121,37.259739,-3.407448,31.949339,blues
4,blues.00004,661794,0.308526,0.087841,0.091529,0.002303,1835.004266,343399.939274,1748.172116,88445.209036,...,86.099236,-5.454034,75.269707,-0.916874,53.613918,-4.404827,62.910812,-11.703234,55.195160,blues
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,rock.00095,661794,0.352063,0.080487,0.079486,0.000345,2008.149458,282174.689224,2106.541053,88609.749506,...,45.050526,-13.289984,41.754955,2.484145,36.778877,-6.713265,54.866825,-1.193787,49.950665,rock
996,rock.00096,661794,0.398687,0.075086,0.076458,0.000588,2006.843354,182114.709510,2068.942009,82426.016726,...,33.851742,-10.848309,39.395096,1.881229,32.010040,-7.461491,39.196327,-2.795338,31.773624,rock
997,rock.00097,661794,0.432142,0.075268,0.081651,0.000322,2077.526598,231657.968040,1927.293153,74717.124394,...,33.597008,-12.845291,36.367264,3.440978,36.001110,-12.588070,42.502201,-2.106337,29.865515,rock
998,rock.00098,661794,0.362485,0.091506,0.083860,0.001211,1398.699344,240318.731073,1818.450280,109090.207161,...,46.324894,-4.416050,43.583942,1.556207,34.331261,-5.041897,47.227180,-3.590644,41.299088,rock


In [76]:
unique_files = np.unique(data["filename"]).astype(str)
labels = genre(unique_files)
splits = [
    (unique_files[split[0]], unique_files[split[1]])
    for split in StratifiedKFold(n_splits=5, shuffle=True, random_state=123456789).split(unique_files, labels)]

---
---
# Prepare training

In [77]:
def train_one_epoch(model, trn_loader, loss_fn, optimizer):
    correctly_classified = 0
    incorrectly_classified = 0
    running_loss = 0.

    for batch in trn_loader:
        optimizer.zero_grad()

        x = batch[:, :-1]
        y = batch[:, -1].long()

        logits = model(x)
        loss = loss_fn(logits, y)

        loss.backward()
        optimizer.step()

        preds = torch.argmax(logits, dim=1)

        running_loss += loss.item()
        n_correct = torch.sum(preds == y)
        correctly_classified += n_correct
        incorrectly_classified += (preds.size(0) - n_correct)

    avg_acc = correctly_classified / (correctly_classified + incorrectly_classified)
    avg_loss = running_loss / len(trn_loader)
    return avg_loss, avg_acc

def validate(model, val_loader, loss_fn):
    correctly_classified = 0
    incorrectly_classified = 0
    running_loss = 0.

    with torch.no_grad():
        for batch in val_loader:
            x = batch[:, :-1]
            y = batch[:, -1].long()

            logits = model(x)
            loss = loss_fn(logits, y)

            preds = torch.argmax(logits, dim=1)

            running_loss += loss.item()
            n_correct = torch.sum(preds == y)
            correctly_classified += n_correct
            incorrectly_classified += (preds.size(0) - n_correct)

    avg_acc = correctly_classified / (correctly_classified + incorrectly_classified)
    avg_loss = running_loss / len(val_loader)
    return avg_loss, avg_acc



def train(model, n_epochs, trn_loader, val_loader, early_stopping, loss_fn, optimizer, scheduler):
    output = dict()
    epochs_without_improvement = 0

    best_val_loss = torch.inf
    output["best_val_loss"] = best_val_loss
    best_val_acc = 0.
    output["best_val_acc"] = best_val_acc

    output["train/loss"] = []
    output["train/acc"]  = []
    output["val/loss"]   = []
    output["val/acc"]    = []

    pbar = tqdm(range(n_epochs))
    for epoch in pbar:
        model.train()
        trn_loss, trn_acc = train_one_epoch(
            model, trn_loader, loss_fn, optimizer,
        )
        if scheduler is not None:
            scheduler.step()
        output["train/loss"].append(trn_loss)
        output["train/acc"].append(trn_acc)

        model.eval()
        val_loss, val_acc = validate(
            model, val_loader, loss_fn
        )
        output["val/loss"].append(trn_loss)
        output["val/acc"].append(trn_acc)

        pbar.set_postfix_str(f"loss = {trn_loss:>6.4f} | acc. = {trn_acc * 100:>5.2f} % | "
                             f"val. loss = {val_loss:>6.4f} | val. acc = {val_acc * 100:>5.2f} %")

        if val_loss < output["best_val_loss"]:
            output["best_loss_state"] = model.state_dict()
            epochs_without_improvement = 0
            output["best_val_loss"] = val_loss
        elif val_acc > output["best_val_acc"]:
            output["best_acc_state"] = model.state_dict()
            epochs_without_improvement = 0
            output["best_val_acc"] = val_acc
        else:
            epochs_without_improvement += 1
            if epochs_without_improvement >= early_stopping:
                output["last_state"] = model.state_dict()
                return output

    output["last_state"] = model.state_dict()
    return output

---
---
# Train

In [78]:
data["label"] = data["label"].apply(lambda k: _label_dict_[k])

In [81]:
results = {}
fold_pbar = tqdm(range(5))


for num_fold in fold_pbar:
    results[num_fold] = dict()
    train_files, val_files = splits[num_fold]

    train_idx = []
    for fn in train_files:
        train_idx.extend(data.index[data["filename"] == fn])
    val_idx = []
    for fn in val_files:
        val_idx.extend(data.index[data["filename"] == fn])

    std_scaler = StandardScaler()

    X = data.to_numpy()[train_idx, 2:].astype(np.float32)
    X[:, :-1] = std_scaler.fit_transform(X[:, :-1])
    trn_loader = DataLoader(X, batch_size=64, shuffle=True)

    val_X = data.to_numpy()[val_idx, 2:].astype(np.float32)
    val_X[:, :-1] = std_scaler.fit_transform(val_X[:, :-1])
    val_loader = DataLoader(val_X, batch_size=64, shuffle=False)

    seed_pbar = tqdm([11111, 22222, 33333, 44444, 55555])
    for seed in seed_pbar:
        torch.manual_seed(seed)

        model = nn.Sequential(
            nn.Linear(57, 256),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 10))

        loss_fn = nn.CrossEntropyLoss()
        scheduler=None
        optimizer=torch.optim.Adam(model.parameters(), weight_decay=0.0001)

        results[num_fold][seed] = train(model, 300, trn_loader, val_loader, 30, loss_fn, optimizer, scheduler)

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

---
# Show results

In [82]:
for fold in range(5):
    best_val_accs = [results[fold][seed]["best_val_acc"] for seed in results[fold]]
    best = np.max(best_val_accs)
    mean_val_acc = np.mean(best_val_accs)
    std_val_acc = np.std(best_val_accs)
    print(f"Fold {fold} val. accuracy = {mean_val_acc:.4f} +- {std_val_acc:.4f} | Best: {best:.4f}")

Fold 0 val. accuracy = 0.7690 +- 0.0116 | Best: 0.7850
Fold 1 val. accuracy = 0.7920 +- 0.0098 | Best: 0.8050
Fold 2 val. accuracy = 0.7480 +- 0.0098 | Best: 0.7650
Fold 3 val. accuracy = 0.7890 +- 0.0159 | Best: 0.8150
Fold 4 val. accuracy = 0.7570 +- 0.0140 | Best: 0.7800


---
# Save results

In [83]:
import joblib
joblib.dump(results, "../results/mlp_30sec_5fold_5seed.pkl")

['../results/mlp_30sec_5fold_5seed.pkl']