In [1]:
import warnings
warnings.filterwarnings('ignore')

import yaml
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold 
from audio_classification.data import BMWDataset

In [2]:
# Load the experiment configuration used to build the dataset.
# NOTE(review): this is an absolute, user-specific path; it will only resolve
# on the original author's machine. Consider making it configurable.
with open("/nfs/homedirs/yuny/project-1/audio_classification/configs/crnn_bmw_test.yaml", "r") as config_file:
    # safe_load instead of yaml.load: calling yaml.load without an explicit
    # Loader is deprecated/unsafe in PyYAML 5.x and raises TypeError in
    # PyYAML >= 6. safe_load only builds plain Python objects.
    configs = yaml.safe_load(config_file)

In [3]:
# Instantiate the BMW dataset from the loaded configuration without any
# transform, then report how many clips it exposes.
# NOTE(review): the meaning of the `[10]` argument is defined by BMWDataset
# and is not visible here -- confirm against its constructor.
bmw_set = BMWDataset(configs, [10], transform=None)
clip_count = len(bmw_set)
print("BMW dataset contains {} clips".format(clip_count))

BMW dataset contains 667 clips


In [10]:
# Copy the dataset's `audios` and `labels` attributes into NumPy arrays so they
# can be indexed with the integer index arrays produced by the splitter.
X, y = np.array(bmw_set.audios), np.array(bmw_set.labels)

In [11]:
# STRATIFIED K-FOLD CROSS VALIDATION { 10-fold }
# Shuffled splitter with a fixed seed so the fold assignment is reproducible.
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)

# One [sample, fold, label] record per entry of X, filled fold by fold.
meta = []

# Only the held-out indices are used: the fold number recorded for a sample is
# the fold in which it appears in the test split. Folds are numbered from 1.
for fold, (_, test_ix) in enumerate(kfold.split(X, y), start=1):
    test_X, test_y = X[test_ix], y[test_ix]
    unique, counts = np.unique(test_y, return_counts=True)

    print("Fold {} contains {} samples with class distribution: ".format(fold, len(test_y)))
    print(dict(zip(unique, counts)))

    # Build the records row by row rather than via np.vstack(...).T: stacking
    # the samples together with the integer fold/label columns forces a single
    # common dtype, which would turn the fold and label into strings whenever
    # X holds strings. .tolist() yields native Python values instead.
    meta += [
        [sample, fold, label]
        for sample, label in zip(test_X.tolist(), test_y.tolist())
    ]

Fold 1 contains 67 samples with class distribution: 
{0: 3, 1: 4, 2: 15, 3: 27, 4: 15, 5: 3}
Fold 2 contains 67 samples with class distribution: 
{0: 3, 1: 4, 2: 15, 3: 27, 4: 15, 5: 3}
Fold 3 contains 67 samples with class distribution: 
{0: 3, 1: 4, 2: 15, 3: 27, 4: 15, 5: 3}
Fold 4 contains 67 samples with class distribution: 
{0: 2, 1: 5, 2: 14, 3: 27, 4: 15, 5: 4}
Fold 5 contains 67 samples with class distribution: 
{0: 2, 1: 5, 2: 14, 3: 27, 4: 15, 5: 4}
Fold 6 contains 67 samples with class distribution: 
{0: 2, 1: 5, 2: 14, 3: 27, 4: 15, 5: 4}
Fold 7 contains 67 samples with class distribution: 
{0: 2, 1: 5, 2: 14, 3: 27, 4: 15, 5: 4}
Fold 8 contains 66 samples with class distribution: 
{0: 2, 1: 5, 2: 14, 3: 27, 4: 15, 5: 3}
Fold 9 contains 66 samples with class distribution: 
{0: 2, 1: 4, 2: 15, 3: 27, 4: 15, 5: 3}
Fold 10 contains 66 samples with class distribution: 
{0: 2, 1: 4, 2: 15, 3: 27, 4: 15, 5: 3}


In [12]:
# Turn the collected per-sample records into a table, give the three
# positional columns their names, and write the result to a CSV file.
column_names = dict(enumerate(["slice_file_name", "fold", "classID"]))
df = pd.DataFrame(meta).rename(columns=column_names)

file_name = '../datasets/BMW/bmw.csv'
# Header row included, row index omitted, comma-separated.
df.to_csv(file_name, header=True, index=False, sep=',')
print("Meta records of {} BMW clips is writen to {}".format(len(df), file_name))

Meta records of 667 BMW clips is writen to ../datasets/BMW/bmw.csv
