In [1]:
import os
import sys

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split

In [2]:
# Put the parent directory on the import search path *before* the project-local
# imports below, so that `msos` and `utils` can be resolved from this notebook.
# NOTE(review): '..' is relative to the kernel's working directory, so this
# presumably assumes the notebook is run from its own folder — confirm.
sys.path.append('..')

from msos.msos_preprocess import generate_label_name_map, mel_spectro_msos
from utils import get_project_root

In [3]:
# Root folder of the MSoS data, relative to the project root.
DATA_DIR = os.path.join(get_project_root(), 'data/msos')


def _load_label_name_map(csv_name):
    """Read a label/name CSV stored in DATA_DIR.

    Returns a tuple ``(frame, mapping)`` where ``frame`` is the raw DataFrame
    and ``mapping`` is a dict built from its 'label' column (keys) and its
    'name' column (values).
    """
    frame = pd.read_csv(os.path.join(DATA_DIR, csv_name))
    return frame, dict(zip(frame['label'], frame['name']))


# Level-1 and level-2 label -> name lookups (the DataFrames are kept as well).
LABEL_NAME_MAP_L1_DF, LABEL_NAME_MAP_L1 = _load_label_name_map(
    'label_name_map_l1.csv')
LABEL_NAME_MAP_L2_DF, LABEL_NAME_MAP_L2 = _load_label_name_map(
    'label_name_map_l2.csv')

# Raw development logsheet, as stored on disk.
LOGSHEET_DEV = pd.read_csv(os.path.join(DATA_DIR, 'Logsheet_Development.csv'))


In [4]:
# Derive the annotated development logsheet and both label -> name maps
# straight from the development logsheet CSV, then preview the first rows.
dev_logsheet_path = os.path.join(DATA_DIR, 'Logsheet_Development.csv')
(logsheet_df,
 label_name_map_l1,
 label_name_map_l2) = generate_label_name_map(dev_logsheet_path)
print(logsheet_df.head())

  Category Event     File  label_l1  label_l2
0  Effects  Beep  TN7.wav         0         0
1  Effects  Beep  RTS.wav         0         0
2  Effects  Beep  NUO.wav         0         0
3  Effects  Beep  CPU.wav         0         0
4  Effects  Beep  SB2.wav         0         0


In [9]:
# Build the feature and label arrays for every row of the development logsheet.
# Going by the helper's name these are presumably mel spectrograms — confirm in
# msos.msos_preprocess. The recorded output of the next cell shows data_dev
# with shape (1500, 431, 128) and labels_dev with shape (1500, 2).
data_dev, labels_dev = mel_spectro_msos(logsheet_df, DATA_DIR)


In [8]:
# Sanity check of the development arrays: shape plus a small preview of each,
# emitted one item per line in the order features first, labels second.
print(
    data_dev.shape,
    data_dev[0][:6, :6],
    labels_dev.shape,
    labels_dev[:6],
    sep='\n',
)


(1500, 431, 128)
[[1.13089200e-05 3.12628428e-04 1.07118487e-03 8.46658051e-02
  2.36185268e-02 1.66730899e-02]
 [1.77054553e-05 2.46369455e-04 8.24516232e-04 1.20281905e-01
  8.86434019e-01 1.18545580e+00]
 [6.02070242e-04 6.75385527e-04 1.02397159e-03 3.45195383e-02
  1.70631611e+00 2.62378073e+00]
 [7.61008868e-03 6.40402362e-03 5.46289748e-03 1.47223426e-02
  7.79110670e-01 1.61585546e+00]
 [7.35904649e-03 7.01652654e-03 8.17175768e-03 1.31903065e-03
  1.02061838e-01 5.07537663e-01]
 [3.98447132e-03 2.92373705e-03 1.35473930e-03 6.03581732e-03
  4.11186507e-03 1.06369138e-01]]
(1500, 2)
[[0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]]


In [9]:
# Hold out 20% of the development set for validation (train_size=0.8).
# random_state is fixed so the split is reproducible across runs; the recorded
# shapes in the next cell are 1200 training and 300 validation examples.
data_train, data_val, labels_train, labels_val = train_test_split(data_dev, labels_dev, train_size=0.8, random_state=42)

In [10]:
# Report the shapes of the four split arrays, one per line
# (train data, validation data, train labels, validation labels).
print(
    data_train.shape,
    data_val.shape,
    labels_train.shape,
    labels_val.shape,
    sep='\n',
)


(1200, 431, 128)
(300, 431, 128)
(1200, 2)
(300, 2)


In [11]:
# Persist the train/validation split under the project's data folder.
# Column 0 of the label arrays is written to the *_labels_1 files and
# column 1 to the *_labels_2 files.
for rel_path, array in (
    ('data/msos_train_data.npy', data_train),
    ('data/msos_val_data.npy', data_val),
    ('data/msos_train_labels_1.npy', labels_train[:, 0]),
    ('data/msos_train_labels_2.npy', labels_train[:, 1]),
    ('data/msos_val_labels_1.npy', labels_val[:, 0]),
    ('data/msos_val_labels_2.npy', labels_val[:, 1]),
):
    np.save(os.path.join(get_project_root(), rel_path), array)


In [3]:
# Run the same pipeline on the evaluation logsheet and store the result as the
# test set. Only the logsheet frame is kept from generate_label_name_map; the
# two returned maps are discarded.
eval_logsheet_path = os.path.join(DATA_DIR, 'Logsheet_EvaluationMaster.csv')
logsheet_eval_df, _, _ = generate_label_name_map(eval_logsheet_path)
data_eval, labels_eval = mel_spectro_msos(logsheet_eval_df, DATA_DIR, eval=True)

# Column 0 of the labels goes to *_labels_1, column 1 to *_labels_2.
for rel_path, array in (
    ('data/msos_test_data.npy', data_eval),
    ('data/msos_test_labels_1.npy', labels_eval[:, 0]),
    ('data/msos_test_labels_2.npy', labels_eval[:, 1]),
):
    np.save(os.path.join(get_project_root(), rel_path), array)
