In [1]:
import os
import sys
sys.path.insert(0, './../')
import bbdc2021 as bbdc

In [2]:
# Pipeline parameter dictionary (default configuration).
pipe_param = {
    'data_folder': './../data/',
    'wav_files_folder': 'final_pre_dataset',
    # mel parameters
    'frame_size': 0.04,
    'frame_stride': 0.02495,
    'mel_filter': 32,
    'nfft': 512,
    # file lists, in case you want to try a smaller set after all
    'dev_csv': 'dev-labels.csv',
    'eval_csv': 'challenge_filelist_dummy.csv',
    # 'standard', 'minmax', or 'no' if no scaling should be applied
    'scaling': 'standard',
    # start and end index within the dev set
    'test_split_range': (8000, -1),
    # where the prediction CSVs should be saved
    'prediction_path': './',
    # 'fill' is the variant written by Jannes
    'post_processing': 'fill',
    # post-processing parameters
    'post_timethres': 0.8,
    'post_noisethres': 0.3,
    'post_base': 2,
    'submission_file_path': './challenge_submission.csv',
}

# U-Net model parameter dictionary (default configuration).
unet_param = {
    'channels': [32, 64, 64],
    'val_split_range': (6400, -1),  # 0.2
    'loss': 'categorical_crossentropy',  # bbdc.dice_loss is also possible
    'learning_rate': 0.001,
    'batch_size': 20,
    'epochs': 30,
    'model_save_path': './',
    # None if the model should be trained; Submission 1: 'model9561.h5'
    'load_path': None,
}

In [2]:
def pipeline_u_net2d_1(pipe_param, model_param):
    """Run the current 2D U-Net pipeline from data loading to challenge prediction.

    The stages are delegated to ``bbdc`` block functions and executed in this
    order: load, shuffle, test split, model fit (or load), evaluation,
    post-processing with evaluation, challenge prediction.

    Args:
        pipe_param: Pipeline parameter dictionary. It is printed and forwarded
            to the loading, split, evaluation, post-processing and
            challenge-prediction blocks.
        model_param: Model parameter dictionary. It is printed and forwarded
            to ``bbdc.model_block1_unet2d``.

    Returns:
        Tuple ``(scores_list, model_psds, pp_psds)``: the two values returned
        by ``bbdc.evaluation_block1`` followed by the third value returned by
        ``bbdc.postprocessing_with_evaluation_block1``.
    """
    print('Pipeline parameter:', pipe_param)
    print('Model parameter:', model_param)

    # 1) Load the dev data and the challenge data.
    (dev_x, dev_y, time_points,
     dev_files, challenge_x, challenge_files) = bbdc.loading_block2(pipe_param)

    # 2) Shuffle the dev data together with its file list.
    dev_x, dev_y, dev_files = bbdc.shuffle_block1(dev_x, dev_y, dev_files)

    # 3) Split off the test set; "trainval" is the combined train + validation part.
    (trainval_x, test_x,
     trainval_y, test_y, test_files) = bbdc.split_block1(
        dev_x, dev_y, time_points, dev_files, pipe_param)

    # 4) Fit (or load) the model. The training history is not used afterwards.
    _history, model = bbdc.model_block1_unet2d(trainval_x, trainval_y, model_param)

    # 5) Evaluate the model on the test set.
    scores_list, model_psds = bbdc.evaluation_block1(
        test_x, test_y, time_points, test_files, model, pipe_param)

    # 6) Post-process and evaluate again. Only the challenge predictions and
    #    the post-processed score are needed below.
    _pp_pred_test, pp_pred_ch, pp_psds = bbdc.postprocessing_with_evaluation_block1(
        test_x, test_y, time_points, test_files,
        challenge_x, challenge_files, model, pipe_param)

    # 7) Hand the post-processed challenge predictions to the prediction block.
    bbdc.challenge_prediction_block1(pp_pred_ch, time_points, challenge_files, pipe_param)

    return scores_list, model_psds, pp_psds

In [None]:
# Pipeline parameters for this run.
pipe_param = {
    'data_folder': './../data/',
    'wav_files_folder': 'final_pre_dataset',
    # mel parameters
    'frame_size': 0.04,
    'frame_stride': 0.02495,
    'mel_filter': 32,
    'nfft': 512,
    # file lists, in case you want to try a smaller set after all
    'dev_csv': 'dev-labels.csv',
    'eval_csv': 'challenge_filelist_dummy.csv',
    # 'standard', 'minmax', or 'no' if no scaling should be applied
    'scaling': 'standard',
    # start and end index within the dev set
    'test_split_range': (8000, -1),
    # where the prediction CSVs should be saved
    'prediction_path': './',
    # 'fill' is the variant written by Jannes
    'post_processing': 'fill',
    # post-processing parameters
    'post_timethres': 0.8,
    'post_noisethres': 0.3,
    'post_base': 2,
    # NOTE(review): the file name says '64mel' but 'mel_filter' above is 32 -- confirm.
    'submission_file_path': './challenge_submission64mel.csv',
}

# U-Net model parameters for this run.
unet_param = {
    'channels': [32, 64, 64],
    'val_split_range': (6400, -1),  # 0.2
    'loss': 'categorical_crossentropy',  # bbdc.dice_loss is also possible
    'learning_rate': 0.001,
    'batch_size': 256,
    'epochs': 100,
    'model_save_path': './',
    # None if the model should be trained; Submission 1: 'model9561.h5'
    'load_path': None,
}

pipeline_u_net2d_1(pipe_param, unet_param)

Pipeline parameter: {'data_folder': './../data/', 'wav_files_folder': 'final_pre_dataset', 'frame_size': 0.04, 'frame_stride': 0.02495, 'mel_filter': 32, 'nfft': 512, 'dev_csv': 'dev-labels.csv', 'eval_csv': 'challenge_filelist_dummy.csv', 'scaling': 'standard', 'test_split_range': (8000, -1), 'prediction_path': './', 'post_processing': 'fill', 'post_timethres': 0.8, 'post_noisethres': 0.3, 'post_base': 2, 'submission_file_path': './challenge_submission64mel.csv'}
Model parameter: {'channels': [32, 64, 64], 'val_split_range': (6400, -1), 'loss': 'categorical_crossentropy', 'learning_rate': 0.001, 'batch_size': 256, 'epochs': 100, 'model_save_path': './', 'load_path': None}
Mel filter version loading block.
Csv from wav files already existend. Skipping calc_fft_mel.
Scaled numpy files already existend. Skipping scaling and load_data function.
Splitting test set at indices 8000 to -1 from dev set.
Tensorflow version: 2.4.1
Splitting val set at indices 6400 to -1 from train set.
Epoch 1/1

In [4]:
# Smaller discretization.
pipe_param = {
    'data_folder': './../data/',
    'wav_files_folder': 'final_pre_dataset',
    # mel parameters
    'frame_size': 0.04,
    'frame_stride': 0.0998,
    'mel_filter': 100,
    'nfft': 512,
    # file lists, in case you want to try a smaller set after all
    'dev_csv': 'dev-labels.csv',
    'eval_csv': 'challenge_filelist_dummy.csv',
    # 'standard', 'minmax', or 'no' if no scaling should be applied
    'scaling': 'standard',
    # start and end index within the dev set
    'test_split_range': (8000, -1),
    # where the prediction CSVs should be saved
    'prediction_path': './',
    # 'fill' is the variant written by Jannes
    'post_processing': 'fill',
    # post-processing parameters
    'post_timethres': 0.8,
    'post_noisethres': 0.3,
    'post_base': 2,
    'submission_file_path': './challenge_submission_.csv',
}

# U-Net model parameters for this run.
unet_param = {
    'channels': [32, 64, 90],
    'val_split_range': (6400, -1),  # 0.2
    'loss': 'categorical_crossentropy',  # bbdc.dice_loss is also possible
    'learning_rate': 0.001,
    'batch_size': 256,
    'epochs': 3,
    'model_save_path': './',
    # None if the model should be trained; Submission 1: 'model9561.h5'
    'load_path': None,
}

pipeline_u_net2d_1(pipe_param, unet_param)

3it [00:00, 23.83it/s]

Pipeline parameter: {'data_folder': './../data/', 'wav_files_folder': 'final_pre_dataset', 'frame_size': 0.04, 'frame_stride': 0.0998, 'mel_filter': 100, 'nfft': 512, 'dev_csv': 'dev-labels.csv', 'eval_csv': 'challenge_filelist_dummy.csv', 'scaling': 'standard', 'test_split_range': (8000, -1), 'prediction_path': './', 'post_processing': 'fill', 'post_timethres': 0.8, 'post_noisethres': 0.3, 'post_base': 2, 'submission_file_path': './challenge_submission_.csv'}
Model parameter: {'channels': [32, 64, 90], 'val_split_range': (6400, -1), 'loss': 'categorical_crossentropy', 'learning_rate': 0.001, 'batch_size': 256, 'epochs': 3, 'model_save_path': './', 'load_path': None}
Mel filter version loading block.
Starting transformation from wav files to csv files (mel).
Processing dev files:


10000it [08:24, 19.81it/s]
3it [00:00, 26.88it/s]

Processing eval files:


3000it [02:28, 20.26it/s]
0it [00:00, ?it/s]

Loading dev set:


30044it [02:06, 238.02it/s]


Loading eval set:


3000it [00:35, 83.72it/s]


Scaling files.
Saving to numpy arrays.
Splitting test set at indices 8000 to -1 from dev set.
Existing model.h5 removed.
Tensorflow version: 2.4.1
(100, 100)
Splitting val set at indices 6400 to -1 from train set.
Epoch 1/3
 7/40 [====>.........................] - ETA: 25:41 - loss: 6.1654 - mae: 0.1459 - accuracy: 0.0425

KeyboardInterrupt: 

In [3]:
# Frame size and not stride! Smaller discretization, robust scaler.
pipe_param = {
    'data_folder': './../data/',
    'wav_files_folder': 'final_pre_dataset',
    # mel parameters
    'frame_size': 0.2,
    'frame_stride': 0.098,
    'mel_filter': 100,
    'nfft': 512,
    # file lists, in case you want to try a smaller set after all
    'dev_csv': 'dev-labels.csv',
    'eval_csv': 'challenge_filelist_dummy.csv',
    # 'standard', 'minmax', or 'no' if no scaling should be applied
    # (this run passes 'robust')
    'scaling': 'robust',
    # start and end index within the dev set
    'test_split_range': (8000, -1),
    # where the prediction CSVs should be saved
    'prediction_path': './',
    # 'fill' is the variant written by Jannes
    'post_processing': 'fill',
    # post-processing parameters
    'post_timethres': 0.8,
    'post_noisethres': 0.3,
    'post_base': 2,
    'submission_file_path': './challenge_submission_.csv',
}

# U-Net model parameters (defined here, but only the loading block is run below).
unet_param = {
    'channels': [32, 64, 90],
    'val_split_range': (6400, -1),  # 0.2
    'loss': 'categorical_crossentropy',  # bbdc.dice_loss is also possible
    'learning_rate': 0.001,
    'batch_size': 256,
    'epochs': 3,
    'model_save_path': './',
    # None if the model should be trained; Submission 1: 'model9561.h5'
    'load_path': None,
}

x_dev, y_dev, timep, filelist_dev, x_ch, filelist_ch = bbdc.loading_block2(pipe_param)

0it [00:00, ?it/s]

Mel filter version loading block.
Starting transformation from wav files to csv files (mel).
Processing dev files:


10000it [11:20, 14.70it/s]
3it [00:00, 23.53it/s]

Processing eval files:


3000it [03:20, 14.98it/s]
22it [00:00, 214.35it/s]

Loading dev set:


30044it [02:15, 222.48it/s]
9it [00:00, 82.73it/s]

Loading eval set:


3000it [00:38, 78.04it/s]


Scaling files.
Saving to numpy arrays.


In [2]:
# Frame size and not stride! Smaller discretization.
pipe_param = {
    'data_folder': './../data/',
    'wav_files_folder': 'final_pre_dataset',
    # mel parameters
    'frame_size': 0.2,
    'frame_stride': 0.098,
    'mel_filter': 16,
    'nfft': 512,
    # file lists, in case you want to try a smaller set after all
    'dev_csv': 'dev-labels.csv',
    'eval_csv': 'challenge_filelist_dummy.csv',
    # 'standard', 'minmax', or 'no' if no scaling should be applied
    'scaling': 'standard',
    # start and end index within the dev set
    'test_split_range': (8000, -1),
    # where the prediction CSVs should be saved
    'prediction_path': './',
    # 'fill' is the variant written by Jannes
    'post_processing': 'fill',
    # post-processing parameters
    'post_timethres': 0.8,
    'post_noisethres': 0.3,
    'post_base': 2,
    'submission_file_path': './challenge_submission_.csv',
}

x_dev, y_dev, timep, filelist_dev, x_ch, filelist_ch = bbdc.loading_block2(pipe_param)

113it [00:00, 1116.96it/s]

Mel filter version loading block.
Csv from wav files already existend. Skipping calc_fft_mel.
Loading dev set:


30044it [00:25, 1166.23it/s]
43it [00:00, 424.29it/s]

Loading eval set:


3000it [00:06, 453.48it/s]


Scaling files.
standard scaling chosen.
Saving to numpy arrays.


In [3]:
# Frame size and not stride! Smaller discretization.
pipe_param = {
    'data_folder': './../data/',
    'wav_files_folder': 'final_pre_dataset',
    # mel parameters
    'frame_size': 0.2,
    'frame_stride': 0.098,
    'mel_filter': 16,
    'nfft': 512,
    # file lists, in case you want to try a smaller set after all
    'dev_csv': 'dev-labels.csv',
    'eval_csv': 'challenge_filelist_dummy.csv',
    # 'standard', 'minmax', or 'no' if no scaling should be applied
    'scaling': 'minmax',
    # start and end index within the dev set
    'test_split_range': (8000, -1),
    # where the prediction CSVs should be saved
    'prediction_path': './',
    # 'fill' is the variant written by Jannes
    'post_processing': 'fill',
    # post-processing parameters
    'post_timethres': 0.8,
    'post_noisethres': 0.3,
    'post_base': 2,
    'submission_file_path': './challenge_submission_.csv',
}

x_dev, y_dev, timep, filelist_dev, x_ch, filelist_ch = bbdc.loading_block2(pipe_param)

Mel filter version loading block.
Csv from wav files already existend. Skipping calc_fft_mel.
Loading dev set:


30044it [00:25, 1180.25it/s]
47it [00:00, 463.86it/s]

Loading eval set:


3000it [00:06, 455.18it/s]


Scaling files.
minmax scaling chosen.
Saving to numpy arrays.


In [4]:
# Frame size and not stride! Smaller discretization.
pipe_param = {
    'data_folder': './../data/',
    'wav_files_folder': 'final_pre_dataset',
    # mel parameters
    'frame_size': 0.2,
    'frame_stride': 0.098,
    'mel_filter': 16,
    'nfft': 512,
    # file lists, in case you want to try a smaller set after all
    'dev_csv': 'dev-labels.csv',
    'eval_csv': 'challenge_filelist_dummy.csv',
    # 'standard', 'minmax', or 'no' if no scaling should be applied
    # (this run passes 'minmax097')
    'scaling': 'minmax097',
    # start and end index within the dev set
    'test_split_range': (8000, -1),
    # where the prediction CSVs should be saved
    'prediction_path': './',
    # 'fill' is the variant written by Jannes
    'post_processing': 'fill',
    # post-processing parameters
    'post_timethres': 0.8,
    'post_noisethres': 0.3,
    'post_base': 2,
    'submission_file_path': './challenge_submission_.csv',
}

x_dev, y_dev, timep, filelist_dev, x_ch, filelist_ch = bbdc.loading_block2(pipe_param)

105it [00:00, 1049.39it/s]

Mel filter version loading block.
Csv from wav files already existend. Skipping calc_fft_mel.
Loading dev set:


30044it [00:27, 1108.58it/s]
39it [00:00, 383.87it/s]

Loading eval set:


3000it [00:06, 444.79it/s]


Scaling files.
minmax097 scaling chosen.
Saving to numpy arrays.


In [5]:
# Frame size and not stride! Smaller discretization.
pipe_param = {
    'data_folder': './../data/',
    'wav_files_folder': 'final_pre_dataset',
    # mel parameters
    'frame_size': 0.2,
    'frame_stride': 0.098,
    'mel_filter': 16,
    'nfft': 512,
    # file lists, in case you want to try a smaller set after all
    'dev_csv': 'dev-labels.csv',
    'eval_csv': 'challenge_filelist_dummy.csv',
    # 'standard', 'minmax', or 'no' if no scaling should be applied
    # NOTE(review): with 'robust' the recorded run of this cell stopped with
    # "AttributeError: 'RobustScaler' object has no attribute 'partial_fit'"
    # during the scaling step -- the scaling code in bbdc presumably needs a
    # non-incremental path for this scaler; confirm before re-running.
    'scaling': 'robust',
    # start and end index within the dev set
    'test_split_range': (8000, -1),
    # where the prediction CSVs should be saved
    'prediction_path': './',
    # 'fill' is the variant written by Jannes
    'post_processing': 'fill',
    # post-processing parameters
    'post_timethres': 0.8,
    'post_noisethres': 0.3,
    'post_base': 2,
    'submission_file_path': './challenge_submission_.csv',
}

x_dev, y_dev, timep, filelist_dev, x_ch, filelist_ch = bbdc.loading_block2(pipe_param)

Mel filter version loading block.
Csv from wav files already existend. Skipping calc_fft_mel.
Loading dev set:


30044it [00:25, 1168.57it/s]
46it [00:00, 453.12it/s]

Loading eval set:


3000it [00:06, 448.67it/s]


Scaling files.
robust scaling chosen.


AttributeError: 'RobustScaler' object has no attribute 'partial_fit'

In [6]:
# Frame size and not stride! Smaller discretization.
pipe_param = {
    'data_folder': './../data/',
    'wav_files_folder': 'final_pre_dataset',
    # mel parameters
    'frame_size': 0.1,
    'frame_stride': 0.0496,
    'mel_filter': 32,
    'nfft': 512,
    # file lists, in case you want to try a smaller set after all
    'dev_csv': 'dev-labels.csv',
    'eval_csv': 'challenge_filelist_dummy.csv',
    # 'standard', 'minmax', or 'no' if no scaling should be applied
    'scaling': 'standard',
    # start and end index within the dev set
    'test_split_range': (8000, -1),
    # where the prediction CSVs should be saved
    'prediction_path': './',
    # 'fill' is the variant written by Jannes
    'post_processing': 'fill',
    # post-processing parameters
    'post_timethres': 0.8,
    'post_noisethres': 0.3,
    'post_base': 2,
    'submission_file_path': './challenge_submission_.csv',
}

x_dev, y_dev, timep, filelist_dev, x_ch, filelist_ch = bbdc.loading_block2(pipe_param)

2it [00:00, 19.76it/s]

Mel filter version loading block.
Starting transformation from wav files to csv files (mel).
Processing dev files:


10000it [08:58, 18.56it/s]
2it [00:00, 15.15it/s]

Processing eval files:


3000it [02:36, 19.11it/s]
0it [00:00, ?it/s]

Loading dev set:


30044it [01:40, 300.34it/s]
6it [00:00, 56.21it/s]

Loading eval set:


3000it [00:30, 99.09it/s] 


Scaling files.
standard scaling chosen.
Saving to numpy arrays.


In [None]:
# See the Colab notebook "unet" for the 2D U-Net calculations.