In [83]:
import os 
import pandas as pd
import numpy as np

import IPython.display as ipd
import warnings
import librosa
import librosa.display
import librosa.feature
import matplotlib.pyplot as plt
from tqdm import tqdm
import utils

import processing
import event_manager
import model_training
import epoching

import pickle
warnings.filterwarnings('ignore')

In [84]:
def select_nsample_normal(df_label,max_sample=50):
    """Down-sample the label-0 rows of a label frame.

    When ``df_label.label`` holds more than one distinct value, every row
    whose label is not 0 is kept and the label-0 rows are truncated to the
    first ``k`` rows, where ``k`` is the row count of the most frequent
    non-zero label. The ``max_sample`` argument is not used in that case.

    When only a single distinct label is present, the first ``max_sample``
    rows are returned, whatever that label is.

    Parameters
    ----------
    df_label : pandas.DataFrame
        Frame with a ``label`` column.
    max_sample : int, default 50
        Row cap applied only in the single-label case.

    Returns
    -------
    pandas.DataFrame
        Selected rows with a fresh 0..n-1 index. In the multi-label case the
        non-zero rows come first, followed by the retained label-0 rows.
    """
    labels = df_label.label

    if len(labels.unique()) <= 1:
        # Single class present: plain head-truncation.
        selected = df_label.iloc[:max_sample]
    else:
        is_zero = labels == 0
        is_other = labels != 0
        # Size of the largest non-zero class decides how many label-0 rows stay.
        n_keep = labels[is_other].value_counts().max()
        selected = pd.concat(
            [df_label[is_other], df_label[is_zero].iloc[:n_keep]],
            axis=0,
        )

    return selected.reset_index(drop=True)

In [85]:
# Folder handed to the project helpers below; the path is relative to the
# notebook's working directory.
audio_folder = '../../data/ICBHI_final_database'
# Recording identifiers returned by the project helper for that folder.
list_audio_files = processing.get_list_recording(audio_folder)
print('Found {} recording files'.format(len(list_audio_files)))

Found 920 recording files


In [87]:
# Per-recording feature extraction: builds `epochs_list` and `df_label_list`,
# two parallel lists with one entry per processed recording.
epochs_list = []
n_fft = 100          # NOTE(review): not referenced anywhere in this cell
win_len = 0.010      # passed to segment_event_annotation; presumably seconds (10 ms) - confirm
win_shift = 0.005    # passed to segment_event_annotation; presumably seconds (5 ms) - confirm
mean = False         # forwarded to get_epoching_from_label; later cells branch on it
df_label_list = []

for audio_file in tqdm(list_audio_files):
    data_file = processing.load_file_from_recording_name(audio_folder, audio_file)
    data_filtered, df_label = processing.preprocess_data(data_file, annotations='event')
    # NOTE(review): this result is overwritten two statements below without
    # being read in this cell. The call is kept in case it has side effects on
    # `data_filtered` - confirm whether it is still needed.
    df_label = event_manager.fill_gap_event_frame(df_label, data_filtered)
    data_feature = processing.apply_wavelets(data_filtered)
    df_label = event_manager.segment_event_annotation(
        data_feature,
        win_len=win_len,
        win_shift=win_shift,
        threshold_class=[1, 1, 1],
    )
    # tqdm.write keeps the per-file class counts from colliding with the
    # progress bar, which a bare print() does.
    tqdm.write(str(df_label.label.value_counts()))
    # Cap the label-0 rows before extracting epochs.
    df_label = select_nsample_normal(df_label)
    epochs_, _ = epoching.get_epoching_from_label(data_feature, df_label, mean=mean)
    epochs_list.append(epochs_)
    # Tag every retained row with the recording it came from.
    df_label['file'] = audio_file
    df_label_list.append(df_label)

  0%|          | 0/920 [00:00<?, ?it/s]

0    3498
1     476
Name: label, dtype: int64


  0%|          | 2/920 [00:03<21:58,  1.44s/it]

0    2914
Name: label, dtype: int64


  0%|          | 3/920 [00:04<22:44,  1.49s/it]

0    3998
Name: label, dtype: int64


  0%|          | 4/920 [00:06<26:01,  1.70s/it]

0    3982
2      10
Name: label, dtype: int64


  1%|          | 5/920 [00:08<26:29,  1.74s/it]

0    3758
2     108
Name: label, dtype: int64


  1%|          | 6/920 [00:10<26:01,  1.71s/it]

0    3877
2      44
Name: label, dtype: int64
1    2351
0    1628
Name: label, dtype: int64


  1%|          | 7/920 [00:12<30:18,  1.99s/it]

0    9761
2    4020
1    2651
Name: label, dtype: int64


  1%|          | 9/920 [00:35<1:31:51,  6.05s/it]

0    3672
1     198
Name: label, dtype: int64


  1%|          | 10/920 [00:36<1:11:08,  4.69s/it]

0    3861
1      54
2      36
Name: label, dtype: int64
0    3225
1     557
2      76
Name: label, dtype: int64


  1%|▏         | 12/920 [00:40<49:21,  3.26s/it]  

0    3932
1      58
2       1
Name: label, dtype: int64


  1%|▏         | 13/920 [00:42<41:27,  2.74s/it]

0    3998
Name: label, dtype: int64
0    3125
1     826
Name: label, dtype: int64


  2%|▏         | 14/920 [00:44<38:05,  2.52s/it]

0    2433
1    1489
2      18
Name: label, dtype: int64


  2%|▏         | 16/920 [00:57<1:13:48,  4.90s/it]

0    13484
2       76
Name: label, dtype: int64


  2%|▏         | 17/920 [00:59<59:02,  3.92s/it]  

0    3815
1     169
2       3
Name: label, dtype: int64


  2%|▏         | 18/920 [01:00<48:36,  3.23s/it]

0    3964
1      30
Name: label, dtype: int64


  2%|▏         | 19/920 [01:02<41:17,  2.75s/it]

0    3998
Name: label, dtype: int64


  2%|▏         | 20/920 [01:03<35:57,  2.40s/it]

0    3998
Name: label, dtype: int64
0    9849
2    2724
Name: label, dtype: int64


  2%|▏         | 22/920 [01:20<1:10:56,  4.74s/it]

0    3757
1     223
2       2
Name: label, dtype: int64


  2%|▎         | 23/920 [01:21<57:08,  3.82s/it]  

0    3788
1     196
Name: label, dtype: int64


  3%|▎         | 24/920 [01:23<47:10,  3.16s/it]

0    3906
2      42
Name: label, dtype: int64
0    3185
1     558
2      86
Name: label, dtype: int64


  3%|▎         | 25/920 [01:25<43:00,  2.88s/it]

0    2695
1    1181
2      43
Name: label, dtype: int64


  3%|▎         | 27/920 [01:29<37:14,  2.50s/it]

0    3999
Name: label, dtype: int64


  3%|▎         | 28/920 [01:31<33:00,  2.22s/it]

0    3998
Name: label, dtype: int64


  3%|▎         | 29/920 [01:33<30:14,  2.04s/it]

0    3960
2      19
Name: label, dtype: int64
0    2800
1    1178
Name: label, dtype: int64


  3%|▎         | 31/920 [01:37<30:30,  2.06s/it]

0    3717
2     193
Name: label, dtype: int64


  3%|▎         | 32/920 [01:38<28:17,  1.91s/it]

0    3998
Name: label, dtype: int64


  4%|▎         | 33/920 [01:40<27:34,  1.87s/it]

0    3605
1     362
2       5
Name: label, dtype: int64


  4%|▎         | 34/920 [01:42<26:27,  1.79s/it]

0    3890
1      39
2      23
Name: label, dtype: int64
1    2467
0    1514
Name: label, dtype: int64


  4%|▍         | 36/920 [01:46<27:46,  1.88s/it]

0    3998
Name: label, dtype: int64
0    3223
1     682
2      44
Name: label, dtype: int64


  4%|▍         | 38/920 [01:50<27:44,  1.89s/it]

0    3958
Name: label, dtype: int64


  4%|▍         | 39/920 [01:51<26:28,  1.80s/it]

0    3993
2       2
Name: label, dtype: int64


  4%|▍         | 40/920 [01:53<25:28,  1.74s/it]

0    3998
Name: label, dtype: int64


  4%|▍         | 41/920 [01:54<22:29,  1.54s/it]

0    3144
1      87
Name: label, dtype: int64


  5%|▍         | 42/920 [01:55<21:58,  1.50s/it]

0    3989
Name: label, dtype: int64
0    4925
2    1674
Name: label, dtype: int64


  5%|▍         | 44/920 [02:02<32:17,  2.21s/it]

0    3904
2      45
Name: label, dtype: int64


  5%|▍         | 45/920 [02:04<30:15,  2.07s/it]

0    3445
2     314
1      37
Name: label, dtype: int64


  5%|▌         | 46/920 [02:05<28:02,  1.92s/it]

0    3998
Name: label, dtype: int64


  5%|▌         | 47/920 [02:07<26:23,  1.81s/it]

0    3998
Name: label, dtype: int64
0    3310
1     489
2      88
Name: label, dtype: int64


  5%|▌         | 49/920 [02:11<27:21,  1.88s/it]

0    3765
2     119
Name: label, dtype: int64


  5%|▌         | 50/920 [02:12<26:07,  1.80s/it]

0    3998
Name: label, dtype: int64


  6%|▌         | 51/920 [02:14<25:00,  1.73s/it]

0    3998
Name: label, dtype: int64


  6%|▌         | 52/920 [02:15<24:43,  1.71s/it]

0    3760
2     117
Name: label, dtype: int64
0    2957
1     538
2     133
Name: label, dtype: int64


  6%|▌         | 54/920 [02:19<25:42,  1.78s/it]

0    3873
2      68
Name: label, dtype: int64


  6%|▌         | 55/920 [02:21<25:27,  1.77s/it]

0    3887
1      95
2       4
Name: label, dtype: int64


  6%|▌         | 56/920 [02:23<24:56,  1.73s/it]

0    3853
1     137
Name: label, dtype: int64


  6%|▌         | 57/920 [02:24<24:35,  1.71s/it]

0    3785
1     198
Name: label, dtype: int64


  6%|▋         | 58/920 [02:26<23:45,  1.65s/it]

0    3998
Name: label, dtype: int64


  6%|▋         | 59/920 [02:27<23:12,  1.62s/it]

0    3998
Name: label, dtype: int64


  7%|▋         | 60/920 [02:29<24:38,  1.72s/it]

0    3860
1      44
2      34
Name: label, dtype: int64


  7%|▋         | 61/920 [02:32<27:14,  1.90s/it]

0    5163
1     205
Name: label, dtype: int64


  7%|▋         | 62/920 [02:33<26:06,  1.83s/it]

0    3919
2      37
Name: label, dtype: int64


  7%|▋         | 63/920 [02:35<25:00,  1.75s/it]

0    3998
Name: label, dtype: int64


  7%|▋         | 64/920 [02:36<24:28,  1.72s/it]

0    3768
1     117
2      51
Name: label, dtype: int64


  7%|▋         | 65/920 [02:39<26:48,  1.88s/it]

0    3517
2     269
Name: label, dtype: int64


  7%|▋         | 66/920 [02:40<26:02,  1.83s/it]

0    3906
2      46
Name: label, dtype: int64


  7%|▋         | 67/920 [02:42<25:21,  1.78s/it]

0    3793
2     122
Name: label, dtype: int64


  7%|▋         | 68/920 [02:44<24:28,  1.72s/it]

0    3965
2      17
Name: label, dtype: int64


  8%|▊         | 69/920 [02:45<24:06,  1.70s/it]

0    3842
2      79
Name: label, dtype: int64
0    2548
1    1066
2     224
Name: label, dtype: int64


  8%|▊         | 70/920 [02:47<26:15,  1.85s/it]

0    2175
1    1755
Name: label, dtype: int64


  8%|▊         | 72/920 [02:52<28:33,  2.02s/it]

0    3998
Name: label, dtype: int64


  8%|▊         | 73/920 [02:54<26:48,  1.90s/it]

0    3819
1      65
2      37
Name: label, dtype: int64


  8%|▊         | 74/920 [02:55<25:22,  1.80s/it]

0    3959
1      35
Name: label, dtype: int64


  8%|▊         | 75/920 [02:57<24:28,  1.74s/it]

0    3992
2       3
Name: label, dtype: int64


  8%|▊         | 76/920 [02:58<23:34,  1.68s/it]

0    3998
Name: label, dtype: int64
0    2746
1     987
2     124
Name: label, dtype: int64


  8%|▊         | 77/920 [03:01<26:47,  1.91s/it]

0    3297
1     683
Name: label, dtype: int64


  9%|▊         | 79/920 [03:04<25:39,  1.83s/it]

0    3998
Name: label, dtype: int64
1    1986
0    1804
2      67
Name: label, dtype: int64


  9%|▊         | 80/920 [03:07<28:37,  2.05s/it]

0    2956
1     913
2      48
Name: label, dtype: int64


  9%|▉         | 82/920 [03:11<27:34,  1.97s/it]

0    3642
1     281
2      28
Name: label, dtype: int64


  9%|▉         | 83/920 [03:13<27:32,  1.97s/it]

0    3933
2      44
Name: label, dtype: int64


  9%|▉         | 84/920 [03:14<26:18,  1.89s/it]

0    3979
2       7
Name: label, dtype: int64


  9%|▉         | 85/920 [03:16<25:40,  1.84s/it]

0    3828
2      93
Name: label, dtype: int64


  9%|▉         | 86/920 [03:18<24:24,  1.76s/it]

0    3998
Name: label, dtype: int64


  9%|▉         | 87/920 [03:19<23:33,  1.70s/it]

0    3998
Name: label, dtype: int64


 10%|▉         | 88/920 [03:21<23:00,  1.66s/it]

0    3998
Name: label, dtype: int64


 10%|▉         | 89/920 [03:23<24:40,  1.78s/it]

0    3928
2      48
Name: label, dtype: int64


 10%|▉         | 90/920 [03:25<24:22,  1.76s/it]

0    3941
2      26
Name: label, dtype: int64


 10%|▉         | 91/920 [03:26<24:13,  1.75s/it]

0    3715
1     233
2       7
Name: label, dtype: int64


 10%|█         | 92/920 [03:28<23:45,  1.72s/it]

0    3884
1      41
2      30
Name: label, dtype: int64


 10%|█         | 93/920 [03:30<23:39,  1.72s/it]

0    3738
1     246
Name: label, dtype: int64


 10%|█         | 94/920 [03:32<24:17,  1.76s/it]

0    3969
1      26
Name: label, dtype: int64
0    2567
1    1960
2    1056
Name: label, dtype: int64


 10%|█         | 95/920 [03:35<32:19,  2.35s/it]

0    3231
1     637
2      35
Name: label, dtype: int64


 11%|█         | 97/920 [03:39<28:51,  2.10s/it]

0    3781
2     128
Name: label, dtype: int64
0    12003
1      483
Name: label, dtype: int64


 11%|█         | 99/920 [03:50<46:10,  3.37s/it]

0    3998
Name: label, dtype: int64


 11%|█         | 100/920 [03:51<38:51,  2.84s/it]

0    3896
2      35
1       9
Name: label, dtype: int64
0    4415
1     642
Name: label, dtype: int64


 11%|█         | 101/920 [03:54<36:50,  2.70s/it]

0    3538
1     442
Name: label, dtype: int64


 11%|█         | 103/920 [03:57<31:29,  2.31s/it]

0    3766
1     183
2      17
Name: label, dtype: int64
0    3403
1     581
Name: label, dtype: int64


 11%|█▏        | 105/920 [04:01<28:11,  2.08s/it]

0    3563
1     364
2      22
Name: label, dtype: int64


 12%|█▏        | 106/920 [04:03<26:39,  1.96s/it]

0    3726
1     122
2      83
Name: label, dtype: int64


 12%|█▏        | 107/920 [04:04<25:15,  1.86s/it]

0    3933
1      24
2      23
Name: label, dtype: int64


 12%|█▏        | 108/920 [04:06<25:15,  1.87s/it]

0    3998
Name: label, dtype: int64


 12%|█▏        | 109/920 [04:08<23:59,  1.78s/it]

0    3998
Name: label, dtype: int64


 12%|█▏        | 110/920 [04:10<23:56,  1.77s/it]

0    3578
1     276
2      60
Name: label, dtype: int64


 12%|█▏        | 111/920 [04:11<23:49,  1.77s/it]

0    3757
1     230
Name: label, dtype: int64


 12%|█▏        | 112/920 [04:13<23:26,  1.74s/it]

0    3741
2     120
Name: label, dtype: int64
0    3226
1     743
Name: label, dtype: int64


 12%|█▏        | 114/920 [04:17<24:54,  1.85s/it]

0    3998
Name: label, dtype: int64


 12%|█▎        | 115/920 [04:19<23:59,  1.79s/it]

0    3985
2       5
Name: label, dtype: int64


 13%|█▎        | 116/920 [04:20<23:02,  1.72s/it]

0    3998
Name: label, dtype: int64


 13%|█▎        | 117/920 [04:22<22:42,  1.70s/it]

0    3939
2      29
Name: label, dtype: int64


 13%|█▎        | 118/920 [04:23<22:01,  1.65s/it]

0    3988
2       6
Name: label, dtype: int64


 13%|█▎        | 119/920 [04:25<21:40,  1.62s/it]

0    3975
2      12
Name: label, dtype: int64


 13%|█▎        | 120/920 [04:27<22:49,  1.71s/it]

0    3962
2      19
Name: label, dtype: int64


 13%|█▎        | 121/920 [04:29<23:08,  1.74s/it]

0    3526
2     257
Name: label, dtype: int64


 13%|█▎        | 122/920 [04:30<22:52,  1.72s/it]

0    3855
2      54
Name: label, dtype: int64


 13%|█▎        | 123/920 [04:32<22:12,  1.67s/it]

0    3998
Name: label, dtype: int64


 13%|█▎        | 124/920 [04:33<21:40,  1.63s/it]

0    3998
Name: label, dtype: int64


 14%|█▎        | 125/920 [04:35<21:25,  1.62s/it]

0    3998
Name: label, dtype: int64


 14%|█▎        | 126/920 [04:37<22:44,  1.72s/it]

0    3998
Name: label, dtype: int64


 14%|█▍        | 127/920 [04:39<23:03,  1.74s/it]

0    3694
1     161
2      62
Name: label, dtype: int64
0    2549
1    1367
Name: label, dtype: int64


 14%|█▍        | 129/920 [04:43<24:41,  1.87s/it]

0    3805
2      83
Name: label, dtype: int64


 14%|█▍        | 130/920 [04:45<23:35,  1.79s/it]

0    3998
Name: label, dtype: int64


 14%|█▍        | 131/920 [04:46<23:00,  1.75s/it]

0    3942
2      27
Name: label, dtype: int64


 14%|█▍        | 132/920 [04:48<24:08,  1.84s/it]

0    3998
Name: label, dtype: int64


 14%|█▍        | 132/920 [04:49<28:47,  2.19s/it]


KeyboardInterrupt: 

In [None]:
# Merge the per-recording results collected by the extraction loop.
if not mean:
    # Time-resolved variant: the helper returns the stacked epochs and the
    # matching label frame separately.
    epoch_final, df_all_label = model_training.prepare_data_time_for_modeling(epochs_list,df_label_list)
    print(epoch_final.shape)
    print(df_all_label.shape)
else:
    # Averaged variant: the helper returns a single object bound to X.
    X = model_training.prepare_data_for_modeling(epochs_list,df_label_list)
    print(X.shape)

In [None]:
# Class balance of the assembled dataset; the labels live on X when `mean`
# is set and on df_all_label otherwise.
print((X if mean else df_all_label).label.value_counts())

In [None]:
if not mean:
    # Bundle the epochs and their label frame under one name so the save
    # step can handle both variants through X.
    X = dict(feature=epoch_final, label=df_all_label)

In [None]:
# Output path without extension; the extension depends on the variant saved.
file = '../../data/data_model/data_sound_event_mel_norm_augmented_10ms'

if not mean:
    # X is the feature/label dict here, so it is pickled.
    with open(file + '.pickle', 'wb') as handle:
        pickle.dump(X, handle)
else:
    # X is written through its to_feather method.
    X.to_feather(file + '.ftr')