In [1]:
import os
import sys
# Locate the project root: the part of the working directory's absolute path
# that precedes the first occurrence of 'notebooks'
# (e.g. '<root>/notebooks/x' -> '<root>/').
path_to_this_notebook = os.path.abspath('.')
_notebooks_pos = path_to_this_notebook.find('notebooks')
if _notebooks_pos == -1:
    # str.find returns -1 when the marker is missing; slicing with [:-1] would
    # silently chop the last character off the path. Fall back to the working
    # directory itself, keeping a trailing separator because later cells build
    # paths by plain string concatenation.
    PATH_TO_PROJECT = os.path.join(path_to_this_notebook, '')
else:
    PATH_TO_PROJECT = path_to_this_notebook[:_notebooks_pos]
# Make the project's `src` package importable; skip the append on cell re-runs.
if PATH_TO_PROJECT not in sys.path:
    sys.path.append(PATH_TO_PROJECT)

import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt
from src.preprocessing.process_sounds_metadata import generate_sounds_metadata, make_fixed_size_sounds
from src.preprocessing.filters import filter_recs_metadata
from src.preprocessing.load_data import load_recs_dict, load_sounds
from src.deep_learning.create_model import create_conv_model
from src.data_representations.process_wavs import *
from src.data_representations.process_images import *
from src.deep_learning.splitting import *
%matplotlib inline


In [2]:
# Where the recordings metadata table lives
recordings_metadata_name = 'recordings_metadata.csv'
path_to_recordings_metadata = PATH_TO_PROJECT + 'data_no-african/'

# Selection criteria handed to filter_recs_metadata when the data is loaded
experiments = 'all'
dates = 'all'
stages = [
    'traced and checked',
    'labeled and checked',
]
train_colonies = [
    'baratheon',
    'stark',
    'targaryen',
    'dothrakia',
    'freefolka',
]
test_colonies = ['martell']

#!
# Bounds on the sound length; they are compared against 22050 * (e - s) in the
# class-balancing cell.
sounds_min_length = 1337
sounds_max_length = 10000
# NOTE(review): presumably per-class augmentation factors; not used in the
# visible cells -- confirm against the rest of the notebook.
class_augment_dict = dict(sound=3, noise=3)

In [4]:
# Read the table that describes every recording
recs_metadata = pd.read_csv(path_to_recordings_metadata + recordings_metadata_name)

# Masks selecting the recordings of the train colonies and of the test colonies
mask_train = filter_recs_metadata(recs_metadata, dates, train_colonies, stages, experiments)
mask_test = filter_recs_metadata(recs_metadata, dates, test_colonies, stages, experiments)
recs_for_training, recs_for_test = recs_metadata[mask_train], recs_metadata[mask_test]

# Recordings for each split
recs_dict_train = load_recs_dict(recs_for_training)
recs_dict_test = load_recs_dict(recs_for_test)

# Per-sound metadata for each split, re-indexed 0..n-1
sounds_metadata_train = generate_sounds_metadata(recs_for_training)
sounds_metadata_train = sounds_metadata_train.reset_index(drop=True)
sounds_metadata_test = generate_sounds_metadata(recs_for_test)
sounds_metadata_test = sounds_metadata_test.reset_index(drop=True)

# Class distribution of the train split, then of the test split
print(sounds_metadata_train['cl'].value_counts(),
      sounds_metadata_test['cl'].value_counts(),
      sep='\n')

softchirp          70832
noise              35399
weirdo              8895
loudchirp           2614
downsweep           1655
grunt                715
whistle              708
badsplit             483
combo                432
upsweep              373
combotwo             370
scream               201
mordent               63
tweet                 48
vtrill                33
phee                  22
invertedmordent       10
RFIDscanner            7
hiccup                 5
Name: cl, dtype: int64
softchirp    1674
noise         315
grunt          77
weirdo         46
whistle        25
upsweep        24
downsweep       4
Name: cl, dtype: int64


In [19]:
# Drop some softchirps for the sake of class balancing.
# The subsample is drawn from a dedicated, seeded generator so that
# Restart & Run All always keeps the same rows.
max_softchirps = 22000
SOFTCHIRP_SUBSAMPLE_SEED = 0
# Sampling rate used to turn the 's'/'e' timestamps into a length in samples.
SOUNDS_SAMPLE_RATE = 22050

softchirps_inds = np.where(sounds_metadata_train['cl'] == 'softchirp')[0]
np.random.RandomState(SOFTCHIRP_SUBSAMPLE_SEED).shuffle(softchirps_inds)
# np.where yields row positions while DataFrame.drop expects index labels:
# translate through the index so the drop stays correct for any index.
surplus_softchirp_labels = sounds_metadata_train.index[softchirps_inds[max_softchirps:]]
sounds_metadata_train = sounds_metadata_train.drop(surplus_softchirp_labels).reset_index(drop=True)

# Collapse every class other than 'noise' into a single 'sound' class.
sounds_metadata_train['cl'] = sounds_metadata_train['cl'].apply(lambda x: 'noise' if x == 'noise' else 'sound')
sounds_metadata_test['cl'] = sounds_metadata_test['cl'].apply(lambda x: 'noise' if x == 'noise' else 'sound')

# Keep only train sounds whose length lies within [sounds_min_length, sounds_max_length].
# The test split is intentionally left untouched here, as in the original cell.
lengths = SOUNDS_SAMPLE_RATE * (sounds_metadata_train['e'] - sounds_metadata_train['s'])
mask_normal_sounds = (lengths >= sounds_min_length) & (lengths <= sounds_max_length)
sounds_metadata_train = sounds_metadata_train[mask_normal_sounds].reset_index(drop=True)
print(sounds_metadata_train['cl'].value_counts())

sound    36000
noise    34429
Name: cl, dtype: int64


In [58]:
%%time
# Both values are expressed in samples; the durations assume the 22050 Hz
# sampling rate used elsewhere in this notebook.
step = 512         # ~0.025 sec
resolution = 1024  # ~0.05 sec

def process(sounds_metadata, recs_dict, resolution=1024, step=512, n_fft=512, n_mel=80):
    """Turn sound annotations into mel-spectrograms plus binary class labels.

    Pipeline: the annotations are cut into pieces by `make_fixed_size_sounds`,
    the matching waveforms are fetched with `load_sounds`, run through
    `process_waves` with the *train* settings defined below, and converted
    by `extract_melspecs`.

    Parameters
    ----------
    sounds_metadata : pandas.DataFrame
        Sound annotations, forwarded to `make_fixed_size_sounds`. The table
        it returns must have a 'cl' column holding only 'noise' or 'sound'.
    recs_dict
        Recordings container forwarded to `load_sounds`.
    resolution, step : int
        Forwarded to `make_fixed_size_sounds`.
    n_fft, n_mel : int
        Forwarded to `extract_melspecs`.

    Returns
    -------
    tuple
        `(melspecs_array, y_num)`: the array built from the output of
        `extract_melspecs`, and an integer array with 0 for 'noise' and
        1 for 'sound', one entry per row of the split metadata.

    Raises
    ------
    ValueError
        If a 'cl' value is neither 'noise' nor 'sound'.
    """
    # Only the '*_train' limits and 'filtering_th' are used by this function;
    # the '*_test' entries are kept for reference.
    sound_processing = {'stretching_lim_train' : None,
                        'stretching_lim_test' : None,
                        'noise_lim_train' : (0.001, 0.005),
                        'noise_lim_test' : None,
                        'filtering_th' : 3000
                       }

    sounds_metadata_ints_split = make_fixed_size_sounds(sounds_metadata, resolution, step)
    sounds_npy_split = load_sounds(sounds_metadata_ints_split, recs_dict, noisy_sampling=False, timestamps='int')
    sounds_npy_pr_iterator = process_waves(sounds_npy_split, sound_processing['stretching_lim_train'],
                                           sound_processing['noise_lim_train'], sound_processing['filtering_th'])
    melspecs_array = np.array(extract_melspecs(sounds_npy_pr_iterator, n_fft, n_mel))

    # The position in this list is the numeric label. The former unused
    # `img_shape = melspecs_array[0].shape` lookup was dropped: it served no
    # purpose and raised IndexError whenever no spectrogram was produced.
    all_classes = ['noise', 'sound']
    y_num = np.array([all_classes.index(yi) for yi in sounds_metadata_ints_split['cl']])

    return melspecs_array, y_num

CPU times: user 3 µs, sys: 1e+03 ns, total: 4 µs
Wall time: 6.68 µs


In [None]:
# Use the `resolution` / `step` values defined in the preceding cell instead of
# repeating the literals, so that changing them there actually takes effect.
melspecs_array_train, y_train_num = process(sounds_metadata_train, recs_dict_train,
                                            resolution=resolution, step=step, n_fft=512, n_mel=80)

In [14]:

def make_fixed_size_sounds(sounds_metadata, resolution=1024, step=512, sr=22050):
    """Split every sound into windows of exactly `resolution` samples.

    The 's' and 'e' timestamps are multiplied by `sr` and truncated to
    integers. For each sound, the samples that do not fit a whole number of
    `resolution`-sized parts are trimmed evenly from both ends, and windows of
    length `resolution` are then taken every `step` samples inside the trimmed
    span. Sounds shorter than `resolution` produce no rows.

    Note: defining this function in the notebook shadows the function of the
    same name imported from `src.preprocessing.process_sounds_metadata`.

    Parameters
    ----------
    sounds_metadata : pandas.DataFrame
        Must contain the columns 's' and 'e'; all other columns are copied
        unchanged into every window row.
    resolution : int
        Window length in samples.
    step : int
        Distance in samples between the starts of consecutive windows.
    sr : int
        Factor converting the 's'/'e' timestamps into sample indices.

    Returns
    -------
    pandas.DataFrame
        Same columns as `sounds_metadata`, one row per window, with 's' and
        'e' replaced by integer sample indices such that e - s == resolution.
    """
    columns = list(sounds_metadata.columns)
    s_col_ind = columns.index('s')
    e_col_ind = columns.index('e')

    s_ints = sounds_metadata['s'].apply(lambda x : int(sr * x))
    e_ints = sounds_metadata['e'].apply(lambda x : int(sr * x))

    sounds_metadata_split = []
    for ind in range(len(sounds_metadata)):
        s_int, e_int = s_ints.iloc[ind], e_ints.iloc[ind]
        # Samples left over once the sound is cut into whole `resolution` parts
        useless_space = (e_int - s_int) % resolution

        s_int_new = int(s_int + useless_space // 2)
        e_int_new = int(e_int - useless_space // 2)

        # Read the source row once per sound rather than once per window
        row = list(sounds_metadata.iloc[ind])
        for s_p in range(s_int_new, e_int_new + 1 - resolution, step):
            row[s_col_ind] = s_p
            # A window spans `resolution` samples; the previous `s_p + step`
            # produced step-sized pieces, contradicting the loop bound above.
            row[e_col_ind] = s_p + resolution
            sounds_metadata_split.append(tuple(row))

    return pd.DataFrame(sounds_metadata_split, columns = sounds_metadata.columns)