In [3]:
import os
import sys
# Locate the project root: the part of the working directory's absolute path
# that precedes the first occurrence of 'notebooks' (trailing separator kept).
path_to_this_notebook = os.path.abspath('.')
_notebooks_pos = path_to_this_notebook.find('notebooks')
if _notebooks_pos == -1:
    # str.find returns -1 when the marker is absent; slicing with [:-1] would
    # silently drop the last character of the path. Fall back to the working
    # directory itself, with a trailing separator because later cells build
    # paths by plain string concatenation (e.g. PATH_TO_PROJECT + 'data/').
    PATH_TO_PROJECT = os.path.join(path_to_this_notebook, '')
else:
    PATH_TO_PROJECT = path_to_this_notebook[:_notebooks_pos]
# Re-running the cell must not stack duplicate entries on sys.path.
if PATH_TO_PROJECT not in sys.path:
    sys.path.append(PATH_TO_PROJECT)

import numpy as np
import pandas as pd
import os
import pickle
import matplotlib.pyplot as plt

from src.deep_learning.soundtype_classifier.classifier_datasets import create_data_for_classifier
from src.metadata_processing.load_data import load_recordings_metadata, load_sounds
from collections import defaultdict

%load_ext autoreload
%autoreload 2
%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [18]:
# File name of the recordings metadata table and the folder that holds it
# (the 'data/' folder directly under the project root).
recordings_metadata_name = 'recordings_metadata.csv'
path_to_recordings_metadata = ''.join((PATH_TO_PROJECT, 'data/'))

# ---- Which recordings / sounds go into the dataset ----
# 'all' is used as a wildcard value for dates and experiments.
dates = 'all'
experiments = 'all'
colonies = [
    'stark',
    'lannister',
    'martell',
    'targaryen',
    'baratheon',
    'tyrell',
    'arryn',
]
# Alternative selection kept for reference:
# colonies = ['hvdkleinannotated']
stages = [
    'traced and checked',
    'labeled and checked',
    'split and checked',
]
classes_to_drop = [
    'weirdo',
    'badsplit',
    'noise',
    'sound',
]

# Per-sound length limits and per-class count limits
min_sound_length = 1337
max_sound_length = 12000
min_sound_occurrences = 30
max_sounds_per_class = dict(softchirp=4000)

# ---- Recording denoising ----
# Denoising is switched off here; the parameters are still stored in the config.
use_rec_denoising = False
rec_denoising_params = dict(
    stationary=False,
    n_fft=512,
    time_constant_s=0.05,
    freq_mask_smooth_hz=500,
    time_mask_smooth_ms=10,
)

# ---- Sounds metadata: train/test split and class balancing ----
train_ratio = 0.7
classes_balance_factor = 5 / 7
# Columns listed here are passed on as 'columns_to_copy' in the config.
columns_to_copy = [
    'colony',
    'ratids',
    'date',
    'experiment',
]

# ---- Feature (spectrogram) extraction ----
sr = 22050
highpass_filtering = 3000
# Supported values: 'mel', 'specs', 'new-specs'
specs_type = 'new-specs'
extract_specs_params = dict(
    num_freq_bins=128,
    num_time_bins=64,
    nperseg=512,
    noverlap=None,
    min_freq=3000,
    max_freq=11025,  # numerically equal to sr / 2
    spec_min_val=-10,
    spec_max_val=0,
    remove_dc_offset=True,
    mel=False,
    max_dur=0.4,
    time_stretch=True,
)
# Shape of one feature image: (frequency bins, time bins)
target_shape = (
    extract_specs_params['num_freq_bins'],
    extract_specs_params['num_time_bins'],
)

# ---- Data augmentation ----
# Image-level augmentation settings
augment_im = True
times_augment_im = 5
# Waveform-level augmentation settings (the 'augment' flag is off here)
wave_augment_params = dict(
    augment=False,
    times_each_sound=1,
    stretching_lims=None,
    add_noise_lims=(0.001, 0.005),
)

# ---- Output location and dataset naming ----
random_seed = 42
path_to_save = ''.join((PATH_TO_PROJECT, 'models/classifier/datasets/'))

data_str = 'berlin'
# Image augmentation only shows up in the name when it is enabled and
# produces more than one copy per image.
if augment_im and times_augment_im != 1:
    augment_str = 'im-augment=%d' % times_augment_im
else:
    augment_str = 'no-augment'

# The dataset name encodes the settings that distinguish one dataset from another.
name_fields = (
    data_str,
    specs_type,
    augment_str,
    extract_specs_params['nperseg'],
    target_shape,
    use_rec_denoising,
    highpass_filtering,
)
dataset_name = '%s_%s_%s_nfft=%d_shape=%s_denoise=%s_hpass=%s' % name_fields
print(dataset_name)

berlin_new-specs_im-augment=5_nfft=512_shape=(128, 64)_denoise=False_hpass=3000


In [11]:
# Load recordings for the selection configured above. recs_dict is reused
# later as `preloaded_recs_dict` when the datasets are created.
# NOTE(review): the last positional argument is an empty dict; its meaning is
# not visible from this notebook — confirm against load_recordings_metadata.
recs_metadata, recs_dict = load_recordings_metadata(
    path_to_recordings_metadata,
    recordings_metadata_name,
    dates,
    colonies,
    experiments,
    stages,
    {},
)

Using 904 recordings


In [14]:
# Create config dictionary
# Gathers every setting from the configuration cell into one nested dict:
# the sample rate at top level, plus one sub-dict per processing stage.
all_params_dict = dict(
    sr=sr,
    recs_metadata=dict(
        dates=dates,
        colonies=colonies,
        experiments=experiments,
        stages=stages,
        path_to_recordings_metadata=path_to_recordings_metadata,
        recordings_metadata_name=recordings_metadata_name,
    ),
    rec_denoising=dict(
        use_rec_denoising=use_rec_denoising,
        rec_denoising_params=rec_denoising_params,
    ),
    sounds_metadata=dict(
        classes_to_drop=classes_to_drop,
        max_sounds_per_class=max_sounds_per_class,
        max_sound_length=max_sound_length,
        min_sound_length=min_sound_length,
        min_sound_occurrences=min_sound_occurrences,
        classes_balance_factor=classes_balance_factor,
        train_ratio=train_ratio,
        columns_to_copy=columns_to_copy,
        random_seed=random_seed,
    ),
    features=dict(
        target_shape=target_shape,
        highpass_filtering=highpass_filtering,
        wave_augment_params=wave_augment_params,
        extract_specs_params=extract_specs_params,
        specs_type=specs_type,
        times_augment_im=times_augment_im,
        augment_im=augment_im,
    ),
)

In [16]:
# Create and save datasets
# Reuses the recordings loaded earlier (recs_dict) and unpacks the seven
# returned items: class list, then metadata / raw sounds / spectrograms for
# train, followed by the same three for test.
(
    all_classes,
    sounds_metadata_train,
    sounds_npy_train,
    melspecs_train,
    sounds_metadata_test,
    sounds_npy_test,
    melspecs_test,
) = create_data_for_classifier(
    all_params_dict,
    dataset_name,
    path_to_save,
    save=True,
    preloaded_recs_dict=recs_dict,
    balance_train=True,
)

Using 904 recordings


  sounds_metadata = pd.concat(sounds_metadata, 0).reset_index(drop=True)
  return pd.concat(sounds_metadata_balanced, 0)


Using 464 recordings (5275 sounds)for train and  200 recordings (1824 sounds)for test

############ Classes in train:############ 

softchirp      2894
downsweep       553
pupcombo        477
whistle         256
combotwo        219
combo           189
scream          170
grunt           120
loudchirp       117
pupcheveron     108
upsweep          92
pupsquawk        63
hiccup           17
Name: cl, dtype: int64
############ Classes in test: ############ 

softchirp      1105
pupcombo        215
downsweep       192
pupcheveron      70
whistle          59
combo            38
pupsquawk        38
grunt            37
combotwo         26
upsweep          19
loudchirp        19
hiccup            6
Name: cl, dtype: int64
After balancing:
############ Classes in train:############ 

softchirp      2894
downsweep      1659
pupcombo       1431
combo          1323
combotwo       1314
whistle        1280
scream         1190
grunt          1080
pupcheveron    1080
loudchirp      1053
upsweep        

  for sound in sounds_npy_train])
  for sound in sounds_npy_test])
  for _ in range(times_augment_im):
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


Saved !!! 


In [17]:
# Print bincounts
# Per-class sample counts for the train and test splits. Classes are indexed
# by their position in the sorted union of labels found in either split.
img_shape = melspecs_train.shape[1:]
y_train_str = sounds_metadata_train['cl']
y_test_str = sounds_metadata_test['cl']
all_classes_str = sorted(set(y_train_str.unique()) | set(y_test_str.unique()))
# Dict lookup replaces a linear list.index scan for every row.
class_to_index = {name: idx for idx, name in enumerate(all_classes_str)}
y_train = y_train_str.apply(lambda x: class_to_index[x])
y_test = y_test_str.apply(lambda x: class_to_index[x])
# minlength keeps both count vectors the same length even when the
# highest-index classes are missing from one split, so position i always
# refers to all_classes_str[i] in both printed arrays.
n_classes = len(all_classes_str)
print(np.bincount(y_train, minlength=n_classes))
print(np.bincount(y_test, minlength=n_classes))

[ 6615  6570  8295  5400  3315  5265  5400  7155  4725  5950 14470  5060
  6400]
[  38   26  192   37    6   19   70  215   38    0 1105   19   59]
