In [1]:
import os
import sys
# Locate the project root as the part of the working directory that precedes
# the 'notebooks' folder, and put it on sys.path so the `src.*` imports resolve.
path_to_this_notebook = os.path.abspath('.')
_notebooks_pos = path_to_this_notebook.find('notebooks')
if _notebooks_pos == -1:
    # str.find returns -1 when 'notebooks' is absent; slicing with it would
    # silently chop the last character off the path. Fall back to the working
    # directory itself, keeping a trailing separator because later cells build
    # paths by plain string concatenation (PATH_TO_PROJECT + 'data/').
    PATH_TO_PROJECT = os.path.join(path_to_this_notebook, '')
else:
    PATH_TO_PROJECT = path_to_this_notebook[:_notebooks_pos]
# Guard against piling up duplicate entries when the cell is re-run.
if PATH_TO_PROJECT not in sys.path:
    sys.path.append(PATH_TO_PROJECT)

import numpy as np
import pandas as pd
import os
import pickle
import matplotlib.pyplot as plt

from src.deep_learning.soundtype_classifier.classifier_datasets import create_data_for_classifier
from src.metadata_processing.load_data import load_recordings_metadata, load_sounds
from collections import defaultdict

# Enable IPython's autoreload extension in mode 2, so edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Recordings metadata: file name first, then the project folder it lives in.
recordings_metadata_name = 'recordings_metadata.csv'
path_to_recordings_metadata = PATH_TO_PROJECT + 'data/'

# ---- Which recordings and sounds to use ----
dates = 'all'
experiments = 'all'
colonies = [
    'stark',
    'lannister',
    'martell',
    'targaryen',
    'baratheon',
    'tyrell',
    'arryn',
]
# alternative selection kept for reference: colonies = ['hvdkleinannotated']
stages = [
    'traced and checked',
    'labeled and checked',
    'split and checked',
]
classes_to_drop = [
    'weirdo',
    'badsplit',
    'noise',
    'sound',
]
max_sounds_per_class = dict(softchirp=4000)
min_sound_length, max_sound_length = 1337, 12000
min_sound_occurrences = 30

# ---- Recording denoising ----
use_rec_denoising = False
rec_denoising_params = dict(
    stationary=False,
    n_fft=512,
    time_constant_s=0.05,
    freq_mask_smooth_hz=500,
    time_mask_smooth_ms=10,
)

# ---- Sounds metadata ----
classes_balance_factor = 5 / 7
train_ratio = 0.7
columns_to_copy = [
    'colony',
    'ratids',
    'date',
    'experiment',
]

# ---- Feature processing ----
frequency_threshold = 3000
sr = 22050
n_fft = 512
n_mel, t_dim = 80, 64
target_shape = (n_mel, t_dim)
use_melspecs = True

# ---- Data augmentation ----
wave_augment_params = dict(
    augment=False,
    times_each_sound=4,
    stretching_lims=None,
    add_noise_lims=(0.001, 0.005),
)
augment_im = True
times_augment_im = 5  # alternative value kept for reference: 10
# Output folder for the generated datasets, plus the seed shared via the config
path_to_save = PATH_TO_PROJECT + 'models/classifier/datasets/'
random_seed = 42

# The dataset name records the image-augmentation setting and the main
# feature-extraction settings.
if augment_im and times_augment_im != 1:
    augment_str = 'im-augment=%d' % times_augment_im
else:
    augment_str = 'no-augment'
dataset_name = 'berlin_%s_nfft=%d_nmel=%d_tdim=%d_rec-denoise=%s_highpass=%s' % (
    augment_str,
    n_fft,
    n_mel,
    t_dim,
    use_rec_denoising,
    frequency_threshold,
)
print(dataset_name)

berlin_im-augment=5_nfft=512_nmel=80_tdim=64_rec-denoise=False_highpass=3000


In [3]:
# Load the recordings metadata, passing along the date / colony / experiment /
# stage selections configured earlier in the notebook.
recs_metadata, recs_dict = load_recordings_metadata(
    path_to_recordings_metadata,
    recordings_metadata_name,
    dates,
    colonies,
    experiments,
    stages,
    {},  # NOTE(review): purpose of this trailing empty dict is not visible here - confirm against load_recordings_metadata
)

Using 944 recordings


In [4]:
# Gather every setting into the nested config dictionary that is handed to
# create_data_for_classifier, one sub-dictionary per processing stage.
# NOTE(review): the stored run of create_data_for_classifier in this notebook ends
# with KeyError: 'extract_specs_params'; no such key is defined here - confirm
# where the classifier dataset code expects it.
all_params_dict = {
    'sr': sr,

    'recs_metadata': {
        'dates': dates,
        'colonies': colonies,
        'experiments': experiments,
        'stages': stages,
        'path_to_recordings_metadata': path_to_recordings_metadata,
        'recordings_metadata_name': recordings_metadata_name,
    },

    'rec_denoising': {
        'use_rec_denoising': use_rec_denoising,
        'rec_denoising_params': rec_denoising_params,
    },

    'sounds_metadata': {
        'classes_to_drop': classes_to_drop,
        'max_sounds_per_class': max_sounds_per_class,
        'max_sound_length': max_sound_length,
        'min_sound_length': min_sound_length,
        'min_sound_occurrences': min_sound_occurrences,
        'classes_balance_factor': classes_balance_factor,
        'train_ratio': train_ratio,
        'columns_to_copy': columns_to_copy,
        'random_seed': random_seed,
    },

    'features': {
        'target_shape': target_shape,
        'frequency_threshold': frequency_threshold,
        'n_fft': n_fft,
        'n_mel': n_mel,
        'wave_augment_params': wave_augment_params,
        'use_melspecs': use_melspecs,
        'times_augment_im': times_augment_im,
        'augment_im': augment_im,
    },
}

In [6]:
# Build the train / test metadata, raw sounds and mel-spectrograms from the config.
# save=False is passed here, so presumably nothing is written to path_to_save - confirm.
# NOTE(review): the stored output of this cell ends with
# KeyError: 'extract_specs_params', so the names below may not be defined
# after a fresh Restart & Run All.
(
    all_classes,
    sounds_metadata_train,
    sounds_npy_train,
    melspecs_train,
    sounds_metadata_test,
    sounds_npy_test,
    melspecs_test,
) = create_data_for_classifier(
    all_params_dict,
    dataset_name,
    path_to_save,
    save=False,
    preloaded_recs_dict=recs_dict,
    balance_train=False,
)

Using 944 recordings


  sounds_metadata = pd.concat(sounds_metadata, 0).reset_index(drop=True)


Using 484 recordings (5005 sounds)for train and  208 recordings (2091 sounds)for test

############ Classes in train:############ 

softchirp      2819
downsweep       543
pupcombo        444
whistle         216
combotwo        180
combo           168
scream          124
pupcheveron     122
loudchirp       118
grunt           101
upsweep          84
pupsquawk        66
hiccup           20
Name: cl, dtype: int64
############ Classes in test: ############ 

softchirp      1177
pupcombo        248
downsweep       202
whistle          99
combotwo         65
combo            59
grunt            56
pupcheveron      56
scream           46
pupsquawk        35
upsweep          27
loudchirp        18
hiccup            3
Name: cl, dtype: int64
After balancing:
############ Classes in train:############ 

softchirp      2819
downsweep       543
pupcombo        444
whistle         216
combotwo        180
combo           168
scream          124
pupcheveron     122
loudchirp       118
grunt          

  for sound in sounds_npy_train])
  for sound in sounds_npy_test])


KeyError: 'extract_specs_params'

In [8]:
# Sanity check: the per-class train counts printed by the dataset cell should
# add up to the 5005 train sounds it reports.
sum((2819, 543, 444, 216, 180, 168, 124, 122, 118, 101, 84, 66, 20))

5005

In [6]:
# Encode the string class labels as integer indices and print how many
# train / test samples fall into each class.
img_shape = melspecs_train.shape[1:]
y_train_str = sounds_metadata_train['cl']
y_test_str = sounds_metadata_test['cl']
# Index space is the sorted union of labels seen in either split.
all_classes_str = sorted(set(y_train_str.unique()) | set(y_test_str.unique()))
# One dictionary lookup per row instead of a linear list.index() scan.
class_to_index = {cl: idx for idx, cl in enumerate(all_classes_str)}
y_train = y_train_str.map(class_to_index)
y_test = y_test_str.map(class_to_index)
# minlength keeps both count vectors aligned to all_classes_str even when the
# highest-index class is missing from one of the splits.
print(np.bincount(y_train, minlength=len(all_classes_str)))
print(np.bincount(y_test, minlength=len(all_classes_str)))

[1595 4225 2295 1680 1575]
[  8 192 217  43   7]
