From https://medium.com/m2mtechconnect/classifying-music-and-speech-with-machine-learning-e036ffab002e

In [2]:
import numpy as np
import pandas as pd
import os
import pathlib
import matplotlib.pyplot as plt
import seaborn as sns
from IPython import display
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras import layers, regularizers, models
from sklearn.model_selection import train_test_split
#import tensorflow_datasets as tfds
from datetime import datetime

In [3]:
import time

In [4]:
import ffmpeg
from pydub import AudioSegment

In [5]:
from tf_helpers import *

In [6]:
from sklearn.metrics import confusion_matrix, roc_curve, roc_auc_score, auc
import plotly.figure_factory as ff
from sklearn.preprocessing import OneHotEncoder

In [7]:
from data_set_params import DataSetParams
from scipy.io import wavfile
# Build the project's parameter object with no arguments.
# NOTE(review): `params` is not referenced again in the visible cells.
params = DataSetParams()

In [8]:
# Root folder of the dataset; the glob further down expects data_dir/<category>/<file>.
# Alternative dataset folder: 'dublin_dl_birds_split'
data_dir   = 'richfield_birds_split'

# Category names are the entries of data_dir, skipping anything whose name
# contains 'wav' (i.e. loose audio files sitting next to the category folders).
# The list is sorted because the directory listing order is not documented to
# be stable, and the position of a name in this list is what the saved
# y_true / y_pred integer labels refer to.
categories = sorted(
    str(entry)
    for entry in tf.io.gfile.listdir(data_dir)
    if 'wav' not in entry
)
categories

['Common Buzzard',
 'Common Kestrel',
 'Common Snipe',
 'Eurasian Curlew',
 'European Herring Gull',
 'European Robin',
 'Meadow Pipit',
 'Mute Swan',
 'Northern Lapwing',
 'Rook',
 'Tundra Swan',
 'Tundra Swan (Bewicks)']

In [9]:
# Collect every audio file exactly one level below data_dir
# (data_dir/<category>/<file>).  The check is on the file extension rather
# than on the substring 'wav', so a path that merely contains 'wav' somewhere
# (for example in a folder name) is no longer picked up by accident.
# The list is deliberately left unshuffled here; main_model_run shuffles its
# own copy on every call.
filenames = [
    filename
    for filename in tf.io.gfile.glob(str(data_dir) + '/*/*')
    if filename.lower().endswith('.wav')
]

In [10]:
# Quick sanity report on what was found on disk.
num_samples = len(filenames)
print(f'Number of total examples: {num_samples}')
print(f'Number of categories {len(categories)}')
print(f'Example file tensor: {filenames[0]}')

Number of total examples: 2840
Number of categories 12
Example file tensor: richfield_birds_split\Common Snipe\xc618120_left_start_0_0.wav


In [11]:
# Fraction of the file list that goes to training in the positional split.
TRAIN_SIZE = 0.8
# tf.data autotuning sentinel (not referenced in the visible cells).
AUTOTUNE   = tf.data.experimental.AUTOTUNE
# Index of the first test file when the list is split positionally.
cutoff     = int(TRAIN_SIZE * len(filenames))

In [12]:
import itertools

# The four available spectrogram channel types.
choices_lst = ['Mod', 'AbsRe', 'AbsIm', 'AbsAng']

# Every non-empty combination of channel types, ordered by size first and
# then in itertools.combinations order; each combination is stored as a list.
choice_list = [
    list(combo)
    for size in range(1, len(choices_lst) + 1)
    for combo in itertools.combinations(choices_lst, size)
]

In [13]:
#choice_list = choice_list[:4]

In [14]:
#choice_list = choice_list[4:]

In [15]:
# Keep only the channel combinations that include the 'Mod' channel.
choice_list = list(filter(lambda combo: 'Mod' in combo, choice_list))

In [16]:
def find_sub_list(sl,l):
    """Return the positions in ``l`` whose element equals an element of ``sl``.

    ``l`` is scanned in order and every element of ``sl`` is compared against
    each position; the position is recorded once per match, so a value that
    appears twice in ``sl`` contributes its position twice.

    Parameters
    ----------
    sl : sequence
        Values to look for.
    l : sequence
        Sequence to search.

    Returns
    -------
    list of int
        Matching positions in ``l``, in ascending order.
    """
    return [
        position
        for position, candidate in enumerate(l)
        for wanted in sl
        if wanted == candidate
    ]

## Outline of script needed:
For each of i = 1, ..., 20 iterations:
-  shuffle the files
-  separate them into train/test sets
-  For different combinations of Mod (default), AbsRe, AbsIm, AbsAng
    -  create spectrogram features (X) and labels (y)
    -  create training and test tensorflow datasets and batch
    -  train from scratch for 5,10,25,50 epochs
    -  record history (loss, accuracy), yTrue and yPred
    -  save confusion matrices
-  Also try 
    - work on making the convolution kernel different for each channel, and for the output to be the mean 
    - freeze layers (all but first or last) to fine tune training
    - Transfer learning from BirdNet


In [17]:
def main_model_run(filenames, index, epochs=10, batch_size=32, test_size=0.2):
    """Run one shuffled train/evaluate repetition over every channel combination.

    The file list is shuffled and split into stratified train/test sets, the
    features for all four channel types are computed once, and then a fresh
    ``concat_model`` is trained for each entry of the module-level
    ``choice_list`` using only the channels that entry names.

    For every repetition the shuffled file order is saved under ``filenames/``.
    For every channel combination the training history (``model_history/``),
    the trained model (``models/``), the predicted labels (``y_pred/``) and the
    true labels (``y_true/``) are saved.  These output folders are created if
    they do not exist yet.

    Relies on the module-level names ``categories``, ``choice_list``,
    ``data_dir`` and ``find_sub_list``, and on ``get_label``,
    ``preprocess_dataset`` and ``concat_model`` from ``tf_helpers``.

    Parameters
    ----------
    filenames : sequence of str
        Paths of all audio files to use.
    index : int
        Repetition number; only used in the output file names and log line.
    epochs : int, default 10
        Number of training epochs per model.
    batch_size : int, default 32
        Batch size passed to ``model.fit``.
    test_size : float, default 0.2
        Fraction of the files held out as the test/validation set.

    Returns
    -------
    None
    """
    choices_all = ['Mod', 'AbsRe', 'AbsIm', 'AbsAng']

    def concat_xy(ds):
        """Turn a dataset of (x, y) pairs into per-channel tensors and a label array.

        The dataset is walked exactly once so that features and labels are
        guaranteed to come from the same pass (and the preprocessing is not
        run twice).  All x are stacked, split along the last axis, and each
        slice gets a trailing axis of size 1 added back.
        """
        # assumes the last axis of x holds one channel per entry of
        # choices_all, in that order - TODO confirm against preprocess_dataset
        xs, ys = [], []
        for x, y in ds:
            xs.append(x)
            ys.append(y)
        channels = tf.unstack(tf.stack(xs), axis=-1)
        channels = [tf.expand_dims(channel, axis=-1) for channel in channels]
        return channels, np.array(ys)

    # Every save below writes into one of these folders; create them up front
    # so a fresh checkout does not fail after the (slow) feature extraction.
    for out_dir in ('filenames', 'model_history', 'models', 'y_pred', 'y_true'):
        os.makedirs(out_dir, exist_ok=True)

    filenames = tf.random.shuffle(filenames)

    all_labs = [get_label(y).numpy().decode() for y in filenames]
    filename_df = pd.DataFrame({'name': filenames.numpy(),
                                'label': all_labs})

    # Stratify on the label so both splits keep the class proportions.
    train, test = train_test_split(filename_df,
                                   test_size=test_size,
                                   stratify=filename_df[['label']])
    train_files = tf.random.shuffle(train['name'])
    test_files  = tf.random.shuffle(test['name'])

    train_ds = preprocess_dataset(train_files, choices_all, categories)
    test_ds  = preprocess_dataset(test_files, choices_all, categories)

    print('Getting data')
    X_train_all, y_train = concat_xy(train_ds)
    X_test_all, y_test   = concat_xy(test_ds)
    print("Done")

    # Timestamp + dataset name + repetition number identify this repetition.
    filename_idx = datetime.now().strftime("%Y%m%d-%H%M%S").replace('-', '_')+'_'+data_dir+'_concat'+str(index)

    np.save('filenames/'+'filenames'+filename_idx+'.npy', filenames.numpy())
    print("index: ", index)
    for choices in choice_list:
        print('Choices: ', ''.join(choices))

        # Pick out only the channels named by this combination.
        sub_idx = find_sub_list(choices, choices_all)
        X_train = [X_train_all[idx] for idx in sub_idx]
        X_test  = [X_test_all[idx] for idx in sub_idx]

        input_shape  = X_train[0].shape[1:]
        num_channels = len(X_train)
        num_classes  = len(categories)

        filename_run = filename_idx + ''.join(choices)+'_'

        # Many models are built in this loop; drop the global Keras state left
        # by the previous one so memory use does not keep growing.
        tf.keras.backend.clear_session()
        model = concat_model(input_shape, num_channels, num_classes)

        # The test split doubles as the validation set during training.
        history = model.fit(X_train, y_train,
                            validation_data = (X_test, y_test),
                            epochs          = epochs,
                            batch_size      = batch_size)

        pd.DataFrame(history.history).to_csv('model_history/'+'model_history'+filename_run+'.csv')

        model.save('models/'+filename_run+'.h5')
        y_pred = np.argmax(model.predict(X_test), axis=-1)
        y_true = y_test

        np.save('y_pred/'+'y_pred'+filename_run+'.npy', y_pred)
        np.save('y_true/'+'y_true'+filename_run+'.npy', y_true)

In [18]:
# Scratch check of range bounds: range(9, 10) yields only the value 9.
for i in range(9,10):
    print(i)

9


In [19]:
# Wall-clock start time; later cells report elapsed seconds relative to this.
tic = time.time()

In [20]:
# Six independent repetitions of the experiment.  After each repetition,
# print the number of seconds elapsed since `tic` was recorded.
for run_index in range(6):
    main_model_run(filenames, run_index)
    elapsed = time.time() - tic
    print(elapsed)

Getting data
Done
index:  0
Choices:  Mod
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Choices:  ModAbsRe
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Choices:  ModAbsIm
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Choices:  ModAbsAng
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Choices:  ModAbsReAbsIm
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Choices:  ModAbsReAbsAng
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Choices:  ModAbsImAbsAng
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Choices:  ModAbsReAbsImAbsAng
Epoch 1/10
Epoch 2/

Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Choices:  ModAbsReAbsIm
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Choices:  ModAbsReAbsAng
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Choices:  ModAbsImAbsAng
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Choices:  ModAbsReAbsImAbsAng
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
6562.400977134705
Getting data
Done
index:  2
Choices:  Mod
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Choices:  ModAbsRe
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Choices:  ModAbsIm
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10

Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Choices:  ModAbsReAbsImAbsAng
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
9107.290979385376
Getting data
Done
index:  3
Choices:  Mod
Epoch 1/10

KeyboardInterrupt: 

In [None]:
# Wall-clock end time, paired with `tic`.
toc = time.time()

In [None]:
# Total seconds between the `tic` and `toc` timestamps.
print(toc-tic)

In [None]:
# NOTE(review): scratch cell for inspecting the feature pipeline by hand.
# `filenames` is the unshuffled glob result at module level, so this positional
# split takes the files in whatever order glob returned them, with no shuffling
# or stratification.
train_files = filenames[:cutoff]
test_files  = filenames[cutoff:]

choices_all = ['Mod', 'AbsRe', 'AbsIm', 'AbsAng']

def concat_xy(ds):
    """Turn a dataset of (x, y) pairs into per-channel tensors and a label array.

    The dataset is walked exactly once so that features and labels come from
    the same pass (and the preprocessing is not run twice).  All x are stacked,
    split along the last axis, and each slice gets a trailing axis of size 1
    added back.

    Returns
    -------
    (list of tensors, numpy.ndarray)
        One tensor per slice of the last axis of x, and the labels.
    """
    # assumes the last axis of x holds one channel per entry of choices_all,
    # in that order - TODO confirm against preprocess_dataset
    xs, ys = [], []
    for x, y in ds:
        xs.append(x)
        ys.append(y)
    channels = tf.unstack(tf.stack(xs), axis=-1)
    channels = [tf.expand_dims(channel, axis=-1) for channel in channels]
    return channels, np.array(ys)

train_ds = preprocess_dataset(train_files, choices_all, categories)

In [None]:
#X_train_all, y_train_all = concat_xy(train_ds)

In [None]:

#y_train_all