In [None]:
# Move the kernel into the sibling `dataprocessing` directory (presumably where the
# local `audio_processing` module imported below lives -- confirm).
# NOTE: the path is relative, so re-running this cell changes directory again.
cd ../dataprocessing

In [None]:
# import audio_processing_test as apt
import audio_processing as ap

import functools
import os
import sys
from absl import logging

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.optimizers import Adam
from keras.utils import to_categorical
from keras.utils import np_utils
from sklearn import metrics
from sklearn.model_selection import train_test_split

In [None]:
# Show the (symlink-resolved) directory the kernel is running in. Notebooks do not
# define `__file__`, so ask for the working directory directly instead of resolving
# a placeholder file name and taking its parent.
print(os.path.realpath(os.getcwd()))

In [None]:
# Notebook-wide debug switch (not read by any of the cells visible here).
debug = False
# Have absl logging emit messages at INFO level.
logging.set_verbosity(logging.INFO)

In [None]:
# Locations and extraction settings; get_dataframes() passes all of them to ap.output_df.
src_dir = 'example_src_dir'
dest_dir = 'example_dest_dir'
# Provide one to three names. The position decides the role of each file downstream:
# [dataset, evaluation/test set, validation set]
# (get_dataframes() loads index 1 as the evaluation set and index 2 as the validation
# set; get_data_for_model() uses them as test and validation data respectively.)
filenames = ['test_set']
labels = ['Gunshot, gunfire']
features_to_extract = ['mfcc']

In [None]:
def get_dataframes():
    """Load one DataFrame per entry of the module-level ``filenames`` list.

    Each name is passed to ``ap.output_df`` together with the module-level
    ``src_dir``, ``dest_dir``, ``labels`` and ``features_to_extract`` settings.

    Returns:
        list: The loaded frames in the same order as ``filenames``, i.e.
        ``[dataset]``, ``[dataset, evaluation]`` or
        ``[dataset, evaluation, validation]``.

    Raises:
        ValueError: If ``filenames`` does not hold between one and three names.
    """
    count = len(filenames)
    if not 1 <= count <= 3:
        raise ValueError(
            'Expected one to three filenames '
            '[dataset, evaluation set, validation set], got %d' % count)
    # Every file is loaded the same way; only its position in the list differs.
    return [
        ap.output_df(src_dir, dest_dir, name, labels, features_to_extract)
        for name in filenames
    ]

In [None]:
# Build one DataFrame per configured filename (the list holds one to three frames).
dfs = get_dataframes()

In [None]:
# The first frame is the one loaded from filenames[0] (the main dataset); preview it.
dataset_df = dfs[0]
dataset_df.head()

Convert features and classification labels into NumPy arrays

In [None]:
# Features: the per-row `mfcc` entries, stored as an object-dtype array.
X = np.array(dataset_df['mfcc'].tolist(), dtype=object)
# Targets: the matching `label` value of every row.
y = np.array(dataset_df['label'].tolist())

In [None]:
def _features_and_labels(df):
    """Return ``(features, labels)`` arrays built from ``df.mfcc`` and ``df.label``."""
    features = np.array(df.mfcc.tolist(), dtype=object)
    targets = np.array(df.label.tolist())
    return features, targets


def get_data_for_model(dfs, ratio):
    """Turn the loaded frames into feature/label arrays for training.

    Args:
        dfs: List of one to three DataFrames, each with ``mfcc`` and ``label``
            columns, ordered ``[dataset, test set, validation set]``.
        ratio: Fraction of the samples held out for testing when only a single
            frame is given (forwarded to ``train_test_split`` as ``test_size``).
            ``None`` falls back to 0.20. Ignored when the test data comes from
            its own frame.

    Returns:
        tuple: ``(train_x, train_y, val_x, val_y, test_x, test_y)`` for three
        frames, otherwise ``(train_x, train_y, test_x, test_y)``.

    Raises:
        ValueError: If ``dfs`` does not hold between one and three frames.
    """
    n_frames = len(dfs)
    if n_frames == 3:
        train_x, train_y = _features_and_labels(dfs[0])
        test_x, test_y = _features_and_labels(dfs[1])
        val_x, val_y = _features_and_labels(dfs[2])
        return train_x, train_y, val_x, val_y, test_x, test_y
    if n_frames == 2:
        train_x, train_y = _features_and_labels(dfs[0])
        test_x, test_y = _features_and_labels(dfs[1])
        return train_x, train_y, test_x, test_y
    if n_frames == 1:
        # Split the frame that was passed in rather than a notebook global.
        X, y = _features_and_labels(dfs[0])
        test_size = 0.20 if ratio is None else ratio
        train_x, test_x, train_y, test_y = train_test_split(
            X, y, test_size=test_size, random_state=42)
        return train_x, train_y, test_x, test_y
    raise ValueError('Expected one to three DataFrames, got %d' % n_frames)

# Split the dataset

In [None]:
# Hold out 15% of the samples for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=42,
)

In [None]:
# Workaround for:
#   ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type float).
# X is built with dtype=object, so every split is cast to the Keras default float
# type before being handed to the model; this resolved the error.
from keras import backend as K
x_train, y_train, x_test, y_test = (
    K.cast_to_floatx(split) for split in (x_train, y_train, x_test, y_test)
)

In [None]:
# Bundle the splits in the order model_config_train_1() unpacks them:
# (x_train, x_test, y_train, y_test).
data = (x_train, x_test, y_train, y_test)

In [None]:
def model_config_train_1(data, activation, optimizer, metrics, epochs):
    """Build, compile and train a one-hidden-layer binary classifier.

    Args:
        data: Tuple ``(x_train, x_test, y_train, y_test)``. Only the training
            pair is used here; the test pair is accepted for a uniform call
            signature.
        activation: Activation of the 20-unit hidden layer (e.g. ``'relu'``).
        optimizer: Optimizer passed to ``model.compile``.
        metrics: Metrics passed to ``model.compile`` (e.g. ``['accuracy']``).
        epochs: Number of training epochs.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    x_train, _, y_train, _ = data
    model = keras.Sequential([
        # Take the per-sample feature shape from the data instead of assuming 20.
        keras.Input(shape=np.shape(x_train)[1:]),
        keras.layers.Dense(20, activation=activation),
        # Emit probabilities: Keras' 'accuracy' thresholds predictions at 0.5,
        # which is only meaningful for sigmoid outputs, not for raw logits.
        keras.layers.Dense(1, activation='sigmoid'),
    ])
    model.compile(
        optimizer=optimizer,
        loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
        metrics=metrics,
    )
    return model.fit(x_train, y_train, epochs=epochs, verbose=1)

In [None]:
def _plot_metric(ax, epochs, train_values, val_values, name, ylabel, **legend_kwargs):
    """Draw one metric's training curve (plus validation curve, if given) on ``ax``."""
    # "bo" is for "blue dot"
    ax.plot(epochs, train_values, 'bo', label='Training ' + name)
    if val_values is not None:
        # "b" is for "solid blue line"
        ax.plot(epochs, val_values, 'b', label='Validation ' + name)
        ax.set_title('Training and validation ' + ylabel.lower())
    else:
        # Keep the title honest when there is nothing but training data to show.
        ax.set_title('Training ' + ylabel.lower())
    ax.set_xlabel('Epochs')
    ax.set_ylabel(ylabel)
    ax.legend(**legend_kwargs)


def visualize_training(history, filename):
    """Plot loss and accuracy per epoch side by side, save the figure and show it.

    Validation curves are added when the history contains ``val_loss`` /
    ``val_accuracy``; otherwise only the training curves are drawn.

    Args:
        history: Object whose ``history`` attribute is a dict with at least the
            ``'loss'`` and ``'accuracy'`` series (as returned by ``model.fit``
            when ``'accuracy'`` is among the compiled metrics).
        filename: Path the figure is written to via ``savefig``.
    """
    history_dict = history.history
    acc = history_dict['accuracy']
    loss = history_dict['loss']
    epochs = range(1, len(acc) + 1)

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 7))
    _plot_metric(ax1, epochs, loss, history_dict.get('val_loss'), 'loss', 'Loss')
    _plot_metric(ax2, epochs, acc, history_dict.get('val_accuracy'), 'acc', 'Accuracy',
                 loc='lower right')

    fig.savefig(filename, bbox_inches='tight')
    plt.show()


In [None]:
# Train the first configuration, then plot its learning curves and save the figure
# under dest_dir.
path = os.path.join(dest_dir, 'results1')
history = model_config_train_1(
    data,
    activation='relu',
    optimizer='adam',
    metrics=['accuracy'],
    epochs=20,
)
visualize_training(history, path)