## Navigate to the Correct Directory

The following code navigates to the dataprocessing directory.

In [1]:
cd ../dataprocessing

/usr/local/google/home/carverforbes/activity-recognition/location/lbs/activity/audioset/dataprocessing


## Call the Import Statements

The following cell imports the modules that the rest of this notebook depends on.

In [2]:
# import statements
import datetime
import functools
import gc
import os
import sys

from absl import logging
import matplotlib.pyplot as plt
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import sklearn
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import feature_column
from tensorflow import keras
from tensorflow.keras import layers

import audio_processing as ap

# The following lines adjust the granularity of reporting.
pd.options.display.max_rows = 10
pd.options.display.float_format = "{:.1f}".format
# tf.keras.backend.set_floatx('float32')

print("Ran the import statements.")

Ran the import statements.


## Feature Extraction
Configure the following parameters to extract the desired features from a specified csv file to a specific destination directory.

In [3]:
# set logging to print logging.INFO logs
logging.set_verbosity(logging.INFO)

In [4]:
# Inputs handed to audio_processing when the dataset is built.
src_dir = 'example_src_dir'
dest_dir = 'example_dest_dir'
filename = 'gunshot_50_50'
labels = ['Gunshot, gunfire']

# Feature names this notebook lists as available for extraction.
available_features = [
    'chroma_stft',
    'chroma_cqt',
    'chroma_cens',
    'melspectrogram',
    'mfcc',
    'rms',
    'spectral_centroid',
    'spectral_bandwidth',
    'spectral_contrast',
    'spectral_flatness',
    'spectral_rolloff',
    'poly_features',
    'tonnetz',
    'zero_crossing_rate',
]

# Features requested for this run.
features_to_extract = ['melspectrogram']

# `last_features` is compared against `features_to_extract` by the loading cell;
# `redo` is forwarded unchanged to ap.output_df.
last_features, redo = None, False

In [5]:
def dataframe_to_csv(dataframe, dest_path, chunk_size=100):
    """Write `dataframe` to `dest_path` as CSV, `chunk_size` rows at a time.

    Cells holding a numpy array are converted to (nested) Python lists before
    writing, so the CSV stores the complete list text for each array.

    Args:
        dataframe: pandas DataFrame to export. Rows are taken by position, so
            any index (for example a shuffled one) is accepted.
        dest_path: Path of the CSV file; an existing file is overwritten.
        chunk_size: Number of rows converted and written per pass. Must be a
            positive integer.

    Returns:
        None. Prints the elapsed time in seconds when finished.
    """
    start_time = datetime.datetime.now()
    total_rows = len(dataframe)

    # max(..., 1) forces one pass so an empty frame still yields a header-only file.
    for begin in range(0, max(total_rows, 1), chunk_size):
        chunk = dataframe.iloc[begin:begin + chunk_size].copy()
        for column in chunk.columns:
            # Array cells can only live in object columns; leave typed columns alone.
            if chunk[column].dtype != object:
                continue
            # Build the replacement column explicitly instead of assigning
            # through chained indexing, which pandas does not guarantee to work.
            converted = np.empty(len(chunk), dtype=object)
            for position, value in enumerate(chunk[column]):
                if isinstance(value, np.ndarray):
                    converted[position] = value.tolist()
                else:
                    converted[position] = value
            chunk[column] = converted

        # Only the first chunk creates the file and writes the header; later
        # chunks are appended without repeating the header row.
        is_first_chunk = begin == 0
        chunk.to_csv(dest_path,
                     mode='w' if is_first_chunk else 'a',
                     index=False,
                     header=is_first_chunk)

    function_duration = datetime.datetime.now() - start_time
    print('Created the csv file at the destination path in {} seconds.'.format(
        function_duration.total_seconds()))

In [6]:
def _parse_array_text(text):
    """Turn the text of a (nested) list of numbers into a numpy array of rows."""
    rows = []
    # Drop the outermost brackets, then treat every ']' as the end of one row.
    for fragment in text[1:-1].replace(',', ' ').split(']'):
        tokens = fragment.replace('[', '').split()
        # Fragments with no numbers (e.g. the tail after the last ']') are skipped.
        if tokens:
            rows.append(np.array([float(token) for token in tokens]))
    return np.array(rows)


def csv_to_dataframe(csv_path):
    """Read a CSV file and rebuild numpy arrays from cells stored as list text.

    Every string cell is parsed as the text of a (nested) list of numbers and
    replaced by a numpy array with one row per bracketed group. Non-string
    cells are left as read by pandas.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        A pandas DataFrame in which each string cell has been replaced by the
        parsed numpy array.

    Raises:
        ValueError: If a string cell contains a token that is not a number.
    """
    df = pd.read_csv(csv_path)
    for column in df.columns:
        values = df[column]
        if not any(isinstance(value, str) for value in values):
            continue
        # Assemble the whole column and assign it once; writing cell by cell via
        # df[column][i] is chained assignment and may not reach the frame.
        parsed = np.empty(len(values), dtype=object)
        for position, value in enumerate(values):
            if isinstance(value, str):
                parsed[position] = _parse_array_text(value)
            else:
                parsed[position] = value
        df[column] = parsed
    return df

In [7]:
# Load the dataset into a pandas DataFrame through audio_processing.
features = '_'.join(features_to_extract)
csv_path = os.path.join(dest_dir, '{}_{}.csv'.format(filename, features))
# True when nothing was recorded in `last_features` yet, or when the requested
# feature list differs from the recorded one.
features_changed = not (last_features is not None
                        and features_to_extract == last_features)
last_features = features_to_extract
df = ap.output_df(
    src_dir,
    dest_dir,
    filename,
    labels,
    features_to_extract,
    redo,
)

atures
INFO:absl:(11590, 4069.990631)
INFO:absl:extracted features
INFO:absl:(11591, 4070.307408)
INFO:absl:extracted features
INFO:absl:(11592, 4070.556448)
INFO:absl:extracted features
INFO:absl:(11593, 4070.945029)
INFO:absl:extracted features
INFO:absl:(11594, 4071.358178)
INFO:absl:extracted features
INFO:absl:(11595, 4071.70155)
INFO:absl:extracted features
INFO:absl:(11596, 4072.051157)
INFO:absl:extracted features
INFO:absl:(11597, 4072.340873)
INFO:absl:extracted features
INFO:absl:(11598, 4072.640961)
INFO:absl:extracted features
INFO:absl:(11599, 4073.029672)
INFO:absl:extracted features
INFO:absl:(11600, 4073.281926)
INFO:absl:extracted features
INFO:absl:(11601, 4073.581503)
INFO:absl:extracted features
INFO:absl:(11602, 4073.821067)
INFO:absl:extracted features
INFO:absl:(11603, 4074.195092)
INFO:absl:extracted features
INFO:absl:(11604, 4074.455446)
INFO:absl:extracted features
INFO:absl:(11605, 4074.709997)
INFO:absl:extracted features
INFO:absl:(11606, 4074.972605)
INF

In [9]:
# print the first 5 rows of the dataframe.
df.head()

Unnamed: 0,label,melspectrogram
0,1,"[[3.4134238, 2.5367897, 2.295652, 1.9729736, 1..."
1,1,"[[0.009715517, 0.0028677727, 0.001311192, 0.00..."
2,1,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
3,1,"[[0.17823805, 0.099924825, 0.043443326, 0.1058..."
4,0,"[[0.0009629534, 0.0006072721, 0.00039850635, 0..."


## Data Preprocessing

In [12]:
# Shuffle the dataset/dataframe.
def shuffle_dataframe(df, random_state=None):
    """Return `df` with its rows in random order.

    Args:
        df: pandas DataFrame to shuffle; the input frame is not modified.
        random_state: Optional seed for a reproducible order. When None, the
            global numpy random state is used, exactly as before.

    Returns:
        A new DataFrame holding the same rows, reindexed in shuffled order.
    """
    if random_state is None:
        shuffled_index = np.random.permutation(df.index)
    else:
        # A dedicated generator keeps the result independent of global state.
        rng = np.random.default_rng(random_state)
        shuffled_index = rng.permutation(np.asarray(df.index))
    return df.reindex(shuffled_index)

In [13]:
# Rebind `df` to the shuffled frame; re-running this cell shuffles again.
df = shuffle_dataframe(df)
# Preview the first rows; the index column shows the new row order.
df.head()

Unnamed: 0,label,melspectrogram
1422,1,"[[0.0, 0.0, 0.0, 0.0, 1.0901705e-05, 0.9042145..."
10914,0,"[[0.22225726, 0.06697306, 0.038891166, 0.02848..."
4135,0,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
133,1,"[[0.00025893495, 0.00033576088, 0.00076551415,..."
400,1,"[[221.00229, 112.892944, 192.71011, 489.33264,..."


In [14]:
# temp bug fix for rows with None features
# and rows with different number of elements per frame
def fix_dataframe(dataframe, column='melspectrogram', expected_shape=(128, 431)):
    """Return `dataframe` without the rows whose feature cell is malformed.

    A row is dropped when its cell in `column` is None, is not a sequence, is
    empty, has a first element that is None, or does not have
    `expected_shape[0]` entries whose first entry has `expected_shape[1]`
    items.

    Args:
        dataframe: pandas DataFrame with one feature cell per row.
        column: Name of the column to validate.
        expected_shape: (outer length, length of the first inner entry) that a
            valid cell must have.

    Returns:
        A new DataFrame containing only the well-formed rows; the input frame
        is left unchanged.
    """
    keep = np.array(
        [_has_expected_shape(value, expected_shape) for value in dataframe[column]],
        dtype=bool)
    return dataframe[keep]


def _has_expected_shape(value, expected_shape):
    """Tell whether `value` looks like a 2-D feature of `expected_shape`."""
    if value is None:
        return False
    try:
        # Check the outer length first so an empty cell is never indexed.
        if len(value) != expected_shape[0]:
            return False
        first_entry = value[0]
        return first_entry is not None and len(first_entry) == expected_shape[1]
    except (TypeError, IndexError):
        # Scalars (e.g. NaN) and other non-sequence cells count as malformed.
        return False

Enter your data preprocessing here.

In [15]:
# Rebind `df` to the frame returned by fix_dataframe, i.e. without the rows it flags as bad.
df = fix_dataframe(df)

In [16]:
def get_data(dataframe):
    """Split `dataframe` into a feature array and a label array.

    Args:
        dataframe: pandas DataFrame with `melspectrogram` and `label` columns.

    Returns:
        A tuple (X, y): X is built with dtype=object from the `melspectrogram`
        column and y is built from the `label` column.
    """
    spectrograms = dataframe['melspectrogram'].tolist()
    targets = dataframe['label'].tolist()
    return np.array(spectrograms, dtype=object), np.array(targets)

In [18]:
# Extract the feature array X and the label array y from the cleaned frame.
X, y = get_data(df)

In [19]:
# X and y now hold the data, so drop the `df` name and trigger a collection
# pass; the number displayed below the cell is gc.collect()'s return value.
del df
gc.collect()

1477

In [21]:
# Flatten melspectrogram's (128, 431) shaped features 
def flatten_features(features):
    """Flatten every sample in `features` to one dimension.

    Args:
        features: Iterable of numpy arrays, one per sample.

    Returns:
        A numpy array with dtype=object built from the flattened samples, in
        the same order as the input.
    """
    flattened_samples = [sample.flatten() for sample in features]
    return np.array(flattened_samples, dtype=object)

In [22]:
# Rebind X to the array of flattened samples returned by flatten_features.
X = flatten_features(X)

In [24]:
# Hold out 20% of the samples for validation; the fixed random_state keeps the
# split reproducible. train_test_split is imported explicitly at the top of the
# notebook because `import sklearn` alone does not guarantee that the
# `sklearn.model_selection` submodule is loaded.
x_train, x_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42)

In [25]:
# The train/validation arrays replace X and y, so drop both names and trigger
# a collection pass; the number displayed below is gc.collect()'s return value.
del X
del y
gc.collect()

3832

In [26]:
# Convert arrays of objects to arrays of floats.
# Each name is rebound in its own statement, so the previous object array can
# be released before the next conversion starts.
# NOTE(review): this import sits mid-notebook; consider moving it to the import cell.
from tensorflow.keras import backend as K
x_train = K.cast_to_floatx(x_train)
y_train = K.cast_to_floatx(y_train)
x_val = K.cast_to_floatx(x_val)
y_val = K.cast_to_floatx(y_val)

In [27]:
# Number of values in the first training sample; create_model hard-codes the
# same number (55168) as its input width, so the two must agree.
print(len(x_train[0]))
gc.collect()

55168


40

## Building and Training Neural Network

In [28]:
# Define the plotting function.
def plot_curve(epochs, hist, list_of_metrics, path, filename, list_of_hyperparameters):
    """Plot one or more metrics against epoch and save the figure as a PNG.

    The image is written to `<path>/<filename>/<metrics>_<hyperparameters>.png`,
    where the file name joins the metric names and the stringified
    hyperparameters with underscores. Missing directories are created.

    Args:
        epochs: Sequence of epoch indices used for the x axis.
        hist: Mapping or DataFrame indexed by metric name, one value per epoch.
        list_of_metrics: Keys of `hist` to plot. Entries that are not strings
            must expose a `.name` attribute, which is used in the file name.
        path: Base output directory.
        filename: Name of the sub-directory of `path` that receives the image.
        list_of_hyperparameters: Values appended to the image file name.

    Returns:
        The `matplotlib.pyplot` module, with the new figure as current figure.
    """
    # list_of_metrics should be one of the names shown in:
    # https://www.tensorflow.org/tutorials/structured_data/imbalanced_data#define_the_model_and_metrics

    plt.figure()
    plt.xlabel("Epoch")
    plt.ylabel("Value")

    # The first epoch is left out of the curve, but only when later epochs
    # exist; otherwise a single-epoch run would produce an empty plot.
    first_plotted = 1 if len(epochs) > 1 else 0
    for m in list_of_metrics:
        x = hist[m]
        plt.plot(epochs[first_plotted:], x[first_plotted:], label=m)

    plt.legend()

    # makedirs creates `path` and the per-dataset sub-directory in one call and
    # also copes with missing parent directories, which os.mkdir does not.
    output_dir = os.path.join(path, filename)
    try:
        os.makedirs(output_dir, exist_ok=True)
    except OSError as error:
        logging.error(error)

    hyperparameter_names = [str(item) for item in list_of_hyperparameters]
    metric_names = [item if isinstance(item, str) else str(item.name)
                    for item in list_of_metrics]
    image_name = '_'.join(metric_names) + '_' + '_'.join(hyperparameter_names)
    plt.savefig(os.path.join(output_dir, image_name + '.png'), bbox_inches='tight')

    return plt


print("Defined the plot_curve function.")

Defined the plot_curve function.


## Train 1
Neural Network Type:
* Input Layer: (Number of Nodes)
* Hidden Layer 1:
* Hidden Layer 2: 
* Output Layer: 

Hyper-parameters:
* Loss Function: 
* Activation Function: 
* Optimizer Function: 
* Learning Rate: 
* Epochs: 
* Batch_Size: 
* Classification Threshold: 
* Regularization: 
* Regularization Lambda: 

In [29]:
# Define the functions that create and train a model.
def create_model(my_learning_rate, my_metrics, optimizer, regularization,
                 regularization_lambda, input_dim=55168, hidden_units=55168):
    """Create and compile a simple classification model.

    The hidden layer's activation and the loss are read from the notebook-level
    variables `activation` and `loss`, which must be defined before this
    function is called.

    Args:
        my_learning_rate: Learning rate handed to the optimizer.
        my_metrics: List of Keras metrics to track.
        optimizer: Optimizer class (not an instance); it is instantiated here
            with `learning_rate=my_learning_rate`.
        regularization: Callable that builds a kernel regularizer from
            `regularization_lambda`.
        regularization_lambda: Strength passed to `regularization`.
        input_dim: Number of input features; also the width of the first layer.
        hidden_units: Number of units in the first hidden layer.

    Returns:
        The compiled `tf.keras` Sequential model.
    """
    # Most simple tf.keras models are sequential.
    model = tf.keras.models.Sequential()

    # Add the input layer of `input_dim` nodes.
    model.add(tf.keras.layers.Dense(units=input_dim, input_shape=(input_dim,)))

    # Implement ___ regularization in the first hidden layer.
    model.add(tf.keras.layers.Dense(units=hidden_units,
                                    activation=activation,
                                    kernel_regularizer=regularization(regularization_lambda),
                                    name='Hidden1'))

    # # Implement ___ regularization in the second hidden layer.
    # model.add(tf.keras.layers.Dense(units=,
    #                               activation=activation,
    #                               kernel_regularizer=regularization(regularization_lambda),
    #                               name='Hidden2'))

    # Funnel the regression value through a sigmoid function. No input_shape is
    # given here: only the first layer of a Sequential model declares one.
    model.add(tf.keras.layers.Dense(units=1,
                                    activation=tf.sigmoid,
                                    name='Output'))

    # Call the compile method to construct the layers into a model that
    # TensorFlow can execute.  Notice that we're using a different loss
    # function for classification than for regression.
    # `learning_rate` replaces the deprecated `lr` keyword, which newer Keras
    # releases reject.
    model.compile(optimizer=optimizer(learning_rate=my_learning_rate),
                  loss=loss,
                  metrics=my_metrics)

    return model
              
def train_model(model, features, label, epochs, label_name,
                batch_size=None, my_validation_split=0.0,
                validation_data=None, shuffle=True):
    """Feed a dataset into the model in order to train it.

    Args:
        model: Object exposing a Keras-style `fit` method.
        features: Training inputs, passed to `fit` as `x`.
        label: Training targets, passed to `fit` as `y`.
        epochs: Number of epochs to train for.
        label_name: Not used by this function; kept so existing calls keep working.
        batch_size: Batch size passed to `fit`.
        my_validation_split: Fraction of the training data to hold out for
            validation, passed to `fit` as `validation_split`.
        validation_data: Optional explicit validation set passed to `fit`.
        shuffle: Passed to `fit` unchanged.

    Returns:
        A tuple (epochs, hist): the list of epoch indices reported by the
        training history, and a DataFrame with one column per tracked metric.
    """
    # The x parameter of tf.keras.Model.fit can be a list of arrays.
    # my_validation_split used to be accepted but silently ignored; it is now
    # forwarded. The default of 0.0 leaves existing calls unchanged.
    history = model.fit(x=features, y=label, batch_size=batch_size,
                        epochs=epochs, shuffle=shuffle,
                        validation_split=my_validation_split,
                        validation_data=validation_data)

    # The list of epochs is stored separately from the rest of history.
    trained_epochs = history.epoch

    # Isolate the classification metric for each epoch.
    hist = pd.DataFrame(history.history)

    return trained_epochs, hist

print("Defined the create_model and train_model functions.")

Defined the create_model and train_model functions.


In [1]:
# The following variables are the hyperparameters.
# NOTE: this cell needs the import cell and the data-preparation cells to have
# run in the same kernel session (it uses tf, x_train, y_train, x_val, y_val).
loss = tf.keras.losses.BinaryCrossentropy()
activation = 'relu'
optimizer = tf.keras.optimizers.RMSprop
learning_rate = 0.001
epochs = 1
batch_size = 25
classification_threshold = 0.70
regularization = tf.keras.regularizers.l2
regularization_lambda = 0.001
label_name = "label"

list_of_hyperparameters = [learning_rate, epochs, batch_size,
                           classification_threshold,
                           regularization_lambda,
                           label_name]


# Here is the updated definition of METRICS:
METRICS = [
      tf.keras.metrics.BinaryAccuracy(
          name='accuracy', threshold=classification_threshold),
      tf.keras.metrics.Precision(
          thresholds=classification_threshold, name='precision'),
      tf.keras.metrics.Recall(
          thresholds=classification_threshold, name="recall"),
]

# Establish the model's topology.
my_model = create_model(learning_rate, METRICS, optimizer=optimizer, regularization=regularization, regularization_lambda=regularization_lambda)

# View the model's structure.
my_model.summary()

# Train the model on the training set.
# The per-epoch indices get their own name so that the `epochs` hyperparameter
# (an int) is not overwritten by a list, which would break a re-run of this cell.
epoch_indices, hist = train_model(my_model, x_train, y_train, epochs,
                                  label_name, batch_size, validation_data=(x_val, y_val))

# Plot metrics vs. epochs
list_of_metrics_to_plot = ['accuracy', "precision", "recall", 'val_accuracy', 'val_precision', 'val_recall']
plot_curve(epoch_indices, hist, list_of_metrics_to_plot, dest_dir, filename, list_of_hyperparameters)
plot_curve(epoch_indices, hist, ['loss', 'val_loss'], dest_dir, filename, list_of_hyperparameters)

# evaluate() returns the loss followed by the metrics in the order of METRICS.
training_performance =  my_model.evaluate(x_train, y_train, verbose=0)
print('Training Performance')
print('---------------------------------')
print('loss: ', training_performance[0])
print('accuracy: ', training_performance[1])
print('precision: ', training_performance[2])
print('recall: ', training_performance[3])
print()

validation_performance =  my_model.evaluate(x_val, y_val, verbose=0)
print('Validation Performance')
print('---------------------------------')
print('loss: ', validation_performance[0])
print('accuracy: ', validation_performance[1])
print('precision: ', validation_performance[2])
print('recall: ', validation_performance[3])

NameError: name 'tf' is not defined