In [3]:
import glob
import sys
import os
import shutil

import numpy as np

import matplotlib.pyplot as plt
from matplotlib.pyplot import specgram
import matplotlib.style as ms
ms.use("seaborn-muted")
%matplotlib inline

import IPython.display

import librosa
import librosa.display

import tensorflow as tf
from sklearn.metrics import precision_recall_fscore_support
import pickle

NUMBER_OF_CLASSES = 2
NUMBER_OF_ITERATIONS = 5000
export_path_base = 'C:\\Users\\Howard\\Documents\\learnAudio\\ambientSoundModels'


# tf.app.flags.DEFINE_integer('training_iteration', NUMBER_OF_ITERATIONS, 'number of training iterations.')
# tf.app.flags.DEFINE_integer('model_version', 1, 'version number of the model.')
# tf.app.flags.DEFINE_string('work_dir', '/tmp', 'Working directory.')
# FLAGS = tf.app.flags.FLAGS



In [4]:
def load_files(filepaths):
    raw_sounds = []
    for filepath in filepaths:
        X,sr = librosa.load(filepath)
        raw_sounds.append(X)
    return raw_sounds


def plot_waves(sound_names,raw_sounds):
    i = 1
    fig = plt.figure(figsize=(25,60), dpi = 900)
    for n,f in zip(sound_names,raw_sounds):
        print(n,f)
        plt.subplot(10,1,i)
        librosa.display.waveplot(np.array(f),sr=22050)
        plt.title(n.title())
        i += 1
    plt.suptitle("Figure 1: Waveplot",x=0.5, y=0.915,fontsize=18)
    plt.show()

In [5]:
#sound_file_paths = ["Sound-Data/fold1/15564-2-0-0.wav", "Sound-Data/fold1/21684-9-0-5.wav"]

#sound_names = ["children playing", "street music"]

#raw_sounds = load_files(sound_file_paths)

# plot_waves(sound_names,raw_sounds)



# X, sr = librosa.load("Sound-Data/fold1/7383-3-0-0.wav")
# librosa.display.waveplot(np.array(X), sr=22050)
# plt.title("dog bark".title())
# plt.show()



In [6]:
def extract_feature(file_name):
    X, sample_rate = librosa.load(file_name)
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T,axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
    mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T,axis=0)
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X),
    sr=sample_rate).T,axis=0)
    return mfccs,chroma,mel,contrast,tonnetz

def parse_audio_files(parent_dir,sub_dirs,file_ext="*.wav"):
    features, labels = np.empty((0,193)), np.empty(0) #the shape 193 comes from the shape of features combined
    for label, sub_dir in enumerate(sub_dirs):
        print(label, sub_dir)
        for fn in glob.glob(os.path.join(parent_dir, sub_dir, file_ext)):
            try:
                mfccs, chroma, mel, contrast,tonnetz = extract_feature(fn)
            except Exception as e:
                print("Error encountered while parsing file: ", fn)
                continue
            ext_features = np.hstack([mfccs,chroma,mel,contrast,tonnetz])

            features = np.vstack([features,ext_features])
            labels = np.append(labels, fn.split('\\')[2].split('-')[1])
    return np.array(features), np.array(labels, dtype = np.int)

def one_hot_encode(labels):
    n_labels = len(labels)
    n_unique_labels = len(np.unique(labels))
    one_hot_encode = np.zeros((n_labels,n_unique_labels))
    one_hot_encode[np.arange(n_labels),labels] = 1
    return one_hot_encode

In [None]:
print("Starting Audio parsing")
parent_dir = 'Sound-Data/renameThese'
# dirs = ["fold1c","fold2c", "fold3c", "fold4","AmbientRecordings"]
dirs = ["Ambient_1_3", "Ambient_1_4", "Ambient_1_11", "toilet_sounds","Ambient_1_15"]
features, labels = parse_audio_files(parent_dir, dirs)

print("Done parsing audio")

Starting Audio parsing
0 Ambient_1_3


  if not np.issubdtype(x.dtype, np.float):
  if np.issubdtype(x.dtype, float) or np.issubdtype(x.dtype, complex):
  if not np.issubdtype(dtype, float):
  if np.issubdtype(x.dtype, float) or np.issubdtype(x.dtype, complex):


1 Ambient_1_4


In [None]:
f = open('./pickles/features.pckl', 'wb')
pickle.dump(features,f)
f.close()
fl = open('pickles/labels.pckl', 'wb')
pickle.dump(labels,fl)
fl.close()

In [5]:
f = open("./pickles/features.pckl", 'rb')
features = pickle.load(f)
f.close()


fl = open("./pickles/labels.pckl", "rb")
labels = pickle.load(fl)
fl.close()


In [6]:
print("starting")

# print(labels.shape)


labels = one_hot_encode(labels)

train_test_split = np.random.rand(len(features)) < 0.70
train_x = features[train_test_split]
train_y = labels[train_test_split]
test_x = features[~train_test_split]
test_y = labels[~train_test_split]

print("ending")

starting
ending


In [7]:
classes = [i for i in range(0,NUMBER_OF_CLASSES)]
# names = ["air_conditioner","children_playing","street_music", "toilet"]
# names = ["air_conditioner","children_playing", "toilet", "ambient_sounds"]
names = ["not_bathroom", "bathroom"]

# define parameters
training_epochs = NUMBER_OF_ITERATIONS
n_dim = features.shape[1]
n_classes = NUMBER_OF_CLASSES
n_hidden_units_one = 280 
n_hidden_units_two = 300
sd = 1 / np.sqrt(n_dim)
learning_rate = 0.01

In [8]:
X = tf.placeholder(tf.float32,[None,n_dim])
Y = tf.placeholder(tf.float32,[None,n_classes])

W_1 = tf.Variable(tf.random_normal([n_dim,n_hidden_units_one], mean = 0, stddev=sd))
b_1 = tf.Variable(tf.random_normal([n_hidden_units_one], mean = 0, stddev=sd))
h_1 = tf.nn.tanh(tf.matmul(X,W_1) + b_1)


W_2 = tf.Variable(tf.random_normal([n_hidden_units_one,n_hidden_units_two], mean = 0, stddev=sd))
b_2 = tf.Variable(tf.random_normal([n_hidden_units_two], mean = 0, stddev=sd))
h_2 = tf.nn.sigmoid(tf.matmul(h_1,W_2) + b_2)


W = tf.Variable(tf.random_normal([n_hidden_units_two,n_classes], mean = 0, stddev=sd))
b = tf.Variable(tf.random_normal([n_classes], mean = 0, stddev=sd))
y_ = tf.nn.softmax(tf.matmul(h_2,W) + b)

init = tf.global_variables_initializer()

In [9]:
cost_function = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(y_), reduction_indices=[1])) 
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_function)

correct_prediction = tf.equal(tf.argmax(y_,1), tf.argmax(Y,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))



In [10]:
saver = tf.train.Saver()
cost_history = np.empty(shape=[1],dtype=float)
y_true, y_pred = None, None
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(training_epochs):            
        _,cost = sess.run([optimizer,cost_function],feed_dict={X:train_x,Y:train_y})
        cost_history = np.append(cost_history,cost)
    
    y_pred = sess.run(tf.argmax(y_,1),feed_dict={X: test_x})
    y_true = sess.run(tf.argmax(test_y,1))
    
    saver.save(sess, "./checkpoints/"+"NN-model.ckpt")
    tf.train.write_graph(sess.graph_def, './tmp/model', 'ambient_nn.pb', as_text=False)

    
print("finished")

finished


### Export the model

In [2]:
# retrieve checkpoints
checkpoint = tf.train.get_checkpoint_state("./checkpoints")
input_checkpoint = checkpoint.model_checkpoint_path

# decide on the file name for frozen model
absolute_model_dir = "/".join(input_checkpoint.split("/")[:-1])
output_graph = absolute_model_dir + "/frozen_model.pb"

clear_devices = True

img = tf.placeholder(name="img", dtype=tf.float32, shape=(1, 64, 64, 3))
val = img + tf.constant([1., 2., 3.]) + tf.constant([1., 4., 4.])
out = tf.identity(val, name="out")

# start a session using a temporary fresh Graph
with tf.Session(graph=tf.Graph()) as sess:
    # import the meta graph in the current default Graph
    saver = tf.train.import_meta_graph(input_checkpoint + ".meta", clear_devices=clear_devices)
    # restore the weights
    saver.restore(sess, input_checkpoint)
    
    # provide the list of node names
    output_node_names = [n.name for n in tf.get_default_graph().as_graph_def().node]
    
#     final_output_node_names = tf.graph_util.remove_training_nodes(
#         output_node_names    
#     )
    
    # use a built-in TF helper to export variables to constants
    output_graph_def = tf.graph_util.convert_variables_to_constants(
        sess, # The session is used to retrieve the weights
        tf.get_default_graph().as_graph_def(), # The graph_def is used to retrieve the nodes 
        output_node_names # The output node names are used to select the usefull nodes
    )
    
    # serialize and dump the output graph to the filesystem
    with tf.gfile.GFile(output_graph, "wb") as f:
        f.write(output_graph_def.SerializeToString())
    print("%d ops in the final graph." % len(output_graph_def.node))

    # create tflite file
    tflite_model = tf.contrib.lite.toco_convert(output_graph_def, [img], [out])
    open("converteds_model.tflite", "wb").write(tflite_model)

INFO:tensorflow:Restoring parameters from ./checkpoints\NN-model.ckpt
INFO:tensorflow:Froze 6 variables.
Converted 6 variables to const ops.
227 ops in the final graph.
Instructions for updating:
Use the retry module or similar alternatives.


AttributeError: module 'tensorflow.contrib.lite.python.lite' has no attribute 'toco_convert'

In [None]:
fig = plt.figure(figsize=(10,8))
plt.plot(cost_history)
plt.ylabel("Cost")
plt.xlabel("Iterations")
plt.axis([0,training_epochs,0,np.max(cost_history)])
plt.show()

p,r,f,s = precision_recall_fscore_support(y_true, y_pred, average="micro")
print("F-Score:", round(f,4))

## notes
confusion matrix
test on real data
- record random real life events to gather more random sounds

In [None]:
l = [i for i in range(0,NUMBER_OF_CLASSES)]

from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_true, y_pred,labels=[i for i in range(0,NUMBER_OF_CLASSES)])
print(cm)


In [None]:
# from prettytable import PrettyTable
count = [0.00 for i in range(0,NUMBER_OF_CLASSES)]
for i in y_true:
    count[i] += 1


copy = list()
for index1, l in enumerate(cm):
    copy.append([])
    for index2, item in enumerate(l):
        copy[index1].append(item / count[index1] * 100)



names = ["bathroom","not_bathroom"]
layout = "{!s:^10} {!s:^10} {!s:^10}"
print(layout.format("","bathroom","not_bathroom"))
list_layout = "{!s:<10} {:^10.4f} {:^10.4f}"
# nums = [12.0, 13.234234,12.2323232323,12.2323232323,12.2323232323,12.2323232323,99.123123123123,99.123123123123,99.123123123123,99.123123123123]
# print(list_layout.format("air_cond",*nums))
for i in range(len(copy)):
    to_print = [names[i]] + copy[i]
    print(list_layout.format(*to_print))

In [None]:
parent_dir = 'Sound-Data/renameThese'
dirs = ["Ambient_1_16","Ambient_1_17","Ambient_1_18"]
features, labels = parse_audio_files(parent_dir, dirs)
# print(features)
# print(features.shape)
# print(labels)
# print(labels.shape)
# labels = one_hot_encode(labels)
print("finished")

In [None]:
with tf.Session() as sess:
    saver.restore(sess, "./checkpoints/NN-model.ckpt")
    print("Model restored")
    new_pred = sess.run(tf.argmax(y_,1),feed_dict={X: features})
#     new_true = sess.run(tf.argmax(labels,1))
    saver.save(sess, "./checkpoints/"+"NN-model.ckpt")
print("finished")

In [None]:
# newtypes = {10: "driving",11:"keyboard",12:"mouseclick"}
data_types = {0: "bathroom", 1: "not_bathroom"}
# for i,j in (labels,new_pred):
#     print(str(newtype[i]) + "->" + str(names[i]))
# for i in labels:
#     print(newtypes[i],end=" \t")
# print()
# for i in new_pred:
#     print(names[i], end="\t")


# count = 0
# sum = 0
# for i in new_pred:
#     print(names[i])
#     sum += i
#     if i == 2:
#         count += 1
# print(new_pred.shape[0])
count = 0
predSum = 0
timer = 0
bathCount = 0
for i in new_pred:
    if (data_types[labels[timer]] == "bathroom"):
        count += 1
    if (str(data_types[labels[timer]]) == str(names[i])):
        predSum += 1
        if (data_types[labels[timer]] == "bathroom"):
            bathCount += 1
#     else:
#         print(str(data_types[labels[timer]]) + ": " + str(names[i]))
    
    timer += 1
print(predSum / new_pred.shape[0])
print(count)
print(bathCount)

In [None]:
## distinguish home events
### -watching tv
### -using toilet -> Starting point, get samples of this first and washing hands sounds, try to record in pocket as well as outside
### -housework (ie vacuum, chores)
### -talking
## Problems
### -phone in pocket
### -sampling size/length

## remove not needed sounds -> 
#     keep air conditioning, children playing, street music


In [None]:
parent_dir = 'New-Data'
dirs = ["fold2", "fold3","fold4"]
features, labels = parse_audio_files(parent_dir, dirs)
print("finished")

In [None]:
with tf.Session() as sess:
    saver.restore(sess, "./checkpoints/NN-model.ckpt")
    print("Model restored")
    new_pred = sess.run(tf.argmax(y_,1),feed_dict={X: features})
#     new_true = sess.run(tf.argmax(labels,1))
    saver.save(sess, "./checkpoints/"+"NN-model.ckpt")
print("finished")

In [None]:
data_types = {3: "toilet", 4:"watching_tv", 5: "cafe/office"}

count = 0
sum = 0
timer = 0
for i in new_pred:
    print(str(data_types[labels[timer]]) + ": " + str(names[i]))
    sum += i
    if i == 2:
        count += 1
    timer += 1
print(count*2 / sum)

In [None]:
## get recording of open/real space sounds, like cafe, office, etc
# analyze conituous ambient sound, if the sound changes, that would be the moment