In [1]:
from helpers import *

import random
from random import randrange
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
import time


# Relative base directory used as a prefix for every data file this notebook opens
# (sampledFiles.txt, unsampledFiles.txt and the generated *Info.txt stream files).
filedir = "../"

Using TensorFlow backend.


In [2]:
## WRITE STREAM DATA TO FILE

# Number of audio entries written to each generated stream file.
num_files_in_stream = 400
NUM_CLASSES_OVERRIDE = 4  # If 0, use ALL classes; else, use first n classes only
# Label given to the zero-valued (silent) samples of the composite stream.
# NOTE(review): generate_used_stream labels the first used file with class 0 too, so
# silence and that file share a label -- confirm whether file labels should be offset by 1.
SILENCE_CLASS = 0

def generate_matched_stream(matches):
    """Pick one random entry from ``matches``.

    Args:
        matches: Sequence of match entries. The caller reads elements 0, 1 and 2
            of the returned entry as strings (presumably file, class label and
            matched file -- verify against ``load_matches``).

    Returns:
        The chosen entry, unmodified. When ``NUM_CLASSES_OVERRIDE`` is positive
        only the first ``NUM_CLASSES_OVERRIDE`` entries are candidates.

    Raises:
        IndexError: If there are no candidate entries to choose from.
    """
    if NUM_CLASSES_OVERRIDE > 0:
        candidates = matches[:NUM_CLASSES_OVERRIDE]
    else:
        candidates = matches
    return random.choice(candidates)

def generate_used_stream(used_files):
    """Pick a random used file and label it with its own index.

    Args:
        used_files: List of file names; the position of a file in this list is
            used as its class label.

    Returns:
        ``[file_name, str(index), file_name]`` for the randomly chosen index.
        When ``NUM_CLASSES_OVERRIDE`` is positive the index is drawn from
        ``range(NUM_CLASSES_OVERRIDE)``, otherwise from the whole list.

    Raises:
        IndexError: If the drawn index is not a valid position in ``used_files``
            (e.g. ``NUM_CLASSES_OVERRIDE`` exceeds the list length).
        ValueError: If ``used_files`` is empty and no override is set.
    """
    if NUM_CLASSES_OVERRIDE > 0:
        random_index = randrange(NUM_CLASSES_OVERRIDE)
    else:
        # The previous code referenced an undefined ``matches`` here and never set
        # the index, so running with "all classes" raised NameError.
        random_index = randrange(len(used_files))
    chosen_file = used_files[random_index]
    return [chosen_file, str(random_index), chosen_file]

# Wall-clock start for this cell; the elapsed time is printed once both stream files are written.
start = time.time()

def generate_data_file(name):
    """Write a randomly generated stream description to ``filedir + name``.

    The file starts with one line holding a random silence length, followed by
    ``num_files_in_stream`` pairs of lines: an entry line of the form
    ``"<field0> -_- <field1> -_- <field2> "`` and another silence-length line.
    Silence lengths are integers in ``[0, 44100]``; the reader turns each one
    into that many zero-valued samples.

    Args:
        name: File name (appended to ``filedir``) to create or overwrite.
    """
    # Load used sounds
    with open(filedir + '/sampledFiles.txt', "r") as used_f:
        used_files = used_f.read().split('\n')

    # Load unused sounds (only needed by the disabled sampling ratio below)
    with open(filedir + '/unsampledFiles.txt', "r") as unused_f:
        unused_files = unused_f.read().split('\n')

    # Load matches for unused sound
    matches = load_matches(filedir)

    silence = random.randint(0, 44100)

    # Mode "w" truncates any previous content, so no separate "delete" step is needed.
    with open(filedir + name, "w") as training_file:
        training_file.write(str(silence) + "\n")

        for _ in range(num_files_in_stream):
            # Choose a random file, used OR unused  (to change this, alter the if statement to always choose used or unused)
            ## UNCOMMENT WHEN FIXED
            # len(unused_files) * 1.0 / (len(used_files) + len(unused_files)):
            # The threshold 0.0 can never be undercut, so the matched (unused)
            # branch is currently disabled and every entry comes from used_files.
            if random.uniform(0.0, 1.0) < 0.0:
                info = generate_matched_stream(matches)
            else:
                info = generate_used_stream(used_files)
            silence = random.randint(0, 44100)
            to_write = info[0] + " -_- " + info[1] + " -_- " + info[2] + " \n"
            to_write += str(silence) + "\n"
            training_file.write(to_write)
        
# Write one randomly generated stream description for training and one for testing.
generate_data_file("trainingInfo.txt")
generate_data_file("testingInfo.txt")

# Report how long generating both files took.
end = time.time()
print("Time elapsed: " + str(end-start) + " seconds, or " + str((end-start)/60.0) + " minutes")



Time elapsed: 0.012291908264160156 seconds, or 0.0002048651377360026 minutes


In [3]:
## READ STREAM DATA FROM FILE

def load_stream(name, filedir='../'):
    """Parse a stream description file into entry rows and silence lengths.

    Lines containing the ``-_-`` separator are entry lines; their first three
    fields are stripped and stored. Every other non-empty line is kept verbatim
    as a silence length (still a string).

    Args:
        name: File name, appended to ``filedir``.
        filedir: Directory prefix of the file.

    Returns:
        ``(data_matrix, silence_matrix)`` where ``data_matrix`` is a list of
        ``[field0, field1, field2]`` string lists and ``silence_matrix`` is a
        list of strings, both in file order.
    """
    data_matrix = []
    silence_matrix = []

    # The context manager closes the file even if reading fails.
    with open(filedir + name, "r") as training_file:
        info = training_file.read().split('\n')

    for inf in info:
        segs = inf.split('-_-')
        if len(segs) >= 3:
            data_matrix.append([segs[0].strip(), segs[1].strip(), segs[2].strip()])
        elif len(segs) == 2:
            # Entry line with a missing third field: report it and skip it.
            print(inf)
        elif segs[0] != '':
            silence_matrix.append(segs[0])
    return data_matrix, silence_matrix

def generate_composite_stream(audio_matrix, silence_matrix):
    """Build one long signal, and a per-sample label array, from a stream description.

    The result starts with ``int(silence_matrix[0])`` zero samples; then, for each
    entry ``i`` of ``audio_matrix``, the audio loaded from ``entry[0]`` followed by
    ``int(silence_matrix[i + 1])`` zero samples. Audio samples are labelled with
    ``int(entry[1])`` and silent samples with ``SILENCE_CLASS``.

    Progress is printed as a percentage each time the rounded value changes.

    Args:
        audio_matrix: Sequence of entries whose element 0 is a path accepted by
            ``librosa.load`` and whose element 1 is an integer-like class label.
        silence_matrix: Sequence of integer-like silence lengths; needs one more
            element than ``audio_matrix``.

    Returns:
        ``(composite_signal, composite_matches)``: two arrays of equal length.

    Raises:
        ValueError: If ``silence_matrix`` is too short for ``audio_matrix``.
    """
    if len(silence_matrix) < len(audio_matrix) + 1:
        # Fail before any audio is decoded rather than part-way through the loop.
        raise ValueError(
            "silence_matrix needs " + str(len(audio_matrix) + 1)
            + " entries but has " + str(len(silence_matrix))
        )

    cur_percentage = 0
    data_dict = {}  # file name -> decoded samples, so each file is loaded only once

    lead_in = int(silence_matrix[0])
    # Pieces are collected and joined once at the end; concatenating inside the
    # loop would re-copy the whole stream on every iteration.
    signal_chunks = [np.zeros(lead_in)]
    label_chunks = [np.full(lead_in, SILENCE_CLASS)]

    total = len(audio_matrix)
    for i, entry in enumerate(audio_matrix):
        percentage = round(i / total * 100)
        if percentage != cur_percentage:
            cur_percentage = percentage
            print(str(cur_percentage) + "%     ", end='')

        filename = entry[0]
        if filename in data_dict:
            y = data_dict[filename]
        else:
            y, _ = librosa.load(filename)  # the returned sample rate is not used
            data_dict[filename] = y

        gap = int(silence_matrix[i + 1])
        signal_chunks.append(y)
        label_chunks.append(np.full(y.shape, int(entry[1])))
        signal_chunks.append(np.zeros(gap))
        label_chunks.append(np.full(gap, SILENCE_CLASS))

    return np.concatenate(signal_chunks), np.concatenate(label_chunks)
    
# Wall-clock start for this cell.
start = time.time()

# Parse the generated description files and render each one into a signal plus
# a per-sample label array (training first, then testing).
audio_matrix, silence_matrix = load_stream(name="trainingInfo.txt")
composite_signal, composite_matches = generate_composite_stream(audio_matrix, silence_matrix)
audio_matrix_test, silence_matrix_test = load_stream(name="testingInfo.txt")
composite_signal_test, composite_matches_test = generate_composite_stream(audio_matrix_test, silence_matrix_test)

# NOTE(review): Jupyter only displays a bare expression when it is the last statement
# of the cell, so this line shows nothing here -- move it to the end or remove it.
composite_matches 

# Report how long loading and rendering both streams took.
end = time.time()
print("\nTime elapsed: " + str(end-start) + " seconds, or " + str((end-start)/60.0) + " minutes")

1%     2%     3%     4%     5%     6%     7%     8%     9%     10%     11%     12%     13%     14%     15%     16%     17%     18%     19%     20%     21%     22%     23%     24%     25%     26%     27%     28%     29%     30%     31%     32%     33%     34%     35%     36%     37%     38%     39%     40%     41%     42%     43%     44%     45%     46%     47%     48%     49%     50%     51%     52%     53%     54%     55%     56%     57%     58%     59%     60%     61%     62%     63%     64%     65%     66%     67%     68%     69%     70%     71%     72%     73%     74%     75%     76%     77%     78%     79%     80%     81%     82%     83%     84%     85%     86%     87%     88%     89%     90%     91%     92%     93%     94%     95%     96%     97%     98%     99%     100%     1%     2%     3%     4%     5%     6%     7%     8%     9%     10%     11%     12%     13%     14%     15%     16%     17%     18%     19%     20%     21%     22%     23%     24%     25%     26%     27%     2

In [4]:
# Wall-clock start for this cell; the elapsed time is printed after both batch() calls.
start = time.time()

def batch(signal, matches):
    """Turn a signal into per-column spectrogram features with one label each.

    Each column of the spectrogram returned by ``get_all`` becomes one feature
    vector. Its label is read from ``matches`` at the position proportional to
    the column's index, i.e. ``matches[int(col / n_cols * len(matches))]``.

    Returns:
        ``(data, classes)``: a list of spectrogram columns and a list of int labels.
    """
    spectrogram = get_all(signal, 22050, "spectrogram")[0]
    columns = spectrogram.T
    n_columns = len(columns)
    n_labels = len(matches)

    features = []
    labels = []
    for position, column in enumerate(columns):
        features.append(column)
        # Map the column position onto the (longer) per-sample label array.
        label_index = int(position / n_columns * n_labels)
        labels.append(int(matches[label_index]))
    return features, labels

# Convert both composite streams into (spectrogram column, label) lists.
training_data, training_classes = batch(composite_signal, composite_matches)
testing_data, testing_classes = batch(composite_signal_test, composite_matches_test)

# Report how long feature extraction took.
end = time.time()
print("Time elapsed: " + str(end-start) + " seconds, or " + str((end-start)/60.0) + " minutes")

Time elapsed: 2.111419677734375 seconds, or 0.03519032796223958 minutes


In [7]:
# Make/test the LSTM

# Model hyperparameters.
lstm_out = 50                  # number of units passed to the LSTM layer
batch_size = 64                # mini-batch size passed to model.fit
embedding_vector_length = 32   # output dimension of the Embedding layer

# Load used sounds; the context manager closes the handle once the list is read.
with open(filedir + '/sampledFiles.txt', "r") as used_f:
    used_files = used_f.read().split('\n')

# Load unused sounds
with open(filedir + '/unsampledFiles.txt', "r") as unused_f:
    unused_files = unused_f.read().split('\n')

# Pad/truncate every feature vector to the length of the first training vector,
# which also turns the Python lists returned by batch() into 2-D arrays.
# NOTE(review): pad_sequences defaults to dtype='int32', so fractional spectrogram
# values would be truncated to integers -- confirm that is intended.
max_spec_length = len(training_data[0])
training_data = pad_sequences(training_data, maxlen=max_spec_length)
testing_data = pad_sequences(testing_data, maxlen=max_spec_length)

# num_classes sizes the softmax output layer; number_inputs sizes the Embedding vocabulary.
# The "+ 1" presumably reserves a slot for SILENCE_CLASS -- TODO confirm.
if NUM_CLASSES_OVERRIDE > 0:
    num_classes = NUM_CLASSES_OVERRIDE + 1
    number_inputs = NUM_CLASSES_OVERRIDE + 1
else:
    # NOTE(review): both lists come from split('\n'), so a trailing newline in the
    # source files adds an empty entry to these counts.
    num_classes = len(used_files) + 1
    number_inputs = len(used_files) + len(unused_files)

# Sequence classifier: Embedding -> LSTM -> softmax over num_classes labels.
# NOTE(review): Embedding looks up integer indices in [0, number_inputs); the inputs
# here are padded spectrogram columns, so confirm this is the intended input layer.
model = Sequential()
# Keras 2 no longer accepts a `dropout` argument on Embedding (the legacy shim drops it
# with a warning), so it is omitted; add a SpatialDropout1D layer after the embedding
# if embedding dropout is wanted.
model.add(Embedding(number_inputs, embedding_vector_length, input_length=len(training_data[0])))
# `dropout` / `recurrent_dropout` are the Keras 2 names of the former dropout_W / dropout_U.
model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(num_classes, activation='softmax'))
# Sparse loss because the targets are integer class ids rather than one-hot vectors.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in print() added a stray "None".
model.summary()

model.fit(training_data, training_classes, validation_data=(testing_data, testing_classes), epochs=3, batch_size=batch_size)



_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 128, 32)           160       
_________________________________________________________________
lstm_3 (LSTM)                (None, 50)                16600     
_________________________________________________________________
dense_3 (Dense)              (None, 5)                 255       
Total params: 17,015
Trainable params: 17,015
Non-trainable params: 0
_________________________________________________________________
None
Train on 39955 samples, validate on 40648 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7fbab05f8fd0>