In [1]:
from helpers import *
from lstmHelpers import *

import random
from random import randrange, shuffle
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
from sklearn.preprocessing import MinMaxScaler
import time
from collections import deque


# Base directory handed to load_classes() and generate_data_file() in the next cell.
filedir = "../"

Using TensorFlow backend.


In [2]:
## WRITE STREAM DATA TO FILE
# `start` times this cell only; `total_time` is read again by the model cell
# to report the duration of the whole run.
start = time.time()
total_time = time.time()

NUM_CLASSES_OVERRIDE = 150  # If 0, use ALL classes; else, use n classes only
# NOTE(review): with 0 the order list built below is empty and the model cell
# would create Dense(0) -- confirm whether the "ALL classes" mode is still supported.
num_files_in_stream = 30 * NUM_CLASSES_OVERRIDE

used_classes, unused_classes = load_classes(filedir, NUM_CLASSES_OVERRIDE, display=False)

# Every class index appears the same number of times (files / classes) in the stream order.
order_to_use = [
    class_index
    for class_index in range(NUM_CLASSES_OVERRIDE)
    for _ in range(int(num_files_in_stream / NUM_CLASSES_OVERRIDE))
]

# The same multiset of class indices is reshuffled before each stream file is
# generated, so training and testing see the classes in different orders.
for stream_file in ("trainingInfo.txt", "testingInfo.txt"):
    shuffle(order_to_use)
    generate_data_file(stream_file, filedir, used_classes, unused_classes, order_to_use, max_ramp_length=0.25)

end = time.time()
elapsed = end - start
print("Time elapsed: " + str(elapsed) + " seconds, or " + str(elapsed / 60.0) + " minutes")

Using classes...
Exact Classes: [3226, 6305, 1820, 5896, 5641, 3815, 6348, 2244, 2999, 10152, 6943, 4837, 5371, 2089, 5225, 4072, 7941, 3049, 9643, 3057, 6499, 5892, 9194, 234, 2933, 4113, 1109, 6758, 858, 778, 2482, 8496, 9667, 2745, 7009, 3956, 4754, 4446, 4572, 2664, 3994, 7857, 4989, 5150, 3980, 7960, 8400, 4974, 5504, 4698, 5504, 6559, 1584, 9207, 9789, 9373, 6247, 8909, 375, 9510, 793, 2027, 2895, 8996, 3273, 9456, 5527, 9715, 975, 9751, 5351, 6753, 7468, 7976, 7573, 4759, 9271, 5627, 9985, 2383, 2463, 2089, 8392, 8573, 7843, 9495, 3668, 5223, 9812, 9581, 111, 5407, 6474, 1732, 8627, 9924, 5775, 5823, 6441, 1788, 8503, 10121, 5728, 2518, 3836, 3680, 9781, 2507, 4285, 323]
Matched Classes: [624, 940, 3698, 523, 185, 3935, 3357, 1483, 1069, 144, 433, 1499, 1404, 102, 1987, 3500, 1442, 2682, 560, 2362, 2308, 293, 2101, 2202, 84, 3384, 2884, 1676, 104, 1542, 1335, 3440, 3082, 3454, 1951, 2256, 1034, 2942, 3773, 2402]


Time elapsed: 0.04066896438598633 seconds, or 0.00067781607309977

In [3]:
## READ STREAM DATA FROM FILE

def generate_composite_stream(audio_matrix, data_dict = None):
    """Concatenate the clips listed in ``audio_matrix`` into one labelled signal.

    Each row of ``audio_matrix`` is read by position: ``row[1]`` is the label
    repeated for every sample of the clip, ``row[2]`` is the audio file path,
    and ``row[3]`` / ``row[4]`` are forwarded to ``apply_ramp`` (presumably the
    fade-in / fade-out lengths -- confirm against ``apply_ramp``).

    Parameters
    ----------
    audio_matrix : sequence of rows
        Stream description, one row per clip, in playback order.
    data_dict : dict, optional
        Cache mapping file path -> trimmed waveform. It is updated in place with
        every file loaded during the call, so passing the returned dict to a
        later call avoids decoding the same files again.

    Returns
    -------
    composite_signal : numpy.ndarray
        1-D array holding all ramped clips back to back.
    composite_matches : numpy.ndarray
        1-D array with the same number of elements as ``composite_signal``;
        each element is the label of the clip that sample came from.
    data_dict : dict
        The (possibly newly created) cache described above.

    Raises
    ------
    ValueError
        If ``audio_matrix`` contains no rows.
    """
    if data_dict is None:
        data_dict = {}

    total = len(audio_matrix)
    if total == 0:
        raise ValueError("audio_matrix is empty; there is nothing to concatenate")

    cur_percentage = 0
    signal_parts = []
    label_parts = []

    for i in range(total):
        # Print each whole-percent milestone once, on a single output line.
        percentage = round(i / total * 100)
        if percentage != cur_percentage:
            cur_percentage = percentage
            print(str(cur_percentage) + "%     ", end='')

        row = audio_matrix[i]
        filename = row[2]
        if filename in data_dict:
            y = data_dict[filename]
        else:
            # Sample rate and trim indices are returned by librosa but not needed here.
            yt, _sr = librosa.load(filename)
            y, _trim_idx = librosa.effects.trim(yt, top_db=50)
            data_dict[filename] = y

        faded_y = apply_ramp(y, row[3], row[4])
        signal_parts.append(faded_y)
        label_parts.append(np.full(faded_y.shape, row[1]))

    # Concatenate the lists directly: clips generally differ in length, and
    # wrapping such a list in np.array() first creates a ragged array, which
    # recent NumPy releases refuse to build.
    composite_signal = np.concatenate(signal_parts).ravel()
    composite_matches = np.concatenate(label_parts).ravel()
    return composite_signal, composite_matches, data_dict
    
start = time.time()

# Training stream: starts with an empty cache, so each referenced file is
# decoded on its first appearance and kept in `preloaded_data`.
audio_matrix = load_stream(name="trainingInfo.txt")
training_stream = generate_composite_stream(audio_matrix)
composite_signal, composite_matches, preloaded_data = training_stream

# Testing stream: reuses the cache filled above; `x` is that same dict handed back.
audio_matrix_test = load_stream(name="testingInfo.txt")
testing_stream = generate_composite_stream(audio_matrix_test, data_dict=preloaded_data)
composite_signal_test, composite_matches_test, x = testing_stream

# Bare expression in the middle of a cell: evaluated, but not displayed.
composite_matches

end = time.time()
elapsed = end - start
print("\nTime elapsed: " + str(elapsed) + " seconds, or " + str(elapsed / 60.0) + " minutes")


Blank Segment at File Start
1%     2%     3%     4%     5%     6%     7%     8%     9%     10%     11%     12%     13%     14%     15%     16%     17%     18%     19%     20%     21%     22%     23%     24%     25%     26%     27%     28%     29%     30%     31%     32%     33%     34%     35%     36%     37%     38%     39%     40%     41%     42%     43%     44%     45%     46%     47%     48%     49%     50%     51%     52%     53%     54%     55%     56%     57%     58%     59%     60%     61%     62%     63%     64%     65%     66%     67%     68%     69%     70%     71%     72%     73%     74%     75%     76%     77%     78%     79%     80%     81%     82%     83%     84%     85%     86%     87%     88%     89%     90%     91%     92%     93%     94%     95%     96%     97%     98%     99%     100%     
Blank Segment at File Start
1%     2%     3%     4%     5%     6%     7%     8%     9%     10%     11%     12%     13%     14%     15%     16%     17%     18%     19%     20%    

In [4]:
# Wall-clock start for this cell's timing report.
start = time.time()
# Passed to get_spectrogram() as n_mels inside batch(); also the per-frame
# feature size in the LSTM input_shape of the model cell.
n_mels = 128
# Number of consecutive spectrogram frames grouped into one sample by batch();
# also the sequence length in the LSTM input_shape of the model cell.
batch_length = 20

def batch(signal, matches):
    """Slice a signal's spectrogram into overlapping windows with one label each.

    The spectrogram of ``signal`` is computed with the module-level ``n_mels``,
    transposed so that each row is one time frame, and rescaled to the range
    [0, 1] by a ``MinMaxScaler`` that is fitted anew on every call (each signal
    is therefore normalised by its own minimum and maximum). A window of
    ``batch_length`` consecutive frames is then taken at every start frame
    ``k`` for which ``k + batch_length`` is still smaller than the frame count.

    Parameters
    ----------
    signal : array-like
        Audio samples handed to ``get_spectrogram`` with a sample rate of 22050.
    matches : sequence
        Per-sample labels for ``signal``. The label of a window is read at the
        position proportional to the window's first frame.

    Returns
    -------
    data : list
        One entry per window; each entry is a list of ``batch_length`` frame rows.
    classes : list of int
        Label of each window, aligned with ``data``.
    """
    spec = get_spectrogram(signal, 22050, n_mels=n_mels, display=False)
    frames = MinMaxScaler(feature_range=(0, 1)).fit_transform(spec.T)
    frame_count = len(frames)

    data = []
    classes = []
    shown_percentage = 0

    # range() is empty when there are not more frames than batch_length, which
    # yields two empty lists.
    for first in range(frame_count - batch_length):
        # Print each whole-percent milestone once, on a single output line.
        percentage = round(first / frame_count * 100)
        if percentage != shown_percentage:
            shown_percentage = percentage
            print(str(shown_percentage) + "%     ", end='')

        window = [frames[first]]
        window.extend(frames[first + offset] for offset in range(1, batch_length))
        data.append(window)

        # Map the window's first frame onto the per-sample label array.
        label_position = int(first / frame_count * len(matches))
        classes.append(int(matches[label_position]))

    return data, classes

# Window both composite streams; batch() prints its own progress on one line.
print("Training Data")
training_data, training_classes = batch(composite_signal, composite_matches)

print("\nTesting Data")
testing_data, testing_classes = batch(composite_signal_test, composite_matches_test)

print()
# Number of frames in one window, taken from the first training sample.
max_spec_length = len(training_data[0])
print()

# Convert the nested lists returned by batch() into arrays for model.fit/evaluate.
training_data, testing_data = np.array(training_data), np.array(testing_data)

end = time.time()
elapsed = end - start
print("Time elapsed: " + str(elapsed) + " seconds, or " + str(elapsed / 60.0) + " minutes")

Training Data
1%     2%     3%     4%     5%     6%     7%     8%     9%     10%     11%     12%     13%     14%     15%     16%     17%     18%     19%     20%     21%     22%     23%     24%     25%     26%     27%     28%     29%     30%     31%     32%     33%     34%     35%     36%     37%     38%     39%     40%     41%     42%     43%     44%     45%     46%     47%     48%     49%     50%     51%     52%     53%     54%     55%     56%     57%     58%     59%     60%     61%     62%     63%     64%     65%     66%     67%     68%     69%     70%     71%     72%     73%     74%     75%     76%     77%     78%     79%     80%     81%     82%     83%     84%     85%     86%     87%     88%     89%     90%     91%     92%     93%     94%     95%     96%     97%     98%     99%     100%     
Testing Data
1%     2%     3%     4%     5%     6%     7%     8%     9%     10%     11%     12%     13%     14%     15%     16%     17%     18%     19%     20%     21%     22%     23%     24%  

In [8]:
# Make/test the LSTM

# Hyper-parameters.
lstm_out = 350
batch_size = 64
dropout = 0.2
dropout_r = 0.2

# Only echoed in the diagnostic line below; the model itself has no Embedding layer.
embedding_vector_length = 32
number_inputs = NUM_CLASSES_OVERRIDE

print(" ".join(str(value) for value in (
    number_inputs, embedding_vector_length, len(training_data[0]), dropout,
    lstm_out, dropout_r, NUM_CLASSES_OVERRIDE,
)))

# One LSTM layer over windows of `batch_length` frames with `n_mels` features
# each, followed by a softmax over the classes.
model = Sequential()
model.add(LSTM(lstm_out, input_shape=(batch_length, n_mels), dropout = dropout, recurrent_dropout = dropout_r))
model.add(Dense(NUM_CLASSES_OVERRIDE,activation='softmax'))
# The sparse loss takes integer labels directly.
# NOTE(review): assumes every label is an integer id in [0, NUM_CLASSES_OVERRIDE) -- confirm.
model.compile(loss = 'sparse_categorical_crossentropy', optimizer='adam',metrics = ['accuracy'])
# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()

# batch() returns the labels as Python lists; hand Keras ndarrays instead,
# since newer Keras/TensorFlow releases reject an ndarray input paired with a
# plain list of integer targets.
model.fit(training_data, np.asarray(training_classes), epochs=3, batch_size=batch_size)
scores = model.evaluate(testing_data, np.asarray(testing_classes))
print("Accuracy: %.2f%%" % (scores[1]*100))

# `total_time` was recorded at the start of the stream-generation cell.
total_end = time.time()
print("Total Time elapsed: " + str((total_end-total_time)/60.0) + " minutes")

150 32 20 0.2 350 0.2 150
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_3 (LSTM)                (None, 350)               670600    
_________________________________________________________________
dense_3 (Dense)              (None, 150)               52650     
Total params: 723,250
Trainable params: 723,250
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/3
Epoch 2/3
Epoch 3/3
Accuracy: 91.77%
Total Time elapsed: 13.576078740755717 minutes


In [9]:
# Write the fitted model to a file named LSTMModel.h5 (path relative to the working directory).
model.save("LSTMModel.h5")