In [1]:
from helpers import *
from lstmHelpers import *

import random
from random import randrange, shuffle
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
from sklearn.preprocessing import MinMaxScaler
import time
from collections import deque
import pickle


# Base directory handed to load_classes() and generate_data_file() in the next cell.
filedir = "../"

Using TensorFlow backend.


In [2]:
## WRITE STREAM DATA TO FILE
# Two timers: `start` covers this cell only, `total_time` is read again by the
# LSTM cell to report the duration of the whole run.
start = time.time()
total_time = time.time()

NUM_CLASSES_OVERRIDE = 5  # If 0, use ALL classes; else, use n classes only
# NOTE(review): with NUM_CLASSES_OVERRIDE = 0 both num_files_in_stream and
# order_to_use below end up empty -- confirm the "use ALL classes" case is
# really handled by load_classes/generate_data_file.
num_files_in_stream = 30 * NUM_CLASSES_OVERRIDE

used_classes, unused_classes = load_classes(filedir, NUM_CLASSES_OVERRIDE, display=False)

# Every class index appears the same number of times; the list is grouped by
# class until it is shuffled below.
order_to_use = []
for i in range(NUM_CLASSES_OVERRIDE):
    files_for_class = int(num_files_in_stream / NUM_CLASSES_OVERRIDE)
    order_to_use += [i] * files_for_class

# Training and testing share the same multiset of class indices, each written
# out in its own freshly shuffled order.
for info_file in ("trainingInfo.txt", "testingInfo.txt"):
    shuffle(order_to_use)
    generate_data_file(info_file, filedir, used_classes, unused_classes, order_to_use, max_ramp_length=0.25)

end = time.time()
elapsed = end - start
print("Time elapsed: " + str(elapsed) + " seconds, or " + str(elapsed/60.0) + " minutes")

Using classes...
Exact Classes: [812, 4977, 9455]
Matched Classes: [427, 3667]


Time elapsed: 0.013879776000976562 seconds, or 0.00023132960001627604 minutes


In [3]:
## READ STREAM DATA FROM FILE

def generate_composite_stream(audio_matrix, data_dict = None):
    """Build one continuous signal, with per-sample labels, from clip descriptors.

    Each row of ``audio_matrix`` is read positionally: ``row[2]`` is the audio
    file to load, ``row[6]`` is passed to ``timewarp``, ``row[5]`` to
    ``apply_noise``, ``row[3]`` and ``row[4]`` to ``apply_ramp``, and ``row[1]``
    is the label value written for every sample of the processed clip.

    Progress is printed as a running percentage of rows handled.

    Args:
        audio_matrix: Indexable sequence of clip descriptor rows (see above).
        data_dict: Optional cache mapping filename -> trimmed waveform. Files
            already present are not reloaded; newly loaded files are added to
            the same dict, so it can be reused across calls.

    Returns:
        Tuple ``(composite_signal, composite_matches, data_dict)``: the
        concatenated, flattened clips; an equally long flattened array of
        labels; and the (possibly extended) cache. Both arrays are empty when
        ``audio_matrix`` has no rows.
    """
    if data_dict is None:
        data_dict = {}

    total = len(audio_matrix)
    if total == 0:
        # np.concatenate refuses an empty list, so answer the trivial case here.
        return np.array([]), np.array([]), data_dict

    cur_percentage = 0
    composite_signal_list = []
    composite_matches_list = []

    for i in range(total):
        percentage = round(i / total * 100)
        if percentage != cur_percentage:
            cur_percentage = percentage
            print(str(cur_percentage) + "%     ", end='')

        row = audio_matrix[i]
        filename = row[2]
        if filename in data_dict:
            y = data_dict[filename]
        else:
            yt, _ = librosa.load(filename)
            # Trim leading/trailing audio quieter than 50 dB below the reference level.
            y, _ = librosa.effects.trim(yt, top_db=50)
            data_dict[filename] = y

        warped_y = timewarp(y, row[6])
        noisey_y = apply_noise(warped_y, row[5])
        faded_y = apply_ramp(noisey_y, row[3], row[4])
        composite_signal_list.append(faded_y)
        composite_matches_list.append(np.full(faded_y.shape, row[1]))

    # Concatenate the per-clip arrays directly. Wrapping the list in np.array()
    # first would try to build a ragged array when clips differ in length,
    # which recent NumPy versions reject with a ValueError.
    composite_signal = np.concatenate(composite_signal_list).ravel()
    composite_matches = np.concatenate(composite_matches_list).ravel()
    return composite_signal, composite_matches, data_dict
    
start = time.time()

# Build the training stream first: the call hands back the clip cache, which is
# then passed to the test-stream call through data_dict.
audio_matrix = load_stream(name="trainingInfo.txt")
composite_signal, composite_matches, preloaded_data = generate_composite_stream(audio_matrix)

audio_matrix_test = load_stream(name="testingInfo.txt")
# `x` receives the cache object back again; none of the cells shown here read it.
composite_signal_test, composite_matches_test, x = generate_composite_stream(audio_matrix_test, data_dict=preloaded_data)

# Every sample must carry exactly one label; fail loudly here rather than
# mis-align examples and labels during batching.
assert len(composite_signal) == len(composite_matches)
assert len(composite_signal_test) == len(composite_matches_test)

end = time.time()
print("\nTime elapsed: " + str(end-start) + " seconds, or " + str((end-start)/60.0) + " minutes")



1%     2%     3%     4%     5%     6%     7%     8%     9%     10%     11%     12%     13%     14%     15%     16%     17%     18%     19%     20%     21%     22%     23%     24%     25%     26%     27%     28%     29%     30%     31%     32%     33%     34%     35%     36%     37%     38%     39%     40%     41%     42%     43%     44%     45%     46%     47%     48%     49%     50%     51%     52%     53%     54%     55%     56%     57%     58%     59%     60%     61%     62%     63%     64%     65%     66%     67%     68%     69%     70%     71%     72%     73%     74%     75%     76%     77%     78%     79%     80%     81%     82%     83%     84%     85%     86%     87%     88%     89%     90%     91%     92%     93%     94%     95%     96%     97%     98%     99%     

1%     2%     3%     4%     5%     6%     7%     8%     9%     10%     11%     12%     13%     14%     15%     16%     17%     18%     19%     20%     21%     22%     23%     24%     25%     26%     27%     28%   

In [5]:
# Settings read as globals by new_batch() and batch() below, and by the LSTM
# cell for its input_shape.
n_mels = 128       # forwarded to get_spectrogram() as n_mels
batch_length = 5   # consecutive spectrogram columns that make up one example
start = time.time()  # timer for this cell

def new_batch(signal, matches):
    """Cut ``signal`` into overlapping windows and turn each into one example.

    A window of 2048*3 samples is taken every 32 samples. Each window is
    zero-padded with 1024 samples at its end and converted with
    ``get_spectrogram``; every column of the transposed spectrogram is then
    min-max scaled to [0, 1] within that window, and the first ``batch_length``
    rows of the result form the example. The label of an example is the entry
    of ``matches`` at the window's last sample.

    Relies on the module-level ``n_mels`` and ``batch_length``. Progress is
    printed as a running percentage of the signal consumed.

    Args:
        signal: 1-D sequence of audio samples.
        matches: Per-sample labels, indexable with the same positions as
            ``signal``.

    Returns:
        Tuple ``(data, classes)``: a list of examples (each a list of
        ``batch_length`` arrays) and a list of int labels. Both are empty when
        ``signal`` is shorter than one window.
    """
    signal_batch_length = 2048*3
    hop = int(512 / 16)  # hop length of spectrogram / 16

    # Slice windows straight out of the array instead of feeding samples one
    # at a time through a Python list; the windows and labels are the same.
    samples = np.asarray(signal)
    total = len(samples)

    data = []
    classes = []
    cur_percentage = 0
    for begin in range(0, total - signal_batch_length + 1, hop):
        finish = begin + signal_batch_length
        recent_signal = np.pad(samples[begin:finish], (0, 1024), 'constant', constant_values=(0.0,0.0))
        spec = get_spectrogram(recent_signal, 22050, n_mels=n_mels, display=False)

        # A fresh scaler per window: scaling never sees data outside the window.
        scaler = MinMaxScaler(feature_range=(0, 1))
        transposed = scaler.fit_transform(spec.T)

        data.append([transposed[i] for i in range(batch_length)])
        classes.append(int(matches[finish - 1]))

        percentage = round((finish - 1) / total * 100)
        if percentage != cur_percentage:
            cur_percentage = percentage
            print(str(cur_percentage) + "%     ", end='')

    return data, classes

def batch(signal, matches):
    """Turn a whole signal into sliding groups of spectrogram columns.

    One spectrogram is computed for the full ``signal``; each column of its
    transpose is min-max scaled to [0, 1]. Every example is ``batch_length``
    consecutive rows of that scaled matrix, advancing one row at a time. The
    label is taken from ``matches`` at the position proportional to the
    example's first row.

    Relies on the module-level ``n_mels`` and ``batch_length``. Progress is
    printed as a running percentage of rows visited.

    Returns:
        Tuple ``(data, classes)`` of parallel lists.
    """
    spec = get_spectrogram(signal, 22050, n_mels=n_mels, display=False)
    frames = MinMaxScaler(feature_range=(0, 1)).fit_transform(spec.T)
    n_frames = len(frames)

    data = []
    classes = []
    last_percentage = 0
    for first in range(n_frames):
        # NOTE(review): ">=" also stops before the final window that would
        # still fit exactly -- confirm whether that is intended.
        if first + batch_length >= n_frames:
            break

        percentage = round(first / n_frames * 100)
        if percentage != last_percentage:
            last_percentage = percentage
            print(str(last_percentage) + "%     ", end='')

        group = [frames[first]]
        group.extend(frames[first + offset] for offset in range(1, batch_length))
        data.append(group)

        label_position = int(first / n_frames * len(matches))
        classes.append(int(matches[label_position]))
    return data, classes

print("Training Data")
training_batches = new_batch(composite_signal, composite_matches)
training_data, training_classes = training_batches

# Test-set batching is currently switched off, matching the commented-out
# evaluate call in the LSTM cell.
#print("\nTesting Data")
#testing_data, testing_classes = new_batch(composite_signal_test, composite_matches_test)

# Two blank lines separate the progress output from the timing line.
print()
print()

training_data = np.array(training_data)
#testing_data = np.array(testing_data)
max_spec_length = len(training_data[0])  # entries per example

end = time.time()
elapsed = end - start
print("Time elapsed: " + str(elapsed) + " seconds, or " + str(elapsed/60.0) + " minutes")

Training Data
1%     2%     3%     4%     5%     6%     7%     8%     9%     10%     11%     12%     13%     14%     15%     16%     17%     18%     

KeyboardInterrupt: 

In [None]:
# Make/test the LSTM
# Expects training_data / training_classes from the batching cell and
# total_time / NUM_CLASSES_OVERRIDE from the stream-writing cell.

lstm_out = 350
batch_size = 64
embedding_vector_length = 32  # only echoed in the print below; no Embedding layer is added
dropout = 0.2
dropout_r = 0.2

number_inputs = NUM_CLASSES_OVERRIDE

print(str(number_inputs) + " " + str(embedding_vector_length) + " " + str(len(training_data[0])) + " " + str(dropout) + 
      " " + str(lstm_out) + " " + str(dropout_r) + " " + str(NUM_CLASSES_OVERRIDE))

# One LSTM layer over (batch_length, n_mels) inputs followed by a softmax with
# one unit per class.
model = Sequential()
model.add(LSTM(lstm_out, input_shape=(batch_length, n_mels), dropout = dropout, recurrent_dropout = dropout_r))
model.add(Dense(NUM_CLASSES_OVERRIDE,activation='softmax'))  #softmax
model.compile(loss = 'sparse_categorical_crossentropy', optimizer='adam',metrics = ['accuracy'])  # sparse_categorical_crossentropy
# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()

# Hand Keras an ndarray of labels rather than a Python list, and make sure
# examples and labels line up before spending time on training.
training_labels = np.asarray(training_classes)
assert len(training_data) == len(training_labels)

model.fit(training_data, training_labels, epochs=3, batch_size=batch_size)
#scores = model.evaluate(testing_data, testing_classes)
#print("Accuracy: %.2f%%" % (scores[1]*100))

total_end = time.time()
print("Total Time elapsed: " + str((total_end-total_time)/60.0) + " minutes")

In [None]:
# Write the model built in the previous cell to LSTMModel.h5 (path is relative
# to the current working directory).
model.save("LSTMModel.h5")