In [2]:
# Feature extraction and data preprocessing
import librosa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from random import sample

import os
import csv
import pickle

# Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import roc_auc_score, mean_absolute_error
from sklearn.linear_model import LogisticRegression

In [3]:
#Keras
from keras.layers import Dense, Dropout, LSTM
from keras.models import Sequential
from keras.utils import to_categorical

import warnings
# Silence every warning for the rest of the session to keep cell output short.
# NOTE(review): this blanket filter also hides deprecation notices from the
# libraries imported above -- consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

Using TensorFlow backend.


In [4]:
def simulateSequence(n, length=500, seed=None):
    """Simulate ``n`` random note sequences over a fixed 7-letter alphabet.

    Each note is drawn uniformly from ``'A'``..``'G'`` and then mapped to an
    integer class with a ``LabelEncoder`` fitted on that alphabet.

    Parameters
    ----------
    n : int
        Number of sequences to simulate.
    length : int, optional
        Number of notes in every sequence (default 500). The previous
        implementation kept a one-element list of lengths and indexed it with
        the sequence number, which raised ``IndexError`` for any ``n > 1``.
    seed : int or None, optional
        When given, a private ``np.random.RandomState(seed)`` is used so the
        draw is reproducible. When ``None`` (default) NumPy's global random
        state is used, exactly as before.

    Returns
    -------
    sequences : numpy.ndarray or list of numpy.ndarray
        A single integer-encoded array when ``n == 1``; otherwise a list with
        one integer-encoded array per sequence.
    n_vocab : int
        Size of the note alphabet.
    encoder : LabelEncoder
        The fitted encoder, usable to map classes back to note names.
    """
    dictionary = ['A', 'B', 'C', 'D', 'E', 'F', 'G']
    encoder = LabelEncoder().fit(dictionary)
    n_vocab = len(dictionary)

    # Fall back to the module-level generator so that np.random.seed() set by
    # the caller keeps working when no explicit seed is passed.
    rng = np.random if seed is None else np.random.RandomState(seed)

    def draw_sequence():
        # One sequence: random note names -> integer classes.
        return encoder.transform(rng.choice(dictionary, length))

    if n == 1:
        # Kept for backward compatibility: a single sequence is returned bare,
        # not wrapped in a list.
        sequences = draw_sequence()
    else:
        sequences = [draw_sequence() for _ in range(n)]

    return sequences, n_vocab, encoder

def processSequence(notes, n_vocab, sequence_length=100):
    """Turn one integer-encoded note sequence into supervised LSTM samples.

    Every run of ``sequence_length`` consecutive notes becomes one input
    window, and the note that immediately follows it becomes the target.

    Parameters
    ----------
    notes : sequence of int
        Integer-encoded notes; must support ``len`` and slicing.
    n_vocab : int
        Number of distinct note classes. Used both to scale the inputs and to
        fix the width of the one-hot targets.
    sequence_length : int, optional
        Window length in notes (default 100).

    Returns
    -------
    network_input : numpy.ndarray
        Shape ``(n_patterns, sequence_length, 1)``, values divided by
        ``n_vocab``.
    network_output : numpy.ndarray
        One-hot targets of shape ``(n_patterns, n_vocab)``.

    Raises
    ------
    ValueError
        If ``notes`` is not longer than ``sequence_length``, i.e. no complete
        (window, target) pair can be formed.
    """
    n_patterns = len(notes) - sequence_length
    if n_patterns <= 0:
        raise ValueError(
            "need more than %d notes to build a training pattern, got %d"
            % (sequence_length, len(notes))
        )

    # Sliding windows and the note that follows each of them.
    windows = [notes[i:i + sequence_length] for i in range(n_patterns)]
    targets = notes[sequence_length:]

    # Reshape to (samples, timesteps, features) and scale by the vocabulary size.
    network_input = np.reshape(windows, (n_patterns, sequence_length, 1))
    network_input = network_input / float(n_vocab)

    # Pin the one-hot width to n_vocab. Without num_classes the width is
    # inferred from the largest target present, which comes out too narrow
    # whenever the highest class never appears as a target.
    network_output = to_categorical(targets, num_classes=n_vocab)

    return network_input, network_output

In [5]:
# Seed NumPy's global RNG so the simulated notes are identical on every
# Restart & Run All.
np.random.seed(42)

notes, n_vocab, encoder = simulateSequence(1)
X, y = processSequence(notes, n_vocab)

# The LSTM input shape is (timesteps, features): everything after the sample axis.
input_shape_1, input_shape_2 = X.shape[1:]
input_shape = (input_shape_1, input_shape_2)
X.shape, y.shape, input_shape

((400, 100, 1), (400, 7), (100, 1))

# LSTM

In [11]:
# Stacked-LSTM classifier: three recurrent layers followed by a dense head
# that outputs one probability per note class.
model = Sequential([
    LSTM(256, input_shape=input_shape, return_sequences=True),
    Dropout(0.3),
    LSTM(512, return_sequences=True),
    Dropout(0.3),
    LSTM(256),  # no return_sequences here, unlike the two LSTM layers above
    Dense(256),
    Dropout(0.3),
    Dense(n_vocab, activation='softmax'),
])

In [13]:
# Configure the loss and optimiser, then train on the full simulated dataset.
# TODO: hold out a validation split and pass it through validation_data;
# X_train / X_test are not created in the cells shown here.
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.fit(X, y, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7ff9f0b4c198>

In [1]:
# Held-out evaluation is disabled: X_test / y_test are not defined in the cells shown here.
#print(pd.Series(model.evaluate(X_test, y_test, batch_size=128), index=model.metrics_names))

0.125

In [15]:
# Persist the trained weights to 'weights.h5'; the generator model built
# further down loads this same file.
model.save_weights('weights.h5')

# Sequence generation

In [7]:
# Rebuild the training architecture layer for layer so the saved weights
# line up with the new model.
musicGenerator = Sequential([
    LSTM(256, input_shape=input_shape, return_sequences=True),
    Dropout(0.3),
    LSTM(512, return_sequences=True),
    Dropout(0.3),
    LSTM(256),
    Dense(256),
    Dropout(0.3),
    Dense(n_vocab, activation='softmax'),
])
musicGenerator.compile(optimizer='rmsprop', loss='categorical_crossentropy')

# Restore the trained parameters written by the training section.
musicGenerator.load_weights('weights.h5')

In [61]:
# Pick a random training window as the seed. randint's upper bound is
# exclusive, so passing len(X) lets every window -- including the last one --
# be selected (len(X)-1 silently excluded it).
start = np.random.randint(0, len(X))

# Rolling window of normalised notes, kept 1-D so it can be sliced and extended.
pattern = X[start].ravel()

n_generated = 10  # number of notes to generate
prediction_output = []

# generate notes
for _ in range(n_generated):

    # The window is already scaled by n_vocab (see processSequence), so it only
    # needs the (batch, timesteps, features) shape expected by the model.
    prediction_input = np.reshape(pattern, (1, len(pattern), 1))

    # Predicted class: greedy choice of the most probable note
    prediction = np.argmax(musicGenerator.predict(prediction_input, verbose=0))
    print(prediction)

    # Predicted note
    result = encoder.inverse_transform([prediction])
    prediction_output.append(result[0])

    # Slide the window: drop the oldest note and append the new one, scaled
    # the same way as the training inputs.
    pattern = np.append(pattern[1:], prediction / float(n_vocab))

prediction_output

1
1
1
1
1
1
1
1
1
1


['B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B']

In [40]:
# Sanity check: greedy next-note prediction for the first training window.
pattern = X[0]
# X is already divided by n_vocab in processSequence, so only the batch axis
# has to be added here.
prediction_input = pattern.reshape(1, len(pattern), 1)
class_scores = musicGenerator.predict(prediction_input, verbose=0)
prediction = class_scores.argmax()
prediction, encoder.inverse_transform([prediction])[0]



(1, 'B')