In [1]:
import librosa
import keras
import keras.layers as L
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pickle
import os
import random
import bisect

Using TensorFlow backend.


In [2]:
def generate_batch(time_step,k):
    """Draw one random training example from the loaded recording.

    Reads the notebook-level globals ``generator``, ``spectrogram``, ``sr``
    and ``hop_len``.

    Args:
        time_step: window length in seconds.
        k: MIDI ticks per audio sample (the notebook computes it as
            ``generator.time[-1] / len(y)``).

    Returns:
        A pair ``(features, labels)``: ``features`` is the spectrogram window
        reshaped to ``(1, frames, 128)`` and ``labels`` is the result of
        ``generator.get_notes_in_interval`` reshaped to ``(1, 128)``.
    """
    frames_per_window = int(time_step*sr/(hop_len))

    # Pick a window start (in ticks) such that the whole window ends no later
    # than the last event.
    first_tick = generator.time[0]
    last_start_tick = int(generator.time[-1]-time_step*sr*k)
    tick_from = random.randint(first_tick, last_start_tick)
    tick_to = int(tick_from + time_step*sr*k)

    # Convert ticks -> samples (divide by k) -> spectrogram frames (divide by hop_len).
    frame_from = int(tick_from/(k*hop_len))
    frame_to = int(frame_from+time_step*sr/(hop_len))

    window = spectrogram[:,frame_from:frame_to]
    features = window.transpose().reshape(1,frames_per_window,128)
    labels = np.array(generator.get_notes_in_interval(tick_from,tick_to)).reshape(1, 128)
    return features, labels

In [3]:
from segment_tree import SegmentTree
class NotesInInteval:
    """Index of note events that answers "which pitches are active in [start, stop]?".

    Instances are built with :meth:`load`, which sets:

    * ``time``  - the tick of every event, in ascending order;
    * ``M_st``  - one ``SegmentTree`` per pitch. Entry ``i`` of the tree for
      pitch ``p`` holds the running on/off balance of ``p`` right after event
      ``i`` if that event belongs to ``p``, and 0 otherwise.
    """
    num_notes = 127+1  # pitch values 0..127 index the per-pitch tables

    def __init__(self):
        pass

    @classmethod
    def load(cls, filename):
        """Build the index from a CSV file with ``pitch``, ``type`` and ``tick`` columns.

        A row whose ``type`` prints as ``'True'`` raises the balance of its
        pitch by one; any other row lowers it by one.

        Args:
            filename: anything ``pandas.read_csv`` accepts.

        Returns:
            A populated ``NotesInInteval`` instance.
        """
        obj = cls()

        ev = pd.read_csv(filename)
        # sorted() is stable, so events sharing a tick keep their file order.
        events = sorted(zip(ev["pitch"], ev["type"], ev["tick"]),
                        key=lambda event: event[2])

        # Every event is indexed. (The previous version processed
        # 2*int(len/2) events and silently dropped the last one whenever the
        # file held an odd number of rows.)
        total = len(events)

        M = [[0] * total for _ in range(cls.num_notes)]
        bal = [0] * cls.num_notes
        obj.time = []
        for i, (pitch, is_on, tick) in enumerate(events):
            bal[pitch] += 1 if str(is_on) == 'True' else -1
            M[pitch][i] = bal[pitch]
            obj.time.append(tick)

        obj.M_st = [SegmentTree(row) for row in M]

        return obj

    def get_notes_in_interval(self, start, stop):
        """Return a 0/1 list (length ``num_notes``) of pitches active in the interval.

        The interval is widened at its left edge to the latest event at or
        before ``start`` and covers every event whose tick is ``<= stop``.
        ``start`` and ``stop`` may be given in either order.

        Args:
            start: interval start, in ticks.
            stop: interval end, in ticks (inclusive).

        Returns:
            ``list`` of ``num_notes`` integers, each 0 or 1.
        """
        if(start > stop):
            (start, stop) = (stop, start)

        # Nothing can be sounding before the first event (or with no events at
        # all); previously this case fell through to a query over the whole file.
        if len(self.time) == 0 or stop < self.time[0]:
            return [0] * self.num_notes

        start_index = 0
        if(start >= self.time[0]):
            # First event sharing the tick of the latest event at or before `start`.
            anchor_tick = self.time[bisect.bisect_right(self.time, start)-1]
            start_index = bisect.bisect_left(self.time, anchor_tick)

        # Last event with tick <= stop. Using bisect_left here (as before)
        # stopped at the FIRST event of that tick and so left out the other
        # notes of a chord that starts exactly at the stop tick.
        stop_index = bisect.bisect_right(self.time, stop) - 1

        # NOTE(review): assumes SegmentTree.query treats both bounds as inclusive.
        plnotes = (self.M_st[i].query(start_index, stop_index, "max")
                   for i in range(self.num_notes))
        # Clamp to {0, 1}: stacked note-ons give balances > 1 and an unmatched
        # note-off gives a negative balance; neither is a valid binary label.
        return [max(0, min(x, 1)) for x in plnotes]

In [64]:
# Configuration shared by the data helpers and the network.
time_step = 5 #seconds of audio per training window
hop_len = 512 # spectrogram hop length, in samples
sr = 22050 # sample rate in Hz (reassigned later by librosa.load)

# Input: (frames per window, 128 spectrogram bins). Output: one sigmoid per pitch.
model=keras.models.Sequential()
model.add(L.InputLayer(input_shape=(int(time_step*sr/(hop_len)),128)))
# Each convolution now has a ReLU. With the default activation=None the two
# stacked Conv1D layers were purely linear and therefore equivalent to a
# single wider convolution.
model.add(L.Conv1D(256,6, padding='same', activation='relu'))
model.add(L.Conv1D(256,3, padding='same', activation='relu'))
model.add(L.GlobalMaxPool1D())
model.add(L.Dense(512,activation='relu'))
model.add(L.Dense(256, activation='relu'))
model.add(L.Dense(128, activation='sigmoid'))

In [65]:
# Each of the 128 sigmoid outputs is an independent yes/no target, hence
# binary cross-entropy.
model.compile(loss='binary_crossentropy', optimizer='SGD')

In [6]:
from IPython.display import clear_output
class PlotLosses(keras.callbacks.Callback):
    """Keras callback that records the loss and validation loss of every epoch.

    After training, ``x`` holds the epoch counters, ``losses`` / ``val_losses``
    the corresponding values (``None`` when a metric was not reported) and
    ``logs`` the raw log dictionaries. All of them are reset whenever
    ``on_train_begin`` runs.
    """

    def on_train_begin(self, logs=None):
        """Reset the recorded history."""
        self.i = 0
        self.x = []
        self.losses = []
        self.val_losses = []

        #self.fig = plt.figure()

        self.logs = []

    def on_epoch_end(self, epoch, logs=None):
        """Append this epoch's ``loss`` / ``val_loss`` to the history."""
        # A fresh dict per call: the former `logs={}` default was one shared
        # object that ended up stored inside self.logs.
        if logs is None:
            logs = {}

        self.logs.append(logs)
        self.x.append(self.i)
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))
        self.i += 1

        # Live plotting is currently disabled:
        # clear_output(wait=True)
        # plt.plot(self.x, self.losses, label="loss")
        # plt.plot(self.x, self.val_losses, label="val_loss")
        # plt.legend()
        # plt.show();

plot_losses = PlotLosses()

In [75]:
# Audio waveform and its sample rate; `sr` replaces the constant set in the
# configuration cell with the rate librosa actually used.
y, sr = librosa.load("/home/jheuristic/data/Test/wav/009count.mid.wav")

# Note-event index for the same recording (exposes `.time` and
# `.get_notes_in_interval`). The file is opened in a `with` block so the handle
# is closed; it used to be left open.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
# NOTE(review): presumably unpickling needs the NotesInInteval class to be defined first - confirm.
with open('/home/jheuristic/data/Test/pickles/009count.mid.pckl','rb') as pickle_file:
    generator = pickle.load(pickle_file)

In [8]:
# `y` and `sr` are passed by keyword: recent librosa releases make them
# keyword-only, and keywords work on older releases too.
spectrogram=librosa.feature.melspectrogram(y=y,sr=sr,hop_length=hop_len)

# MIDI ticks per audio sample: tick of the last event divided by the number of samples.
# NOTE(review): this treats the last event as coinciding with the end of the audio - confirm.
k = generator.time[-1]/len(y)

In [10]:
# One optimisation step per randomly drawn window (batch size 1).
# The sample is bound to x_step / y_step instead of x / y: assigning to `y`
# overwrote the audio waveform that later cells still need (len(y)).
# NOTE(review): PlotLosses.on_train_begin clears its history, and every fit()
# call starts a new training run, so plot_losses presumably only keeps the
# last step's loss - confirm.
for i in range(1000):
    x_step, y_step = generate_batch(time_step, k)
    model.fit(x_step, y_step, callbacks=[plot_losses], epochs=1, verbose=0)

In [24]:
# Shape check of a single example. Named x_sample / y_sample so the audio
# waveform stored in `y` is not overwritten.
x_sample, y_sample = generate_batch(time_step, k)
x_sample.shape, y_sample.shape

((1, 215, 128), (1, 128))

In [25]:
# Inspect the label vector of one example. Named x_sample / y_sample so the
# audio waveform stored in `y` is not overwritten; the final expression makes
# the cell actually display the labels shown in its output.
x_sample, y_sample = generate_batch(time_step, k)
y_sample

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0,
        1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [28]:
# Scratch check: zip() groups the i-th elements of its arguments together.
[*zip([1, 2], [3, 4], [5, 6])]

[(1, 3, 5), (2, 4, 6)]

In [39]:
# Draw 10 random examples, regroup them into (all features, all labels) and
# stack each group along the batch axis.
x_batch, y_batch = (
    np.concatenate(parts)
    for parts in zip(*[generate_batch(time_step, k) for _ in range(10)])
)

In [40]:
# Shape of the stacked feature batch built from the 10 drawn examples.
x_batch.shape

(10, 215, 128)

In [45]:
# Repeatedly fit the same fixed batch and keep the value returned by each step.
loss = [model.train_on_batch(x_batch, y_batch) for _ in range(1000)]

In [66]:
# Per-pitch sigmoid outputs for the fixed batch.
a = model.predict(x=x_batch, batch_size=10)

In [79]:
def generate_for_test(time_step,startTime):
    """Cut one inference window out of the global ``spectrogram``.

    Reads the notebook-level globals ``spectrogram``, ``sr`` and ``hop_len``.

    Args:
        time_step: window length in seconds.
        startTime: window start in seconds.

    Returns:
        The window reshaped to ``(1, frames, 128)``.
    """
    frames_per_window = int(time_step*sr/(hop_len))

    # seconds -> samples (times sr) -> spectrogram frames (divide by hop_len)
    first_frame = int(startTime*sr/(hop_len))
    last_frame = int(first_frame+time_step*sr/(hop_len))

    window = spectrogram[:,first_frame:last_frame]
    return window.transpose().reshape(1,frames_per_window,128)

In [80]:
# One row of per-pitch predictions per window. `y` must be the audio waveform here.
# NOTE(review): the number of rows equals the count of whole time_step-second
# windows in the audio, but row i starts at i/2 seconds - confirm that this
# half-second stride is intended.
n_windows = int(len(y)/(sr*time_step))
notes = np.zeros((n_windows, 128))
for i in range(n_windows):
    window = generate_for_test(time_step, i/2)
    notes[i] = model.predict(window)[0]

In [None]:
# Predictions as an image: one row per pitch, one column per window.
plt.figure(figsize=(24, 8))
plt.imshow(notes.T)

In [67]:
# Fraction of (example, pitch) entries where the 0.5-thresholded prediction equals the label.
# NOTE(review): going by the execution counts, `a` was predicted right after the
# model was rebuilt and recompiled, i.e. presumably by an untrained network - confirm.
((a > 0.5).astype(y_batch.dtype) == y_batch).mean()

0.49921874999999999

In [88]:
# Audio samples minus (number of spectrogram frames * hop length).
len(y) - spectrogram.shape[1]*hop_len

-17