In [10]:
import numpy as np
import pandas as pd
import numba

In [9]:
# Read the event table without a header row, skipping the first 95 lines of
# the file, then discard every row that has a missing field.
moonlight = (
    pd.read_csv("moonlight.csv", header=None, skiprows=95)
    .dropna(axis=0)
)
moonlight

Unnamed: 0,0,1,2,3,4,5
0,2,0,Control_c,0,64,127
1,2,0,Note_on_c,0,37,27
2,2,0,Note_on_c,0,49,27
3,2,85,Note_off_c,0,56,0
4,2,85,Note_on_c,0,61,38
5,2,170,Note_off_c,0,61,0
6,2,170,Note_on_c,0,64,42
7,2,256,Note_off_c,0,64,0
8,2,256,Note_on_c,0,56,40
9,2,341,Note_off_c,0,56,0


In [11]:
#Select Notes and velocity columns#
# `.ix` has been removed from pandas. The frame is read with header=None, so
# its columns carry the integer labels 0..5 and `.loc` selects by that label.
notes = moonlight.loc[:, 4]
velocity = moonlight.loc[:, 5]

#Find possible unique notes and velocities (np.unique returns them sorted)
possibleNotes = np.unique(notes)
possibleVelocities = np.unique(velocity)
notes

0       64
1       37
2       49
3       56
4       61
5       61
6       64
7       64
8       56
9       56
10      61
11      61
12      64
13      64
14      56
15      56
16      61
17      61
18      64
19      64
20      56
21      56
22      61
23      61
24      64
25      64
26      64
27      56
28      64
29      35
        ..
2513    40
2514    40
2515    37
2516    37
2517    52
2518    56
2519    61
2520    37
2521    44
2522    49
2523    37
2524    52
2525    56
2526    61
2527    37
2528    44
2529    49
2530    52
2531    56
2532    61
2533    37
2534    44
2535    49
2536    64
2537    52
2538    56
2539    61
2540    37
2541    44
2542    49
Name: 4, dtype: float64

In [14]:
# Alphabet sizes: how many distinct note values and velocity values exist.
k1, k2 = len(possibleNotes), len(possibleVelocities)
possibleNotes


array([ 29.,  30.,  31.,  32.,  33.,  34.,  35.,  36.,  37.,  38.,  39.,
        40.,  41.,  42.,  43.,  44.,  45.,  46.,  47.,  48.,  49.,  50.,
        51.,  52.,  53.,  54.,  55.,  56.,  57.,  58.,  59.,  60.,  61.,
        62.,  63.,  64.,  65.,  66.,  67.,  68.,  69.,  70.,  71.,  72.,
        73.,  74.,  75.,  76.,  78.,  79.,  80.,  81.,  82.,  84.,  87.])

In [15]:
def encode(x, code):
    """Map every value of ``x`` to its position in ``code``.

    Parameters
    ----------
    x : sequence, numpy array or pandas Series of observed values. It is
        read positionally, so a Series whose index has gaps is handled.
    code : sequence of the admissible values. When a value occurs more than
        once in ``code`` the first position is used.

    Returns
    -------
    numpy.ndarray of integer dtype with ``len(x)`` entries, where
    ``output[i]`` is the position of ``x[i]`` in ``code``. Integers are
    returned because the result is used to index arrays.

    Raises
    ------
    ValueError
        If a value of ``x`` does not occur in ``code``.
    """
    values = np.asarray(x).tolist()
    alphabet = np.asarray(code).tolist()

    # One dictionary lookup per value instead of scanning `code` every time.
    lookup = {}
    for position, symbol in enumerate(alphabet):
        if symbol not in lookup:
            lookup[symbol] = position

    output = np.zeros(len(values), dtype=np.intp)
    for i, value in enumerate(values):
        if value not in lookup:
            raise ValueError("value %r at position %d is not in code" % (value, i))
        output[i] = lookup[value]
    return output

In [16]:
# Re-express both observed sequences as positions within their alphabets.
xNotes, xVel = encode(notes, possibleNotes), encode(velocity, possibleVelocities)

In [46]:
#Function using the log-sum-exp trick#
def logSumExp(a):
    """Return ``log(sum(exp(a)))`` taken over every element of ``a``.

    The maximum is subtracted before exponentiating so that large or very
    negative log values do not overflow or underflow.

    Parameters
    ----------
    a : array-like of log-domain values (any shape).

    Returns
    -------
    float
        ``-inf`` for an empty input (the log of an empty sum). When the
        maximum is not finite it is returned unchanged, which avoids the
        ``inf - inf`` NaN that the shifted form would otherwise produce.
    """
    a = np.asarray(a, dtype=float)
    if a.size == 0:
        return -np.inf
    b = np.max(a)
    if not np.isfinite(b):
        return b
    return b + np.log(np.sum(np.exp(a - b)))

#####################
##Forward Algorithm##
#####################

@numba.jit()
def forwardAlg(n, m, k, pi, Tmat, phi, x):
    """Run the forward algorithm in log space.

    Parameters
    ----------
    n : number of observations.
    m : number of states for z.
    k : number of states for x (not read by this function).
    pi : log of the initial distribution (m vector).
    Tmat : log of the transition matrix (m x m).
    phi : log of the emission distribution (m x k matrix).
    x : the observed data, given as column positions into ``phi``.

    Returns
    -------
    g : (n, m) array. Row 0 is ``pi + phi[:, x[0]]``; each later entry is
        ``logSumExp(g[j-1, :] + Tmat[:, l] + phi[l, x[j]])``.
    """
    # NOTE(review): this calls the plain-Python logSumExp, so it depends on
    # Numba compiling @jit functions in object mode - confirm that the
    # installed Numba version still does that.

    # The observations are used as indices, so make them integers up front.
    obs = np.asarray(x).astype(np.intp)

    g = np.zeros((n, m))
    for i in range(0, m):
        g[0, i] = pi[i] + phi[i, obs[0]]

    for j in range(1, n):
        for l in range(0, m):
            # Emission column is obs[j], matching row 0 above (the earlier
            # obs[j] - 1 shifted every later observation by one symbol).
            g[j, l] = logSumExp(g[j-1, :] + Tmat[:, l] + phi[l, obs[j]])
    return g

def pForward(g, x):
    """Return the log-sum-exp of the row of ``g`` at index ``len(x) - 1``.

    ``g`` is expected to be the matrix produced by the forward algorithm;
    only the length of ``x`` is used.
    """
    final_row = g[len(x) - 1, :]
    return logSumExp(final_row)

In [40]:
# Number of hidden states and the lengths of the two encoded sequences.
m = 10
n1, n2 = len(xNotes), len(xVel)

# Uniform initial, emission and transition probabilities for the note model.
pi1 = np.ones(m) / m
phi1 = np.ones((m, k1)) / k1
Tmat1 = np.ones((m, m)) / m

# The same uniform set-up for the velocity model.
pi2 = np.ones(m) / m
phi2 = np.ones((m, k2)) / k2
Tmat2 = np.ones((m, m)) / m

In [47]:
# Forward pass over the note sequence with the uniform parameters, passed as
# logs, followed by the log-likelihood read off the last row.
g = forwardAlg(
    n1, m, k1,
    np.log(pi1), np.log(Tmat1), np.log(phi1),
    xNotes,
)
pXf = pForward(g, xNotes)
pXf

  if __name__ == '__main__':


-10190.648290046434

In [44]:
@numba.jit()
def backwardAlg(n, m, k, pi, Tmat, phi, x):
    """Run the backward algorithm in log space.

    Parameters
    ----------
    n : number of observations.
    m : number of hidden states.
    k, pi : accepted for a signature parallel to the forward pass; not read.
    Tmat : log transition matrix (m x m).
    phi : log emission matrix (m x k).
    x : the observed data, given as column positions into ``phi``.

    Returns
    -------
    r : (n, m) array. The last row stays 0; every earlier entry is
        ``logSumExp(r[j+1, :] + Tmat[l, :] + phi[:, x[j+1]])``.
    """
    # NOTE(review): this calls the plain-Python logSumExp, so it depends on
    # Numba compiling @jit functions in object mode - confirm that the
    # installed Numba version still does that.

    # The observations are used as indices, so make them integers up front.
    obs = np.asarray(x).astype(np.intp)

    r = np.zeros((n, m))
    for j in range(n-2, -1, -1):
        for l in range(0, m):
            r[j, l] = logSumExp(r[j+1, :] + Tmat[l, :] + phi[:, obs[j+1]])

    return r
@numba.jit()
def pBackward(r, pi, phi, x):
    """Return log p(x_1:n) from the matrix produced by the backward algorithm.

    Parameters
    ----------
    r : matrix returned by the backward algorithm; only row 0 is read.
    pi : log initial distribution (m vector).
    phi : log emission matrix (m x k).
    x : the observed data; only its first element is read.

    Returns
    -------
    ``logSumExp(r[0, :] + pi + phi[:, x[0]])``.
    """
    # NOTE(review): this calls the plain-Python logSumExp, so it depends on
    # Numba compiling @jit functions in object mode - confirm that the
    # installed Numba version still does that.

    # The first observation selects a column of phi, so it must be an integer.
    first = int(np.asarray(x)[0])
    pXb = logSumExp(r[0, :] + pi + phi[:, first])
    return pXb

In [45]:
# Backward pass with the same uniform log-parameters. The value displayed
# below is the log-likelihood computed from the backward matrix.
r = backwardAlg(
    n1, m, k1,
    np.log(pi1), np.log(Tmat1), np.log(phi1),
    xNotes,
)
pBackward(r, np.log(pi1), np.log(phi1), xNotes)

  if __name__ == '__main__':
  from ipykernel import kernelapp as app


-10190.648290046433

In [48]:
@numba.jit()
def BaumWelch(n, m, k, x, tol):
    """Fit HMM parameters to one observation sequence with Baum-Welch (EM).

    Parameters
    ----------
    n : number of observations.
    m : number of hidden states.
    k : number of distinct observable symbols.
    x : the observed data, given as symbol positions 0..k-1.
    tol : iteration stops once the log-likelihood changes by less than
        ``tol`` between two consecutive passes.

    Returns
    -------
    (iterations, pNew, pi, phi, Tmat)
        iterations : number of passes that did not yet meet ``tol``.
        pNew : log-likelihood computed at the start of the final pass.
        pi, phi, Tmat : fitted initial (m), emission (m x k) and transition
            (m x m) distributions as probabilities (not logs).

    Raises
    ------
    ValueError
        If the log-likelihood stops being finite, since the convergence
        test could then never succeed.

    Notes
    -----
    Initial parameters are drawn from NumPy's global random generator.
    Every pass runs a full forward and backward sweep, so a small ``tol``
    combined with many states can take a long time.
    """
    # NOTE(review): this calls the plain-Python logSumExp, so it depends on
    # Numba compiling @jit functions in object mode - confirm that the
    # installed Numba version still does that.

    # The observations are used as indices, so make them integers up front.
    obs = np.asarray(x).astype(np.intp)

    #randomly initialize pi, phi and T (all stored as logs)#
    vals = np.random.rand(m)
    pi = np.log(vals/np.sum(vals))
    Tmat = np.zeros(shape = (m, m))
    phi = np.zeros(shape = (m, k))
    for i in range(0, m):
        vals1 = np.random.rand(m)
        Tmat[i, :] = np.log(vals1/np.sum(vals1))
        vals2 = np.random.rand(k)
        phi[i, :] = np.log(vals2/np.sum(vals2))

    # Positions at which each symbol occurs; these never change, so compute
    # them once instead of once per state per pass.
    occurrences = []
    for w in range(0, k):
        occurrences.append(np.where(obs == w)[0])

    iterations = 0
    pOld = 1E10

    #Stop iterations when log(p(x_1:n)) differs by tol between iterations#
    while True:
        #Perform forward and backward algorithms#
        g = forwardAlg(n, m, k, pi, Tmat, phi, obs)
        h = backwardAlg(n, m, k, pi, Tmat, phi, obs)
        pNew = pForward(g, obs)
        if not np.isfinite(pNew):
            raise ValueError("log-likelihood is not finite; cannot test convergence")

        ##E-Step##

        # gamma[t, i]: log posterior of state i at time t.
        gamma = g + h - pNew

        ##M-Step##

        pi = gamma[0, :] - logSumExp(gamma[0, :])

        if n > 1:
            # emitBack[t-1, j] = phi[j, x[t]] + h[t, j] for t = 1..n-1.
            emitBack = phi[:, obs[1:n]].T + h[1:n, :]
            newTmat = np.zeros(shape = (m, m))
            for i in range(0, m):
                for j in range(0, m):
                    # Log of the summed pairwise posteriors of the move
                    # i -> j over t = 1..n-1, without storing an
                    # (n, m, m) array.
                    newTmat[i, j] = (Tmat[i, j]
                                     + logSumExp(g[0:n-1, i] + emitBack[:, j])
                                     - pNew)
                # Normalise the row so that it is a log distribution over j.
                newTmat[i, :] = newTmat[i, :] - logSumExp(newTmat[i, :])
            Tmat = newTmat

        for i in range(0, m):
            stateTotal = logSumExp(gamma[:, i])
            for w in range(0, k):
                indicies = occurrences[w]
                if indicies.size == 0:
                    # A symbol that never occurs gets probability zero.
                    phi[i, w] = -np.inf
                else:
                    phi[i, w] = logSumExp(gamma[indicies, i]) - stateTotal

        criteria = abs(pOld - pNew)
        if criteria < tol:
            break
        pOld = pNew
        iterations += 1

    # Returned only after convergence; returning from inside the loop would
    # stop after a single pass.
    return (iterations, pNew, np.exp(pi), np.exp(phi), np.exp(Tmat))
        

In [55]:
# Seed NumPy's global generator so the random starting parameters drawn
# inside BaumWelch are repeatable, then fit one 75-state model per sequence.
# This rebinds pi1/phi1/Tmat1 and pi2/phi2/Tmat2 to the fitted values.
np.random.seed(77)
it1, p1, pi1, phi1, Tmat1 = BaumWelch(
    n1, 75, k1, xNotes, 0.0001
)
it2, p2, pi2, phi2, Tmat2 = BaumWelch(
    n2, 75, k2, xVel, 0.0001
)

  from ipykernel import kernelapp as app
  app.launch_new_instance()


In [50]:
def decode(x, code):
    """Translate positions back into the values stored in ``code``.

    Parameters
    ----------
    x : array-like of positions into ``code``. Whole-number floats are
        accepted and converted to integers before indexing.
    code : array-like of values, addressed by position.

    Returns
    -------
    numpy.ndarray of floats with ``len(x)`` entries, where
    ``output[i] == code[x[i]]``.
    """
    # Indices must be integers; NumPy rejects float-valued indices.
    positions = np.asarray(x).astype(np.intp)
    return np.asarray(code)[positions].astype(float)

In [51]:
# Round-trip check: decoding the encoded notes gives back the note values.
decode(x=xNotes, code=possibleNotes)



array([ 64.,  37.,  49., ...,  37.,  44.,  49.])

In [52]:
def hmm(n, pi, phi, Tmat, code):
    """Sample a sequence of ``n`` observations from a hidden Markov model.

    Parameters
    ----------
    n : number of observations to generate.
    pi : initial state probabilities (length m).
    phi : emission probabilities (m x k); row ``i`` is the distribution
        over symbols in state ``i``.
    Tmat : transition probabilities (m x m); row ``i`` is the distribution
        over the next state.
    code : values to report for the symbol positions 0..k-1.

    Returns
    -------
    The sampled symbol positions translated through ``decode(x, code)``.

    Notes
    -----
    Draws come from NumPy's global random generator: first the whole state
    path, then one symbol per state.
    """
    m = Tmat.shape[0]
    k = phi.shape[1]

    # Integer arrays, because states and symbols are used as indices.
    z = np.zeros(n, dtype=np.intp)
    x = np.zeros(n, dtype=np.intp)

    if n > 0:
        # No `size` argument, so each draw is a scalar, not a length-1 array.
        z[0] = np.random.choice(m, p=pi)
        for j in range(1, n):
            z[j] = np.random.choice(m, p=Tmat[z[j-1], :])
        for i in range(0, n):
            x[i] = np.random.choice(k, p=phi[z[i], :])

    output = decode(x, code)
    return output


In [56]:
# Sample a new note sequence and a new velocity sequence from the two fitted
# models, each as long as the sequence it was fitted to.
newNotes = hmm(n=n1, pi=pi1, phi=phi1, Tmat=Tmat1, code=possibleNotes)
newVelocities = hmm(n=n2, pi=pi2, phi=phi2, Tmat=Tmat2, code=possibleVelocities)



In [57]:
# Put the sampled notes in column 0 and the sampled velocities in a "vel"
# column, then write the table to disk.
output = pd.DataFrame(newNotes).assign(vel=newVelocities)
output.to_csv("moonlightRemix.csv")