In [69]:
import numpy as np
import pandas as pd

In [12]:
# Read the MIDI-as-CSV export: skip the first 20 rows, treat the file as
# header-less, then discard every row that contains a missing value.
silentNight = (
    pd.read_csv("silent_night_easy.csv", header=None, skiprows=20)
    .dropna(axis=0)
)
silentNight

Unnamed: 0,0,1,2,3,4,5
0,2,0,Note_on_c,0,60,53
1,2,384,Note_on_c,0,69,56
2,2,391,Note_off_c,0,67,0
3,2,512,Note_on_c,0,67,61
4,2,514,Note_off_c,0,69,0
5,2,768,Note_on_c,0,64,54
6,2,768,Note_off_c,0,60,0
7,2,768,Note_on_c,0,60,57
8,2,773,Note_off_c,0,67,0
9,2,1536,Note_on_c,0,67,65


In [33]:
# Select the note and velocity columns (column labels 4 and 5).
# `.ix` was removed in pandas 1.0. The frame is read with header=None, so its
# columns carry integer labels and `.loc` picks exactly the same columns.
notes = silentNight.loc[:, 4]
velocity = silentNight.loc[:, 5]

# Sorted unique values of each column; these are the symbol alphabets that
# the observations are encoded against.
possibleNotes = np.unique(notes)
possibleVelocities = np.unique(velocity)
notes

0      60
1      69
2      67
3      67
4      69
5      64
6      60
7      60
8      67
9      67
10     60
11     60
12     64
13     69
14     67
15     67
16     69
17     64
18     60
19     60
20     67
21     60
22     59
23     59
24     57
25     64
26     74
27     57
28     55
29     74
       ..
117    72
118    55
119    60
120    71
121    76
122    60
123    60
124    72
125    76
126    72
127    60
128    60
129    67
130    72
131    64
132    67
133    67
134    60
135    55
136    64
137    65
138    67
139    62
140    65
141    60
142    55
143    60
144    62
145    60
146    60
Name: 4, dtype: float64

In [32]:
# Alphabet sizes: number of distinct notes (k1) and distinct velocities (k2).
k1, k2 = len(possibleNotes), len(possibleVelocities)
possibleNotes

array([ 53.,  55.,  57.,  59.,  60.,  62.,  64.,  65.,  67.,  69.,  71.,
        72.,  74.,  76.,  77.])

In [30]:
# Position of the first note within possibleNotes. The match is indexed
# explicitly because int() on a 1-element array is deprecated since NumPy 1.25.
int(np.where(possibleNotes == notes[0])[0][0])

4

In [36]:
def encode(x, code):
    """Map every value of ``x`` to its position in ``code``.

    Parameters
    ----------
    x : sequence or pandas.Series
        Observed values. They are read in positional order, so a Series
        whose index is not 0..n-1 is handled correctly.
    code : array-like
        The alphabet. Every value of ``x`` must occur in it.

    Returns
    -------
    numpy.ndarray
        Integer array with one entry per element of ``x``: the index of the
        first matching entry of ``code``. The dtype is integer so the result
        can be used directly to index NumPy arrays.

    Raises
    ------
    ValueError
        If a value of ``x`` does not occur in ``code``.
    """
    values = np.asarray(x)
    alphabet = np.asarray(code)
    output = np.zeros(len(values), dtype=int)
    for i, value in enumerate(values):
        matches = np.where(alphabet == value)[0]
        if matches.size == 0:
            raise ValueError("value %r at position %d is not in code" % (value, i))
        output[i] = matches[0]
    return output

In [38]:
xNotes = encode(notes, possibleNotes)
xVel = encode(velocity, possibleVelocities)

In [41]:
#Function using the log-sum-exp trick#
def logSumExp(a):
    """Return log(sum(exp(a))) over all elements of ``a``, computed stably.

    The maximum is subtracted before exponentiating so that large-magnitude
    log values neither overflow nor underflow.

    Parameters
    ----------
    a : array-like
        Log-domain values of any shape; the reduction is over every element.

    Returns
    -------
    float
        log(sum(exp(a))). ``-inf`` is returned for empty input and when every
        element is ``-inf`` (the log of a zero total).
    """
    a = np.asarray(a, dtype=float)
    if a.size == 0:
        return -np.inf
    b = np.max(a)
    # When the maximum is not finite, a - b would produce NaN (inf - inf);
    # the maximum itself is already the correct answer in that case.
    if not np.isfinite(b):
        return b
    return b + np.log(np.sum(np.exp(a - b)))

#####################
##Forward Algorithm##
#####################

def forwardAlg(n, m, k, pi, Tmat, phi, x):
    """Run the HMM forward algorithm in log space.

    Parameters
    ----------
    n : int
        Number of observations.
    m : int
        Number of hidden states.
    k : int
        Number of observable symbols (not used by the computation; kept for
        a signature parallel to the other routines).
    pi : array, length m
        Log of the initial state distribution.
    Tmat : array, m x m
        Log of the transition matrix; ``Tmat[i, l]`` is the log probability
        of moving from state i to state l.
    phi : array, m x k
        Log of the emission distribution; ``phi[l, s]`` is the log
        probability that state l emits symbol s.
    x : sequence
        Observed symbols as 0-based indices into the columns of ``phi``.

    Returns
    -------
    numpy.ndarray, n x m
        ``g[j, l]`` = log p(x_0, ..., x_j, z_j = l).
    """
    # Symbols are used as array indices, so they must be integers even if the
    # caller stored them in a float array.
    obs = np.asarray(x).astype(int)

    g = np.zeros((n, m))
    for i in range(0, m):
        g[0, i] = pi[i] + phi[i, obs[0]]

    for j in range(1, n):
        for l in range(0, m):
            # Symbols are 0-based, so the emission column is obs[j] itself
            # (not obs[j] - 1), matching the initialisation above.
            g[j, l] = logSumExp(g[j-1, :] + Tmat[:, l] + phi[l, obs[j]])
    return g

def pForward(g, x):
    """Return log p(x_1:n) from the matrix produced by the forward algorithm.

    The row of ``g`` belonging to the last observation (row ``len(x) - 1``)
    is combined over all hidden states with logSumExp.
    """
    finalRow = g[len(x) - 1, :]
    return logSumExp(finalRow)

In [39]:
# Uniform starting parameters for HMMs with m hidden states.
m = 10
n1, n2 = len(xNotes), len(xVel)

# Notes model: initial distribution, emission matrix, transition matrix.
pi1 = np.full(shape=m, fill_value=1/m)
phi1 = np.full(shape=(m, k1), fill_value=1/k1)
Tmat1 = np.full(shape=(m, m), fill_value=1/m)

# Velocity model: same layout, with k2 emission symbols.
pi2 = np.full(shape=m, fill_value=1/m)
phi2 = np.full(shape=(m, k2), fill_value=1/k2)
Tmat2 = np.full(shape=(m, m), fill_value=1/m)

In [43]:
# Forward pass on the encoded notes with the uniform parameters (passed as
# logs), followed by the resulting log-likelihood of the whole sequence.
g = forwardAlg(
    n=n1,
    m=m,
    k=k1,
    pi=np.log(pi1),
    Tmat=np.log(Tmat1),
    phi=np.log(phi1),
    x=xNotes,
)
pXf = pForward(g=g, x=xNotes)
pXf



-398.08337956202354

In [44]:
def backwardAlg(n, m, k, pi, Tmat, phi, x):
    """Run the HMM backward algorithm in log space.

    Parameters
    ----------
    n : int
        Number of observations.
    m : int
        Number of hidden states.
    k : int
        Number of observable symbols (not used by the computation).
    pi : array, length m
        Log initial distribution (not used by the computation; kept so the
        signature mirrors forwardAlg).
    Tmat : array, m x m
        Log transition matrix; ``Tmat[l, i]`` is the log probability of
        moving from state l to state i.
    phi : array, m x k
        Log emission distribution; ``phi[i, s]`` is the log probability that
        state i emits symbol s.
    x : sequence
        Observed symbols as 0-based indices into the columns of ``phi``.

    Returns
    -------
    numpy.ndarray, n x m
        ``r[j, l]`` = log p(x_{j+1}, ..., x_{n-1} | z_j = l). The last row is
        all zeros (log 1).
    """
    # Symbols are used as array indices, so they must be integers even if the
    # caller stored them in a float array.
    obs = np.asarray(x).astype(int)

    r = np.zeros((n, m))
    for j in range(n-2, -1, -1):
        for l in range(0, m):
            r[j, l] = logSumExp(r[j+1, :] + Tmat[l, :] + phi[:, obs[j+1]])

    return r

#Function to return p(x_1:n) from matrix from backward algorithm
def pBackward(r, pi, phi, x):
    """Return log p(x_1:n) from the matrix produced by the backward algorithm.

    Parameters
    ----------
    r : array, n x m
        Output of backwardAlg.
    pi : array, length m
        Log initial state distribution.
    phi : array, m x k
        Log emission distribution.
    x : sequence
        Observed symbols as 0-based indices into the columns of ``phi``.

    Returns
    -------
    float
        The first row of ``r`` combined with the log initial distribution and
        the log emission probabilities of the first observation, summed over
        states with logSumExp.
    """
    # The first symbol is used as a column index, so it must be an integer
    # even if the observations are stored as floats.
    firstSymbol = int(np.asarray(x)[0])
    pXb = logSumExp(r[0, :] + pi + phi[:, firstSymbol])
    return pXb

In [45]:
# Backward pass on the encoded notes with the same (log) parameters; the
# log-likelihood computed from it is displayed as the cell result.
r = backwardAlg(
    n=n1,
    m=m,
    k=k1,
    pi=np.log(pi1),
    Tmat=np.log(Tmat1),
    phi=np.log(phi1),
    x=xNotes,
)
pBackward(r=r, pi=np.log(pi1), phi=np.log(phi1), x=xNotes)



-398.08337956202354

In [46]:
def BaumWelch(n, m, k, x, tol, maxIter=None):
    """Fit a discrete HMM to ``x`` with the Baum-Welch (EM) algorithm.

    All parameters are kept in log space while iterating. Initial values are
    drawn with ``np.random.rand``, so seed NumPy's global generator
    beforehand for reproducible results.

    Parameters
    ----------
    n : int
        Number of observations.
    m : int
        Number of hidden states.
    k : int
        Number of observable symbols.
    x : sequence
        Observed symbols as 0-based indices in ``range(k)``.
    tol : float
        Iteration stops once log p(x_1:n) changes by less than ``tol``
        between two consecutive iterations.
    maxIter : int or None, optional
        Upper bound on the number of parameter updates that did not meet the
        tolerance. ``None`` (default) means no bound.

    Returns
    -------
    tuple
        ``(iterations, pNew, pi, phi, Tmat)``: the number of iterations that
        did not meet the tolerance, the log-likelihood evaluated at the start
        of the last iteration, and the estimated initial distribution
        (length m), emission matrix (m x k) and transition matrix (m x m),
        all on the probability scale.
    """
    # Symbols are used as array indices, so they must be integers even if the
    # caller stored them in a float array.
    obs = np.asarray(x).astype(int)

    #randomly initialize pi, phi and T (stored as logs)#
    vals = np.random.rand(m)
    pi = np.log(vals/np.sum(vals))
    Tmat = np.zeros(shape = (m, m))
    phi = np.zeros(shape = (m, k))
    for i in range(0, m):
        vals1 = np.random.rand(m)
        Tmat[i, ] = np.log(vals1/np.sum(vals1))
        vals2 = np.random.rand(k)
        phi[i, ] = np.log(vals2/np.sum(vals2))

    # Positions at which each symbol occurs; these do not change between
    # iterations, so compute them once.
    occurrences = [np.where(obs == w)[0] for w in range(0, k)]

    iterations = 0
    pOld = 1E10

    # beta[t, i, j] holds the log posterior of the transition i -> j between
    # observations t-1 and t. Row t = 0 has no predecessor and is never read.
    beta = np.zeros(shape = (n, m, m))

    #Stop iterations when log(p(x_1:n)) differs by less than tol between iterations#
    while True:
        #Perform forward and backward algorithms#
        g = forwardAlg(n, m, k, pi, Tmat, phi, obs)
        h = backwardAlg(n, m, k, pi, Tmat, phi, obs)
        pNew = pForward(g, obs)

        ##E-Step##

        # gamma[t, i]: log posterior of being in state i at time t.
        gamma = g + h - pNew
        # Transition posteriors for t = 1..n-1, broadcast over (t, i, j):
        # Tmat[i, j] + phi[j, x[t]] + g[t-1, i] + h[t, j] - pNew
        beta[1:] = (Tmat[np.newaxis, :, :]
                    + phi[:, obs[1:]].T[:, np.newaxis, :]
                    + g[:-1, :, np.newaxis]
                    + h[1:, np.newaxis, :]
                    - pNew)

        ##M-Step##

        #Update pi, phi and Tmat#
        pi = gamma[0, ] - logSumExp(gamma[0, ])
        if n > 1:
            # With a single observation there are no transitions to learn from.
            for i in range(0, m):
                rowTotal = logSumExp(beta[1:, i, :])
                for j in range(0, m):
                    Tmat[i, j] = logSumExp(beta[1:, i, j]) - rowTotal
        for i in range(0, m):
            stateTotal = logSumExp(gamma[:, i])
            for w in range(0, k):
                indices = occurrences[w]
                if indices.size == 0:
                    # A symbol that never occurs gets emission probability 0.
                    phi[i, w] = -np.inf
                else:
                    phi[i, w] = logSumExp(gamma[indices, i]) - stateTotal

        if abs(pOld - pNew) < tol:
            break
        pOld = pNew
        iterations += 1
        if maxIter is not None and iterations >= maxIter:
            break

    # Return only after the loop has finished; returning from inside the loop
    # would stop EM after a single iteration.
    return (iterations, pNew, np.exp(pi), np.exp(phi), np.exp(Tmat))
        

In [73]:
# Fit one 50-state HMM to the encoded notes and one to the encoded velocities.
# The seed is fixed first because BaumWelch draws its starting values from
# NumPy's global random generator.
np.random.seed(17)
it1, p1, pi1, phi1, Tmat1 = BaumWelch(n=n1, m=50, k=k1, x=xNotes, tol=0.0001)
it2, p2, pi2, phi2, Tmat2 = BaumWelch(n=n2, m=50, k=k2, x=xVel, tol=0.0001)



In [57]:
def decode(x, code):
    """Map positions back to the values they stand for (inverse of encode).

    Parameters
    ----------
    x : sequence
        Positions into ``code``. Float-valued positions (e.g. 3.0) are
        accepted and converted to integers before indexing.
    code : array-like
        The alphabet of numeric values.

    Returns
    -------
    numpy.ndarray
        Float array with ``code[x[i]]`` at position i.
    """
    # NumPy only accepts integer indices, so convert before the lookup.
    positions = np.asarray(x).astype(int)
    return np.asarray(code)[positions].astype(float)

In [58]:
# Round-trip check: decoding the encoded notes should give back the original note values.
decode(xNotes,possibleNotes)



array([ 60.,  69.,  67.,  67.,  69.,  64.,  60.,  60.,  67.,  67.,  60.,
        60.,  64.,  69.,  67.,  67.,  69.,  64.,  60.,  60.,  67.,  60.,
        59.,  59.,  57.,  64.,  74.,  57.,  55.,  74.,  74.,  71.,  55.,
        55.,  74.,  55.,  55.,  72.,  55.,  60.,  71.,  72.,  72.,  67.,
        60.,  60.,  72.,  67.,  69.,  60.,  53.,  69.,  69.,  72.,  53.,
        53.,  69.,  71.,  72.,  69.,  71.,  67.,  53.,  60.,  69.,  69.,
        67.,  67.,  69.,  64.,  60.,  60.,  67.,  64.,  69.,  60.,  53.,
        69.,  69.,  72.,  53.,  53.,  69.,  71.,  72.,  69.,  71.,  67.,
        53.,  60.,  69.,  69.,  67.,  67.,  69.,  64.,  60.,  60.,  67.,
        60.,  59.,  59.,  57.,  64.,  74.,  57.,  55.,  74.,  74.,  77.,
        55.,  55.,  74.,  74.,  77.,  71.,  74.,  72.,  55.,  60.,  71.,
        76.,  60.,  60.,  72.,  76.,  72.,  60.,  60.,  67.,  72.,  64.,
        67.,  67.,  60.,  55.,  64.,  65.,  67.,  62.,  65.,  60.,  55.,
        60.,  62.,  60.,  60.])

In [64]:
def hmm(n, pi, phi, Tmat, code):
    """Sample a sequence of n observations from a discrete HMM.

    Parameters
    ----------
    n : int
        Number of observations to generate.
    pi : array, length m
        Initial state distribution (probabilities, not logs).
    phi : array, m x k
        Emission distribution; row z holds the symbol probabilities of
        hidden state z.
    Tmat : array, m x m
        Transition matrix; row z holds the next-state probabilities of
        hidden state z.
    code : array-like
        Alphabet used to translate sampled symbol indices into values.

    Returns
    -------
    numpy.ndarray
        The sampled observations after translation through ``decode``.
    """
    m = Tmat.shape[0]
    k = phi.shape[1]

    # States and symbols are used as array indices, so keep them as integers.
    z = np.zeros(n, dtype=int)
    x = np.zeros(n, dtype=int)

    # Each draw returns a 1-element array; take its element explicitly rather
    # than assigning the array to a scalar slot.
    z[0] = np.random.choice(m, size = 1, p = pi)[0]
    for j in range(1, n):
        z[j] = np.random.choice(m, size = 1, p = Tmat[z[j-1], :])[0]
    for i in range(0, n):
        x[i] = np.random.choice(k, size = 1, p = phi[z[i], :])[0]

    return decode(x, code)


In [74]:
# Draw a new note sequence and a new velocity sequence from the fitted models.
newNotes = hmm(n=n1, pi=pi1, phi=phi1, Tmat=Tmat1, code=possibleNotes)
newVelocities = hmm(n=n2, pi=pi2, phi=phi2, Tmat=Tmat2, code=possibleVelocities)



In [83]:
# Put the sampled notes (column 0) and velocities (column "vel") side by side
# and write them to disk.
output = pd.DataFrame(newNotes).assign(vel=newVelocities)
output.to_csv("silentNightRemix.csv")