In [15]:
import numpy as np
import pandas as pd
import numba

In [9]:
# Read the four source CSVs without a header row, skipping each file's leading
# rows and dropping every row that contains a missing value.
clair = pd.read_csv(
    "CSV/clair-de-lune.csv", skiprows=192, header=None
).dropna(axis=0)
dvorak = pd.read_csv(
    "CSV/Dvorak9Largo.csv", skiprows=98, header=None
).dropna(axis=0)
jupiter = pd.read_csv(
    "CSV/Jupiter.csv", skiprows=39, header=None
).dropna(axis=0)
pachelbel = pd.read_csv(
    "CSV/pachelbel.csv", skiprows=27, header=None
).dropna(axis=0)

In [10]:
# Select the note (column 4) and velocity (column 5) series for every piece and
# collect the distinct values that occur in each.
#
# `.ix` no longer exists in current pandas. The frames are read with
# header=None, so the integer column labels coincide with positions and
# `.iloc` picks the same columns. The row index is reset because dropna()
# can leave gaps, while encode() walks positions 0..len-1.

# Clair de Lune
Cnotes = clair.iloc[:, 4].reset_index(drop=True)
Cvelocity = clair.iloc[:, 5].reset_index(drop=True)
CpossibleNotes = np.unique(Cnotes)
CpossibleVelocities = np.unique(Cvelocity)

# Dvorak
Dnotes = dvorak.iloc[:, 4].reset_index(drop=True)
Dvelocity = dvorak.iloc[:, 5].reset_index(drop=True)
DpossibleNotes = np.unique(Dnotes)
DpossibleVelocities = np.unique(Dvelocity)

# Jupiter
Jnotes = jupiter.iloc[:, 4].reset_index(drop=True)
Jvelocity = jupiter.iloc[:, 5].reset_index(drop=True)
JpossibleNotes = np.unique(Jnotes)
JpossibleVelocities = np.unique(Jvelocity)

# Pachelbel
Pnotes = pachelbel.iloc[:, 4].reset_index(drop=True)
Pvelocity = pachelbel.iloc[:, 5].reset_index(drop=True)
PpossibleNotes = np.unique(Pnotes)
PpossibleVelocities = np.unique(Pvelocity)

In [11]:
# Number of distinct symbols per piece: k1 counts notes, k2 counts velocities.
Ck1, Ck2 = len(CpossibleNotes), len(CpossibleVelocities)
Dk1, Dk2 = len(DpossibleNotes), len(DpossibleVelocities)
Jk1, Jk2 = len(JpossibleNotes), len(JpossibleVelocities)
Pk1, Pk2 = len(PpossibleNotes), len(PpossibleVelocities)


In [12]:
def encode(x, code):
    """Map every value in ``x`` to its position in ``code``.

    Parameters
    ----------
    x : array-like
        Observed values. They are read positionally, so the index of a pandas
        Series is ignored.
    code : array-like
        Symbols; the position of a symbol in ``code`` is its encoding. If a
        symbol is repeated, its first position is used.

    Returns
    -------
    numpy.ndarray
        Integer array with ``len(x)`` entries, directly usable as an index.

    Raises
    ------
    ValueError
        If a value of ``x`` does not occur in ``code``.
    """
    values = np.asarray(x)
    symbols = np.asarray(code)

    # One dictionary lookup per observation instead of one np.where scan.
    lookup = {}
    for position, symbol in enumerate(symbols.tolist()):
        lookup.setdefault(symbol, position)

    # Integer dtype: the result is used to index emission matrices.
    output = np.zeros(len(values), dtype=np.intp)
    for i, value in enumerate(values.tolist()):
        if value not in lookup:
            raise ValueError(
                "value %r at position %d is not present in code" % (value, i)
            )
        output[i] = lookup[value]
    return output

In [13]:
CxNotes = encode(Cnotes, CpossibleNotes)
CxVel = encode(Cvelocity, CpossibleVelocities)

DxNotes = encode(Dnotes, DpossibleNotes)
DxVel = encode(Dvelocity, DpossibleVelocities)

JxNotes = encode(Jnotes, JpossibleNotes)
JxVel = encode(Jvelocity, JpossibleVelocities)

PxNotes = encode(Pnotes, PpossibleNotes)
PxVel = encode(Pvelocity, PpossibleVelocities)

In [16]:
#@numba.njit()
#Function using the log-sum-exp trick#
def logSumExp(a):
    """Return ``log(sum(exp(a)))`` computed stably by factoring out the maximum.

    Parameters
    ----------
    a : numpy.ndarray
        Non-empty array of log-domain values; all entries are summed.

    Returns
    -------
    float
        The log of the summed exponentials. If the maximum is not finite
        (every entry is ``-inf``, or some entry is ``+inf`` or NaN) the
        maximum itself is returned.

    Raises
    ------
    ValueError
        Raised by ``np.max`` when ``a`` is empty.
    """
    b = np.max(a)
    # Subtracting a non-finite maximum would evaluate inf - inf = NaN.
    if not np.isfinite(b):
        return b
    return b + np.log(np.sum(np.exp(a - b)))

#####################
##Forward Algorithm##
#####################

#Function to run forward algorithm, arguments are n = # obs, m = # states for z,#
#k = # states for x, pi = initial distribution(m vector), 
#Tmat = transition matrix (mxm), phi = emission distribution (m x k matrix)#
#x is the observed data#
#takes log of pi, Tmat and phi
@numba.jit()
def forwardAlg(n, m, k, pi, Tmat, phi, x):
    """Run the forward recursion in log space.

    Parameters
    ----------
    n : int
        Number of observations.
    m : int
        Number of hidden states.
    k : int
        Number of observable symbols (not used in the body).
    pi : array of length m
        Log initial distribution.
    Tmat : (m, m) array
        Log transition matrix; ``Tmat[i, l]`` is the entry for moving from
        state ``i`` to state ``l``.
    phi : (m, k) array
        Log emission matrix; ``phi[l, s]`` is the entry for state ``l``
        emitting symbol ``s``.
    x : array of length n
        Observed symbols as 0-based column positions into ``phi``.

    Returns
    -------
    numpy.ndarray
        ``(n, m)`` matrix ``g`` of forward log values.
    """
    # Observations index phi, so they must be integers.
    obs = np.asarray(x).astype(np.int64)

    g = np.zeros((n, m))
    for i in range(0, m):
        g[0, i] = pi[i] + phi[i, obs[0]]

    for j in range(1, n):
        for l in range(0, m):
            # Same 0-based symbol index as the initial step: subtracting 1
            # here would read the wrong emission column (wrapping to the last
            # column for symbol 0).
            g[j, l] = logSumExp(g[j - 1, :] + Tmat[:, l] + phi[l, obs[j]])
    return g

def pForward(g, x):
    """Return log p(x_1:n) as the log-sum-exp of row ``len(x) - 1`` of the forward matrix ``g``."""
    final_row = g[len(x) - 1, :]
    return logSumExp(final_row)

In [19]:
# Number of hidden states used for the uniform sanity-check parameters below.
m = 10

# Sequence lengths per piece: n1 for notes, n2 for velocities.
Cn1 = len(CxNotes)
Cn2 = len(CxVel)

Dn1 = len(DxNotes)
Dn2 = len(DxVel)

Jn1 = len(JxNotes)
Jn2 = len(JxVel)

Pn1 = len(PxNotes)
Pn2 = len(PxVel)

# Uniform initial, emission and transition distributions.
# The unprefixed names k1 / k2 are not defined anywhere in this notebook, so
# this cell raised NameError on a fresh kernel. The Clair de Lune alphabet
# sizes are used instead.
# NOTE(review): confirm Clair de Lune is the piece the check was meant for.
pi1 = np.full(m, 1.0 / m)
phi1 = np.full((m, Ck1), 1.0 / Ck1)
Tmat1 = np.full((m, m), 1.0 / m)

pi2 = np.full(m, 1.0 / m)
phi2 = np.full((m, Ck2), 1.0 / Ck2)
Tmat2 = np.full((m, m), 1.0 / m)

In [47]:
# Forward-pass sanity check with the uniform parameters.
# n1, k1 and xNotes are not defined in this notebook; the Clair de Lune
# equivalents are used so the cell runs on a fresh kernel.
g = forwardAlg(Cn1, m, Ck1, np.log(pi1), np.log(Tmat1), np.log(phi1), CxNotes)
pXf = pForward(g, CxNotes)
pXf

  if __name__ == '__main__':


-10190.648290046434

In [17]:
@numba.jit()
def backwardAlg(n, m, k, pi, Tmat, phi, x):
    """Run the backward recursion in log space.

    Takes the same arguments as ``forwardAlg`` (``k`` and ``pi`` are not used
    in the body) and returns the ``(n, m)`` matrix of backward log values. The
    last row stays at zero and earlier rows are filled from the end of the
    sequence towards the start.
    """
    r = np.zeros((n, m))
    for step in reversed(range(n - 1)):
        following = step + 1
        for state in range(m):
            terms = r[following, :] + Tmat[state, :] + phi[:, x[following]]
            r[step, state] = logSumExp(terms)
    return r
@numba.jit()
def pBackward(r, pi, phi, x):
    """Return log p(x_1:n) from the backward matrix ``r``.

    Combines the first backward row with the log initial distribution ``pi``
    and the log emission column of the first observation.
    """
    first_terms = r[0, :] + pi + phi[:, x[0]]
    return logSumExp(first_terms)

In [45]:
# Backward-pass sanity check; the value should agree with the forward result.
# n1, k1 and xNotes are not defined in this notebook; the Clair de Lune
# equivalents are used so the cell runs on a fresh kernel.
r = backwardAlg(Cn1, m, Ck1, np.log(pi1), np.log(Tmat1), np.log(phi1), CxNotes)
pBackward(r, np.log(pi1), np.log(phi1), CxNotes)

  if __name__ == '__main__':
  from ipykernel import kernelapp as app


-10190.648290046433

In [18]:
@numba.jit()
def BaumWelch(n, m, k, x, tol):
    """Fit a discrete hidden Markov model with the Baum-Welch (EM) algorithm.

    Parameters
    ----------
    n : int
        Number of observations.
    m : int
        Number of hidden states.
    k : int
        Number of observable symbols.
    x : array of length n
        Observed symbols as 0-based positions in ``range(k)``.
    tol : float
        Iteration stops once log p(x_1:n) changes by less than ``tol``
        between two consecutive iterations.

    Returns
    -------
    tuple
        ``(iterations, pNew, pi, phi, Tmat)``: the number of non-converged
        iterations, the last evaluated log p(x_1:n), and the estimated initial
        distribution, emission matrix and transition matrix as probabilities.
    """
    # Observations are used as indices into phi, so they must be integers.
    obs = np.asarray(x).astype(np.int64)

    # Randomly initialise pi, Tmat and phi in log space; each row is normalised.
    vals = np.random.rand(m)
    pi = np.log(vals / np.sum(vals))
    Tmat = np.zeros(shape=(m, m))
    phi = np.zeros(shape=(m, k))
    for i in range(0, m):
        vals1 = np.random.rand(m)
        Tmat[i, :] = np.log(vals1 / np.sum(vals1))
        vals2 = np.random.rand(k)
        phi[i, :] = np.log(vals2 / np.sum(vals2))

    iterations = 0
    pOld = 1E10

    # gamma[t, i]: log posterior of state i at time t.
    # beta[t, i, j]: log posterior of the transition i -> j into time t.
    # beta[0] is never filled or read because no transition leads into t = 0.
    gamma = np.zeros(shape=(n, m))
    beta = np.zeros(shape=(n, m, m))

    # Positions of every symbol; they do not change between iterations.
    masks = [obs == w for w in range(0, k)]

    # Stop iterating when log p(x_1:n) differs by less than tol between iterations.
    while True:
        g = forwardAlg(n, m, k, pi, Tmat, phi, obs)
        h = backwardAlg(n, m, k, pi, Tmat, phi, obs)
        pNew = pForward(g, obs)

        ## E-step ##
        gamma[:, :] = g + h - pNew
        for t in range(1, n):
            beta[t, :, :] = (
                g[t - 1, :].reshape(m, 1)
                + Tmat
                + (phi[:, obs[t]] + h[t, :]).reshape(1, m)
                - pNew
            )

        ## M-step ##
        pi = gamma[0, :] - logSumExp(gamma[0, :])
        if n > 1:
            # A single observation has no transitions to re-estimate from.
            for i in range(0, m):
                rowTotal = logSumExp(beta[1:n, i, :])
                for j in range(0, m):
                    Tmat[i, j] = logSumExp(beta[1:n, i, j]) - rowTotal
        for i in range(0, m):
            stateTotal = logSumExp(gamma[:, i])
            for w in range(0, k):
                if masks[w].any():
                    phi[i, w] = logSumExp(gamma[masks[w], i]) - stateTotal
                else:
                    # A symbol that never occurs gets probability zero.
                    phi[i, w] = -np.inf

        if abs(pOld - pNew) < tol:
            break
        pOld = pNew
        iterations += 1

    # Returned only after convergence; returning from inside the loop would
    # stop EM after a single iteration.
    return (iterations, pNew, np.exp(pi), np.exp(phi), np.exp(Tmat))
        

In [20]:
# Fit one 75-state model per piece for notes (suffix 1) and velocities
# (suffix 2). The seed is set once, so the call order determines the random
# draws each fit receives.
np.random.seed(77)

# Clair de Lune
(Cit1, Cp1, Cpi1, Cphi1, CTmat1) = BaumWelch(Cn1, 75, Ck1, CxNotes, 1e-4)
(Cit2, Cp2, Cpi2, Cphi2, CTmat2) = BaumWelch(Cn2, 75, Ck2, CxVel, 1e-4)

# Dvorak
(Dit1, Dp1, Dpi1, Dphi1, DTmat1) = BaumWelch(Dn1, 75, Dk1, DxNotes, 1e-4)
(Dit2, Dp2, Dpi2, Dphi2, DTmat2) = BaumWelch(Dn2, 75, Dk2, DxVel, 1e-4)

# Jupiter
(Jit1, Jp1, Jpi1, Jphi1, JTmat1) = BaumWelch(Jn1, 75, Jk1, JxNotes, 1e-4)
(Jit2, Jp2, Jpi2, Jphi2, JTmat2) = BaumWelch(Jn2, 75, Jk2, JxVel, 1e-4)

# Pachelbel
(Pit1, Pp1, Ppi1, Pphi1, PTmat1) = BaumWelch(Pn1, 75, Pk1, PxNotes, 1e-4)
(Pit2, Pp2, Ppi2, Pphi2, PTmat2) = BaumWelch(Pn2, 75, Pk2, PxVel, 1e-4)

  from ipykernel import kernelapp as app
  app.launch_new_instance()


In [21]:
def decode(x, code):
    """Translate encoded positions back into the symbols stored in ``code``.

    Parameters
    ----------
    x : array-like
        Positions into ``code``. Whole-number floats such as ``3.0`` are
        accepted; values are read positionally.
    code : array-like
        Numeric symbols, indexed by position.

    Returns
    -------
    numpy.ndarray
        Float array holding ``code[x[i]]`` for every ``i``.

    Raises
    ------
    ValueError
        If ``x`` contains a value that is not a whole number.
    IndexError
        If a position lies outside ``code``.
    """
    positions = np.asarray(x)
    # NumPy only accepts integer indices, so convert and reject fractions.
    indices = positions.astype(np.intp)
    if not np.array_equal(indices, positions):
        raise ValueError("x must contain whole-number positions into code")
    return np.asarray(code)[indices].astype(float)

In [51]:
# xNotes / possibleNotes are not defined in this notebook; the Clair de Lune
# equivalents are used so the cell runs on a fresh kernel.
decode(CxNotes, CpossibleNotes)



array([ 64.,  37.,  49., ...,  37.,  44.,  49.])

In [22]:
def hmm(n, pi, phi, Tmat, code):
    """Sample a sequence of ``n`` symbols from a hidden Markov model.

    Parameters
    ----------
    n : int
        Length of the sequence to generate.
    pi : array of length m
        Initial state probabilities.
    phi : (m, k) array
        Emission probabilities; row ``i`` is the distribution for state ``i``.
    Tmat : (m, m) array
        Transition probabilities; row ``i`` is the distribution of the next
        state given state ``i``.
    code : array-like
        Symbols used to translate sampled positions through ``decode``.

    Returns
    -------
    numpy.ndarray
        The decoded sequence of length ``n``.
    """
    m = Tmat.shape[0]
    k = phi.shape[1]

    # Integer arrays: sampled states and symbols are used as indices below.
    z = np.zeros(n, dtype=np.intp)
    x = np.zeros(n, dtype=np.intp)

    if n > 0:
        # Scalar draws (no size argument) avoid storing size-1 arrays in
        # scalar slots. The whole state path is drawn before the emissions.
        z[0] = np.random.choice(m, p=pi)
        for j in range(1, n):
            z[j] = np.random.choice(m, p=Tmat[z[j - 1], :])
        for i in range(0, n):
            x[i] = np.random.choice(k, p=phi[z[i], :])

    output = decode(x, code)
    return output


In [23]:
# Sample a new note sequence and a new velocity sequence for every piece from
# its fitted parameters, keeping the original sequence lengths.
CnewNotes = hmm(n=Cn1, pi=Cpi1, phi=Cphi1, Tmat=CTmat1, code=CpossibleNotes)
CnewVelocities = hmm(n=Cn2, pi=Cpi2, phi=Cphi2, Tmat=CTmat2, code=CpossibleVelocities)

DnewNotes = hmm(n=Dn1, pi=Dpi1, phi=Dphi1, Tmat=DTmat1, code=DpossibleNotes)
DnewVelocities = hmm(n=Dn2, pi=Dpi2, phi=Dphi2, Tmat=DTmat2, code=DpossibleVelocities)

JnewNotes = hmm(n=Jn1, pi=Jpi1, phi=Jphi1, Tmat=JTmat1, code=JpossibleNotes)
JnewVelocities = hmm(n=Jn2, pi=Jpi2, phi=Jphi2, Tmat=JTmat2, code=JpossibleVelocities)

PnewNotes = hmm(n=Pn1, pi=Ppi1, phi=Pphi1, Tmat=PTmat1, code=PpossibleNotes)
PnewVelocities = hmm(n=Pn2, pi=Ppi2, phi=Pphi2, Tmat=PTmat2, code=PpossibleVelocities)



In [24]:
# Pair each generated note sequence with its velocities (column "vel") and
# write one CSV per piece.
Coutput = pd.DataFrame(CnewNotes).assign(vel=CnewVelocities)
Coutput.to_csv("clair-de-luneRemix.csv")

Doutput = pd.DataFrame(DnewNotes).assign(vel=DnewVelocities)
Doutput.to_csv("Dvorak9Remix.csv")

Joutput = pd.DataFrame(JnewNotes).assign(vel=JnewVelocities)
Joutput.to_csv("jupiterRemix.csv")

Poutput = pd.DataFrame(PnewNotes).assign(vel=PnewVelocities)
Poutput.to_csv("pachelbelRemix.csv")