In [1]:
import numpy as np
import pandas as pd
import numba

In [97]:
clair = pd.read_csv("CSV/clair-de-lune.csv", skiprows = 192, header = None).dropna(axis = 0)
dvorak = pd.read_csv("CSV/Dvorak9Largo.csv", skiprows = 98, header = None).dropna(axis = 0)
jupiter = pd.read_csv("CSV/Jupiter.csv", skiprows = 39, header = None).dropna(axis = 0)
pachelbel = pd.read_csv("CSV/pachelbel.csv", skiprows = 27, header = None).dropna(axis = 0)

In [98]:
#Select Notes and velocity columns#
Cnotes = clair.ix[:,4]
Cvelocity = clair.ix[:,5]

#Find possible unique notes and velocities
CpossibleNotes = np.unique(Cnotes)
CpossibleVelocities =  np.unique(Cvelocity)


#Select Notes and velocity columns#
Dnotes = dvorak.ix[:,4]
Dvelocity = dvorak.ix[:,5]

#Find possible unique notes and velocities
DpossibleNotes = np.unique(Dnotes)
DpossibleVelocities =  np.unique(Dvelocity)

#Select Notes and velocity columns#
Jnotes = jupiter.ix[:,4]
Jvelocity = jupiter.ix[:,5]

#Find possible unique notes and velocities
JpossibleNotes = np.unique(Jnotes)
JpossibleVelocities =  np.unique(Jvelocity)

#Select Notes and velocity columns#
Pnotes = pachelbel.ix[:,4]
Pvelocity = pachelbel.ix[:,5]

#Find possible unique notes and velocities
PpossibleNotes = np.unique(Pnotes)
PpossibleVelocities =  np.unique(Pvelocity)

In [99]:
Ck1 = len(CpossibleNotes)
Ck2 = len(CpossibleVelocities)

Dk1 = len(DpossibleNotes)
Dk2 = len(DpossibleVelocities)

Jk1 = len(JpossibleNotes)
Jk2 = len(JpossibleVelocities)

Pk1 = len(PpossibleNotes)
Pk2 = len(PpossibleVelocities)


In [5]:
def encode(x, code):
    output = np.zeros(len(x))
    for i in range(0, len(x)):
        output[i] = int(np.where(code == x[i])[0]) 
    return output

In [100]:
CxNotes = encode(Cnotes, CpossibleNotes)
CxVel = encode(Cvelocity, CpossibleVelocities)

DxNotes = encode(Dnotes, DpossibleNotes)
DxVel = encode(Dvelocity, DpossibleVelocities)

JxNotes = encode(Jnotes, JpossibleNotes)
JxVel = encode(Jvelocity, JpossibleVelocities)

PxNotes = encode(Pnotes, PpossibleNotes)
PxVel = encode(Pvelocity, PpossibleVelocities)

In [7]:
#@numba.njit()
#Function using the log-sum-exp trick#
def logSumExp(a):
    b = np.max(a)
    return(b + np.log(np.sum(np.exp(a-b))))

#####################
##Forward Algorithm##
#####################

#Function to run forward algorithm, arguments are n = # obs, m = # states for z,#
#k = # states for x, pi = initial distribution(m vector), 
#Tmat = transition matrix (mxm), phi = emission distribution (m x k matrix)#
#x is the observed data#
#takes log of pi, Tmat and phi
@numba.jit()
def forwardAlg(n, m, k, pi, Tmat, phi, x):
    g = np.zeros((n,m))
    for i in range(0,m):
        g[0,i] = (pi[i]) + (phi[i, x[0]])
    
    for j in range(1, n):
        for l in range(0, m):
            g[j,l] = logSumExp(g[j-1, :]+(Tmat[:,l])+(phi[l,x[j]-1]))
    return(g)

def pForward(g, x):
    pXf = logSumExp(g[len(x)-1,:])
    return(pXf)

In [101]:
m = 10
Cn1 = len(CxNotes)
Cn2 = len(CxVel)

Dn1 = len(DxNotes)
Dn2 = len(DxVel)

Jn1 = len(JxNotes)
Jn2 = len(JxVel)

Pn1 = len(PxNotes)
Pn2 = len(PxVel)




In [47]:
g = forwardAlg(n1,m,k1,np.log(pi1),np.log(Tmat1),np.log(phi1),xNotes)
pXf = pForward(g,xNotes)
pXf

  if __name__ == '__main__':


-10190.648290046434

In [10]:
@numba.jit()
def backwardAlg(n, m, k, pi, Tmat, phi, x):
    r = np.zeros((n,m))
    for j in range(n-2, -1, -1):
        for l in range(0, m):
            r[j, l] = logSumExp(r[j+1,: ] + Tmat[l,:] + phi[:, x[j+1]])
    
    return(r)
@numba.jit()
#Function to return p(x_1:n) from matrix from backward algorithm
def pBackward(r, pi, phi, x):
    pXb = logSumExp(r[0,: ]+ pi +phi[:,x[0]])
    return(pXb)

In [45]:
r = backwardAlg(n1, m, k1, np.log(pi1), np.log(Tmat1), np.log(phi1), xNotes)
pBackward(r, np.log(pi1), np.log(phi1), xNotes)

  if __name__ == '__main__':
  from ipykernel import kernelapp as app


-10190.648290046433

In [17]:
N = 2
M = 3
At = np.ones((N,N))
Bt = np.ones((N,M,M))
Tmat = np.zeros((N*M, N*M))
count_ik = 0
count_jl = 0
count = 0
for i in range(0,N):
    for q in range(0,M):
        count_jl = 0
        for j in range(0, N):
            for l in range(0,M):
                
                Tmat[count_ik,count_jl] = At[i,j]*Bt[j,q,l]
                count_jl +=1
        count_ik +=1
Tmat

array([[ 1.,  1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.,  1.]])

In [36]:
#output = [l[i:i+N] for i in range(0, len(l), N)]
for j in range(0,M):
    for i in range(j,N*M,M):
        print(i)
#     for q in range(i*M, (i+1)*M):
#         print(q)


0
3
1
4
2
5


In [70]:
@numba.jit()
def BaumWelch(n, M, N, k, x, tol):
    #randomly initialize pi, phi and T#
    vals = np.random.rand(N*M)
    pi = np.log(vals/np.sum(vals))
    A = np.zeros(shape = (N, N))
    B = np.zeros(shape = (N, M, M))
    phi = np.zeros(shape = (N*M, k))
    for i in range(0, N):
        vals1 = np.random.rand(N)
        A[i, ] = np.log(vals1/np.sum(vals1))
        for j in range(0, M):
            vals3 = np.random.rand(M)
            B[i,j,] = np.log(vals3/np.sum(vals3))
    for i in range(0,N*M):
        vals2 = np.random.rand(k)
        phi[i, ] = np.log(vals2/np.sum(vals2))
    
    iterations = 0
    convergence = 0
    pOld = 1E10
    
    #Initialize matricies for gamma and beta values#
    gamma = np.zeros(shape = (n, N*M))
    beta = np.zeros(shape = (n,N*M,N*M))
    
    #Stop iterations when log(p(x_1:n)) differs by tol between iterations#
    while convergence == 0:
        
        #Find Tmat which is AxB
        Tmat = np.zeros((N*M, N*M))
        count_ik = 0
        count_jl = 0
        count = 0
        for i in range(0,N):
            for q in range(0,M):
                count_jl = 0
                for j in range(0, N):
                    for l in range(0,M):

                        Tmat[count_ik,count_jl] = A[i,j] + B[j,q,l]
                        count_jl +=1
                count_ik +=1
                
        #Perform forward and backward algorithms# 
        g = forwardAlg(n, N*M, k, pi, Tmat, phi, x)
        h = backwardAlg(n, N*M, k, pi, Tmat, phi, x)
        pNew = pForward(g, x)
        
        ##E-Step##
    
        #Calculate gamma and beta#
        for t in range(0, n):
            gamma[t,] = g[t,] + h[t,] - pNew
        for t in range(0, n):
            for i in range(0, N*M):
                for j in range(0, N*M):
                    if t == 1:
                        beta[t,i,j] = 1
                    else:
                        beta[t,i,j] = Tmat[i,j] + phi[j, x[t]] + g[t-1, i] + h[t, j] - pNew
        ##M-Step##
    
        #Update pi, phi and Tmat#
        pi = gamma[0,] - logSumExp(gamma[0,])
        
        
        
        for i in range(0,N):
            for j in range(0,N):
                Asums = []
                for q in range(i*M, (i+1)*M):
                    for l in range(j*M, (j+1)*M):
                        Asums.append(logSumExp(beta[range(1,n),q,l]))

                A[i,j] = logSumExp(Asums) 
        
        A = np.log(np.exp(A)/np.sum(np.exp(A), axis = 1)[:, np.newaxis])
        
        for j in range(0,N):
            for q in range(0,M):
                for l in range(0,M):
                    Bsums = []
                    for i in range(q,N*M,M):
                        Bsums.append(logSumExp(beta[range(1,n),i,M*j+l]))
                    B[j,q,l] = logSumExp(Bsums) 

        B = np.log(np.exp(B)/np.sum(np.exp(B), axis = 2)[:,:,np.newaxis])
        
        for i in range(0,N*M):
            for w in range(0, k):
                indicies = np.where(x == w)
                phi[i,w] = logSumExp(gamma[indicies, i]) - logSumExp(gamma[:,i])
        
        criteria = abs(pOld - pNew)
        if criteria < tol:
            convergence = 1
        else:
            convergence = 0
            pOld = pNew
            iterations +=1
        return (iterations, pNew, np.exp(pi), np.exp(phi), np.exp(Tmat), np.exp(A), np.exp(B))
        

In [None]:
np.random.seed(77)
Cit1, Cp1, Cpi1, Cphi1, CTmat1, CA1, CB1 = BaumWelch(Cn1, 10, 15, Ck1, CxNotes, 0.01)
Cit2, Cp2, Cpi2, Cphi2, CTmat2, CA2, CB2 = BaumWelch(Cn2, 10, 15, Ck2, CxVel, 0.01)

Dit1, Dp1, Dpi1, Dphi1, DTmat1, DA1, DB1 = BaumWelch(Dn1, 10, 15, Dk1, DxNotes, 0.01)
Dit2, Dp2, Dpi2, Dphi2, DTmat2, DA2, DB2 = BaumWelch(Dn2, 10, 15, Dk2, DxVel, 0.01)

# Jit1, Jp1, Jpi1, Jphi1, JTmat1, JA1, JB1 = BaumWelch(Jn1, 10,15, Jk1, JxNotes, 0.01)
# Jit2, Jp2, Jpi2, Jphi2, JTmat2, JA2, JB2 = BaumWelch(Jn2, 10, 15, Jk2, JxVel, 0.01)

Pit1, Pp1, Ppi1, Pphi1, PTmat1, PA1, PB1 = BaumWelch(Pn1, 10, 15, Pk1, PxNotes, 0.01)
Pit2, Pp2, Ppi2, Pphi2, PTmat2, PA2, PB2 = BaumWelch(Pn2, 10, 15, Pk2, PxVel, 0.01)

  from ipykernel import kernelapp as app
  app.launch_new_instance()


In [80]:
JA1

array([[ 0.06658621,  0.06662086,  0.06664607,  0.06665262,  0.06665626,
         0.06672215,  0.06680797,  0.06666231,  0.06658309,  0.06660051,
         0.06666966,  0.06672067,  0.06679515,  0.06661199,  0.06666449],
       [ 0.06658393,  0.06659179,  0.06666926,  0.06677365,  0.06670953,
         0.06666468,  0.06672988,  0.06663478,  0.06671829,  0.0666793 ,
         0.06662215,  0.06671069,  0.06661376,  0.06655978,  0.06673855],
       [ 0.06670022,  0.06665726,  0.06659533,  0.06661806,  0.06668041,
         0.06659697,  0.06666382,  0.06670335,  0.06665937,  0.06668081,
         0.06672259,  0.06672374,  0.06660558,  0.06671536,  0.06667712],
       [ 0.06657743,  0.06670909,  0.0665961 ,  0.06665518,  0.06675621,
         0.06672827,  0.06673729,  0.06671742,  0.0667254 ,  0.06659915,
         0.06659227,  0.06663214,  0.06676997,  0.06662394,  0.06658015],
       [ 0.06654543,  0.06662375,  0.06669925,  0.06672344,  0.0667121 ,
         0.06661487,  0.0666884 ,  0.06667177, 

In [53]:
#randomly initialize pi, phi and T#
N = 2
M = 3
k = 3
n = 10
x = np.array([0,1,2,1,2,0,0,1,2,0])
vals = np.random.rand(N*M)
pi = np.log(vals/np.sum(vals))
A = np.zeros(shape = (N, N))
B = np.zeros(shape = (N, M, M))
phi = np.zeros(shape = (N*M, k))
for i in range(0, N):
    vals1 = np.random.rand(N)
    A[i, ] = np.log(vals1/np.sum(vals1))
    for j in range(0, M):
        vals3 = np.random.rand(M)
        B[i,j,] = np.log(vals3/np.sum(vals3))
for i in range(0,N*M):
    vals2 = np.random.rand(k)
    phi[i, ] = np.log(vals2/np.sum(vals2))

iterations = 0
convergence = 0
pOld = 1E10

#Initialize matricies for gamma and beta values#
gamma = np.zeros(shape = (n, N*M))
beta = np.zeros(shape = (n,N*M,N*M))

#Stop iterations when log(p(x_1:n)) differs by tol between iterations#


    #Find Tmat which is AxB
Tmat = np.zeros((N*M, N*M))
count_ik = 0
count_jl = 0
for i in range(0,N):
    for q in range(0,M):
        count_jl = 0
        for j in range(0, N):
            for l in range(0,M):

                Tmat[count_ik,count_jl] = A[i,j]*B[j,q,l]
                count_jl +=1
        count_ik +=1

#Perform forward and backward algorithms# 
g = forwardAlg(n, N*M, k, pi, Tmat, phi, x)
h = backwardAlg(n, N*M, k, pi, Tmat, phi, x)
pNew = pForward(g, x)

##E-Step##

#Calculate gamma and beta#
for t in range(0, n):
    gamma[t,] = g[t,] + h[t,] - pNew
for t in range(0, n):
    for i in range(0, N*M):
        for j in range(0, N*M):
            if t == 1:
                beta[t,i,j] = 1
            else:
                beta[t,i,j] = Tmat[i,j] + phi[j, x[t]] + g[t-1, i] + h[t, j] - pNew
##M-Step##

#Update pi, phi and Tmat#
pi = gamma[0,] - logSumExp(gamma[0,])


for i in range(0,N):
    for j in range(0,N):
        Asums = []
        for q in range(i*M, (i+1)*M):
            for l in range(j*M, (j+1)*M):
                Asums.append(logSumExp(beta[range(1,n),q,l]))
        
        A[i,j] = logSumExp(Asums) 

A = A/np.sum(A, axis = 1)[:, np.newaxis]

for j in range(0,N):
    for q in range(0,M):
        for l in range(0,M):
            Bsums = []
            for i in range(q,N*M,M):
                Bsums.append(logSumExp(beta[range(1,n),i,M*j+l]))
            B[j,q,l] = logSumExp(Bsums) 
            
B = B/np.sum(B, axis = 2)[:,:,np.newaxis]

for i in range(0,N*M):
    for w in range(0, k):
        indicies = np.where(x == w)
        phi[i,w] = logSumExp(gamma[indicies, i]) - logSumExp(gamma[:,i])

In [92]:
def decode(x, code):
    output = np.zeros(len(x))
    for i in range(0, len(x)):
        output[i] = code[x[i]]
    return output

In [51]:
decode(xNotes,possibleNotes)



array([ 64.,  37.,  49., ...,  37.,  44.,  49.])

In [89]:
int((2 - 2 % M)/M)

0

In [88]:
2 % M

2

In [93]:
def hmm(n, pi, phi, Tmat, A, B, code):
    N = A.shape[0]
    M = B.shape[1]
    k = phi.shape[1]
    zstates = range(0, N*M)
    rstates = range(0,N)
    sstates = range(0,M)
    xstates = range(0, k)
    R = np.zeros(n)
    S = np.zeros(n)
    x = np.zeros(n)
    z = np.random.choice(zstates, size = 1, p = pi)
    S[0] = z % M
    R[0] = int((z - z%M)/M)
    for j in range(1, n):
        R[j] = np.random.choice(rstates, size = 1, p = A[R[j-1], :])
        S[j] = np.random.choice(sstates, size = 1, p = B[R[j], S[j-1],:])
    for i in range(0, n):
        x[i] = np.random.choice(xstates, size = 1, p = phi[S[i], :])
    output = decode(x, code)
    return output


In [94]:
CnewNotes = hmm(Cn1, Cpi1, Cphi1, CTmat1, CA1, CB1, CpossibleNotes)
CnewVelocities = hmm(Cn2, Cpi2, Cphi2, CTmat2, CA2, CB2, CpossibleVelocities)

DnewNotes = hmm(Dn1, Dpi1, Dphi1, DTmat1, DA1, DB1, DpossibleNotes)
DnewVelocities = hmm(Dn2, Dpi2, Dphi2, DTmat2, DA2, DB2, DpossibleVelocities)

# JnewNotes = hmm(Jn1, Jpi1, Jphi1, JTmat1, JA1, JB1, JpossibleNotes)
# JnewVelocities = hmm(Jn2, Jpi2, Jphi2, JTmat2, JA2, JB2, JpossibleVelocities)

PnewNotes = hmm(Pn1, Ppi1, Pphi1, PTmat1, PA1, PB1, PpossibleNotes)
PnewVelocities = hmm(Pn2, Ppi2, Pphi2, PTmat2, PA2, PB2, PpossibleVelocities)



array([ 58.,  56.,  70., ...,  67.,  70.,  67.])

In [96]:
Coutput = pd.DataFrame(CnewNotes)
Coutput["vel"] = CnewVelocities
Coutput.to_csv("clair-de-luneRemix-2H.csv")

Doutput = pd.DataFrame(DnewNotes)
Doutput["vel"] = DnewVelocities
Doutput.to_csv("Dvorak9Remix-2H.csv")

# Joutput = pd.DataFrame(JnewNotes)
# Joutput["vel"] = JnewVelocities
# Joutput.to_csv("jupiterRemix-2H.csv")

Poutput = pd.DataFrame(PnewNotes)
Poutput["vel"] = PnewVelocities
Poutput.to_csv("pachelbelRemix-2H.csv")