In [1]:
import numpy as np
import pandas as pd
import numba

In [2]:
clair = pd.read_csv("CSV/clair-de-lune.csv", skiprows = 192, header = None).dropna(axis = 0)
dvorak = pd.read_csv("CSV/Dvorak9Largo.csv", skiprows = 98, header = None).dropna(axis = 0)
jupiter = pd.read_csv("CSV/Jupiter.csv", skiprows = 39, header = None).dropna(axis = 0)
pachelbel = pd.read_csv("CSV/pachelbel.csv", skiprows = 27, header = None).dropna(axis = 0)

In [3]:
#Select Notes and velocity columns#
Cnotes = clair.ix[:,4]
Cvelocity = clair.ix[:,5]

#Find possible unique notes and velocities
CpossibleNotes = np.unique(Cnotes)
CpossibleVelocities =  np.unique(Cvelocity)


#Select Notes and velocity columns#
Dnotes = dvorak.ix[:,4]
Dvelocity = dvorak.ix[:,5]

#Find possible unique notes and velocities
DpossibleNotes = np.unique(Dnotes)
DpossibleVelocities =  np.unique(Dvelocity)

#Select Notes and velocity columns#
Jnotes = jupiter.ix[:,4]
Jvelocity = jupiter.ix[:,5]

#Find possible unique notes and velocities
JpossibleNotes = np.unique(Jnotes)
JpossibleVelocities =  np.unique(Jvelocity)

#Select Notes and velocity columns#
Pnotes = pachelbel.ix[:,4]
Pvelocity = pachelbel.ix[:,5]

#Find possible unique notes and velocities
PpossibleNotes = np.unique(Pnotes)
PpossibleVelocities =  np.unique(Pvelocity)

In [4]:
Ck1 = len(CpossibleNotes)
Ck2 = len(CpossibleVelocities)

Dk1 = len(DpossibleNotes)
Dk2 = len(DpossibleVelocities)

Jk1 = len(JpossibleNotes)
Jk2 = len(JpossibleVelocities)

Pk1 = len(PpossibleNotes)
Pk2 = len(PpossibleVelocities)


In [5]:
def encode(x, code):
    output = np.zeros(len(x))
    for i in range(0, len(x)):
        output[i] = int(np.where(code == x[i])[0]) 
    return output

In [6]:
CxNotes = encode(Cnotes, CpossibleNotes)
CxVel = encode(Cvelocity, CpossibleVelocities)

DxNotes = encode(Dnotes, DpossibleNotes)
DxVel = encode(Dvelocity, DpossibleVelocities)

JxNotes = encode(Jnotes, JpossibleNotes)
JxVel = encode(Jvelocity, JpossibleVelocities)

PxNotes = encode(Pnotes, PpossibleNotes)
PxVel = encode(Pvelocity, PpossibleVelocities)

In [14]:
#@numba.njit()
#Function using the log-sum-exp trick#
def logSumExp(a):
    b = np.max(a)
    return(b + np.log(np.sum(np.exp(a-b))))

#####################
##Forward Algorithm##
#####################

#Function to run forward algorithm, arguments are n = # obs, m = # states for z,#
#k = # states for x, pi = initial distribution(m vector), 
#Tmat = transition matrix (mxm), phi = emission distribution (m x k matrix)#
#x is the observed data#
#takes log of pi, Tmat and phi
@numba.jit()
def forwardAlg(n, m, k, pi, Tmat, T2mat, phi, x):
    g = np.zeros((n,m))
    G = np.zeros((n, m, m))
    for i in range(0,m):
        g[0,i] = (pi[i]) + (phi[i, x[0]])
        for j in range(0, m):
            G[1,i,j] = g[0,i] + Tmat[i,j] + phi[j, x[1]]
    
    for j in range(1, n):
        for l in range(0, m):
            g[j,l] = logSumExp(g[j-1, :]+(Tmat[:,l])+(phi[l,x[j]]))
            if j != 1:
                for t in range(0,m):
                    G[j,l,t] = logSumExp(G[j-1,:,l] + T2mat[:,l,t] + phi[t, x[j]])
    return(g, G)



def pForward(g, x):
    pXf = logSumExp(g[len(x),:])
    return(pXf)

In [15]:
m = 10
Cn1 = len(CxNotes)
Cn2 = len(CxVel)

Dn1 = len(DxNotes)
Dn2 = len(DxVel)

Jn1 = len(JxNotes)
Jn2 = len(JxVel)

Pn1 = len(PxNotes)
Pn2 = len(PxVel)

k1 = len(JpossibleNotes)


pi1 = np.full(m, 1/m)
phi1 = np.full((m,k1), 1/k1)
Tmat1 = np.full((m,m), 1/m)
T2mat1 = np.full((m,m,m), 1/m)
g1, G1 = forwardAlg(Jn1, m, k1, np.log(pi1), np.log(Tmat1), np.log(T2mat1), np.log(phi1), JxNotes)
G1



array([[[    0.        ,     0.        ,     0.        , ...,
             0.        ,     0.        ,     0.        ],
        [    0.        ,     0.        ,     0.        , ...,
             0.        ,     0.        ,     0.        ],
        [    0.        ,     0.        ,     0.        , ...,
             0.        ,     0.        ,     0.        ],
        ..., 
        [    0.        ,     0.        ,     0.        , ...,
             0.        ,     0.        ,     0.        ],
        [    0.        ,     0.        ,     0.        , ...,
             0.        ,     0.        ,     0.        ],
        [    0.        ,     0.        ,     0.        , ...,
             0.        ,     0.        ,     0.        ]],

       [[  -11.19684392,   -11.19684392,   -11.19684392, ...,
           -11.19684392,   -11.19684392,   -11.19684392],
        [  -11.19684392,   -11.19684392,   -11.19684392, ...,
           -11.19684392,   -11.19684392,   -11.19684392],
        [  -11.19684392,

In [47]:
g = forwardAlg(n1,m,k1,np.log(pi1),np.log(Tmat1),np.log(phi1),xNotes)
pXf = pForward(g,xNotes)
pXf

  if __name__ == '__main__':


-10190.648290046434

In [9]:
@numba.jit()
def backwardAlg(n, m, k, pi, Tmat, phi, x):
    r = np.zeros((n,m))
    for j in range(n-2, -1, -1):
        for l in range(0, m):
            r[j, l] = logSumExp(r[j+1,: ] + Tmat[l,:] + phi[:, x[j+1]])
    
    return(r)
@numba.jit()
#Function to return p(x_1:n) from matrix from backward algorithm
def pBackward(r, pi, phi, x):
    pXb = logSumExp(r[0,: ]+ pi +phi[:,x[0]])
    return(pXb)

In [18]:
# def pForward(g, x):
#     pXf = logSumExp(g[len(x),:])
#     return(pXf)
len(JxNotes)

1166

In [45]:
r = backwardAlg(n1, m, k1, np.log(pi1), np.log(Tmat1), np.log(phi1), xNotes)
pBackward(r, np.log(pi1), np.log(phi1), xNotes)

  if __name__ == '__main__':
  from ipykernel import kernelapp as app


-10190.648290046433

In [16]:
@numba.jit()
def BaumWelch(n, m, k, x, tol):
    #randomly initialize pi, phi and T#
    vals = np.random.rand(m)
    pi = np.log(vals/np.sum(vals))
    Tmat = np.zeros(shape = (m, m))
    phi = np.zeros(shape = (m, k))
    T2matOld = np.zeros(shape = (m,m,m))
    T2matNew = np.zeros(shape = (m,m,m))
    for i in range(0, m):
        vals1 = np.random.rand(m)
        Tmat[i, ] = np.log(vals1/np.sum(vals1))
        vals2 = np.random.rand(k)
        phi[i, ] = np.log(vals2/np.sum(vals2))
        for j in range(0,m):
            vals3 = np.random.rand(m)
            T2matOld[i,j, ] = np.log(vals3/np.sum(vals3))
    
    iterations = 0
    convergence = 0
    pOld = 1E10
    
    #Initialize matricies for gamma and beta values#
    gamma = np.zeros(shape = (n, m))
    beta = np.zeros(shape = (n,m,m))
    
    #Stop iterations when log(p(x_1:n)) differs by tol between iterations#
    while convergence == 0:
        #Perform forward and backward algorithms# 
        g = forwardAlg(n, m, k, pi, Tmat, T2matOld, phi, x)
        h = backwardAlg(n, m, k, pi, Tmat, phi, x)
        pNew = pForward(g, x)
        
        ##E-Step##
    
        #Calculate gamma and beta#
        for t in range(0, n):
            gamma[t,] = g[t,] + h[t,] - pNew
        for t in range(0, n):
            for i in range(0, m):
                for j in range(0, m):
                    if t == 1:
                        beta[t,i,j] = 1
                    else:
                        beta[t,i,j] = Tmat[i,j] + phi[j, x[t]] + g[t-1, i] + h[t, j] - pNew
        ##M-Step##
    
        #Update pi, phi and Tmat#
        pi = gamma[0,] - logSumExp(gamma[0,])
        for i in range(0, m):
            for j in range(0, m):
                Tmat[i,j] = logSumExp(beta[range(1, n), i, j]) - logSumExp(beta[range(1,n), i, ])
                for q in range(0,m):
                    T2matNew[i,j,q] = logSumExp(G[range(1, n-1),i,j] + T2matOld[i,j,q] + 
                                                phiOld[k, x[2:n]] + h[range(2,n),q])
                T2matNew[i,j,] = T2matNew[i,j,] - logSumExp(T2matNew[i,j,:])
        for i in range(0,m):
            for w in range(0, k):
                indicies = np.where(x == w)
                phi[i,w] = logSumExp(gamma[indicies, i]) - logSumExp(gamma[:,i])
        
        criteria = abs(pOld - pNew)
        if criteria < tol:
            convergence = 1
        else:
            convergence = 0
            pOld = pNew
            T2matOld = T2matNew
            iterations +=1
        return (iterations, pNew, np.exp(pi), np.exp(phi), np.exp(Tmat), np.exp(T2mat))
        


In [17]:
np.random.seed(17)
Jit1, Jp1, Jpi1, Jphi1, JTmat1, JT2mat1 = BaumWelch(Jn1, 10, Jk1, JxNotes.astype(int), 0.0001)
Jit2, Jp2, Jpi2, Jphi2, JTmat2, JT2mat2 = BaumWelch(Jn2, 10, Jk2, JxVel.astype(int), 0.0001)

TypeError: tuple indices must be integers or slices, not tuple

In [57]:
m

10

In [20]:
JT2mat1

array([[[ 0.00031263,  0.0172487 ,  0.01132697, ...,  0.02456528,
          0.01827668,  0.01928061],
        [ 0.00928684,  0.01893023,  0.0139213 , ...,  0.00351702,
          0.01146965,  0.01202267],
        [ 0.00178214,  0.01304072,  0.00337616, ...,  0.01176533,
          0.01587449,  0.01713996],
        ..., 
        [ 0.00513735,  0.0079902 ,  0.00933119, ...,  0.01176925,
          0.00328517,  0.00864295],
        [ 0.01279221,  0.01784206,  0.00057036, ...,  0.0205215 ,
          0.0164821 ,  0.00434566],
        [ 0.01790324,  0.01520443,  0.00890094, ...,  0.0191409 ,
          0.00652243,  0.00424464]],

       [[ 0.01650984,  0.01263545,  0.0236473 , ...,  0.02057817,
          0.01267448,  0.01278765],
        [ 0.02242092,  0.00853511,  0.01561717, ...,  0.01391078,
          0.00758901,  0.0046699 ],
        [ 0.00526881,  0.01140236,  0.01978073, ...,  0.00846643,
          0.01634448,  0.01963015],
        ..., 
        [ 0.01739302,  0.0022545 ,  0.02512482, ...,

In [25]:
np.random.seed(732)
Cit1, Cp1, Cpi1, Cphi1, CTmat1, CT2mat1 = BaumWelch(Cn1, 75, Ck1, CxNotes.astype(int), 0.0001)
Cit2, Cp2, Cpi2, Cphi2, CTmat2, CT2mat2 = BaumWelch(Cn2, 75, Ck2, CxVel.astype(int), 0.0001)

Dit1, Dp1, Dpi1, Dphi1, DTmat1, DT2mat1 = BaumWelch(Dn1, 75, Dk1, DxNotes.astype(int), 0.0001)
Dit2, Dp2, Dpi2, Dphi2, DTmat2, DT2mat2 = BaumWelch(Dn2, 75, Dk2, DxVel, 0.0001)


Pit1, Pp1, Ppi1, Pphi1, PTmat1, PT2mat1 = BaumWelch(Pn1, 75, Pk1, PxNotes.astype(int), 0.0001)
Pit2, Pp2, Ppi2, Pphi2, PTmat2, PT2mat2 = BaumWelch(Pn2, 75, Pk2, PxVel.astype(int), 0.0001)

KeyboardInterrupt: 

In [26]:
def decode(x, code):
    output = np.zeros(len(x))
    for i in range(0, len(x)):
        output[i] = code[x[i]]
    return output

In [51]:
decode(xNotes,possibleNotes)



array([ 64.,  37.,  49., ...,  37.,  44.,  49.])

In [30]:
def hmm(n, pi, phi, Tmat, T2mat, code):
    m = Tmat.shape[0]
    k = phi.shape[1]
    zstates = range(0, m)
    xstates = range(0, k)
    z = np.zeros(n)
    x = np.zeros(n)
    z[0] = np.random.choice(zstates, size = 1,  p = pi)
    z[1] = np.random.choice(zstates, size = 1,  p = Tmat[z[0], :])
    for j in range(2, n):
        z[j] = np.random.choice(zstates, size = 1,  p = T2mat[z[j-2],z[j-1], :])
    for i in range(0, n):
        x[i] = np.random.choice(xstates, size =1, p = phi[z[i], :])
    output = decode(x, code)
    return output


In [34]:
zstates = range(0, m)
Jpi1

0.011695355479488755

In [47]:
#CnewNotes = hmm(Cn1, Cpi1, Cphi1, CTmat1, CT2mat1, CpossibleNotes)
#CnewVelocities = hmm(Cn2, Cpi2, Cphi2, CTmat2, CT2mat2, CpossibleVelocities)

# DnewNotes = hmm(Dn1, Dpi1, Dphi1, DTmat1, DpossibleNotes)
# DnewVelocities = hmm(Dn2, Dpi2, Dphi2, DTmat2, DpossibleVelocities)

JnewNotes = hmm(Jn1, Jpi1, Jphi1, JTmat1, JT2mat1, JpossibleNotes)
JnewVelocities = hmm(Jn2, Jpi2, Jphi2, JTmat2, JT2mat2,JpossibleVelocities)

# PnewNotes = hmm(Pn1, Ppi1, Pphi1, PTmat1, PpossibleNotes)
# PnewVelocities = hmm(Pn2, Ppi2, Pphi2, PTmat2, PpossibleVelocities)



In [50]:
JnewNotes[1:100]
JpossibleNotes

array([ 34.,  36.,  38.,  39.,  41.,  43.,  44.,  46.,  48.,  50.,  51.,
        53.,  55.,  56.,  58.,  60.,  62.,  63.,  65.,  67.,  68.,  70.,
        72.,  74.,  75.,  77.,  79.])

In [42]:
# Coutput = pd.DataFrame(CnewNotes)
# Coutput["vel"] = CnewVelocities
# Coutput.to_csv("clair-de-luneRemix.csv")

# Doutput = pd.DataFrame(DnewNotes)
# Doutput["vel"] = DnewVelocities
# Doutput.to_csv("Dvorak9Remix.csv")

Joutput = pd.DataFrame(JnewNotes)
Joutput["vel"] = JnewVelocities
Joutput.to_csv("jupiterRemix2.csv")

# Poutput = pd.DataFrame(PnewNotes)
# Poutput["vel"] = PnewVelocities
# Poutput.to_csv("pachelbelRemix.csv")