In [224]:
import numpy as np
from numpy import log, exp, zeros, dot, array
import itertools
import pandas as pd
from itertools import product
import timeit
import matplotlib as mpl
import numpy as np
from matplotlib import pyplot as plt
import numpy as np
import warnings
import scipy.io
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.models import Model
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Input

Using Theano backend.
Using gpu device 0: GeForce GTX 1070 (CNMeM is disabled, cuDNN 5105)


In [380]:
def log_sum_exp(x):
    
    m = x.max()
    x_s = x - m
    return m + log((exp(x_s)).sum())


def label_seq(l,j):
    '''Creates a list of all posible length l sequences taking values in
       {0,1,...,j-1}. Out put list is of length j^l'''
    
    
    return list(product(list(range(j)), repeat=l))

def find_legit_vals(k,l,n):
    '''Finds the only possible label pairs (i,j) with f_k(i,j,x,t) != 0.
       Will be used to speed up gradient computation.
       l: Number of labels
       n: Size of a nodes feature vector'''
    
    res = []
    k_s = k - n*l
    
    if k_s < 0:  #Means k is in the Unary features
        
        i = k // n
        
        for j in range(l):
            
            res.append((i,j))
            
    else:  #Means k is a Binary feature
        
        f = (k_s // n)
        j = f % l
        i = f // l
        res.append((i,j))  
    
    return res

C_init = np.array([1,1,0])

class CRF(object):
    
    def __init__(self, feature_function, K, L, lb=1, W = 'none'):
        ''' If our labels belong to space L and our observations belonging to X amd
            then length of our chain is T (call {0,1, ... , T-1} = T') then 
            feature_functions is a vector valued function, f: L^2 x X x T' --> R^K
            i.e. f(i,j,x,t) is a K-d real valued vector and has component functions of the form
            f_k(i,j,x,t) to be specified, think of (i,j) = (y_t, y_t-1). It will have an optional
            keyword argument 'project' which defaults to -1, if a positive integer k,
            it will project onto the k-th component.
            
            K: # of Features
            
            L: Number of labels. Will assume labels have been encoded as integers in {0,...,L-1} 
            
            lamb: L2 regularization constant'''
        
        self.K = K
        self.L = L
        self.N = K//L - L
        self.Lambda = lb
        self.f_x = feature_function
        
        if type(W) == str:
            if W == 'none':
                self.W = np.random.randn(K)
            if W == 'U':
                self.W = np.zeros(K)
                self.W[0] = 1
                
            if W == 'B':
                self.W = np.ones(K)
                self.W[1:] = np.random.randn(K-1)
        else:
            self.W = W
            
    def get_weights(self):
        
        return np.asarray(self.W)
        
    def  log_forward(self, x):
        '''This computes the log(alphas) as in the forward-backward algorithm in order to
           be used for inference tasks later on.
           x is an observation.'''
        
        f = self.f_x
        T = x.shape[0]
        alphas = zeros((T, self.L))
        
        # Initialization
        
        for l in range(self.L):
            
            alphas[0,l] = dot(self.W, f(l,0,x,0,self.N,self.L))
            
        # Recursion
        
        for t in range(1,T):
            
            for l in range(self.L):
                
                psi = array([dot(self.W, f(l,i,x,t,self.N,self.L)) for i in range(self.L)])
                
                alphas[t,l] = log_sum_exp(psi + alphas[t-1])
            
        return alphas
    
        
    def log_backward(self, x):
        '''This computes the log(betas) as in the forward-backward algorithm in order to
           be used for inference tasks later on.
           x is an observation.'''
        
        # Initialization
        
        f = self.f_x
        T = x.shape[0]
        betas = np.ones((T, self.L))
      
        # Recursion
        
        for t in range(T-2,-1,-1):
            
            for l in range(self.L):
                
                psi = array([dot(self.W, f(i,l,x,t+1,self.N,self.L)) for i in range(self.L)])
                
                betas[t][l] = log_sum_exp(psi + betas[t+1])
                
        return betas
    
    
    def log_partition(self, x):
        '''Efficient computation of the log of the partition function Z(x) appearing in CRF model.
           Input an observation and inital label (for forward algorithm) and output is log(Z(x))'''
        
        alphas = self.log_forward(x)
        
        return log_sum_exp(alphas[-1])
    
    
    def MAP(self, x):
        '''Viterbi algortithm for computing the most likely label of a sequence with
           given observation vector x using maximum a posteriori estimation. Using log
           sum version for numeric stability'''
        
        f = self.f_x
        T = x.shape[0]   
        # Initialization
        
        deltas = np.zeros((T, self.L))
        delt_arg = np.zeros((T, self.L))
        
        for l in range(self.L):
            
            deltas[0][l] = dot(self.W, f(l,0,x,0,self.N,self.L))  # Not sure about this.
            
        # Recursion
        
        for t in range(1,T):
            
            for l in range(self.L):
                
                psi = array([dot(self.W, f(l,i,x,t,self.N,self.L)) for i in range(self.L)])
                
                deltas[t][l] = (psi + deltas[t-1]).max()
                delt_arg[t][l] = (psi + deltas[t-1]).argmax()
            
        map_lab = np.zeros(T, dtype='i4')
        map_lab[-1] = deltas[-1].argmax()
        
        for t in range(T-2,-1,-1):
            
            map_lab[t] = delt_arg[t+1][map_lab[t+1]]
            
                
        return tuple(map_lab)
        
        
    def marginal(self,i,j,x,t):
        '''Using the forward backward algorithm to compute the marginal p(y_t-1=i,y_t=j|x)'''
        
        f = self.f_x
        alphas = self.log_forward(x)
        betas = self.log_backward(x)
        psi = dot(self.W,f(j,i,x,t))
        psi_b = np.array([dot(self.W,f(k,0,x,0,self.N,self.L)) for k in range(self.L)])
        log_joint = alphas[t-1][i] + psi + betas[t][j] - log_sum_exp(psi_b + betas[0])
        
        return exp(log_joint)
                       
    def naive_comp(self, x, out='Z'):
        '''Brute force computation of log(Z(x)) or MAP (if out = 'MAP')'''
        
        f = self.f_x
        T = x.shape[0]
        
        # Get List of all possible label sequences
        
        lab_seq = label_seq(T, self.L)
        
        psi = np.zeros(len(lab_seq))
        
        for k in range(len(lab_seq)):
            
            lab = lab_seq[k]
            temp = np.zeros(T)
            temp[0] = dot(self.W, f(lab[0], 0, x, 0,self.N,self.L))
            
            for t in range(1,T):
                
                temp[t] = dot(self.W, f(lab[t], lab[t-1], x, t))
            
            psi[k] = temp.sum()
        
        arg_m_i = psi.argmax()
        
        return log_sum_exp(psi) if out == 'Z' else lab_seq[arg_m_i]
    
    
    def log_pseudo_marg(self, i, j, k, x, t):
        '''Computes the log probability for pseudolikelihood training.
            i = y_t
            j = y_t-1
            k = y_t+1'''
        
        f = self.f_x
        Z_c = np.zeros(self.L)
        L = self.L
        M = self.N
        W = self.W[1:]
        W_u = self.W[:L*M]
        if t == 0:
            
            for l in range(self.L):
                
                Z_c[l] = f(l,0,x,t,M,L)[0] + W[L*k + l]
            
            return f(i,0,x,t,M,L)[0] + W[L*k + i] - log_sum_exp(Z_c)
        
        elif 0 < t and t < x.shape[0]-1:
            
            for l in range(self.L):
                
                Z_c[l] = f(l,0,x,t,M,L)[0] + W[L*l + j] + W[L*k + l]
            
            return f(i,0,x,t,M,L)[0] + W[L*k + i] + W[L*i + j] - log_sum_exp(Z_c)
        
        else:
            
            for l in range(self.L):
                
                Z_c[l] = f(l,0,x,t,M,L)[0] + W[L*l + j]
            
            return f(i,0,x,t,M,L)[0] + W[L*i + j] - log_sum_exp(Z_c)
            
    def grad_pll(self, X, Y):
        '''Gradient of the (negayive) - pseudo log likelihood.'''
        
        f = self.f_x
        lb = self.Lambda
        L = self.L
        M = self.N
        #grad_u = np.zeros(M*L)
        grad_b = np.zeros(L**2)
        
        for n in range(X.shape[0]):
            
            T_n = X[n].shape[0]
            
            for t in range(T_n):
                
                E = np.zeros(L**2)
                #E_u = np.zeros(M*L)
                for l in range(self.L):
                    
                    if t == 0:
                        
                        E += (np.exp(self.log_pseudo_marg(l,0,Y[n][t+1],X[n],0)) * 
                              f(Y[n][t+1],l,X[n],0,M,L)[1:])
                        #E_u += (np.exp(self.log_pseudo_marg(l,0,Y[n][t+1],X[n],0)) * 
                                #f(l,0,X[n],0,M,L)[:M*L])
                        
                    elif 0 < t and t < T_n - 1:
                        
                        E += (np.exp(self.log_pseudo_marg(l,Y[n][t-1],Y[n][t+1],X[n],t))*
                              (f(Y[n][t+1],l,X[n],t,M,L)[1:] +
                                f(l,Y[n][t-1],X[n],t,M,L)[1:]))
                        #E_u += (np.exp(self.log_pseudo_marg(l,Y[n][t-1],Y[n][t+1],X[n],t))*
                                #f(l,0,X[n],t,M,L)[:M*L])
                        
                    else:
                        
                        E += np.exp(self.log_pseudo_marg(l,Y[n][t-1],0,X[n],t)) * f(l,Y[n][t-1],X[n],t,M,L)[1:]
                        #E_u += np.exp(self.log_pseudo_marg(l,Y[n][t-1],0,X[n],t)) * f(l,0,X[n],t,M,L)[:M*L]
                    
                        
                if t == 0:
                    
                    grad_b += f(Y[n][t+1], Y[n][t],X[n],t,M,L)[1:]-E
                    
                elif 0 < t and t < T_n-1:
                    
                    grad_b += f(Y[n][t+1],Y[n][t],X[n],t,M,L)[1:]+f(Y[n][t],Y[n][t-1],X[n],t,M,L)[1:]-E
                    
                else: 
                    
                    grad_b += f(Y[n][t],Y[n][t-1],X[n],t,M,L)[1:]-E
                    
                #grad_u += f(Y[n][t],0,X[n],t,M,L)[:M*L]-E_u
                
                    
        return lb * self.W[1:] - grad_b   
        
    
    def gradient(self, X, Y):
        ''' Creates the gradient vector of the log-likelihood. 
            X, Y: Are arrays containing training examples.'''
        
        f = self.f_x
        lamb = self.Lambda
        grad = np.zeros(self.K)
        
        for k in range(self.K):
            
            val_pair = find_legit_vals(k, self.L, X.shape[-1])
            first_term = np.zeros((X.shape[0],T))
            
            for n in range(X.shape[0]):
                
                T = X[n].shape[0]
                for t in range(T):
                    
                    first_term[n][t] = f(Y[n][t], Y[n][t-1], X[n], t, project=k)
            
            sec_term = np.zeros((X.shape[0],self.T))
            
            for n in range(X.shape[0]):
                
                for t in range(T):
                    
                    marginals = np.zeros(len(val_pair))
                        
                    for j in range(len(val_pair)):
                        
                        y, y_p = val_pair[j]
                        marginals[j] = (f(y, y_p, X[n], t, project=k) * 
                                        self.marginal(y, y_p, X[n], t))
                    
                    sec_term[n][t] = marginals.sum()
                    
            grad[k] = (first_term + sec_term).sum() - self.W[k] * lamb
            print(k)
        return grad
    
    
    def gradient_f(self, X, Y):
        
        f = self.f_x
        lamb = self.Lambda
        grad_f = np.zeros(self.K)
        lab_pairs = label_seq(2,self.L)
        
        for n in range(X.shape[0]):
            
            T = X[n].shape[0]
            grad = np.zeros(self.K)
            x, y = X[n], Y[n]
            
            for t in range(T):
            
                grad = grad + f(y[t],y[t-1],x,t)
        
            for pair in lab_pairs:
                print(pair)
                y, y_p = pair[0], pair[1]
                grad_temp = np.zeros(self.K)
            
                for t in range(T):
                
                    marg = self.marginal(y,y_p,x,t)
                    grad_temp = grad_temp + (marg*f(y,y_p,x,t))
        
                grad = grad - grad_temp
            
            grad_f = grad_f + grad
    
        return lamb * self.W  - grad_f
    
    def reg_neg_ll(self, X, Y):
        
        f = self.f_x
        res = 0
        for n in range(X.shape[0]):
            
            T = X[n].shape[0]
            s = 0
            
            for t in range(T):
            
                s += np.dot(self.W,f(Y[n][t],Y[n][t-1],X[n],t))
            
            res += self.log_partition(X[n]) - s
         
        return res

In [2]:
data = scipy.io.loadmat('/home/james/anaconda3/data/HW/Chalearn/BOFData/devel01_data.mat')

In [3]:
data.keys()

dict_keys(['label_tr', 'videoId_te', 'BOF_tr_M', 'videoId_tr', 'label_te', 'BOF_te_M', 'BOF_tr_K', '__version__', '__globals__', '__header__', 'BOF_te_K'])

array([[  1.82574186e-01,   1.82574186e-01,   1.82574186e-01,
          1.82574186e-01,   1.82574186e-01,   1.82574186e-01,
          1.82574186e-01,   1.82574186e-01,   1.82574186e-01,
          1.82574186e-01,   1.82574186e-01,   1.82574186e-01,
          1.82574186e-01,   1.82574186e-01,   1.82574186e-01,
          1.82574186e-01,   1.82574186e-01,   1.82574186e-01,
          1.82574186e-01,   1.82574186e-01,   1.82574186e-01,
          1.82574186e-01,   1.82574186e-01,   1.82574186e-01,
          1.82574186e-01,   1.82574186e-01,   1.82574186e-01,
          1.82574186e-01,   1.82574186e-01,   1.82574186e-01],
       [  9.93013661e-17,   9.93013661e-17,   9.93013661e-17,
          9.93013661e-17,   9.93013661e-17,   9.93013661e-17,
          9.93013661e-17,   9.93013661e-17,   9.93013661e-17,
          9.93013661e-17,   9.93013661e-17,   9.93013661e-17,
          9.93013661e-17,   9.93013661e-17,   9.93013661e-17,
          9.93013661e-17,   9.93013661e-17,   8.94427191e-01,
       

In [248]:
# Data Extraction Training

X_tr, y_tr = [], []

for k in range(30):
    
    T = data['BOF_tr_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data['BOF_tr_M'][0][k][t,:]
        feat[t,30:] = data['BOF_tr_K'][0][k][t,:]
        lab[t] = data['label_tr'][0][k][t,0]
    X_tr.append(feat)
    y_tr.append(lab)
    
X_tr = np.asarray(X_tr, dtype='object')
y_tr = np.asarray(y_tr, dtype='object')

X_n, y_n = np.zeros((908,60)), np.zeros((908,11), dtype='i4')

l = 0
for k in range(30):
    for j in range(X_tr[k].shape[0]):
        X_n[l] = X_tr[k][j]
        y_n[l][y_tr[k][j]] = 1
        l += 1

In [284]:
from keras.layers import Dropout

inp = Input((60,), batch_shape=(128,60))

out1 = Dense(256, activation='relu')(inp)
out1 = Dropout(0.2)(out1)
out1 = Dense(11, activation='softmax', bias=False)(out1) 

unary_model = Model(inp,out1)

In [285]:
from keras.optimizers import adam

sgd = adam(0.0007,epsilon=1e-8)

unary_model.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [286]:
unary_model.fit(X_n,y_n,batch_size=128,validation_split=0.10,verbose=2, nb_epoch=100)

Train on 817 samples, validate on 91 samples
Epoch 1/100
0s - loss: 2.3497 - acc: 0.1579 - val_loss: 2.3700 - val_acc: 0.2747
Epoch 2/100
0s - loss: 2.2649 - acc: 0.3035 - val_loss: 2.3634 - val_acc: 0.2198
Epoch 3/100
0s - loss: 2.1922 - acc: 0.3023 - val_loss: 2.3701 - val_acc: 0.1758
Epoch 4/100
0s - loss: 2.1212 - acc: 0.2815 - val_loss: 2.3788 - val_acc: 0.1538
Epoch 5/100
0s - loss: 2.0567 - acc: 0.2889 - val_loss: 2.3791 - val_acc: 0.1538
Epoch 6/100
0s - loss: 1.9950 - acc: 0.3084 - val_loss: 2.3591 - val_acc: 0.1868
Epoch 7/100
0s - loss: 1.9326 - acc: 0.3623 - val_loss: 2.3302 - val_acc: 0.2527
Epoch 8/100
0s - loss: 1.8665 - acc: 0.4321 - val_loss: 2.2929 - val_acc: 0.2857
Epoch 9/100
0s - loss: 1.7903 - acc: 0.5275 - val_loss: 2.2449 - val_acc: 0.3736
Epoch 10/100
0s - loss: 1.7250 - acc: 0.5838 - val_loss: 2.1756 - val_acc: 0.4066
Epoch 11/100
0s - loss: 1.6530 - acc: 0.6242 - val_loss: 2.1060 - val_acc: 0.3956
Epoch 12/100
0s - loss: 1.5723 - acc: 0.6377 - val_loss: 2.047

<keras.callbacks.History at 0x7f6299258eb8>

In [287]:
inpp = Input((60,), batch_shape=(1,60))

out1 = Dense(256, activation='relu',
             weights=unary_model.layers[1].get_weights())(inpp)
out1 = Dropout(0.2)(out1)
out1 = Dense(11, activation='linear', bias=False,
             weights=unary_model.layers[-1].get_weights())(out1) 

unary_model1 = Model(inpp,out1)

In [290]:
unary_model1.predict(X_tr[0][0].reshape((1,60)))[0]

array([ 1.71292162, -1.44587445, -1.01723075, -2.1560452 , -2.98591805,
       -2.20095158, -2.36557651, -3.10711026, -2.39063001, -1.84378684,
       -2.5490005 ], dtype=float32)

In [57]:
# Data Extraction Testing

X_ts, y_ts = [], []

for k in range(17):
    
    T = data['BOF_te_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data['BOF_te_M'][0][k][t,:]
        feat[t,30:] = data['BOF_te_K'][0][k][t,:]
        lab[t] = data['label_te'][0][k][t,0]
    X_ts.append(feat)
    y_ts.append(lab)
    
X_ts = np.asarray(X_ts, dtype='object')
y_ts = np.asarray(y_ts, dtype='object')

In [58]:
y_ts[0]

array([5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 7, 7, 7, 7, 7, 7, 7, 7, 0,
       0, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 0, 0, 0, 6, 6, 6, 6,
       6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 0, 0, 0], dtype=int32)

In [60]:
max_l = 0 

for k in range(30):
    
    max_l = max(max_l, y_tr[k].max())
    
for k in range(17):
    
    max_l = max(max_l, y_ts[k].max())
    
max_l

10

In [293]:
def feat_func(i,j,x,t,N,L):
    
    binary_chi = np.zeros(L**2)
    #unary_chi = np.zeros(N*L)
    #unary_chi[i*N:(i+1)*N] = x[t]
    unary_chi = unary_model1.predict(x[t].reshape((1,60)))[0,i]
    
    if t > 0:
    
        binary_chi[(L*i+j)] = 1
        
    
    
    output = np.zeros(1 + L**2)
    output[0] = unary_chi
    output[1:] = binary_chi
    
    return output

In [300]:
# Adam stochastic mini-batch gradient descent on the (negative) pseudo-log-liklelihood.
# Dataset 1

from scipy.spatial.distance import hamming 

crf = CRF(feat_func,1+11**2,11, 1e-7, best_W)
crf_u = CRF(feat_func,1+11**2,11, 1e-7, 'U')
alpha = 0.07
B1, B2 = 0.9, 0.9999
eps = 1e-7
m, v = np.zeros(crf.K-1), np.zeros(crf.K-1)
num_epochs = 2
batch_size = 5
epoch = 1
#best_W = np.zeros(11**2)
#best_acc = 0

acc_t1 = np.zeros(17)
acc_u = np.zeros(17)
for n in range(17):
    acc_t1[n] = 1 - hamming(crf.MAP(X_ts[n]), y_ts[n])
    acc_u[n] = 1 - hamming(crf_u.MAP(X_ts[n]), y_ts[n])
print('Initial accuracy score Binary: ',100* acc_t1.mean(), '%.')
print('Accuracy score Unary only: ',100* acc_u.mean(), '%.')

while epoch <= num_epochs:
    
    min_batch = np.arange(X_tr.shape[0])
    np.random.shuffle(min_batch)
    
    for k in range(0, X_tr.shape[0],batch_size):
        
        X_b, y_b = X_tr[min_batch[k:k+batch_size]],y_tr[min_batch[k:k+batch_size]]
        t = (X_tr.shape[0]//batch_size)*(epoch-1) + (k // batch_size) + 1
        g = crf.grad_pll(X_b, y_b)
        m = B1 * m + (1-B1) * g
        v = B2 * v + np.square(np.sqrt(1-B2) * g)
        m_b = m / (1 - B1**t)
        v_b = v / (1 - B2**t)
        crf.W[1:] = crf.W[1:] - (alpha * m_b) / (np.sqrt(v_b) + eps)
        
        if (k // batch_size) % 2 == 0:
            
            acc_test = np.zeros(17)
            
            for n in range(17):
                acc_test[n] = 1 - hamming(crf.MAP(X_ts[n]), y_ts[n])
            print('Epoch/Iteration: ', epoch, '/', t-1, '. Current Average Test Hamming Accuracy: ',
                  100* acc_test.mean(), '%.')
            
            if best_acc <= acc_test.mean():
                best_acc = acc_test.mean()
                best_W = crf.W
        
    epoch += 1

    
acc_f = np.zeros(X_ts.shape[0])

for n in range(X_ts.shape[0]):
    acc_f[n] = 1 - hamming(crf.MAP(X_ts[n]), y_ts[n])
    
print('Final accuracy score: ',100* acc_f.mean(), '%.')
print('Zero-one-loss: ', 100*acc_f[acc_f==1].sum()/17, '%')
print('Unary Zero-one-loss: ', 100*acc_u[acc_u==1].sum()/17, '%')

Initial accuracy score Binary:  87.224640692 %.
Accuracy score Unary only:  77.5708621153 %.
Epoch/Iteration:  1 / 0 . Current Average Test Hamming Accuracy:  87.3137672518 %.
Epoch/Iteration:  1 / 2 . Current Average Test Hamming Accuracy:  87.4134681491 %.
Epoch/Iteration:  1 / 4 . Current Average Test Hamming Accuracy:  86.5063048378 %.
Epoch/Iteration:  2 / 6 . Current Average Test Hamming Accuracy:  87.2733558443 %.
Epoch/Iteration:  2 / 8 . Current Average Test Hamming Accuracy:  87.1736549469 %.
Epoch/Iteration:  2 / 10 . Current Average Test Hamming Accuracy:  87.742914909 %.
Final accuracy score:  87.742914909 %.
Zero-one-loss:  0.0 %
Unary Zero-one-loss:  0.0 %


In [104]:
data2 = scipy.io.loadmat('/home/james/anaconda3/data/HW/Chalearn/BOFData/devel02_data.mat')
data3 = scipy.io.loadmat('/home/james/anaconda3/data/HW/Chalearn/BOFData/devel03_data.mat')
data4 = scipy.io.loadmat('/home/james/anaconda3/data/HW/Chalearn/BOFData/devel04_data.mat')
data5 = scipy.io.loadmat('/home/james/anaconda3/data/HW/Chalearn/BOFData/devel05_data.mat')
data6 = scipy.io.loadmat('/home/james/anaconda3/data/HW/Chalearn/BOFData/devel06_data.mat')
data7 = scipy.io.loadmat('/home/james/anaconda3/data/HW/Chalearn/BOFData/devel07_data.mat')
data8 = scipy.io.loadmat('/home/james/anaconda3/data/HW/Chalearn/BOFData/devel08_data.mat')
data9 = scipy.io.loadmat('/home/james/anaconda3/data/HW/Chalearn/BOFData/devel09_data.mat')
data10 = scipy.io.loadmat('/home/james/anaconda3/data/HW/Chalearn/BOFData/devel10_data.mat')
data11 = scipy.io.loadmat('/home/james/anaconda3/data/HW/Chalearn/BOFData/devel11_data.mat')
data12 = scipy.io.loadmat('/home/james/anaconda3/data/HW/Chalearn/BOFData/devel12_data.mat')
data13 = scipy.io.loadmat('/home/james/anaconda3/data/HW/Chalearn/BOFData/devel13_data.mat')
data14 = scipy.io.loadmat('/home/james/anaconda3/data/HW/Chalearn/BOFData/devel14_data.mat')
data15 = scipy.io.loadmat('/home/james/anaconda3/data/HW/Chalearn/BOFData/devel15_data.mat')
data16 = scipy.io.loadmat('/home/james/anaconda3/data/HW/Chalearn/BOFData/devel16_data.mat')
data17 = scipy.io.loadmat('/home/james/anaconda3/data/HW/Chalearn/BOFData/devel17_data.mat')
data18 = scipy.io.loadmat('/home/james/anaconda3/data/HW/Chalearn/BOFData/devel18_data.mat')
data19 = scipy.io.loadmat('/home/james/anaconda3/data/HW/Chalearn/BOFData/devel19_data.mat')
data20 = scipy.io.loadmat('/home/james/anaconda3/data/HW/Chalearn/BOFData/devel20_data.mat')

Data = [data2, data3, data4, data5, data6, data7, data8, data9, data10, data11,
        data12, data13, data14, data15, data16, data17, data18, data19, data20]

In [304]:
# Data Extraction Training

X_tr2, y_tr2 = [], []

for k in range(30):
    
    T = data2['BOF_tr_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data2['BOF_tr_M'][0][k][t,:]
        feat[t,30:] = data2['BOF_tr_K'][0][k][t,:]
        lab[t] = data2['label_tr'][0][k][t,0]
    X_tr2.append(feat)
    y_tr2.append(lab)
    
X_tr2 = np.asarray(X_tr2, dtype='object')
y_tr2 = np.asarray(y_tr2, dtype='object')


# Data Extraction Testing

X_ts2, y_ts2 = [], []

for k in range(16):
    
    T = data2['BOF_te_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data2['BOF_te_M'][0][k][t,:]
        feat[t,30:] = data2['BOF_te_K'][0][k][t,:]
        lab[t] = data2['label_te'][0][k][t,0]
    X_ts2.append(feat)
    y_ts2.append(lab)
    
X_ts2 = np.asarray(X_ts2, dtype='object')
y_ts2 = np.asarray(y_ts2, dtype='object')

num_node = 0
for k in range(30):
    for j in range(y_tr2[k].shape[0]):
        num_node += 1


X_n2, y_n2 = np.zeros((num_node,60)), np.zeros((num_node,11), dtype='i4')

l = 0
for k in range(30):
    for j in range(X_tr2[k].shape[0]):
        X_n2[l] = X_tr2[k][j]
        y_n2[l][y_tr2[k][j]] = 1
        l += 1

In [320]:
inp = Input((60,), batch_shape=(128,60))

out1 = Dense(256, activation='relu')(inp)
out1 = Dropout(0.3)(out1)
out1 = Dense(11, activation='softmax', bias=False)(out1) 

unary_model2 = Model(inp,out1)

sgd = adam(0.007,epsilon=1e-7)

unary_model2.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [321]:
unary_model2.fit(X_n2,y_n2,batch_size=128,nb_epoch=11,validation_split=.05, verbose=2)

Train on 998 samples, validate on 53 samples
Epoch 1/11
0s - loss: 2.0741 - acc: 0.2725 - val_loss: 1.9937 - val_acc: 0.4528
Epoch 2/11
0s - loss: 1.5252 - acc: 0.5391 - val_loss: 1.5151 - val_acc: 0.5660
Epoch 3/11
0s - loss: 1.1647 - acc: 0.6623 - val_loss: 1.1849 - val_acc: 0.6792
Epoch 4/11
0s - loss: 0.9527 - acc: 0.7445 - val_loss: 1.0245 - val_acc: 0.6981
Epoch 5/11
0s - loss: 0.8181 - acc: 0.7695 - val_loss: 1.0228 - val_acc: 0.6981
Epoch 6/11
0s - loss: 0.7175 - acc: 0.8076 - val_loss: 0.8631 - val_acc: 0.7547
Epoch 7/11
0s - loss: 0.6377 - acc: 0.8176 - val_loss: 0.9610 - val_acc: 0.7170
Epoch 8/11
0s - loss: 0.5986 - acc: 0.8407 - val_loss: 0.8335 - val_acc: 0.7547
Epoch 9/11
0s - loss: 0.5634 - acc: 0.8477 - val_loss: 0.9124 - val_acc: 0.7547
Epoch 10/11
0s - loss: 0.5344 - acc: 0.8517 - val_loss: 0.8691 - val_acc: 0.7547
Epoch 11/11
0s - loss: 0.5009 - acc: 0.8737 - val_loss: 0.8268 - val_acc: 0.7547


<keras.callbacks.History at 0x7f6296f5a128>

In [325]:
inp = Input((60,), batch_shape=(1,60))

out1 = Dense(256, activation='relu',
            weights= unary_model2.layers[1].get_weights())(inp)
out1 = Dropout(0.3)(out1)
out1 = Dense(11, activation='linear', bias=False,
            weights= unary_model2.layers[-1].get_weights())(out1) 

unary_model2b = Model(inp,out1)

sgd = adam(0.007,epsilon=1e-7)

unary_model2b.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [326]:
def feat_func2(i,j,x,t,N,L):
    
    binary_chi = np.zeros(L**2)
    #unary_chi = np.zeros(N*L)
    #unary_chi[i*N:(i+1)*N] = x[t]
    unary_chi = unary_model2b.predict(x[t].reshape((1,60)))[0,i]
    
    if t > 0:
    
        binary_chi[(L*i+j)] = 1
        
    
    
    output = np.zeros(1 + L**2)
    output[0] = unary_chi
    output[1:] = binary_chi
    
    return output

In [331]:
# Adam stochastic mini-batch gradient descent on the (negative) pseudo-log-liklelihood.
# Dataset 2

from scipy.spatial.distance import hamming 

crf2 = CRF(feat_func2,1+11**2,11, 1e-7, best_W2)
crf_u2 = CRF(feat_func2,1+11**2,11, 1e-7, 'U')
alpha = 0.07
B1, B2 = 0.9, 0.9999
eps = 1e-7
m, v = np.zeros(crf2.K-1), np.zeros(crf2.K-1)
num_epochs = 1
batch_size = 5
epoch = 1
#best_W2 = np.zeros(11**2)
#best_acc2 = 0

acc2_t1 = np.zeros(16)
acc2_u = np.zeros(16)
for n in range(16):
    acc2_t1[n] = 1 - hamming(crf2.MAP(X_ts2[n]), y_ts2[n])
    acc2_u[n] = 1 - hamming(crf_u2.MAP(X_ts2[n]), y_ts2[n])
print('Initial accuracy score Binary: ',100* acc2_t1.mean(), '%.')
print('Accuracy score Unary only: ',100* acc2_u.mean(), '%.')

while epoch <= num_epochs:
    
    min_batch = np.arange(X_tr2.shape[0])
    np.random.shuffle(min_batch)
    
    for k in range(0, X_tr2.shape[0],batch_size):
        
        X_b, y_b = X_tr2[min_batch[k:k+batch_size]],y_tr2[min_batch[k:k+batch_size]]
        t = (X_tr2.shape[0]//batch_size)*(epoch-1) + (k // batch_size) + 1
        g = crf2.grad_pll(X_b, y_b)
        m = B1 * m + (1-B1) * g
        v = B2 * v + np.square(np.sqrt(1-B2) * g)
        m_b = m / (1 - B1**t)
        v_b = v / (1 - B2**t)
        crf2.W[1:] = crf2.W[1:] - (alpha * m_b) / (np.sqrt(v_b) + eps)
        
        if (k // batch_size) % 2 == 0:
            
            acc2_test = np.zeros(16)
            
            for n in range(16):
                acc2_test[n] = 1 - hamming(crf2.MAP(X_ts2[n]), y_ts2[n])
            print('Epoch/Iteration: ', epoch, '/', t-1, '. Current Average Test Hamming Accuracy: ',
                  100* acc2_test.mean(), '%.')
            
            if best_acc2 <= acc2_test.mean():
                best_acc2 = acc2_test.mean()
                best_W2 = crf2.W
        
    epoch += 1

    
acc2_f = np.zeros(X_ts2.shape[0])

for n in range(X_ts2.shape[0]):
    acc2_f[n] = 1 - hamming(crf2.MAP(X_ts2[n]), y_ts2[n])
    
print('Final accuracy score: ',100* acc2_f.mean(), '%.')
print('Zero-one-loss: ', 100*acc2_f[acc2_f==1].sum()/17, '%')
print('Unary Zero-one-loss: ', 100*acc2_u[acc2_u==1].sum()/17, '%')

Initial accuracy score Binary:  78.2494315765 %.
Accuracy score Unary only:  73.8793777848 %.
Epoch/Iteration:  1 / 0 . Current Average Test Hamming Accuracy:  78.1789235067 %.
Epoch/Iteration:  1 / 2 . Current Average Test Hamming Accuracy:  77.4197685263 %.
Epoch/Iteration:  1 / 4 . Current Average Test Hamming Accuracy:  77.6150810263 %.
Final accuracy score:  77.8719303414 %.
Zero-one-loss:  5.88235294118 %
Unary Zero-one-loss:  0.0 %


In [334]:
# Data Extraction Data set 3

X_tr3, y_tr3 = [], []

for k in range(30):
    
    T = data3['BOF_tr_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data3['BOF_tr_M'][0][k][t,:]
        feat[t,30:] = data3['BOF_tr_K'][0][k][t,:]
        lab[t] = data3['label_tr'][0][k][t,0]
    X_tr3.append(feat)
    y_tr3.append(lab)
    
X_tr3 = np.asarray(X_tr3, dtype='object')
y_tr3 = np.asarray(y_tr3, dtype='object')


# Data Extraction Testing

X_ts3, y_ts3 = [], []

for k in range(17):
    
    T = data3['BOF_te_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data3['BOF_te_M'][0][k][t,:]
        feat[t,30:] = data3['BOF_te_K'][0][k][t,:]
        lab[t] = data3['label_te'][0][k][t,0]
    X_ts3.append(feat)
    y_ts3.append(lab)
    
X_ts3 = np.asarray(X_ts3, dtype='object')
y_ts3 = np.asarray(y_ts3, dtype='object')

num_node = 0
for k in range(30):
    for j in range(y_tr3[k].shape[0]):
        num_node += 1
        
max_l = 0 
for k in range(30):
     max_l = max(max_l, y_tr3[k].max())
for k in range(16):
    max_l = max(max_l, y_ts3[k].max())
print(max_l)


X_n3, y_n3 = np.zeros((num_node,60)), np.zeros((num_node,max_l+1), dtype='i4')

l = 0
for k in range(30):
    for j in range(X_tr3[k].shape[0]):
        X_n3[l] = X_tr3[k][j]
        y_n3[l][y_tr3[k][j]] = 1
        l += 1

8


In [345]:
#Unary Neural Net dataset 3
inp = Input((60,), batch_shape=(128,60))

out1 = Dense(256, activation='relu')(inp)
out1 = Dropout(0.05)(out1)
out1 = Dense(9, activation='softmax', bias=False)(out1) 

unary_model3 = Model(inp,out1)

sgd = adam(0.005,epsilon=1e-7)

unary_model3.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [346]:
unary_model3.fit(X_n3,y_n3,batch_size=128,nb_epoch=19,validation_split=.05, verbose=2)

Train on 917 samples, validate on 49 samples
Epoch 1/19
0s - loss: 2.0276 - acc: 0.2683 - val_loss: 1.8521 - val_acc: 0.4082
Epoch 2/19
0s - loss: 1.7108 - acc: 0.4885 - val_loss: 1.6269 - val_acc: 0.6327
Epoch 3/19
0s - loss: 1.4265 - acc: 0.6074 - val_loss: 1.3396 - val_acc: 0.6735
Epoch 4/19
0s - loss: 1.1904 - acc: 0.6478 - val_loss: 1.1540 - val_acc: 0.6735
Epoch 5/19
0s - loss: 1.0030 - acc: 0.7034 - val_loss: 0.9808 - val_acc: 0.6939
Epoch 6/19
0s - loss: 0.8807 - acc: 0.7383 - val_loss: 1.0030 - val_acc: 0.7143
Epoch 7/19
0s - loss: 0.7900 - acc: 0.7699 - val_loss: 0.9493 - val_acc: 0.7143
Epoch 8/19
0s - loss: 0.7205 - acc: 0.8048 - val_loss: 0.9628 - val_acc: 0.7347
Epoch 9/19
0s - loss: 0.6574 - acc: 0.8201 - val_loss: 1.0813 - val_acc: 0.7143
Epoch 10/19
0s - loss: 0.6116 - acc: 0.8244 - val_loss: 0.9943 - val_acc: 0.6939
Epoch 11/19
0s - loss: 0.5641 - acc: 0.8212 - val_loss: 1.0343 - val_acc: 0.6939
Epoch 12/19
0s - loss: 0.5216 - acc: 0.8680 - val_loss: 1.0883 - val_acc:

<keras.callbacks.History at 0x7f6295825828>

In [347]:
inp = Input((60,), batch_shape=(1,60))

out1 = Dense(256, activation='relu',
            weights= unary_model3.layers[1].get_weights())(inp)
out1 = Dropout(0.3)(out1)
out1 = Dense(9, activation='linear', bias=False,
            weights= unary_model3.layers[-1].get_weights())(out1) 

unary_model3b = Model(inp,out1)

sgd = adam(0.007,epsilon=1e-7)

unary_model3b.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [348]:
def feat_func3(i,j,x,t,N,L):
    
    binary_chi = np.zeros(L**2)
    #unary_chi = np.zeros(N*L)
    #unary_chi[i*N:(i+1)*N] = x[t]
    unary_chi = unary_model3b.predict(x[t].reshape((1,60)))[0,i]
    
    if t > 0:
    
        binary_chi[(L*i+j)] = 1
        
    
    
    output = np.zeros(1 + L**2)
    output[0] = unary_chi
    output[1:] = binary_chi
    
    return output

In [351]:
# Adam stochastic mini-batch gradient descent on the (negative) pseudo-log-liklelihood.
# Dataset 3


crf3 = CRF(feat_func3,1+9**2,9, 1e-10, best_W3)
crf_u3 = CRF(feat_func3,1+9**2,9, 1e-7, 'U')
alpha = 0.09
B1, B2 = 0.9, 0.999
eps = 1e-8
m, v = np.zeros(crf3.K-1), np.zeros(crf3.K-1)
num_epochs = 5
batch_size = 5
epoch = 1
#best_W3 = np.zeros(9**2)
#best_acc3 = 0

acc3_t1 = np.zeros(17)
acc3_u = np.zeros(17)
for n in range(17):
    acc3_t1[n] = 1 - hamming(crf3.MAP(X_ts3[n]), y_ts3[n])
    acc3_u[n] = 1 - hamming(crf_u3.MAP(X_ts3[n]), y_ts3[n])
print('Initial accuracy score Binary: ',100* acc3_t1.mean(), '%.')
print('Accuracy score Unary only: ',100* acc3_u.mean(), '%.')

while epoch <= num_epochs:
    
    min_batch = np.arange(X_tr3.shape[0])
    np.random.shuffle(min_batch)
    
    for k in range(0, X_tr3.shape[0],batch_size):
        
        X_b, y_b = X_tr3[min_batch[k:k+batch_size]],y_tr3[min_batch[k:k+batch_size]]
        t = (X_tr3.shape[0]//batch_size)*(epoch-1) + (k // batch_size) + 1
        g = crf3.grad_pll(X_b, y_b)
        m = B1 * m + (1-B1) * g
        v = B2 * v + np.square(np.sqrt(1-B2) * g)
        m_b = m / (1 - B1**t)
        v_b = v / (1 - B2**t)
        crf3.W[1:] = crf3.W[1:] - (alpha * m_b) / (np.sqrt(v_b) + eps)
        
        if (k // batch_size) % 2 == 0:
            
            acc3_test = np.zeros(17)
            
            for n in range(17):
                acc3_test[n] = 1 - hamming(crf3.MAP(X_ts3[n]), y_ts3[n])
            print('Epoch/Iteration: ', epoch, '/', t-1, '. Current Average Test Hamming Accuracy: ',
                  100* acc3_test.mean(), '%.')
            
            if best_acc3 <= acc3_test.mean():
                best_acc3 = acc3_test.mean()
                best_W3 = crf3.W
        
    epoch += 1

    
acc3_f = np.zeros(X_ts3.shape[0])

for n in range(X_ts3.shape[0]):
    acc3_f[n] = 1 - hamming(crf3.MAP(X_ts3[n]), y_ts3[n])
    
print('Final accuracy score: ',100* acc3_f.mean(), '%.')
print('Zero-one-loss: ', 100*acc3_f[acc3_f==1].sum()/17, '%')
print('Unary Zero-one-loss: ', 100*acc3_u[acc3_u==1].sum()/17, '%')

Initial accuracy score Binary:  62.3404770581 %.
Accuracy score Unary only:  55.198712337 %.
Epoch/Iteration:  1 / 0 . Current Average Test Hamming Accuracy:  62.5805730965 %.
Epoch/Iteration:  1 / 2 . Current Average Test Hamming Accuracy:  62.917958605 %.
Epoch/Iteration:  1 / 4 . Current Average Test Hamming Accuracy:  61.6441701899 %.
Epoch/Iteration:  2 / 6 . Current Average Test Hamming Accuracy:  61.7642182091 %.
Epoch/Iteration:  2 / 8 . Current Average Test Hamming Accuracy:  61.8437094651 %.
Epoch/Iteration:  2 / 10 . Current Average Test Hamming Accuracy:  61.6863509596 %.
Epoch/Iteration:  3 / 12 . Current Average Test Hamming Accuracy:  61.6863509596 %.
Epoch/Iteration:  3 / 14 . Current Average Test Hamming Accuracy:  61.3521263607 %.
Epoch/Iteration:  3 / 16 . Current Average Test Hamming Accuracy:  61.4571683775 %.
Epoch/Iteration:  4 / 18 . Current Average Test Hamming Accuracy:  61.819424717 %.
Epoch/Iteration:  4 / 20 . Current Average Test Hamming Accuracy:  61.8194

In [202]:
best_acc3

0.49928405209559845

In [352]:
# Data Extraction Data set 4

X_tr4, y_tr4 = [], []

for k in range(30):
    
    T = data4['BOF_tr_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data4['BOF_tr_M'][0][k][t,:]
        feat[t,30:] = data4['BOF_tr_K'][0][k][t,:]
        lab[t] = data4['label_tr'][0][k][t,0]
    X_tr4.append(feat)
    y_tr4.append(lab)
    
X_tr4 = np.asarray(X_tr4, dtype='object')
y_tr4 = np.asarray(y_tr4, dtype='object')


# Data Extraction Testing

X_ts4, y_ts4 = [], []

for k in range(17):
    
    T = data4['BOF_te_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data4['BOF_te_M'][0][k][t,:]
        feat[t,30:] = data4['BOF_te_K'][0][k][t,:]
        lab[t] = data4['label_te'][0][k][t,0]
    X_ts4.append(feat)
    y_ts4.append(lab)
    
X_ts4 = np.asarray(X_ts4, dtype='object')
y_ts4 = np.asarray(y_ts4, dtype='object')

num_node = 0
for k in range(30):
    for j in range(y_tr4[k].shape[0]):
        num_node += 1
        
max_l = 0 
for k in range(30):
     max_l = max(max_l, y_tr4[k].max())
for k in range(16):
    max_l = max(max_l, y_ts4[k].max())
print(max_l)


X_n4, y_n4 = np.zeros((num_node,60)), np.zeros((num_node,max_l+1), dtype='i4')

l = 0
for k in range(30):
    for j in range(X_tr4[k].shape[0]):
        X_n4[l] = X_tr4[k][j]
        y_n4[l][y_tr4[k][j]] = 1
        l += 1

10


In [353]:
#Unary Neural Net dataset 4
inp = Input((60,), batch_shape=(128,60))

out1 = Dense(256, activation='relu')(inp)
out1 = Dropout(0.05)(out1)
out1 = Dense(11, activation='softmax', bias=False)(out1) 

unary_model4 = Model(inp,out1)

sgd = adam(0.005,epsilon=1e-7)

unary_model4.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [354]:
unary_model4.fit(X_n4,y_n4,batch_size=128,nb_epoch=19,validation_split=.05, verbose=2)

Train on 757 samples, validate on 40 samples
Epoch 1/19
0s - loss: 2.1023 - acc: 0.3448 - val_loss: 2.1474 - val_acc: 0.1750
Epoch 2/19
0s - loss: 1.7874 - acc: 0.3910 - val_loss: 1.7994 - val_acc: 0.3500
Epoch 3/19
0s - loss: 1.4960 - acc: 0.5456 - val_loss: 1.3197 - val_acc: 0.6000
Epoch 4/19
0s - loss: 1.2372 - acc: 0.6129 - val_loss: 1.1409 - val_acc: 0.6250
Epoch 5/19
0s - loss: 1.0116 - acc: 0.7332 - val_loss: 0.9643 - val_acc: 0.6250
Epoch 6/19
0s - loss: 0.8448 - acc: 0.7807 - val_loss: 0.8762 - val_acc: 0.6500
Epoch 7/19
0s - loss: 0.7168 - acc: 0.8203 - val_loss: 0.7747 - val_acc: 0.7000
Epoch 8/19
0s - loss: 0.6334 - acc: 0.8402 - val_loss: 0.7089 - val_acc: 0.7250
Epoch 9/19
0s - loss: 0.5674 - acc: 0.8692 - val_loss: 0.7013 - val_acc: 0.7250
Epoch 10/19
0s - loss: 0.5198 - acc: 0.8719 - val_loss: 0.7320 - val_acc: 0.7250
Epoch 11/19
0s - loss: 0.4792 - acc: 0.8877 - val_loss: 0.6241 - val_acc: 0.7250
Epoch 12/19
0s - loss: 0.4468 - acc: 0.8983 - val_loss: 0.7078 - val_acc:

<keras.callbacks.History at 0x7f6295322470>

In [355]:
inp = Input((60,), batch_shape=(1,60))

out1 = Dense(256, activation='relu',
            weights= unary_model4.layers[1].get_weights())(inp)
out1 = Dropout(0.3)(out1)
out1 = Dense(11, activation='linear', bias=False,
            weights= unary_model4.layers[-1].get_weights())(out1) 

unary_model4b = Model(inp,out1)

sgd = adam(0.007,epsilon=1e-7)

unary_model4b.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [356]:
def feat_func4(i,j,x,t,N,L):
    
    binary_chi = np.zeros(L**2)
    #unary_chi = np.zeros(N*L)
    #unary_chi[i*N:(i+1)*N] = x[t]
    unary_chi = unary_model4b.predict(x[t].reshape((1,60)))[0,i]
    
    if t > 0:
    
        binary_chi[(L*i+j)] = 1
        
    
    
    output = np.zeros(1 + L**2)
    output[0] = unary_chi
    output[1:] = binary_chi
    
    return output

In [414]:
# Adam stochastic mini-batch gradient descent on the (negative) pseudo-log-liklelihood.
# Dataset 4


crf4 = CRF(feat_func4,1+11**2,11, 0, 'B')
crf_u4 = CRF(feat_func4,1+11**2,11, 1e-7, 'U')
alpha = 0.085
B1, B2 = 0.9, 0.999
eps = 1e-7
m, v = np.zeros(crf4.K-1), np.zeros(crf4.K-1)
num_epochs = 20
batch_size = 5
epoch = 1
best_acc4 = 0

acc4_t1 = np.zeros(17)
acc4_u = np.zeros(17)
for n in range(17):
    acc4_t1[n] = 1 - hamming(crf4.MAP(X_ts4[n]), y_ts4[n])
    acc4_u[n] = 1 - hamming(crf_u4.MAP(X_ts4[n]), y_ts4[n])
print('Initial accuracy score Binary: ',100* acc4_t1.mean(), '%.')
print('Accuracy score Unary only: ',100* acc4_u.mean(), '%.')

while epoch <= num_epochs:
    
    min_batch = np.arange(X_tr4.shape[0])
    np.random.shuffle(min_batch)
    
    for k in range(0, X_tr4.shape[0],batch_size):
        
        X_b, y_b = X_tr4[min_batch[k:k+batch_size]],y_tr4[min_batch[k:k+batch_size]]
        t = (X_tr4.shape[0]//batch_size)*(epoch-1) + (k // batch_size) + 1
        g = crf4.grad_pll(X_b, y_b)
        m = B1 * m + (1-B1) * g
        v = B2 * v + np.square(np.sqrt(1-B2) * g)
        m_b = m / (1 - B1**t)
        v_b = v / (1 - B2**t)
        crf4.W[1:] = crf4.W[1:] - (alpha * m_b) / (np.sqrt(v_b) + eps)
        
        if (k // batch_size) % 2 == 0:
            
            acc4_test = np.zeros(17)
            
            for n in range(17):
                acc4_test[n] = 1 - hamming(crf4.MAP(X_ts4[n]), y_ts4[n])
            print('Epoch/Iteration: ', epoch, '/', t-1, '. Current Average Test Hamming Accuracy: ',
                  100* acc4_test.mean(), '%.')
            
            if best_acc4 < acc4_test.mean():
                print('Zero-one-loss: ', 100*acc4_test[acc4_test==1].sum()/17, '%')
    epoch += 1

    
acc4_f = np.zeros(X_ts4.shape[0])

for n in range(X_ts4.shape[0]):
    acc4_f[n] = 1 - hamming(crf4.MAP(X_ts4[n]), y_ts4[n])
    
print('Final accuracy score: ',100* acc4_f.mean(), '%.')
print('Zero-one-loss: ', 100*acc4_f[acc4_f==1].sum()/17, '%')
print('Unary Zero-one-loss: ', 100*acc4_u[acc4_u==1].sum()/17, '%')

Initial accuracy score Binary:  77.0903514201 %.
Accuracy score Unary only:  80.8538756905 %.
Epoch/Iteration:  1 / 0 . Current Average Test Hamming Accuracy:  77.3874399525 %.
Epoch/Iteration:  1 / 2 . Current Average Test Hamming Accuracy:  79.1406169945 %.
Epoch/Iteration:  1 / 4 . Current Average Test Hamming Accuracy:  80.658215116 %.
Epoch/Iteration:  2 / 6 . Current Average Test Hamming Accuracy:  81.4442398 %.
Epoch/Iteration:  2 / 8 . Current Average Test Hamming Accuracy:  81.8448678875 %.
Epoch/Iteration:  2 / 10 . Current Average Test Hamming Accuracy:  82.7038781583 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 12 . Current Average Test Hamming Accuracy:  83.3940088019 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 14 . Current Average Test Hamming Accuracy:  83.0637908562 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 16 . Current Average Test Hamming Accuracy:  82.7198166299 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  4 / 18 . Current Average Test Hamming Accuracy:  83.2

In [386]:
# Data Extraction Data set 5

X_tr5, y_tr5 = [], []

for k in range(30):
    
    T = data5['BOF_tr_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data5['BOF_tr_M'][0][k][t,:]
        feat[t,30:] = data5['BOF_tr_K'][0][k][t,:]
        lab[t] = data5['label_tr'][0][k][t,0]
    X_tr5.append(feat)
    y_tr5.append(lab)
    
X_tr5 = np.asarray(X_tr5, dtype='object')
y_tr5 = np.asarray(y_tr5, dtype='object')


# Data Extraction Testing

X_ts5, y_ts5 = [], []

for k in range(17):
    
    T = data5['BOF_te_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data5['BOF_te_M'][0][k][t,:]
        feat[t,30:] = data5['BOF_te_K'][0][k][t,:]
        lab[t] = data5['label_te'][0][k][t,0]
    X_ts5.append(feat)
    y_ts5.append(lab)
    
X_ts5 = np.asarray(X_ts5, dtype='object')
y_ts5 = np.asarray(y_ts5, dtype='object')

num_node = 0
for k in range(30):
    for j in range(y_tr5[k].shape[0]):
        num_node += 1
        
max_l = 0 
for k in range(30):
     max_l = max(max_l, y_tr5[k].max())
for k in range(16):
    max_l = max(max_l, y_ts5[k].max())
print(max_l)


X_n5, y_n5 = np.zeros((num_node,60)), np.zeros((num_node,max_l+1), dtype='i4')

l = 0
for k in range(30):
    for j in range(X_tr5[k].shape[0]):
        X_n5[l] = X_tr5[k][j]
        y_n5[l][y_tr5[k][j]] = 1
        l += 1

8


In [405]:
#Unary Neural Net dataset 5
inp = Input((60,), batch_shape=(128,60))

out1 = Dense(512, activation='relu')(inp)
out1 = Dropout(0.37)(out1)
out1 = Dense(9, activation='softmax', bias=False)(out1) 

unary_model5 = Model(inp,out1)

sgd = adam(0.005,epsilon=1e-7)

unary_model5.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [406]:
unary_model5.fit(X_n5,y_n5,batch_size=128,nb_epoch=35,validation_split=.05, verbose=2)

Train on 1190 samples, validate on 63 samples
Epoch 1/35
0s - loss: 2.0220 - acc: 0.2294 - val_loss: 2.1924 - val_acc: 0.0159
Epoch 2/35
0s - loss: 1.6465 - acc: 0.4983 - val_loss: 1.9448 - val_acc: 0.2063
Epoch 3/35
0s - loss: 1.3419 - acc: 0.5933 - val_loss: 1.6893 - val_acc: 0.3968
Epoch 4/35
0s - loss: 1.1421 - acc: 0.6655 - val_loss: 1.4846 - val_acc: 0.4762
Epoch 5/35
0s - loss: 1.0104 - acc: 0.6983 - val_loss: 1.4763 - val_acc: 0.4762
Epoch 6/35
0s - loss: 0.9149 - acc: 0.7345 - val_loss: 1.2549 - val_acc: 0.6032
Epoch 7/35
0s - loss: 0.8587 - acc: 0.7429 - val_loss: 1.3980 - val_acc: 0.4921
Epoch 8/35
0s - loss: 0.8157 - acc: 0.7555 - val_loss: 1.3950 - val_acc: 0.5238
Epoch 9/35
0s - loss: 0.7503 - acc: 0.7790 - val_loss: 1.2061 - val_acc: 0.5873
Epoch 10/35
0s - loss: 0.6915 - acc: 0.8000 - val_loss: 1.1748 - val_acc: 0.6349
Epoch 11/35
0s - loss: 0.6694 - acc: 0.8034 - val_loss: 1.2649 - val_acc: 0.5873
Epoch 12/35
0s - loss: 0.6403 - acc: 0.8168 - val_loss: 1.3470 - val_acc

<keras.callbacks.History at 0x7f6292747b70>

In [407]:
inp = Input((60,), batch_shape=(1,60))

out1 = Dense(512, activation='relu',
            weights= unary_model5.layers[1].get_weights())(inp)
out1 = Dropout(0.37)(out1)
out1 = Dense(9, activation='linear', bias=False,
            weights= unary_model5.layers[-1].get_weights())(out1) 

unary_model5b = Model(inp,out1)

sgd = adam(0.007,epsilon=1e-7)

unary_model5b.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [408]:
def feat_func5(i,j,x,t,N,L):
    
    binary_chi = np.zeros(L**2)
    #unary_chi = np.zeros(N*L)
    #unary_chi[i*N:(i+1)*N] = x[t]
    unary_chi = unary_model5b.predict(x[t].reshape((1,60)))[0,i]
    
    if t > 0:
    
        binary_chi[(L*i+j)] = 1
        
    
    
    output = np.zeros(1 + L**2)
    output[0] = unary_chi
    output[1:] = binary_chi
    
    return output

In [413]:
# Adam stochastic mini-batch gradient descent on the (negative) pseudo-log-liklelihood.
# Dataset 5


crf5 = CRF(feat_func5,1+9**2,9, 1e-8, 'B')
crf_u5 = CRF(feat_func5,1+9**2,9, 1e-7, 'U')
alpha = 0.085
B1, B2 = 0.9, 0.999
eps = 1e-7
m, v = np.zeros(crf5.K-1), np.zeros(crf5.K-1)
num_epochs = 25
batch_size = 5
epoch = 1
best_acc5 = 0


acc5_t1 = np.zeros(17)
acc5_u = np.zeros(17)
for n in range(17):
    acc5_t1[n] = 1 - hamming(crf5.MAP(X_ts5[n]), y_ts5[n])
    acc5_u[n] = 1 - hamming(crf_u5.MAP(X_ts5[n]), y_ts5[n])
print('Initial accuracy score Binary: ',100* acc5_t1.mean(), '%.')
print('Accuracy score Unary only: ',100* acc5_u.mean(), '%.')

while epoch <= num_epochs:
    
    min_batch = np.arange(X_tr5.shape[0])
    np.random.shuffle(min_batch)
    
    for k in range(0, X_tr5.shape[0],batch_size):
        
        X_b, y_b = X_tr5[min_batch[k:k+batch_size]],y_tr5[min_batch[k:k+batch_size]]
        t = (X_tr5.shape[0]//batch_size)*(epoch-1) + (k // batch_size) + 1
        g = crf5.grad_pll(X_b, y_b)
        m = B1 * m + (1-B1) * g
        v = B2 * v + np.square(np.sqrt(1-B2) * g)
        m_b = m / (1 - B1**t)
        v_b = v / (1 - B2**t)
        crf5.W[1:] = crf5.W[1:] - (alpha * m_b) / (np.sqrt(v_b) + eps)
        
        if (k // batch_size) % 2 == 0:
            
            acc5_test = np.zeros(17)
            
            for n in range(17):
                acc5_test[n] = 1 - hamming(crf5.MAP(X_ts5[n]), y_ts5[n])
            print('Epoch/Iteration: ', epoch, '/', t-1, '. Current Average Test Hamming Accuracy: ',
                  100* acc5_test.mean(), '%.')
            
            if best_acc5 < acc5_test.mean():
                best_acc5 = acc5_test.mean()
                print('Zero-one-loss: ', 100*acc5_test[acc5_test==1].sum()/17, '%')
        
    epoch += 1

    
acc5_f = np.zeros(X_ts5.shape[0])

for n in range(X_ts5.shape[0]):
    acc5_f[n] = 1 - hamming(crf5.MAP(X_ts5[n]), y_ts5[n])
    
print('Final accuracy score: ',100* acc5_f.mean(), '%.')
print('Zero-one-loss: ', 100*acc5_f[acc5_f==1].sum()/17, '%')
print('Unary Zero-one-loss: ', 100*acc5_u[acc5_u==1].sum()/17, '%')

Initial accuracy score Binary:  63.5845216199 %.
Accuracy score Unary only:  73.0905279198 %.
Epoch/Iteration:  1 / 0 . Current Average Test Hamming Accuracy:  64.2421926099 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  1 / 2 . Current Average Test Hamming Accuracy:  65.2247225824 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  1 / 4 . Current Average Test Hamming Accuracy:  67.2368467457 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 6 . Current Average Test Hamming Accuracy:  67.1884852014 %.
Epoch/Iteration:  2 / 8 . Current Average Test Hamming Accuracy:  67.833728834 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 10 . Current Average Test Hamming Accuracy:  67.8983700752 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 12 . Current Average Test Hamming Accuracy:  69.1191110701 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 14 . Current Average Test Hamming Accuracy:  69.8836299142 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 16 . Current Average Test Hamming Accuracy:  70.1431454851 %.
Ze

In [415]:
# Data Extraction Data set 6

X_tr6, y_tr6 = [], []

for k in range(30):
    
    T = data6['BOF_tr_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data6['BOF_tr_M'][0][k][t,:]
        feat[t,30:] = data6['BOF_tr_K'][0][k][t,:]
        lab[t] = data6['label_tr'][0][k][t,0]
    X_tr6.append(feat)
    y_tr6.append(lab)
    
X_tr6 = np.asarray(X_tr6, dtype='object')
y_tr6 = np.asarray(y_tr6, dtype='object')


# Data Extraction Testing

X_ts6, y_ts6 = [], []

for k in range(17):
    
    T = data6['BOF_te_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data6['BOF_te_M'][0][k][t,:]
        feat[t,30:] = data6['BOF_te_K'][0][k][t,:]
        lab[t] = data6['label_te'][0][k][t,0]
    X_ts6.append(feat)
    y_ts6.append(lab)
    
X_ts6 = np.asarray(X_ts6, dtype='object')
y_ts6 = np.asarray(y_ts6, dtype='object')

num_node = 0
for k in range(30):
    for j in range(y_tr6[k].shape[0]):
        num_node += 1
        
max_l = 0 
for k in range(30):
     max_l = max(max_l, y_tr6[k].max())
for k in range(16):
    max_l = max(max_l, y_ts6[k].max())
print(max_l)


X_n6, y_n6 = np.zeros((num_node,60)), np.zeros((num_node,max_l+1), dtype='i4')

l = 0
for k in range(30):
    for j in range(X_tr6[k].shape[0]):
        X_n6[l] = X_tr6[k][j]
        y_n6[l][y_tr6[k][j]] = 1
        l += 1

10


In [464]:
#Unary Neural Net dataset 6
inp = Input((60,), batch_shape=(128,60))

out1 = Dense(512, activation='relu')(inp)
out1 = Dropout(0.45)(out1)
out1 = Dense(11, activation='softmax', bias=False)(out1) 

unary_model6 = Model(inp,out1)

sgd = adam(0.009,epsilon=1e-8)

unary_model6.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [467]:
unary_model6.fit(X_n6,y_n6,batch_size=128,nb_epoch=14,validation_split=.05, verbose=2)

Train on 647 samples, validate on 35 samples
Epoch 1/14
0s - loss: 0.4738 - acc: 0.8764 - val_loss: 1.3932 - val_acc: 0.6000
Epoch 2/14
0s - loss: 0.4479 - acc: 0.8794 - val_loss: 1.6526 - val_acc: 0.5429
Epoch 3/14
0s - loss: 0.4486 - acc: 0.8903 - val_loss: 1.5723 - val_acc: 0.5714
Epoch 4/14
0s - loss: 0.4299 - acc: 0.8841 - val_loss: 1.5688 - val_acc: 0.5429
Epoch 5/14
0s - loss: 0.4592 - acc: 0.8702 - val_loss: 1.3920 - val_acc: 0.5714
Epoch 6/14
0s - loss: 0.3987 - acc: 0.8856 - val_loss: 1.5188 - val_acc: 0.5714
Epoch 7/14
0s - loss: 0.4001 - acc: 0.9026 - val_loss: 1.8112 - val_acc: 0.4857
Epoch 8/14
0s - loss: 0.3852 - acc: 0.9042 - val_loss: 1.5527 - val_acc: 0.5429
Epoch 9/14
0s - loss: 0.3918 - acc: 0.8964 - val_loss: 1.8474 - val_acc: 0.5429
Epoch 10/14
0s - loss: 0.4243 - acc: 0.8609 - val_loss: 1.9108 - val_acc: 0.5143
Epoch 11/14
0s - loss: 0.3645 - acc: 0.9057 - val_loss: 1.7349 - val_acc: 0.5429
Epoch 12/14
0s - loss: 0.3747 - acc: 0.9026 - val_loss: 1.5747 - val_acc:

<keras.callbacks.History at 0x7f628cac1240>

In [493]:
inp = Input((60,), batch_shape=(1,60))

out1 = Dense(512, activation='relu',
            weights= unary_model6.layers[1].get_weights())(inp)
out1 = Dropout(0.45)(out1)
out1 = Dense(11, activation='linear', bias=False,
            weights= unary_model6.layers[-1].get_weights())(out1) 

unary_model6b = Model(inp,out1)

sgd = adam(0.007,epsilon=1e-7)

unary_model6b.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [494]:
def feat_func6(i,j,x,t,N,L):
    
    binary_chi = np.zeros(L**2)
    #unary_chi = np.zeros(N*L)
    #unary_chi[i*N:(i+1)*N] = x[t]
    unary_chi = unary_model6b.predict(x[t].reshape((1,60)))[0,i]
    
    if t > 0:
    
        binary_chi[(L*i+j)] = 1
        
    
    
    output = np.zeros(1 + L**2)
    output[0] = unary_chi
    output[1:] = binary_chi
    
    return output

In [495]:
# Adam stochastic mini-batch gradient descent on the (negative) pseudo-log-liklelihood.
# Dataset 6


crf6 = CRF(feat_func6,1+11**2,11, 0, 'B')
crf_u6 = CRF(feat_func6,1+11**2,11, 1e-7, 'U')
alpha = 0.085
B1, B2 = 0.9, 0.999
eps = 1e-7
m, v = np.zeros(crf6.K-1), np.zeros(crf6.K-1)
num_epochs = 20
batch_size = 5
epoch = 1
best_acc6 = 0

acc6_t1 = np.zeros(17)
acc6_u = np.zeros(17)
for n in range(17):
    acc6_t1[n] = 1 - hamming(crf6.MAP(X_ts6[n]), y_ts6[n])
    acc6_u[n] = 1 - hamming(crf_u6.MAP(X_ts6[n]), y_ts6[n])
print('Initial accuracy score Binary: ',100* acc6_t1.mean(), '%.')
print('Accuracy score Unary only: ',100* acc6_u.mean(), '%.')

while epoch <= num_epochs:
    
    min_batch = np.arange(X_tr6.shape[0])
    np.random.shuffle(min_batch)
    
    for k in range(0, X_tr6.shape[0],batch_size):
        
        X_b, y_b = X_tr6[min_batch[k:k+batch_size]],y_tr6[min_batch[k:k+batch_size]]
        t = (X_tr6.shape[0]//batch_size)*(epoch-1) + (k // batch_size) + 1
        g = crf6.grad_pll(X_b, y_b)
        m = B1 * m + (1-B1) * g
        v = B2 * v + np.square(np.sqrt(1-B2) * g)
        m_b = m / (1 - B1**t)
        v_b = v / (1 - B2**t)
        crf6.W[1:] = crf6.W[1:] - (alpha * m_b) / (np.sqrt(v_b) + eps)
        
        if (k // batch_size) % 2 == 0:
            
            acc6_test = np.zeros(17)
            
            for n in range(17):
                acc6_test[n] = 1 - hamming(crf6.MAP(X_ts6[n]), y_ts6[n])
            print('Epoch/Iteration: ', epoch, '/', t-1, '. Current Average Test Hamming Accuracy: ',
                  100* acc6_test.mean(), '%.')
            
            if best_acc6 < acc6_test.mean():
                best_acc6 = acc6_test.mean()
                print('Zero-one-loss: ', 100*acc6_test[acc6_test==1].sum()/17, '%')
    epoch += 1

    
acc6_f = np.zeros(X_ts6.shape[0])

for n in range(X_ts6.shape[0]):
    acc6_f[n] = 1 - hamming(crf6.MAP(X_ts6[n]), y_ts6[n])
    
print('Final accuracy score: ',100* acc6_f.mean(), '%.')
print('Zero-one-loss: ', 100*acc6_f[acc6_f==1].sum()/17, '%')
print('Unary Zero-one-loss: ', 100*acc6_u[acc6_u==1].sum()/17, '%')

Initial accuracy score Binary:  61.0963109994 %.
Accuracy score Unary only:  64.949889014 %.
Epoch/Iteration:  1 / 0 . Current Average Test Hamming Accuracy:  62.0314093978 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  1 / 2 . Current Average Test Hamming Accuracy:  63.152594714 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  1 / 4 . Current Average Test Hamming Accuracy:  63.7128188036 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 6 . Current Average Test Hamming Accuracy:  64.6379256885 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 8 . Current Average Test Hamming Accuracy:  64.780136419 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 10 . Current Average Test Hamming Accuracy:  66.2252599229 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 12 . Current Average Test Hamming Accuracy:  66.1419067436 %.
Epoch/Iteration:  3 / 14 . Current Average Test Hamming Accuracy:  66.3428416067 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 16 . Current Average Test Hamming Accuracy:  66.2774821296 %.
Epoc

In [471]:
# Data Extraction Data set 7

X_tr7, y_tr7 = [], []

for k in range(30):
    
    T = data7['BOF_tr_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data7['BOF_tr_M'][0][k][t,:]
        feat[t,30:] = data7['BOF_tr_K'][0][k][t,:]
        lab[t] = data7['label_tr'][0][k][t,0]
    X_tr7.append(feat)
    y_tr7.append(lab)
    
X_tr7 = np.asarray(X_tr7, dtype='object')
y_tr7 = np.asarray(y_tr7, dtype='object')


# Data Extraction Testing

X_ts7, y_ts7 = [], []

for k in range(17):
    
    T = data7['BOF_te_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data7['BOF_te_M'][0][k][t,:]
        feat[t,30:] = data7['BOF_te_K'][0][k][t,:]
        lab[t] = data7['label_te'][0][k][t,0]
    X_ts7.append(feat)
    y_ts7.append(lab)
    
X_ts7 = np.asarray(X_ts7, dtype='object')
y_ts7 = np.asarray(y_ts7, dtype='object')

num_node = 0
for k in range(30):
    for j in range(y_tr7[k].shape[0]):
        num_node += 1
        
max_l = 0 
for k in range(30):
     max_l = max(max_l, y_tr7[k].max())
for k in range(16):
    max_l = max(max_l, y_ts7[k].max())
print(max_l)


X_n7, y_n7 = np.zeros((num_node,60)), np.zeros((num_node,max_l+1), dtype='i4')

l = 0
for k in range(30):
    for j in range(X_tr7[k].shape[0]):
        X_n7[l] = X_tr7[k][j]
        y_n7[l][y_tr7[k][j]] = 1
        l += 1

9


In [479]:
#Unary Neural Net dataset 7
inp = Input((60,), batch_shape=(128,60))

out1 = Dense(512, activation='relu')(inp)
out1 = Dropout(0.45)(out1)
out1 = Dense(10, activation='softmax', bias=False)(out1) 

unary_model7 = Model(inp,out1)

sgd = adam(0.009,epsilon=1e-8)

unary_model7.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [480]:
unary_model7.fit(X_n7,y_n7,batch_size=128,nb_epoch=11,validation_split=.05, verbose=2)

Train on 693 samples, validate on 37 samples
Epoch 1/11
0s - loss: 1.9752 - acc: 0.2929 - val_loss: 1.6129 - val_acc: 0.6486
Epoch 2/11
0s - loss: 1.3585 - acc: 0.6436 - val_loss: 1.0993 - val_acc: 0.8378
Epoch 3/11
0s - loss: 0.9465 - acc: 0.7633 - val_loss: 0.9304 - val_acc: 0.7838
Epoch 4/11
0s - loss: 0.7419 - acc: 0.8081 - val_loss: 0.8800 - val_acc: 0.7838
Epoch 5/11
0s - loss: 0.6555 - acc: 0.8110 - val_loss: 0.6912 - val_acc: 0.8649
Epoch 6/11
0s - loss: 0.5627 - acc: 0.8485 - val_loss: 0.7260 - val_acc: 0.8649
Epoch 7/11
0s - loss: 0.4987 - acc: 0.8600 - val_loss: 0.7816 - val_acc: 0.8378
Epoch 8/11
0s - loss: 0.4578 - acc: 0.8817 - val_loss: 0.6645 - val_acc: 0.8919
Epoch 9/11
0s - loss: 0.4159 - acc: 0.8903 - val_loss: 0.7301 - val_acc: 0.8649
Epoch 10/11
0s - loss: 0.4129 - acc: 0.8918 - val_loss: 0.6430 - val_acc: 0.8919
Epoch 11/11
0s - loss: 0.3908 - acc: 0.9033 - val_loss: 0.6194 - val_acc: 0.8919


<keras.callbacks.History at 0x7f628b965e48>

In [490]:
inp = Input((60,), batch_shape=(1,60))

out1 = Dense(512, activation='relu',
            weights= unary_model7.layers[1].get_weights())(inp)
out1 = Dropout(0.45)(out1)
out1 = Dense(10, activation='linear', bias=False,
            weights= unary_model7.layers[-1].get_weights())(out1) 

unary_model7b = Model(inp,out1)

sgd = adam(0.007,epsilon=1e-7)

unary_model7b.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [491]:
def feat_func7(i,j,x,t,N,L):
    
    binary_chi = np.zeros(L**2)
    #unary_chi = np.zeros(N*L)
    #unary_chi[i*N:(i+1)*N] = x[t]
    unary_chi = unary_model7b.predict(x[t].reshape((1,60)))[0,i]
    
    if t > 0:
    
        binary_chi[(L*i+j)] = 1
        
    
    
    output = np.zeros(1 + L**2)
    output[0] = unary_chi
    output[1:] = binary_chi
    
    return output

In [492]:
# Adam stochastic mini-batch gradient descent on the (negative) pseudo-log-liklelihood.
# Dataset 7


crf7 = CRF(feat_func7,1+10**2,10, 0, 'B')
crf_u7 = CRF(feat_func7,1+10**2,10, 1e-7, 'U')
alpha = 0.09
B1, B2 = 0.9, 0.999
eps = 1e-7
m, v = np.zeros(crf7.K-1), np.zeros(crf7.K-1)
num_epochs = 20
batch_size = 5
epoch = 1
best_acc7 = 0

acc7_t1 = np.zeros(17)
acc7_u = np.zeros(17)
for n in range(17):
    acc7_t1[n] = 1 - hamming(crf7.MAP(X_ts7[n]), y_ts7[n])
    acc7_u[n] = 1 - hamming(crf_u7.MAP(X_ts7[n]), y_ts7[n])
print('Initial accuracy score Binary: ',100* acc7_t1.mean(), '%.')
print('Accuracy score Unary only: ',100* acc7_u.mean(), '%.')

while epoch <= num_epochs:
    
    min_batch = np.arange(X_tr7.shape[0])
    np.random.shuffle(min_batch)
    
    for k in range(0, X_tr7.shape[0],batch_size):
        
        X_b, y_b = X_tr7[min_batch[k:k+batch_size]],y_tr7[min_batch[k:k+batch_size]]
        t = (X_tr7.shape[0]//batch_size)*(epoch-1) + (k // batch_size) + 1
        g = crf7.grad_pll(X_b, y_b)
        m = B1 * m + (1-B1) * g
        v = B2 * v + np.square(np.sqrt(1-B2) * g)
        m_b = m / (1 - B1**t)
        v_b = v / (1 - B2**t)
        crf7.W[1:] = crf7.W[1:] - (alpha * m_b) / (np.sqrt(v_b) + eps)
        
        if (k // batch_size) % 2 == 0:
            
            acc7_test = np.zeros(17)
            
            for n in range(17):
                acc7_test[n] = 1 - hamming(crf7.MAP(X_ts7[n]), y_ts7[n])
            print('Epoch/Iteration: ', epoch, '/', t-1, '. Current Average Test Hamming Accuracy: ',
                  100* acc7_test.mean(), '%.')
            
            if best_acc7 < acc7_test.mean():
                best_acc7 = acc7_test.mean()
                print('Zero-one-loss: ', 100*acc7_test[acc7_test==1].sum()/17, '%')
    epoch += 1

    
acc7_f = np.zeros(X_ts7.shape[0])

for n in range(X_ts7.shape[0]):
    acc7_f[n] = 1 - hamming(crf7.MAP(X_ts7[n]), y_ts7[n])
    
print('Final accuracy score: ',100* acc7_f.mean(), '%.')
print('Zero-one-loss: ', 100*acc7_f[acc7_f==1].sum()/17, '%')
print('Unary Zero-one-loss: ', 100*acc7_u[acc7_u==1].sum()/17, '%')

Initial accuracy score Binary:  70.668075565 %.
Accuracy score Unary only:  73.1019640753 %.
Epoch/Iteration:  1 / 0 . Current Average Test Hamming Accuracy:  69.9186050451 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  1 / 2 . Current Average Test Hamming Accuracy:  71.5882436322 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  1 / 4 . Current Average Test Hamming Accuracy:  72.1410186638 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 6 . Current Average Test Hamming Accuracy:  74.1882122641 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 8 . Current Average Test Hamming Accuracy:  74.3669896574 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 10 . Current Average Test Hamming Accuracy:  74.8201027208 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 12 . Current Average Test Hamming Accuracy:  76.0608368193 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 14 . Current Average Test Hamming Accuracy:  74.7769439692 %.
Epoch/Iteration:  3 / 16 . Current Average Test Hamming Accuracy:  74.4060719362 %.
Ep

ValueError: The 1d arrays must have equal lengths.

In [496]:
# Data Extraction Data set 8

X_tr8, y_tr8 = [], []

for k in range(30):
    
    T = data8['BOF_tr_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data8['BOF_tr_M'][0][k][t,:]
        feat[t,30:] = data8['BOF_tr_K'][0][k][t,:]
        lab[t] = data8['label_tr'][0][k][t,0]
    X_tr8.append(feat)
    y_tr8.append(lab)
    
X_tr8 = np.asarray(X_tr8, dtype='object')
y_tr8 = np.asarray(y_tr8, dtype='object')


# Data Extraction Testing

X_ts8, y_ts8 = [], []

for k in range(17):
    
    T = data8['BOF_te_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data8['BOF_te_M'][0][k][t,:]
        feat[t,30:] = data8['BOF_te_K'][0][k][t,:]
        lab[t] = data8['label_te'][0][k][t,0]
    X_ts8.append(feat)
    y_ts8.append(lab)
    
X_ts8 = np.asarray(X_ts8, dtype='object')
y_ts8 = np.asarray(y_ts8, dtype='object')

num_node = 0
for k in range(30):
    for j in range(y_tr8[k].shape[0]):
        num_node += 1
        
max_l = 0 
for k in range(30):
     max_l = max(max_l, y_tr8[k].max())
for k in range(16):
    max_l = max(max_l, y_ts8[k].max())
print(max_l)


X_n8, y_n8 = np.zeros((num_node,60)), np.zeros((num_node,max_l+1), dtype='i4')

l = 0
for k in range(30):
    for j in range(X_tr8[k].shape[0]):
        X_n8[l] = X_tr8[k][j]
        y_n8[l][y_tr8[k][j]] = 1
        l += 1

11


In [503]:
#Unary Neural Net dataset 8
inp = Input((60,), batch_shape=(128,60))

out1 = Dense(512, activation='relu')(inp)
out1 = Dropout(0.3)(out1)
out1 = Dense(12, activation='softmax', bias=False)(out1) 

unary_model8 = Model(inp,out1)

sgd = adam(0.009,epsilon=1e-8)

unary_model8.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [506]:
unary_model8.fit(X_n8,y_n8,batch_size=128,nb_epoch=15,validation_split=.05, verbose=2)

Train on 923 samples, validate on 49 samples
Epoch 1/15
0s - loss: 0.3008 - acc: 0.9339 - val_loss: 1.5073 - val_acc: 0.6735
Epoch 2/15
0s - loss: 0.2925 - acc: 0.9404 - val_loss: 1.4760 - val_acc: 0.6939
Epoch 3/15
0s - loss: 0.2822 - acc: 0.9458 - val_loss: 1.4657 - val_acc: 0.6735
Epoch 4/15
0s - loss: 0.2781 - acc: 0.9426 - val_loss: 1.6300 - val_acc: 0.6531
Epoch 5/15
0s - loss: 0.2894 - acc: 0.9426 - val_loss: 1.5378 - val_acc: 0.7143
Epoch 6/15
0s - loss: 0.3039 - acc: 0.9361 - val_loss: 1.5663 - val_acc: 0.6531
Epoch 7/15
0s - loss: 0.2855 - acc: 0.9382 - val_loss: 1.5934 - val_acc: 0.6735
Epoch 8/15
0s - loss: 0.2756 - acc: 0.9393 - val_loss: 1.5122 - val_acc: 0.6939
Epoch 9/15
0s - loss: 0.2744 - acc: 0.9404 - val_loss: 1.8205 - val_acc: 0.6122
Epoch 10/15
0s - loss: 0.2642 - acc: 0.9437 - val_loss: 1.3949 - val_acc: 0.7143
Epoch 11/15
0s - loss: 0.2668 - acc: 0.9480 - val_loss: 1.5483 - val_acc: 0.6939
Epoch 12/15
0s - loss: 0.2658 - acc: 0.9447 - val_loss: 1.7157 - val_acc:

<keras.callbacks.History at 0x7f628a942358>

In [507]:
inp = Input((60,), batch_shape=(1,60))

out1 = Dense(512, activation='relu',
            weights= unary_model8.layers[1].get_weights())(inp)
out1 = Dropout(0.3)(out1)
out1 = Dense(12, activation='linear', bias=False,
            weights= unary_model8.layers[-1].get_weights())(out1) 

unary_model8b = Model(inp,out1)

sgd = adam(0.007,epsilon=1e-7)

unary_model8b.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [508]:
def feat_func8(i,j,x,t,N,L):
    
    binary_chi = np.zeros(L**2)
    #unary_chi = np.zeros(N*L)
    #unary_chi[i*N:(i+1)*N] = x[t]
    unary_chi = unary_model8b.predict(x[t].reshape((1,60)))[0,i]
    
    if t > 0:
    
        binary_chi[(L*i+j)] = 1
        
    
    
    output = np.zeros(1 + L**2)
    output[0] = unary_chi
    output[1:] = binary_chi
    
    return output

In [509]:
# Adam stochastic mini-batch gradient descent on the (negative) pseudo-log-liklelihood.
# Dataset 8


crf8 = CRF(feat_func8,1+12**2,12, 0, 'B')
crf_u8 = CRF(feat_func8,1+12**2,12, 1e-7, 'U')
alpha = 0.09
B1, B2 = 0.9, 0.999
eps = 1e-7
m, v = np.zeros(crf8.K-1), np.zeros(crf8.K-1)
num_epochs = 20
batch_size = 5
epoch = 1
best_acc8 = 0

acc8_t1 = np.zeros(17)
acc8_u = np.zeros(17)
for n in range(17):
    acc8_t1[n] = 1 - hamming(crf8.MAP(X_ts8[n]), y_ts8[n])
    acc8_u[n] = 1 - hamming(crf_u8.MAP(X_ts8[n]), y_ts8[n])
print('Initial accuracy score Binary: ',100* acc8_t1.mean(), '%.')
print('Accuracy score Unary only: ',100* acc8_u.mean(), '%.')

while epoch <= num_epochs:
    
    min_batch = np.arange(X_tr8.shape[0])
    np.random.shuffle(min_batch)
    
    for k in range(0, X_tr8.shape[0],batch_size):
        
        X_b, y_b = X_tr8[min_batch[k:k+batch_size]],y_tr8[min_batch[k:k+batch_size]]
        t = (X_tr8.shape[0]//batch_size)*(epoch-1) + (k // batch_size) + 1
        g = crf8.grad_pll(X_b, y_b)
        m = B1 * m + (1-B1) * g
        v = B2 * v + np.square(np.sqrt(1-B2) * g)
        m_b = m / (1 - B1**t)
        v_b = v / (1 - B2**t)
        crf8.W[1:] = crf8.W[1:] - (alpha * m_b) / (np.sqrt(v_b) + eps)
        
        if (k // batch_size) % 2 == 0:
            
            acc8_test = np.zeros(17)
            
            for n in range(17):
                acc8_test[n] = 1 - hamming(crf8.MAP(X_ts8[n]), y_ts8[n])
            print('Epoch/Iteration: ', epoch, '/', t-1, '. Current Average Test Hamming Accuracy: ',
                  100* acc8_test.mean(), '%.')
            
            if best_acc8 < acc8_test.mean():
                best_acc8 = acc8_test.mean()
                print('Zero-one-loss: ', 100*acc8_test[acc8_test==1].sum()/17, '%')
    epoch += 1

Initial accuracy score Binary:  59.4707474531 %.
Accuracy score Unary only:  63.3089368797 %.
Epoch/Iteration:  1 / 0 . Current Average Test Hamming Accuracy:  60.4123507057 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  1 / 2 . Current Average Test Hamming Accuracy:  61.4748467969 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  1 / 4 . Current Average Test Hamming Accuracy:  61.6838804104 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 6 . Current Average Test Hamming Accuracy:  61.8925449573 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 8 . Current Average Test Hamming Accuracy:  62.2529477391 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 10 . Current Average Test Hamming Accuracy:  63.644359383 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 12 . Current Average Test Hamming Accuracy:  64.2439470441 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 14 . Current Average Test Hamming Accuracy:  64.3002195531 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 16 . Current Average Test Hamming Accuracy

In [510]:
# Data Extraction Data set 9

X_tr9, y_tr9 = [], []

for k in range(30):
    
    T = data9['BOF_tr_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data9['BOF_tr_M'][0][k][t,:]
        feat[t,30:] = data9['BOF_tr_K'][0][k][t,:]
        lab[t] = data9['label_tr'][0][k][t,0]
    X_tr9.append(feat)
    y_tr9.append(lab)
    
X_tr9 = np.asarray(X_tr9, dtype='object')
y_tr9 = np.asarray(y_tr9, dtype='object')


# Data Extraction Testing

X_ts9, y_ts9 = [], []

for k in range(17):
    
    T = data9['BOF_te_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data9['BOF_te_M'][0][k][t,:]
        feat[t,30:] = data9['BOF_te_K'][0][k][t,:]
        lab[t] = data9['label_te'][0][k][t,0]
    X_ts9.append(feat)
    y_ts9.append(lab)
    
X_ts9 = np.asarray(X_ts9, dtype='object')
y_ts9 = np.asarray(y_ts9, dtype='object')

num_node = 0
for k in range(30):
    for j in range(y_tr9[k].shape[0]):
        num_node += 1
        
max_l = 0 
for k in range(30):
     max_l = max(max_l, y_tr9[k].max())
for k in range(16):
    max_l = max(max_l, y_ts9[k].max())
print(max_l)


X_n9, y_n9 = np.zeros((num_node,60)), np.zeros((num_node,max_l+1), dtype='i4')

l = 0
for k in range(30):
    for j in range(X_tr9[k].shape[0]):
        X_n9[l] = X_tr9[k][j]
        y_n9[l][y_tr9[k][j]] = 1
        l += 1

9


In [517]:
#Unary Neural Net dataset 9
inp = Input((60,), batch_shape=(128,60))

out1 = Dense(256, activation='relu')(inp)
out1 = Dropout(0.1)(out1)
out1 = Dense(10, activation='softmax', bias=False)(out1) 

unary_model9 = Model(inp,out1)

sgd = adam(0.009,epsilon=1e-8)

unary_model9.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [518]:
unary_model9.fit(X_n9,y_n9,batch_size=128,nb_epoch=15,validation_split=.05, verbose=2)

Train on 850 samples, validate on 45 samples
Epoch 1/15
0s - loss: 2.0189 - acc: 0.2741 - val_loss: 2.3105 - val_acc: 0.2000
Epoch 2/15
0s - loss: 1.3779 - acc: 0.6600 - val_loss: 1.3189 - val_acc: 0.6444
Epoch 3/15
0s - loss: 0.9468 - acc: 0.7435 - val_loss: 1.1118 - val_acc: 0.6444
Epoch 4/15
0s - loss: 0.7121 - acc: 0.7906 - val_loss: 0.8718 - val_acc: 0.7556
Epoch 5/15
0s - loss: 0.6035 - acc: 0.8165 - val_loss: 0.7318 - val_acc: 0.7778
Epoch 6/15
0s - loss: 0.5263 - acc: 0.8376 - val_loss: 0.7762 - val_acc: 0.7556
Epoch 7/15
0s - loss: 0.4621 - acc: 0.8576 - val_loss: 0.6208 - val_acc: 0.8000
Epoch 8/15
0s - loss: 0.4229 - acc: 0.8776 - val_loss: 0.6478 - val_acc: 0.8222
Epoch 9/15
0s - loss: 0.3892 - acc: 0.8847 - val_loss: 0.5280 - val_acc: 0.8444
Epoch 10/15
0s - loss: 0.3608 - acc: 0.9000 - val_loss: 0.6813 - val_acc: 0.7333
Epoch 11/15
0s - loss: 0.3298 - acc: 0.9165 - val_loss: 0.4961 - val_acc: 0.8444
Epoch 12/15
0s - loss: 0.3146 - acc: 0.9176 - val_loss: 0.5990 - val_acc:

<keras.callbacks.History at 0x7f62896275f8>

In [520]:
inp = Input((60,), batch_shape=(1,60))

out1 = Dense(256, activation='relu',
            weights= unary_model9.layers[1].get_weights())(inp)
out1 = Dropout(0.1)(out1)
out1 = Dense(10, activation='linear', bias=False,
            weights= unary_model9.layers[-1].get_weights())(out1) 

unary_model9b = Model(inp,out1)

sgd = adam(0.007,epsilon=1e-7)

unary_model9b.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [521]:
def feat_func9(i,j,x,t,N,L):
    
    binary_chi = np.zeros(L**2)
    #unary_chi = np.zeros(N*L)
    #unary_chi[i*N:(i+1)*N] = x[t]
    unary_chi = unary_model9b.predict(x[t].reshape((1,60)))[0,i]
    
    if t > 0:
    
        binary_chi[(L*i+j)] = 1
        
    
    
    output = np.zeros(1 + L**2)
    output[0] = unary_chi
    output[1:] = binary_chi
    
    return output

In [522]:
# Adam stochastic mini-batch gradient descent on the (negative) pseudo-log-liklelihood.
# Dataset 9


crf9 = CRF(feat_func9,1+10**2,10, 0, 'B')
crf_u9 = CRF(feat_func9,1+10**2,10, 1e-7, 'U')
alpha = 0.099
B1, B2 = 0.9, 0.999
eps = 1e-7
m, v = np.zeros(crf9.K-1), np.zeros(crf9.K-1)
num_epochs = 20
batch_size = 5
epoch = 1
best_acc9 = 0

acc9_t1 = np.zeros(17)
acc9_u = np.zeros(17)
for n in range(17):
    acc9_t1[n] = 1 - hamming(crf9.MAP(X_ts9[n]), y_ts9[n])
    acc9_u[n] = 1 - hamming(crf_u9.MAP(X_ts9[n]), y_ts9[n])
print('Initial accuracy score Binary: ',100* acc9_t1.mean(), '%.')
print('Accuracy score Unary only: ',100* acc9_u.mean(), '%.')

while epoch <= num_epochs:
    
    min_batch = np.arange(X_tr9.shape[0])
    np.random.shuffle(min_batch)
    
    for k in range(0, X_tr9.shape[0],batch_size):
        
        X_b, y_b = X_tr9[min_batch[k:k+batch_size]],y_tr9[min_batch[k:k+batch_size]]
        t = (X_tr9.shape[0]//batch_size)*(epoch-1) + (k // batch_size) + 1
        g = crf9.grad_pll(X_b, y_b)
        m = B1 * m + (1-B1) * g
        v = B2 * v + np.square(np.sqrt(1-B2) * g)
        m_b = m / (1 - B1**t)
        v_b = v / (1 - B2**t)
        crf9.W[1:] = crf9.W[1:] - (alpha * m_b) / (np.sqrt(v_b) + eps)
        
        if (k // batch_size) % 2 == 0:
            
            acc9_test = np.zeros(17)
            
            for n in range(17):
                acc9_test[n] = 1 - hamming(crf9.MAP(X_ts9[n]), y_ts9[n])
            print('Epoch/Iteration: ', epoch, '/', t-1, '. Current Average Test Hamming Accuracy: ',
                  100* acc9_test.mean(), '%.')
            
            if best_acc9 < acc9_test.mean():
                best_acc9 = acc9_test.mean()
                print('Zero-one-loss: ', 100*acc9_test[acc8_test==1].sum()/17, '%')
    epoch += 1

Initial accuracy score Binary:  75.531951209 %.
Accuracy score Unary only:  81.2996471213 %.
Epoch/Iteration:  1 / 0 . Current Average Test Hamming Accuracy:  75.8944901691 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  1 / 2 . Current Average Test Hamming Accuracy:  77.1673764355 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  1 / 4 . Current Average Test Hamming Accuracy:  79.1027395447 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 6 . Current Average Test Hamming Accuracy:  80.7935920546 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 8 . Current Average Test Hamming Accuracy:  81.9724454711 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 10 . Current Average Test Hamming Accuracy:  83.4408244117 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 12 . Current Average Test Hamming Accuracy:  83.4614128325 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 14 . Current Average Test Hamming Accuracy:  84.369808845 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 16 . Current Average Test Hamming Accuracy:

In [523]:
# Data Extraction Data set 10

X_tr10, y_tr10 = [], []

for k in range(30):
    
    T = data10['BOF_tr_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data10['BOF_tr_M'][0][k][t,:]
        feat[t,30:] = data10['BOF_tr_K'][0][k][t,:]
        lab[t] = data10['label_tr'][0][k][t,0]
    X_tr10.append(feat)
    y_tr10.append(lab)
    
X_tr10 = np.asarray(X_tr10, dtype='object')
y_tr10 = np.asarray(y_tr10, dtype='object')


# Data Extraction Testing

X_ts10, y_ts10 = [], []

for k in range(17):
    
    T = data10['BOF_te_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data10['BOF_te_M'][0][k][t,:]
        feat[t,30:] = data10['BOF_te_K'][0][k][t,:]
        lab[t] = data10['label_te'][0][k][t,0]
    X_ts10.append(feat)
    y_ts10.append(lab)
    
X_ts10 = np.asarray(X_ts10, dtype='object')
y_ts10 = np.asarray(y_ts10, dtype='object')

num_node = 0
for k in range(30):
    for j in range(y_tr10[k].shape[0]):
        num_node += 1
        
max_l = 0 
for k in range(30):
     max_l = max(max_l, y_tr10[k].max())
for k in range(16):
    max_l = max(max_l, y_ts10[k].max())
print(max_l)


X_n10, y_n10 = np.zeros((num_node,60)), np.zeros((num_node,max_l+1), dtype='i4')

l = 0
for k in range(30):
    for j in range(X_tr10[k].shape[0]):
        X_n10[l] = X_tr10[k][j]
        y_n10[l][y_tr10[k][j]] = 1
        l += 1

9


In [524]:
#Unary Neural Net dataset 10
inp = Input((60,), batch_shape=(128,60))

out1 = Dense(256, activation='relu')(inp)
out1 = Dropout(0.1)(out1)
out1 = Dense(10, activation='softmax', bias=False)(out1) 

unary_model10 = Model(inp,out1)

sgd = adam(0.009,epsilon=1e-8)

unary_model10.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [525]:
unary_model10.fit(X_n10,y_n10,batch_size=128,nb_epoch=15,validation_split=.05, verbose=2)

Train on 780 samples, validate on 42 samples
Epoch 1/15
0s - loss: 2.1094 - acc: 0.2667 - val_loss: 1.9310 - val_acc: 0.2143
Epoch 2/15
0s - loss: 1.6737 - acc: 0.4603 - val_loss: 1.4802 - val_acc: 0.6190
Epoch 3/15
0s - loss: 1.4023 - acc: 0.5936 - val_loss: 1.2965 - val_acc: 0.5952
Epoch 4/15
0s - loss: 1.2225 - acc: 0.6385 - val_loss: 1.0750 - val_acc: 0.6905
Epoch 5/15
0s - loss: 1.1032 - acc: 0.6744 - val_loss: 1.0581 - val_acc: 0.6667
Epoch 6/15
0s - loss: 1.0228 - acc: 0.6923 - val_loss: 1.0479 - val_acc: 0.7381
Epoch 7/15
0s - loss: 0.9118 - acc: 0.7423 - val_loss: 0.8458 - val_acc: 0.7381
Epoch 8/15
0s - loss: 0.8836 - acc: 0.7385 - val_loss: 0.8744 - val_acc: 0.7857
Epoch 9/15
0s - loss: 0.7957 - acc: 0.7846 - val_loss: 0.9787 - val_acc: 0.7619
Epoch 10/15
0s - loss: 0.7398 - acc: 0.8038 - val_loss: 0.9947 - val_acc: 0.6429
Epoch 11/15
0s - loss: 0.7023 - acc: 0.8026 - val_loss: 0.8509 - val_acc: 0.8333
Epoch 12/15
0s - loss: 0.7024 - acc: 0.8103 - val_loss: 1.0256 - val_acc:

<keras.callbacks.History at 0x7f6289137390>

In [527]:
inp = Input((60,), batch_shape=(1,60))

out1 = Dense(256, activation='relu',
            weights= unary_model10.layers[1].get_weights())(inp)
out1 = Dropout(0.1)(out1)
out1 = Dense(10, activation='linear', bias=False,
            weights= unary_model10.layers[-1].get_weights())(out1) 

unary_model10b = Model(inp,out1)

sgd = adam(0.007,epsilon=1e-7)

unary_model10b.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

def feat_func10(i,j,x,t,N,L):
    
    binary_chi = np.zeros(L**2)
    #unary_chi = np.zeros(N*L)
    #unary_chi[i*N:(i+1)*N] = x[t]
    unary_chi = unary_model10b.predict(x[t].reshape((1,60)))[0,i]
    
    if t > 0:
    
        binary_chi[(L*i+j)] = 1
        
    
    
    output = np.zeros(1 + L**2)
    output[0] = unary_chi
    output[1:] = binary_chi
    
    return output

In [528]:
# Adam stochastic mini-batch gradient descent on the (negative) pseudo-log-liklelihood.
# Dataset 10


crf10 = CRF(feat_func10,1+10**2,10, 0, 'B')
crf_u10 = CRF(feat_func10,1+10**2,10, 1e-7, 'U')
alpha = 0.099
B1, B2 = 0.9, 0.999
eps = 1e-7
m, v = np.zeros(crf10.K-1), np.zeros(crf10.K-1)
num_epochs = 20
batch_size = 5
epoch = 1
best_acc10 = 0

acc10_t1 = np.zeros(17)
acc10_u = np.zeros(17)
for n in range(17):
    acc10_t1[n] = 1 - hamming(crf10.MAP(X_ts10[n]), y_ts10[n])
    acc10_u[n] = 1 - hamming(crf_u10.MAP(X_ts10[n]), y_ts10[n])
print('Initial accuracy score Binary: ',100* acc10_t1.mean(), '%.')
print('Accuracy score Unary only: ',100* acc10_u.mean(), '%.')

while epoch <= num_epochs:
    
    min_batch = np.arange(X_tr10.shape[0])
    np.random.shuffle(min_batch)
    
    for k in range(0, X_tr10.shape[0],batch_size):
        
        X_b, y_b = X_tr10[min_batch[k:k+batch_size]],y_tr10[min_batch[k:k+batch_size]]
        t = (X_tr10.shape[0]//batch_size)*(epoch-1) + (k // batch_size) + 1
        g = crf10.grad_pll(X_b, y_b)
        m = B1 * m + (1-B1) * g
        v = B2 * v + np.square(np.sqrt(1-B2) * g)
        m_b = m / (1 - B1**t)
        v_b = v / (1 - B2**t)
        crf10.W[1:] = crf10.W[1:] - (alpha * m_b) / (np.sqrt(v_b) + eps)
        
        if (k // batch_size) % 2 == 0:
            
            acc10_test = np.zeros(17)
            
            for n in range(17):
                acc10_test[n] = 1 - hamming(crf10.MAP(X_ts10[n]), y_ts10[n])
            print('Epoch/Iteration: ', epoch, '/', t-1, '. Current Average Test Hamming Accuracy: ',
                  100* acc10_test.mean(), '%.')
            
            if best_acc10 < acc10_test.mean():
                best_acc10 = acc10_test.mean()
                print('Zero-one-loss: ', 100*acc10_test[acc10_test==1].sum()/17, '%')
    epoch += 1

Initial accuracy score Binary:  55.7538696592 %.
Accuracy score Unary only:  57.2903694797 %.
Epoch/Iteration:  1 / 0 . Current Average Test Hamming Accuracy:  57.1915751226 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  1 / 2 . Current Average Test Hamming Accuracy:  56.7903801845 %.
Epoch/Iteration:  1 / 4 . Current Average Test Hamming Accuracy:  62.0465625674 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 6 . Current Average Test Hamming Accuracy:  61.875980095 %.
Epoch/Iteration:  2 / 8 . Current Average Test Hamming Accuracy:  62.8319195804 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 10 . Current Average Test Hamming Accuracy:  62.8949937611 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 12 . Current Average Test Hamming Accuracy:  63.5170471301 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 14 . Current Average Test Hamming Accuracy:  63.8165123707 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 16 . Current Average Test Hamming Accuracy:  64.3786952859 %.
Zero-one-loss:  0.0 %
Ep

In [530]:
# Data Extraction Data set 11

X_tr11, y_tr11 = [], []

for k in range(30):
    
    T = data11['BOF_tr_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data11['BOF_tr_M'][0][k][t,:]
        feat[t,30:] = data11['BOF_tr_K'][0][k][t,:]
        lab[t] = data11['label_tr'][0][k][t,0]
    X_tr11.append(feat)
    y_tr11.append(lab)
    
X_tr11 = np.asarray(X_tr11, dtype='object')
y_tr11 = np.asarray(y_tr11, dtype='object')


# Data Extraction Testing

X_ts11, y_ts11 = [], []

for k in range(17):
    
    T = data11['BOF_te_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data11['BOF_te_M'][0][k][t,:]
        feat[t,30:] = data11['BOF_te_K'][0][k][t,:]
        lab[t] = data11['label_te'][0][k][t,0]
    X_ts11.append(feat)
    y_ts11.append(lab)
    
X_ts11 = np.asarray(X_ts11, dtype='object')
y_ts11 = np.asarray(y_ts11, dtype='object')

num_node = 0
for k in range(30):
    for j in range(y_tr11[k].shape[0]):
        num_node += 1
        
max_l = 0 
for k in range(30):
     max_l = max(max_l, y_tr11[k].max())
for k in range(17):
    max_l = max(max_l, y_ts11[k].max())
print(max_l)


X_n11, y_n11 = np.zeros((num_node,60)), np.zeros((num_node,max_l+1), dtype='i4')

l = 0
for k in range(30):
    for j in range(X_tr11[k].shape[0]):
        X_n11[l] = X_tr11[k][j]
        y_n11[l][y_tr11[k][j]] = 1
        l += 1

8


In [544]:
#Unary Neural Net dataset 11
inp = Input((60,), batch_shape=(128,60))

out1 = Dense(256, activation='relu')(inp)
out1 = Dropout(0.3)(out1)
out1 = Dense(9, activation='softmax', bias=False)(out1) 

unary_model11 = Model(inp,out1)

sgd = adam(0.009,epsilon=1e-8)

unary_model11.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [545]:
unary_model11.fit(X_n11,y_n11,batch_size=128,nb_epoch=15,validation_split=.05, verbose=2)

Train on 1099 samples, validate on 58 samples
Epoch 1/15
0s - loss: 1.7607 - acc: 0.4149 - val_loss: 1.6187 - val_acc: 0.3793
Epoch 2/15
0s - loss: 1.3091 - acc: 0.5905 - val_loss: 1.4998 - val_acc: 0.4655
Epoch 3/15
0s - loss: 1.0276 - acc: 0.6770 - val_loss: 1.0756 - val_acc: 0.5862
Epoch 4/15
0s - loss: 0.8592 - acc: 0.7370 - val_loss: 1.1573 - val_acc: 0.5690
Epoch 5/15
0s - loss: 0.7647 - acc: 0.7698 - val_loss: 1.0189 - val_acc: 0.6897
Epoch 6/15
0s - loss: 0.7088 - acc: 0.7898 - val_loss: 0.9729 - val_acc: 0.6552
Epoch 7/15
0s - loss: 0.6741 - acc: 0.8025 - val_loss: 1.0212 - val_acc: 0.6552
Epoch 8/15
0s - loss: 0.6539 - acc: 0.7925 - val_loss: 0.9475 - val_acc: 0.7069
Epoch 9/15
0s - loss: 0.5972 - acc: 0.8089 - val_loss: 0.9729 - val_acc: 0.7069
Epoch 10/15
0s - loss: 0.5711 - acc: 0.8280 - val_loss: 0.9407 - val_acc: 0.7069
Epoch 11/15
0s - loss: 0.5562 - acc: 0.8253 - val_loss: 0.9430 - val_acc: 0.6207
Epoch 12/15
0s - loss: 0.5207 - acc: 0.8417 - val_loss: 0.8972 - val_acc

<keras.callbacks.History at 0x7f62873e7f98>

In [546]:
inp = Input((60,), batch_shape=(1,60))

out1 = Dense(256, activation='relu',
            weights= unary_model11.layers[1].get_weights())(inp)
out1 = Dropout(0.3)(out1)
out1 = Dense(9, activation='linear', bias=False,
            weights= unary_model11.layers[-1].get_weights())(out1) 

unary_model11b = Model(inp,out1)

sgd = adam(0.007,epsilon=1e-7)

unary_model11b.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

def feat_func11(i,j,x,t,N,L):
    
    binary_chi = np.zeros(L**2)
    #unary_chi = np.zeros(N*L)
    #unary_chi[i*N:(i+1)*N] = x[t]
    unary_chi = unary_model11b.predict(x[t].reshape((1,60)))[0,i]
    
    if t > 0:
    
        binary_chi[(L*i+j)] = 1
        
    
    
    output = np.zeros(1 + L**2)
    output[0] = unary_chi
    output[1:] = binary_chi
    
    return output

In [547]:
# Adam stochastic mini-batch gradient descent on the (negative) pseudo-log-liklelihood.
# Dataset 11


crf11 = CRF(feat_func11,1+9**2,9, 0, 'B')
crf_u11 = CRF(feat_func11,1+9**2,9, 1e-7, 'U')
alpha = 0.099
B1, B2 = 0.9, 0.999
eps = 1e-7
m, v = np.zeros(crf11.K-1), np.zeros(crf11.K-1)
num_epochs = 20
batch_size = 5
epoch = 1
best_acc11 = 0

acc11_t1 = np.zeros(17)
acc11_u = np.zeros(17)
for n in range(17):
    acc11_t1[n] = 1 - hamming(crf11.MAP(X_ts11[n]), y_ts11[n])
    acc11_u[n] = 1 - hamming(crf_u11.MAP(X_ts11[n]), y_ts11[n])
print('Initial accuracy score Binary: ',100* acc11_t1.mean(), '%.')
print('Accuracy score Unary only: ',100* acc11_u.mean(), '%.')

while epoch <= num_epochs:
    
    min_batch = np.arange(X_tr11.shape[0])
    np.random.shuffle(min_batch)
    
    for k in range(0, X_tr11.shape[0],batch_size):
        
        X_b, y_b = X_tr11[min_batch[k:k+batch_size]],y_tr11[min_batch[k:k+batch_size]]
        t = (X_tr11.shape[0]//batch_size)*(epoch-1) + (k // batch_size) + 1
        g = crf11.grad_pll(X_b, y_b)
        m = B1 * m + (1-B1) * g
        v = B2 * v + np.square(np.sqrt(1-B2) * g)
        m_b = m / (1 - B1**t)
        v_b = v / (1 - B2**t)
        crf11.W[1:] = crf11.W[1:] - (alpha * m_b) / (np.sqrt(v_b) + eps)
        
        if (k // batch_size) % 2 == 0:
            
            acc11_test = np.zeros(17)
            
            for n in range(17):
                acc11_test[n] = 1 - hamming(crf11.MAP(X_ts11[n]), y_ts11[n])
            print('Epoch/Iteration: ', epoch, '/', t-1, '. Current Average Test Hamming Accuracy: ',
                  100* acc11_test.mean(), '%.')
            
            if best_acc11 < acc11_test.mean():
                best_acc11 = acc11_test.mean()
                print('Zero-one-loss: ', 100*acc11_test[acc11_test==1].sum()/17, '%')
    epoch += 1

Initial accuracy score Binary:  59.0958235196 %.
Accuracy score Unary only:  66.2351178692 %.
Epoch/Iteration:  1 / 0 . Current Average Test Hamming Accuracy:  59.9206412389 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  1 / 2 . Current Average Test Hamming Accuracy:  60.6744245143 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  1 / 4 . Current Average Test Hamming Accuracy:  62.7519810036 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 6 . Current Average Test Hamming Accuracy:  63.86754377 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 8 . Current Average Test Hamming Accuracy:  64.6202588599 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 10 . Current Average Test Hamming Accuracy:  64.9132589557 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 12 . Current Average Test Hamming Accuracy:  64.9807379319 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 14 . Current Average Test Hamming Accuracy:  65.5679032898 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 16 . Current Average Test Hamming Accuracy:

In [548]:
# Data Extraction Data set 12

X_tr12, y_tr12 = [], []

for k in range(30):
    
    T = data12['BOF_tr_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data12['BOF_tr_M'][0][k][t,:]
        feat[t,30:] = data12['BOF_tr_K'][0][k][t,:]
        lab[t] = data12['label_tr'][0][k][t,0]
    X_tr12.append(feat)
    y_tr12.append(lab)
    
X_tr12 = np.asarray(X_tr12, dtype='object')
y_tr12 = np.asarray(y_tr12, dtype='object')


# Data Extraction Testing

X_ts12, y_ts12 = [], []

for k in range(17):
    
    T = data12['BOF_te_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data12['BOF_te_M'][0][k][t,:]
        feat[t,30:] = data12['BOF_te_K'][0][k][t,:]
        lab[t] = data12['label_te'][0][k][t,0]
    X_ts12.append(feat)
    y_ts12.append(lab)
    
X_ts12 = np.asarray(X_ts12, dtype='object')
y_ts12 = np.asarray(y_ts12, dtype='object')

num_node = 0
for k in range(30):
    for j in range(y_tr12[k].shape[0]):
        num_node += 1
        
max_l = 0 
for k in range(30):
     max_l = max(max_l, y_tr12[k].max())
for k in range(17):
    max_l = max(max_l, y_ts12[k].max())
print(max_l)


X_n12, y_n12 = np.zeros((num_node,60)), np.zeros((num_node,max_l+1), dtype='i4')

l = 0
for k in range(30):
    for j in range(X_tr12[k].shape[0]):
        X_n12[l] = X_tr12[k][j]
        y_n12[l][y_tr12[k][j]] = 1
        l += 1

11


In [551]:
#Unary Neural Net dataset 12
inp = Input((60,), batch_shape=(128,60))

out1 = Dense(512, activation='relu')(inp)
out1 = Dropout(0.3)(out1)
out1 = Dense(12, activation='softmax', bias=False)(out1) 

unary_model12 = Model(inp,out1)

sgd = adam(0.009,epsilon=1e-8)

unary_model12.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [553]:
unary_model12.fit(X_n12,y_n12,batch_size=128,nb_epoch=15,validation_split=.05, verbose=2)

Train on 760 samples, validate on 41 samples
Epoch 1/15
0s - loss: 0.1709 - acc: 0.9671 - val_loss: 0.7452 - val_acc: 0.7317
Epoch 2/15
0s - loss: 0.1736 - acc: 0.9592 - val_loss: 0.8788 - val_acc: 0.7073
Epoch 3/15
0s - loss: 0.1471 - acc: 0.9618 - val_loss: 0.8328 - val_acc: 0.6829
Epoch 4/15
0s - loss: 0.1455 - acc: 0.9658 - val_loss: 0.7914 - val_acc: 0.7073
Epoch 5/15
0s - loss: 0.1417 - acc: 0.9671 - val_loss: 0.8839 - val_acc: 0.6829
Epoch 6/15
0s - loss: 0.1384 - acc: 0.9658 - val_loss: 0.7742 - val_acc: 0.7073
Epoch 7/15
0s - loss: 0.1451 - acc: 0.9658 - val_loss: 0.8902 - val_acc: 0.7073
Epoch 8/15
0s - loss: 0.1327 - acc: 0.9724 - val_loss: 0.5981 - val_acc: 0.7561
Epoch 9/15
0s - loss: 0.1370 - acc: 0.9724 - val_loss: 0.9623 - val_acc: 0.6341
Epoch 10/15
0s - loss: 0.1240 - acc: 0.9697 - val_loss: 0.7176 - val_acc: 0.7073
Epoch 11/15
0s - loss: 0.1181 - acc: 0.9697 - val_loss: 0.8667 - val_acc: 0.6829
Epoch 12/15
0s - loss: 0.1113 - acc: 0.9789 - val_loss: 0.7933 - val_acc:

<keras.callbacks.History at 0x7f6286a31898>

In [554]:
inp = Input((60,), batch_shape=(1,60))

out1 = Dense(512, activation='relu',
            weights= unary_model12.layers[1].get_weights())(inp)
out1 = Dropout(0.3)(out1)
out1 = Dense(12, activation='linear', bias=False,
            weights= unary_model12.layers[-1].get_weights())(out1) 

unary_model12b = Model(inp,out1)

sgd = adam(0.007,epsilon=1e-7)

unary_model12b.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

def feat_func12(i,j,x,t,N,L):
    
    binary_chi = np.zeros(L**2)
    #unary_chi = np.zeros(N*L)
    #unary_chi[i*N:(i+1)*N] = x[t]
    unary_chi = unary_model12b.predict(x[t].reshape((1,60)))[0,i]
    
    if t > 0:
    
        binary_chi[(L*i+j)] = 1
        
    
    
    output = np.zeros(1 + L**2)
    output[0] = unary_chi
    output[1:] = binary_chi
    
    return output

In [555]:
# Adam stochastic mini-batch gradient descent on the (negative) pseudo-log-liklelihood.
# Dataset 12


crf12 = CRF(feat_func12,1+12**2,12, 0, 'B')
crf_u12 = CRF(feat_func12,1+12**2,12, 1e-7, 'U')
alpha = 0.099
B1, B2 = 0.9, 0.999
eps = 1e-7
m, v = np.zeros(crf12.K-1), np.zeros(crf12.K-1)
num_epochs = 20
batch_size = 5
epoch = 1
best_acc12 = 0

acc12_t1 = np.zeros(17)
acc12_u = np.zeros(17)
for n in range(17):
    acc12_t1[n] = 1 - hamming(crf12.MAP(X_ts12[n]), y_ts12[n])
    acc12_u[n] = 1 - hamming(crf_u12.MAP(X_ts12[n]), y_ts12[n])
print('Initial accuracy score Binary: ',100* acc12_t1.mean(), '%.')
print('Accuracy score Unary only: ',100* acc12_u.mean(), '%.')

while epoch <= num_epochs:
    
    min_batch = np.arange(X_tr12.shape[0])
    np.random.shuffle(min_batch)
    
    for k in range(0, X_tr12.shape[0],batch_size):
        
        X_b, y_b = X_tr12[min_batch[k:k+batch_size]],y_tr12[min_batch[k:k+batch_size]]
        t = (X_tr12.shape[0]//batch_size)*(epoch-1) + (k // batch_size) + 1
        g = crf12.grad_pll(X_b, y_b)
        m = B1 * m + (1-B1) * g
        v = B2 * v + np.square(np.sqrt(1-B2) * g)
        m_b = m / (1 - B1**t)
        v_b = v / (1 - B2**t)
        crf12.W[1:] = crf12.W[1:] - (alpha * m_b) / (np.sqrt(v_b) + eps)
        
        if (k // batch_size) % 2 == 0:
            
            acc12_test = np.zeros(17)
            
            for n in range(17):
                acc12_test[n] = 1 - hamming(crf12.MAP(X_ts12[n]), y_ts12[n])
            print('Epoch/Iteration: ', epoch, '/', t-1, '. Current Average Test Hamming Accuracy: ',
                  100* acc12_test.mean(), '%.')
            
            if best_acc12 < acc12_test.mean():
                best_acc12 = acc12_test.mean()
                print('Zero-one-loss: ', 100*acc12_test[acc12_test==1].sum()/17, '%')
    epoch += 1

Initial accuracy score Binary:  79.8969317384 %.
Accuracy score Unary only:  80.4504612906 %.
Epoch/Iteration:  1 / 0 . Current Average Test Hamming Accuracy:  79.6757150466 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  1 / 2 . Current Average Test Hamming Accuracy:  80.9993063105 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  1 / 4 . Current Average Test Hamming Accuracy:  81.0860130362 %.
Zero-one-loss:  5.88235294118 %
Epoch/Iteration:  2 / 6 . Current Average Test Hamming Accuracy:  81.7113204797 %.
Zero-one-loss:  5.88235294118 %
Epoch/Iteration:  2 / 8 . Current Average Test Hamming Accuracy:  82.2269018204 %.
Zero-one-loss:  5.88235294118 %
Epoch/Iteration:  2 / 10 . Current Average Test Hamming Accuracy:  82.2269018204 %.
Epoch/Iteration:  3 / 12 . Current Average Test Hamming Accuracy:  82.7543642674 %.
Zero-one-loss:  11.7647058824 %
Epoch/Iteration:  3 / 14 . Current Average Test Hamming Accuracy:  83.3947817247 %.
Zero-one-loss:  11.7647058824 %
Epoch/Iteration:  3 / 16 . Current 

In [556]:
print('Zero-one-loss: ', 100*acc12_u[acc12_u==1].sum()/17, '%')

Zero-one-loss:  0.0 %


In [557]:
# Data Extraction Data set 13

X_tr13, y_tr13 = [], []

for k in range(30):
    
    T = data13['BOF_tr_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data13['BOF_tr_M'][0][k][t,:]
        feat[t,30:] = data13['BOF_tr_K'][0][k][t,:]
        lab[t] = data13['label_tr'][0][k][t,0]
    X_tr13.append(feat)
    y_tr13.append(lab)
    
X_tr13 = np.asarray(X_tr13, dtype='object')
y_tr13 = np.asarray(y_tr13, dtype='object')


# Data Extraction Testing

X_ts13, y_ts13 = [], []

for k in range(17):
    
    T = data13['BOF_te_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data13['BOF_te_M'][0][k][t,:]
        feat[t,30:] = data13['BOF_te_K'][0][k][t,:]
        lab[t] = data13['label_te'][0][k][t,0]
    X_ts13.append(feat)
    y_ts13.append(lab)
    
X_ts13 = np.asarray(X_ts13, dtype='object')
y_ts13 = np.asarray(y_ts13, dtype='object')

num_node = 0
for k in range(30):
    for j in range(y_tr13[k].shape[0]):
        num_node += 1
        
max_l = 0 
for k in range(30):
     max_l = max(max_l, y_tr13[k].max())
for k in range(17):
    max_l = max(max_l, y_ts13[k].max())
print(max_l)


X_n13, y_n13 = np.zeros((num_node,60)), np.zeros((num_node,max_l+1), dtype='i4')

l = 0
for k in range(30):
    for j in range(X_tr13[k].shape[0]):
        X_n13[l] = X_tr13[k][j]
        y_n13[l][y_tr13[k][j]] = 1
        l += 1

12


In [558]:
#Unary Neural Net dataset 13
inp = Input((60,), batch_shape=(128,60))

out1 = Dense(512, activation='relu')(inp)
out1 = Dropout(0.3)(out1)
out1 = Dense(13, activation='softmax', bias=False)(out1) 

unary_model13 = Model(inp,out1)

sgd = adam(0.009,epsilon=1e-8)

unary_model13.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [561]:
unary_model13.fit(X_n13,y_n13,batch_size=128,nb_epoch=15,validation_split=.05, verbose=2)

Train on 520 samples, validate on 28 samples
Epoch 1/15
0s - loss: 0.3233 - acc: 0.9346 - val_loss: 0.8757 - val_acc: 0.8571
Epoch 2/15
0s - loss: 0.2989 - acc: 0.9404 - val_loss: 0.8583 - val_acc: 0.8571
Epoch 3/15
0s - loss: 0.2752 - acc: 0.9519 - val_loss: 1.0194 - val_acc: 0.7500
Epoch 4/15
0s - loss: 0.2745 - acc: 0.9481 - val_loss: 0.9423 - val_acc: 0.7857
Epoch 5/15
0s - loss: 0.2585 - acc: 0.9558 - val_loss: 0.8303 - val_acc: 0.8571
Epoch 6/15
0s - loss: 0.2397 - acc: 0.9596 - val_loss: 0.8874 - val_acc: 0.8214
Epoch 7/15
0s - loss: 0.2951 - acc: 0.9308 - val_loss: 0.9701 - val_acc: 0.7857
Epoch 8/15
0s - loss: 0.2715 - acc: 0.9481 - val_loss: 0.8807 - val_acc: 0.8214
Epoch 9/15
0s - loss: 0.3108 - acc: 0.9327 - val_loss: 1.0153 - val_acc: 0.7500
Epoch 10/15
0s - loss: 0.2720 - acc: 0.9481 - val_loss: 1.3624 - val_acc: 0.7500
Epoch 11/15
0s - loss: 0.2968 - acc: 0.9385 - val_loss: 1.0666 - val_acc: 0.8214
Epoch 12/15
0s - loss: 0.2503 - acc: 0.9519 - val_loss: 0.9303 - val_acc:

<keras.callbacks.History at 0x7f62869ec320>

In [563]:
inp = Input((60,), batch_shape=(1,60))

out1 = Dense(512, activation='relu',
            weights= unary_model13.layers[1].get_weights())(inp)
out1 = Dropout(0.3)(out1)
out1 = Dense(13, activation='linear', bias=False,
            weights= unary_model13.layers[-1].get_weights())(out1) 

unary_model13b = Model(inp,out1)

sgd = adam(0.007,epsilon=1e-7)

unary_model13b.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

def feat_func13(i,j,x,t,N,L):
    
    binary_chi = np.zeros(L**2)
    #unary_chi = np.zeros(N*L)
    #unary_chi[i*N:(i+1)*N] = x[t]
    unary_chi = unary_model13b.predict(x[t].reshape((1,60)))[0,i]
    
    if t > 0:
    
        binary_chi[(L*i+j)] = 1
        
    
    
    output = np.zeros(1 + L**2)
    output[0] = unary_chi
    output[1:] = binary_chi
    
    return output

In [565]:
# Adam stochastic mini-batch gradient descent on the (negative) pseudo-log-liklelihood.
# Dataset 13


crf13 = CRF(feat_func13,1+13**2,13, 0, 'B')
crf_u13 = CRF(feat_func13,1+13**2,13, 1e-7, 'U')
alpha = 0.099
B1, B2 = 0.9, 0.999
eps = 1e-7
m, v = np.zeros(crf13.K-1), np.zeros(crf13.K-1)
num_epochs = 20
batch_size = 5
epoch = 1
best_acc13 = 0

acc13_t1 = np.zeros(17)
acc13_u = np.zeros(17)
for n in range(17):
    acc13_t1[n] = 1 - hamming(crf13.MAP(X_ts13[n]), y_ts13[n])
    acc13_u[n] = 1 - hamming(crf_u13.MAP(X_ts13[n]), y_ts13[n])
print('Initial accuracy score Binary: ',100* acc13_t1.mean(), '%.')
print('Accuracy score Unary only: ',100* acc13_u.mean(), '%.')
print('Zero-one-loss Unary: ', 100*acc13_u[acc13_u==1].sum()/17, '%')

while epoch <= num_epochs:
    
    min_batch = np.arange(X_tr13.shape[0])
    np.random.shuffle(min_batch)
    
    for k in range(0, X_tr13.shape[0],batch_size):
        
        X_b, y_b = X_tr13[min_batch[k:k+batch_size]],y_tr13[min_batch[k:k+batch_size]]
        t = (X_tr13.shape[0]//batch_size)*(epoch-1) + (k // batch_size) + 1
        g = crf13.grad_pll(X_b, y_b)
        m = B1 * m + (1-B1) * g
        v = B2 * v + np.square(np.sqrt(1-B2) * g)
        m_b = m / (1 - B1**t)
        v_b = v / (1 - B2**t)
        crf13.W[1:] = crf13.W[1:] - (alpha * m_b) / (np.sqrt(v_b) + eps)
        
        if (k // batch_size) % 2 == 0:
            
            acc13_test = np.zeros(17)
            
            for n in range(17):
                acc13_test[n] = 1 - hamming(crf13.MAP(X_ts13[n]), y_ts13[n])
            print('Epoch/Iteration: ', epoch, '/', t-1, '. Current Average Test Hamming Accuracy: ',
                  100* acc13_test.mean(), '%.')
            
            if best_acc13 < acc13_test.mean():
                best_acc13 = acc13_test.mean()
                print('Zero-one-loss: ', 100*acc13_test[acc13_test==1].sum()/17, '%')
    epoch += 1

Initial accuracy score Binary:  77.3069015212 %.
Accuracy score Unary only:  76.9220227042 %.
Zero-one-loss Unary:  5.88235294118 %
Epoch/Iteration:  1 / 0 . Current Average Test Hamming Accuracy:  77.9750674198 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  1 / 2 . Current Average Test Hamming Accuracy:  77.3046085459 %.
Epoch/Iteration:  1 / 4 . Current Average Test Hamming Accuracy:  78.5488724138 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 6 . Current Average Test Hamming Accuracy:  79.188668549 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 8 . Current Average Test Hamming Accuracy:  79.6835413614 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 10 . Current Average Test Hamming Accuracy:  80.2137981996 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 12 . Current Average Test Hamming Accuracy:  80.7201545883 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 14 . Current Average Test Hamming Accuracy:  80.830411457 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 16 . Current Average Test H

In [566]:
# Data Extraction Data set 14

X_tr14, y_tr14 = [], []

for k in range(30):
    
    T = data14['BOF_tr_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data14['BOF_tr_M'][0][k][t,:]
        feat[t,30:] = data14['BOF_tr_K'][0][k][t,:]
        lab[t] = data14['label_tr'][0][k][t,0]
    X_tr14.append(feat)
    y_tr14.append(lab)
    
X_tr14 = np.asarray(X_tr14, dtype='object')
y_tr14 = np.asarray(y_tr14, dtype='object')


# Data Extraction Testing

X_ts14, y_ts14 = [], []

for k in range(17):
    
    T = data14['BOF_te_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data14['BOF_te_M'][0][k][t,:]
        feat[t,30:] = data14['BOF_te_K'][0][k][t,:]
        lab[t] = data14['label_te'][0][k][t,0]
    X_ts14.append(feat)
    y_ts14.append(lab)
    
X_ts14 = np.asarray(X_ts14, dtype='object')
y_ts14 = np.asarray(y_ts14, dtype='object')

num_node = 0
for k in range(30):
    for j in range(y_tr14[k].shape[0]):
        num_node += 1
        
max_l = 0 
for k in range(30):
     max_l = max(max_l, y_tr14[k].max())
for k in range(17):
    max_l = max(max_l, y_ts14[k].max())
print(max_l)


X_n14, y_n14 = np.zeros((num_node,60)), np.zeros((num_node,max_l+1), dtype='i4')

l = 0
for k in range(30):
    for j in range(X_tr14[k].shape[0]):
        X_n14[l] = X_tr14[k][j]
        y_n14[l][y_tr14[k][j]] = 1
        l += 1

8


In [578]:
#Unary Neural Net dataset 14
inp = Input((60,), batch_shape=(128,60))

out1 = Dense(256, activation='relu')(inp)
out1 = Dropout(0.15)(out1)
out1 = Dense(9, activation='softmax', bias=False)(out1) 

unary_model14 = Model(inp,out1)

sgd = adam(0.009,epsilon=1e-8)

unary_model14.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [579]:
unary_model14.fit(X_n14,y_n14,batch_size=128,nb_epoch=15,validation_split=.05, verbose=2)

Train on 519 samples, validate on 28 samples
Epoch 1/15
0s - loss: 1.8796 - acc: 0.3507 - val_loss: 1.4164 - val_acc: 0.5000
Epoch 2/15
0s - loss: 1.5005 - acc: 0.5202 - val_loss: 1.1665 - val_acc: 0.7143
Epoch 3/15
0s - loss: 1.2478 - acc: 0.5761 - val_loss: 0.9721 - val_acc: 0.6071
Epoch 4/15
0s - loss: 0.9833 - acc: 0.7013 - val_loss: 0.9844 - val_acc: 0.6786
Epoch 5/15
0s - loss: 0.8342 - acc: 0.7881 - val_loss: 0.8006 - val_acc: 0.7143
Epoch 6/15
0s - loss: 0.6924 - acc: 0.7938 - val_loss: 0.7636 - val_acc: 0.7857
Epoch 7/15
0s - loss: 0.5995 - acc: 0.8150 - val_loss: 0.7314 - val_acc: 0.7857
Epoch 8/15
0s - loss: 0.5575 - acc: 0.8401 - val_loss: 0.7293 - val_acc: 0.7500
Epoch 9/15
0s - loss: 0.5016 - acc: 0.8459 - val_loss: 0.7681 - val_acc: 0.7500
Epoch 10/15
0s - loss: 0.4837 - acc: 0.8709 - val_loss: 0.7424 - val_acc: 0.7500
Epoch 11/15
0s - loss: 0.4440 - acc: 0.8844 - val_loss: 0.7816 - val_acc: 0.7857
Epoch 12/15
0s - loss: 0.4613 - acc: 0.8902 - val_loss: 0.9298 - val_acc:

<keras.callbacks.History at 0x7f62851dcda0>

In [580]:
inp = Input((60,), batch_shape=(1,60))

out1 = Dense(256, activation='relu',
            weights= unary_model14.layers[1].get_weights())(inp)
out1 = Dropout(0.15)(out1)
out1 = Dense(9, activation='linear', bias=False,
            weights= unary_model14.layers[-1].get_weights())(out1) 

unary_model14b = Model(inp,out1)

sgd = adam(0.007,epsilon=1e-7)

unary_model14b.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

def feat_func14(i,j,x,t,N,L):
    
    binary_chi = np.zeros(L**2)
    #unary_chi = np.zeros(N*L)
    #unary_chi[i*N:(i+1)*N] = x[t]
    unary_chi = unary_model14b.predict(x[t].reshape((1,60)))[0,i]
    
    if t > 0:
    
        binary_chi[(L*i+j)] = 1
        
    
    
    output = np.zeros(1 + L**2)
    output[0] = unary_chi
    output[1:] = binary_chi
    
    return output

In [585]:
# Adam stochastic mini-batch gradient descent on the (negative) pseudo-log-liklelihood.
# Dataset 14


crf14 = CRF(feat_func14,1+9**2,9, 0, 'B')
crf_u14 = CRF(feat_func14,1+9**2,9, 1e-7, 'U')
alpha = 0.099
B1, B2 = 0.9, 0.999
eps = 1e-7
m, v = np.zeros(crf14.K-1), np.zeros(crf14.K-1)
num_epochs = 20
batch_size = 5
epoch = 1
best_acc14 = 0

acc14_t1 = np.zeros(17)
acc14_u = np.zeros(17)
for n in range(17):
    acc14_t1[n] = 1 - hamming(crf14.MAP(X_ts14[n]), y_ts14[n])
    acc14_u[n] = 1 - hamming(crf_u14.MAP(X_ts14[n]), y_ts14[n])
print('Initial accuracy score Binary: ',100* acc14_t1.mean(), '%.')
print('Accuracy score Unary only: ',100* acc14_u.mean(), '%.')
print('Zero-one-loss Unary: ', 100*acc14_u[acc14_u==1].sum()/17, '%')

while epoch <= num_epochs:
    
    min_batch = np.arange(X_tr14.shape[0])
    np.random.shuffle(min_batch)
    
    for k in range(0, X_tr14.shape[0],batch_size):
        
        X_b, y_b = X_tr14[min_batch[k:k+batch_size]],y_tr14[min_batch[k:k+batch_size]]
        t = (X_tr14.shape[0]//batch_size)*(epoch-1) + (k // batch_size) + 1
        g = crf14.grad_pll(X_b, y_b)
        m = B1 * m + (1-B1) * g
        v = B2 * v + np.square(np.sqrt(1-B2) * g)
        m_b = m / (1 - B1**t)
        v_b = v / (1 - B2**t)
        crf14.W[1:] = crf14.W[1:] - (alpha * m_b) / (np.sqrt(v_b) + eps)
        
        if (k // batch_size) % 2 == 0:
            
            acc14_test = np.zeros(17)
            
            for n in range(17):
                acc14_test[n] = 1 - hamming(crf14.MAP(X_ts14[n]), y_ts14[n])
            print('Epoch/Iteration: ', epoch, '/', t-1, '. Current Average Test Hamming Accuracy: ',
                  100* acc14_test.mean(), '%.')
            
            if best_acc14 < acc14_test.mean():
                best_acc14 = acc14_test.mean()
                print('Zero-one-loss: ', 100*acc14_test[acc14_test==1].sum()/17, '%')
    epoch += 1

Initial accuracy score Binary:  70.6027385303 %.
Accuracy score Unary only:  79.6715793024 %.
Zero-one-loss Unary:  0.0 %
Epoch/Iteration:  1 / 0 . Current Average Test Hamming Accuracy:  70.4583824602 %.
Zero-one-loss:  5.88235294118 %
Epoch/Iteration:  1 / 2 . Current Average Test Hamming Accuracy:  71.2822079253 %.
Zero-one-loss:  5.88235294118 %
Epoch/Iteration:  1 / 4 . Current Average Test Hamming Accuracy:  72.4427739653 %.
Zero-one-loss:  5.88235294118 %
Epoch/Iteration:  2 / 6 . Current Average Test Hamming Accuracy:  73.0243798625 %.
Zero-one-loss:  5.88235294118 %
Epoch/Iteration:  2 / 8 . Current Average Test Hamming Accuracy:  74.1084673437 %.
Zero-one-loss:  11.7647058824 %
Epoch/Iteration:  2 / 10 . Current Average Test Hamming Accuracy:  74.2514056784 %.
Zero-one-loss:  11.7647058824 %
Epoch/Iteration:  3 / 12 . Current Average Test Hamming Accuracy:  75.9292768269 %.
Zero-one-loss:  11.7647058824 %
Epoch/Iteration:  3 / 14 . Current Average Test Hamming Accuracy:  75.9

In [586]:
# Data Extraction Data set 15

X_tr15, y_tr15 = [], []

for k in range(30):
    
    T = data15['BOF_tr_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data15['BOF_tr_M'][0][k][t,:]
        feat[t,30:] = data15['BOF_tr_K'][0][k][t,:]
        lab[t] = data15['label_tr'][0][k][t,0]
    X_tr15.append(feat)
    y_tr15.append(lab)
    
X_tr15 = np.asarray(X_tr15, dtype='object')
y_tr15 = np.asarray(y_tr15, dtype='object')


# Data Extraction Testing

X_ts15, y_ts15 = [], []

for k in range(17):
    
    T = data15['BOF_te_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data15['BOF_te_M'][0][k][t,:]
        feat[t,30:] = data15['BOF_te_K'][0][k][t,:]
        lab[t] = data15['label_te'][0][k][t,0]
    X_ts15.append(feat)
    y_ts15.append(lab)
    
X_ts15 = np.asarray(X_ts15, dtype='object')
y_ts15 = np.asarray(y_ts15, dtype='object')

num_node = 0
for k in range(30):
    for j in range(y_tr15[k].shape[0]):
        num_node += 1
        
max_l = 0 
for k in range(30):
     max_l = max(max_l, y_tr15[k].max())
for k in range(17):
    max_l = max(max_l, y_ts15[k].max())
print(max_l)


X_n15, y_n15 = np.zeros((num_node,60)), np.zeros((num_node,max_l+1), dtype='i4')

l = 0
for k in range(30):
    for j in range(X_tr15[k].shape[0]):
        X_n15[l] = X_tr15[k][j]
        y_n15[l][y_tr15[k][j]] = 1
        l += 1

8


In [592]:
#Unary Neural Net dataset 15
inp = Input((60,), batch_shape=(128,60))

out1 = Dense(512, activation='relu')(inp)
out1 = Dropout(0.35)(out1)
out1 = Dense(9, activation='softmax', bias=False)(out1) 

unary_model15 = Model(inp,out1)

sgd = adam(0.009,epsilon=1e-8)

unary_model15.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [593]:
unary_model15.fit(X_n15,y_n15,batch_size=128,nb_epoch=15,validation_split=.05, verbose=2)

Train on 707 samples, validate on 38 samples
Epoch 1/15
0s - loss: 1.8414 - acc: 0.3352 - val_loss: 1.5960 - val_acc: 0.5000
Epoch 2/15
0s - loss: 1.1888 - acc: 0.6478 - val_loss: 1.4013 - val_acc: 0.5526
Epoch 3/15
0s - loss: 0.8557 - acc: 0.7553 - val_loss: 0.9763 - val_acc: 0.6316
Epoch 4/15
0s - loss: 0.6669 - acc: 0.8020 - val_loss: 0.9042 - val_acc: 0.6579
Epoch 5/15
0s - loss: 0.5936 - acc: 0.8416 - val_loss: 0.8835 - val_acc: 0.6579
Epoch 6/15
0s - loss: 0.5175 - acc: 0.8444 - val_loss: 0.9500 - val_acc: 0.6579
Epoch 7/15
0s - loss: 0.4869 - acc: 0.8769 - val_loss: 0.9673 - val_acc: 0.6842
Epoch 8/15
0s - loss: 0.4761 - acc: 0.8755 - val_loss: 0.7327 - val_acc: 0.7632
Epoch 9/15
0s - loss: 0.4292 - acc: 0.8854 - val_loss: 0.8557 - val_acc: 0.7368
Epoch 10/15
0s - loss: 0.4198 - acc: 0.8868 - val_loss: 0.8126 - val_acc: 0.7632
Epoch 11/15
0s - loss: 0.4152 - acc: 0.8911 - val_loss: 0.7538 - val_acc: 0.7632
Epoch 12/15
0s - loss: 0.3821 - acc: 0.9066 - val_loss: 0.8334 - val_acc:

<keras.callbacks.History at 0x7f62844c11d0>

In [594]:
inp = Input((60,), batch_shape=(1,60))

out1 = Dense(512, activation='relu',
            weights= unary_model15.layers[1].get_weights())(inp)
out1 = Dropout(0.35)(out1)
out1 = Dense(9, activation='linear', bias=False,
            weights= unary_model15.layers[-1].get_weights())(out1) 

unary_model15b = Model(inp,out1)

sgd = adam(0.007,epsilon=1e-7)

unary_model15b.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

def feat_func15(i,j,x,t,N,L):
    
    binary_chi = np.zeros(L**2)
    #unary_chi = np.zeros(N*L)
    #unary_chi[i*N:(i+1)*N] = x[t]
    unary_chi = unary_model15b.predict(x[t].reshape((1,60)))[0,i]
    
    if t > 0:
    
        binary_chi[(L*i+j)] = 1
        
    
    
    output = np.zeros(1 + L**2)
    output[0] = unary_chi
    output[1:] = binary_chi
    
    return output

In [595]:
# Adam stochastic mini-batch gradient descent on the (negative) pseudo-log-liklelihood.
# Dataset 14


crf15 = CRF(feat_func15,1+9**2,9, 0, 'B')
crf_u15 = CRF(feat_func15,1+9**2,9, 1e-7, 'U')
alpha = 0.099
B1, B2 = 0.9, 0.999
eps = 1e-7
m, v = np.zeros(crf15.K-1), np.zeros(crf15.K-1)
num_epochs = 20
batch_size = 5
epoch = 1
best_acc15 = 0

acc15_t1 = np.zeros(17)
acc15_u = np.zeros(17)
for n in range(17):
    acc15_t1[n] = 1 - hamming(crf15.MAP(X_ts15[n]), y_ts15[n])
    acc15_u[n] = 1 - hamming(crf_u15.MAP(X_ts15[n]), y_ts15[n])
print('Initial accuracy score Binary: ',100* acc15_t1.mean(), '%.')
print('Accuracy score Unary only: ',100* acc15_u.mean(), '%.')
print('Zero-one-loss Unary: ', 100*acc15_u[acc14_u==1].sum()/17, '%')

while epoch <= num_epochs:
    
    min_batch = np.arange(X_tr15.shape[0])
    np.random.shuffle(min_batch)
    
    for k in range(0, X_tr15.shape[0],batch_size):
        
        X_b, y_b = X_tr15[min_batch[k:k+batch_size]],y_tr15[min_batch[k:k+batch_size]]
        t = (X_tr14.shape[0]//batch_size)*(epoch-1) + (k // batch_size) + 1
        g = crf15.grad_pll(X_b, y_b)
        m = B1 * m + (1-B1) * g
        v = B2 * v + np.square(np.sqrt(1-B2) * g)
        m_b = m / (1 - B1**t)
        v_b = v / (1 - B2**t)
        crf15.W[1:] = crf15.W[1:] - (alpha * m_b) / (np.sqrt(v_b) + eps)
        
        if (k // batch_size) % 2 == 0:
            
            acc15_test = np.zeros(17)
            
            for n in range(17):
                acc15_test[n] = 1 - hamming(crf15.MAP(X_ts15[n]), y_ts15[n])
            print('Epoch/Iteration: ', epoch, '/', t-1, '. Current Average Test Hamming Accuracy: ',
                  100* acc15_test.mean(), '%.')
            
            if best_acc15 < acc15_test.mean():
                best_acc15 = acc15_test.mean()
                print('Zero-one-loss: ', 100*acc15_test[acc15_test==1].sum()/17, '%')
    epoch += 1

Initial accuracy score Binary:  69.4455978133 %.
Accuracy score Unary only:  75.2605210397 %.
Zero-one-loss Unary:  0.0 %
Epoch/Iteration:  1 / 0 . Current Average Test Hamming Accuracy:  70.2524007535 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  1 / 2 . Current Average Test Hamming Accuracy:  71.7300842007 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  1 / 4 . Current Average Test Hamming Accuracy:  74.307650664 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 6 . Current Average Test Hamming Accuracy:  75.2091603692 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 8 . Current Average Test Hamming Accuracy:  76.3968472043 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 10 . Current Average Test Hamming Accuracy:  77.351174407 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 12 . Current Average Test Hamming Accuracy:  77.7245759415 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 14 . Current Average Test Hamming Accuracy:  77.4335673605 %.
Epoch/Iteration:  3 / 16 . Current Average Test Hamming Acc

In [596]:
# Data Extraction Data set 16

X_tr16, y_tr16 = [], []

for k in range(30):
    
    T = data16['BOF_tr_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data16['BOF_tr_M'][0][k][t,:]
        feat[t,30:] = data16['BOF_tr_K'][0][k][t,:]
        lab[t] = data16['label_tr'][0][k][t,0]
    X_tr16.append(feat)
    y_tr16.append(lab)
    
X_tr16 = np.asarray(X_tr16, dtype='object')
y_tr16 = np.asarray(y_tr16, dtype='object')


# Data Extraction Testing

X_ts16, y_ts16 = [], []

for k in range(17):
    
    T = data16['BOF_te_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data16['BOF_te_M'][0][k][t,:]
        feat[t,30:] = data16['BOF_te_K'][0][k][t,:]
        lab[t] = data16['label_te'][0][k][t,0]
    X_ts16.append(feat)
    y_ts16.append(lab)
    
X_ts16 = np.asarray(X_ts16, dtype='object')
y_ts16 = np.asarray(y_ts16, dtype='object')

num_node = 0
for k in range(30):
    for j in range(y_tr16[k].shape[0]):
        num_node += 1
        
max_l = 0 
for k in range(30):
     max_l = max(max_l, y_tr16[k].max())
for k in range(17):
    max_l = max(max_l, y_ts16[k].max())
print(max_l)


X_n16, y_n16 = np.zeros((num_node,60)), np.zeros((num_node,max_l+1), dtype='i4')

l = 0
for k in range(30):
    for j in range(X_tr16[k].shape[0]):
        X_n16[l] = X_tr16[k][j]
        y_n16[l][y_tr16[k][j]] = 1
        l += 1

13


In [603]:
#Unary Neural Net dataset 16
inp = Input((60,), batch_shape=(128,60))

out1 = Dense(512, activation='relu')(inp)
out1 = Dropout(0.40)(out1)
out1 = Dense(14, activation='softmax', bias=False)(out1) 

unary_model16 = Model(inp,out1)

sgd = adam(0.009,epsilon=1e-8)

unary_model16.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [605]:
unary_model16.fit(X_n16,y_n16,batch_size=128,nb_epoch=10,validation_split=.05, verbose=2)

Train on 797 samples, validate on 42 samples
Epoch 1/10
0s - loss: 0.3574 - acc: 0.8984 - val_loss: 0.7826 - val_acc: 0.7619
Epoch 2/10
0s - loss: 0.3420 - acc: 0.9134 - val_loss: 0.7573 - val_acc: 0.7143
Epoch 3/10
0s - loss: 0.3112 - acc: 0.9159 - val_loss: 0.7961 - val_acc: 0.7381
Epoch 4/10
0s - loss: 0.2792 - acc: 0.9297 - val_loss: 0.7625 - val_acc: 0.7381
Epoch 5/10
0s - loss: 0.2837 - acc: 0.9260 - val_loss: 0.8701 - val_acc: 0.7143
Epoch 6/10
0s - loss: 0.2428 - acc: 0.9423 - val_loss: 0.8138 - val_acc: 0.7857
Epoch 7/10
0s - loss: 0.2623 - acc: 0.9385 - val_loss: 0.7710 - val_acc: 0.7619
Epoch 8/10
0s - loss: 0.2275 - acc: 0.9511 - val_loss: 0.9564 - val_acc: 0.7143
Epoch 9/10
0s - loss: 0.2098 - acc: 0.9460 - val_loss: 0.9023 - val_acc: 0.7619
Epoch 10/10
0s - loss: 0.2046 - acc: 0.9598 - val_loss: 0.7823 - val_acc: 0.7619


<keras.callbacks.History at 0x7f6283a74898>

In [606]:
inp = Input((60,), batch_shape=(1,60))

out1 = Dense(512, activation='relu',
            weights= unary_model16.layers[1].get_weights())(inp)
out1 = Dropout(0.40)(out1)
out1 = Dense(14, activation='linear', bias=False,
            weights= unary_model16.layers[-1].get_weights())(out1) 

unary_model16b = Model(inp,out1)

sgd = adam(0.007,epsilon=1e-7)

unary_model16b.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

def feat_func16(i,j,x,t,N,L):
    
    binary_chi = np.zeros(L**2)
    #unary_chi = np.zeros(N*L)
    #unary_chi[i*N:(i+1)*N] = x[t]
    unary_chi = unary_model16b.predict(x[t].reshape((1,60)))[0,i]
    
    if t > 0:
    
        binary_chi[(L*i+j)] = 1
        
    
    
    output = np.zeros(1 + L**2)
    output[0] = unary_chi
    output[1:] = binary_chi
    
    return output

In [607]:
# Adam stochastic mini-batch gradient descent on the (negative) pseudo-log-liklelihood.
# Dataset 16


crf16 = CRF(feat_func16,1+14**2,14, 0, 'B')
crf_u16 = CRF(feat_func16,1+14**2,14, 1e-7, 'U')
alpha = 0.099
B1, B2 = 0.9, 0.999
eps = 1e-7
m, v = np.zeros(crf16.K-1), np.zeros(crf16.K-1)
num_epochs = 20
batch_size = 5
epoch = 1
best_acc16 = 0

acc16_t1 = np.zeros(17)
acc16_u = np.zeros(17)
for n in range(17):
    acc16_t1[n] = 1 - hamming(crf16.MAP(X_ts16[n]), y_ts16[n])
    acc16_u[n] = 1 - hamming(crf_u16.MAP(X_ts16[n]), y_ts16[n])
print('Initial accuracy score Binary: ',100* acc16_t1.mean(), '%.')
print('Accuracy score Unary only: ',100* acc16_u.mean(), '%.')
print('Zero-one-loss Unary: ', 100*acc16_u[acc16_u==1].sum()/17, '%')

while epoch <= num_epochs:
    
    min_batch = np.arange(X_tr16.shape[0])
    np.random.shuffle(min_batch)
    
    for k in range(0, X_tr16.shape[0],batch_size):
        
        X_b, y_b = X_tr16[min_batch[k:k+batch_size]],y_tr16[min_batch[k:k+batch_size]]
        t = (X_tr16.shape[0]//batch_size)*(epoch-1) + (k // batch_size) + 1
        g = crf16.grad_pll(X_b, y_b)
        m = B1 * m + (1-B1) * g
        v = B2 * v + np.square(np.sqrt(1-B2) * g)
        m_b = m / (1 - B1**t)
        v_b = v / (1 - B2**t)
        crf16.W[1:] = crf16.W[1:] - (alpha * m_b) / (np.sqrt(v_b) + eps)
        
        if (k // batch_size) % 2 == 0:
            
            acc16_test = np.zeros(17)
            
            for n in range(17):
                acc16_test[n] = 1 - hamming(crf16.MAP(X_ts16[n]), y_ts16[n])
            print('Epoch/Iteration: ', epoch, '/', t-1, '. Current Average Test Hamming Accuracy: ',
                  100* acc16_test.mean(), '%.')
            
            if best_acc16 < acc16_test.mean():
                best_acc16 = acc16_test.mean()
                print('Zero-one-loss: ', 100*acc16_test[acc16_test==1].sum()/17, '%')
    epoch += 1

Initial accuracy score Binary:  65.4286784413 %.
Accuracy score Unary only:  68.1657007768 %.
Zero-one-loss Unary:  0.0 %
Epoch/Iteration:  1 / 0 . Current Average Test Hamming Accuracy:  65.7158686593 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  1 / 2 . Current Average Test Hamming Accuracy:  67.4601762245 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  1 / 4 . Current Average Test Hamming Accuracy:  68.3489180182 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 6 . Current Average Test Hamming Accuracy:  69.0193060706 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 8 . Current Average Test Hamming Accuracy:  69.7878531848 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 10 . Current Average Test Hamming Accuracy:  70.1498467919 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 12 . Current Average Test Hamming Accuracy:  69.672442949 %.
Epoch/Iteration:  3 / 14 . Current Average Test Hamming Accuracy:  71.9618824599 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 16 . Current Average Test Hamming Ac

KeyboardInterrupt: 

In [608]:
# Data Extraction Data set 17

X_tr17, y_tr17 = [], []

for k in range(30):
    
    T = data17['BOF_tr_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data17['BOF_tr_M'][0][k][t,:]
        feat[t,30:] = data17['BOF_tr_K'][0][k][t,:]
        lab[t] = data17['label_tr'][0][k][t,0]
    X_tr17.append(feat)
    y_tr17.append(lab)
    
X_tr17 = np.asarray(X_tr17, dtype='object')
y_tr17 = np.asarray(y_tr17, dtype='object')


# Data Extraction Testing

X_ts17, y_ts17 = [], []

for k in range(17):
    
    T = data17['BOF_te_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data17['BOF_te_M'][0][k][t,:]
        feat[t,30:] = data17['BOF_te_K'][0][k][t,:]
        lab[t] = data17['label_te'][0][k][t,0]
    X_ts17.append(feat)
    y_ts17.append(lab)
    
X_ts17 = np.asarray(X_ts17, dtype='object')
y_ts17 = np.asarray(y_ts17, dtype='object')

num_node = 0
for k in range(30):
    for j in range(y_tr17[k].shape[0]):
        num_node += 1
        
max_l = 0 
for k in range(30):
     max_l = max(max_l, y_tr17[k].max())
for k in range(17):
    max_l = max(max_l, y_ts17[k].max())
print(max_l)


X_n17, y_n17 = np.zeros((num_node,60)), np.zeros((num_node,max_l+1), dtype='i4')

l = 0
for k in range(30):
    for j in range(X_tr17[k].shape[0]):
        X_n17[l] = X_tr17[k][j]
        y_n17[l][y_tr17[k][j]] = 1
        l += 1

8


In [609]:
#Unary Neural Net dataset 17
inp = Input((60,), batch_shape=(128,60))

out1 = Dense(256, activation='relu')(inp)
out1 = Dropout(0.1)(out1)
out1 = Dense(9, activation='softmax', bias=False)(out1) 

unary_model17 = Model(inp,out1)

sgd = adam(0.009,epsilon=1e-8)

unary_model17.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [612]:
unary_model17.fit(X_n17,y_n17,batch_size=128,nb_epoch=10,validation_split=.05, verbose=2)

Train on 880 samples, validate on 47 samples
Epoch 1/10
0s - loss: 0.3407 - acc: 0.9148 - val_loss: 0.7082 - val_acc: 0.7447
Epoch 2/10
0s - loss: 0.3209 - acc: 0.9250 - val_loss: 0.7192 - val_acc: 0.7447
Epoch 3/10
0s - loss: 0.3184 - acc: 0.9307 - val_loss: 0.6746 - val_acc: 0.7660
Epoch 4/10
0s - loss: 0.3131 - acc: 0.9318 - val_loss: 0.7668 - val_acc: 0.7447
Epoch 5/10
0s - loss: 0.3206 - acc: 0.9330 - val_loss: 0.6985 - val_acc: 0.7447
Epoch 6/10
0s - loss: 0.3058 - acc: 0.9227 - val_loss: 0.7776 - val_acc: 0.7447
Epoch 7/10
0s - loss: 0.2978 - acc: 0.9386 - val_loss: 0.6820 - val_acc: 0.7660
Epoch 8/10
0s - loss: 0.2892 - acc: 0.9307 - val_loss: 0.8635 - val_acc: 0.7447
Epoch 9/10
0s - loss: 0.3118 - acc: 0.9250 - val_loss: 0.7591 - val_acc: 0.7447
Epoch 10/10
0s - loss: 0.2897 - acc: 0.9330 - val_loss: 0.7198 - val_acc: 0.7660


<keras.callbacks.History at 0x7f628358b128>

In [613]:
inp = Input((60,), batch_shape=(1,60))

out1 = Dense(256, activation='relu',
            weights= unary_model17.layers[1].get_weights())(inp)
out1 = Dropout(0.1)(out1)
out1 = Dense(9, activation='linear', bias=False,
            weights= unary_model17.layers[-1].get_weights())(out1) 

unary_model17b = Model(inp,out1)

sgd = adam(0.007,epsilon=1e-7)

unary_model17b.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

def feat_func17(i,j,x,t,N,L):
    
    binary_chi = np.zeros(L**2)
    #unary_chi = np.zeros(N*L)
    #unary_chi[i*N:(i+1)*N] = x[t]
    unary_chi = unary_model17b.predict(x[t].reshape((1,60)))[0,i]
    
    if t > 0:
    
        binary_chi[(L*i+j)] = 1
        
    
    
    output = np.zeros(1 + L**2)
    output[0] = unary_chi
    output[1:] = binary_chi
    
    return output

In [614]:
# Adam stochastic mini-batch gradient descent on the (negative) pseudo-log-liklelihood.
# Dataset 17


crf17 = CRF(feat_func17,1+9**2,9, 0, 'B')
crf_u17 = CRF(feat_func17,1+9**2,9, 1e-7, 'U')
alpha = 0.099
B1, B2 = 0.9, 0.999
eps = 1e-7
m, v = np.zeros(crf17.K-1), np.zeros(crf17.K-1)
num_epochs = 20
batch_size = 5
epoch = 1
best_acc17 = 0

acc17_t1 = np.zeros(17)
acc17_u = np.zeros(17)
for n in range(17):
    acc17_t1[n] = 1 - hamming(crf17.MAP(X_ts17[n]), y_ts17[n])
    acc17_u[n] = 1 - hamming(crf_u17.MAP(X_ts17[n]), y_ts17[n])
print('Initial accuracy score Binary: ',100* acc17_t1.mean(), '%.')
print('Accuracy score Unary only: ',100* acc17_u.mean(), '%.')
print('Zero-one-loss Unary: ', 100*acc17_u[acc17_u==1].sum()/17, '%')

while epoch <= num_epochs:
    
    min_batch = np.arange(X_tr17.shape[0])
    np.random.shuffle(min_batch)
    
    for k in range(0, X_tr17.shape[0],batch_size):
        
        X_b, y_b = X_tr17[min_batch[k:k+batch_size]],y_tr17[min_batch[k:k+batch_size]]
        t = (X_tr17.shape[0]//batch_size)*(epoch-1) + (k // batch_size) + 1
        g = crf17.grad_pll(X_b, y_b)
        m = B1 * m + (1-B1) * g
        v = B2 * v + np.square(np.sqrt(1-B2) * g)
        m_b = m / (1 - B1**t)
        v_b = v / (1 - B2**t)
        crf17.W[1:] = crf17.W[1:] - (alpha * m_b) / (np.sqrt(v_b) + eps)
        
        if (k // batch_size) % 2 == 0:
            
            acc17_test = np.zeros(17)
            
            for n in range(17):
                acc17_test[n] = 1 - hamming(crf17.MAP(X_ts17[n]), y_ts17[n])
            print('Epoch/Iteration: ', epoch, '/', t-1, '. Current Average Test Hamming Accuracy: ',
                  100* acc17_test.mean(), '%.')
            
            if best_acc17 < acc17_test.mean():
                best_acc17 = acc17_test.mean()
                print('Zero-one-loss: ', 100*acc17_test[acc17_test==1].sum()/17, '%')
    epoch += 1

Initial accuracy score Binary:  55.2974393272 %.
Accuracy score Unary only:  65.7816537461 %.
Zero-one-loss Unary:  0.0 %
Epoch/Iteration:  1 / 0 . Current Average Test Hamming Accuracy:  59.2798587809 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  1 / 2 . Current Average Test Hamming Accuracy:  62.7439420119 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  1 / 4 . Current Average Test Hamming Accuracy:  63.5492089873 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 6 . Current Average Test Hamming Accuracy:  64.7568941809 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 8 . Current Average Test Hamming Accuracy:  65.2420180161 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 10 . Current Average Test Hamming Accuracy:  65.9020901217 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 12 . Current Average Test Hamming Accuracy:  66.2884966451 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 14 . Current Average Test Hamming Accuracy:  66.9101886267 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 16 . Current 

In [615]:
# Data Extraction Data set 18

X_tr18, y_tr18 = [], []

for k in range(30):
    
    T = data18['BOF_tr_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data18['BOF_tr_M'][0][k][t,:]
        feat[t,30:] = data18['BOF_tr_K'][0][k][t,:]
        lab[t] = data18['label_tr'][0][k][t,0]
    X_tr18.append(feat)
    y_tr18.append(lab)
    
X_tr18 = np.asarray(X_tr18, dtype='object')
y_tr18 = np.asarray(y_tr18, dtype='object')


# Data Extraction Testing

X_ts18, y_ts18 = [], []

for k in range(17):
    
    T = data18['BOF_te_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data18['BOF_te_M'][0][k][t,:]
        feat[t,30:] = data18['BOF_te_K'][0][k][t,:]
        lab[t] = data18['label_te'][0][k][t,0]
    X_ts18.append(feat)
    y_ts18.append(lab)
    
X_ts18 = np.asarray(X_ts18, dtype='object')
y_ts18 = np.asarray(y_ts18, dtype='object')

num_node = 0
for k in range(30):
    for j in range(y_tr18[k].shape[0]):
        num_node += 1
        
max_l = 0 
for k in range(30):
     max_l = max(max_l, y_tr18[k].max())
for k in range(17):
    max_l = max(max_l, y_ts18[k].max())
print(max_l)


X_n18, y_n18 = np.zeros((num_node,60)), np.zeros((num_node,max_l+1), dtype='i4')

l = 0
for k in range(30):
    for j in range(X_tr18[k].shape[0]):
        X_n18[l] = X_tr18[k][j]
        y_n18[l][y_tr18[k][j]] = 1
        l += 1

10


In [616]:
#Unary Neural Net dataset 18
inp = Input((60,), batch_shape=(128,60))

out1 = Dense(256, activation='relu')(inp)
out1 = Dropout(0.1)(out1)
out1 = Dense(11, activation='softmax', bias=False)(out1) 

unary_model18 = Model(inp,out1)

sgd = adam(0.009,epsilon=1e-8)

unary_model18.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [621]:
unary_model18.fit(X_n18,y_n18,batch_size=128,nb_epoch=10,validation_split=.05, verbose=2)

Train on 561 samples, validate on 30 samples
Epoch 1/10
0s - loss: 0.1674 - acc: 0.9715 - val_loss: 1.0387 - val_acc: 0.7333
Epoch 2/10
0s - loss: 0.1734 - acc: 0.9697 - val_loss: 1.0699 - val_acc: 0.7667
Epoch 3/10
0s - loss: 0.1721 - acc: 0.9697 - val_loss: 0.9753 - val_acc: 0.7667
Epoch 4/10
0s - loss: 0.1857 - acc: 0.9661 - val_loss: 1.2478 - val_acc: 0.7333
Epoch 5/10
0s - loss: 0.1677 - acc: 0.9697 - val_loss: 1.0765 - val_acc: 0.7333
Epoch 6/10
0s - loss: 0.1686 - acc: 0.9679 - val_loss: 1.0514 - val_acc: 0.7333
Epoch 7/10
0s - loss: 0.1724 - acc: 0.9697 - val_loss: 1.0847 - val_acc: 0.7333
Epoch 8/10
0s - loss: 0.1666 - acc: 0.9679 - val_loss: 1.0502 - val_acc: 0.7667
Epoch 9/10
0s - loss: 0.1701 - acc: 0.9697 - val_loss: 1.1422 - val_acc: 0.7667
Epoch 10/10
0s - loss: 0.1732 - acc: 0.9679 - val_loss: 1.0268 - val_acc: 0.7000


<keras.callbacks.History at 0x7f6283017c88>

In [622]:
inp = Input((60,), batch_shape=(1,60))

out1 = Dense(256, activation='relu',
            weights= unary_model18.layers[1].get_weights())(inp)
out1 = Dropout(0.1)(out1)
out1 = Dense(11, activation='linear', bias=False,
            weights= unary_model18.layers[-1].get_weights())(out1) 

unary_model18b = Model(inp,out1)

sgd = adam(0.007,epsilon=1e-7)

unary_model18b.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

def feat_func18(i,j,x,t,N,L):
    
    binary_chi = np.zeros(L**2)
    #unary_chi = np.zeros(N*L)
    #unary_chi[i*N:(i+1)*N] = x[t]
    unary_chi = unary_model18b.predict(x[t].reshape((1,60)))[0,i]
    
    if t > 0:
    
        binary_chi[(L*i+j)] = 1
        
    
    
    output = np.zeros(1 + L**2)
    output[0] = unary_chi
    output[1:] = binary_chi
    
    return output

In [623]:
# Adam stochastic mini-batch gradient descent on the (negative) pseudo-log-liklelihood.
# Dataset 18


crf18 = CRF(feat_func18,1+11**2,11, 0, 'B')
crf_u18 = CRF(feat_func18,1+11**2,11, 1e-7, 'U')
alpha = 0.099
B1, B2 = 0.9, 0.999
eps = 1e-7
m, v = np.zeros(crf18.K-1), np.zeros(crf18.K-1)
num_epochs = 20
batch_size = 5
epoch = 1
best_acc18 = 0

acc18_t1 = np.zeros(17)
acc18_u = np.zeros(17)
for n in range(17):
    acc18_t1[n] = 1 - hamming(crf18.MAP(X_ts18[n]), y_ts18[n])
    acc18_u[n] = 1 - hamming(crf_u18.MAP(X_ts18[n]), y_ts18[n])
print('Initial accuracy score Binary: ',100* acc18_t1.mean(), '%.')
print('Accuracy score Unary only: ',100* acc18_u.mean(), '%.')
print('Zero-one-loss Unary: ', 100*acc18_u[acc18_u==1].sum()/17, '%')

while epoch <= num_epochs:
    
    min_batch = np.arange(X_tr18.shape[0])
    np.random.shuffle(min_batch)
    
    for k in range(0, X_tr18.shape[0],batch_size):
        
        X_b, y_b = X_tr18[min_batch[k:k+batch_size]],y_tr18[min_batch[k:k+batch_size]]
        t = (X_tr18.shape[0]//batch_size)*(epoch-1) + (k // batch_size) + 1
        g = crf18.grad_pll(X_b, y_b)
        m = B1 * m + (1-B1) * g
        v = B2 * v + np.square(np.sqrt(1-B2) * g)
        m_b = m / (1 - B1**t)
        v_b = v / (1 - B2**t)
        crf18.W[1:] = crf18.W[1:] - (alpha * m_b) / (np.sqrt(v_b) + eps)
        
        if (k // batch_size) % 2 == 0:
            
            acc18_test = np.zeros(17)
            
            for n in range(17):
                acc18_test[n] = 1 - hamming(crf18.MAP(X_ts18[n]), y_ts18[n])
            print('Epoch/Iteration: ', epoch, '/', t-1, '. Current Average Test Hamming Accuracy: ',
                  100* acc18_test.mean(), '%.')
            
            if best_acc18 < acc18_test.mean():
                best_acc18 = acc18_test.mean()
                print('Zero-one-loss: ', 100*acc18_test[acc18_test==1].sum()/17, '%')
    epoch += 1

Initial accuracy score Binary:  69.3901327097 %.
Accuracy score Unary only:  74.0365061781 %.
Zero-one-loss Unary:  0.0 %
Epoch/Iteration:  1 / 0 . Current Average Test Hamming Accuracy:  71.9122395916 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  1 / 2 . Current Average Test Hamming Accuracy:  71.5662188304 %.
Epoch/Iteration:  1 / 4 . Current Average Test Hamming Accuracy:  72.0564149088 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 6 . Current Average Test Hamming Accuracy:  72.7484564313 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 8 . Current Average Test Hamming Accuracy:  72.4024356701 %.
Epoch/Iteration:  2 / 10 . Current Average Test Hamming Accuracy:  71.814200376 %.
Epoch/Iteration:  3 / 12 . Current Average Test Hamming Accuracy:  71.814200376 %.
Epoch/Iteration:  3 / 14 . Current Average Test Hamming Accuracy:  72.32361983 %.
Epoch/Iteration:  3 / 16 . Current Average Test Hamming Accuracy:  72.32361983 %.
Epoch/Iteration:  4 / 18 . Current Average Test Hamming Accuracy:  

In [624]:
# Data Extraction Data set 19

X_tr19, y_tr19 = [], []

for k in range(30):
    
    T = data19['BOF_tr_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data19['BOF_tr_M'][0][k][t,:]
        feat[t,30:] = data19['BOF_tr_K'][0][k][t,:]
        lab[t] = data19['label_tr'][0][k][t,0]
    X_tr19.append(feat)
    y_tr19.append(lab)
    
X_tr19 = np.asarray(X_tr19, dtype='object')
y_tr19 = np.asarray(y_tr19, dtype='object')


# Data Extraction Testing

X_ts19, y_ts19 = [], []

for k in range(17):
    
    T = data19['BOF_te_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data19['BOF_te_M'][0][k][t,:]
        feat[t,30:] = data19['BOF_te_K'][0][k][t,:]
        lab[t] = data19['label_te'][0][k][t,0]
    X_ts19.append(feat)
    y_ts19.append(lab)
    
X_ts19 = np.asarray(X_ts19, dtype='object')
y_ts19 = np.asarray(y_ts19, dtype='object')

num_node = 0
for k in range(30):
    for j in range(y_tr19[k].shape[0]):
        num_node += 1
        
max_l = 0 
for k in range(30):
     max_l = max(max_l, y_tr19[k].max())
for k in range(17):
    max_l = max(max_l, y_ts19[k].max())
print(max_l)


X_n19, y_n19 = np.zeros((num_node,60)), np.zeros((num_node,max_l+1), dtype='i4')

l = 0
for k in range(30):
    for j in range(X_tr19[k].shape[0]):
        X_n19[l] = X_tr19[k][j]
        y_n19[l][y_tr19[k][j]] = 1
        l += 1

9


In [629]:
#Unary Neural Net dataset 19
inp = Input((60,), batch_shape=(128,60))

out1 = Dense(256, activation='relu')(inp)
out1 = Dropout(0.1)(out1)
out1 = Dense(10, activation='softmax', bias=False)(out1) 

unary_model19 = Model(inp,out1)

sgd = adam(0.009,epsilon=1e-8)

unary_model19.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [630]:
unary_model19.fit(X_n19,y_n19,batch_size=128,nb_epoch=12,validation_split=.05, verbose=2)

Train on 917 samples, validate on 49 samples
Epoch 1/12
0s - loss: 1.8860 - acc: 0.3871 - val_loss: 1.6566 - val_acc: 0.5102
Epoch 2/12
0s - loss: 1.4009 - acc: 0.5703 - val_loss: 1.2791 - val_acc: 0.5714
Epoch 3/12
0s - loss: 1.0432 - acc: 0.6848 - val_loss: 0.9339 - val_acc: 0.6735
Epoch 4/12
0s - loss: 0.8388 - acc: 0.7503 - val_loss: 0.6305 - val_acc: 0.7959
Epoch 5/12
0s - loss: 0.6999 - acc: 0.7841 - val_loss: 0.7319 - val_acc: 0.7347
Epoch 6/12
0s - loss: 0.6064 - acc: 0.8201 - val_loss: 0.4901 - val_acc: 0.8163
Epoch 7/12
0s - loss: 0.5481 - acc: 0.8353 - val_loss: 0.6108 - val_acc: 0.8163
Epoch 8/12
0s - loss: 0.4902 - acc: 0.8462 - val_loss: 0.5369 - val_acc: 0.8163
Epoch 9/12
0s - loss: 0.4419 - acc: 0.8811 - val_loss: 0.5942 - val_acc: 0.7959
Epoch 10/12
0s - loss: 0.4372 - acc: 0.8735 - val_loss: 0.5620 - val_acc: 0.7959
Epoch 11/12
0s - loss: 0.4207 - acc: 0.8757 - val_loss: 0.5292 - val_acc: 0.8367
Epoch 12/12
0s - loss: 0.3766 - acc: 0.8953 - val_loss: 0.5242 - val_acc:

<keras.callbacks.History at 0x7f62826fee80>

In [631]:
inp = Input((60,), batch_shape=(1,60))

out1 = Dense(256, activation='relu',
            weights= unary_model19.layers[1].get_weights())(inp)
out1 = Dropout(0.1)(out1)
out1 = Dense(10, activation='linear', bias=False,
            weights= unary_model19.layers[-1].get_weights())(out1) 

unary_model19b = Model(inp,out1)

sgd = adam(0.007,epsilon=1e-7)

unary_model19b.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

def feat_func19(i,j,x,t,N,L):
    
    binary_chi = np.zeros(L**2)
    #unary_chi = np.zeros(N*L)
    #unary_chi[i*N:(i+1)*N] = x[t]
    unary_chi = unary_model19b.predict(x[t].reshape((1,60)))[0,i]
    
    if t > 0:
    
        binary_chi[(L*i+j)] = 1
        
    
    
    output = np.zeros(1 + L**2)
    output[0] = unary_chi
    output[1:] = binary_chi
    
    return output

In [632]:
# Adam stochastic mini-batch gradient descent on the (negative) pseudo-log-liklelihood.
# Dataset 19


crf19 = CRF(feat_func19,1+10**2,10, 0, 'B')
crf_u19 = CRF(feat_func19,1+10**2,10, 1e-7, 'U')
alpha = 0.099
B1, B2 = 0.9, 0.999
eps = 1e-7
m, v = np.zeros(crf19.K-1), np.zeros(crf19.K-1)
num_epochs = 20
batch_size = 5
epoch = 1
best_acc19 = 0

acc19_t1 = np.zeros(17)
acc19_u = np.zeros(17)
for n in range(17):
    acc19_t1[n] = 1 - hamming(crf19.MAP(X_ts19[n]), y_ts19[n])
    acc19_u[n] = 1 - hamming(crf_u19.MAP(X_ts19[n]), y_ts19[n])
print('Initial accuracy score Binary: ',100* acc19_t1.mean(), '%.')
print('Accuracy score Unary only: ',100* acc19_u.mean(), '%.')
print('Zero-one-loss Unary: ', 100*acc19_u[acc19_u==1].sum()/17, '%')

while epoch <= num_epochs:
    
    min_batch = np.arange(X_tr19.shape[0])
    np.random.shuffle(min_batch)
    
    for k in range(0, X_tr19.shape[0],batch_size):
        
        X_b, y_b = X_tr19[min_batch[k:k+batch_size]],y_tr19[min_batch[k:k+batch_size]]
        t = (X_tr19.shape[0]//batch_size)*(epoch-1) + (k // batch_size) + 1
        g = crf19.grad_pll(X_b, y_b)
        m = B1 * m + (1-B1) * g
        v = B2 * v + np.square(np.sqrt(1-B2) * g)
        m_b = m / (1 - B1**t)
        v_b = v / (1 - B2**t)
        crf19.W[1:] = crf19.W[1:] - (alpha * m_b) / (np.sqrt(v_b) + eps)
        
        if (k // batch_size) % 2 == 0:
            
            acc19_test = np.zeros(17)
            
            for n in range(17):
                acc19_test[n] = 1 - hamming(crf19.MAP(X_ts19[n]), y_ts19[n])
            print('Epoch/Iteration: ', epoch, '/', t-1, '. Current Average Test Hamming Accuracy: ',
                  100* acc19_test.mean(), '%.')
            
            if best_acc19 < acc19_test.mean():
                best_acc19 = acc19_test.mean()
                print('Zero-one-loss: ', 100*acc19_test[acc19_test==1].sum()/17, '%')
    epoch += 1

Initial accuracy score Binary:  74.9779499692 %.
Accuracy score Unary only:  78.0944515209 %.
Zero-one-loss Unary:  5.88235294118 %
Epoch/Iteration:  1 / 0 . Current Average Test Hamming Accuracy:  75.3768271871 %.
Zero-one-loss:  5.88235294118 %
Epoch/Iteration:  1 / 2 . Current Average Test Hamming Accuracy:  75.9671035254 %.
Zero-one-loss:  5.88235294118 %
Epoch/Iteration:  1 / 4 . Current Average Test Hamming Accuracy:  78.5249693708 %.
Zero-one-loss:  5.88235294118 %
Epoch/Iteration:  2 / 6 . Current Average Test Hamming Accuracy:  78.393077082 %.
Epoch/Iteration:  2 / 8 . Current Average Test Hamming Accuracy:  79.2693855001 %.
Zero-one-loss:  5.88235294118 %
Epoch/Iteration:  2 / 10 . Current Average Test Hamming Accuracy:  82.0772453525 %.
Zero-one-loss:  17.6470588235 %
Epoch/Iteration:  3 / 12 . Current Average Test Hamming Accuracy:  81.6882476482 %.
Epoch/Iteration:  3 / 14 . Current Average Test Hamming Accuracy:  82.4264252722 %.
Zero-one-loss:  17.6470588235 %
Epoch/Iter

In [633]:
# Data Extraction Data set 20

X_tr20, y_tr20 = [], []

for k in range(30):
    
    T = data20['BOF_tr_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data20['BOF_tr_M'][0][k][t,:]
        feat[t,30:] = data20['BOF_tr_K'][0][k][t,:]
        lab[t] = data20['label_tr'][0][k][t,0]
    X_tr20.append(feat)
    y_tr20.append(lab)
    
X_tr20 = np.asarray(X_tr20, dtype='object')
y_tr20 = np.asarray(y_tr20, dtype='object')


# Data Extraction Testing

X_ts20, y_ts20 = [], []

for k in range(17):
    
    T = data20['BOF_te_M'][0][k].shape[0]
    feat = np.zeros((T,60))
    lab = np.zeros(T, dtype='i4')
   
    for t in range(T):
        feat[t,:30] = data20['BOF_te_M'][0][k][t,:]
        feat[t,30:] = data20['BOF_te_K'][0][k][t,:]
        lab[t] = data20['label_te'][0][k][t,0]
    X_ts20.append(feat)
    y_ts20.append(lab)
    
X_ts20 = np.asarray(X_ts20, dtype='object')
y_ts20 = np.asarray(y_ts20, dtype='object')

num_node = 0
for k in range(30):
    for j in range(y_tr20[k].shape[0]):
        num_node += 1
        
max_l = 0 
for k in range(30):
     max_l = max(max_l, y_tr20[k].max())
for k in range(17):
    max_l = max(max_l, y_ts20[k].max())
print(max_l)


X_n20, y_n20 = np.zeros((num_node,60)), np.zeros((num_node,max_l+1), dtype='i4')

l = 0
for k in range(30):
    for j in range(X_tr20[k].shape[0]):
        X_n20[l] = X_tr20[k][j]
        y_n20[l][y_tr20[k][j]] = 1
        l += 1

9


In [659]:
#Unary Neural Net dataset 20
inp = Input((60,), batch_shape=(128,60))

out1 = Dense(512, activation='relu')(inp)
out1 = Dropout(0.35)(out1)
out1 = Dense(10, activation='softmax', bias=False)(out1) 

unary_model20 = Model(inp,out1)

sgd = adam(0.009,epsilon=1e-8)

unary_model20.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [660]:
unary_model20.fit(X_n20,y_n20,batch_size=128,nb_epoch=10,validation_split=.05, verbose=2)

Train on 477 samples, validate on 26 samples
Epoch 1/10
0s - loss: 1.9421 - acc: 0.3962 - val_loss: 2.1142 - val_acc: 0.4231
Epoch 2/10
0s - loss: 1.4534 - acc: 0.5514 - val_loss: 1.7871 - val_acc: 0.4615
Epoch 3/10
0s - loss: 1.1779 - acc: 0.6709 - val_loss: 1.8439 - val_acc: 0.5000
Epoch 4/10
0s - loss: 1.0290 - acc: 0.6897 - val_loss: 1.6332 - val_acc: 0.6154
Epoch 5/10
0s - loss: 0.9016 - acc: 0.7505 - val_loss: 2.0020 - val_acc: 0.5385
Epoch 6/10
0s - loss: 0.8354 - acc: 0.7841 - val_loss: 2.0479 - val_acc: 0.5769
Epoch 7/10
0s - loss: 0.7604 - acc: 0.7966 - val_loss: 1.8765 - val_acc: 0.6154
Epoch 8/10
0s - loss: 0.6991 - acc: 0.7925 - val_loss: 2.1036 - val_acc: 0.6154
Epoch 9/10
0s - loss: 0.6699 - acc: 0.8260 - val_loss: 1.9409 - val_acc: 0.7308
Epoch 10/10
0s - loss: 0.6438 - acc: 0.8155 - val_loss: 1.9129 - val_acc: 0.6923


<keras.callbacks.History at 0x7f62813e19e8>

In [661]:
inp = Input((60,), batch_shape=(1,60))

out1 = Dense(512, activation='relu',
            weights= unary_model20.layers[1].get_weights())(inp)
out1 = Dropout(0.35)(out1)
out1 = Dense(10, activation='linear', bias=False,
            weights= unary_model20.layers[-1].get_weights())(out1) 

unary_model20b = Model(inp,out1)

sgd = adam(0.007,epsilon=1e-7)

unary_model20b.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

def feat_func20(i,j,x,t,N,L):
    
    binary_chi = np.zeros(L**2)
    #unary_chi = np.zeros(N*L)
    #unary_chi[i*N:(i+1)*N] = x[t]
    unary_chi = unary_model20b.predict(x[t].reshape((1,60)))[0,i]
    
    if t > 0:
    
        binary_chi[(L*i+j)] = 1
        
    
    
    output = np.zeros(1 + L**2)
    output[0] = unary_chi
    output[1:] = binary_chi
    
    return output

In [663]:
# Adam stochastic mini-batch gradient descent on the (negative) pseudo-log-liklelihood.
# Dataset 20


crf20 = CRF(feat_func20,1+10**2,10, 0, 'B')
crf_u20 = CRF(feat_func20,1+10**2,10, 1e-7, 'U')
alpha = 0.11
B1, B2 = 0.9, 0.999
eps = 1e-7
m, v = np.zeros(crf20.K-1), np.zeros(crf20.K-1)
num_epochs = 20
batch_size = 5
epoch = 1
best_acc20 = 0

acc20_t1 = np.zeros(17)
acc20_u = np.zeros(17)
for n in range(17):
    acc20_t1[n] = 1 - hamming(crf20.MAP(X_ts20[n]), y_ts20[n])
    acc20_u[n] = 1 - hamming(crf_u20.MAP(X_ts20[n]), y_ts20[n])
print('Initial accuracy score Binary: ',100* acc20_t1.mean(), '%.')
print('Accuracy score Unary only: ',100* acc20_u.mean(), '%.')
print('Zero-one-loss Unary: ', 100*acc20_u[acc20_u==1].sum()/17, '%')

while epoch <= num_epochs:
    
    min_batch = np.arange(X_tr20.shape[0])
    np.random.shuffle(min_batch)
    
    for k in range(0, X_tr20.shape[0],batch_size):
        
        X_b, y_b = X_tr20[min_batch[k:k+batch_size]],y_tr20[min_batch[k:k+batch_size]]
        t = (X_tr20.shape[0]//batch_size)*(epoch-1) + (k // batch_size) + 1
        g = crf20.grad_pll(X_b, y_b)
        m = B1 * m + (1-B1) * g
        v = B2 * v + np.square(np.sqrt(1-B2) * g)
        m_b = m / (1 - B1**t)
        v_b = v / (1 - B2**t)
        crf20.W[1:] = crf20.W[1:] - (alpha * m_b) / (np.sqrt(v_b) + eps)
        
        if (k // batch_size) % 2 == 0:
            
            acc20_test = np.zeros(17)
            
            for n in range(17):
                acc20_test[n] = 1 - hamming(crf20.MAP(X_ts20[n]), y_ts20[n])
            print('Epoch/Iteration: ', epoch, '/', t-1, '. Current Average Test Hamming Accuracy: ',
                  100* acc20_test.mean(), '%.')
            
            if best_acc20 < acc20_test.mean():
                best_acc20 = acc20_test.mean()
                print('Zero-one-loss: ', 100*acc20_test[acc20_test==1].sum()/17, '%')
    epoch += 1

Initial accuracy score Binary:  55.7117504951 %.
Accuracy score Unary only:  71.6397770731 %.
Zero-one-loss Unary:  0.0 %
Epoch/Iteration:  1 / 0 . Current Average Test Hamming Accuracy:  58.0265168323 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  1 / 2 . Current Average Test Hamming Accuracy:  67.8851617948 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  1 / 4 . Current Average Test Hamming Accuracy:  70.4309094556 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 6 . Current Average Test Hamming Accuracy:  73.1396945489 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  2 / 8 . Current Average Test Hamming Accuracy:  72.963637619 %.
Epoch/Iteration:  2 / 10 . Current Average Test Hamming Accuracy:  74.1700667147 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 12 . Current Average Test Hamming Accuracy:  74.7411162367 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 14 . Current Average Test Hamming Accuracy:  75.0070566049 %.
Zero-one-loss:  0.0 %
Epoch/Iteration:  3 / 16 . Current Average Test Hamming Ac

In [674]:
train_x = [X_n, X_n2, X_n3, X_n4, X_n5, X_n6, X_n7, X_n8, X_n9, X_n10, X_n11,
          X_n12, X_n13, X_n14, X_n15, X_n16, X_n17, X_n18, X_n19, X_n20]

train_ye = [y_n, y_n2,y_n3,y_n4,y_n5,y_n6,y_n7,y_n8,y_n9,y_n10,y_n11,y_n12,y_n13,
           y_n14,y_n15,y_n16,y_n17,y_n18,y_n19,y_n20]

train_y = []

for Y in train_ye:
    
    Y_n = np.zeros(Y.shape[0], dtype='i4')
    for k in range(Y.shape[0]):
        Y_n[k] = Y[k].argmax()
        
    train_y.append(Y_n)

In [681]:
test_x = [X_ts, X_ts2, X_ts3, X_ts4, X_ts5, X_ts6, X_ts7, X_ts8, X_ts9, X_ts10, X_ts11, X_ts12, X_ts13, 
          X_ts14, X_ts15, X_ts16, X_ts17, X_ts18, X_ts19, X_ts20]

test_y = [y_ts, y_ts2, y_ts3, y_ts4, y_ts5, y_ts6, y_ts7, y_ts8, y_ts9, y_ts10, y_ts11, 
          y_ts12, y_ts13, y_ts14, y_ts15, y_ts16, y_ts17, y_ts18, y_ts19, y_ts20]

In [677]:
import sklearn
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [687]:
hamm_svc = np.zeros(20)
zo_svc = np.zeros(20)

for k in range(20):
    clf_s = SVC()
    clf_s.fit(train_x[k],train_y[k])
    
    ham_t = np.zeros(test_y[k].shape[0])
    for j in range(test_y[k].shape[0]):
        ham_t[j] = accuracy_score(test_y[k][j], clf_s.predict(test_x[k][j]))
    
    hamm_svc[k] = ham_t.mean()
    zo_svc[k] = ham_t[ham_t==1].sum()/ham_t.shape[0]

In [690]:
hamm_svc_r = np.zeros(20)

for k in range(20):
    hamm_svc_r[k] = round(hamm_svc[k],3)


In [691]:
hamm_svc_r

array([ 0.392,  0.464,  0.346,  0.342,  0.237,  0.289,  0.258,  0.289,
        0.45 ,  0.299,  0.441,  0.511,  0.49 ,  0.49 ,  0.346,  0.372,
        0.44 ,  0.493,  0.476,  0.478])

In [695]:
u_h = [0.776, 0.739, 0.552, 0.801, 0.731, 0.649, 0.731, 0.633, 0.813, 0.573,
       0.662, 0.805, 0.769, 0.797, 0.753, 0.682, 0.658, 0.740, 0.781, 0.716]

u_z = np.zeros(20)
u_z[12] = 0.059
u_z[-2] = 0.059

b_z = np.zeros(20)
b_z[1] = 0.059
b_z[2] = 0.059
b_z[11] = 0.294
b_z[13] = 0.118
b_z[-2] = 0.176

b_h = [0.872, 0.782, 0.623, 0.856, 0.773, 0.680, 0.779, 0.648, 0.879, 0.686, 0.719,
       0.847, 0.816, 0.789, 0.828, 0.736, 0.706, 0.769, 0.840, 0.767]

In [698]:
import pandas as pd
def highlight_max(s):
    is_max = s == s.max()
    return ['color: red' if v else '' for v in is_max]

In [726]:
def get_res(k):
    hamm = [round(hamm_svc_r.mean(),3), round(array(u_h).mean(),3), round(array(b_h).mean(),3)]
    zo = [round(zo_svc.mean(),3), round(u_z.mean(),3), round(b_z.mean(),3)]
    models = ['SVC', 'Unary only', 'Unary and Binary']
    met = ['Average Hamming', 'Zero-One']
    res_df = pd.DataFrame(hamm)
    res_df.columns = ['Average Hamming:']
    res_df['Zero-One:'] = zo
    res_df.columns.name = 'Model:'
    return res_df

In [727]:
get_res(19).style.apply(highlight_max)

Model:,Average Hamming:,Zero-One:
0,0.395,0.0
1,0.718,0.006
2,0.77,0.035
