In [1]:
import numpy as np
import tensorflow as tf
from keras.datasets import mnist, fashion_mnist
import time
tf.logging.set_verbosity(tf.logging.WARN)

  return f(*args, **kwds)
  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [None]:
import numpy as np
anp.unique(np.array([1,2,3,1,5,23]))


# Self-Coded Setup

In [2]:
def get_save_path(i):
    return '/tmp/boostmodels' + str(i)

def get_scope_variable(scope, var, shape=None, initializer=None):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        v = tf.get_variable(var, shape=shape, initializer=initializer)
    return v

In [3]:
class MulticlassClassifier(object):
    
    def __init__(self, x, y, num_features, num_classes, lr, reg, scope=""):
        """ init the model with hyper-parameters etc """
        self.x = x
        self.y = y
        self.num_features = num_features
        self.num_classes = num_classes
        
        initializer = tf.contrib.layers.xavier_initializer(uniform=False)
        self.W1 = get_scope_variable(scope=scope, var="W1", shape=[784, num_features], initializer=initializer)
        self.W2 = get_scope_variable(scope=scope, var="W2", shape=[num_features, num_classes], initializer=initializer)
        self.b1 = get_scope_variable(scope=scope, var="b1", shape=[num_features], initializer=initializer)
        self.b2 = get_scope_variable(scope=scope, var="b2", shape=[num_classes], initializer=initializer)

        scores = tf.matmul(tf.nn.relu(tf.matmul(tf.layers.batch_normalization(self.x), self.W1) + self.b1), self.W2) + self.b2
        self.loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.y, logits=scores)) + reg * (tf.nn.l2_loss(self.W1) + tf.nn.l2_loss(self.W2))

        self.predictions = tf.argmax(scores, axis=1)        
        self.acc = tf.reduce_mean(tf.cast(tf.equal(self.predictions, self.y), tf.float32))
        self.incorrect = tf.not_equal(self.predictions, self.y)

        self.metrics = self.loss, self.acc
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            self.optimize = tf.train.AdamOptimizer(lr).minimize(self.loss)

In [4]:
def train_classifiers(data, iterations):
    
    (x_train, y_train), (x_test, y_test) = data
    probs = np.ones(60000)/60000
    alpha = np.zeros(iterations)    

    for i in np.arange(iterations):
        
        tf.reset_default_graph()
        
        print("trial", i, "probability distribution:", "%0.2f "*10 % tuple(np.bincount(y_train, weights=probs)))
        x = tf.placeholder(tf.float32, shape=[None, 784])
        y = tf.placeholder(tf.int64, shape=[None])
        c = MulticlassClassifier(x=x, y=y, num_features=128, num_classes=10, lr=0.001, reg=0.005, scope="boosting")
        s = tf.train.Saver(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, "boosting"))
        
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            print("begin training classifier")
            train_time = time.time()
            for step in range(2001):
                indices = np.random.choice(60000, 1000, p=probs)
                x_ = x_train[indices]
                y_ = y_train[indices]
                sess.run(c.optimize, feed_dict={x:x_, y:y_})
                if step % 1000 == 0:
                    train_loss, train_acc = sess.run([c.loss, c.acc], feed_dict={x:x_, y:y_})
                    print("\tepoch %d: train loss %g, train error %g"%(step/60, train_loss, 1 - train_acc))  
            train_time = time.time() - train_time
            print("end training classifier // time elapsed: %.4f s"%(train_time))

            eval_train_time = time.time()
            incorrect = sess.run(c.incorrect, feed_dict={x:x_train, y:y_train})
            correct = incorrect * 2 - 1
            train_error = np.sum(probs[incorrect.astype(bool)])
            eval_train_time = time.time() - eval_train_time
            print("train set error: %.4f // time elapsed: %.4f s"%(train_error, eval_train_time))   

            if train_error < 0.5:
                s.save(sess=sess, save_path=get_save_path(i))
            else:
                return (i-1), alpha

            eval_test_time = time.time()
            test_error = 1 - sess.run(c.acc, feed_dict={x:x_test, y:y_test})
            eval_test_time = time.time() - eval_test_time
            print("test set error: %.4f // time elapsed: %.4f s"%(test_error, eval_test_time)) 

            alpha[i] = 0.5 * np.log((1 - train_error)/train_error)
            probs = probs * np.exp(alpha[i]*correct)
            probs /= np.sum(probs)
            
    return iterations, alpha

In [10]:
def eval_classifiers(data, iterations, alpha): 
    
    (x_train, y_train), (x_test, y_test) = data
    scores = np.zeros((10000, 10))
    
    for i in np.arange(iterations):
        
        tf.reset_default_graph()

        x = tf.placeholder(tf.float32, shape=[None, 784])
        y = tf.placeholder(tf.int64, shape=[None])
        c = MulticlassClassifier(x=x, y=y, num_features=128, num_classes=10, lr=0.001, reg=0.001, scope="boosting")

        s = tf.train.Saver(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, "boosting"))
        
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            s.restore(sess, get_save_path(i))
            predictions = sess.run(c.predictions, feed_dict={x:x_test, y:y_test})
            scores[np.arange(10000), predictions] += alpha[i]
            
        if i % 25 == 0:
            print("iterations:", i, "| boosted accuracy:", np.mean(np.equal(np.argmax(scores, axis=1), y_test)))
            
    print("iterations:", iterations, "| boosted accuracy:", np.mean(np.equal(np.argmax(scores, axis=1), y_test)))

In [7]:
def boost(data, iterations):
    iterations, alpha = train_classifiers(data, iterations)
    eval_classifiers(data, iterations, alpha)

# MNIST Results

In [15]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape((60000, 28*28))
x_test = x_test.reshape((10000, 28*28))
data = (x_train, y_train), (x_test, y_test)

In [16]:
boost(data, 50)

trial 0 probability distribution: 0.10 0.11 0.10 0.10 0.10 0.09 0.10 0.10 0.10 0.10 
begin training classifier
	epoch 0: train loss 109.816, train error 0.907
	epoch 16: train loss 0.396871, train error 0.011
	epoch 33: train loss 0.255341, train error 0.000999987
end training classifier // time elapsed: 16.9397 s
train set error: 0.0053 // time elapsed: 0.1555 s
test set error: 0.0494 // time elapsed: 0.0330 s
trial 1 probability distribution: 0.10 0.09 0.13 0.08 0.10 0.13 0.10 0.08 0.08 0.10 
begin training classifier
	epoch 0: train loss 104.364, train error 0.916
	epoch 16: train loss 0.558219, train error 0.014
	epoch 33: train loss 0.267767, train error 0.005
end training classifier // time elapsed: 16.2604 s
train set error: 0.0059 // time elapsed: 0.1511 s
test set error: 0.0457 // time elapsed: 0.0285 s
trial 2 probability distribution: 0.08 0.07 0.12 0.14 0.10 0.10 0.09 0.12 0.10 0.09 
begin training classifier
	epoch 0: train loss 118.89, train error 0.905
	epoch 16: train l

test set error: 0.0962 // time elapsed: 0.0269 s
trial 20 probability distribution: 0.05 0.13 0.14 0.12 0.07 0.08 0.05 0.07 0.10 0.18 
begin training classifier
	epoch 0: train loss 118.808, train error 0.897
	epoch 16: train loss 0.43453, train error 0.035
	epoch 33: train loss 0.307091, train error 0.016
end training classifier // time elapsed: 16.4484 s
train set error: 0.0188 // time elapsed: 0.1722 s
test set error: 0.1631 // time elapsed: 0.0370 s
trial 21 probability distribution: 0.10 0.09 0.11 0.11 0.07 0.11 0.07 0.06 0.13 0.14 
begin training classifier
	epoch 0: train loss 158.764, train error 0.927
	epoch 16: train loss 0.365475, train error 0.000999987
	epoch 33: train loss 0.272354, train error 0
end training classifier // time elapsed: 16.1903 s
train set error: 0.0059 // time elapsed: 0.1553 s
test set error: 0.1015 // time elapsed: 0.0309 s
trial 22 probability distribution: 0.07 0.12 0.08 0.11 0.11 0.13 0.09 0.08 0.07 0.15 
begin training classifier
	epoch 0: train lo

test set error: 0.0758 // time elapsed: 0.0284 s
trial 40 probability distribution: 0.05 0.07 0.11 0.14 0.07 0.10 0.08 0.07 0.12 0.20 
begin training classifier
	epoch 0: train loss 112.891, train error 0.87
	epoch 16: train loss 0.413676, train error 0.009
	epoch 33: train loss 0.279347, train error 0.005
end training classifier // time elapsed: 16.5360 s
train set error: 0.0071 // time elapsed: 0.1401 s
test set error: 0.0928 // time elapsed: 0.0271 s
trial 41 probability distribution: 0.04 0.04 0.07 0.13 0.26 0.08 0.06 0.09 0.09 0.14 
begin training classifier
	epoch 0: train loss 96.5812, train error 0.801
	epoch 16: train loss 0.337813, train error 0.000999987
	epoch 33: train loss 0.253911, train error 0.000999987
end training classifier // time elapsed: 15.4321 s
train set error: 0.0041 // time elapsed: 0.1348 s
test set error: 0.1050 // time elapsed: 0.0253 s
trial 42 probability distribution: 0.10 0.04 0.08 0.09 0.15 0.14 0.05 0.08 0.13 0.13 
begin training classifier
	epoch 0

In [None]:
boost(data, 300)

iterations: 50 | boosted accuracy: 0.9804
iterations: 100 | boosted accuracy: 0.9811


# Fashion MNIST Results

In [8]:
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
x_train = x_train.reshape((60000, 28*28))
x_test = x_test.reshape((10000, 28*28))
data = (x_train, y_train), (x_test, y_test)

In [9]:
boost(data, 50)

trial 0 probability distribution: 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 
begin training classifier
	epoch 0: train loss 129.521, train error 0.894
	epoch 16: train loss 0.97251, train error 0.149
	epoch 33: train loss 0.689323, train error 0.1
end training classifier // time elapsed: 18.5972 s
train set error: 0.1157 // time elapsed: 0.1771 s
test set error: 0.1621 // time elapsed: 0.0384 s
trial 1 probability distribution: 0.11 0.06 0.10 0.09 0.15 0.06 0.21 0.07 0.08 0.07 
begin training classifier
	epoch 0: train loss 118.458, train error 0.897
	epoch 16: train loss 1.56991, train error 0.275
	epoch 33: train loss 1.07945, train error 0.248
end training classifier // time elapsed: 17.0630 s
train set error: 0.2406 // time elapsed: 0.1915 s
test set error: 0.2039 // time elapsed: 0.0314 s
trial 2 probability distribution: 0.12 0.05 0.14 0.10 0.13 0.04 0.26 0.06 0.06 0.05 
begin training classifier
	epoch 0: train loss 205.89, train error 0.855
	epoch 16: train loss 2.08148

# Keras Setup

In [2]:
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
x_train = x_train.reshape((60000, 28, 28, 1))
x_test = x_test.reshape((10000, 28, 28, 1))
data = (x_train, y_train), (x_test, y_test)

In [3]:
from keras.models import Sequential, load_model
from keras.layers import Dense, Activation, Conv2D, MaxPooling2D, Flatten
from keras import regularizers

In [4]:
def train_classifier(x, y, x_train, y_train, i):
    model = Sequential()
    model.add(Conv2D(32, kernel_size=3, strides=1, activation='relu', input_shape=(28, 28, 1)))
    model.add(Conv2D(64, kernel_size=3, strides=1, activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
    model.add(Conv2D(64, kernel_size=3, strides=1, activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
    model.add(Dense(10, activation='softmax', kernel_regularizer=regularizers.l2(0.01)))

    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    model.fit(x, y, epochs=10, batch_size=128)
    loss, acc = model.evaluate(x, y, batch_size=128)
    predictions = np.argmax(model.predict(x_train, batch_size=128), axis=1)
    model.save('/tmp/my_model' + str(i) + '.h5')  # creates a HDF5 file 'my_model.h5'
    del model  # deletes the existing model
    return (1-acc), predictions != y_train

In [5]:
def train_classifiers(data, iterations):
    
    (x_train, y_train), (x_test, y_test) = data
    probs = np.ones(60000)/60000
    alpha = np.zeros(iterations)

    for i in np.arange(iterations):
        print("trial", i, "probability distribution:", "%0.2f "*10 % tuple(np.bincount(y_train, weights=probs)))
        
        indices = np.random.choice(60000, 60000, p=probs)
        err, incorrect = train_classifier(x_train[indices], y_train[indices], x_train, y_train, i)
        correct = incorrect * 2 - 1
        train_error = np.sum(probs[incorrect.astype(bool)])
        print("train set error: %.4f"%(train_error))   

        if train_error >= 0.5:
            return (i-1), alpha

        alpha[i] = 0.5 * np.log((1 - train_error)/train_error)
        probs = probs * np.exp(alpha[i]*correct)
        probs /= np.sum(probs)
            
    return iterations, alpha

In [6]:
def eval_classifiers(data, iterations, alpha): 
    
    (x_train, y_train), (x_test, y_test) = data
    scores = np.zeros((10000, 10))
    
    for i in np.arange(iterations):
        model = load_model('/tmp/my_model' + str(i) + '.h5')
        predictions = np.argmax(model.predict(x_test, batch_size=128), axis=1)
        scores[np.arange(10000), predictions] += alpha[i]
            
        if i % 25 == 0:
            print("iterations:", i, "| boosted accuracy:", np.mean(np.equal(np.argmax(scores, axis=1), y_test)))
            
    print("iterations:", iterations, "| boosted accuracy:", np.mean(np.equal(np.argmax(scores, axis=1), y_test)))
        
    return scores

In [7]:
def boost(data, iterations):
    iterations, alpha = train_classifiers(data, iterations)
    eval_classifiers(data, iterations, alpha)
    return alpha

In [8]:
alpha = boost(data, 100)

trial 0 probability distribution: 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0752
trial 1 probability distribution: 0.16 0.06 0.17 0.09 0.12 0.07 0.13 0.06 0.07 0.07 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0723
trial 2 probability distribution: 0.13 0.04 0.28 0.10 0.11 0.05 0.13 0.04 0.04 0.07 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.1333
trial 3 probability distribution: 0.12 0.03 0.19 0.10 0.13 0.03 0.27 0.05 0.03 0.05 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0641
trial 4 probability distribution: 0.21 0.03 0.16 0.09 0.10 0.02 0.25 0.04 0.02 0.07 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Ep

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0769
trial 8 probability distribution: 0.12 0.02 0.21 0.14 0.13 0.01 0.24 0.06 0.02 0.05 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0530
trial 9 probability distribution: 0.15 0.02 0.16 0.12 0.15 0.04 0.22 0.04 0.03 0.06 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0642
trial 10 probability distribution: 0.18 0.02 0.11 0.12 0.18 0.03 0.22 0.07 0.03 0.04 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.1023
trial 11 probability distribution: 0.18 0.01 0.15 0.12 0.16 0.02 0.23 0.05 0.02 0.06 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train s

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0689
trial 15 probability distribution: 0.13 0.02 0.15 0.08 0.21 0.03 0.26 0.06 0.03 0.04 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0797
trial 16 probability distribution: 0.16 0.02 0.13 0.09 0.20 0.02 0.24 0.05 0.03 0.06 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0611
trial 17 probability distribution: 0.15 0.02 0.13 0.09 0.20 0.03 0.26 0.03 0.02 0.05 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0694
trial 18 probability distribution: 0.16 0.02 0.16 0.09 0.16 0.03 0.27 0.06 0.02 0.04 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0625
trial 22 probability distribution: 0.10 0.02 0.14 0.11 0.10 0.05 0.35 0.04 0.04 0.05 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0675
trial 23 probability distribution: 0.13 0.02 0.16 0.10 0.18 0.03 0.26 0.04 0.04 0.05 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0891
trial 24 probability distribution: 0.11 0.02 0.17 0.14 0.17 0.03 0.26 0.04 0.03 0.04 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0545
trial 25 probability distribution: 0.13 0.02 0.13 0.14 0.20 0.02 0.24 0.04 0.03 0.05 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0762
trial 29 probability distribution: 0.18 0.02 0.22 0.12 0.14 0.02 0.19 0.05 0.03 0.04 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0710
trial 30 probability distribution: 0.18 0.02 0.17 0.11 0.15 0.03 0.23 0.04 0.02 0.04 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.1166
trial 31 probability distribution: 0.13 0.01 0.13 0.09 0.24 0.02 0.26 0.04 0.03 0.04 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0514
trial 32 probability distribution: 0.12 0.01 0.19 0.13 0.16 0.02 0.24 0.04 0.03 0.05 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.1307
trial 36 probability distribution: 0.15 0.02 0.17 0.08 0.17 0.02 0.28 0.04 0.02 0.04 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.1793
trial 37 probability distribution: 0.15 0.02 0.16 0.10 0.15 0.03 0.29 0.04 0.02 0.03 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0689
trial 38 probability distribution: 0.16 0.02 0.15 0.08 0.17 0.03 0.27 0.07 0.02 0.03 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0574
trial 39 probability distribution: 0.15 0.03 0.14 0.11 0.23 0.03 0.21 0.05 0.03 0.03 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0491
trial 43 probability distribution: 0.15 0.02 0.13 0.11 0.21 0.03 0.22 0.04 0.02 0.07 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0776
trial 44 probability distribution: 0.12 0.02 0.14 0.09 0.17 0.02 0.31 0.03 0.03 0.06 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0430
trial 45 probability distribution: 0.16 0.02 0.15 0.09 0.14 0.03 0.30 0.04 0.02 0.04 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0823
trial 46 probability distribution: 0.16 0.01 0.20 0.08 0.16 0.03 0.25 0.04 0.02 0.04 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0552
trial 50 probability distribution: 0.14 0.01 0.18 0.09 0.15 0.03 0.23 0.08 0.03 0.06 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0629
trial 51 probability distribution: 0.17 0.02 0.18 0.09 0.12 0.03 0.26 0.06 0.02 0.06 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0569
trial 52 probability distribution: 0.14 0.06 0.16 0.07 0.15 0.03 0.25 0.04 0.04 0.06 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0788
trial 53 probability distribution: 0.20 0.04 0.16 0.07 0.16 0.05 0.20 0.06 0.03 0.04 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0625
trial 57 probability distribution: 0.14 0.03 0.18 0.09 0.16 0.02 0.25 0.06 0.02 0.04 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0709
trial 58 probability distribution: 0.18 0.02 0.19 0.07 0.11 0.01 0.31 0.05 0.03 0.04 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0751
trial 59 probability distribution: 0.13 0.02 0.19 0.09 0.12 0.02 0.33 0.04 0.02 0.04 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0649
trial 60 probability distribution: 0.18 0.02 0.19 0.10 0.12 0.02 0.27 0.04 0.02 0.04 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0488
trial 64 probability distribution: 0.14 0.02 0.15 0.09 0.16 0.02 0.28 0.05 0.04 0.04 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0882
trial 65 probability distribution: 0.14 0.02 0.19 0.07 0.17 0.03 0.25 0.06 0.03 0.03 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0702
trial 66 probability distribution: 0.17 0.03 0.15 0.07 0.15 0.04 0.30 0.05 0.03 0.03 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.1906
trial 67 probability distribution: 0.13 0.02 0.19 0.09 0.15 0.03 0.29 0.04 0.02 0.04 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0687
trial 71 probability distribution: 0.15 0.01 0.11 0.11 0.22 0.03 0.27 0.04 0.03 0.04 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0551
trial 72 probability distribution: 0.17 0.03 0.15 0.08 0.18 0.03 0.26 0.04 0.04 0.03 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0676
trial 73 probability distribution: 0.14 0.02 0.22 0.07 0.13 0.02 0.27 0.08 0.03 0.03 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0709
trial 74 probability distribution: 0.14 0.01 0.15 0.09 0.17 0.02 0.28 0.07 0.02 0.05 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0529
trial 78 probability distribution: 0.14 0.02 0.15 0.11 0.12 0.02 0.28 0.09 0.03 0.03 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0570
trial 79 probability distribution: 0.17 0.01 0.14 0.11 0.17 0.02 0.23 0.06 0.05 0.04 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0737
trial 80 probability distribution: 0.16 0.02 0.12 0.10 0.14 0.03 0.30 0.04 0.03 0.07 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0703
trial 81 probability distribution: 0.15 0.02 0.14 0.11 0.15 0.02 0.28 0.04 0.03 0.06 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0489
trial 85 probability distribution: 0.14 0.02 0.16 0.11 0.18 0.02 0.27 0.04 0.02 0.05 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0665
trial 86 probability distribution: 0.16 0.01 0.16 0.11 0.22 0.02 0.21 0.05 0.02 0.04 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0497
trial 87 probability distribution: 0.17 0.01 0.17 0.12 0.18 0.02 0.21 0.04 0.02 0.05 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train set error: 0.0578
trial 88 probability distribution: 0.13 0.02 0.18 0.09 0.18 0.02 0.26 0.05 0.02 0.04 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
train