In [1]:
import numpy as np
import tensorflow as tf
from keras.datasets import mnist, fashion_mnist
import time

  return f(*args, **kwds)
  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
def get_save_path(i):
    return '/tmp/boostmodels' + str(i)

def get_scope_variable(scope, var, shape=None, initializer=None):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        v = tf.get_variable(var, shape=shape, initializer=initializer)
    return v

In [None]:
def convert_binary(y):
    y_onehot = -np.ones((y.shape[0], 10))
    y_onehot[np.arange(y.shape[0]), y] = 1
    return y_onehot

In [3]:
class OneVsAllMulticlassClassifier(object):
    
    def get_vars(i):
        initializer = tf.contrib.layers.xavier_initializer(uniform=False)
        W1 = get_scope_variable(scope=scope, var="W1_" + str(i), shape=[784, num_features], initializer=initializer)
        W2 = get_scope_variable(scope=scope, var="W2_" + str(i), shape=[num_features, num_classes], initializer=initializer)
        b1 = get_scope_variable(scope=scope, var="b1_" + str(i), shape=[num_features], initializer=initializer)
        b2 = get_scope_variable(scope=scope, var="b2_" + str(i), shape=[num_classes], initializer=initializer)
        return W1, W2, b1, b2
    
    def bn_fc_relu_fc(w1, w2, b1, b2, x):
        return tf.matmul(tf.nn.relu(tf.matmul(tf.layers.batch_normalization(x), w1) + b1), w2) + b2
    
    def loss_fn(y, scores, reg, W1, W2):
        return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=y, logits=scores)) + reg * (tf.nn.l2_loss(W1) + tf.nn.l2_loss(W2))
    
    def optimize(i):
        with tf.control_dependencies(update_ops):
            self.optimizer.minimize(self.loss[i])
    
    def __init__(self, x, y, num_features, num_classes, lr, reg, scope=""):
        """ init the model with hyper-parameters etc """
        self.x = x
        self.y = y
        self.num_features = num_features
        self.num_classes = num_classes
        
        self.params = []
        self.scores = []
        self.losses = []
        self.predictions = []
        self.acc = []
        self.incorrect = []
        for i in range(10):
            W1, W2, b1, b2 = get_vars(i)
            self.params.append(get_vars(i))
            self.scores.append(bn_fc_relu_fc(W1, W2, b1, b2, self.x))
            self.losses.append(loss_fn(self.y[i], scores[i], reg, W1, W2))
            
            self.predictions.append(tf.argmax(scores[i], axis=1))
            self.acc.append(tf.reduce_mean(tf.cast(tf.equal(self.predictions[i], self.y[i]), tf.float32)))
            self.incorrect.append(tf.not_equal(self.predictions[i], self.y[i]))
            self.metrics.append((self.losses[i], self.acc[i]))
            
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        self.optimizer = tf.train.AdamOptimizer(lr)

In [None]:
def train_classifiers(data, iterations):
    
    (x_train, y_train), (x_test, y_test) = data
    
    probs = np.ones(60000, 10) / (60000 * 10)
    alpha = np.zeros(iterations)    

    for i in np.arange(iterations):
        
        tf.reset_default_graph()
        
        print("trial", i, "probability distribution:\n", "%0.2f "*10 % tuple(np.bincount(y_train, weights=probs)))
        x = tf.placeholder(tf.float32, shape=[None, 784])
        y = tf.placeholder(tf.int64, shape=[None, 10])
        c = OneVsAllMulticlassClassifier(x=x, y=y, num_features=128, num_classes=10, lr=0.001, reg=0.005, scope="boosting")
        s = tf.train.Saver(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, "boosting"))
        
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            print("begin training classifier")
            train_time = time.time()
            for step in range(2001):
                indices = np.random.choice(60000 * 10, 1000, p=probs)
                x_ = x_train[(indices/10).astype(int)]
                y_ = y_train[(indices/10).astype(int)]
                y_in = np.remainder(indices,10)
                sess.run(c.optimize, feed_dict={x:x_, y:y_, y_in:y_in_})
                if step % 1000 == 0:
                    train_loss, train_acc = sess.run([c.loss, c.acc], feed_dict={x:x_, y:y_, y_in:y_in_})
                    print("\tepoch %d: train loss %g, train error %g"%(step/60, train_loss, 1 - train_acc))  
            train_time = time.time() - train_time
            print("end training classifier // time elapsed: %.4f s"%(train_time))

            eval_train_time = time.time()
            incorrect = sess.run(c.incorrect, feed_dict={x:x_train, y:y_train})
            correct = incorrect * 2 - 1
            train_error = np.sum(probs[incorrect.astype(bool)])
            eval_train_time = time.time() - eval_train_time
            print("train set error: %.4f // time elapsed: %.4f s"%(train_error, eval_train_time))   

            if train_error < 0.5:
                s.save(sess=sess, save_path=get_save_path(i))
            else:
                return (i-1), alpha

            eval_test_time = time.time()
            test_error = 1 - sess.run(c.acc, feed_dict={x:x_test, y:y_test})
            eval_test_time = time.time() - eval_test_time
            print("test set error: %.4f // time elapsed: %.4f s"%(test_error, eval_test_time)) 

            alpha[i] = 0.5 * np.log((1 - train_error)/train_error)
            probs = probs * np.exp(alpha[i]*correct)
            probs /= np.sum(probs)
            
    return iterations, alpha

In [None]:
def eval_classifiers(data, iterations, alpha): 
    
    (x_train, y_train), (x_test, y_test) = data
    scores = np.zeros((10000, 10))
    
    for i in np.arange(iterations):
        
        tf.reset_default_graph()

        x = tf.placeholder(tf.float32, shape=[None, 784])
        y = tf.placeholder(tf.int64, shape=[None])
        c = MulticlassClassifier(x=x, y=y, num_features=128, num_classes=10, lr=0.001, reg=0.001, scope="boosting")

        s = tf.train.Saver(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, "boosting"))
        
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            s.restore(sess, get_save_path(i))
            predictions = sess.run(c.predictions, feed_dict={x:x_test, y:y_test})
            scores[np.arange(10000), predictions] += alpha[i]
        
    return scores

In [None]:
def boost(data, iterations):
    iterations, alpha = train_classifiers(data, iterations)
    scores = eval_classifiers(data, iterations, alpha)
    print("iterations:", iterations, "| boosted accuracy:", np.mean(np.equal(np.argmax(scores, axis=1), y_test)))

# Keras Implementation

In [1]:
import numpy as np
import keras as keras
from keras import backend as K
from keras.models import Sequential, load_model
from keras.layers import Dense, Activation, Conv2D, MaxPooling2D, Flatten
from keras import regularizers
from keras.datasets import mnist, fashion_mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape((-1, 28, 28, 1))
x_test = x_test.reshape((-1, 28, 28, 1))

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  return f(*args, **kwds)


In [2]:
def get_binary_classifier():
    model = Sequential()
    model.add(Conv2D(32, kernel_size=3, strides=1, activation='relu', input_shape=(28, 28, 1)))
    model.add(Conv2D(64, kernel_size=3, strides=1, activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
    model.add(Conv2D(64, kernel_size=3, strides=1, activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.02)))
    model.add(Dense(10, activation='relu', kernel_regularizer=regularizers.l2(0.02)))
    model.add(Dense(1, activation='sigmoid', kernel_regularizer=regularizers.l2(0.01)))
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

def get_learners(num_classes):
    models = []
    for _ in range(num_classes):
        models.append(get_binary_classifier())
    return models
              
def del_models(models):
    for i in range(len(models)):
        del models[0]

In [None]:
models = get_learners(10)
probs = np.ones(60000*10) / (60000*10)
num_iterations = 20
alpha = np.zeros(num_iterations)
H = np.zeros((10000, 10)) # for testing

for round in range(20):
    print("\nEPOCH:", round, "PROBS:", "%0.2f "*10 % tuple(np.sum(probs.reshape((60000,10)), axis=0)))

    # sample from distribution
    indices = np.random.choice(60000*10, 600000, p=probs)
    
    # parameters needed to update distribution
    h = np.zeros((60000,10))
    
    # train models
    error = 0
    for i in range(10):
        idx = np.ndarray.flatten(np.array(np.where(indices % 10 == i)))
        examples = (idx/10).astype(int)
        x = x_train[examples]
        y = (y_train == i)[examples]
        models[i].fit(x, y, epochs=5, batch_size=128, verbose=0)
        test_loss, test_err = models[i].evaluate(x_test, (y_test == i), batch_size=128, verbose=0)        
        
        # calculate error
        output = models[i].predict(x_train, batch_size=128).reshape(60000)
        h[:,i] = output-0.5
        incorrect = ((h[:,i] > 0) != (y_train == 0))
        train_err = np.sum(probs.reshape((60000,10))[incorrect.reshape(60000), 0])         
        error += train_err
        
        print("class %d: train err %0.2f, test err %0.2f"%(i, train_err, test_err))
    
    # update distribution   
    print("weighted error:", error)
    alpha[round] = 0.5 * np.log((1-error)/error)
    
    Y = -np.ones((60000,10))
    Y[np.arange(60000), y_train] = 1
    Y = np.ndarray.flatten(Y)
        
    probs = probs * np.exp(alpha[round] * Y * h.reshape(600000))
    probs /= np.sum(probs)
    
    # test    
    for i in range(10):
        output = models[i].predict(x_test, batch_size=128).reshape(10000)
        H[:,i] += alpha[round] * output-0.5
    print("test error:", np.mean(np.argmax(H, axis=1) != y_test))


EPOCH: 0 PROBS: 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 
class 0: train err 0.00, test err 1.00
class 1: train err 0.02, test err 1.00
class 2: train err 0.02, test err 1.00
class 3: train err 0.02, test err 1.00
class 4: train err 0.02, test err 1.00
class 5: train err 0.02, test err 1.00
class 6: train err 0.02, test err 1.00
class 7: train err 0.02, test err 1.00
class 8: train err 0.02, test err 1.00
class 9: train err 0.02, test err 0.99
weighted error: 0.17829166666666663
test error: 0.0116

EPOCH: 1 PROBS: 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 
class 0: train err 0.00, test err 1.00
class 1: train err 0.02, test err 1.00
class 2: train err 0.02, test err 1.00
class 3: train err 0.02, test err 1.00
class 4: train err 0.02, test err 1.00
class 5: train err 0.02, test err 1.00
class 6: train err 0.02, test err 1.00
class 7: train err 0.02, test err 1.00
class 8: train err 0.02, test err 1.00
class 9: train err 0.02, test err 1.00
weighted error: 0.17839278562

class 0: train err 0.00, test err 1.00
class 1: train err 0.02, test err 1.00
class 2: train err 0.02, test err 1.00
class 3: train err 0.02, test err 1.00
class 4: train err 0.02, test err 1.00
class 5: train err 0.02, test err 1.00
class 6: train err 0.02, test err 1.00
class 7: train err 0.02, test err 1.00
class 8: train err 0.02, test err 1.00
class 9: train err 0.02, test err 1.00
weighted error: 0.17738361375126369
test error: 0.0056

EPOCH: 17 PROBS: 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 
class 0: train err 0.00, test err 1.00
class 1: train err 0.02, test err 1.00
class 2: train err 0.02, test err 1.00
class 3: train err 0.02, test err 1.00
class 4: train err 0.02, test err 1.00
class 5: train err 0.02, test err 1.00
class 6: train err 0.02, test err 1.00


In [14]:
def boost(data, num_iterations):
    (x_train, y_train), (x_test, y_test) = data
    num_train = y_train.shape[0]
    num_test = y_test.shape[0]
    num_classes = len(np.unique(y_train))

    models = get_learners(num_classes)
    probs = np.ones(num_train*num_classes) / (num_train*num_classes)

    # for testing
    alpha = np.zeros(num_iterations) 
    H = np.zeros((num_test, num_classes)) 

    for round in range(num_iterations):
        print("\nEPOCH:", round, "PROBS:", "%0.2f "*10 % tuple(np.sum(probs.reshape((num_train, num_classes)), axis=0)))

        # sample from distribution
        indices = np.random.choice(num_train*num_classes, num_train*num_classes, p=probs)

        # parameters needed to update distribution
        h = np.zeros((num_train, num_classes))

        # train models
        error = 0
        for i in range(num_classes):        
            idx = np.ndarray.flatten(np.array(np.where(indices % num_classes == i)))
            examples = (idx/num_classes).astype(int)
            x = x_train[examples]
            y = (y_train == i)[examples]        
            models[i].fit(x, y, epochs=5, batch_size=128, verbose=0)
            test_loss, test_acc = models[i].evaluate(x_test, (y_test == i), batch_size=128, verbose=0)
            test_err = 1 - test_acc
        
            # calculate error
            output = models[i].predict(x_train, batch_size=128).reshape(num_train)
            h[:,i] = output-0.5
            incorrect = ((h[:,i] > 0) != (y_train == 0))
            train_err = np.sum(probs.reshape((num_train,num_classes))[incorrect.reshape(num_train), 0])         
            error += train_err

            print("class %d: train err %0.2f, test err %0.2f"%(i, train_err, test_err))
 
        # update distribution   
        print("weighted error:", error)
        alpha[round] = 0.5 * np.log((1-error)/error)
    
        Y = -np.ones((num_train, num_classes))
        Y[np.arange(num_train), y_train] = 1
        Y = np.ndarray.flatten(Y)
        
        probs = probs * np.exp(alpha[round] * Y * h.reshape(num_train*num_classes))
        probs /= np.sum(probs)
    
        # test    
        for i in range(num_classes):
            output = models[i].predict(x_test, batch_size=128).reshape(num_test)
            H[:,i] += alpha[round] * output-0.5
        print("test error:", np.mean(np.argmax(H, axis=1) != y_test))

# MNIST Results

In [8]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape((-1, 28, 28, 1))
x_test = x_test.reshape((-1, 28, 28, 1))
data = (x_train, y_train), (x_test, y_test)

In [9]:
boost(data, 10)


EPOCH: 0 PROBS: 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 
class 0: train err 0.00, test err 0.00
class 1: train err 0.02, test err 0.00
class 2: train err 0.02, test err 0.01
class 3: train err 0.02, test err 0.00
class 4: train err 0.02, test err 0.00
class 5: train err 0.02, test err 0.00
class 6: train err 0.02, test err 0.00
class 7: train err 0.02, test err 0.00
class 8: train err 0.02, test err 0.00
class 9: train err 0.02, test err 0.01
weighted error: 0.17827833333333326
test error: 0.0132

EPOCH: 1 PROBS: 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 
class 0: train err 0.00, test err 0.00
class 1: train err 0.02, test err 0.00
class 2: train err 0.02, test err 0.00
class 3: train err 0.02, test err 0.00
class 4: train err 0.02, test err 0.00
class 5: train err 0.02, test err 0.00
class 6: train err 0.02, test err 0.00
class 7: train err 0.02, test err 0.00
class 8: train err 0.02, test err 0.00
class 9: train err 0.02, test err 0.00
weighted error: 0.17909052096

# Fashion MNIST Results

In [12]:
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
x_train = x_train.reshape((-1, 28, 28, 1))
x_test = x_test.reshape((-1, 28, 28, 1))
data = (x_train, y_train), (x_test, y_test)

In [15]:
boost(data, 10)


EPOCH: 0 PROBS: 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 
class 0: train err 0.00, test err 0.03
class 1: train err 0.02, test err 0.00
class 2: train err 0.02, test err 0.03
class 3: train err 0.02, test err 0.02
class 4: train err 0.02, test err 0.04
class 5: train err 0.02, test err 0.01
class 6: train err 0.02, test err 0.06
class 7: train err 0.02, test err 0.01
class 8: train err 0.02, test err 0.01
class 9: train err 0.02, test err 0.01
weighted error: 0.17796499999999998
test error: 0.1005

EPOCH: 1 PROBS: 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 0.10 
class 0: train err 0.00, test err 0.03
class 1: train err 0.02, test err 0.00
class 2: train err 0.02, test err 0.03
class 3: train err 0.02, test err 0.02
class 4: train err 0.02, test err 0.03
class 5: train err 0.02, test err 0.01
class 6: train err 0.02, test err 0.05
class 7: train err 0.02, test err 0.01
class 8: train err 0.02, test err 0.00
class 9: train err 0.02, test err 0.01
weighted error: 0.16744201677