In [1]:
import numpy as np
import random, time, pickle, gzip, math
from sklearn.preprocessing import normalize

### Construct 4/9 MNIST

In [2]:
f = gzip.open('data/mnist.pkl.gz', 'rb')
u = pickle._Unpickler( f )
u.encoding = 'latin1'
train_set, valid_set, test_set = u.load()

train_set_49 = train_set[0][np.logical_or(train_set[1] == 4, train_set[1] == 9)]
train_label_set_49 = train_set[1][np.logical_or(train_set[1] == 4, train_set[1] == 9)]
train_label_set_49_corrected = [int(i == 4)*2 - 1 for i in train_label_set_49]

test_set_49 = test_set[0][np.logical_or(test_set[1] == 4, test_set[1] == 9)]
test_label_set_49 = test_set[1][np.logical_or(test_set[1] == 4, test_set[1] == 9)]
test_label_set_49_corrected = [int(i == 4)*2 - 1 for i in test_label_set_49]

valid_set_49 = valid_set[0][np.logical_or(valid_set[1] == 4, valid_set[1] == 9)]
valid_label_set_49 = valid_set[1][np.logical_or(valid_set[1] == 4, valid_set[1] == 9)]
valid_label_set_49_corrected = [int(i == 4)*2 - 1 for i in valid_label_set_49]


train_set_49_whole = [train_set_49, train_label_set_49_corrected]
test_set_49_whole = [test_set_49, test_label_set_49_corrected]
valid_set_49_whole = [valid_set_49, valid_label_set_49_corrected]

f = open('data/mnist49data', 'wb')
pickle.dump([train_set_49_whole, valid_set_49_whole, test_set_49_whole], f)
f.close()

### Logistic Oracles

In [3]:
class DataHolder:
    def __init__(self, dataset = 'MNIST', lam = 1):
        self.lam = lam
        self.load_dataset(dataset)
        
    def load_dataset(self, dataset):
        if dataset == 'MNIST':
            print('-----------------------------------------------')
            print('Loading MNIST 4/9 data...')
            print('-----------------------------------------------\n')
            self.load_mnist_49()
        else:
            raise ValueError('No dataset exists by that name.')
            
        self.data_dim = self.train_set[0][0].size
        self.num_train_examples = self.train_set[0].shape[0]
        self.num_test_examples = self.test_set[0].shape[0]
        
    def load_mnist_49(self):
        f = open('data/mnist49data', 'rb')
        train_set, valid_set, test_set = pickle.load(f)
        f.close()
        self.train_set = [normalize(train_set[0], axis = 1, norm = 'l2'), train_set[1]]
        self.valid_set = [normalize(valid_set[0], axis = 1, norm = 'l2'), valid_set[1]]
        self.test_set = [normalize(test_set[0], axis = 1, norm = 'l2'), test_set[1]]
        
    def fetch_correct_datamode(self, mode = 'TRAIN'):
        if mode == 'TRAIN':
            return self.train_set
        elif mode == 'VALIDATE':
            return self.validate_set
        elif mode == 'TEST':
            return self.test_set
        else:
            raise ValueError('Wrond mode value provided.')
            
    def logistic_indiv_func(self, data_index, model, mode='TRAIN'):
        data_set = self.fetch_correct_datamode(mode)
        
        v = -1.0*data_set[1][data_index]*np.dot(model, data_set[0][data_index])
        
        return np.log(np.exp(v) + 1)
    
    def logistic_indiv_grad(self, data_index, model):
        data_set = self.train_set
        
        v = -1.0*data_set[1][data_index]*np.dot(model, data_set[0][data_index])
        
        return -1*data_set[1][data_index]*data_set[0][data_index]*(np.exp(v)/(1+np.exp(v)))
    
    def logistic_indiv_grad_coeff(self, data_index, model):
        data_set = self.train_set
        v = -1.0*data_set[1][data_index]*np.dot(model, data_set[0][data_index])
        return -1*data_set[1][data_index]*(np.exp(v)/(1 + np.exp(v)))

    def logistic_indiv_hess(self, data_index, model):
        data_set = self.train_set
        v = -1.0*data_set[1][data_index]*np.dot(model, data_set[0][data_index])
        return (data_set[1][data_index])*((math.pow(np.exp(v),0.5))/(np.exp(v)+1))*data_set[0][data_index]

    def logistic_batch_func(self, data_batch, model):
        func_val = 0.0
        for data_indiv in data_batch:
            func_val += self.logistic_indiv_func(data_indiv, model, 'TRAIN')
        avg_func_val = func_val / len(data_batch)
        return avg_func_val + self.lam*np.dot(model, model)

    def logistic_batch_grad(self, data_batch, model):
        batch_grad = np.zeros(self.data_dim)
        for data_indiv in data_batch:
            batch_grad += self.logistic_indiv_grad(data_indiv, model)
        avg_batch_grad = batch_grad / len(data_batch)
        return avg_batch_grad + 2*self.lam*model

    def logistic_batch_hess_full(self, data_batch, model):
        batch_hess = np.zeros((self.data_dim, self.data_dim))
        for data_indiv in data_batch:
            batch_hess += np.outer(self.logistic_indiv_hess(data_indiv, model), self.logistic_indiv_hess(data_indiv, model))
        avg_batch_hess = batch_hess / len(data_batch)
        return avg_batch_hess + 2*self.lam*np.identity(self.data_dim)

    def logistic_batch_hess_vec_product(self, data_batch, model, vector):
        hess_vec = np.zeros(self.data_dim)
        for data_indiv in data_batch:
            vtemp = self.logistic_indiv_hess(data_indiv, model)
            hess_vec += np.dot(vtemp, vector)*vtemp
        avg_hess_vec = hess_vec/len(data_batch)
        return avg_hess_vec + 2*self.lam*vector

    def test_error(self, model):
        func_val = 0.0
        data_batch = range(0, self.num_test_examples)
        for data_indiv in data_batch:
            func_val += self.logistic_indiv_func(data_indiv, model, 'TEST')
        avg_func_val = func_val / len(data_batch)
        return avg_func_val

    def error_01(self, model, mode='TRAIN'):
        data_set = self.fetch_correct_datamode(mode)
        num_examples = data_set[0].shape[0]
        error = 0
        for i in range(num_examples):
            error += abs(np.sign(np.dot(model, data_set[0][i])) - data_set[1][i])/2

        return error/num_examples

### LiSSA

In [10]:
def lissa_main(init_x, num_epochs, num_lissa_iter, outer_grad_size, hessian_batch_size, quad_stepsize, data_holder):
    num_examples = data_holder.num_train_examples
    print(data_holder.train)
    curr_x = init_x

    epochs = []
    wall_times = []
    trainerror = []

    num_iter = num_epochs*num_examples*1.0/(outer_grad_size + num_lissa_iter*hessian_batch_size)
    start_time = time.time()

    for curr_iter in range(int(num_iter)):
        epochs += [curr_iter*(outer_grad_size + num_lissa_iter*hessian_batch_size)/num_examples]
        wall_times += [time.time() - start_time]
        trainerror += [data_holder.logistic_batch_func(range(0, num_examples), curr_x)]
        curr_grad = data_holder.logistic_batch_grad(range(0, num_examples), curr_x)
        curr_step = np.zeros(curr_x.size)

        for lissa_iter in range(num_lissa_iter):
            rand_index = random.sample(range(num_examples), hessian_batch_size)
            sub_step = data_holder.logistic_batch_hess_vec_product(rand_index, curr_x, curr_step)
            curr_quad_step = curr_grad - sub_step
            curr_step = curr_step + quad_stepsize*curr_quad_step
        curr_x = curr_x - curr_step

    output_data = {'epochs': epochs, 'wall_times': wall_times, 'trainerror': trainerror}
    return curr_x, output_data

In [11]:
# First Order Method to get x1 (init_x)
def grad_descent(num_iter, init_x, stepsize, batch_size, data_holder):
    num_examples = data_holder.num_train_examples
    curr_x = init_x

    for curr_iter in range(num_iter):
        curr_grad = data_holder.logistic_batch_grad(random.sample(range(num_examples), batch_size), curr_x)
        curr_x = curr_x - stepsize*curr_grad

    return curr_x

### Train

In [13]:
data_holder = DataHolder(lam=1e-4, dataset = 'MNIST')
num_examples = data_holder.num_train_examples

num_epochs = 25
num_lissa_iter = num_examples
outer_grad_size = num_examples
hessian_batch_size = 1
stepsize = 1

gd_init_x = np.zeros(data_holder.data_dim)
gd_iter = 5
gd_stepsize = 5.0
init_x = grad_descent(gd_iter, gd_init_x, gd_stepsize, num_examples, data_holder)
print(init_x.shape)
print ('-----------------------------------------------')
print ('Training model...')
print ('-----------------------------------------------\n')

f = open('output/LiSSAOutputMNIST','wb')
result, output_data = lissa_main(init_x, num_epochs, num_lissa_iter, outer_grad_size, hessian_batch_size, stepsize, data_holder)
pickle.dump(output_data, f)
f.close()
print ('-----------------------------------------------')
print ('Training complete')
print ('-----------------------------------------------')

-----------------------------------------------
Loading MNIST 4/9 data...
-----------------------------------------------

(784,)
-----------------------------------------------
Training model...
-----------------------------------------------



AttributeError: 'DataHolder' object has no attribute 'train'

In [7]:
print(result)
print(output_data)

[ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000