In [1]:
# Enable IPython's autoreload extension in mode 2 so that imported modules
# (e.g. linear_models, utils) are reloaded before each cell executes and
# edits to them take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from tensorflow.examples.tutorials.mnist import input_data
from sklearn.svm import LinearSVC
import numpy as np
from linear_models import LinearOneVsAllClassifier, LinearBinaryClassifier
import os
from utils import generate_exp_data, subset_multiclass_data, find_noise_bounds_binary, find_noise_bounds_multi

In [3]:
# Read the MNIST data sets from MNIST_data/ with one-hot encoded labels
# (one_hot=True); later cells convert the labels back to class indices.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [None]:
# Labels were loaded one-hot encoded; the arg-max along axis 1 recovers the
# integer class index of every example.
mnist_train_labels = mnist.train.labels.argmax(axis=1)
mnist_test_labels = mnist.test.labels.argmax(axis=1)

# The training pixels are copied, while the test pixels are referenced as-is.
mnist_train_images = np.copy(mnist.train.images)
mnist_test_images = mnist.test.images

# Set Up Multiclass Experiments

In [None]:
# Identity mapping for the digits 0-3, handed to subset_multiclass_data
# (presumably: keep these digits and relabel them with the mapped values —
# confirm against utils).
label_dict = dict((digit, digit) for digit in range(4))

multi_train_images, multi_train_labels = subset_multiclass_data(
    mnist_train_images, mnist_train_labels, label_dict)
multi_test_images, multi_test_labels = subset_multiclass_data(
    mnist_test_images, mnist_test_labels, label_dict)

In [None]:
# Train `num_models` one-vs-all linear SVMs. Each model is fitted on its own
# copy of the multiclass training images in which a random subset of pixel
# features has been set to zero, so different models rely on different pixels.
num_models = 5
num_classes = 4
mnist_num_dim = 28 * 28
sparse_features_perc = .75  # fraction of the pixel features zeroed per model
num_zeroed_features = int(sparse_features_perc * mnist_num_dim)
# Dedicated seeded generator so the feature masks are reproducible on re-run.
multi_rng = np.random.RandomState(0)
multi_models = []
zeroed_features_list = []
for i in xrange(num_models):
    sparse_multi_images = np.copy(multi_train_images)
    # Sample WITHOUT replacement: randint() draws with replacement, so
    # duplicate indices would leave far fewer than sparse_features_perc of
    # the distinct features zeroed.
    zeroed_features = multi_rng.choice(mnist_num_dim, num_zeroed_features, replace=False)
    zeroed_features_list.append(zeroed_features)
    sparse_multi_images[:, zeroed_features] = 0.0
    svm = LinearSVC(loss='hinge')
    svm.fit(sparse_multi_images, multi_train_labels)
    # Wrap the fitted coefficients in the project's classifier type.
    model = LinearOneVsAllClassifier(num_classes, svm.coef_, svm.intercept_)
    multi_models.append(model)

In [None]:
# Evaluate every multiclass model on the multiclass test set (the test images
# are never feature-zeroed). evaluate() presumably returns accuracy — confirm
# in linear_models.
[model.evaluate(multi_test_images, multi_test_labels) for model in multi_models]

In [None]:
# Check that the models have no nonzero weights in any of the zeroed-out
# dimensions. Absolute values are summed because a plain signed sum lets
# positive and negative weights cancel and could report 0 even when some
# weights are nonzero. Every entry of the displayed list should be 0.
[np.abs(multi_models[i].weights[:, zeroed_features_list[i]]).sum() for i in xrange(len(multi_models))]

In [None]:
# Create the output directory together with any missing parent, and skip the
# call when it already exists so this cell can be re-run without raising.
if not os.path.isdir('linear_models/multi'):
    os.makedirs('linear_models/multi')

In [None]:
# Persist each multiclass model's weights and bias, keyed by its position in
# multi_models.
for idx, classifier in enumerate(multi_models):
    weights_path = 'linear_models/multi/w_{}.npy'.format(idx)
    bias_path = 'linear_models/multi/b_{}.npy'.format(idx)
    np.save(weights_path, classifier.weights)
    np.save(bias_path, classifier.bias)

In [None]:
# Build the multiclass experiment set from the test data and the trained
# models. The 1000 is presumably the number of examples to select — confirm
# against utils.generate_exp_data.
X_exp_multi, Y_exp_multi = generate_exp_data(1000, multi_test_images, multi_test_labels, multi_models)

In [5]:
# Evaluate every multiclass model on the experiment set.
# NOTE(review): the saved output is 1.0 for all five models, so
# generate_exp_data presumably keeps only points every model gets right —
# confirm against utils.
[model.evaluate(X_exp_multi, Y_exp_multi) for model in multi_models]

[1.0, 1.0, 1.0, 1.0, 1.0]

In [None]:
# Create the output directory together with any missing parent, and skip the
# call when it already exists so this cell can be re-run without raising.
if not os.path.isdir('linear_experiments_data/multi'):
    os.makedirs('linear_experiments_data/multi')

In [None]:
# Persist the multiclass experiment inputs and labels so they can be reloaded
# later without regenerating them.
np.save('linear_experiments_data/multi/X_exp.npy', X_exp_multi)
np.save('linear_experiments_data/multi/Y_exp.npy', Y_exp_multi)

In [4]:
# Rebuild the multiclass experiment state from the files saved on disk, so
# this cell can stand in for the training and data-generation cells.
num_models = 5
num_classes = 4

X_exp_multi = np.load('linear_experiments_data/multi/X_exp.npy')
Y_exp_multi = np.load('linear_experiments_data/multi/Y_exp.npy')

# One classifier per saved (weights, bias) pair, in index order.
multi_models = []
for idx in xrange(num_models):
    weights = np.load('linear_models/multi/w_{}.npy'.format(idx))
    bias = np.load('linear_models/multi/b_{}.npy'.format(idx))
    multi_models.append(LinearOneVsAllClassifier(num_classes, weights, bias))

In [6]:
# Compute noise bounds for the multiclass models on the experiment inputs.
# NOTE(review): the saved output of this cell is a KeyboardInterrupt, i.e. the
# recorded run was stopped before it finished.
min_bounds = find_noise_bounds_multi(multi_models, X_exp_multi)

KeyboardInterrupt: 

# Set Up Binary Experiments

In [None]:
# Digit 4 is mapped to label -1 and digit 9 to label +1; the same mapping is
# used for the train and test splits.
binary_label_dict = {4: -1, 9: 1}

binary_train_images, binary_train_labels = subset_multiclass_data(
    mnist_train_images, mnist_train_labels, binary_label_dict)
binary_test_images, binary_test_labels = subset_multiclass_data(
    mnist_test_images, mnist_test_labels, binary_label_dict)

In [None]:
# Display the shape of the 4-vs-9 test image array as a quick sanity check.
binary_test_images.shape

In [None]:
# Train `num_models` binary (4 vs. 9) linear SVMs. Each model is fitted on its
# own copy of the binary training images in which a random subset of pixel
# features has been set to zero.
binary_models = []
num_models = 5
mnist_num_dim = 28 * 28
# Set explicitly so this section does not depend on the multiclass section
# having been run earlier in the same kernel.
sparse_features_perc = .75  # fraction of the pixel features zeroed per model
num_zeroed_features = int(sparse_features_perc * mnist_num_dim)
# Dedicated seeded generator so the feature masks are reproducible on re-run.
binary_rng = np.random.RandomState(1)
zeroed_features_list = []
for i in xrange(num_models):
    sparse_binary_images = np.copy(binary_train_images)
    # Sample WITHOUT replacement: randint() draws with replacement, so
    # duplicate indices would leave far fewer than sparse_features_perc of
    # the distinct features zeroed.
    zeroed_features = binary_rng.choice(mnist_num_dim, num_zeroed_features, replace=False)
    zeroed_features_list.append(zeroed_features)
    sparse_binary_images[:, zeroed_features] = 0.0
    svm = LinearSVC(loss='hinge')
    svm.fit(sparse_binary_images, binary_train_labels)
    # The coefficients are transposed before being handed to the binary
    # classifier wrapper.
    model = LinearBinaryClassifier(svm.coef_.T, svm.intercept_)
    binary_models.append(model)

In [None]:
# Evaluate every binary model on the 4-vs-9 test set (the test images are
# never feature-zeroed). evaluate() presumably returns accuracy — confirm in
# linear_models.
[model.evaluate(binary_test_images, binary_test_labels) for model in binary_models]

In [None]:
# Check that the binary models have no nonzero weights in any of the
# zeroed-out dimensions. Absolute values are summed because a plain signed sum
# lets positive and negative weights cancel and could report 0 even when some
# weights are nonzero. Every entry of the displayed list should be 0.
[np.abs(binary_models[i].weights[zeroed_features_list[i]]).sum() for i in xrange(len(binary_models))]

In [None]:
# Create the output directory together with any missing parent, and skip the
# call when it already exists so this cell can be re-run without raising.
if not os.path.isdir('linear_models/binary'):
    os.makedirs('linear_models/binary')

In [None]:
# Persist each binary model's weights and bias, keyed by its position in
# binary_models.
for idx, classifier in enumerate(binary_models):
    weights_path = 'linear_models/binary/w_{}.npy'.format(idx)
    bias_path = 'linear_models/binary/b_{}.npy'.format(idx)
    np.save(weights_path, classifier.weights)
    np.save(bias_path, classifier.bias)

In [None]:
# Build the binary experiment set from the test data and the trained models.
# The 1000 is presumably the number of examples to select — confirm against
# utils.generate_exp_data.
X_exp_binary, Y_exp_binary = generate_exp_data(1000, binary_test_images, binary_test_labels, binary_models)

In [None]:
# Evaluate every binary model on the binary experiment set.
[model.evaluate(X_exp_binary, Y_exp_binary) for model in binary_models]

In [None]:
# Show the dimensions of the binary experiment inputs, then of the labels.
for exp_array in (X_exp_binary, Y_exp_binary):
    print(exp_array.shape)

In [None]:
# Persist the binary experiment inputs and labels.
# The directory is created together with any missing parent and the call is
# skipped when it already exists, so re-running the cell simply overwrites
# the saved arrays instead of raising.
if not os.path.isdir('linear_experiments_data/binary'):
    os.makedirs('linear_experiments_data/binary')
np.save('linear_experiments_data/binary/X_exp.npy', X_exp_binary)
np.save('linear_experiments_data/binary/Y_exp.npy', Y_exp_binary)

In [None]:
# Compute lower and upper noise bounds for the binary models on the
# experiment set.
# NOTE(review): this rebinds min_bounds, a name the multiclass section also
# assigns from find_noise_bounds_multi.
min_bounds, max_bounds = find_noise_bounds_binary(binary_models, X_exp_binary, Y_exp_binary)

In [None]:
print np.median(min_bounds), np.median(max_bounds)