In [1]:
%load_ext autoreload
%autoreload 2
from __future__ import division
from __future__ import print_function

import sys, os, gc, math
import numpy as np
from scipy.fftpack import dct,idct
from keras.datasets import mnist, fashion_mnist
from PIL import Image


sys.path.append('../')

from models.util import *


#Seed for the NumPy random draws made by this notebook (image subset,
#sparsity levels, corrupted pixel positions and their magnitudes).
#SEED = 14
SEED=11
#Apply the seed to NumPy's global RNG; without this call SEED is never used
#and a fresh "Restart & Run All" produces different samples every time.
np.random.seed(SEED)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Experiment configuration shared by the cells below.
num_samples = 500                       # how many images are corrupted and recovered
sqrt_n = 28                             # side length of one square image, in pixels
n = sqrt_n ** 2                         # length of a flattened image
input_shape = (sqrt_n, sqrt_n) + (1,)   # (height, width, channel)
k = 6                                   # "k" passed to the top-k / bottom-k helpers
c = 2.0                                 # constant used in the sparsity cap t and in delta_2

In [3]:
#Load the Fashion-MNIST images (the label arrays are not used) and give each
#image an explicit trailing channel axis.
train_split, test_split = fashion_mnist.load_data()
X_train = train_split[0].reshape(-1, 28, 28, 1)
X_test = test_split[0].reshape(-1, 28, 28, 1)

#Pool the train and test images into a single array along the sample axis.
m_data = np.concatenate([X_train, X_test], axis=0)

In [4]:
#Rescale the pixel intensities by 1/255 (true division, so the result is float).
#NOTE: m_data is rebound to the scaled array, so re-running this cell without
#re-running the loading cell divides by 255 a second time.
m_data = np.true_divide(m_data, 255.0)

In [5]:
#Build the corrupted Fashion-MNIST samples for the BP experiment.
#Each of the num_samples randomly chosen images x receives an additive error e
#supported on t_l distinct pixels, giving the observation y = x + e.
#
#Sparsity cap: t = floor(16*n / (41 * 4 * k * c)).
#With n=784, k=6, c=2.0 this evaluates to floor(6.37) = 6.
t = int(16*n/41.0/4.0/k/c)
#Images are drawn with replacement (np.random.choice default), so the same
#image may appear more than once in the subset.
subset_idx = np.random.choice(np.arange(m_data.shape[0]),num_samples)
m_data_sub_bp = m_data[subset_idx]
m_data_y_bp = np.zeros((num_samples,sqrt_n,sqrt_n))
t_values_bp = np.zeros(num_samples)
for i in range(num_samples):
    #Flatten the single channel of the i-th clean image
    x = m_data_sub_bp[i,:,:,0].flatten()
    #Sample the number of corrupted pixels - must be at least 1.
    #np.random.randint excludes its upper bound, so t_l lies in [1, t-1]
    #(and the call raises ValueError if t <= 1).
    t_l = np.random.randint(1,t)
    t_values_bp[i] = t_l
    #Sample the support of e: an index set from [n] with cardinality exactly t_l.
    #replace=False is required; the default samples with replacement, which can
    #repeat indices and silently yield fewer than t_l corrupted pixels while
    #t_l is still recorded as the sparsity level.
    s = np.random.choice(np.arange(n),t_l,replace=False)
    #Create the vector e: one value in [0, 1) per selected pixel, matching the
    #range of the normalized images
    e = np.zeros(n)
    e[s] = np.random.uniform(size=t_l)
    y = x + e
    m_data_y_bp[i,:,:] = y.reshape((sqrt_n,sqrt_n))

In [6]:
#Form the matrix F (tf='dct' selects the DCT variant); the recovery cell below
#uses its transpose as the left half of the dictionary A = [F.T, I].
#NOTE(review): get_matrix is presumably provided by the star import from
#models.util - confirm, and confirm that F is n x n.
F = get_matrix(n,tf='dct')

In [None]:
%%capture three
#BP on Fashion-MNIST: recover every corrupted image with the SOCP solver and
#compare the recovery error with the l2 / l1 upper bounds.
#Per-sample outputs:
#  errors_l2_bp / errors_l1_bp : l2 / l1 norm of (x_hat_top_k - x_hat_approx)
#  bot_l2_bp                   : l2 norm of x_hat_bot_k
#  tau_bp                      : the constant tau computed from delta_2
#  diff_l2_bp / diff_l1_bp     : upper bound minus the observed error
errors_l2_bp = np.zeros(m_data_y_bp.shape[0])
errors_l1_bp = np.zeros(m_data_y_bp.shape[0])
bot_l2_bp = np.zeros(m_data_y_bp.shape[0])
tau_bp = np.zeros(m_data_y_bp.shape[0])
diff_l2_bp = np.zeros(m_data_y_bp.shape[0])
diff_l1_bp = np.zeros(m_data_y_bp.shape[0])

#The dictionary A = [F.T, I] does not depend on the sample, so it is built once
#here instead of being rebuilt on every iteration.
I = np.identity(n)
A = np.concatenate((F.T,I), axis=1)

for i in range(num_samples):
    y = m_data_y_bp[i,:,:].flatten()
    x = m_data_sub_bp[i,:,:].flatten()

    #Get actual top k and bottom k (we use the faster transform which may introduce some error)
    x_hat_top_k, x_hat_bot_k =  get_top_bot_k_vec(dct(x, norm='ortho'),k=k)
    bot_l2_bp[i] = np.linalg.norm(x_hat_bot_k)
    #The l2 norm of the bottom coefficients is handed to the solver as eta
    eta = bot_l2_bp[i]
    #Solve over 2*n unknowns and keep the first n entries of the solution
    x_hat_approx = socp(y,A,n=2*n,eta=eta)[:n]
    #NOTE(review): this result is never read below; the call is kept only in
    #case get_topk_vec modifies x_hat_approx in place - confirm and remove.
    x_hat_approx_top_k = get_topk_vec(x_hat_approx,k=k)

    #Get rho and tau
    delta_2 = np.sqrt(4.0*c*k*t_values_bp[i]/float(n))
    #rho and tau share the same denominator
    denom = np.sqrt(1 - delta_2**2) - delta_2/4.0
    rho = delta_2/denom
    tau = np.sqrt(1 + delta_2)/denom
    tau_bp[i] = tau

    #Note the errors
    residual = x_hat_top_k.flatten()- x_hat_approx.flatten()
    errors_l2_bp[i] = np.linalg.norm(residual)
    errors_l1_bp[i] = np.linalg.norm(residual, ord=1)

    #Get the multiplicative constant
    c_l2 = ( tau* (3 + rho*(2*np.sqrt(k+t_values_bp[i]) -1) ))/(1-rho)
    c_l1 = (2*tau*np.sqrt(k + t_values_bp[i]))/(1-rho)

    #Calculate the difference from the upper bound; a negative value means the
    #observed error exceeded the bound, and is printed
    diff_l2_bp[i] = (c_l2*bot_l2_bp[i]) - errors_l2_bp[i]
    diff_l1_bp[i] = (c_l1*bot_l2_bp[i]) - errors_l1_bp[i]
    if diff_l2_bp[i] < 0:
        print (diff_l2_bp[i])
    if diff_l1_bp[i] < 0:
        print (diff_l1_bp[i])

In [13]:
#Print the per-sample averages on one line, in this order:
#sparsity t, tau, l2 error, l2 bound slack, l1 error, l1 bound slack.
summary_arrays = (
    t_values_bp,
    tau_bp,
    errors_l2_bp,
    diff_l2_bp,
    errors_l1_bp,
    diff_l1_bp,
)
print(*[np.mean(values) for values in summary_arrays])

2.964 1.501730171459867 5.579348501136094 207.57804197768274 14.090147039922611 193.75618782133768


In [None]:
import pickle

#Bundle the corrupted images, the clean images, and every per-sample statistic
#into one tuple; the element order below is the on-disk layout.
mnist_tup_bp = (
    m_data_y_bp,
    m_data_sub_bp,
    t_values_bp,
    tau_bp,
    errors_l2_bp,
    errors_l1_bp,
    diff_l2_bp,
    diff_l1_bp,
)

#Write the bundle to disk (the 'data' directory must already exist).
results_path = 'data/fmnist_theory_socp_l0.pickle'
with open(results_path, 'wb') as f:
    pickle.dump(mnist_tup_bp, f)
