In [1]:
import numpy as np
import numpy.linalg as npla
import scipy
import scipy.linalg as sla
import pickle
import os
from sklearn.datasets import load_svmlight_file

from methods import GradientDescent, Newton, Nesterov, RBFGS, LBFGS, BFGS
from methods.rbfgs import Uniform, Gaussian, CustomDiscrete, ConstantDistribution, \
                          MatrixDistribution
from oracles import LogRegL2Oracle, create_log_reg_oracle, QuadraticOracle
from utils import select_basis_columns, run_rbfgs_experiment, run_nesterov_experiment, run_all_methods

import scipy.sparse.linalg as spla
import scipy.sparse as sps

In [2]:
# Load the w8a dataset (svmlight format). A is a sparse matrix (it is
# densified with .toarray() elsewhere in this notebook); b holds the targets.
file = './datasets/w8a.txt'
A, b = load_svmlight_file(file, n_features=None)
# Report the matrix shape and the fraction of non-zero entries.
print('(n, d) = {}'.format(A.shape))
print('density = {}'.format(A.count_nonzero() / (A.shape[0] * A.shape[1])))
# Oracle for regcoef=1e-3. experiment_w8a builds its own oracle for each
# regcoef it is called with, so it does not read this variable.
oracle = create_log_reg_oracle(A, b, regcoef=1e-3)

(n, d) = (49749, 300)
density = 0.03883401341400497


In [3]:
# Load the stored SVD factors for the regcoef=1e-3 run.
# NOTE(review): pickle.load can execute arbitrary code from the file; this is
# only acceptable if svd.pkl is a locally generated cache — confirm its origin.
# The handle gets its own name so the global `file` (the dataset path) is not
# rebound to a closed file object.
with open('./results/w8a/regcoef=1.00e-03/svd.pkl', 'rb') as svd_file:
    U, sigma_diag, V = pickle.load(svd_file)
# Two estimates labelled "smoothness": one from the largest singular value,
# one from the largest row norm of A.
print('Smoothness based on singular values: {}'.format(sigma_diag.max()**2 / (4. * A.shape[0])))
# Row norms are computed directly on the sparse matrix; no dense copy of A is
# materialised just to take a maximum.
print('Smoothness based on row norms: {}'.format(spla.norm(A, axis=1).max()**2 / 4.))

Smoothness based on singular values: 0.6611993844944808
Smoothness based on row norms: 28.5


In [4]:
# Sketch sizes passed to run_all_methods by every experiment below.
sketch_sizes = [1, 2, 5, 10, 50, 100, 200]


def experiment_w8a(regcoef, max_iter=1000, random_state=0):
    """Run `run_all_methods` on the w8a data for one regularization coefficient.

    Reads the notebook-level globals `A`, `b` (the loaded dataset) and
    `sketch_sizes`. Results go to './results/w8a/regcoef=<regcoef in %.2e>'.

    Parameters
    ----------
    regcoef : float
        Regularization coefficient forwarded to `create_log_reg_oracle`; it
        also determines the output folder name.
    max_iter : int, optional
        Iteration cap forwarded to `run_all_methods` (default 1000, the value
        previously hard-coded here).
    random_state : int, optional
        Seed forwarded to `run_all_methods` (default 0, the value previously
        hard-coded here).

    Returns
    -------
    None
        Whatever `run_all_methods` returns is discarded.
    """
    run_all_methods(
        oracle=create_log_reg_oracle(A, b, regcoef),
        sketch_sizes=sketch_sizes,
        max_iter=max_iter,
        # run_all_methods is handed a dense copy of the data matrix.
        mat=A.toarray(),
        output_folder='./results/w8a/regcoef={:.2e}'.format(regcoef),
        # NOTE(review): presumably the cutoff behind the "Singular values
        # above tolerance" log line — confirm in utils.run_all_methods.
        sigma_tolerance=1e-8,
        method_tolerance=1e-15,
        stopping_criteria='grad_abs',
        random_state=random_state,
    )

In [5]:
%%time
# Run the method comparison for regcoef=1e-5; results go to
# ./results/w8a/regcoef=1.00e-05.
experiment_w8a(1e-5)

Read SVD from ./results/w8a/regcoef=1.00e-05/svd.pkl
Singular values above tolerance: 266

RBFGS-SVD sketch... Done
RBFGS-SVD sketch no sigma... Done
RBFGS-gauss... Done
RBFGS-coord...Done
BFGS... Done
Nesterov...Done

All runs completed.
CPU times: user 115 ms, sys: 171 ms, total: 287 ms
Wall time: 293 ms


In [10]:
%%time
# Run the method comparison for regcoef=1e-4; results go to
# ./results/w8a/regcoef=1.00e-04.
# NOTE(review): the saved output of this cell lists only "BFGS... Done" —
# presumably the other methods were skipped for this run; confirm.
experiment_w8a(1e-4)

Read SVD from ./results/w8a/regcoef=1.00e-04/svd.pkl
Singular values above tolerance: 266

BFGS... Done

All runs completed.
CPU times: user 30.5 ms, sys: 249 ms, total: 280 ms
Wall time: 290 ms


In [7]:
%%time
# Run the method comparison for regcoef=1e-3; results go to
# ./results/w8a/regcoef=1.00e-03.
# NOTE(review): the saved output of this cell stops at "Nesterov..." with
# "TypeError: __init__() got an unexpected keyword argument 'stepsize'", so
# this run did not complete its Nesterov stage. The cause is outside this
# notebook (the call is made inside run_all_methods) — needs fixing there.
experiment_w8a(1e-3)

Read SVD from ./results/w8a/regcoef=1.00e-03/svd.pkl
Singular values above tolerance: 266

RBFGS-SVD sketch... Done
RBFGS-SVD sketch no sigma... Done
RBFGS-gauss... Done
RBFGS-coord...Done
BFGS... Done
Nesterov...

TypeError: __init__() got an unexpected keyword argument 'stepsize'

In [None]:
%%time
# Run the method comparison for regcoef=1e-2; results go to
# ./results/w8a/regcoef=1.00e-02. This cell has no saved output.
experiment_w8a(1e-2)

In [None]:
%%time
# Run the method comparison with regcoef = 1/n, where n = A.shape[0] is the
# number of rows of A. This cell has no saved output.
experiment_w8a(1./A.shape[0])

### Nesterov

In [2]:
# Reload the w8a dataset (svmlight format) for the standalone Nesterov run.
file = './datasets/w8a.txt'
A, b = load_svmlight_file(file, n_features=None)
# Regularization coefficient used to build the oracle below.
regcoef = 1e-3
# Report the matrix shape and the fraction of non-zero entries.
print('(n, d) = {}'.format(A.shape))
print('density = {}'.format(A.count_nonzero() / (A.shape[0] * A.shape[1])))
# This oracle is the one handed to Nesterov in the next cell.
oracle = create_log_reg_oracle(A, b, regcoef=regcoef)

(n, d) = (49749, 300)
density = 0.03883401341400497


In [3]:
# Seed the global NumPy RNG so the random starting point is reproducible.
np.random.seed(42)
x_0 = np.random.normal(size=A.shape[1])
# Derive the folder from `regcoef` (set in the previous cell together with
# `oracle`) instead of repeating the literal, so the result cannot be written
# into a folder that disagrees with the oracle. For regcoef=1e-3 this is the
# same './results/w8a/regcoef=1.00e-03' as before.
output_folder = './results/w8a/regcoef={:.2e}'.format(regcoef)
# Make sure the target folder exists before opening a file inside it.
os.makedirs(output_folder, exist_ok=True)
method = Nesterov(oracle, x_0, stopping_criteria='grad_abs',
                  tolerance=1e-16)
method.run(max_iter=50000, max_time=30)
# Detach the oracle before pickling — presumably so the dump does not carry
# the oracle (and the data it references) along; confirm.
method.oracle = None
# A dedicated handle name keeps the global `file` (the dataset path) intact.
with open('{}/nesterov.pkl'.format(output_folder), 'wb') as nesterov_file:
    pickle.dump(method, nesterov_file)