In [1]:
%pylab inline
import numpy as np
import pickle
import scipy
from methods import LBFGS
from oracles import LogRegL2Oracle, create_log_reg_oracle
from sklearn.datasets import load_svmlight_file
from tqdm import tqdm_notebook

Populating the interactive namespace from numpy and matplotlib


In [2]:
def solve_logreg(A, b, regcoef, dataset_name, tolerance=1e-15, x_0=None):
    """Run L-BFGS on a logistic-regression oracle and pickle the solution.

    The oracle is built with ``create_log_reg_oracle(A, b, regcoef)`` and
    minimized by ``LBFGS`` (memory size 10, Wolfe line search, 'grad_abs'
    stopping criterion) for at most 10000 iterations.

    Parameters
    ----------
    A, b : data matrix and target vector, passed straight to the oracle factory.
    regcoef : regularization coefficient; also used in the output file name.
    dataset_name : name of the sub-directory of ``./logreg_solutions`` to write to.
    tolerance : tolerance forwarded to ``LBFGS``.
    x_0 : starting point. When omitted, the zero vector with ``A.shape[1]``
        entries is used, so the function no longer depends on a notebook-global
        ``x_0`` being defined.

    Side effects
    ------------
    Creates ``./logreg_solutions/<dataset_name>/`` if needed and writes a pickle
    named ``regcoef=<regcoef in %.2e format>`` holding a dict with keys
    ``'x_star'`` and ``'func_star'``. Returns ``None``.
    """
    import os

    if x_0 is None:
        x_0 = np.zeros(A.shape[1])

    # os.makedirs is portable and, unlike `mkdir -p` through a shell, does not
    # let special characters in dataset_name be interpreted as shell syntax.
    out_dir = './logreg_solutions/{}'.format(dataset_name)
    os.makedirs(out_dir, exist_ok=True)

    oracle = create_log_reg_oracle(A, b, regcoef)
    method = LBFGS(oracle, x_0, tolerance=tolerance, stopping_criteria='grad_abs',
                   memory_size=10, line_search_options={'method': 'Wolfe'})
    method.run(10000)

    out_path = '{}/regcoef={:.2e}'.format(out_dir, regcoef)
    with open(out_path, 'wb') as file:
        # 'func_star' is the last recorded function value of the run.
        pickle.dump({'x_star': method.hist['x_star'],
                     'func_star': method.hist['func'][-1]}, file)

### Gisette

In [4]:
# Load the Gisette data (svmlight format) into the data matrix A and target vector b.
file = './datasets/gisette_scale'
A, b = load_svmlight_file(file, n_features=None)
# Unlike the other datasets in this notebook, Gisette is converted to a dense array.
A = A.toarray()
# Zero vector with one entry per column of A, intended as the optimizer's starting point.
x_0 = np.zeros((A.shape[1]))

In [5]:
%%time
# Solve once per regularization coefficient; the last value is 1 / (number of rows of A).
# Each call writes its result under ./logreg_solutions/gisette/.
for regcoef in tqdm_notebook([1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1./A.shape[0]]):
    solve_logreg(A, b, regcoef, 'gisette')

HBox(children=(IntProgress(value=0, max=7), HTML(value='')))


CPU times: user 23min 56s, sys: 9.1 s, total: 24min 5s
Wall time: 45.2 s


### w8a

In [6]:
# Load the w8a data (svmlight format); A is kept as returned by the loader (not densified).
file = './datasets/w8a.txt'
A, b = load_svmlight_file(file, n_features=None)
# Zero vector with one entry per column of A, intended as the optimizer's starting point.
x_0 = np.zeros((A.shape[1]))

In [7]:
# Solve once per regularization coefficient; the last value is 1 / (number of rows of A).
# Each call writes its result under ./logreg_solutions/w8a/.
for regcoef in tqdm_notebook([1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1./A.shape[0]]):
    solve_logreg(A, b, regcoef, 'w8a')

HBox(children=(IntProgress(value=0, max=7), HTML(value='')))




### a9a

In [6]:
# Load the a9a data (svmlight format); A is kept as returned by the loader (not densified).
file = './datasets/a9a.txt'
A, b = load_svmlight_file(file, n_features=None)
# Zero vector with one entry per column of A, intended as the optimizer's starting point.
x_0 = np.zeros((A.shape[1]))

In [7]:
# Solve once per regularization coefficient; the last value is 1 / (number of rows of A).
# Each call writes its result under ./logreg_solutions/a9a/.
for regcoef in tqdm_notebook([1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1./A.shape[0]]):
    solve_logreg(A, b, regcoef, 'a9a')

HBox(children=(IntProgress(value=0, max=2), HTML(value='')))




### covtype

In [9]:
# Load the covtype data (svmlight format); A is kept as returned by the loader (not densified).
file = './datasets/covtype.libsvm.binary.scale'
A, b = load_svmlight_file(file, n_features=None)
# Zero vector with one entry per column of A, intended as the optimizer's starting point.
x_0 = np.zeros((A.shape[1]))

In [10]:
# Only two regularization coefficients are solved for covtype.
# Each call writes its result under ./logreg_solutions/covtype/.
for regcoef in tqdm_notebook([1e-3, 1e-2]):
    solve_logreg(A, b, regcoef, 'covtype')

HBox(children=(IntProgress(value=0, max=2), HTML(value='')))




### real_sim

In [5]:
# Load the real-sim data (svmlight format); A is kept as returned by the loader (not densified).
file = './datasets/real-sim'
A, b = load_svmlight_file(file, n_features=None)
# Zero vector with one entry per column of A, intended as the optimizer's starting point.
x_0 = np.zeros((A.shape[1]))

In [6]:
# Only two regularization coefficients are solved for real-sim.
# Each call writes its result under ./logreg_solutions/real_sim/.
for regcoef in tqdm_notebook([1e-3, 1e-4]):
    solve_logreg(A, b, regcoef, 'real_sim')

HBox(children=(IntProgress(value=0, max=2), HTML(value='')))




### colon-cancer

In [3]:
# Load the colon-cancer data (svmlight format); A is kept as returned by the loader (not densified).
file = './datasets/colon-cancer'
A, b = load_svmlight_file(file, n_features=None)
# Zero vector with one entry per column of A, intended as the optimizer's starting point.
x_0 = np.zeros((A.shape[1]))

In [18]:
# Single coefficient for colon-cancer, with a tolerance (0.5e-16) tighter than solve_logreg's default of 1e-15.
# The result is written under ./logreg_solutions/colon_cancer/ (underscore, unlike the dataset file name).
for regcoef in tqdm_notebook([1e-1]):
    solve_logreg(A, b, regcoef, 'colon_cancer', tolerance=0.5*1e-16)

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))




In [14]:
# Ad-hoc L-BFGS run on the currently loaded dataset (colon-cancer when the
# notebook is executed top to bottom). Nothing is written to disk; `oracle`
# and `method` stay in the namespace for inspection.
#
# The coefficient is set explicitly so the cell does not rely on `regcoef`
# leaking out of the loop in the preceding cell. 1e-1 is the value that loop
# leaves behind on a top-to-bottom run.
regcoef = 1e-1
oracle = create_log_reg_oracle(A, b, regcoef)
method = LBFGS(oracle, x_0, tolerance=0.5*1e-16, stopping_criteria='grad_abs',
               memory_size=10, line_search_options={'method': 'Wolfe'})
# Capped at 1000 iterations here, versus 10000 inside solve_logreg.
method.run(1000)