In [52]:
import sys
import os
import numpy as np
sys.path.append("../")
from src.params import *
import src.utils as utils
from src.lossfunction import LogisticLoss
from src.regularizer import GL1
from src.naive.ProbGL1 import ProbGL1
from numba import jit


In [124]:
from src.Problem import Problem
@jit(nopython=True, cache=True)
def _proximal_gradient_jit(X, alpha, gradf, K, p, starts, ends, Lambda_group):
    proximal = np.zeros((p, 1))
    for i in range(K):
        start, end = starts[i], ends[i]
        XG_i = X[start:end]
        gradfG_i = gradf[start:end]
        gradient_step = XG_i - alpha * gradfG_i
        gradient_step_norm = np.sqrt(np.dot(gradient_step.T, gradient_step))[0][0]
        if gradient_step_norm != 0:
            temp = 1 - ((Lambda_group[i] * alpha) / gradient_step_norm)
        else:
            temp = -1
        sG_i = max(temp, 0) * gradient_step
        proximal[start:end] = sG_i
    return proximal

In [197]:
class ProbGL1(Problem):
    def __init__(self, f, r) -> None:
        super().__init__(f, r)
        self.K = self.r.K
        self.n, self.p = self.f.n, self.f.p
        self.starts = self.r.starts
        self.ends = self.r.ends
        self.Lambda_group = self.r.Lambda_group

    def func(self, x):
        return self.f.evaluate_function_value(x) + self.r.evaluate_function_value_jit(x)

    def gradf(self, x):
        return self.f.gradient()

    def ipg(self, xk, gradfxk, alphak, rxk, method, **kwargs):
        if method == 'sampling':
            init_epsilon = kwargs['init_epsilon']
            t = kwargs['t']
            mode = kwargs['mode']
            seed = kwargs['seed']
            xprox = self._pg(xk, gradfxk, alphak)
            x = self._ipg_sample(xprox, gradfxk, rxk, alphak, init_epsilon, t, mode, seed)
        elif method == 'algorithm':
            raise ValueError(f'{method} is not implemented.')
        else:
            raise ValueError(f'{method} is not defined.')
        return x

    def _ipg_sample(self, xprox, xk, gradfxk, alphak, init_epsilon, t=1, mode='whole', seed=None, **kwargs):
        self.seed = seed
        epsilon = init_epsilon
        ck = (np.sqrt(6 / (1 + t) * alphak) - np.sqrt(2 / alphak)) ** 2 / 4
        count = 1
        while True:
            if count > 100:
                raise utils.AlgorithmError("_ipg_sample: cannot sample a (x,y) pair satisfying the gap condition!")
            x, y = self._sample_primal_dual(xprox, xk, gradfxk, alphak, epsilon, mode)
            diff = x - xk
            gap = self._duality_gap(x, y, xk, gradfxk, alphak)
            # print(f"gap:{gap} | target:{np.dot(diff.T, diff)[0][0]} | ck:{ck}")
            if gap <= ck * (np.dot(diff.T, diff)[0][0]):
                self.count = count
                self.epsilon = epsilon
                print(count, epsilon)
                return x
            epsilon *= 0.8
            count += 1

    def _sample_primal_dual(self, xprox, xk, gradfxk, alphak, epsilon, mode='whole', **kwargs):
        if self.seed:
            np.random.seed(self.seed)
        if mode == 'whole':
            delta = np.random.randn(*xprox.shape)
            delta_norm = utils.l2_norm(delta)
            delta *= (epsilon / delta_norm)
            x = xprox + delta
        elif mode == 'blocks':
            raise utils.AlgorithmError(f"mode:{mode} is not implemented yet.")
        else:
            raise utils.AlgorithmError(f"mode:{mode} is not defined.")
        gradient_step = xk - alphak * gradfxk
        temp = (x - gradient_step) / alphak
        dual_norm = self.r.dual(temp)
        y = min(1, 1 / dual_norm) * temp
        return x, y

    def _duality_gap(self, x, y, xk, gradfxk, alphak):
        gradient_step = xk - alphak * gradfxk
        temp = x - gradient_step
        primal = np.dot(temp.T, temp)[0][0] / (2 * alphak) + self.r.evaluate_function_value_jit(x)
        dual_negative = ((alphak / 2) * (np.dot(y.T, y)) + np.dot(gradient_step.T, y))[0][0]
        return primal + dual_negative

    def _pg(self, xk, gradfxk, alphak):
        xprox = _proximal_gradient_jit(xk, alphak, gradfxk,
                                       self.K, self.p, self.starts,
                                       self.ends, self.Lambda_group)
        return xprox

In [198]:
datasetName = 'a9a'
frac = 0.4
fileType = fileTypeDict[datasetName]
print("Working on: {}...".format(datasetName))
X, y = utils.set_up_xy(datasetName, fileType, dbDir='../../db')
f = LogisticLoss(X, y, datasetName)
p = X.shape[1]
num_of_groups = max(int(p * frac), 1)
group = utils.gen_group(p, num_of_groups)
Lambda = 1
r = GL1(Lambda=Lambda, group=group)

Working on: a9a...


In [199]:
prob = ProbGL1(f, r)

In [200]:
xk = np.zeros((f.p, 1))
fxk = prob.func(xk)
gradfxk = prob.gradf(xk)
alphak = 1
rxk = 1
method='sampling';init_epsilon=0; t=1; mode='whole';seed=0
xnew = prob.ipg(xk, gradfxk, alphak, rxk, method='sampling',init_epsilon=0.1, t=1, mode='whole', seed=0)

21 0.0011529215046068484


In [162]:
xk = np.ones((f.p, 1))
fxk = prob.func(xk)
gradfxk = prob.gradf(xk)
alphak = 1
rxk = 1
prob.seed = seed
xprox = prob._pg(xk, gradfxk, alphak)

In [183]:
x, y = prob._sample_primal_dual(xprox, xk, gradfxk, alphak, epsilon=1e-5, mode='whole')
prob._duality_gap(x, y, xk, gradfxk, alphak)
np.sum(np.abs(x - xprox))

In [207]:
class Solver:
    def __init__(self, prob, params, **kwargs):
        self.prob = prob
        self.__dict__.update(params)
        self.__dict__.update(kwargs)

In [212]:
params = {'eta': 233, 'gamma':334}
solver = Solver('prob', params, warm=True)

In [213]:
solver.eta, solver.gamma, solver.prob

(233, 334, 'prob')

In [214]:
solver.__dict__

{'prob': 'prob', 'eta': 233, 'gamma': 334, 'warm': True}