In [1]:
import numpy as np
import cvxpy as cp

Projected Newton method uses:

$$
w^{k + 1/2} = w^k - \alpha_k H_k^{-1} \nabla \mathcal{L}(\theta)
$$

then updates the projection under the Hessian metric

$$
w^{k+1} = \arg\min_{v \in \mathcal{C}} \lVert v - w^{k + 1/2} \rVert_{H_k}.
$$

Note that for our case $\mathcal{C} = \{ \theta \in \mathbb{R}^d : \theta^\top X^\top X \theta \leq 1/4 \}$. Thus the above constrainted optimization problem is a QCQP. 

In [67]:
class projected_Newton(object):
    def __init__(self, features, obs, d, theta):
        self.features = features
        self.obs = obs
        self.d = d
        self.theta = theta
        self.n = len(self.obs)
    
    
    def get_gradient(self):
        grad = np.zeros(self.d)
        self.inner = np.dot(self.features,self.theta)
        self.mu = np.zeros((self.n,self.d))
        self.mu[:,0] = 4 * (self.inner - self.obs) / (4 * self.inner ** 2 - 1)
        for i in range(1,self.d):
            self.mu[:,i] = self.mu[:,0]
        self.mult = self.mu * self.features.copy()
#        for i in range(self.n):
#            grad += self.mult[i]
        grad = np.sum(self.mult,axis=0)
        return grad
    
    def get_hessian(self):
        inner = np.dot(self.features,self.theta)
        dot_mu_root = np.sqrt((4 * (inner ** 2 - 8 * self.obs * inner + 1)) / (1 - 4 * inner ** 2))
        X = self.features
        for i in range(self.d):
            X[:,i] = np.multiply(X[:,i],dot_mu_root)
        hess = np.einsum('ij,ik->jk', X, X)
        return -1.0 * hess

In [74]:
d = 2
X = np.random.normal(size=(10,2))
obs = np.zeros(10) - 1/2
theta = np.zeros(d)
reg = projected_Newton(X,obs,d,theta)

In [75]:
hess = np.zeros((d,d))
grad = np.zeros(d)
print(X)
for i in range(10):
    x,y = X[i], obs[i]
    inner = np.inner(x,theta)
    mu = 4 * (inner - y) / (4 * inner ** 2 - 1)
    dot_mu = - (4 * (inner ** 2 - 8 * y * inner + 1)) / (1 - 4 * inner ** 2)
    grad = grad + mu * x
    hess = hess + dot_mu * np.outer(x,x)

[[-0.97333405  1.6612397 ]
 [-1.14870505 -0.63096407]
 [ 0.12672883 -1.85037336]
 [-0.08713633  0.5583663 ]
 [-1.67614951  0.87828572]
 [-0.21134754 -0.03692729]
 [ 1.12868887  0.082899  ]
 [-0.7542952   0.01207813]
 [-0.57491847  1.2604819 ]
 [-1.32886731 -0.71974798]]


In [76]:
grad

array([10.99867151, -2.43067609])

In [77]:
reg.get_gradient()

array([10.99867151, -2.43067609])

In [78]:
reg.get_hessian()

array([[-36.336079  ,   9.29360376],
       [  9.29360376, -39.1204273 ]])

In [79]:
hess

array([[-36.336079  ,   9.29360376],
       [  9.29360376, -39.1204273 ]])

In [80]:
X

array([[-1.94666811,  3.32247939],
       [-2.2974101 , -1.26192815],
       [ 0.25345766, -3.70074672],
       [-0.17427266,  1.1167326 ],
       [-3.35229902,  1.75657144],
       [-0.42269507, -0.07385458],
       [ 2.25737774,  0.16579799],
       [-1.50859039,  0.02415627],
       [-1.14983693,  2.5209638 ],
       [-2.65773463, -1.43949595]])