In [2]:
# Package Imports 

import numpy as np
from tqdm import tqdm
from numba import njit

In [14]:
# Gisette Dataset Pre-Processing

def preprocessing_y(filename):
    """Read a label file into a 1-D integer array.

    Each line of ``filename`` is stripped of surrounding whitespace and split
    on single spaces; the resulting tokens are converted to ``int``.  The
    second axis (which must have length 1, i.e. one label per line) is then
    squeezed away.

    Parameters
    ----------
    filename : str or path-like
        Path of the text file to read.

    Returns
    -------
    numpy.ndarray
        Integer array with one entry per line of the file.
    """
    with open(filename) as handle:
        tokens = [line.strip().split(" ") for line in handle]
    # axis=1 makes the squeeze fail loudly if a line holds more than one value.
    return np.array(tokens).astype(int).squeeze(axis=1)

def preprocessing_x(filename):
    """Read a space-separated feature file into a 2-D integer array.

    Each line of ``filename`` is stripped of surrounding whitespace and split
    on single spaces; every token is converted to ``int``.

    Parameters
    ----------
    filename : str or path-like
        Path of the text file to read.

    Returns
    -------
    numpy.ndarray
        Integer array with one row per line of the file and one column per
        value on that line.
    """
    with open(filename) as handle:
        tokens = [line.strip().split(" ") for line in handle]
    return np.array(tokens).astype(int)

# x_train has one row per line of the data file and one column per value on
# that line, i.e. shape (number of examples, dimension). For the Gisette
# training split this is presumably (6000, 5000) — TODO confirm via x_train.shape.

# Module-level globals: the loss/gradient helpers in this notebook read these directly.
x_train = preprocessing_x("gisette_train.data")
# 1-D integer label vector, one entry per line of the labels file.
y_train = preprocessing_y("gisette_train.labels")


In [25]:
# In this section, we define the functions used in the optimization part 
    
def loss(theta, lbda):
    """L2-regularised mean logistic loss on the global training set.

    Computes ``mean(log(1 + exp(-y_i * <x_i, theta>))) + lbda * ||theta||^2 / 2``
    using the module-level ``x_train`` and ``y_train``.

    Parameters
    ----------
    theta : numpy.ndarray
        Parameter vector; must be compatible with ``np.dot(x_train, theta)``.
    lbda : float
        Weight of the squared-norm penalty.

    Returns
    -------
    float
        The objective value.
    """
    # Signed margins y_i * <x_i, theta> (labels presumably in {-1, +1} — TODO confirm).
    margins = y_train * np.dot(x_train, theta)
    # logaddexp(0, -m) == log(1 + exp(-m)) but does not overflow to inf when
    # a margin is strongly negative.
    data_term = np.mean(np.logaddexp(0., -margins))
    return data_term + lbda * np.linalg.norm(theta) ** 2 / 2.

def grad(theta, lbda, n):
    """Full gradient of the L2-regularised logistic loss on the global data.

    Uses the module-level ``x_train`` and ``y_train``.

    Parameters
    ----------
    theta : numpy.ndarray
        Point at which the gradient is evaluated.
    lbda : float
        Weight of the squared-norm penalty.
    n : int
        Divisor applied to the summed per-example gradients (callers pass the
        number of training examples).

    Returns
    -------
    numpy.ndarray
        Gradient vector, same shape as ``theta``.
    """
    margins = y_train * np.dot(x_train, theta)
    # 1 / (1 + exp(m)) written as exp(-log(1 + exp(m))): same value, but
    # exp() never overflows for large positive margins.
    weights = np.exp(-np.logaddexp(0., margins))
    return -np.dot(x_train.T, y_train * weights) / n + lbda * theta

def grad_i(i, theta, lbda):
    """Gradient of the regularised logistic loss for training example ``i``.

    Uses row ``i`` of the module-level ``x_train`` and entry ``i`` of
    ``y_train``.

    Parameters
    ----------
    i : int
        Index of the training example.
    theta : numpy.ndarray
        Point at which the gradient is evaluated.
    lbda : float
        Weight of the squared-norm penalty.

    Returns
    -------
    numpy.ndarray
        Gradient vector, same shape as ``theta``.
    """
    x_i = x_train[i]
    y_i = y_train[i]
    margin = y_i * np.dot(x_i, theta)
    # 1 / (1 + exp(margin)) computed without overflowing exp() when the
    # margin is large and positive.
    weight = np.exp(-np.logaddexp(0., margin))
    return -(x_i * y_i) * weight + lbda * theta

def lipschitz_constant(lbda, n=None):
    """Return ``||x_train||_2^2 / (4 n) + lbda`` for the global training matrix.

    Parameters
    ----------
    lbda : float
        Weight of the squared-norm penalty.
    n : int, optional
        Divisor used in the formula.  Defaults to ``x_train.shape[0]`` so the
        function no longer depends on a global ``n`` left over in the kernel.

    Returns
    -------
    float
        The constant computed from the spectral norm (``ord=2``) of ``x_train``.
    """
    if n is None:
        n = x_train.shape[0]
    return np.linalg.norm(x_train, ord=2) ** 2 / (4. * n) + lbda

In [26]:
## SVRG

def optimize_svrg(itr1, itr2, x_train, y_train):
    """Run SVRG on the L2-regularised logistic-regression objective.

    Each outer iteration computes the full gradient at the snapshot
    ``theta_bar``, then performs ``itr2`` variance-reduced stochastic steps
    starting from that snapshot; the last inner iterate becomes the new
    snapshot.  The loss at the snapshot is printed after every outer iteration.

    Parameters
    ----------
    itr1 : int
        Number of outer iterations.
    itr2 : int
        Number of inner stochastic steps per outer iteration.
    x_train : numpy.ndarray
        Training matrix; ``shape[0]`` is taken as the number of examples and
        ``shape[1]`` as the parameter dimension.
    y_train : numpy.ndarray
        Training labels (not read directly in this function).

    Returns
    -------
    numpy.ndarray
        The final snapshot ``theta_bar``.

    Notes
    -----
    NOTE(review): ``grad``, ``grad_i``, ``loss`` and ``lipschitz_constant``
    read the module-level ``x_train`` / ``y_train`` rather than the arguments
    passed here, so the arguments must be those same global arrays.
    """
    n, d = x_train.shape[0], x_train.shape[1]
    lbda = 1. / n ** (0.5)
    stepsize = 1 / lipschitz_constant(lbda)
    theta_bar = np.zeros(d)
    for k in range(itr1):
        # Full gradient at the snapshot, reused by every inner step.
        grad_k = grad(theta_bar, lbda, n)
        theta = theta_bar.copy()
        for _ in range(itr2):
            # Sample uniformly from 0..n-1; the lower bound of randint is
            # inclusive, so starting at 1 would never pick example 0.
            i = np.random.randint(0, n)
            theta = theta - stepsize * (grad_i(i, theta, lbda) - grad_i(i, theta_bar, lbda) + grad_k)
        theta_bar = theta
        print("After iteration {} loss is now: ".format(k), loss(theta_bar, lbda))
    return theta_bar

# Run 10 outer iterations of 10 inner steps each; the returned theta_bar is
# the cell's displayed value.
optimize_svrg(10, 10, x_train, y_train)

After iteration 0 loss is now:  1.2370983846568533
After iteration 1 loss is now:  0.4155042710489129
After iteration 2 loss is now:  0.3183698314391979
After iteration 3 loss is now:  0.2927373692944291
After iteration 4 loss is now:  0.2581287386028342
After iteration 5 loss is now:  0.24131418588766826
After iteration 6 loss is now:  0.22700572674636588
After iteration 7 loss is now:  0.21574836593558397
After iteration 8 loss is now:  0.20665044863677662
After iteration 9 loss is now:  0.19907707020245716


array([-2.59454013e-06, -5.60258412e-06, -2.54240195e-05, ...,
       -9.19156035e-06,  4.68052528e-07, -3.14511049e-05])

In [None]:
## SVRG2

def svrg2(itr1, itr2, stepsize, dim):
    """Placeholder for the SVRG2 optimiser: no logic yet, always returns None."""
    return

## CURVATURE MATCHING

def curvature_matching(f, stepsize):
    """Placeholder for curvature matching: no logic yet, always returns None."""
    return

## ACTION MATCHING

def action_matching(f, stepsize):
    """Placeholder for action matching: no logic yet, always returns None."""
    return