In [177]:
# Package Imports 

import numpy as np
from tqdm import tqdm
from numba import njit

In [58]:
# Gisette Dataset Pre-Processing

def preprocessing_y(filename):
    """Load a whitespace-separated integer label file as a transposed array.

    Every non-blank line of ``filename`` becomes one row of integers and the
    resulting 2-D array is returned transposed, so a file holding one value
    per line yields an array of shape ``(1, number_of_lines)``.

    Parameters
    ----------
    filename : str or path-like
        Path of the text file to read.

    Returns
    -------
    numpy.ndarray
        Integer array, transposed relative to the row layout of the file.

    Raises
    ------
    ValueError
        If a token cannot be parsed as an integer.
    """
    rows = []
    with open(filename) as file:
        # Iterate the file lazily instead of materialising it with
        # readlines(), and split on any run of whitespace so trailing
        # separators, tabs and blank lines do not break integer parsing.
        for line in file:
            fields = line.split()
            if fields:
                rows.append(fields)
    return np.array(rows).astype(int).T

def preprocessing_x(filename):
    """Load a whitespace-separated integer feature file as a transposed array.

    Every non-blank line of ``filename`` is one example whose tokens are its
    integer feature values. The rows are stacked into a 2-D array which is
    returned transposed, i.e. with shape
    ``(values_per_line, number_of_lines)``.

    Parameters
    ----------
    filename : str or path-like
        Path of the text file to read.

    Returns
    -------
    numpy.ndarray
        Integer array with one column per line of the file.

    Raises
    ------
    ValueError
        If a token cannot be parsed as an integer.
    """
    rows = []
    with open(filename) as file:
        # Iterate the file lazily instead of materialising it with
        # readlines(), and split on any run of whitespace so trailing
        # separators, tabs and blank lines do not break integer parsing.
        for line in file:
            fields = line.split()
            if fields:
                rows.append(fields)
    return np.array(rows).astype(int).T

# Load the Gisette training split. Both loaders return the file contents
# transposed, so x_train has shape (5000, 6000) = (dimension, number of examples)
# and examples are stored as columns.
# NOTE(review): Gisette label files conventionally hold -1/+1, whereas the
# log-loss defined later in this notebook has the binary cross-entropy form
# that is normally used with 0/1 targets -- confirm the label encoding.

x_train = preprocessing_x("gisette_train.data")
y_train = preprocessing_y("gisette_train.labels")


In [185]:
# Helper functions used by the optimization routines below: the sigmoid, the model activation, and the log-loss gradients

def sigmoid(z):
    """Numerically stable logistic function ``1 / (1 + exp(-z))``.

    The naive formula overflows in ``exp(-z)`` for large negative ``z`` and
    emits a RuntimeWarning. Here only ``exp(-|z|)`` is evaluated, which never
    exceeds 1, and the algebraically equivalent branch is selected per
    element:

    * ``z >= 0``: ``1 / (1 + exp(-z))``
    * ``z <  0``: ``exp(z) / (1 + exp(z))``

    Parameters
    ----------
    z : scalar or array-like
        Input value(s).

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Element-wise sigmoid with the same shape as ``z`` (a scalar for
        scalar input).
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # always in (0, 1], so it cannot overflow
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # proper arrays untouched, matching what the plain formula returns.
    return result[()]

def activation(w, b, x_i):
    """Return the sigmoid of the affine score ``w.T . x_i + b``.

    ``w`` is transposed before the dot product with ``x_i``, the bias ``b``
    is added, and the result is passed through ``sigmoid``.
    """
    score = np.dot(w.T, x_i)
    score = score + b
    return sigmoid(score)
    
def average_grad_log_loss(w, b, X, Y):
    """Average log-loss over all columns of ``X`` together with its gradient.

    Parameters
    ----------
    w : numpy.ndarray
        Weight column vector; ``w.T`` is multiplied with ``X``.
    b : scalar or numpy.ndarray
        Bias added to every score.
    X : numpy.ndarray
        Data matrix with one example per column; ``X.shape[1]`` is used as
        the number of examples ``N``.
    Y : numpy.ndarray
        Targets, broadcast against the ``(1, N)`` row of predictions.
        NOTE(review): the cross-entropy form assumes targets in {0, 1} --
        confirm against the label encoding of the loaded data.

    Returns
    -------
    dw : numpy.ndarray
        Gradient of the average loss with respect to ``w``.
    db : float
        Gradient of the average loss with respect to ``b``.
    cost : float
        Average log-loss.
    """
    N = X.shape[1]
    Z = np.dot(w.T, X) + b
    A = sigmoid(Z)
    # -[Y*log(A) + (1-Y)*log(1-A)] equals log(1 + exp(Z)) - Y*Z identically.
    # Evaluating the right-hand side with logaddexp avoids log(0), which made
    # the cost -inf/nan (0 * -inf) as soon as the sigmoid saturated.
    cost = (1 / N) * np.sum(np.logaddexp(0, Z) - Y * Z)
    dw = (1 / N) * np.dot(X, (A - Y).T)
    db = (1 / N) * np.sum(A - Y)
    return dw, db, cost

def grad_wstb(w, b, x_i, y_i):
    """Single-example log-loss gradient with respect to the bias ``b``.

    ``x_i`` gets a new axis inserted at position 1 (a 1-D vector becomes a
    column) before the prediction is computed; the returned value is the
    prediction minus the target ``y_i``.
    """
    column = np.expand_dims(x_i, axis=1)
    prediction = activation(w, b, column)
    residual = prediction - y_i
    return residual

def grad_wstw(w, b, x_i, y_i):
    """Single-example log-loss gradient with respect to the weights ``w``.

    ``x_i`` gets a new axis inserted at position 1 (a 1-D vector becomes a
    column); the result is that column scaled by the residual
    ``activation(w, b, x_i) - y_i``.
    """
    column = np.expand_dims(x_i, axis=1)
    prediction = activation(w, b, column)
    residual = prediction - y_i
    return column * residual

In [186]:

## SVRG

def optimize_svrg(itr1, itr2, stepsize, x_train, y_train):
    """Fit logistic-regression parameters with SVRG.

    Each outer epoch takes a snapshot ``(w_bar, b_bar)`` with its full
    gradient, then performs ``itr2`` stochastic steps using the
    variance-reduced direction
    ``grad_i(w, b) - grad_i(w_bar, b_bar) + full_grad(w_bar, b_bar)``.
    The last inner iterate becomes the next snapshot, and the average
    log-loss at the new snapshot is printed after every epoch.

    Parameters
    ----------
    itr1 : int
        Number of outer epochs (snapshots).
    itr2 : int
        Number of stochastic inner steps per epoch.
    stepsize : float
        Step size applied to every inner update.
    x_train : numpy.ndarray
        Data matrix with one example per column, shape ``(d, N)``.
    y_train : numpy.ndarray
        Targets; ``y_train.T[i]`` is used as the target of example ``i``.

    Returns
    -------
    w_bar : numpy.ndarray
        Final weights, shape ``(d, 1)``.
    b_bar : scalar or numpy.ndarray
        Final bias.
    """
    N = x_train.shape[1]
    d = x_train.shape[0]
    w_bar, b_bar = np.zeros((d, 1)), 0

    # Transposed views are built once rather than four times per inner step.
    examples, targets = x_train.T, y_train.T

    # The full gradient at the current snapshot is computed once here and then
    # refreshed at the end of each epoch, where the cost is needed for logging
    # anyway; this avoids a second full pass over the data at the same point.
    dw_bar, db_bar, _ = average_grad_log_loss(w_bar, b_bar, x_train, y_train)

    for _ in range(itr1):
        w, b = w_bar, b_bar
        for _ in range(itr2):
            # randint's upper bound is exclusive, so (0, N) covers every
            # example; starting at 1 would never sample example 0.
            i = np.random.randint(0, N)
            x_i, y_i = examples[i], targets[i]

            # Evaluate both directions at the same iterate (w, b) before
            # updating either parameter, so the bias step does not see the
            # freshly updated weights.
            step_w = (grad_wstw(w, b, x_i, y_i)
                      - grad_wstw(w_bar, b_bar, x_i, y_i) + dw_bar)
            step_b = (grad_wstb(w, b, x_i, y_i)
                      - grad_wstb(w_bar, b_bar, x_i, y_i) + db_bar)

            w = w - stepsize * step_w
            b = b - stepsize * step_b

        w_bar = w
        b_bar = b
        dw_bar, db_bar, cost = average_grad_log_loss(w_bar, b_bar, x_train, y_train)
        print("cost is ", cost)

    return w_bar, b_bar



In [None]:
## SVRG2

def svrg2(itr1, itr2, stepsize, dim):
    """Placeholder for the SVRG2 optimizer.

    Not implemented yet: every argument is ignored and ``None`` is returned.
    """

## CURVATURE MATCHING

def curvature_matching(f, stepsize):
    """Placeholder for the curvature-matching method.

    Not implemented yet: both arguments are ignored and ``None`` is returned.
    """

## ACTION MATCHING

def action_matching(f, stepsize):
    """Placeholder for the action-matching method.

    Not implemented yet: both arguments are ignored and ``None`` is returned.
    """