In [1]:
import numpy as np

import multiprocessing as mp

In [2]:
# def alpha_estimator(m, X):
#     N = len(X)
#     n = int(N/m) # must be an integer
#     Y = np.sum(X.reshape(n, m),1)
#     eps = np.spacing(1)

#     Y_log_norm =  np.log(np.abs(Y) + eps).mean()
#     X_log_norm =  np.log(np.abs(X) + eps).mean()
#     diff = (Y_log_norm - X_log_norm) / np.log(m)
#     return 1 / diff

# def hill_estimator(X,k):
#     X = np.log(np.abs(X)) #assuming symmetric
#     Xs = np.sort(X)
#     term1 = np.mean(Xs[-(k+1):])
#     term2 = Xs[k]
#     return 1/(term1-term2)

In [3]:
# Corollary 2.4 in Mohammadi 2014 - for multi-d
def alpha_estimator(m, X):
    """Estimate the tail index alpha of the rows of ``X`` via block sums.

    The samples are split into ``n = N // m`` consecutive blocks of ``m`` rows.
    Each block is summed to give ``Y_i`` and the estimate is obtained from

        1 / alpha = (mean(log ||Y_i||) - mean(log ||X_j||)) / log(m)

    Parameters
    ----------
    m : int
        Number of rows per block; must be at least 2 (``log(1) == 0`` would
        make the estimate undefined).
    X : array_like, shape (N, d) or (N,)
        One sample per row. A 1-D input is treated as N scalar samples.
        If N is not a multiple of ``m`` the trailing ``N % m`` rows are
        ignored so that every block is complete.

    Returns
    -------
    float
        The estimated alpha.

    Raises
    ------
    ValueError
        If ``m < 2`` or if there are fewer than ``m`` samples.
    """
    X = np.asarray(X)
    if X.ndim == 1:
        # Scalar samples: give them an explicit feature axis so that the
        # row-wise norms below are well defined.
        X = X.reshape(-1, 1)
    if m < 2:
        raise ValueError("m must be at least 2")

    N = len(X)
    n = N // m
    if n == 0:
        raise ValueError("need at least m samples to form one block")

    # Keep only whole blocks; both averages use the same samples.
    X = X[:n * m]
    Y = np.sum(X.reshape((n, m, -1)), 1)

    # eps keeps log() finite when a norm is exactly zero.
    eps = np.spacing(1)
    Y_log_norm = np.log(np.linalg.norm(Y, axis=1) + eps).mean()
    X_log_norm = np.log(np.linalg.norm(X, axis=1) + eps).mean()
    diff = (Y_log_norm - X_log_norm) / np.log(m)
    return 1/diff

In [4]:
# Synthetic linear-regression problem: Y = X w + noise.
# NOTE: no random seed is set, so every execution draws a different data set.

# number of data points
n = 10000
# dimension of x
d = 100
# batch size
batch = 10

# standard deviations of the true weights, the features and the label noise
sig_w = 3
sig_x = 1
sig_y = 3

# ground-truth weights, 1-D with shape (d,); drawn with std sig_w rather than a repeated literal
w = np.random.normal(0, sig_w, d)
# features, shape (n, d)
X = sig_x * np.random.randn(n, d)
# targets as a column vector, shape (n, 1)
Y = X @ w.reshape(-1, 1) + sig_y * np.random.randn(n, 1)

In [47]:
Y.shape

(10000, 1)

In [24]:
# NOTE(review): scratch shape check of the mini-batch prediction Xk @ w0.
# In the visible notebook `Xk` and `w0` are only assigned inside the linearreg* functions,
# so this cell presumably relied on kernel state left over from an earlier session and
# would raise NameError on a fresh kernel -- confirm, then delete or make self-contained.
(np.dot(Xk,w0)).shape

(10,)

In [20]:
# NOTE(review): scratch evaluation of Xk^T (Yk - Xk w0), i.e. the mini-batch residual
# projected back onto the features (the SGD update direction before scaling by lr / batch).
# `Xk`, `Yk` and `w0` are not defined at notebook level in the visible cells, so this
# presumably depended on leftover kernel state -- confirm, then delete or make self-contained.
np.dot(np.transpose(Xk), (Yk-np.dot(Xk,w0)))

array([[1553.28688832, 1599.83429978, 1552.4751576 , 1653.06332266,
        1608.00416658, 1413.40637158, 1441.95281649, 1348.91756913,
        1668.96388044, 1545.73517396]])

In [5]:
def loss_batch(w,X,Y,batch):
    """Half mean-squared error of ``w`` on a random mini-batch.

    Parameters
    ----------
    w : ndarray, shape (d,) or (d, 1)
        Weight vector.
    X : ndarray, shape (n, d)
        Feature matrix.
    Y : ndarray, shape (n, 1)
        Targets (must be 2-D because rows are selected with ``Y[idx, :]``).
    batch : int
        Number of rows sampled uniformly at random, with replacement.

    Returns
    -------
    float
        ``sum((X1 @ w - Y1) ** 2) / (2 * batch)`` on the sampled rows.
    """
    idx = np.random.randint(Y.shape[0], size = batch)
    X1 = X[idx,:]
    Y1 = Y[idx,:]
    # Give the predictions the same shape as the targets. A 1-D w yields a
    # (batch,) prediction which would otherwise broadcast against the
    # (batch, 1) targets into a batch-by-batch matrix of residuals.
    pred = (X1 @ w).reshape(Y1.shape)
    return np.sum(np.square(pred - Y1)) / (2 * batch)

def loss(w,X,Y):
    """Half mean-squared error of ``w`` over the full data set.

    Parameters
    ----------
    w : ndarray, shape (d,) or (d, 1)
        Weight vector.
    X : ndarray, shape (n, d)
        Feature matrix.
    Y : array_like, shape (n,) or (n, 1)
        Targets.

    Returns
    -------
    float
        ``sum((X @ w - Y) ** 2) / (2 * n)``.
    """
    Y = np.asarray(Y)
    # Match the prediction's shape to Y so that a 1-D w combined with a column
    # Y (or the reverse) cannot broadcast into an n-by-n residual matrix.
    pred = np.dot(X, w).reshape(Y.shape)
    return np.sum(np.square(pred - Y)) / (2 * X.shape[0])

def linearreg(d,X,Y,K,stepsize,batch,w_star):
    """Run K steps of constant-step mini-batch SGD on the least-squares loss.

    Starting from ``w0 ~ Uniform(-3, 3)^d``, every step samples ``batch`` rows
    with replacement and applies

        w <- w - (stepsize / batch) * Xk^T (Xk w - Yk)

    Parameters
    ----------
    d : int
        Number of features (length of the weight vector).
    X : ndarray, shape (n, d)
        Feature matrix.
    Y : ndarray, shape (n, 1)
        Targets.
    K : int
        Number of SGD steps.
    stepsize : float
        Constant learning rate.
    batch : int
        Mini-batch size.
    w_star : any
        Not used by this variant; accepted so that the signature parallels the
        other ``linearreg_*`` functions.

    Returns
    -------
    ndarray, shape (d,)
        Mean of the last 500 iterates (of all ``K + 1`` iterates, including
        the starting point, when fewer than 500 exist).
    """
    # No per-step loss is evaluated: this variant returns only the averaged
    # iterate, so a loss trace would be computed and thrown away.
    w = np.random.uniform(-3, 3, d).reshape(-1, 1)
    w_list = [w.reshape(-1)]

    for _ in range(K):
        idx = np.random.randint(Y.shape[0], size=batch)
        Xk = X[idx, :]
        Yk = Y[idx, :]
        # `w` is rebound (never modified in place), so the flattened views
        # stored in w_list stay valid.
        w = w - stepsize / batch * (Xk.T @ (Xk @ w - Yk))
        w_list.append(w.reshape(-1))

    return np.mean(np.array(w_list[-500:]), axis=0)

def linearreg_unif(d,X,Y,K,base_lr,max_lr,batch,w_star):
    """Mini-batch SGD on the least-squares loss with a uniformly random step size.

    Starting from ``w0 ~ Uniform(-3, 3)^d``, every step draws
    ``stepsize ~ Uniform(base_lr, max_lr)``, samples ``batch`` rows with
    replacement and applies

        w <- w - (stepsize / batch) * Xk^T (Xk w - Yk)

    Parameters
    ----------
    d : int
        Number of features (length of the weight vector).
    X : ndarray, shape (n, d)
        Feature matrix.
    Y : ndarray, shape (n, 1)
        Targets.
    K : int
        Number of SGD steps.
    base_lr, max_lr : float
        Lower and upper end of the step-size distribution.
    batch : int
        Mini-batch size.
    w_star : array_like, shape (d,) or (d, 1)
        Reference solution used for ``w_final``.

    Returns
    -------
    w_mean : ndarray, shape (d,)
        Mean of the last 500 iterates (of all iterates when fewer exist).
    w_final : ndarray, shape (d,)
        Last iterate minus ``w_star``.
    loss_list : ndarray, shape (K + 1,)
        ``loss_batch`` of the starting point and of every iterate, each on a
        freshly sampled mini-batch.
    """
    # The iterate is kept as a column so that every loss_batch call sees
    # shapes matching the (batch, 1) targets, including the starting point.
    w = np.random.uniform(-3, 3, d).reshape(-1, 1)
    w_list = [w.reshape(-1)]
    loss_list = [loss_batch(w, X, Y, batch)]

    for _ in range(K):
        stepsize = np.random.uniform(low=base_lr, high=max_lr)
        idx = np.random.randint(Y.shape[0], size=batch)
        Xk = X[idx, :]
        Yk = Y[idx, :]
        w = w - stepsize / batch * (Xk.T @ (Xk @ w - Yk))
        w_list.append(w.reshape(-1))

        loss_list.append(loss_batch(w, X, Y, batch))

    w_list = np.array(w_list)
    loss_list = np.array(loss_list)

    w_mean = np.mean(w_list[-500:], axis=0)
    # Flatten w_star: subtracting a (d, 1) column from the (d,) iterate would
    # broadcast into a d-by-d matrix instead of an error vector.
    w_final = w_list[-1] - np.asarray(w_star).reshape(-1)

    return w_mean, w_final, loss_list

def linearreg_cyc(d,X,Y,K,base_lr,max_lr,stepsize,batch,w_star):
    """Mini-batch SGD on the least-squares loss with a triangular cyclical step size.

    The learning rate rises linearly from ``base_lr`` to ``max_lr`` over
    ``stepsize`` iterations and falls back over the next ``stepsize``
    iterations, repeating with period ``2 * stepsize``. Starting from
    ``w0 ~ Uniform(-3, 3)^d``, every step samples ``batch`` rows with
    replacement and applies

        w <- w - (lr / batch) * Xk^T (Xk w - Yk)

    Parameters
    ----------
    d : int
        Number of features (length of the weight vector).
    X : ndarray, shape (n, d)
        Feature matrix.
    Y : ndarray, shape (n, 1)
        Targets.
    K : int
        Number of SGD steps.
    base_lr, max_lr : float
        Smallest and largest learning rate of the cycle.
    stepsize : int or float
        Half-cycle length in iterations (not a learning rate).
    batch : int
        Mini-batch size.
    w_star : array_like, shape (d,) or (d, 1)
        Reference solution used for ``w_final``.

    Returns
    -------
    w_mean : ndarray, shape (d,)
        Mean of the last 500 iterates (of all iterates when fewer exist).
    w_final : ndarray, shape (d,)
        Last iterate minus ``w_star``.
    """
    # No loss trace is kept: this variant does not return one.
    w = np.random.uniform(-3, 3, d).reshape(-1, 1)
    w_list = [w.reshape(-1)]

    for k in range(K):
        it = k + 1
        cycle = np.floor(1 + it / (2 * stepsize))
        loc = np.abs(it / stepsize - 2 * cycle + 1)
        lr = base_lr + (max_lr - base_lr) * max(0, (1 - loc))

        idx = np.random.randint(Y.shape[0], size=batch)
        Xk = X[idx, :]
        Yk = Y[idx, :]
        # Step with the scheduled rate `lr`; `stepsize` is the half-cycle
        # length and must not be used as the learning rate.
        w = w - lr / batch * (Xk.T @ (Xk @ w - Yk))
        w_list.append(w.reshape(-1))

    w_list = np.array(w_list)

    w_mean = np.mean(w_list[-500:], axis=0)
    # Flatten w_star so a (d, 1) column does not broadcast into a d-by-d matrix.
    w_final = w_list[-1] - np.asarray(w_star).reshape(-1)

    return w_mean, w_final

def linearreg_mc(d,X,Y,K,base_lr,max_lr,p,stepsize,batch,w_star):
    """Mini-batch SGD on the least-squares loss with a Markov-chain step size.

    The learning rate moves on a grid with spacing
    ``onestep = (max_lr - base_lr) / stepsize``:

    * step 0 uses ``base_lr`` and step 1 uses ``base_lr + onestep``;
    * afterwards, in the interior it moves up one grid step with probability
      ``p`` and down otherwise (clamped to ``[base_lr, max_lr]``);
    * on reaching ``base_lr`` it is reflected upward and ``p`` becomes
      ``max(p, 1 - p)``; on reaching ``max_lr`` it is reflected downward and
      ``p`` becomes ``min(p, 1 - p)``.

    Starting from ``w0 ~ Uniform(-3, 3)^d``, every step samples ``batch`` rows
    with replacement and applies

        w <- w - (lr / batch) * Xk^T (Xk w - Yk)

    Parameters
    ----------
    d : int
        Number of features (length of the weight vector).
    X : ndarray, shape (n, d)
        Feature matrix.
    Y : ndarray, shape (n, 1)
        Targets.
    K : int
        Number of SGD steps.
    base_lr, max_lr : float
        Smallest and largest learning rate.
    p : float
        Initial probability of moving the learning rate upward.
    stepsize : int or float
        Number of grid steps between ``base_lr`` and ``max_lr`` (not a
        learning rate).
    batch : int
        Mini-batch size.
    w_star : array_like, shape (d,) or (d, 1)
        Reference solution used for ``w_final``.

    Returns
    -------
    w_mean : ndarray, shape (d,)
        Mean of the last 500 iterates (of all iterates when fewer exist).
    w_final : ndarray, shape (d,)
        Last iterate minus ``w_star``.
    """
    onestep = (max_lr - base_lr) / stepsize

    # No loss trace is kept: this variant does not return one.
    w = np.random.uniform(-3, 3, d).reshape(-1, 1)
    w_list = [w.reshape(-1)]

    for k in range(K):

        if k == 0:
            lr = base_lr
        elif k == 1:
            lr = base_lr + onestep
        elif lr <= base_lr:
            # Lower boundary: reflect upward and bias the walk upward.
            p = max(p, 1 - p)
            lr = base_lr + onestep
        elif lr >= max_lr:
            # Upper boundary: reflect downward and bias the walk downward.
            p = min(p, 1 - p)
            lr = max_lr - onestep
        else:
            temp = np.random.uniform(0, 1)
            if temp <= p:
                lr = min(max_lr, lr + onestep)
            else:
                lr = max(base_lr, lr - onestep)

        idx = np.random.randint(Y.shape[0], size=batch)
        Xk = X[idx, :]
        Yk = Y[idx, :]
        # Step with the chain's current rate `lr`; `stepsize` only sets the
        # grid resolution and must not be used as the learning rate.
        w = w - lr / batch * (Xk.T @ (Xk @ w - Yk))
        w_list.append(w.reshape(-1))

    w_list = np.array(w_list)

    w_mean = np.mean(w_list[-500:], axis=0)
    # Flatten w_star so a (d, 1) column does not broadcast into a d-by-d matrix.
    w_final = w_list[-1] - np.asarray(w_star).reshape(-1)

    return w_mean, w_final

def ideal_sol(X,Y):
    """Return the ordinary-least-squares solution ``argmin_w ||X w - Y||^2``.

    Parameters
    ----------
    X : ndarray, shape (n, d)
        Feature matrix.
    Y : ndarray, shape (n,) or (n, k)
        Targets.

    Returns
    -------
    ndarray, shape (d,) or (d, k)
        Least-squares weights, with the same trailing shape as ``Y``. When
        ``X`` is rank deficient the minimum-norm solution is returned.
    """
    # lstsq solves the problem directly instead of forming and inverting
    # X^T X, which squares the condition number and fails when X^T X is
    # singular.
    solution, _, _, _ = np.linalg.lstsq(X, Y, rcond=None)
    return solution

In [6]:
# Experiment: SGD with a uniformly random step size, lr ~ Uniform(mean - delta, mean + delta).
# For every mean step size in mean_lr, run T independent chains of K SGD steps, collect the
# averaged iterate returned by each chain, centre the resulting T x d sample and estimate
# its tail index with alpha_estimator. One estimate per mean step size ends up in alpha_list.
# NOTE(review): this runs len(mean_lr) * T * K SGD steps serially, so a full re-run is slow.

d = 100
K = 1000
T = 10000
mean_lr = [0.14,0.15,0.16,0.17,0.18,0.19,0.2,0.21,0.22]
# half-width of the step-size interval around each mean
delta = 0.05
# mini-batch size for this run (rebinds the notebook-level `batch`)
batch = 15
alpha_list = []
# closed-form least-squares solution, passed through to linearreg_unif
w_star = ideal_sol(X,Y)

# TODO(review): a multiprocessing.Pool variant of the inner loop was sketched here but is disabled.

for mean in mean_lr:
    random_vec = []
    
    for t in range(T):
        # Only the first return value (mean of the last 500 iterates) is kept; the
        # final-iterate error and the loss trace are discarded.
        diff,_,_ = linearreg_unif(d, X, Y, K, mean - delta, mean + delta, batch, w_star)
        random_vec.append(diff)
    
    
    
    random_vec = np.array(random_vec)
    # centre the sample before estimating the tail index
    data = np.array(random_vec - np.mean(random_vec, axis = 0))
    # block size m = 100: the T samples are grouped into T / 100 block sums
    alpha_list.append(alpha_estimator(100,data))
    

### Results: estimated alpha per mean step size, batch = 10

In [97]:
# delta = 0.05
alpha_list

[1.9992081559596,
 2.0058759615058332,
 1.9966414023637025,
 1.9888339310322185,
 1.3223480616710073,
 1.1545610117347485,
 1.0441591429818526,
 0.9973064473474609,
 0.9672450767340022]

In [8]:
# delta = 0.04
alpha_list

[1.9980193521187706,
 1.994331313377948,
 2.008449962798776,
 1.9844368268515886,
 1.489800381785496,
 1.2522119658060922,
 1.127180064050747,
 1.0632379306684896,
 1.0201058997358983]

In [10]:
# delta = 0.03
alpha_list

[2.011808171891638,
 2.006497365754277,
 2.002973731131015,
 2.0011681103124825,
 1.6433117890744189,
 1.3396652156058708,
 1.2336902234418194,
 1.1191617514907952,
 1.0423329315758816]

In [12]:
# delta = 0.02
alpha_list

[2.001870966800299,
 1.9957802846557626,
 2.0084541341950914,
 1.9965154018900835,
 1.7738066631121632,
 1.4228938197647925,
 1.2755673483361696,
 1.1941087136869475,
 1.1101158020743571]

### Results: estimated alpha per mean step size, batch = 5 (later cells state their own batch size)

In [6]:
# delta = 0.05
alpha_list

[0.9700783413836138,
 0.9546597864680119,
 0.904544079410688,
 0.9231340212818335,
 0.8954111632952743,
 0.9378181614711333,
 0.9890537042756602,
 0.9670302823223297,
 0.8527419243938376]

In [8]:
# delta = 0.05
# batch = 2
alpha_list

[0.9406916867568116,
 0.9847450270828545,
 0.9905103254191518,
 0.9881511133465686,
 nan,
 nan,
 nan,
 nan,
 nan]

In [10]:
# delta = 0.05
# batch = 7
alpha_list

[1.1729687493617813,
 1.0627197325762983,
 0.9984367321277543,
 0.9345199413471473,
 0.8842244827446193,
 0.9158282678401728,
 0.9370418156715172,
 0.9239248803386607,
 0.9450177261304024]

In [7]:
# delta = 0.05
# batch = 15
alpha_list

[1.9978611534191966,
 2.0044053084263735,
 2.000695085810468,
 1.9991325974971241,
 2.0009896611850366,
 1.9985937372848606,
 2.008359144185032,
 2.002451931712333,
 2.0086479940508997]