In [1]:
import numpy as np

import multiprocessing as mp

In [2]:
# def alpha_estimator(m, X):
#     N = len(X)
#     n = int(N/m) # must be an integer
#     Y = np.sum(X.reshape(n, m),1)
#     eps = np.spacing(1)

#     Y_log_norm =  np.log(np.abs(Y) + eps).mean()
#     X_log_norm =  np.log(np.abs(X) + eps).mean()
#     diff = (Y_log_norm - X_log_norm) / np.log(m)
#     return 1 / diff

# def hill_estimator(X,k):
#     X = np.log(np.abs(X)) #assuming symmetric
#     Xs = np.sort(X)
#     term1 = np.mean(Xs[-(k+1):])
#     term2 = Xs[k]
#     return 1/(term1-term2)

In [2]:
# Corollary 2.4 in Mohammadi 2014 - for multi-d
def alpha_estimator(m, X):
    """Estimate the tail index alpha of the sample rows by block summation.

    The samples are split into ``n = N // m`` consecutive blocks of ``m`` rows,
    each block is summed to give ``Y_i``, and the estimate is

        1 / alpha = (mean(log ||Y_i||) - mean(log ||X_j||)) / log(m)

    Parameters
    ----------
    m : int
        Number of consecutive samples summed into one block.
    X : array_like, shape (N, d) or (N,)
        Samples, one per row. A 1-D input is treated as N scalar samples.

    Returns
    -------
    float
        The estimated alpha (``1 / diff``).

    Raises
    ------
    ValueError
        If fewer than ``m`` samples are supplied (no complete block).

    Notes
    -----
    When ``N`` is not a multiple of ``m`` the trailing ``N % m`` samples are
    dropped so that both averages are taken over the same ``n * m`` samples.
    For 2-D input with ``N`` divisible by ``m`` nothing is dropped.
    """
    X = np.asarray(X)
    N = len(X)
    n = N // m  # number of complete blocks
    if n == 0:
        raise ValueError(
            "alpha_estimator needs at least m samples (got N=%d, m=%d)" % (N, m)
        )
    # Keep only complete blocks and view every sample as a flat d-vector.
    X = X[:n * m].reshape(n * m, -1)
    Y = X.reshape(n, m, -1).sum(axis=1)
    # Smallest positive spacing at 1.0; keeps log() finite for zero-norm rows.
    eps = np.spacing(1)
    Y_log_norm = np.log(np.linalg.norm(Y, axis=1) + eps).mean()
    X_log_norm = np.log(np.linalg.norm(X, axis=1) + eps).mean()
    diff = (Y_log_norm - X_log_norm) / np.log(m)
    return 1 / diff

In [3]:
# Seed the global NumPy RNG so the synthetic dataset below is reproducible
# across kernel restarts.
SEED = 0
np.random.seed(SEED)

# number of data points
n = 10000
# dimension of x
d = 100
# batch size
batch = 10

# standard deviations of the true weights, the features and the label noise
sig_w = 3
sig_x = 1
sig_y = 3

# ground-truth weights, shape (d,); uses sig_w rather than a repeated literal
w = np.random.normal(0, sig_w, d)
# features, shape (n, d)
X = sig_x * np.random.randn(n, d)
# noisy linear labels stored as a column, shape (n, 1)
Y = X @ w.reshape(-1, 1) + sig_y * np.random.randn(n, 1)

In [5]:
# NOTE(review): the output recorded below is a 1x1 array, whereas the data cell
# draws w with d = 100 entries; the output presumably comes from a different
# kernel state -- re-run to confirm.
w

array([[0.49913832]])

In [24]:
# NOTE(review): in the visible cells Xk and w0 exist only as locals inside the
# linearreg* functions, so this scratch shape check appears to rely on leftover
# kernel state -- confirm it still runs after Restart & Run All.
(np.dot(Xk,w0)).shape

(10,)

In [20]:
# NOTE(review): Xk, Yk and w0 are not defined at notebook scope in the visible
# cells (only as locals inside the linearreg* functions); this scratch check of
# Xk^T (Yk - Xk w0) appears to rely on leftover kernel state.
np.dot(np.transpose(Xk), (Yk-np.dot(Xk,w0)))

array([[1553.28688832, 1599.83429978, 1552.4751576 , 1653.06332266,
        1608.00416658, 1413.40637158, 1441.95281649, 1348.91756913,
        1668.96388044, 1545.73517396]])

In [4]:
def loss_batch(w,X,Y,batch):
    """Half mean-squared error of ``X @ w`` on one random minibatch.

    ``batch`` row indices are drawn with ``np.random.randint`` (i.e. with
    replacement) from ``range(Y.shape[0])``.

    Parameters
    ----------
    w : ndarray, shape (d,) or (d, k)
        Weights. A 1-D vector is treated as a single column.
    X : ndarray, shape (n, d)
        Feature matrix.
    Y : ndarray, shape (n,) or (n, k)
        Targets. A 1-D vector is treated as a single column.
    batch : int
        Minibatch size.

    Returns
    -------
    float
        ``sum((X1 @ w - Y1) ** 2) / (2 * batch)`` on the sampled rows.
    """
    idx = np.random.randint(Y.shape[0], size=batch)
    X1 = X[idx]
    Y1 = Y[idx]
    w = np.asarray(w)
    # Align w and Y as columns. Without this a 1-D w against an (n, 1) Y makes
    # the subtraction broadcast to a (batch, batch) matrix and inflates the loss.
    if w.ndim == 1:
        w = w[:, None]
    if Y1.ndim == 1:
        Y1 = Y1[:, None]
    residual = X1 @ w - Y1
    return np.sum(np.square(residual)) / (2 * batch)

def loss(w,X,Y):
    """Half mean-squared error of ``X @ w`` over the full dataset.

    Parameters
    ----------
    w : ndarray, shape (d,) or (d, k)
        Weights. A 1-D vector is treated as a single column.
    X : ndarray, shape (n, d)
        Feature matrix.
    Y : ndarray, shape (n,) or (n, k)
        Targets. A 1-D vector is treated as a single column.

    Returns
    -------
    float
        ``sum((X @ w - Y) ** 2) / (2 * n)``.
    """
    w = np.asarray(w)
    Y = np.asarray(Y)
    # Align w and Y as columns; a 1-D/column mix would otherwise broadcast the
    # residual to an (n, n) matrix and give a wrong (and very large) result.
    if w.ndim == 1:
        w = w[:, None]
    if Y.ndim == 1:
        Y = Y[:, None]
    residual = np.dot(X, w) - Y
    return np.sum(np.square(residual)) / (2 * X.shape[0])

def linearreg(d,X,Y,K,stepsize,batch,w_star):
    """Constant-step minibatch SGD on least squares; returns a tail average.

    Starts from ``w0 ~ Uniform(-3, 3)^d`` and performs ``K`` updates

        w <- w - stepsize / batch * Xk^T (Xk w - Yk)

    where ``Xk, Yk`` are ``batch`` rows drawn with replacement.

    Parameters
    ----------
    d : int
        Dimension of the weight vector.
    X : ndarray, shape (n, d)
        Feature matrix.
    Y : ndarray, shape (n, 1)
        Targets as a column (rows are selected with ``Y[idx, :]``).
    K : int
        Number of SGD iterations.
    stepsize : float
        Learning rate.
    batch : int
        Minibatch size.
    w_star : any
        Unused; kept so the signature matches the sibling routines.

    Returns
    -------
    ndarray, shape (d,)
        Mean of the last 500 stored iterates (the list holds ``w0`` plus one
        entry per step, so fewer are averaged when ``K < 499``).

    Notes
    -----
    The per-step minibatch loss that used to be accumulated here was never
    returned or read, so it is no longer computed. This removes one extra
    ``np.random.randint`` draw per step from the global RNG stream.
    """
    w = np.random.uniform(-3, 3, d).reshape(-1, 1)
    iterates = [w.reshape(-1)]

    for _ in range(K):
        idx = np.random.randint(Y.shape[0], size=batch)
        Xk = X[idx, :]
        Yk = Y[idx, :]
        # w is rebound (not modified in place), so stored views stay valid.
        w = w - stepsize / batch * (Xk.T @ (Xk @ w - Yk))
        iterates.append(w.reshape(-1))

    return np.mean(np.array(iterates[-500:]), axis=0)

def linearreg_unif(d,X,Y,K,base_lr,max_lr,batch,w_star):
    """Minibatch SGD on least squares with an i.i.d. uniform learning rate.

    Starts from ``w0 ~ Uniform(-3, 3)^d``. At every step a fresh step size is
    drawn from ``Uniform(base_lr, max_lr)`` and the update

        w <- w - stepsize / batch * Xk^T (Xk w - Yk)

    is applied on ``batch`` rows drawn with replacement. After every update
    (and once for ``w0``) ``loss_batch`` is evaluated on an independent random
    minibatch.

    Parameters
    ----------
    d : int
        Dimension of the weight vector.
    X : ndarray, shape (n, d)
        Feature matrix.
    Y : ndarray, shape (n, 1)
        Targets as a column (rows are selected with ``Y[idx, :]``).
    K : int
        Number of SGD iterations.
    base_lr, max_lr : float
        Lower and upper bound of the uniform step-size distribution.
    batch : int
        Minibatch size.
    w_star : array_like, shape (d,) or (d, 1)
        Reference solution subtracted from the last iterate.

    Returns
    -------
    w_mean : ndarray, shape (d,)
        Mean of the last 500 stored iterates.
    w_final : ndarray, shape (d,)
        Last iterate minus ``w_star``.
    loss_list : ndarray, shape (K + 1,)
        Minibatch losses recorded for ``w0`` and after each update.
    """
    w = np.random.uniform(-3, 3, d).reshape(-1, 1)
    iterates = [w.reshape(-1)]
    # Pass the column form of w0: a 1-D vector against the (batch, 1) targets
    # would broadcast inside loss_batch and inflate the first recorded loss.
    losses = [loss_batch(w, X, Y, batch)]

    for _ in range(K):
        stepsize = np.random.uniform(low=base_lr, high=max_lr)
        idx = np.random.randint(Y.shape[0], size=batch)
        Xk = X[idx, :]
        Yk = Y[idx, :]
        # w is rebound (not modified in place), so stored views stay valid.
        w = w - stepsize / batch * (Xk.T @ (Xk @ w - Yk))
        iterates.append(w.reshape(-1))
        losses.append(loss_batch(w, X, Y, batch))

    w_list = np.array(iterates)
    loss_list = np.array(losses)

    w_mean = np.mean(w_list[-500:], axis=0)
    # Flatten w_star so a (d, 1) column (as returned by ideal_sol for column
    # targets) does not broadcast the difference to a (d, d) matrix.
    w_final = w_list[-1] - np.reshape(w_star, -1)

    return w_mean, w_final, loss_list

def linearreg_cyc(d,X,Y,K,base_lr,max_lr,stepsize,batch,w_star):
    """Minibatch SGD on least squares with a triangular cyclic learning rate.

    Starts from ``w0 ~ Uniform(-3, 3)^d``. At iteration ``k`` (0-based) the
    learning rate is

        cycle = floor(1 + (k + 1) / (2 * stepsize))
        loc   = |(k + 1) / stepsize - 2 * cycle + 1|
        lr    = base_lr + (max_lr - base_lr) * max(0, 1 - loc)

    i.e. it reaches ``max_lr`` when ``k + 1`` is an odd multiple of
    ``stepsize`` and returns to ``base_lr`` at even multiples. The update is

        w <- w - lr / batch * Xk^T (Xk w - Yk)

    on ``batch`` rows drawn with replacement.

    Parameters
    ----------
    d : int
        Dimension of the weight vector.
    X : ndarray, shape (n, d)
        Feature matrix.
    Y : ndarray, shape (n, 1)
        Targets as a column (rows are selected with ``Y[idx, :]``).
    K : int
        Number of SGD iterations.
    base_lr, max_lr : float
        Minimum and maximum learning rate of the cycle.
    stepsize : float
        Half-length of one cycle, in iterations.
    batch : int
        Minibatch size.
    w_star : array_like, shape (d,) or (d, 1)
        Reference solution subtracted from the last iterate.

    Returns
    -------
    w_mean : ndarray, shape (d,)
        Mean of the last 500 stored iterates.
    w_final : ndarray, shape (d,)
        Last iterate minus ``w_star``.

    Notes
    -----
    The per-step minibatch loss that used to be accumulated here was never
    returned or read, so it is no longer computed. This removes one extra
    ``np.random.randint`` draw per step from the global RNG stream.
    """
    w = np.random.uniform(-3, 3, d).reshape(-1, 1)
    iterates = [w.reshape(-1)]

    for k in range(K):
        cycle = np.floor(1 + (k + 1) / (2 * stepsize))
        loc = np.abs((k + 1) / stepsize - 2 * cycle + 1)
        lr = base_lr + (max_lr - base_lr) * max(0, (1 - loc))

        idx = np.random.randint(Y.shape[0], size=batch)
        Xk = X[idx, :]
        Yk = Y[idx, :]
        # w is rebound (not modified in place), so stored views stay valid.
        w = w - lr / batch * (Xk.T @ (Xk @ w - Yk))
        iterates.append(w.reshape(-1))

    w_list = np.array(iterates)

    w_mean = np.mean(w_list[-500:], axis=0)
    # Flatten w_star so a (d, 1) column (as returned by ideal_sol for column
    # targets) does not broadcast the difference to a (d, d) matrix.
    w_final = w_list[-1] - np.reshape(w_star, -1)

    return w_mean, w_final

def linearreg_mc(d,X,Y,K,base_lr,max_lr,p,stepsize,batch,w_star):
    """Minibatch SGD on least squares with a random-walk learning rate.

    Starts from ``w0 ~ Uniform(-3, 3)^d``. The learning rate moves in
    increments of ``onestep = (max_lr - base_lr) / stepsize``:

    * iteration 0 uses ``base_lr`` and iteration 1 uses ``base_lr + onestep``;
    * afterwards, if the previous rate is at or below ``base_lr`` it steps up
      and ``p`` is replaced by ``max(p, 1 - p)``; if it is at or above
      ``max_lr`` it steps down and ``p`` is replaced by ``min(p, 1 - p)``;
    * otherwise it steps up with probability ``p`` and down with probability
      ``1 - p`` (one ``np.random.uniform(0, 1)`` draw), clipped to
      ``[base_lr, max_lr]``.

    Each iteration then applies

        w <- w - lr / batch * Xk^T (Xk w - Yk)

    on ``batch`` rows drawn with replacement.

    Parameters
    ----------
    d : int
        Dimension of the weight vector.
    X : ndarray, shape (n, d)
        Feature matrix.
    Y : ndarray, shape (n, 1)
        Targets as a column (rows are selected with ``Y[idx, :]``).
    K : int
        Number of SGD iterations.
    base_lr, max_lr : float
        Lower and upper bound of the learning rate.
    p : float
        Initial probability of an upward move in the interior.
    stepsize : float
        Number of increments between ``base_lr`` and ``max_lr``.
    batch : int
        Minibatch size.
    w_star : array_like, shape (d,) or (d, 1)
        Reference solution subtracted from the last iterate.

    Returns
    -------
    w_mean : ndarray, shape (d,)
        Mean of the last 500 stored iterates.
    w_final : ndarray, shape (d,)
        Last iterate minus ``w_star``.

    Notes
    -----
    The per-step minibatch loss that used to be accumulated here was never
    returned or read, so it is no longer computed. This removes one extra
    ``np.random.randint`` draw per step from the global RNG stream.
    """
    onestep = (max_lr - base_lr) / stepsize

    w = np.random.uniform(-3, 3, d).reshape(-1, 1)
    iterates = [w.reshape(-1)]

    for k in range(K):
        if k == 0:
            lr = base_lr
        elif k == 1:
            lr = base_lr + onestep
        elif lr <= base_lr:
            # Bounced off the lower boundary: bias later moves upward.
            p = max(p, 1 - p)
            lr = base_lr + onestep
        elif lr >= max_lr:
            # Bounced off the upper boundary: bias later moves downward.
            p = min(p, 1 - p)
            lr = max_lr - onestep
        elif np.random.uniform(0, 1) <= p:
            lr = min(max_lr, lr + onestep)
        else:
            lr = max(base_lr, lr - onestep)

        idx = np.random.randint(Y.shape[0], size=batch)
        Xk = X[idx, :]
        Yk = Y[idx, :]
        # w is rebound (not modified in place), so stored views stay valid.
        w = w - lr / batch * (Xk.T @ (Xk @ w - Yk))
        iterates.append(w.reshape(-1))

    w_list = np.array(iterates)

    w_mean = np.mean(w_list[-500:], axis=0)
    # Flatten w_star so a (d, 1) column (as returned by ideal_sol for column
    # targets) does not broadcast the difference to a (d, d) matrix.
    w_final = w_list[-1] - np.reshape(w_star, -1)

    return w_mean, w_final

def ideal_sol(X,Y):
    """Ordinary least-squares solution of ``X w = Y`` via the normal equations.

    Solves ``(X^T X) w = X^T Y`` with ``np.linalg.solve`` instead of forming
    the explicit inverse of ``X^T X``, which yields the same solution with
    less round-off.

    Parameters
    ----------
    X : ndarray, shape (n, d)
        Feature matrix.
    Y : ndarray, shape (n,) or (n, k)
        Targets.

    Returns
    -------
    ndarray, shape (d,) or (d, k)
        The minimiser; its trailing shape follows ``Y``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``X^T X`` is singular.
    """
    gram = X.T @ X
    return np.linalg.solve(gram, X.T @ Y)

In [17]:
# mc lr
# For every centre learning rate, run T independent Markov-chain-lr SGD chains,
# collect their tail-averaged weights and estimate the tail index of the
# centred samples.

d = 100
K = 1000
T = 10000
mean_lr = [0.14,0.15,0.16,0.17,0.18,0.19,0.2,0.21,0.22]
delta = 0.05
stepsize = 10
p = 1
batch = 10
w_star = ideal_sol(X,Y)
alpha_list = []

for mean in mean_lr:
    # One row per chain: the first return value of linearreg_mc (w_mean).
    random_vec = np.array([
        linearreg_mc(d, X, Y, K, mean - delta, mean + delta, p, stepsize, batch, w_star)[0]
        for t in range(T)
    ])

    # Centre the samples across chains before estimating alpha with blocks of 100.
    data = random_vec - random_vec.mean(axis=0)
    alpha_list.append(alpha_estimator(100, data))

### batch = 10

In [6]:
# delta = 0.05
# p = 0.6
alpha_list

[2.002046524425199,
 1.9993057626601152,
 1.9646224357506248,
 1.6161651157544217,
 0.9808721745312605,
 0.9275423196270094,
 0.89077630510346,
 0.9537082539371149,
 0.9743876057367452]

In [8]:
# delta = 0.05
# p = 0.7
alpha_list

[2.0007898223421674,
 2.000237518445299,
 1.9994715103487886,
 1.9382541932994244,
 1.1038531609750617,
 0.9636895094139215,
 0.934419566904034,
 0.882088120021222,
 0.8904309171594724]

In [10]:
# delta = 0.05
# p = 0.8
alpha_list

[2.0028777798052273,
 2.0004219028047263,
 2.00488114900863,
 1.9671666000156018,
 1.3004634159557238,
 1.1203209246048824,
 1.0430041412949023,
 1.0003540553962178,
 0.9552239239958898]

In [12]:
# delta = 0.05
# p = 0.9
alpha_list

[2.008836816792085,
 1.9960944117571662,
 1.9959043202512452,
 1.9938772348723002,
 1.428781234786798,
 1.2512188228467234,
 1.1523493881097504,
 1.0739715564066543,
 1.036683940882398]

### p = 0.6 (varying delta)

In [14]:
# delta = 0.04
# p = 0.6
alpha_list

[2.003700207690419,
 1.9900085141581865,
 2.0001431910110257,
 1.9076964829958,
 1.0093872016052483,
 0.9076954382106143,
 0.9462614555706806,
 0.840042709143689,
 0.8789562301814527]

In [7]:
# delta = 0.03
# p = 0.6
alpha_list

[2.007280386000215,
 1.9939633787725872,
 1.9955746841220696,
 1.97388027583118,
 1.2914704606043201,
 1.0102961431068678,
 0.9508711819200671,
 0.9437380010007425,
 0.9507508789632385]

In [9]:
# delta = 0.02
# p = 0.6
alpha_list

[2.0047487984136514,
 2.0059994843976887,
 1.997108063889702,
 2.0056116939510145,
 1.5831318591564758,
 1.1827401448809856,
 1.0605788271248406,
 1.0247408290307496,
 0.9611613405087753]

### batch = 5

In [11]:
# delta = 0.05
# p = 0.6
alpha_list

[0.9264896909148191,
 0.9621836353446682,
 0.9506348937605029,
 0.9761174946713548,
 0.9695425538228692,
 0.9609424562589681,
 0.9673843419640878,
 0.954107313506329,
 0.9746761937722437]

In [13]:
# delta = 0.05
# p = 0.6
# batch = 2
alpha_list

[0.9818790857260454,
 0.9882371450191456,
 0.9672782542201056,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan]

In [15]:
# delta = 0.05
# p = 0.6
# batch = 7
alpha_list

[0.9042221668623771,
 0.8700888403966642,
 0.8933334380405398,
 0.957506602964338,
 0.895328100433898,
 0.9705782762630675,
 0.9462830725163469,
 0.9663269689546208,
 0.930257797257412]

In [7]:
# delta = 0.05
# p = 0.6
# batch = 15
alpha_list

[2.0035171846882553,
 2.0135258646059153,
 2.0063125438309637,
 2.013460499228646,
 2.0012637555100925,
 2.0003798761587923,
 1.9981436627871332,
 1.9998216614799522,
 2.002280135675251]

In [7]:
# delta = 0.05
# p = 0.4
# batch = 10
alpha_list

[1.9943753571714917,
 1.9950639854821763,
 1.957120456741454,
 1.6824819879290065,
 0.9494395188129017,
 0.875474701010148,
 0.9799488114324543,
 0.9159566383564083,
 0.9186597032047429]

In [9]:
# delta = 0.05
# p = 0.3
# batch = 10
alpha_list

[2.0001279737440205,
 1.9870249576420291,
 1.999514874384365,
 1.933101370983265,
 1.122724610672851,
 0.9828353759204994,
 0.929100507595582,
 0.8913551039301439,
 0.9175175543742821]

In [12]:
# delta = 0.05
# p = 0.2
# batch = 10
alpha_list

[2.00697100867133,
 2.0119853260321996,
 1.9933699529453421,
 1.9697108011403963,
 1.317389505426751,
 1.1324853754127113,
 1.041015666486291,
 1.00491468812273,
 0.9520538288573255]

In [14]:
# delta = 0.05
# p = 0.1
# batch = 10
alpha_list

[2.0051916622602155,
 1.9945705662745892,
 2.00490379217264,
 1.9827209049411674,
 1.4387004358208177,
 1.268236660419093,
 1.1531935022655535,
 1.086057667565457,
 1.0137602967847332]

In [16]:
# delta = 0.05
# p = 0
# batch = 10
alpha_list

[1.9988285504412928,
 2.0010328841064706,
 2.0091861275393095,
 1.9870029968082032,
 1.5465389233210884,
 1.3788900217096625,
 1.252981598305959,
 1.1940963626417411,
 1.1024873141084652]

In [18]:
# delta = 0.05
# p = 1
# batch = 10
alpha_list

[1.9963563423857555,
 2.0039873314793,
 2.001286985447247,
 1.9957829668936526,
 1.5398887595079216,
 1.3530620599289807,
 1.26730458591317,
 1.1873361180797293,
 1.1073351651498942]