In [1]:
import numpy as np
import math
import matplotlib.pyplot as plt
from scipy.linalg import sqrtm

In [2]:
def generate_cir(N):
    """Build the mixing matrix of a ring (cycle) graph on ``N`` nodes.

    Row ``i`` spreads weight 1/3 over node ``i`` itself and its two ring
    neighbours ``i - 1`` and ``i + 1``, with indices wrapping modulo ``N``.

    The weights are accumulated rather than assigned, so degenerate rings
    still get rows that sum to one: for ``N == 2`` the single neighbour is
    both "left" and "right" and receives 2/3, and for ``N == 1`` the only
    node keeps the full weight. For ``N >= 3`` the three column indices are
    distinct, so every non-zero entry is exactly 1/3.

    Parameters
    ----------
    N : int
        Number of nodes.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(N, N)``; empty when ``N == 0``.
    """
    x = np.zeros([N, N])
    for i in range(N):
        # left neighbour, self, right neighbour (wrapping around the ring)
        for j in (i - 1, i, i + 1):
            x[i][j % N] += 1/3
    return x

def generate_ful(N):
    """Return the ``(N, N)`` mixing matrix of a fully connected graph.

    Every entry equals ``1/N``, i.e. each node averages uniformly over all
    nodes, itself included.
    """
    # Dividing an all-ones array fills every cell with the same 1/N value.
    return np.ones([N, N]) / N

def generate_star(N, hub_weight=1/8):
    """Build the mixing matrix of a star graph whose hub is node 0.

    Row 0 (the hub) averages uniformly over all ``N`` nodes. Every other row
    ``i`` (a leaf) puts ``hub_weight`` on the hub and ``1 - hub_weight`` on
    itself, so each row sums to one.

    Parameters
    ----------
    N : int
        Number of nodes.
    hub_weight : float, optional
        Weight a leaf assigns to the hub. The default 1/8 reproduces the
        previously hard-coded 1/8 and 7/8 split.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(N, N)``.
    """
    x = np.zeros([N, N])
    if N == 0:
        return x
    x[0, :] = 1/N
    leaves = np.arange(1, N)
    x[leaves, 0] = hub_weight
    x[leaves, leaves] = 1 - hub_weight
    return x


def alpha_estimator(m, X):
    """Estimate the exponent ``alpha`` from block sums of the rows of ``X``.

    The ``N`` rows of ``X`` are split into ``N // m`` consecutive groups of
    ``m`` rows and each group is summed. With ``Y`` the group sums, the
    returned value is the reciprocal of::

        (mean(log ||Y_k||) - mean(log ||X_i||)) / log(m)

    where the norms are Euclidean norms of rows.

    Parameters
    ----------
    m : int
        Number of rows per group; must divide the number of rows of ``X``.
    X : array_like, shape (N, d)
        One sample per row.

    Returns
    -------
    float
        The estimate ``1 / diff``.

    Raises
    ------
    ValueError
        If ``N`` is not a multiple of ``m``. Without this check the
        ``reshape(..., -1)`` below could silently regroup the data whenever
        ``N * d`` happens to be divisible by ``(N // m) * m``.
    """
    X = np.asarray(X)
    N = len(X)
    if N % m != 0:
        raise ValueError(
            "number of rows (%d) must be a multiple of m (%d)" % (N, m)
        )
    n = N // m
    Y = np.sum(X.reshape((n, m, -1)), 1)
    # eps keeps log() finite if a row is exactly zero
    eps = np.spacing(1)
    Y_log_norm = np.log(np.linalg.norm(Y, axis=1) + eps).mean()
    X_log_norm = np.log(np.linalg.norm(X, axis=1) + eps).mean()
    diff = (Y_log_norm - X_log_norm) / np.log(m)
    return 1/diff

def generate_data(std_a, std_noise, d, batch):
    """Sample ``batch`` noisy linear observations of the global ``x_true``.

    For each sample:
        a     ~ N(0, std_a^2 I_d)
        noise ~ N(0, std_noise^2)
        y     = <a, x_true> + noise

    ``x_true`` is read from the enclosing module/notebook namespace and is
    not a parameter of this function.

    Returns
    -------
    (data_a, data_y) : tuple of numpy.ndarray
        ``data_a`` stacks the feature vectors, one per row. ``data_y`` stacks
        the responses; each response carries the length-1 axis of the noise
        draw (so ``(batch, 1)`` when ``x_true`` is 1-D).
    """
    features, targets = [], []
    for _ in range(batch):
        # Per sample: draw the feature vector first, then the noise.
        a = np.random.normal(0, std_a, d)
        noise = np.random.normal(0, std_noise, 1)
        features.append(a)
        targets.append(np.dot(a, x_true) + noise)
    return np.array(features), np.array(targets)

def MSE(x, data_a, data_y):
    """Half the mean squared residual of the linear model ``data_a @ x``.

    Computes ``sum_i (<data_a[i], x> - data_y[i])**2 / (2 * batch)`` with
    ``batch = len(data_y)``, accumulating the terms in sample order.
    """
    n_samples = len(data_y)
    squared_residuals = (
        (np.dot(data_a[k], x) - data_y[k]) ** 2 for k in range(n_samples)
    )
    return sum(squared_residuals) / (2 * n_samples)

def gradient(x, data_a, data_y):
    """Mini-batch gradient of the half mean-squared-error objective.

    Returns ``sum_i (<data_a[i], x> - data_y[i]) / batch * data_a[i]`` with
    ``batch = len(data_y)``, accumulating the terms in sample order. For an
    empty batch the result is the integer ``0``.
    """
    n_samples = len(data_y)
    return sum(
        (np.dot(data_a[k], x) - data_y[k]) / n_samples * data_a[k]
        for k in range(n_samples)
    )

In [3]:
# ---- problem size ----
nodes = 8                  # number of rows of the iterate matrix (one per node)
d = 100                    # dimension of the unknown vector

# ---- run lengths ----
iterations = 1000          # updates per experiment
num_last_iters = 500       # how many final iterates are averaged per experiment
num_exp = 1600             # independent experiments per step size
batch = 5                  # samples drawn per node per iteration

# ---- noise levels ----
std_noise = 3
std_nodes = [5 for _ in range(nodes)]

# ---- step sizes ----
#batch_list = [1,2,3,4,5,6,7,8]
lr = 0.18
lr_list = np.linspace(0.165, 0.205, num=30)

# ---- random draws (ground truth first, then the starting point) ----
x_true = np.random.normal(0, 3, d)
X_true = np.tile(x_true, (nodes, 1))   # x_true repeated on every row
# initialization
x0 = np.random.uniform(-3, 3, d)

In [8]:
# Sanity check: with a single node the iterate matrix is one row holding x0.
Xd = np.ones((1, 1)) @ x0[np.newaxis, :]
Xd.shape

(1, 100)

In [12]:
############################### Centralized
# Single-node baseline: W is the scalar 1, so the update below is plain
# mini-batch SGD on the least-squares problem.
# NOTE(review): this cell rebinds the notebook-wide `nodes`, `batch` and
# `lr_list`; cells executed afterwards see these values unless they reset them.
w = [1]
W = np.array(w)
nodes = 1
batch = 40
print('centralized start!')

# Compare against only as many rows of X_true as there are nodes here.
# Subtracting a taller X_true from the (1, d) iterate would broadcast and
# inflate the recorded Frobenius error by the extra copies of x_true.
X_true_cent = X_true[:nodes]

lr_list = np.linspace(0.10,0.21,num = 30)
final_iter_all = []   # per step size: array (nodes, num_exp, d) of averaged iterates
error_iter_all = []   # per step size: error curve averaged over experiments
for lr in lr_list:
    print(lr)
    Xd_across_exp = np.zeros((nodes, num_exp, d))
    error_record_d_averaged = np.zeros(iterations)

    for e in range(num_exp):

        if e%100 == 0:
            print(e)

        Xd = np.ones((nodes,1)) @ x0.reshape(1,-1) # each row i is x[i]
        error_record_d = np.zeros(iterations)
        Xd_averaged = np.zeros((nodes, d))

        for ite in range(iterations):

            # sample data and get gradient
            G = np.zeros((nodes, d)) # each row i is gradient of node i
            for i in range(nodes):

                # sample data: b = A x_true + noise
                A = np.random.randn(batch, d)
                noise = np.random.randn(batch,1) * std_noise
                b = A @ x_true.reshape(-1,1) + noise

                # least-squares gradient (1/batch) * A^T (A x_i - b)
                g = (1/batch) * (A.T @ (A @ Xd[i,:].reshape(-1,1) - b))
                G[i,:] = g.reshape(-1)

            # main update (W @ Xd is 1-D here and broadcasts back to (nodes, d))
            Xd = W @ Xd - lr * G

            # accumulate the last num_last_iters iterates
            if ite >= iterations - num_last_iters:
                Xd_averaged += Xd

            # compute distance to the true solution
            error = np.linalg.norm(Xd - X_true_cent,'fro')
            error_record_d[ite] = error

        # fill in Xd_across_exp with the tail-averaged iterate of this run
        Xd_averaged /= num_last_iters

        for i in range(nodes):
            Xd_across_exp[i,e] = Xd_averaged[i]

        error_record_d_averaged += error_record_d

    error_record_d_averaged = error_record_d_averaged/num_exp
    final_iter_all.append(Xd_across_exp)
    error_iter_all.append(error_record_d_averaged)
np.save('./final_stepsize/final_iter_all_cent.npy',final_iter_all)
np.save('./final_stepsize/error_iter_cent.npy', error_iter_all)

print('centralized finished!')

# For every step size and node: centre the averaged iterates across
# experiments and estimate alpha from blocks of 40 experiments.
alpha_nodes_all = []
for l in range(len(final_iter_all)):
    alpha_nodes = []
    for i in range(nodes):
        data_alpha = np.array(final_iter_all[l][i]-np.mean(final_iter_all[l][i],axis=0))
        alpha_nodes.append(alpha_estimator(40, data_alpha))

    alpha_nodes_all.append(alpha_nodes)
# rows: nodes, columns: step sizes
alpha_nodes_all = np.transpose(alpha_nodes_all)

np.save('./final_stepsize/alpha_nodes_cent_010-021.npy',alpha_nodes_all)

centralized start!
0.1
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.10379310344827587
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.10758620689655173
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.11137931034482759
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.11517241379310345
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.11896551724137931
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.12275862068965518
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.12655172413793103
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.1303448275862069
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.13413793103448277
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.13793103448275862
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.1417241379310345
0
100
200
3

In [10]:
# Display the estimates currently held in `alpha_nodes_all`, i.e. those of
# whichever experiment cell ran last (rows: nodes, columns: step sizes).
alpha_nodes_all

array([[2.0035321 , 2.00039592, 2.00224146, 2.02810599, 2.01470455,
        2.02380676, 2.0325168 , 1.99406727, 2.01752581, 1.99030675,
        2.00035605, 2.01220297, 2.02347685, 2.02638575, 2.02239196,
        2.01629731, 2.0105959 , 2.0125512 , 1.98310412, 2.01271682,
        2.02617723, 2.00838478, 2.00402965, 2.01318053, 2.00422068,
        2.00941731, 2.00430651, 2.01974572, 2.01594642, 1.99813311]])

In [None]:
############################### cycle
# Ring topology: each node mixes with itself and its two ring neighbours.
# NOTE(review): uses whatever `nodes`, `batch` and `lr_list` currently hold in
# the kernel; other experiment cells rebind them.
W = generate_cir(nodes)
print('cycle start!')

final_iter_all = []   # per step size: array (nodes, num_exp, d) of averaged iterates
error_iter_all = []   # per step size: error curve averaged over experiments
for lr in lr_list:
    print(lr)
    Xd_across_exp = np.zeros((nodes, num_exp, d))
    error_record_d_averaged = np.zeros(iterations)

    for e in range(num_exp):

        if e%100 == 0:
            print(e)

        Xd = np.ones((nodes,1)) @ x0.reshape(1,-1) # each row i is x[i]
        error_record_d = np.zeros(iterations)
        Xd_averaged = np.zeros((nodes, d))

        for ite in range(iterations):

            # sample data and get gradient
            G = np.zeros((nodes, d)) # each row i is gradient of node i
            for i in range(nodes):

                # sample data: b = A x_true + noise
                A = np.random.randn(batch, d)
                noise = np.random.randn(batch,1) * std_noise
                b = A @ x_true.reshape(-1,1) + noise

                # least-squares gradient (1/batch) * A^T (A x_i - b)
                g = (1/batch) * (A.T @ (A @ Xd[i,:].reshape(-1,1) - b))
                G[i,:] = g.reshape(-1)

            # main update: mix with neighbours, then take a gradient step
            Xd = W @ Xd - lr * G

            # accumulate the last num_last_iters iterates
            if ite >= iterations - num_last_iters:
                Xd_averaged += Xd

            # compute distance to the true solution
            error = np.linalg.norm(Xd - X_true,'fro')
            error_record_d[ite] = error

        # fill in Xd_across_exp with the tail-averaged iterate of this run
        Xd_averaged /= num_last_iters

        for i in range(nodes):
            Xd_across_exp[i,e] = Xd_averaged[i]

        error_record_d_averaged += error_record_d

    error_record_d_averaged = error_record_d_averaged/num_exp
    final_iter_all.append(Xd_across_exp)
    error_iter_all.append(error_record_d_averaged)
# Own file names for the cycle run ("cir", matching the alpha file below); the
# previous "*_hyper.npy" names collided with the hypercube cell's outputs.
np.save('./final_stepsize/final_iter_all_cir.npy',final_iter_all)
np.save('./final_stepsize/error_iter_cir.npy', error_iter_all)

print('cycle finished!')

# For every step size and node: centre the averaged iterates across
# experiments and estimate alpha from blocks of 40 experiments.
alpha_nodes_all = []
for l in range(len(final_iter_all)):
    alpha_nodes = []
    for i in range(nodes):
        data_alpha = np.array(final_iter_all[l][i]-np.mean(final_iter_all[l][i],axis=0))
        alpha_nodes.append(alpha_estimator(40, data_alpha))

    alpha_nodes_all.append(alpha_nodes)
# rows: nodes, columns: step sizes
alpha_nodes_all = np.transpose(alpha_nodes_all)

np.save('./final_stepsize/alpha_nodes_cir.npy',alpha_nodes_all)

In [4]:
############################### hypercube
# 0/1 pattern of an 8-node graph: every row has four ones (the node itself plus
# three neighbours), so dividing by 4 makes each row of W sum to one.
w = [[1,1,1,0,1,0,0,0],
         [1,1,0,1,0,1,0,0],
         [1,0,1,1,0,0,1,0],
         [0,1,1,1,0,0,0,1],
         [1,0,0,0,1,1,1,0],
         [0,1,0,0,1,1,0,1],
         [0,0,1,0,1,0,1,1],
         [0,0,0,1,0,1,1,1]]
w=np.array(w)
W = w/4
print('hypercube start!')

# One entry per step size in lr_list:
#   final_iter_all -> array (nodes, num_exp, d) of tail-averaged iterates
#   error_iter_all -> length-`iterations` error curve averaged over experiments
final_iter_all = []
error_iter_all = []
for lr in lr_list:
# for batch in batch_list:
    print(lr)
    Xd_across_exp = np.zeros((nodes, num_exp, d))
    error_record_d_averaged = np.zeros(iterations)

    for e in range(num_exp):

        # progress indicator every 100 experiments
        if e%100 == 0:
            print(e)

        # every node starts from the same point x0
        Xd = np.ones((nodes,1)) @ x0.reshape(1,-1) # each row i is x[i]
        error_record_d = np.zeros(iterations)
        Xd_averaged = np.zeros((nodes, d))

        for ite in range(iterations):

            # sample data and get gradient
            G = np.zeros((nodes, d)) # each row i is gradient of node i
            for i in range(nodes):

                # sample data: standard-normal features, b = A x_true + noise
                A = np.random.randn(batch, d)
                noise = np.random.randn(batch,1) * std_noise
                b = A @ x_true.reshape(-1,1) + noise

                # compute gradient: (1/batch) * A^T (A x_i - b), a (d, 1) column
                g = (1/batch) * (A.T @ (A @ Xd[i,:].reshape(-1,1) - b))
                G[i,:] = g.reshape(-1)

            # main update: mix rows through W, then step along the gradients
            Xd = W @ Xd - lr * G

            # accumulate the last num_last_iters iterates (divided below)
            if ite >= iterations - num_last_iters:
                Xd_averaged += Xd

            # compute distance to the true solution
            error = np.linalg.norm(Xd - X_true,'fro')
            error_record_d[ite] = error

        # fill in Xd_across_exp
        Xd_averaged /= num_last_iters

        for i in range(nodes):
            Xd_across_exp[i,e] = Xd_averaged[i]

        error_record_d_averaged += error_record_d

    error_record_d_averaged = error_record_d_averaged/num_exp
    final_iter_all.append(Xd_across_exp)
    error_iter_all.append(error_record_d_averaged)
np.save('./final_stepsize/final_iter_all_hyper.npy',final_iter_all)
np.save('./final_stepsize/error_iter_hyper.npy', error_iter_all)

print('hypercube finished!')

# For every step size l and node i: subtract the mean over experiments from
# that node's averaged iterates and pass the result to alpha_estimator with
# block size m = 40.
alpha_nodes_all = []
#alpha_cent_all = []
for l in range(len(final_iter_all)):
    alpha_nodes = []
    for i in range(nodes):
        data_alpha = np.array(final_iter_all[l][i]-np.mean(final_iter_all[l][i],axis=0))
        alpha_nodes.append(alpha_estimator(40, data_alpha))

    #data_alpha = np.array(final_iter_cent_all[l]-np.mean(final_iter_cent_all[l], axis=0))
    #alpha_cent = alpha_estimator(40, data_alpha)
    alpha_nodes_all.append(alpha_nodes)
    #alpha_cent_all.append(alpha_cent)
# after the transpose: rows are nodes, columns are step sizes
alpha_nodes_all = np.transpose(alpha_nodes_all)

np.save('./final_stepsize/alpha_nodes_hyper.npy',alpha_nodes_all)

hypercube start!
0.165
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.16637931034482759
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.1677586206896552
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.16913793103448277
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.17051724137931035
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.17189655172413792
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.17327586206896553
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.1746551724137931
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.1760344827586207
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.17741379310344826
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.17879310344827587
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.18017241379310345
0
100
200
30

In [6]:
############################# bipartite

# Nodes 0-3 each mix with themselves and with all of nodes 4-7 (and vice
# versa), every non-zero weight being 1/5 so that each row sums to one.
# Stored as an ndarray, like the mixing matrices of the other experiment cells.
W = np.array([[1/5, 0, 0, 0, 1/5, 1/5, 1/5, 1/5 ],
              [0, 1/5, 0, 0, 1/5, 1/5, 1/5, 1/5 ],
              [0, 0, 1/5, 0, 1/5, 1/5, 1/5, 1/5 ],
              [0, 0, 0, 1/5, 1/5, 1/5, 1/5, 1/5 ],
              [1/5, 1/5, 1/5, 1/5, 1/5, 0, 0, 0 ],
              [1/5, 1/5, 1/5, 1/5, 0, 1/5, 0, 0 ],
              [1/5, 1/5, 1/5, 1/5, 0, 0, 1/5, 0 ],
              [1/5, 1/5, 1/5, 1/5, 0, 0, 0, 1/5 ]])
print('bipartite start!')

final_iter_all = []   # per step size: array (nodes, num_exp, d) of averaged iterates
error_iter_all = []   # per step size: error curve averaged over experiments
for lr in lr_list:
    print(lr)
    Xd_across_exp = np.zeros((nodes, num_exp, d))
    error_record_d_averaged = np.zeros(iterations)

    for e in range(num_exp):

        if e%100 == 0:
            print(e)

        Xd = np.ones((nodes,1)) @ x0.reshape(1,-1) # each row i is x[i]
        error_record_d = np.zeros(iterations)
        Xd_averaged = np.zeros((nodes, d))

        for ite in range(iterations):

            # sample data and get gradient
            G = np.zeros((nodes, d)) # each row i is gradient of node i
            for i in range(nodes):

                # sample data: b = A x_true + noise
                A = np.random.randn(batch, d)
                noise = np.random.randn(batch,1) * std_noise
                b = A @ x_true.reshape(-1,1) + noise

                # least-squares gradient (1/batch) * A^T (A x_i - b)
                g = (1/batch) * (A.T @ (A @ Xd[i,:].reshape(-1,1) - b))
                G[i,:] = g.reshape(-1)

            # main update: mix with neighbours, then take a gradient step
            Xd = W @ Xd - lr * G

            # accumulate the last num_last_iters iterates
            if ite >= iterations - num_last_iters:
                Xd_averaged += Xd

            # compute distance to the true solution
            error = np.linalg.norm(Xd - X_true,'fro')
            error_record_d[ite] = error

        # fill in Xd_across_exp with the tail-averaged iterate of this run
        Xd_averaged /= num_last_iters

        for i in range(nodes):
            Xd_across_exp[i,e] = Xd_averaged[i]

        error_record_d_averaged += error_record_d

    error_record_d_averaged = error_record_d_averaged/num_exp
    final_iter_all.append(Xd_across_exp)
    error_iter_all.append(error_record_d_averaged)
np.save('./final_stepsize/final_iter_all_bipartite.npy',final_iter_all)
np.save('./final_stepsize/error_iter_bipartite.npy', error_iter_all)

print('bipartite finished!')

# For every step size and node: centre the averaged iterates across
# experiments and estimate alpha from blocks of 40 experiments.
alpha_nodes_all = []
for l in range(len(final_iter_all)):
    alpha_nodes = []
    for i in range(nodes):
        data_alpha = np.array(final_iter_all[l][i]-np.mean(final_iter_all[l][i],axis=0))
        alpha_nodes.append(alpha_estimator(40, data_alpha))

    alpha_nodes_all.append(alpha_nodes)
# rows: nodes, columns: step sizes
alpha_nodes_all = np.transpose(alpha_nodes_all)


np.save('./final_stepsize/alpha_nodes_bipartite.npy',alpha_nodes_all)

bipartite start!
0.165
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.16637931034482759
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.1677586206896552
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.16913793103448277
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.17051724137931035
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.17189655172413792
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.17327586206896553
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.1746551724137931
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.1760344827586207
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.17741379310344826
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.17879310344827587
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.18017241379310345
0
100
200
30

In [7]:
# Display the estimates currently held in `alpha_nodes_all`, i.e. those of
# whichever experiment cell ran last (rows: nodes, columns: step sizes).
alpha_nodes_all

array([[2.00149168, 2.01060345, 2.00943218, 1.99912446, 2.02100271,
        2.00008958, 2.01357283, 2.03529638, 2.0239131 , 2.00784758,
        2.00265507, 1.99210849, 1.98966121, 1.92370586, 1.75728446,
        1.36091902, 0.9978477 , 0.97211402, 0.90872681, 0.9342058 ,
        0.96516107, 0.97220524, 0.86852138, 0.93748505, 0.92828194,
        0.91387819, 0.90070373, 0.88757551, 0.91313061, 0.92281967],
       [2.00006924, 2.02068465, 2.0008826 , 2.0236936 , 2.00623403,
        2.00577063, 2.00502943, 2.0246032 , 2.03390353, 2.0174439 ,
        2.01584017, 1.99534663, 1.99163613, 1.91519543, 1.74816036,
        1.35534523, 0.99493129, 0.97387699, 0.90911436, 0.92069223,
        0.9636631 , 0.97312486, 0.87714494, 0.94160646, 0.92512159,
        0.91383828, 0.88964972, 0.88495259, 0.91846921, 0.9261128 ],
       [1.99557489, 2.0287029 , 1.99423351, 2.00947466, 2.01370122,
        1.98973064, 2.02648144, 2.03356643, 2.01839775, 2.02996272,
        2.00006554, 1.98350364, 1.97410273, 1.

In [6]:
# Fully connected topology: every entry of W is 1/nodes.
W = generate_ful(nodes)
print('fully start!')

# One entry per step size in lr_list:
#   final_iter_all -> array (nodes, num_exp, d) of tail-averaged iterates
#   error_iter_all -> length-`iterations` error curve averaged over experiments
final_iter_all = []
error_iter_all = []
for lr in lr_list:
    print(lr)
    Xd_across_exp = np.zeros((nodes, num_exp, d))
    error_record_d_averaged = np.zeros(iterations)

    for e in range(num_exp):

        # progress indicator every 100 experiments
        if e%100 == 0:
            print(e)

        # every node starts from the same point x0
        Xd = np.ones((nodes,1)) @ x0.reshape(1,-1) # each row i is x[i]
        error_record_d = np.zeros(iterations)
        Xd_averaged = np.zeros((nodes, d))

        for ite in range(iterations):

            # sample data and get gradient
            G = np.zeros((nodes, d)) # each row i is gradient of node i
            for i in range(nodes):

                # sample data: standard-normal features, b = A x_true + noise
                A = np.random.randn(batch, d)
                noise = np.random.randn(batch,1) * std_noise
                b = A @ x_true.reshape(-1,1) + noise

                # compute gradient: (1/batch) * A^T (A x_i - b), a (d, 1) column
                g = (1/batch) * (A.T @ (A @ Xd[i,:].reshape(-1,1) - b))
                G[i,:] = g.reshape(-1)

            # main update: mix rows through W, then step along the gradients
            Xd = W @ Xd - lr * G

            # accumulate the last num_last_iters iterates (divided below)
            if ite >= iterations - num_last_iters:
                Xd_averaged += Xd

            # compute distance to the true solution
            error = np.linalg.norm(Xd - X_true,'fro')
            error_record_d[ite] = error

        # fill in Xd_across_exp
        Xd_averaged /= num_last_iters

        for i in range(nodes):
            Xd_across_exp[i,e] = Xd_averaged[i]

        error_record_d_averaged += error_record_d

    error_record_d_averaged = error_record_d_averaged/num_exp
    final_iter_all.append(Xd_across_exp)
    error_iter_all.append(error_record_d_averaged)
# NOTE(review): written to the current working directory, whereas several
# other experiment cells save under './final_stepsize/' - confirm intended.
np.save('final_iter_all_fully.npy',final_iter_all)
np.save('error_iter_fully.npy', error_iter_all)

print('fully finished!')
# For every step size l and node i: subtract the mean over experiments from
# that node's averaged iterates and pass the result to alpha_estimator with
# block size m = 40.
alpha_nodes_all = []
for l in range(len(final_iter_all)):
    alpha_nodes = []
    for i in range(nodes):
        data_alpha = np.array(final_iter_all[l][i]-np.mean(final_iter_all[l][i],axis=0))
        alpha_nodes.append(alpha_estimator(40, data_alpha))

    alpha_nodes_all.append(alpha_nodes)
# after the transpose: rows are nodes, columns are step sizes
alpha_nodes_all = np.transpose(alpha_nodes_all)

np.save('alpha_nodes_fully.npy',alpha_nodes_all)

fully start!
0.165
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.16637931034482759
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.1677586206896552
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.16913793103448277
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.17051724137931035
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.17189655172413792
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.17327586206896553
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.1746551724137931
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.1760344827586207
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.17741379310344826
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.17879310344827587
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.18017241379310345
0
100
200
300
40

In [13]:
def generate_star(N, hub_weight=1/2):
    """Build the mixing matrix of a star graph whose hub is node 0.

    Row 0 (the hub) averages uniformly over all ``N`` nodes. Every other row
    ``i`` (a leaf) puts ``hub_weight`` on the hub and ``1 - hub_weight`` on
    itself, so each row sums to one.

    Parameters
    ----------
    N : int
        Number of nodes.
    hub_weight : float, optional
        Weight a leaf assigns to the hub. The default 1/2 reproduces the
        previously hard-coded even split between hub and self.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(N, N)``.
    """
    x = np.zeros([N, N])
    if N == 0:
        return x
    x[0, :] = 1/N
    leaves = np.arange(1, N)
    x[leaves, 0] = hub_weight
    x[leaves, leaves] = 1 - hub_weight
    return x

# Reset the notebook-wide settings that this run relies on; other experiment
# cells may have rebound them.
nodes = 8
lr_list = np.linspace(0.165,0.205,num = 30)
batch = 5
############################### star
# Star topology with node 0 as the hub.
W = generate_star(nodes)
print('star start!')

# One entry per step size in lr_list:
#   final_iter_all -> array (nodes, num_exp, d) of tail-averaged iterates
#   error_iter_all -> length-`iterations` error curve averaged over experiments
final_iter_all = []
error_iter_all = []
for lr in lr_list:
# for batch in batch_list:
    print(lr)
    Xd_across_exp = np.zeros((nodes, num_exp, d))
    error_record_d_averaged = np.zeros(iterations)

    for e in range(num_exp):

        # progress indicator every 100 experiments
        if e%100 == 0:
            print(e)

        # every node starts from the same point x0
        Xd = np.ones((nodes,1)) @ x0.reshape(1,-1) # each row i is x[i]
        error_record_d = np.zeros(iterations)
        Xd_averaged = np.zeros((nodes, d))

        for ite in range(iterations):

            # sample data and get gradient
            G = np.zeros((nodes, d)) # each row i is gradient of node i
            for i in range(nodes):

                # sample data: standard-normal features, b = A x_true + noise
                A = np.random.randn(batch, d)
                noise = np.random.randn(batch,1) * std_noise
                b = A @ x_true.reshape(-1,1) + noise

                # compute gradient: (1/batch) * A^T (A x_i - b), a (d, 1) column
                g = (1/batch) * (A.T @ (A @ Xd[i,:].reshape(-1,1) - b))
                G[i,:] = g.reshape(-1)

            # main update: mix rows through W, then step along the gradients
            Xd = W @ Xd - lr * G

            # accumulate the last num_last_iters iterates (divided below)
            if ite >= iterations - num_last_iters:
                Xd_averaged += Xd

            # compute distance to the true solution
            error = np.linalg.norm(Xd - X_true,'fro')
            error_record_d[ite] = error

        # fill in Xd_across_exp
        Xd_averaged /= num_last_iters

        for i in range(nodes):
            Xd_across_exp[i,e] = Xd_averaged[i]

        error_record_d_averaged += error_record_d

    error_record_d_averaged = error_record_d_averaged/num_exp
    final_iter_all.append(Xd_across_exp)
    error_iter_all.append(error_record_d_averaged)
# NOTE(review): these two files go to the current working directory while the
# alpha file at the end of this cell goes to './final_stepsize/' - confirm
# which location is intended.
np.save('final_iter_all_star_new.npy',final_iter_all)
np.save('error_iter_star_new.npy', error_iter_all)

print('star finished!')

# For every step size l and node i: subtract the mean over experiments from
# that node's averaged iterates and pass the result to alpha_estimator with
# block size m = 40.
alpha_nodes_all = []
#alpha_cent_all = []
for l in range(len(final_iter_all)):
    alpha_nodes = []
    for i in range(nodes):
        data_alpha = np.array(final_iter_all[l][i]-np.mean(final_iter_all[l][i],axis=0))
        alpha_nodes.append(alpha_estimator(40, data_alpha))

    #data_alpha = np.array(final_iter_cent_all[l]-np.mean(final_iter_cent_all[l], axis=0))
    #alpha_cent = alpha_estimator(40, data_alpha)
    alpha_nodes_all.append(alpha_nodes)
    #alpha_cent_all.append(alpha_cent)
# after the transpose: rows are nodes, columns are step sizes
alpha_nodes_all = np.transpose(alpha_nodes_all)


np.save('./final_stepsize/alpha_nodes_star_new.npy',alpha_nodes_all)

star start!
0.165
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.16637931034482759
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.1677586206896552
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.16913793103448277
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.17051724137931035
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.17189655172413792
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.17327586206896553
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.1746551724137931
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.1760344827586207
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.17741379310344826
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.17879310344827587
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
0.18017241379310345
0
100
200
300
400