# Bonus1: Parallel Algorithms (Decentralized Optimization)

### Name: Harman Singh Bath


## 0. You will do the following:

1. Read the lecture note: [click here](https://github.com/wangshusen/DeepLearning/blob/master/LectureNotes/Parallel/Parallel.pdf)

2. Implement federated averaging or decentralized optimization.

3. Plot the convergence curve. (The x-axis can be ```number of epochs``` or ```number of communications```. You must make sure the label is correct.)

4. Convert the .IPYNB file to .HTML file.

    * The HTML file must contain **the code** and **the output after execution**.
    
5. Upload this .HTML file to your Google Drive, Dropbox, or your Github repo. (If it is submitted to Google Drive or Dropbox, you must make the file open-access.)

6. Submit the link to this .HTML file to Canvas.

    * Example: https://github.com/wangshusen/CS583-2020S/blob/master/homework/Bonus1/Bonus1.html



# 1. Data processing

- Download the Diabetes dataset from https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/diabetes
- Load the data using sklearn.
- Preprocess the data.

## 1.1. Load the data

In [1]:
from sklearn import datasets
import numpy

# Parse the LIBSVM-format file named 'diabetes' (looked up relative to the
# working directory) into a sparse feature matrix and a label vector, then
# densify the features for the numpy code in the following cells.
x_sparse, y = datasets.load_svmlight_file('diabetes')
x = x_sparse.todense()

# Report the dataset dimensions.
print(f'Shape of x: {x.shape}')
print(f'Shape of y: {y.shape}')

Shape of x: (768, 8)
Shape of y: (768,)


## 1.2. Partition to training and test sets

In [2]:
# Randomly split the samples: 640 rows for training, the remainder for testing.
n = x.shape[0]
n_train = 640
n_test = n - n_train

# No random seed is set here, so the split changes from run to run.
rand_indices = numpy.random.permutation(n)
train_indices, test_indices = rand_indices[:n_train], rand_indices[n_train:]

x_train, x_test = x[train_indices, :], x[test_indices, :]

# Labels are reshaped into column vectors so they broadcast against the
# feature rows in the loss/gradient code.
y_train = y[train_indices].reshape(n_train, 1)
y_test = y[test_indices].reshape(n_test, 1)

print(f'Shape of x_train: {x_train.shape}')
print(f'Shape of x_test: {x_test.shape}')
print(f'Shape of y_train: {y_train.shape}')
print(f'Shape of y_test: {y_test.shape}')

Shape of x_train: (640, 8)
Shape of x_test: (128, 8)
Shape of y_train: (640, 1)
Shape of y_test: (128, 1)


## 1.3. Feature scaling

Use standardization to transform both the training and the test features.

In [3]:
# Standardization
import numpy

# Per-feature mean and standard deviation are estimated on the training set
# only, then reused unchanged for the test set.
d = x_train.shape[1]
mu = x_train.mean(axis=0).reshape(1, d)
sig = x_train.std(axis=0).reshape(1, d)

# Standardize the training features; the 1E-6 offset keeps the division
# finite when a feature has zero variance.
x_train = numpy.divide(x_train - mu, sig + 1E-6)

# Standardize the test features with the training statistics.
x_test = numpy.divide(x_test - mu, sig + 1E-6)

# Sanity check: per-feature mean and std of the transformed test set.
print('test mean = ')
print(x_test.mean(axis=0))

print('test std = ')
print(x_test.std(axis=0))

test mean = 
[[ 0.03333134 -0.00899833  0.07757726  0.0988063   0.001419    0.0548428
   0.0095346   0.01509145]]
test std = 
[[1.06805005 1.03258155 0.86336279 0.98452403 0.91739704 1.14284902
  0.98057207 0.91956871]]


## 1.4. Add a dimension of all ones

In [4]:
# Append a constant column of ones to each feature matrix so the last model
# coefficient acts as an intercept.  Note that `d` keeps the feature count
# measured *before* the extra column is added.
n_train, d = x_train.shape
x_train = numpy.hstack((x_train, numpy.ones((n_train, 1))))

n_test, d = x_test.shape
x_test = numpy.hstack((x_test, numpy.ones((n_test, 1))))

print(f'Shape of x_train: {x_train.shape}')
print(f'Shape of x_test: {x_test.shape}')

Shape of x_train: (640, 9)
Shape of x_test: (128, 9)


In [5]:
# Widen the notebook page container to 80% of the browser window.
# `display` is imported from the public `IPython.display` module: importing
# it from `IPython.core.display` is deprecated and may stop working on newer
# IPython releases.
from IPython.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))

# 2. Worker

Define a Worker class.

In [7]:
import numpy

# Class defining a worker node
class Worker:
    """Simulated worker node for decentralized logistic regression.

    A worker holds a local shard of the data, its own parameter vector ``w``
    and a momentum buffer ``v``.  In ``client_update`` it mixes its
    parameters with those of its neighbors and then takes one momentum
    gradient step on its local data.
    """

    def __init__(self, node_name, x, y, alpha, beta, lam, scale_factor):
        """Store the local shard (row-shuffled) and zero-initialise the model.

        Args:
            node_name: identifier of this worker.
            x: s x d local feature matrix.
            y: s x 1 local label matrix (must be 2-D; presumably labels are
                in {-1, +1} given the logistic-loss formula -- confirm
                against the dataset).
            alpha: step size used by ``agd``.
            beta: momentum coefficient used by ``agd``.
            lam: regularization parameter; stored on the instance
                (``objective`` takes its own ``lam`` argument).
            scale_factor: weight given to each neighbor's parameters when
                mixing in ``client_update`` (this worker's own weight is 1).
        """
        self.node_name = node_name
        self.neighbors = []
        self.s = x.shape[0]  # number of local samples
        self.d = x.shape[1]  # number of features
        self.scale_factor = scale_factor
        self.lam = lam
        self.alpha = alpha
        self.beta = beta

        # Randomly shuffle the samples; fancy indexing also gives this worker
        # its own copy of the shard.
        rand_indices = numpy.random.permutation(self.s)
        self.x = x[rand_indices, :]  # s x d local feature matrix
        self.y = y[rand_indices, :]  # s x 1 local label matrix

        self.v = numpy.zeros((self.d, 1))  # d x 1 momentum buffer
        self.w = numpy.zeros((self.d, 1))  # d x 1 model parameter vector

    def add_neighbor(self, neighbor):
        """Register another worker whose parameters are mixed in on update."""
        self.neighbors.append(neighbor)

    def set_param(self, w):
        """Overwrite the local parameters with a private copy of ``w``.

        ``agd`` and ``client_update`` modify ``self.w`` in place, so storing
        the caller's array directly would let one worker's update silently
        change the caller's array (and every other worker given the same
        array).  Copying keeps each worker's state independent.
        """
        self.w = numpy.array(w, dtype=float)

    def loss(self):
        """Return the summed logistic loss over the local samples.

        Computes ``sum_i log(1 + exp(-y_i * x_i . w))``.  No regularization
        term and no division by the sample count are applied here.
        """
        yx = numpy.multiply(self.y, self.x)  # s x d
        yxw = numpy.dot(yx, self.w)  # s x 1
        # log(1 + exp(-z)) == logaddexp(0, -z); this form stays finite when
        # exp(-z) would overflow.
        return numpy.sum(numpy.logaddexp(0, -yxw))

    def gradient(self, y, x):
        """Return the d x 1 gradient of the summed logistic loss at ``self.w``.

        Args:
            y: s x 1 label matrix.
            x: s x d feature matrix.

        The result is a sum over the given samples; it contains no
        regularization term.
        """
        yx = numpy.multiply(y, x)  # s x d
        yxw = numpy.dot(yx, self.w)  # s x 1
        # 1 / (1 + exp(z)) == exp(-logaddexp(0, z)), evaluated without
        # overflowing exp(z) for large z.
        coef = numpy.exp(-numpy.logaddexp(0, yxw))  # s x 1
        vec = numpy.multiply(yx, coef)  # s x d
        g = -numpy.sum(vec, axis=0).reshape(self.d, 1)  # d x 1
        return g

    def agd(self, g):
        """Take one momentum step: ``v = beta*v + g``, then ``w -= alpha*v``.

        Both ``self.v`` and ``self.w`` are updated in place.
        """
        self.v *= self.beta
        self.v += g
        self.w -= self.alpha * self.v

    def objective(self, lam, loss):
        """Return ``loss / s + lam/2 * ||w||^2`` and cache it in ``self.obj``.

        Args:
            lam: regularization parameter used for this evaluation.
            loss: summed (un-normalised) loss, e.g. the value of ``loss()``.
        """
        reg = lam / 2 * numpy.sum(self.w * self.w)
        self.obj = loss / self.s + reg
        return self.obj

    def client_update(self):
        """Run one decentralized update and return ``(w, local_loss)``.

        Steps, in order:
          1. evaluate the local gradient at the current (pre-mixing) ``w``;
          2. replace ``w`` by the weighted average of itself (weight 1) and
             each neighbor's current ``w`` (weight ``scale_factor``);
          3. apply one momentum step with the gradient from step 1;
          4. evaluate the local loss at the new ``w``.

        The returned ``w`` is this worker's live parameter array, not a copy.
        """
        g = self.gradient(self.y, self.x)

        for neighbor in self.neighbors:
            self.w += self.scale_factor * neighbor.w
        self.w /= 1 + (self.scale_factor * len(self.neighbors))

        self.agd(g)
        local_loss = self.loss()
        return self.w, local_loss

# 3. Simulation Generator

In [9]:
import math

def create_workers(m,x,y,alpha,beta,lam,scale_factor):
    """Split the data into ``m`` equal contiguous shards, one Worker each.

    Each shard has ``floor(n / m)`` rows; when ``n`` is not a multiple of
    ``m`` the trailing rows are not assigned to any worker.  Workers are
    named ``worker_1`` ... ``worker_m`` and all receive the same ``alpha``,
    ``beta``, ``lam`` and ``scale_factor``.

    Args:
        m: number of workers to create.
        x: n x d feature matrix.
        y: label matrix with n rows (indexed as 2-D).
        alpha, beta, lam, scale_factor: forwarded to every Worker.

    Returns:
        List of the ``m`` workers, in shard order.
    """
    n_samples, _ = x.shape
    shard_size = math.floor(n_samples / m)

    def build(rank):
        # Row indices of the rank-th contiguous shard.
        rows = list(range(rank * shard_size, (rank + 1) * shard_size))
        return Worker("worker_" + str(rank + 1), x[rows, :], y[rows, :],
                      alpha, beta, lam, scale_factor)

    return [build(rank) for rank in range(m)]

In [8]:
def aggregate(weights, losses):
    """Combine per-worker results into one parameter vector and one loss.

    Args:
        weights: sequence of equally-shaped parameter arrays, one per worker.
        losses: sequence of per-worker loss values.

    Returns:
        Tuple ``(w, loss)`` where ``w`` is the element-wise mean of the
        parameter arrays and ``loss`` is the sum of the loss values.
    """
    stacked = numpy.array(weights)  # workers along axis 0
    return stacked.mean(axis=0), sum(losses)

def objective(lam, w, loss, n):
    """Return the regularized objective ``loss / n + lam/2 * sum(w * w)``.

    Args:
        lam: regularization parameter.
        w: parameter array.
        loss: summed (un-normalised) loss.
        n: number of samples the loss was summed over.
    """
    squared_norm = numpy.sum(w*w)
    return loss/n + lam/2 * squared_norm

