# 基本定义

In [None]:
import math
import random
import re
import sys
import time
import pickle

import matplotlib.pyplot as plt

import torch
import torch.utils.data

## 数据集属性

In [None]:
# Settings shared by every optimizer run; the per-optimizer configs are
# copies of this dict with extra keys (e.g. 'gamma') added.
VRConfig = dict(
    rounds=10,              # number of reporting rounds
    displayInterval=4000,   # iterations per reporting round
    weight_decay=0.01,      # L2 regularisation coefficient
    honestSize=50,          # number of honest worker nodes
    byzantineSize=20,       # number of Byzantine worker nodes
)

In [None]:
# Description of the active dataset (ijcnn1).
dataSetConfig = dict(
    name='ijcnn1',
    dataSet='ijcnn1',
    dataSetSize=49990,
    maxFeature=22,
    findingType='1',
)

# Recorded on the shared config before it is copied, so every derived
# config below also carries the dataset size.
VRConfig['SET_SIZE'] = dataSetConfig['dataSetSize']

# ijcnn1 hyper-parameters: each optimizer gets its own copy of VRConfig
# extended with its step size (and any optimizer-specific keys).
SGDConfig = {**VRConfig, 'gamma': 2e-2}

batchConfig = {**VRConfig, 'batchSize': 50, 'gamma': 1e-2}

SVRGConfig = {
    **VRConfig,
    'snapshotInterval': dataSetConfig['dataSetSize'],
    'gamma': 2e-2,
}

SAGAConfig = {**VRConfig, 'gamma': 2e-2}

SARAHConfig = {**VRConfig, 'gamma': 2e-2}

In [None]:
# covtype参数
# dataSetConfig = {
#     'name': 'covtype',
#     'dataSet' : 'covtype.libsvm.binary.scale',
#     'dataSetSize': 581012,
#     'maxFeature': 54,
#     'findingType': '1',
# }

# VRConfig['SET_SIZE'] = dataSetConfig['dataSetSize']

# SGDConfig = VRConfig.copy()
# SGDConfig['gamma'] = 1e-2

# batchConfig = VRConfig.copy()
# batchConfig['batchSize'] = 50
# batchConfig['gamma'] = 5e-3

# SVRGConfig = VRConfig.copy()
# SVRGConfig['snapshotInterval'] = dataSetConfig['dataSetSize']
# SVRGConfig['gamma'] = 1e-2

# SAGAConfig = VRConfig.copy()
# SAGAConfig['gamma'] = 1e-2

# SARAHConfig = VRConfig.copy()
# SARAHConfig['gamma'] = 1e-2

In [None]:
# Module-level shortcuts for the dataset properties used throughout the file.
SET_SIZE, maxFeature, findingType = (
    dataSetConfig[key] for key in ('dataSetSize', 'maxFeature', 'findingType')
)

# Prefix (directory + dataset name) for every cached result file.
CACHE_DIR = './cache/{}_'.format(dataSetConfig['name'])
# ====================================================
# 报告函数
def log(*k, **kw):
    """Print a message prefixed with a local-time stamp and flush stdout.

    Positional and keyword arguments are forwarded unchanged to ``print``;
    the ``[yy-mm-dd HH:MM:SS] `` prefix is always written to stdout first.
    """
    stamp = time.strftime('[%y-%m-%d %H:%M:%S] ')
    print(stamp, end='')
    print(*k, **kw)
    sys.stdout.flush()
    
def logAxis(path, Fmin):
    """Return the optimality gap ``p - Fmin`` for every loss value in ``path``.

    Despite the name, no logarithm is taken here; callers put the plot axis
    on a log scale themselves.
    """
    gaps = []
    for value in path:
        gaps.append(value - Fmin)
    return gaps

## 运行参数

In [None]:
# (Disabled) estimate of a smoothness constant L from the feature matrix:
# L = np.sum([(scipy.sparse.linalg.norm(X[i, :]) + 1)
#             ** 2 for i in range(X.shape[0])])
# L = Lambda + 1/(4*SET_SIZE) * L

# Seed for the torch random number generator, so the initial point is reproducible.
SEED = 200

torch.manual_seed(SEED)  # seed the CPU random number generator

# Initial parameter vector: maxFeature weights plus one bias entry (last slot),
# drawn from torch.nn.init.normal_ with its default arguments.
w0 = torch.zeros(maxFeature + 1, dtype=torch.float64)
w0 = torch.nn.init.normal_(w0)

## 加载数据集

In [None]:
class SVM_dataSet(torch.utils.data.Dataset):
    """Dense in-memory dataset parsed from a ``label idx:value ...`` text file.

    The file ``./dataset/<dataSetConfig['dataSet']>`` is read line by line.
    A sample is labelled 1 when its label token equals the module-level
    ``findingType`` and 0 otherwise.  Tensor sizes come from the module-level
    ``SET_SIZE`` and ``maxFeature``.
    """
    def __init__(self, **dataSetConfig):
        """Load the whole file into ``self.X`` (features) and ``self.Y`` (labels).

        Args:
            **dataSetConfig: must contain ``'dataSet'``, the file name under
                ``./dataset/``; other keys are ignored here.

        Raises:
            ValueError: if a feature token is not of the form ``index:value``.
        """
        super(SVM_dataSet, self).__init__()
        log('开始加载数据集')
        self.X = torch.zeros((SET_SIZE, maxFeature), dtype=torch.float64)
        self.Y = torch.zeros((SET_SIZE), dtype=torch.float64)
        dataFile = './dataset/' + dataSetConfig['dataSet']
        pairPattern = re.compile(r'(\S+):(\S+)')

        posCount = 0
        negCount = 0  # was initialised to 1, which over-reported negatives by one
        line = 0
        with open(dataFile, 'r') as f:
            for vector in f:
                vector = vector.strip()
                if not vector:
                    # Tolerate blank (e.g. trailing) lines instead of crashing.
                    continue
                # partition() also accepts a label-only line (all-zero sample).
                (cat, _, data) = vector.partition(' ')
                if cat == findingType:
                    self.Y[line] = 1
                    posCount += 1
                else:
                    self.Y[line] = 0
                    negCount += 1
                # split() without arguments skips repeated spaces.
                for piece in data.split():
                    match = pairPattern.search(piece)
                    if match is None:
                        raise ValueError(
                            'malformed feature {!r} in sample {}'.format(piece, line))
                    feature = int(match.group(1)) - 1  # file indices are 1-based
                    value = float(match.group(2))
                    # Store the value in the dense feature matrix.
                    self.X[line][feature] = value
                line += 1
        log('加载数据集完成({})，正类：{}个，负类：{}个'.format(dataSetConfig['dataSet'], posCount, negCount))

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair at ``index``."""
        return self.X[index], self.Y[index]

    def __len__(self):
        """Return the configured dataset size (module-level ``SET_SIZE``)."""
        return SET_SIZE

In [None]:
dataset = SVM_dataSet(**dataSetConfig)

## 损失函数

In [None]:
def accuracy(w, dataset):
    """Return the fraction of samples whose thresholded prediction equals the label.

    A sample is predicted positive when ``LogisticRegression(w, x) > 0.5``.
    """
    hits = 0
    for sample, target in dataset:
        predicted = (LogisticRegression(w, sample) > 0.5).type(torch.uint8)
        hits += (predicted == target.type(torch.uint8)).item()
    return hits / len(dataset)
def F(w, dataset, weight_decay):
    """Return the regularised objective at ``w`` as a Python float.

    The value is the mean binary cross-entropy over ``dataset`` plus
    ``weight_decay * ||w||^2 / 2``.
    """
    bce = torch.nn.functional.binary_cross_entropy
    total = 0
    for sample, target in dataset:
        total += bce(LogisticRegression(w, sample), target)
    objective = total / len(dataset) + weight_decay * torch.norm(w)**2 / 2
    return objective.item()
def G(w, dataset, weight_decay):
    """Return the full gradient of the regularised objective at ``w``.

    Args:
        w: parameter vector; the last entry is the bias.
        dataset: indexable collection of ``(x, y)`` pairs supporting ``len``.
        weight_decay: L2 regularisation coefficient.

    Returns:
        A float64 tensor shaped like ``w``: the average per-sample
        gradient plus ``weight_decay * w``.
    """
    sampleCount = len(dataset)
    # Named `total` rather than `G` so the accumulator no longer shadows the function.
    total = torch.zeros_like(w, requires_grad=False, dtype=torch.float64)
    g = torch.zeros_like(w, requires_grad=False, dtype=torch.float64)
    for index in range(sampleCount):
        x, y = dataset[index]
        predict = LogisticRegression(w, x)

        err = -(y-predict).data
        g[:-1] = err*x
        g[-1] = err
        # Keyword `alpha` replaces the deprecated add_(Number, Tensor) overload.
        total.add_(g, alpha=1/sampleCount)
    total.add_(w, alpha=weight_decay)
    return total
def LogisticRegression(w, x):
    """Return ``sigmoid(w[:-1] . x + w[-1])``; the last entry of ``w`` is the bias."""
    weights, bias = w[:-1], w[-1]
    return torch.sigmoid(weights.dot(x) + bias)

In [None]:
def getVarience(w_local, honestSize):
    """Return the mean squared distance of the first ``honestSize`` rows to their mean.

    Only rows ``w_local[:honestSize]`` are considered; the result is a
    Python float.
    """
    honest = w_local[:honestSize]
    center = honest.mean(dim=0)
    spread = sum((row - center).norm()**2 for row in honest) / honestSize
    return spread.item()

# 聚合函数

In [None]:
def mean(wList):
    """Aggregate by averaging the rows of ``wList`` (coordinate-wise mean)."""
    return wList.mean(dim=0)

In [None]:
def gm(wList):
    """Approximate the geometric median of the rows of ``wList``.

    Starts from the coordinate-wise mean and repeats an inverse-distance
    weighted average of the rows, for at most 80 steps or until the
    estimate moves by no more than 1e-5.  A row that coincides with the
    current estimate is given distance 1 to avoid dividing by zero.
    """
    maxSteps, threshold = 80, 1e-5
    center = wList.mean(dim=0)
    for _ in range(maxSteps):
        radii = torch.norm(wList - center, dim=1)
        radii[radii == 0] = 1
        weightedSum = (wList / radii.unsqueeze(1)).sum(dim=0)
        weightTotal = torch.sum(1/radii)
        updated = weightedSum / weightTotal
        shift = torch.norm(center - updated)
        center = updated
        if shift <= threshold:
            break
    return center

In [None]:
def Krum_(nodeSize, byzantineSize):
    """Build a Krum aggregator for ``nodeSize`` messages, ``byzantineSize`` of them untrusted.

    The returned function scores each of the first ``nodeSize`` rows by the
    summed squared distance to its ``nodeSize - byzantineSize - 1`` closest
    rows (the row itself, at distance 0, is counted among them) and returns
    the row with the smallest score.
    """
    # float32 buffer shared by every call of the returned closure; all of
    # its entries are rewritten on each call.
    negSqDist = torch.zeros(nodeSize, nodeSize, dtype=torch.float32)
    # Includes the row's own zero distance.
    neighbours = nodeSize - byzantineSize - 2 + 1

    def Krum(wList):
        for a in range(nodeSize):
            for b in range(a, nodeSize):
                delta = wList[a].data - wList[b].data
                # Distances are negated so that topk (largest) selects the smallest ones.
                score = -(delta*delta).sum()
                negSqDist[a][b] = score.data
                negSqDist[b][a] = score.data
        closest, _ = negSqDist.topk(k=neighbours, dim=1)
        winner = closest.sum(dim=1).topk(1)[1].squeeze()
        return wList[winner]
    return Krum

In [None]:
def median(wList):
    """Aggregate by taking the coordinate-wise median over the rows of ``wList``."""
    return torch.median(wList, dim=0).values

# 优化算法

## Central SAGA

In [None]:
def CentralSAGA(w0, dataset, gamma, weight_decay, epoch=1, **kw):
    """Run single-machine SAGA on the regularised logistic loss, recording the loss.

    Args:
        w0: initial parameter vector (not modified; a detached copy is used).
        dataset: indexable collection of ``(x, y)`` pairs supporting ``len``.
        gamma: step size.
        weight_decay: L2 regularisation coefficient.
        epoch: number of passes; each pass performs ``len(dataset)`` updates.
        **kw: ignored, so a shared config dict can be splatted in.

    Returns:
        ``(w, path)``: the final parameters and the list of objective values
        (initial value followed by one entry per epoch).
    """
    # Sized by the dataset argument instead of the global SET_SIZE.
    sampleCount = len(dataset)

    # Initialisation
    w = w0.clone().detach()

    # Table of the most recent gradient evaluated for every sample.
    store = torch.zeros([sampleCount, w.size(0)], requires_grad=False, dtype=torch.float64)
    for index in range(sampleCount):
        x, y = dataset[index]
        predict = LogisticRegression(w, x)

        err = (predict-y).data
        store[index][:-1] = err*x
        store[index][-1] = err
        # Keyword `alpha` replaces the deprecated add_(Number, Tensor) overload.
        store[index].add_(w, alpha=weight_decay)

    G_avg = torch.mean(store, dim=0)
    path = [F(w, dataset, weight_decay)]

    # Pre-allocated scratch buffer for the fresh gradient
    new_G = torch.zeros(w0.size(), dtype=torch.float64)
    for e in range(epoch):
        for _ in range(sampleCount):
            # Pick the sample whose table entry is refreshed
            index = random.randint(0, sampleCount-1)

            x, y = dataset[index]
            predict = LogisticRegression(w, x)

            # Fresh gradient of the sampled term
            old_G = store[index]
            err = (predict-y).data
            new_G[:-1] = err*x
            new_G[-1] = err
            new_G.add_(w, alpha=weight_decay)

            # Must be formed before the table entry and the average are updated.
            gradient = new_G.data - old_G.data + G_avg.data

            G_avg.add_(new_G.data - old_G.data, alpha=1 / sampleCount)
            store[index] = new_G.data
            w.data.add_(gradient.data, alpha=-gamma)
        loss = F(w, dataset, weight_decay)
        path.append(loss)
        log('[SAGA]已迭代{:.0f}/{:.0f}趟, loss={}'.format(e+1, epoch, loss))
    return w, path

In [None]:
def SAGA_min(w0, dataset, gamma, weight_decay, epoch=1, **kw):
    """Run single-machine SAGA and return only the final parameters.

    Same update as ``CentralSAGA`` but without evaluating the objective each
    epoch; used to compute a reference minimiser.

    Args:
        w0: initial parameter vector (not modified; a detached copy is used).
        dataset: indexable collection of ``(x, y)`` pairs supporting ``len``.
        gamma: step size.
        weight_decay: L2 regularisation coefficient.
        epoch: number of passes; each pass performs ``len(dataset)`` updates.
        **kw: ignored, so a shared config dict can be splatted in.

    Returns:
        The final parameter tensor.
    """
    # Sized by the dataset argument instead of the global SET_SIZE.
    sampleCount = len(dataset)

    # Initialisation
    w = w0.clone().detach()

    # Table of the most recent gradient evaluated for every sample.
    store = torch.zeros([sampleCount, w.size(0)], requires_grad=False, dtype=torch.float64)
    for index in range(sampleCount):
        x, y = dataset[index]
        predict = LogisticRegression(w, x)

        err = -(y-predict).data
        store[index][:-1] = err*x
        store[index][-1] = err
        # Keyword `alpha` replaces the deprecated add_(Number, Tensor) overload.
        store[index].add_(w, alpha=weight_decay)

    G_avg = torch.mean(store, dim=0)

    # Pre-allocated scratch buffer for the fresh gradient
    new_G = torch.zeros(w0.size(), dtype=torch.float64)
    for e in range(epoch):
        for _ in range(sampleCount):
            # Pick the sample whose table entry is refreshed
            index = random.randint(0, sampleCount-1)

            x, y = dataset[index]
            predict = LogisticRegression(w, x)

            # Fresh gradient of the sampled term
            old_G = store[index]
            err = -(y-predict).data
            new_G[:-1] = err*x
            new_G[-1] = err
            new_G.add_(w, alpha=weight_decay)

            # Must be formed before the table entry and the average are updated.
            gradient = new_G.data - old_G.data + G_avg.data

            G_avg.add_(new_G.data - old_G.data, alpha=1 / sampleCount)
            store[index] = new_G.data
            w.data.add_(gradient.data, alpha=-gamma)
        log('[SAGA]已迭代{:.0f}/{:.0f}趟'.format(e+1, epoch))

    return w

## SGD

In [None]:
def SGD(w0, gamma, aggregate, weight_decay, honestSize=0, byzantineSize=0, attack=None,
            rounds=10, displayInterval=1000, SEED=100, fixSeed=False, **kw):
    """Simulate distributed SGD with optional Byzantine workers.

    The module-level ``dataset`` is split into ``honestSize`` contiguous
    shards.  Every iteration each honest node samples one point from its
    shard and reports its regularised gradient; ``attack`` (if given) then
    rewrites the message matrix in place, ``aggregate`` reduces it to one
    vector and a step of size ``gamma`` is taken.

    Args:
        w0: initial parameter vector (not modified).
        gamma: step size.
        aggregate: callable mapping the ``(nodes, dim)`` message matrix to a vector.
        weight_decay: L2 regularisation coefficient.
        honestSize: number of honest nodes (must be non-zero).
        byzantineSize: number of Byzantine nodes; requires ``attack`` when non-zero.
        attack: callable ``attack(message, byzantineSize)`` or ``None``.
        rounds: number of reporting rounds.
        displayInterval: iterations per round.
        SEED: seed for ``random`` when ``fixSeed`` is true.
        fixSeed: whether to reseed ``random`` before running.
        **kw: ignored, so a shared config dict can be splatted in.

    Returns:
        ``(w, path, variencePath)``: final parameters, objective value before
        training and after each round, and the honest-message variance after
        each round.
    """
    assert byzantineSize == 0 or attack is not None
    assert honestSize != 0

    if fixSeed:
        random.seed(SEED)

    nodeSize = honestSize + byzantineSize

    # Initialisation
    w = w0.clone().detach()

    # Shard boundaries: node i owns indices pieces[i] .. pieces[i+1]-1
    pieces = [(i*len(dataset)) // honestSize for i in range(honestSize+1)]

    path = [F(w, dataset, weight_decay)]
    variencePath = []
    log('[SGD]初始 loss={:.6f}, accuracy={:.2f} gamma={:}'.format(path[0], accuracy(w, dataset), gamma))

    # Pre-allocated buffers
    new_G = torch.zeros_like(w0, dtype=torch.float64)
    message = torch.zeros(nodeSize, len(w0), dtype=torch.float64)

    log('开始迭代')
    for r in range(rounds):
        for _ in range(displayInterval):
            # Honest nodes compute their stochastic gradients
            for node in range(honestSize):
                index = random.randint(pieces[node], pieces[node+1]-1)

                x, y = dataset[index]
                predict = LogisticRegression(w, x)
                err = (predict-y).data
                new_G[:-1] = err*x
                new_G[-1] = err
                # Keyword `alpha` replaces the deprecated add_(Number, Tensor) overload.
                new_G.add_(w, alpha=weight_decay)

                message[node].copy_(new_G)

            # Synchronisation: Byzantine rows are rewritten, then aggregated
            if attack is not None:
                attack(message, byzantineSize)
            g = aggregate(message)
            w.add_(g.data, alpha=-gamma)

        loss = F(w, dataset, weight_decay)
        acc = accuracy(w, dataset)
        path.append(loss)
        var = getVarience(message, honestSize)
        variencePath.append(var)
        log('[SGD]已迭代 {}/{} rounds (interval: {:.0f}), loss={:.9f}, accuracy={:.2f}, var={:.9f}'.format(
            r+1, rounds, displayInterval, loss, acc, var
        ))
    return w, path, variencePath


## BatchSGD

In [None]:
def BatchSGD(w0, gamma, aggregate, weight_decay, honestSize=0, byzantineSize=0, attack=None, batchSize=50,
            rounds=10, displayInterval=1000, SEED=100, fixSeed=False, **kw):
    """Simulate distributed mini-batch SGD with optional Byzantine workers.

    Identical to ``SGD`` except that every honest node averages the
    regularised gradients of ``batchSize`` points sampled (with replacement)
    from its shard of the module-level ``dataset``.

    Args:
        w0: initial parameter vector (not modified).
        gamma: step size.
        aggregate: callable mapping the ``(nodes, dim)`` message matrix to a vector.
        weight_decay: L2 regularisation coefficient.
        honestSize: number of honest nodes (must be non-zero).
        byzantineSize: number of Byzantine nodes; requires ``attack`` when non-zero.
        attack: callable ``attack(message, byzantineSize)`` or ``None``.
        batchSize: samples drawn per node per iteration.
        rounds: number of reporting rounds.
        displayInterval: iterations per round.
        SEED: seed for ``random`` when ``fixSeed`` is true.
        fixSeed: whether to reseed ``random`` before running.
        **kw: ignored, so a shared config dict can be splatted in.

    Returns:
        ``(w, path, variencePath)``: final parameters, objective value before
        training and after each round, and the honest-message variance after
        each round.
    """
    assert byzantineSize == 0 or attack is not None
    assert honestSize != 0

    if fixSeed:
        random.seed(SEED)

    nodeSize = honestSize + byzantineSize

    # Initialisation
    w = w0.clone().detach()

    # Shard boundaries: node i owns indices pieces[i] .. pieces[i+1]-1
    pieces = [(i*len(dataset)) // honestSize for i in range(honestSize+1)]

    path = [F(w, dataset, weight_decay)]
    variencePath = []
    log('[BatchSGD]初始 loss={:.6f}, accuracy={:.2f} gamma={:}'.format(path[0], accuracy(w, dataset), gamma))

    # Pre-allocated buffers
    new_G = torch.zeros_like(w0, dtype=torch.float64)
    message = torch.zeros(nodeSize, len(w0), dtype=torch.float64)

    log('开始迭代')
    for r in range(rounds):
        for _ in range(displayInterval):
            # Honest nodes compute their mini-batch gradients
            for node in range(honestSize):
                gradient = torch.zeros_like(new_G)
                for _b in range(batchSize):
                    index = random.randint(pieces[node], pieces[node+1]-1)

                    x, y = dataset[index]
                    predict = LogisticRegression(w, x)
                    err = (predict-y).data
                    new_G[:-1] = err*x
                    new_G[-1] = err
                    # Keyword `alpha` replaces the deprecated add_(Number, Tensor) overload.
                    new_G.add_(w, alpha=weight_decay)
                    gradient.add_(new_G, alpha=1/batchSize)
                message[node].copy_(gradient.data)

            # Synchronisation: Byzantine rows are rewritten, then aggregated
            if attack is not None:
                attack(message, byzantineSize)
            g = aggregate(message)
            w.add_(g.data, alpha=-gamma)

        loss = F(w, dataset, weight_decay)
        acc = accuracy(w, dataset)
        path.append(loss)
        var = getVarience(message, honestSize)
        variencePath.append(var)
        log('[BatchSGD]已迭代 {}/{} rounds (interval: {:.0f}), loss={:.9f}, accuracy={:.2f}, var={:.9f}'.format(
            r+1, rounds, displayInterval, loss, acc, var
        ))
    return w, path, variencePath


## SAGA

In [None]:
def SAGA(w0, gamma, aggregate, weight_decay, honestSize=0, byzantineSize=0, attack=None, 
            rounds=10, displayInterval=1000, SEED=100, fixSeed=False, **kw):
    """Simulate distributed SAGA with optional Byzantine workers.

    The module-level ``dataset`` is split into ``honestSize`` contiguous
    shards.  A table holds the latest gradient of every sample and each node
    keeps the average of its own shard's table entries; honest nodes report
    ``new - old + node_average`` for one sampled point per iteration.

    Args:
        w0: initial parameter vector (not modified).
        gamma: step size.
        aggregate: callable mapping the ``(nodes, dim)`` message matrix to a vector.
        weight_decay: L2 regularisation coefficient.
        honestSize: number of honest nodes (must be non-zero).
        byzantineSize: number of Byzantine nodes; requires ``attack`` when non-zero.
        attack: callable ``attack(message, byzantineSize)`` or ``None``.
        rounds: number of reporting rounds.
        displayInterval: iterations per round.
        SEED: seed for ``random`` when ``fixSeed`` is true.
        fixSeed: whether to reseed ``random`` before running.
        **kw: ignored, so a shared config dict can be splatted in.

    Returns:
        ``(w, path, variencePath)``: final parameters, objective value before
        training and after each round, and the honest-message variance after
        each round.
    """
    assert byzantineSize == 0 or attack is not None
    assert honestSize != 0

    if fixSeed:
        random.seed(SEED)

    nodeSize = honestSize + byzantineSize

    # Initialisation
    w = w0.clone().detach()

    # Gradient table, filled with the gradients at the starting point
    store = torch.zeros([len(dataset), w.size(0)], requires_grad=False, dtype=torch.float64)
    for index in range(len(dataset)):
        x, y = dataset[index]
        predict = LogisticRegression(w, x)

        err = (predict-y).data
        store[index][:-1] = err*x
        store[index][-1] = err
        # Keyword `alpha` replaces the deprecated add_(Number, Tensor) overload.
        store[index].add_(w, alpha=weight_decay)

    # Shard boundaries: node i owns indices pieces[i] .. pieces[i+1]-1
    pieces = [(i*len(dataset)) // honestSize for i in range(honestSize+1)]
    dataPerNode = [pieces[i+1] - pieces[i] for i in range(honestSize)]

    # Per-node running average of the table entries in that node's shard
    G_avg = torch.stack([
        store[pieces[i]:pieces[i+1]].mean(dim=0) for i in range(honestSize)
    ])
    path = [F(w, dataset, weight_decay)]
    variencePath = []
    log('[SAGA]初始 loss={:.6f}, accuracy={:.2f} gamma={:}'.format(path[0], accuracy(w, dataset), gamma))

    # Pre-allocated buffers
    new_G = torch.zeros_like(w0, dtype=torch.float64)
    message = torch.zeros(nodeSize, len(w0), dtype=torch.float64)

    log('开始迭代')
    for r in range(rounds):
        for _ in range(displayInterval):
            # Honest nodes compute their variance-reduced gradients
            for node in range(honestSize):
                index = random.randint(pieces[node], pieces[node+1]-1)

                x, y = dataset[index]
                predict = LogisticRegression(w, x)

                old_G = store[index]
                err = (predict-y).data
                new_G[:-1] = err*x
                new_G[-1] = err
                new_G.add_(w, alpha=weight_decay)

                # Must be formed before the table entry and the average are updated.
                gradient = new_G.data - old_G.data + G_avg[node].data

                G_avg[node].add_(new_G.data - old_G.data,
                                 alpha=1 / dataPerNode[node])
                store[index] = new_G.data

                message[node].copy_(gradient.data)

            # Synchronisation: Byzantine rows are rewritten, then aggregated
            if attack is not None:
                attack(message, byzantineSize)
            g = aggregate(message)
            w.add_(g.data, alpha=-gamma)

        loss = F(w, dataset, weight_decay)
        acc = accuracy(w, dataset)
        path.append(loss)
        var = getVarience(message, honestSize)
        variencePath.append(var)
        log('[SAGA]已迭代 {}/{} rounds (interval: {:.0f}), loss={:.9f}, accuracy={:.2f}, var={:.9f}'.format(
            r+1, rounds, displayInterval, loss, acc, var
        ))
    return w, path, variencePath

## SVRG

In [None]:
def SVRG(w0, gamma, aggregate, weight_decay, honestSize=0, byzantineSize=0, attack=None, 
            snapshotInterval=6000, rounds=10, displayInterval=1000, SEED=100, fixSeed=False, **kw):
    """Simulate distributed SVRG with optional Byzantine workers.

    The module-level ``dataset`` is split into ``honestSize`` contiguous
    shards.  Every ``snapshotInterval`` iterations each honest node
    recomputes the full gradient of its shard at the current point; between
    snapshots it reports ``grad(w) - grad(snapshot_w) + snapshot_gradient``
    for one sampled point.

    Args:
        w0: initial parameter vector (not modified).
        gamma: step size.
        aggregate: callable mapping the ``(nodes, dim)`` message matrix to a vector.
        weight_decay: L2 regularisation coefficient.
        honestSize: number of honest nodes (must be non-zero).
        byzantineSize: number of Byzantine nodes; requires ``attack`` when non-zero.
        attack: callable ``attack(message, byzantineSize)`` or ``None``.
        snapshotInterval: iterations between two snapshots.
        rounds: number of reporting rounds.
        displayInterval: iterations per round.
        SEED: seed for ``random`` when ``fixSeed`` is true.
        fixSeed: whether to reseed ``random`` before running.
        **kw: ignored, so a shared config dict can be splatted in.

    Returns:
        ``(w, path, variencePath)``: final parameters, objective value before
        training and after each round, and the honest-message variance after
        each round.
    """
    assert byzantineSize == 0 or attack is not None
    assert honestSize != 0

    if fixSeed:
        random.seed(SEED)

    nodeSize = honestSize + byzantineSize

    # Initialisation
    w = w0.clone().detach()

    # Shard boundaries: node i owns indices pieces[i] .. pieces[i+1]-1
    pieces = [(i*len(dataset)) // honestSize for i in range(honestSize+1)]
    dataPerNode = [pieces[i+1] - pieces[i] for i in range(honestSize)]

    snapshot_g = torch.zeros(honestSize, len(w0), dtype=torch.float64)
    snapshot_w = torch.zeros(len(w0), dtype=torch.float64)

    path = [F(w, dataset, weight_decay)]
    variencePath = []
    log('[SVRG]初始 loss={:.6f}, accuracy={:.2f} gamma={:}'.format(path[0], accuracy(w, dataset), gamma))

    # Pre-allocated buffer
    message = torch.zeros(nodeSize, len(w0), dtype=torch.float64)

    log('开始迭代')
    for r in range(rounds):
        for k in range(displayInterval):
            # Snapshot: per-node full gradient at the current point
            if (r*displayInterval + k) % snapshotInterval == 0:
                snapshot_g.zero_()
                for node in range(honestSize):
                    scale = 1/dataPerNode[node]
                    for index in range(pieces[node], pieces[node+1]):
                        x, y = dataset[index]
                        predict = LogisticRegression(w, x)

                        err = (predict-y).data
                        # Keyword `alpha` replaces the deprecated add_(Number, Tensor) overload.
                        snapshot_g[node][:-1].add_(err*x, alpha=scale)
                        snapshot_g[node][-1].add_(err, alpha=scale)
                    snapshot_g[node].add_(w, alpha=weight_decay)
                snapshot_w.copy_(w)

            # Honest nodes compute their variance-reduced gradients
            message.zero_()
            for node in range(honestSize):
                index = random.randint(pieces[node], pieces[node+1]-1)

                x, y = dataset[index]
                # Stochastic gradient at the current point
                predict = LogisticRegression(w, x)
                err = (predict-y).data
                message[node][:-1].add_(x, alpha=float(err))
                message[node][-1].add_(err)
                message[node].add_(w, alpha=weight_decay)

                # Subtract the same sample's gradient at the snapshot point
                predict = LogisticRegression(snapshot_w, x)
                err = (predict-y).data
                message[node][:-1].add_(x, alpha=-float(err))
                message[node][-1].add_(-err)
                message[node].add_(snapshot_w, alpha=-weight_decay)

                message[node].add_(snapshot_g[node])

            # Synchronisation: Byzantine rows are rewritten, then aggregated
            if attack is not None:
                attack(message, byzantineSize)
            g = aggregate(message)
            w.add_(g, alpha=-gamma)

        loss = F(w, dataset, weight_decay)
        acc = accuracy(w, dataset)
        path.append(loss)
        var = getVarience(message, honestSize)
        variencePath.append(var)
        log('[SVRG]已迭代 {}/{} rounds (interval: {:.0f}), loss={:.9f}, accuracy={:.2f}, var={:.9f}'.format(
            r+1, rounds, displayInterval, loss, acc, var
        ))
    return w, path, variencePath

## SARAH

In [None]:
def SARAH(w0, gamma, aggregate, weight_decay, honestSize=0, byzantineSize=0, attack=None, 
            snapshotInterval=6000, rounds=10, displayInterval=1000, SEED=100, fixSeed=False, **kw):
    """Simulate distributed SARAH with optional Byzantine workers.

    The module-level ``dataset`` is split into ``honestSize`` contiguous
    shards.  At a restart each honest node's message is reset to the full
    gradient of its shard and one step is taken; afterwards every iteration
    adds ``grad(w) - grad(previous w)`` of one sampled point to the node's
    running message.  A restart happens whenever the global iteration count
    is a multiple of ``randomStop``, which is redrawn from
    ``[1, snapshotInterval-1]`` at every restart.

    Args:
        w0: initial parameter vector (not modified).
        gamma: step size.
        aggregate: callable mapping the ``(nodes, dim)`` message matrix to a vector.
        weight_decay: L2 regularisation coefficient.
        honestSize: number of honest nodes (must be non-zero).
        byzantineSize: number of Byzantine nodes; requires ``attack`` when non-zero.
        attack: callable ``attack(message, byzantineSize)`` or ``None``.
        snapshotInterval: exclusive upper bound for the random restart period.
        rounds: number of reporting rounds.
        displayInterval: iterations per round.
        SEED: seed for ``random`` when ``fixSeed`` is true.
        fixSeed: whether to reseed ``random`` before running.
        **kw: ignored, so a shared config dict can be splatted in.

    Returns:
        ``(w, path, variencePath)``: final parameters, objective value before
        training and after each round, and the honest-message variance after
        each round.
    """
    assert byzantineSize == 0 or attack is not None
    assert honestSize != 0

    if fixSeed:
        random.seed(SEED)

    nodeSize = honestSize + byzantineSize

    # Initialisation
    w = w0.clone().detach()

    # Shard boundaries: node i owns indices pieces[i] .. pieces[i+1]-1
    pieces = [(i*len(dataset)) // honestSize for i in range(honestSize+1)]
    dataPerNode = [pieces[i+1] - pieces[i] for i in range(honestSize)]

    path = [F(w, dataset, weight_decay)]
    variencePath = []
    log('[SARAH]初始 loss={:.6f}, accuracy={:.2f} gamma={:}'.format(path[0], accuracy(w, dataset), gamma))

    # Pre-allocated buffers (the unused lastGradients/newG scratch tensors were dropped)
    message = torch.zeros(nodeSize, len(w0), dtype=torch.float64)
    lastw = torch.zeros_like(w0, dtype=torch.float64)

    # Random restart period; 1 forces a restart on the very first iteration
    randomStop = 1

    log('开始迭代')
    for r in range(rounds):
        for k in range(displayInterval):
            # Restart: reset every honest message to its shard's full gradient
            if (r*displayInterval + k) % randomStop == 0:
                message.zero_()
                for node in range(honestSize):
                    scale = 1/dataPerNode[node]
                    for index in range(pieces[node], pieces[node+1]):
                        x, y = dataset[index]
                        predict = LogisticRegression(w, x)

                        err = (predict-y).data
                        # Keyword `alpha` replaces the deprecated add_(Number, Tensor) overload.
                        message[node][:-1].add_(err*x, alpha=scale)
                        message[node][-1].add_(err, alpha=scale)
                    message[node].add_(w, alpha=weight_decay)

                # First step after the restart
                if attack is not None:
                    attack(message, byzantineSize)
                g = aggregate(message)
                lastw.copy_(w)
                w.add_(g, alpha=-gamma)
                # Draw the period that decides the next restart
                randomStop = random.randint(1, snapshotInterval-1)

            # Honest nodes update their recursive gradient estimates
            for node in range(honestSize):
                index = random.randint(pieces[node], pieces[node+1]-1)

                x, y = dataset[index]
                # Stochastic gradient at the current point
                predict = LogisticRegression(w, x)
                err = (predict-y).data
                message[node][:-1].add_(x, alpha=float(err))
                message[node][-1].add_(err)
                message[node].add_(w, alpha=weight_decay)

                # Subtract the same sample's gradient at the previous point
                predict = LogisticRegression(lastw, x)
                err = (predict-y).data
                message[node][:-1].add_(x, alpha=-float(err))
                message[node][-1].add_(-err)
                message[node].add_(lastw, alpha=-weight_decay)

            # Remember the current point before it is moved
            lastw.copy_(w)
            # Synchronisation: Byzantine rows are rewritten, then aggregated
            if attack is not None:
                attack(message, byzantineSize)
            g = aggregate(message)
            w.add_(g, alpha=-gamma)

        loss = F(w, dataset, weight_decay)
        acc = accuracy(w, dataset)
        path.append(loss)
        var = getVarience(message, honestSize)
        variencePath.append(var)
        log('[SARAH]已迭代 {}/{} rounds (interval: {:.0f}), loss={:.9f}, accuracy={:.2f}, var={:.9f}'.format(
            r+1, rounds, displayInterval, loss, acc, var
        ))
    return w, path, variencePath

# 恶意攻击

In [None]:
def white(messages, byzantinesize):
    """Gaussian-noise attack: same mean as the honest messages, large variance.

    Overwrites the last ``byzantinesize`` rows of ``messages`` in place with
    the mean of the remaining rows plus standard normal noise scaled by 30.
    Does nothing when ``byzantinesize`` is 0.

    Args:
        messages: ``(nodes, dim)`` tensor; honest rows first, Byzantine rows last.
        byzantinesize: number of trailing rows to overwrite.
    """
    if byzantinesize <= 0:
        # messages[-0:] would select every row and overwrite the honest messages.
        return
    honestCount = messages.size(0) - byzantinesize
    mu = torch.mean(messages[:honestCount], dim=0)
    messages[honestCount:].copy_(mu)
    noise = torch.randn((byzantinesize, messages.size(1)), dtype=torch.float64)
    # Keyword `alpha` replaces the deprecated add_(Number, Tensor) overload.
    messages[honestCount:].add_(noise, alpha=30)
def maxValue(messages, byzantinesize):
    """Sign-flip attack: every Byzantine row becomes ``-3`` times the honest mean.

    Overwrites the last ``byzantinesize`` rows of ``messages`` in place.
    Does nothing when ``byzantinesize`` is 0.

    Args:
        messages: ``(nodes, dim)`` tensor; honest rows first, Byzantine rows last.
        byzantinesize: number of trailing rows to overwrite.
    """
    if byzantinesize <= 0:
        # messages[-0:] would select every row and overwrite the honest messages.
        return
    honestCount = messages.size(0) - byzantinesize
    mu = torch.mean(messages[:honestCount], dim=0)
    meliciousMessage = -3*mu
    messages[honestCount:].copy_(meliciousMessage)
def zeroGradient(messages, byzantinesize):
    """Zero-sum attack: Byzantine rows are chosen so that all rows sum to zero.

    Overwrites the last ``byzantinesize`` rows of ``messages`` in place with
    ``-sum(honest rows) / byzantinesize``.  Does nothing when
    ``byzantinesize`` is 0.

    Args:
        messages: ``(nodes, dim)`` tensor; honest rows first, Byzantine rows last.
        byzantinesize: number of trailing rows to overwrite.
    """
    if byzantinesize <= 0:
        # Avoids the 0/0 division and messages[-0:] overwriting every row.
        return
    honestCount = messages.size(0) - byzantinesize
    s = torch.sum(messages[:honestCount], dim=0)
    messages[honestCount:].copy_(-s / byzantinesize)

# 驱动函数

In [None]:
def run(optimizer, aggregate, attack, config):
    """Run one experiment, pickle its result and plot the optimality gap.

    The experiment is named ``<optimizer>_<attack or 'baseline'>_<aggregate>``
    from the callables' ``__name__``.  Reads the module-level ``w0``,
    ``Fmin``, ``dataSetConfig`` and ``CACHE_DIR``.

    Args:
        optimizer: callable ``optimizer(w0, **config)`` returning
            ``(w, path, variancePath)``.
        aggregate: aggregation callable handed to the optimizer.
        attack: attack callable, or ``None`` for a run without Byzantine
            nodes (``byzantineSize`` is then forced to 0).
        config: dict of keyword arguments for the optimizer; not modified.
    """
    import os  # local import: only needed to create the cache directory

    attackName = 'baseline' if attack is None else attack.__name__
    title = '{}_{}_{}'.format(optimizer.__name__, attackName, aggregate.__name__)
    print(dataSetConfig['name'] + '_' + title)
    print('Fmin={}'.format(Fmin))

    _VRConfig = config.copy()
    _VRConfig['aggregate'] = aggregate
    _VRConfig['attack'] = attack
    if attack is None:
        _VRConfig['byzantineSize'] = 0
    w, path, variancePath = optimizer(w0, **_VRConfig)

    record = {
        **dataSetConfig,
        'gamma': _VRConfig['gamma'],
        'path': path,
        'variancePath': variancePath,
    }

    # Create the cache directory first so a long run is not lost to a
    # FileNotFoundError when the result is written.
    cacheParent = os.path.dirname(CACHE_DIR)
    if cacheParent:
        os.makedirs(cacheParent, exist_ok=True)
    with open(CACHE_DIR + title, 'wb') as f:
        pickle.dump(record, f)

    axis = plt.axes()
    plt.plot(list(range(len(path))), logAxis(path, Fmin))
    axis.set_yscale('log')

# 运行实验

## 正确性测试

出现函数到达最小值后，重新回弹的现象，原因可能有
1. 目标函数写错：忘记加惩罚项，忘记除以二等
2. 触及机器精度边界

## 计算最小值

从零开始跑

In [None]:
# Compute the reference minimum from scratch with centralised SAGA.
# SAGAConfig is used instead of the bare VRConfig because SAGA_min requires
# `gamma`, which VRConfig does not contain.
_VRConfig = SAGAConfig.copy()
# NOTE(review): the original read dataSetConfig['epoch'] and HONEST_SIZE,
# neither of which is defined in this file. The honest node count is taken
# from VRConfig and a missing 'epoch' entry falls back to 1 — confirm the
# intended epoch budget.
_VRConfig['epoch'] = dataSetConfig.get('epoch', 1) * VRConfig['honestSize'] * 2
w_min = SAGA_min(w0, dataset, **_VRConfig)
Fmin = F(w_min, dataset, _VRConfig['weight_decay'])
print(Fmin)

精度不够继续跑

In [None]:
# Continue from the current w_min when the reference minimum is not yet
# accurate enough. SAGAConfig is used instead of the bare VRConfig because
# SAGA_min requires `gamma`, which VRConfig does not contain.
_VRConfig = SAGAConfig.copy()
# _VRConfig['epoch'] = dataSetConfig['epoch'] * HONEST_SIZE
_VRConfig['epoch'] = 20
w_min = SAGA_min(w_min, dataset, **_VRConfig)
Fmin = F(w_min, dataset, _VRConfig['weight_decay'])
print(Fmin)

存储Fmin

In [None]:
# with open(CACHE_DIR + 'Fmin', 'wb') as f:
#     pickle.dump({
#         'Fmin': Fmin,
#         'w_min': w_min
#     }, f)

读取Fmin

In [None]:
# Restore the cached reference minimum (Fmin) and its minimiser (w_min).
# NOTE: pickle.load executes arbitrary code from the file; only load cache
# files produced by this notebook.
with open(CACHE_DIR + 'Fmin', 'rb') as f:
    obj = pickle.load(f)
    Fmin, w_min = obj['Fmin'], obj['w_min']

## SGD

### SGD - mean

In [None]:
run(optimizer = SGD, aggregate = mean, attack = None, config = SGDConfig)

white

In [None]:
run(optimizer = SGD, aggregate = mean, attack = white, config = SGDConfig)

max

In [None]:
run(optimizer = SGD, aggregate = mean, attack = maxValue, config = SGDConfig)

zero Gradient

In [None]:
run(optimizer = SGD, aggregate = mean, attack = zeroGradient, config = SGDConfig)

### SGD - geometric median

In [None]:
run(optimizer = SGD, aggregate = gm, attack = None, config = SGDConfig)

white

In [None]:
run(optimizer = SGD, aggregate = gm, attack = white, config = SGDConfig)

max

In [None]:
run(optimizer = SGD, aggregate = gm, attack = maxValue, config = SGDConfig)

zero Gradient

In [None]:
run(optimizer = SGD, aggregate = gm, attack = zeroGradient, config = SGDConfig)

### SGD - Krum

baseline

In [None]:
Krum = Krum_(nodeSize=VRConfig['honestSize'], byzantineSize=0)
run(optimizer = SGD, aggregate = Krum, attack = None, config = SGDConfig)

white

In [None]:
# Krum must be sized for all reported messages (honest + Byzantine); sized
# with honestSize alone it only scans the honest rows and never sees an attack.
Krum = Krum_(nodeSize=VRConfig['honestSize'] + VRConfig['byzantineSize'], byzantineSize=VRConfig['byzantineSize'])
run(optimizer = SGD, aggregate = Krum, attack = white, config = SGDConfig)

max

In [None]:
# Krum must be sized for all reported messages (honest + Byzantine); sized
# with honestSize alone it only scans the honest rows and never sees an attack.
Krum = Krum_(nodeSize=VRConfig['honestSize'] + VRConfig['byzantineSize'], byzantineSize=VRConfig['byzantineSize'])
run(optimizer = SGD, aggregate = Krum, attack = maxValue, config = SGDConfig)

zero Gradient

In [None]:
# Krum must be sized for all reported messages (honest + Byzantine); sized
# with honestSize alone it only scans the honest rows and never sees an attack.
Krum = Krum_(nodeSize=VRConfig['honestSize'] + VRConfig['byzantineSize'], byzantineSize=VRConfig['byzantineSize'])
run(optimizer = SGD, aggregate = Krum, attack = zeroGradient, config = SGDConfig)

### SGD - Median

In [None]:
run(optimizer = SGD, aggregate = median, attack = None, config = SGDConfig)

white

In [None]:
run(optimizer = SGD, aggregate = median, attack = white, config = SGDConfig)

max

In [None]:
run(optimizer = SGD, aggregate = median, attack = maxValue, config = SGDConfig)

zero Gradient

In [None]:
run(optimizer = SGD, aggregate = median, attack = zeroGradient, config = SGDConfig)

## BatchSGD

### BatchSGD - mean

In [None]:
# Baseline: BatchSGD with mean aggregation and no Byzantine nodes.
# The undefined names aggregate_linear / FedBatchSGD were replaced by the
# functions this file defines: mean / BatchSGD.
log('Fmin={}'.format(Fmin))

_VRConfig = batchConfig.copy()
_VRConfig['aggregate'] = mean
_VRConfig['attack'] = None
_VRConfig['byzantineSize'] = 0

w, path, variancePath = BatchSGD(w0, **_VRConfig)

record = {
    **dataSetConfig,
    'gamma': _VRConfig['gamma'],
    'batchSize': _VRConfig['batchSize'],
    'path': path,
    'variancePath': variancePath,
}

with open(CACHE_DIR + 'BatchSGD_baseline_mean', 'wb') as f:
    pickle.dump(record, f)

axis = plt.axes()
plt.plot(list(range(len(path))), logAxis(path, Fmin))
axis.set_yscale('log')

white

In [None]:
# BatchSGD with mean aggregation under the Gaussian-noise attack.
# The undefined names aggregate_linear / whiteNoise / FedBatchSGD were
# replaced by the functions this file defines: mean / white / BatchSGD.
log('Fmin={}'.format(Fmin))

_VRConfig = batchConfig.copy()
_VRConfig['aggregate'] = mean
_VRConfig['attack'] = white
w, path, variancePath = BatchSGD(w0, **_VRConfig)

record = {
    **dataSetConfig,
    'gamma': _VRConfig['gamma'],
    'batchSize': _VRConfig['batchSize'],
    'path': path,
    'variancePath': variancePath,
}

with open(CACHE_DIR + 'BatchSGD_white_mean', 'wb') as f:
    pickle.dump(record, f)

axis = plt.axes()
plt.plot(list(range(len(path))), logAxis(path, Fmin))
axis.set_yscale('log')

max

In [None]:
# BatchSGD with mean aggregation under the maxValue attack.
# The undefined names aggregate_linear / FedBatchSGD were replaced by the
# functions this file defines: mean / BatchSGD.
log('Fmin={}'.format(Fmin))

_VRConfig = batchConfig.copy()
_VRConfig['aggregate'] = mean
_VRConfig['attack'] = maxValue
w, path, variancePath = BatchSGD(w0, **_VRConfig)

record = {
    **dataSetConfig,
    'gamma': _VRConfig['gamma'],
    'batchSize': _VRConfig['batchSize'],
    'path': path,
    'variancePath': variancePath,
}

with open(CACHE_DIR + 'BatchSGD_maxValue_mean', 'wb') as f:
    pickle.dump(record, f)

axis = plt.axes()
plt.plot(list(range(len(path))), logAxis(path, Fmin))
axis.set_yscale('log')

zero Gradient

In [None]:
# BatchSGD with mean aggregation under the zeroGradient attack.
# The undefined names aggregate_linear / FedBatchSGD were replaced by the
# functions this file defines: mean / BatchSGD.
log('Fmin={}'.format(Fmin))

_VRConfig = batchConfig.copy()
_VRConfig['aggregate'] = mean
_VRConfig['attack'] = zeroGradient
w, path, variancePath = BatchSGD(w0, **_VRConfig)

record = {
    **dataSetConfig,
    'gamma': _VRConfig['gamma'],
    'batchSize': _VRConfig['batchSize'],
    'path': path,
    'variancePath': variancePath,
}

with open(CACHE_DIR + 'BatchSGD_zeroGradient_mean', 'wb') as f:
    pickle.dump(record, f)

axis = plt.axes()
plt.plot(list(range(len(path))), logAxis(path, Fmin))
axis.set_yscale('log')

### BatchSGD - geometric median

In [None]:
# Baseline: BatchSGD with geometric-median aggregation and no Byzantine nodes.
# The undefined names aggregate_geometric / FedBatchSGD were replaced by the
# functions this file defines: gm / BatchSGD.
log('Fmin={}'.format(Fmin))

_VRConfig = batchConfig.copy()
_VRConfig['aggregate'] = gm
_VRConfig['attack'] = None
_VRConfig['byzantineSize'] = 0
w, path, variancePath = BatchSGD(w0, **_VRConfig)

record = {
    **dataSetConfig,
    'gamma': _VRConfig['gamma'],
    'batchSize': _VRConfig['batchSize'],
    'path': path,
    'variancePath': variancePath,
}

with open(CACHE_DIR + 'BatchSGD_baseline_gm', 'wb') as f:
    pickle.dump(record, f)

axis = plt.axes()
plt.plot(list(range(len(path))), logAxis(path, Fmin))
axis.set_yscale('log')

white

In [None]:
# BatchSGD with geometric-median aggregation under the Gaussian-noise attack.
# The undefined names aggregate_geometric / whiteNoise / FedBatchSGD were
# replaced by the functions this file defines: gm / white / BatchSGD.
log('Fmin={}'.format(Fmin))

_VRConfig = batchConfig.copy()
_VRConfig['aggregate'] = gm
_VRConfig['attack'] = white
w, path, variancePath = BatchSGD(w0, **_VRConfig)

record = {
    **dataSetConfig,
    'gamma': _VRConfig['gamma'],
    'batchSize': _VRConfig['batchSize'],
    'path': path,
    'variancePath': variancePath,
}

with open(CACHE_DIR + 'BatchSGD_white_gm', 'wb') as f:
    pickle.dump(record, f)

axis = plt.axes()
plt.plot(list(range(len(path))), logAxis(path, Fmin))
axis.set_yscale('log')

max

In [None]:
# BatchSGD with geometric-median aggregation under the maxValue attack.
# The undefined names aggregate_geometric / FedBatchSGD were replaced by the
# functions this file defines: gm / BatchSGD.
log('Fmin={}'.format(Fmin))

_VRConfig = batchConfig.copy()
_VRConfig['aggregate'] = gm
_VRConfig['attack'] = maxValue
w, path, variancePath = BatchSGD(w0, **_VRConfig)

record = {
    **dataSetConfig,
    'gamma': _VRConfig['gamma'],
    'batchSize': _VRConfig['batchSize'],
    'path': path,
    'variancePath': variancePath,
}

with open(CACHE_DIR + 'BatchSGD_maxValue_gm', 'wb') as f:
    pickle.dump(record, f)

axis = plt.axes()
plt.plot(list(range(len(path))), logAxis(path, Fmin))
axis.set_yscale('log')

zero Gradient

In [None]:
# BatchSGD with geometric-median aggregation under the zeroGradient attack.
# The undefined names aggregate_geometric / FedBatchSGD were replaced by the
# functions this file defines: gm / BatchSGD.
log('Fmin={}'.format(Fmin))

_VRConfig = batchConfig.copy()
_VRConfig['aggregate'] = gm
_VRConfig['attack'] = zeroGradient
w, path, variancePath = BatchSGD(w0, **_VRConfig)

record = {
    **dataSetConfig,
    'gamma': _VRConfig['gamma'],
    'batchSize': _VRConfig['batchSize'],
    'path': path,
    'variancePath': variancePath,
}

with open(CACHE_DIR + 'BatchSGD_zeroGradient_gm', 'wb') as f:
    pickle.dump(record, f)

axis = plt.axes()
plt.plot(list(range(len(path))), logAxis(path, Fmin))
axis.set_yscale('log')

## SAGA

### SAGA - mean

In [None]:
run(optimizer = SAGA, aggregate = mean, attack = None, config = SAGAConfig)

white

In [None]:
run(optimizer = SAGA, aggregate = mean, attack = white, config = SAGAConfig)

max

In [None]:
run(optimizer = SAGA, aggregate = mean, attack = maxValue, config = SAGAConfig)

zero Gradient

In [None]:
# SAGA, mean aggregation, zeroGradient attack.
run(optimizer=SAGA, aggregate=mean, attack=zeroGradient, config=SAGAConfig)

### SAGA - geometric median

baseline

In [None]:
# SAGA, gm aggregation, no attack (baseline).
run(optimizer=SAGA, aggregate=gm, attack=None, config=SAGAConfig)

white

In [None]:
# SAGA, gm aggregation, `white` attack.
run(optimizer=SAGA, aggregate=gm, attack=white, config=SAGAConfig)

max

In [None]:
# SAGA, gm aggregation, maxValue attack.
run(optimizer=SAGA, aggregate=gm, attack=maxValue, config=SAGAConfig)

zero Gradient

In [None]:
# SAGA, gm aggregation, zeroGradient attack.
run(optimizer=SAGA, aggregate=gm, attack=zeroGradient, config=SAGAConfig)

### SAGA - Krum

baseline

In [None]:
# SAGA, Krum aggregation, no attack (baseline): the aggregator is built
# with zero Byzantine nodes.
Krum = Krum_(
    nodeSize=VRConfig['honestSize'],
    byzantineSize=0,
)
run(optimizer=SAGA, aggregate=Krum, attack=None, config=SAGAConfig)

white

In [None]:
# SAGA, Krum aggregation, `white` attack.
# NOTE(review): nodeSize is the honest count only — confirm Krum_ does not
# expect honest + Byzantine here.
Krum = Krum_(
    nodeSize=VRConfig['honestSize'],
    byzantineSize=VRConfig['byzantineSize'],
)
run(optimizer=SAGA, aggregate=Krum, attack=white, config=SAGAConfig)

max

In [None]:
# SAGA, Krum aggregation, maxValue attack.
# NOTE(review): nodeSize is the honest count only — confirm Krum_ does not
# expect honest + Byzantine here.
Krum = Krum_(
    nodeSize=VRConfig['honestSize'],
    byzantineSize=VRConfig['byzantineSize'],
)
run(optimizer=SAGA, aggregate=Krum, attack=maxValue, config=SAGAConfig)

zero Gradient

In [None]:
# SAGA, Krum aggregation, zeroGradient attack.
# NOTE(review): nodeSize is the honest count only — confirm Krum_ does not
# expect honest + Byzantine here.
Krum = Krum_(
    nodeSize=VRConfig['honestSize'],
    byzantineSize=VRConfig['byzantineSize'],
)
run(optimizer=SAGA, aggregate=Krum, attack=zeroGradient, config=SAGAConfig)

### SAGA - Median

In [None]:
# SAGA, median aggregation, no attack.
run(optimizer=SAGA, aggregate=median, attack=None, config=SAGAConfig)

white

In [None]:
# SAGA, median aggregation, `white` attack.
run(optimizer=SAGA, aggregate=median, attack=white, config=SAGAConfig)

max

In [None]:
# SAGA, median aggregation, maxValue attack.
run(optimizer=SAGA, aggregate=median, attack=maxValue, config=SAGAConfig)

zero Gradient

In [None]:
# SAGA, median aggregation, zeroGradient attack.
run(optimizer=SAGA, aggregate=median, attack=zeroGradient, config=SAGAConfig)

## SVRG

### SVRG - mean

In [None]:
# SVRG, mean aggregation, no attack.
run(optimizer=SVRG, aggregate=mean, attack=None, config=SVRGConfig)

In [None]:
# SVRG, mean aggregation, no attack, with both the display interval and the
# snapshot interval set to the data set size.
config = dict(
    SVRGConfig,
    displayInterval=dataSetConfig['dataSetSize'],
    snapshotInterval=dataSetConfig['dataSetSize'],
)
run(optimizer=SVRG, aggregate=mean, attack=None, config=config)

上图拉长一点

In [None]:
# SVRG, mean aggregation, no attack, with the display interval set to a
# quarter of the data set size (snapshot interval stays at the full size).
config = dict(
    SVRGConfig,
    displayInterval=int(dataSetConfig['dataSetSize']/4),
    snapshotInterval=dataSetConfig['dataSetSize'],
)
run(optimizer=SVRG, aggregate=mean, attack=None, config=config)

中心式 收敛效果应该很好

In [None]:
# SVRG with a single honest node (honestSize = 1), mean aggregation, no attack.
# displayInterval / snapshotInterval are left at their SVRGConfig values.
config = dict(SVRGConfig, honestSize=1)
run(optimizer=SVRG, aggregate=mean, attack=None, config=config)

In [None]:
# Repeat of the single-honest-node SVRG run (honestSize = 1), mean
# aggregation, no attack; displayInterval / snapshotInterval unchanged.
config = dict(SVRGConfig, honestSize=1)
run(optimizer=SVRG, aggregate=mean, attack=None, config=config)

为什么中心式的收敛效果还是那么差，调大步长也还是一样

In [None]:
# Single-honest-node SVRG run with a larger step size (gamma = 5e-2), mean
# aggregation, no attack; displayInterval / snapshotInterval unchanged.
config = dict(SVRGConfig, honestSize=1, gamma=5e-2)
run(optimizer=SVRG, aggregate=mean, attack=None, config=config)

white

In [None]:
# SVRG, mean aggregation, whiteNoise attack.
# NOTE(review): the sibling `run` cells pass `white` where this one passes
# `whiteNoise` — confirm both names are defined and refer to the same attack.
run(optimizer=SVRG, aggregate=mean, attack=whiteNoise, config=SVRGConfig)

max

In [None]:
# SVRG, mean aggregation, maxValue attack.
run(optimizer=SVRG, aggregate=mean, attack=maxValue, config=SVRGConfig)

zero Gradient

In [None]:
# SVRG, mean aggregation, zeroGradient attack.
run(optimizer=SVRG, aggregate=mean, attack=zeroGradient, config=SVRGConfig)

### SVRG - geometric median

In [None]:
# SVRG, gm aggregation, no attack.
run(optimizer=SVRG, aggregate=gm, attack=None, config=SVRGConfig)

white

In [None]:
# SVRG, gm aggregation, `white` attack.
run(optimizer=SVRG, aggregate=gm, attack=white, config=SVRGConfig)

max

In [None]:
# SVRG, gm aggregation, maxValue attack.
run(optimizer=SVRG, aggregate=gm, attack=maxValue, config=SVRGConfig)

zero Gradient

In [None]:
# SVRG, gm aggregation, zeroGradient attack.
run(optimizer=SVRG, aggregate=gm, attack=zeroGradient, config=SVRGConfig)

### SVRG - Krum

In [None]:
# SVRG, Krum aggregation, no attack: the aggregator is built with zero
# Byzantine nodes.
aggregate = Krum_(
    nodeSize=VRConfig['honestSize'],
    byzantineSize=0,
)
run(optimizer=SVRG, aggregate=aggregate, attack=None, config=SVRGConfig)

white

In [None]:
# SVRG, Krum aggregation, `white` attack.
# NOTE(review): nodeSize is the honest count only — confirm Krum_ does not
# expect honest + Byzantine here.
aggregate = Krum_(
    nodeSize=VRConfig['honestSize'],
    byzantineSize=VRConfig['byzantineSize'],
)
run(optimizer=SVRG, aggregate=aggregate, attack=white, config=SVRGConfig)

max

In [None]:
# SVRG, Krum aggregation, maxValue attack.
# NOTE(review): nodeSize is the honest count only — confirm Krum_ does not
# expect honest + Byzantine here.
aggregate = Krum_(
    nodeSize=VRConfig['honestSize'],
    byzantineSize=VRConfig['byzantineSize'],
)
run(optimizer=SVRG, aggregate=aggregate, attack=maxValue, config=SVRGConfig)

zero Gradient

In [None]:
# SVRG, Krum aggregation, zeroGradient attack.
# NOTE(review): nodeSize is the honest count only — confirm Krum_ does not
# expect honest + Byzantine here.
aggregate = Krum_(
    nodeSize=VRConfig['honestSize'],
    byzantineSize=VRConfig['byzantineSize'],
)
run(optimizer=SVRG, aggregate=aggregate, attack=zeroGradient, config=SVRGConfig)

### SVRG - Median

In [None]:
# SVRG, median aggregation, no attack.
run(optimizer=SVRG, aggregate=median, attack=None, config=SVRGConfig)

white

In [None]:
# SVRG, median aggregation, `white` attack.
run(optimizer=SVRG, aggregate=median, attack=white, config=SVRGConfig)

max

In [None]:
# SVRG, median aggregation, maxValue attack.
run(optimizer=SVRG, aggregate=median, attack=maxValue, config=SVRGConfig)

zero Gradient

In [None]:
# SVRG, median aggregation, zeroGradient attack.
run(optimizer=SVRG, aggregate=median, attack=zeroGradient, config=SVRGConfig)

## SARAH

### SARAH - mean

In [None]:
# SARAH, mean aggregation, no attack.
run(optimizer=SARAH, aggregate=mean, attack=None, config=SARAHConfig)

white

In [None]:
# SARAH, mean aggregation, `white` attack.
run(optimizer=SARAH, aggregate=mean, attack=white, config=SARAHConfig)

max

In [None]:
# SARAH, mean aggregation, maxValue attack.
run(optimizer=SARAH, aggregate=mean, attack=maxValue, config=SARAHConfig)

zero Gradient

In [None]:
# SARAH, mean aggregation, zeroGradient attack.
run(optimizer=SARAH, aggregate=mean, attack=zeroGradient, config=SARAHConfig)

### SARAH - geometric median

In [None]:
# SARAH, gm aggregation, no attack.
run(optimizer=SARAH, aggregate=gm, attack=None, config=SARAHConfig)

white

In [None]:
# SARAH, gm aggregation, `white` attack.
run(optimizer=SARAH, aggregate=gm, attack=white, config=SARAHConfig)

max

In [None]:
# SARAH, gm aggregation, maxValue attack.
run(optimizer=SARAH, aggregate=gm, attack=maxValue, config=SARAHConfig)

zero Gradient

In [None]:
# SARAH, gm aggregation, zeroGradient attack.
run(optimizer=SARAH, aggregate=gm, attack=zeroGradient, config=SARAHConfig)

### SARAH - Krum

In [None]:
# SARAH, Krum aggregation, no attack: the aggregator is built with zero
# Byzantine nodes.
# The node count lives under VRConfig['honestSize'] (the key used by every
# other Krum cell); 'honestNodeSize' is not a key of VRConfig.
aggregate = Krum_(nodeSize=VRConfig['honestSize'], byzantineSize=0)
run(optimizer=SARAH, aggregate=aggregate, attack=None, config=SARAHConfig)

white

In [None]:
# SARAH, Krum aggregation, `white` attack.
# NOTE(review): nodeSize is the honest count only — confirm Krum_ does not
# expect honest + Byzantine here.
aggregate = Krum_(
    nodeSize=VRConfig['honestSize'],
    byzantineSize=VRConfig['byzantineSize'],
)
run(optimizer=SARAH, aggregate=aggregate, attack=white, config=SARAHConfig)

max

In [None]:
# SARAH, Krum aggregation, maxValue attack.
# NOTE(review): nodeSize is the honest count only — confirm Krum_ does not
# expect honest + Byzantine here.
aggregate = Krum_(
    nodeSize=VRConfig['honestSize'],
    byzantineSize=VRConfig['byzantineSize'],
)
run(optimizer=SARAH, aggregate=aggregate, attack=maxValue, config=SARAHConfig)

zero Gradient

In [None]:
# SARAH, Krum aggregation, zeroGradient attack.
# NOTE(review): nodeSize is the honest count only — confirm Krum_ does not
# expect honest + Byzantine here.
aggregate = Krum_(
    nodeSize=VRConfig['honestSize'],
    byzantineSize=VRConfig['byzantineSize'],
)
run(optimizer=SARAH, aggregate=aggregate, attack=zeroGradient, config=SARAHConfig)

### SARAH - Median

In [None]:
# SARAH, median aggregation, no attack.
run(optimizer=SARAH, aggregate=median, attack=None, config=SARAHConfig)

white

In [None]:
# SARAH, median aggregation, `white` attack.
run(optimizer=SARAH, aggregate=median, attack=white, config=SARAHConfig)

max

In [None]:
# SARAH, median aggregation, maxValue attack.
run(optimizer=SARAH, aggregate=median, attack=maxValue, config=SARAHConfig)

zero Gradient

In [None]:
# SARAH, median aggregation, zeroGradient attack.
run(optimizer=SARAH, aggregate=median, attack=zeroGradient, config=SARAHConfig)

## 比较经过VR前后方差大小

In [None]:
def FedSAGACmpVariance(w0, gamma, weight_decay, honestSize=0, byzantineSize=0, attack=None,
            rounds=10, displayInterval=1000, SEED=100, fixSeed=False, **kw):
    """Run federated SAGA with geometric-median aggregation and, once per
    round, record bias / variance / aggregation error for both the SAGA
    messages and the plain stochastic gradients computed at the same samples.

    Relies on module-level names defined elsewhere in this file: ``dataset``,
    ``LogisticRegression``, ``F``, ``G``, ``accuracy``, ``getVarience``,
    ``aggregate_geometric`` and ``log``.

    Args:
        w0: initial parameter vector (1-D tensor). The last gradient entry is
            filled with the raw prediction error, the others with ``err * x``
            (presumably the last entry is the bias term — confirm against
            ``LogisticRegression``).
        gamma: step size used in ``w <- w - gamma * g``.
        weight_decay: coefficient of the ``weight_decay * w`` term added to
            every per-sample gradient.
        honestSize: number of honest nodes; must be non-zero. The data set is
            split into ``honestSize`` contiguous pieces, one per honest node.
        byzantineSize: number of Byzantine nodes; requires ``attack`` when
            non-zero.
        attack: callable invoked as ``attack(message, byzantineSize)`` before
            every aggregation. Its return value is ignored, so it has to
            modify ``message`` in place.
        rounds: number of outer rounds (one statistics record per round).
        displayInterval: number of update steps per round.
        SEED: seed passed to ``random.seed`` when ``fixSeed`` is true.
        fixSeed: whether to seed Python's ``random`` module. Only ``random``
            is seeded here.
        **kw: extra configuration entries; ignored.

    Returns:
        Tuple ``(w, path, SAGA_biasPath, SAGA_variencePath, SAGA_error_Path,
        SGD_biasPath, SGD_variencePath, SGD_error_Path)`` where ``path`` holds
        the loss before the first round and after every round, and the other
        lists hold one value per round.
    """
    assert byzantineSize == 0 or attack is not None
    assert honestSize != 0

    if fixSeed:
        random.seed(SEED)

    nodeSize = honestSize + byzantineSize

    # Initialisation
    w = w0.clone().detach()

    # Gradient table: one stored gradient per training sample, evaluated at w0.
    store = torch.zeros([len(dataset), w.size(0)], requires_grad=False, dtype=torch.float64)
    for index in range(len(dataset)):
        x, y = dataset[index]
        predict = LogisticRegression(w, x)

        err = (predict-y).data
        store[index][:-1] = err*x
        store[index][-1] = err
        # add_(tensor, alpha=scalar) replaces the deprecated add_(scalar, tensor) overload.
        store[index].add_(w, alpha=weight_decay)

    # Data partition: node i owns samples pieces[i] .. pieces[i+1]-1.
    pieces = [(i*len(dataset)) // honestSize for i in range(honestSize+1)]
    dataPerNode = [pieces[i+1] - pieces[i] for i in range(honestSize)]

    # Per-node running average of the stored gradients.
    G_avg = torch.stack([
        store[pieces[i]:pieces[i+1]].mean(dim=0) for i in range(honestSize)
    ])

    # Returned values: loss path plus bias, variance and geometric-median
    # error for SAGA and for SGD.
    path = [F(w, dataset, weight_decay)]
    SAGA_biasPath = []
    SAGA_variencePath = []
    SAGA_error_Path = []
    SGD_biasPath = []
    SGD_variencePath = []
    SGD_error_Path = []

    log('[SAGA]初始 loss={:.6f}, accuracy={:.2f} gamma={:}'.format(path[0], accuracy(w, dataset), gamma))

    # Pre-allocated work buffers.
    new_G = torch.zeros_like(w0, dtype=torch.float64)
    message = torch.stack([w0] * nodeSize)
    # NOTE(review): only the honest rows of message_SGD are ever written and
    # `attack` is never applied to it, so its Byzantine rows keep the initial
    # value w0 when SGD_error is computed below — confirm this is intended.
    message_SGD = torch.stack([w0] * nodeSize)

    log('开始迭代')
    for r in range(rounds):
        for k in range(displayInterval):
            # Honest node updates
            for node in range(honestSize):
                index = random.randint(pieces[node], pieces[node+1]-1)

                x, y = dataset[index]
                # Fresh gradient of the sampled data point at the current w
                predict = LogisticRegression(w, x)

                old_G = store[index]
                err = (predict-y).data
                new_G[:-1] = err*x
                new_G[-1] = err
                new_G.add_(w, alpha=weight_decay)

                # The difference to the stored gradient is needed twice.
                delta = new_G.data - old_G.data

                # SAGA estimate uses the node average *before* it is refreshed.
                gradient = delta + G_avg[node].data

                G_avg[node].add_(delta, alpha=1 / dataPerNode[node])
                store[index] = new_G.data

                message[node].copy_(gradient)

                # Keep the plain stochastic gradient of the last step of the
                # round for the SGD statistics.
                if k + 1 == displayInterval:
                    message_SGD[node].copy_(new_G)

            # Synchronisation
            # Byzantine attack
            if attack is not None:
                attack(message, byzantineSize)
            g = aggregate_geometric(message)
            w.add_(g.data, alpha=-gamma)

        loss = F(w, dataset, weight_decay)
        acc = accuracy(w, dataset)

        fullGradient = G(w, dataset, weight_decay)

        # Bias: mean of the honest messages versus the full gradient
        SAGA_bias = message[:honestSize].mean(dim=0) - fullGradient
        SAGA_biasPath.append((SAGA_bias.norm()).item())
        SGD_bias = message_SGD[:honestSize].mean(dim=0) - fullGradient
        SGD_biasPath.append((SGD_bias.norm()).item())

        # Variance
        SAGA_var = getVarience(message, honestSize)
        SAGA_variencePath.append(SAGA_var)
        SGD_var = getVarience(message_SGD, honestSize)
        SGD_variencePath.append(SGD_var)
        # Loss value
        path.append(loss)

        # Error of the geometric-median aggregate versus the full gradient
        SGD_gm_bias = aggregate_geometric(message_SGD) - fullGradient
        SGD_error_Path.append(SGD_gm_bias.norm().item())
        SAGA_gm_bias = aggregate_geometric(message) - fullGradient
        SAGA_error_Path.append(SAGA_gm_bias.norm().item())

        log('[SAGA]已迭代 {}/{} rounds (interval: {:.0f}), loss={:.9f}, accuracy={:.2f}'.format(
            r+1, rounds, displayInterval, loss, acc
        ))

        print('[SGD] bias:{:.5f} var:{:.5f} err:{:.5f}'.format(SGD_biasPath[-1], SGD_variencePath[-1], SGD_error_Path[-1]))
        print('[SAGA] bias:{:.5f} var:{:.5f} err:{:.5f}'.format(SAGA_biasPath[-1], SAGA_variencePath[-1], SAGA_error_Path[-1]))

    return w, path, SAGA_biasPath, SAGA_variencePath, SAGA_error_Path, SGD_biasPath, SGD_variencePath, SGD_error_Path

baseline

In [None]:
# SGD-vs-SAGA variance comparison, no attack and no Byzantine nodes:
# run, cache the statistics with pickle, then plot logAxis(path, Fmin) on a
# log-scaled y axis.
log('Fmin={}'.format(Fmin))

# Start from SAGAConfig: FedSAGACmpVariance requires `gamma` and the record
# below reads _VRConfig['gamma'], which the VRConfig defined at the top of
# the file does not provide.
_VRConfig = SAGAConfig.copy()
_VRConfig['attack'] = None
_VRConfig['byzantineSize'] = 0
res = FedSAGACmpVariance(w0, **_VRConfig)
w, path, SAGA_biasPath, SAGA_variencePath, SAGA_error_Path, SGD_biasPath, SGD_variencePath, SGD_error_Path = res

record = {
    **dataSetConfig,
    'gamma': _VRConfig['gamma'],
    'path': path,
    'SGD_biasPath': SGD_biasPath,
    'SGD_variencePath': SGD_variencePath,
    'SGD_error_Path': SGD_error_Path,
    'SAGA_biasPath': SAGA_biasPath,
    'SAGA_variencePath': SAGA_variencePath,
    'SAGA_error_Path': SAGA_error_Path,
}


with open(CACHE_DIR + 'SGD_SAGA_cmpVar_baseline', 'wb') as f:
    pickle.dump(record, f)

axis = plt.axes()
plt.plot(list(range(len(path))), logAxis(path, Fmin))
axis.set_yscale('log')

white

In [None]:
# SGD-vs-SAGA variance comparison under the whiteNoise attack:
# run, cache the statistics with pickle, then plot logAxis(path, Fmin) on a
# log-scaled y axis.
log('Fmin={}'.format(Fmin))

# Start from SAGAConfig: FedSAGACmpVariance requires `gamma` and the record
# below reads _VRConfig['gamma'], which the VRConfig defined at the top of
# the file does not provide.
_VRConfig = SAGAConfig.copy()
_VRConfig['attack'] = whiteNoise
res = FedSAGACmpVariance(w0, **_VRConfig)
w, path, SAGA_biasPath, SAGA_variencePath, SAGA_error_Path, SGD_biasPath, SGD_variencePath, SGD_error_Path = res

record = {
    **dataSetConfig,
    'gamma': _VRConfig['gamma'],
    'path': path,
    'SGD_biasPath': SGD_biasPath,
    'SGD_variencePath': SGD_variencePath,
    'SGD_error_Path': SGD_error_Path,
    'SAGA_biasPath': SAGA_biasPath,
    'SAGA_variencePath': SAGA_variencePath,
    'SAGA_error_Path': SAGA_error_Path,
}


with open(CACHE_DIR + 'SGD_SAGA_cmpVar_white', 'wb') as f:
    pickle.dump(record, f)

axis = plt.axes()
plt.plot(list(range(len(path))), logAxis(path, Fmin))
axis.set_yscale('log')

maxValue

In [None]:
# SGD-vs-SAGA variance comparison under the maxValue attack:
# run, cache the statistics with pickle, then plot logAxis(path, Fmin) on a
# log-scaled y axis.
log('Fmin={}'.format(Fmin))

# Start from SAGAConfig: FedSAGACmpVariance requires `gamma` and the record
# below reads _VRConfig['gamma'], which the VRConfig defined at the top of
# the file does not provide.
_VRConfig = SAGAConfig.copy()
_VRConfig['attack'] = maxValue
res = FedSAGACmpVariance(w0, **_VRConfig)
w, path, SAGA_biasPath, SAGA_variencePath, SAGA_error_Path, SGD_biasPath, SGD_variencePath, SGD_error_Path = res

record = {
    **dataSetConfig,
    'gamma': _VRConfig['gamma'],
    'path': path,
    'SGD_biasPath': SGD_biasPath,
    'SGD_variencePath': SGD_variencePath,
    'SGD_error_Path': SGD_error_Path,
    'SAGA_biasPath': SAGA_biasPath,
    'SAGA_variencePath': SAGA_variencePath,
    'SAGA_error_Path': SAGA_error_Path,
}


with open(CACHE_DIR + 'SGD_SAGA_cmpVar_maxValue', 'wb') as f:
    pickle.dump(record, f)

axis = plt.axes()
plt.plot(list(range(len(path))), logAxis(path, Fmin))
axis.set_yscale('log')

zeroGradient

In [None]:
# SGD-vs-SAGA variance comparison under the zeroGradient attack:
# run, cache the statistics with pickle, then plot logAxis(path, Fmin) on a
# log-scaled y axis.
log('Fmin={}'.format(Fmin))

# Start from SAGAConfig: FedSAGACmpVariance requires `gamma` and the record
# below reads _VRConfig['gamma'], which the VRConfig defined at the top of
# the file does not provide.
_VRConfig = SAGAConfig.copy()
_VRConfig['attack'] = zeroGradient
res = FedSAGACmpVariance(w0, **_VRConfig)
w, path, SAGA_biasPath, SAGA_variencePath, SAGA_error_Path, SGD_biasPath, SGD_variencePath, SGD_error_Path = res

record = {
    **dataSetConfig,
    'gamma': _VRConfig['gamma'],
    'path': path,
    'SGD_biasPath': SGD_biasPath,
    'SGD_variencePath': SGD_variencePath,
    'SGD_error_Path': SGD_error_Path,
    'SAGA_biasPath': SAGA_biasPath,
    'SAGA_variencePath': SAGA_variencePath,
    'SAGA_error_Path': SAGA_error_Path,
}


with open(CACHE_DIR + 'SGD_SAGA_cmpVar_zeroGradient', 'wb') as f:
    pickle.dump(record, f)

axis = plt.axes()
plt.plot(list(range(len(path))), logAxis(path, Fmin))
axis.set_yscale('log')