In [1]:
import sys
sys.path.append("..")
import logging
import random
import neptune
import numpy as np
import syft as sy
from torch import load
from torchvision import transforms
from federated_learning.FLCustomDataset import FLCustomDataset
from federated_learning.FederatedLearning import FederatedLearning
from federated_learning.helper import utils

# YAML file holding the default settings, relative to this notebook's directory.
CONFIG_PATH = '../configs/defaults.yml'

# Parse the settings once into `configs`; the statements below read from it.
configs = utils.load_config(CONFIG_PATH)

# Timestamp-prefixed log lines at the verbosity chosen in the config file.
logging.basicConfig(
    level=configs['log']['level'],
    format='%(asctime)s %(message)s',
)

# Seed Python's `random` module from the configured value.
random.seed(configs['runtime']['random_seed'])

# Logging initialization
#
# Prefix handed to utils.make_output_dir when file logging is enabled.
# The previous code read it from `arguments['--output-prefix']`, but no
# `arguments` mapping is defined anywhere in this notebook (presumably a
# leftover from a command-line version of this script), so switching
# `log_enable` to True raised a NameError.
# NOTE(review): 'notebook' is a placeholder value -- set the prefix you want.
OUTPUT_PREFIX = 'notebook'

# Master switch for writing configs/models to disk; off by default.
log_enable = False
# Stays None unless logging is enabled; passed on to FederatedLearning as-is.
output_dir = None
if log_enable:
    output_dir = utils.make_output_dir(
        configs['log']['root_output_dir'], OUTPUT_PREFIX)
    # Keep a copy of the effective configuration next to the run outputs.
    utils.save_configs(output_dir, configs)
# Neptune experiment tracking is disabled for this notebook.
neptune_enable = False

# Shorthand for the runtime section of the config, used only in this block.
runtime_cfg = configs['runtime']

# Training schedule.
epochs_num = runtime_cfg['epochs']
rounds_num = runtime_cfg['rounds']

# Build the FederatedLearning driver. All arguments are positional, so their
# order must match the constructor signature (not visible from this notebook).
fl = FederatedLearning(
    runtime_cfg['batch_size'],
    runtime_cfg['test_batch_size'],
    runtime_cfg['lr'],
    runtime_cfg['reg'],
    runtime_cfg['momentum'],
    neptune_enable,
    log_enable,
    configs['log']['interval'],
    output_dir,
    runtime_cfg['random_seed'],
)


# Location of the LEAF FEMNIST files, taken from the config.
femnist_path = configs['data']['FEMNIST_PATH']

# Training split: read from disk, then preprocess with only_digits=True
# (presumably restricts the data to the digit classes -- confirm in utils).
raw_train_data = utils.preprocess_leaf_data(
    utils.load_leaf_train(femnist_path),
    only_digits=True,
)

# Test split: same pipeline, with min_num_samples set to the test batch size
# (presumably drops users that cannot fill one test batch -- confirm in utils).
raw_test_data = utils.preprocess_leaf_data(
    utils.load_leaf_test(femnist_path),
    only_digits=True,
    min_num_samples=configs['runtime']['test_batch_size'],
)

# Users present in BOTH the processed test and train sets. Sorting makes the
# resulting list independent of set iteration order.
workers_idx_all = sorted(set(raw_test_data.keys()) & set(raw_train_data.keys()))
logging.info(
    "Total of {} workers are in the dataset.".format(len(workers_idx_all))
)

# Pick the configured number of users from the common pool; the empty list is
# the third positional argument of get_workers_idx (presumably an exclusion
# list -- confirm in utils).
workers_idx_to_be_used = utils.get_workers_idx(
    workers_idx_all, configs['runtime']['femnist_users_num'], [])
logging.info(
    "Select {} workers to be used from the dataset.".format(len(workers_idx_to_be_used))
)


# Split the selected workers into three roles. Each call passes the roles that
# were already assigned as the last argument (presumably so they are excluded
# from the new pick -- confirm in utils.get_workers_idx).

# 1) Trusted workers: nothing has been assigned yet.
trusted_idx = utils.get_workers_idx(
    workers_idx_to_be_used,
    configs['runtime']['femnist_trusted_num'],
    [],
)

# 2) Eavesdroppers: the trusted workers are passed as already taken.
eavesdroppers_idx = utils.get_workers_idx(
    workers_idx_to_be_used,
    configs['runtime']['femnist_eavesdropper_num'],
    trusted_idx,
)

# 3) Normal workers: requested count is everything that is left once the
#    configured eavesdropper and trusted counts are subtracted.
normal_idx = utils.get_workers_idx(
    workers_idx_to_be_used,
    len(workers_idx_to_be_used) - sum(
        int(configs['runtime'][key])
        for key in ('femnist_eavesdropper_num', 'femnist_trusted_num')
    ),
    eavesdroppers_idx + trusted_idx,
)

# logging.info("Trusted [{}]: {}".format(len(trusted_idx), trusted_idx))
# logging.info("Eavesdroppers [{}]: {}".format(len(eavesdroppers_idx), eavesdroppers_idx))
# logging.info("Normal [{}]: {}".format(len(normal_idx), normal_idx))
# if log_enable:
#     utils.write_to_file(output_dir, "all_users", workers_idx_all)
#     utils.write_to_file(output_dir, "eavesdroppers", eavesdroppers_idx)
#     utils.write_to_file(output_dir, "normal", normal_idx)
#     utils.write_to_file(output_dir, "trusted", trusted_idx)

# # fl.create_server()
# # fl.create_server_model()
# # fl.create_workers(workers_idx_to_be_used)
# # fl.create_workers_model(workers_idx_to_be_used)
    
# # # Create test dataloader from all normal and eavesdroppers
# # fed_test_dataloader = fl.create_femnist_server_test_dataloader(
# #     raw_test_data, workers_idx_to_be_used)

# # # W0 model
# # # trained_w0_model = load(configs['runtime']['W0_pure_path'])
# # fed_train_datasets = None
# # if arguments["--no-attack"]:
# #     logging.info("No Attack will be performed.")
# #     fed_train_datasets = fl.create_femnist_train_datasets(raw_train_data, workers_idx_to_be_used)
# # elif arguments["--attack"] == "999": # Combines
# #     logging.info("Perform combined attacks 1, 2, 3")
# #     dataset = utils.perfrom_attack_femnist(
# #             raw_train_data, 1, workers_idx_to_be_used, eavesdroppers_idx)
# #     dataset = utils.perfrom_attack_femnist(
# #             dataset, 2, workers_idx_to_be_used, eavesdroppers_idx)
# #     dataset = utils.perfrom_attack_femnist(
# #             dataset, 3, workers_idx_to_be_used, eavesdroppers_idx)
# #     fed_train_datasets = fl.create_femnist_train_datasets(dataset, workers_idx_to_be_used)
# # else:
# #     logging.info("Perform attack type: {}".format(arguments["--attack"]))
# #     fed_train_datasets = fl.create_femnist_train_datasets(
# #         utils.perfrom_attack_femnist(
# #             raw_train_data, 
# #             int(arguments["--attack"]),
# #             workers_idx_to_be_used,
# #             eavesdroppers_idx
# #         ), workers_idx_to_be_used)

# # fed_train_dataloaders = dict()
# # for ww_id, fed_dataset in fed_train_datasets.items():
# #     dataloader = sy.FederatedDataLoader(
# #         fed_dataset, batch_size=configs['runtime']['batch_size'], shuffle=False, drop_last=True)
# #     fed_train_dataloaders[fed_dataset.workers[0]] = dataloader

# # for round_no in range(rounds_num):
# #     for counter, worker_id in enumerate(workers_idx_to_be_used):
# #         logging.info("Training worker {} out of {} workers...".format(
# #             counter+1, len(workers_idx_to_be_used)))
# #         fl.train_workers(fed_train_dataloaders[worker_id], [worker_id], round_no, epochs_num)

# #     # Find the best weights and update the server model
# #     weights = None
# #     if arguments['--avg']:
# #         weights = [1.0 / len(workers_idx_to_be_used)] * len(workers_idx_to_be_used)
# #     elif arguments['--opt']:
# #         trusted_weights = [1.0 / len(trusted_idx)] * len(trusted_idx)
# #         avg_trusted_model = fl.wieghted_avg_model(trusted_weights, trusted_idx)
# #         weights = fl.find_best_weights(avg_trusted_model, normal_idx + eavesdroppers_idx)

# #     if log_enable:
# #         fl.save_workers_model(workers_idx_to_be_used, str(round_no))
# #         # fl.save_model(
# #         #     fl.get_average_model(trusted_idx),
# #         #     "R{}_{}".format(round_no, "avg_trusted_model")
# #         # )

# #     weighted_avg_model = fl.wieghted_avg_model(weights, normal_idx + eavesdroppers_idx)
# #     # Update the server model
# #     fl.update_models(workers_idx_to_be_used, weighted_avg_model)

# #     # Apply the server model to the test dataset
# #     fl.test(weighted_avg_model, fed_test_dataloader, round_no)

# #     if log_enable:
# #         fl.save_model(
# #             weighted_avg_model, 
# #             "R{}_{}".format(round_no, "weighted_avg_model")
# #         )

# #     print("")


2020-12-20 16:24:15,476 Initializing Federated Learning class...
2020-12-20 16:24:15,865 Loading train dataset from /home/ubuntu/data/leaf_non_iid/data/femnist/data
2020-12-20 16:24:15,866 Loading 1 out of 4 files...
2020-12-20 16:24:18,803 Loading 2 out of 4 files...
2020-12-20 16:24:21,691 Loading 3 out of 4 files...
2020-12-20 16:24:25,354 Loading 4 out of 4 files...
2020-12-20 16:24:28,283 Start processing of femnist data...
2020-12-20 16:24:30,788 Loading test dataset from /home/ubuntu/data/leaf_non_iid/data/femnist/data
2020-12-20 16:24:30,790 Loading 1 out of 4 files...
2020-12-20 16:24:31,070 Loading 2 out of 4 files...
2020-12-20 16:24:31,360 Loading 3 out of 4 files...
2020-12-20 16:24:31,729 Loading 4 out of 4 files...
2020-12-20 16:24:32,021 Start processing of femnist data...
2020-12-20 16:24:32,165 Total of 31 workers are in the dataset.
2020-12-20 16:24:32,167 Select 30 workers to be used from the dataset.


In [4]:
# Print summary statistics for the LEAF training split exactly as loaded from
# disk, i.e. WITHOUT utils.preprocess_leaf_data applied. Note that this
# re-reads the dataset files on every run of the cell.
utils.dataset_info(utils.load_leaf_train(configs['data']['FEMNIST_PATH']))

2020-12-20 16:27:11,403 Loading train dataset from /home/ubuntu/data/leaf_non_iid/data/femnist/data
2020-12-20 16:27:11,405 Loading 1 out of 4 files...
2020-12-20 16:27:14,388 Loading 2 out of 4 files...
2020-12-20 16:27:17,246 Loading 3 out of 4 files...
2020-12-20 16:27:20,306 Loading 4 out of 4 files...


104:	1
113:	1
128:	1
135:	1
150:	1
154:	1
159:	1
163:	1
169:	1
176:	1
185:	1
186:	1
189:	1
195:	1
198:	2
205:	3
210:	1
211:	2
212:	1
215:	1
216:	2
217:	1
219:	3
220:	2
221:	2
222:	2
223:	3
224:	2
226:	1
228:	2
230:	1
231:	2
232:	1
233:	2
234:	2
236:	1
237:	2
238:	1
239:	2
241:	1
242:	2
243:	6
244:	1
246:	1
247:	1
248:	1
249:	4
250:	2
251:	2
252:	2
253:	2
254:	2
255:	1
256:	2
257:	4
258:	2
259:	1
260:	4
261:	3
262:	3
263:	2
265:	2
266:	2
267:	1
268:	4
269:	2
270:	4
271:	3
272:	5
273:	2
274:	2
275:	5
276:	1
277:	2
278:	3
279:	1
280:	1
282:	1
283:	4
285:	1
286:	6
287:	3
288:	5
289:	1
290:	2
291:	2
292:	5
293:	4
295:	2
296:	1
297:	5
298:	6
299:	1
300:	2
301:	5
302:	3
303:	3
305:	4
306:	3
307:	4
308:	1
309:	5
310:	6
311:	2
312:	7
313:	3
315:	6
316:	8
317:	4
318:	3
319:	3
320:	4
322:	1
323:	2
324:	9
326:	2
327:	4
328:	2
329:	2
330:	6
331:	7
332:	3
333:	4
334:	1
335:	4
337:	4
338:	2
339:	1
340:	3
341:	4
342:	5
345:	4
347:	3
348:	1
349:	3
350:	1
351:	4
352:	1
353:	3
354:	4
355:	2
356:	3
357:	2

In [5]:
import torch

# Scratch tensor holding the values 1.0 .. 5.0 as float64.
aa = torch.arange(1, 6, dtype=torch.float64)

In [7]:
# Mean of the scratch tensor `aa`, converted from a 0-dim tensor to a plain Python float.
float(aa.mean())

3.0

In [3]:
# Print summary statistics for the preprocessed TRAIN data, restricted via
# utils.extract_data to the workers in workers_idx_to_be_used.
utils.dataset_info(utils.extract_data(raw_train_data, workers_idx_to_be_used))

2020-12-20 16:24:51,366 Extract data from raw data for 30 of users...


101:	2
102:	4
103:	2
104:	1
105:	2
106:	2
107:	3
108:	3
109:	1
110:	4
111:	2
112:	3
115:	1
Mean num of samples/user: 107.0
Total Samples:	3209
Total Users:	30
[f0824_18]: Images: 107, Pixels: 28
mean: 0.9666635394096375
std: 0.15451721847057343,
max: 1.0
-----


In [5]:
# Print summary statistics for the preprocessed TEST data, restricted via
# utils.extract_data to the workers in workers_idx_to_be_used.
utils.dataset_info(utils.extract_data(raw_test_data, workers_idx_to_be_used))

2020-12-20 16:30:29,051 Extract data from raw data for 30 of users...


15:	12
16:	6
17:	5
18:	3
19:	3
20:	1
Mean num of samples/user: 18.0
Total Samples:	492
Total Users:	30
[f0629_39]: Images: 15, Pixels: 28
mean: 0.9668231010437012
std: 0.15408051013946533,
max: 1.0
-----


In [13]:
# Build the train/test user intersection twice with the identical expression
# and print both lists, so their element order can be compared by eye.
# NOTE(review): the order of list(set(...)) is not a stable contract; for str
# keys it can differ between interpreter runs (hash randomization), even though
# it is the same within one session. Presumably this check is why
# workers_idx_all wraps the same expression in sorted() -- confirm.
w1 = list(set(raw_test_data.keys()).intersection(raw_train_data.keys()))
w2 = list(set(raw_test_data.keys()).intersection(raw_train_data.keys()))
print("{}\n\n{}".format(w1, w2))



['f0840_37', 'f0933_38', 'f0710_08', 'f0898_29', 'f0666_20', 'f0625_26', 'f0843_06', 'f0958_28', 'f0590_06', 'f0519_46', 'f0862_25', 'f0877_17', 'f0864_41', 'f0931_37', 'f0806_09', 'f0836_28', 'f0629_39', 'f0561_12', 'f0701_21', 'f0824_18', 'f0644_19', 'f0792_07', 'f0841_10', 'f0504_29', 'f0617_30', 'f0512_15', 'f0726_09', 'f0834_03', 'f0673_28', 'f0566_02', 'f0539_34']

['f0840_37', 'f0933_38', 'f0710_08', 'f0898_29', 'f0666_20', 'f0625_26', 'f0843_06', 'f0958_28', 'f0590_06', 'f0519_46', 'f0862_25', 'f0877_17', 'f0864_41', 'f0931_37', 'f0806_09', 'f0836_28', 'f0629_39', 'f0561_12', 'f0701_21', 'f0824_18', 'f0644_19', 'f0792_07', 'f0841_10', 'f0504_29', 'f0617_30', 'f0512_15', 'f0726_09', 'f0834_03', 'f0673_28', 'f0566_02', 'f0539_34']


In [6]:
# Print summary statistics for the full preprocessed TRAIN data (every user
# returned by preprocess_leaf_data, not only the selected workers).
utils.dataset_info(raw_train_data)

100:	5
101:	14
102:	11
103:	11
104:	12
105:	9
106:	15
107:	12
108:	20
109:	10
110:	12
111:	15
112:	8
113:	4
114:	6
115:	10
116:	5
117:	5
118:	2
119:	3
120:	1
121:	1
Mean num of samples/user: 110.0
Total Samples:	20652
Total Users:	191
[f0640_19]: Images: 112, Pixels: 28
mean: 0.9643265008926392
std: 0.1592288762331009,
max: 1.0
-----


In [8]:
# Print summary statistics for the full preprocessed TEST data (every user
# returned by preprocess_leaf_data, not only the selected workers).
utils.dataset_info(raw_test_data)

15:	16
16:	13
17:	9
18:	7
19:	4
20:	1
Mean num of samples/user: 18.0
Total Samples:	823
Total Users:	50
[f0629_39]: Images: 15, Pixels: 28
mean: 0.9646655321121216
std: 0.15850037336349487,
max: 1.0
-----


In [10]:
# Show how many users are common to the train and test sets, followed by their ids.
print("All: [{}]: {}".format(len(workers_idx_all), workers_idx_all))

All: [31]: ['f0840_37', 'f0933_38', 'f0710_08', 'f0898_29', 'f0666_20', 'f0625_26', 'f0843_06', 'f0958_28', 'f0590_06', 'f0519_46', 'f0862_25', 'f0877_17', 'f0864_41', 'f0931_37', 'f0806_09', 'f0836_28', 'f0629_39', 'f0561_12', 'f0701_21', 'f0824_18', 'f0644_19', 'f0792_07', 'f0841_10', 'f0504_29', 'f0617_30', 'f0512_15', 'f0726_09', 'f0834_03', 'f0673_28', 'f0566_02', 'f0539_34']


In [11]:
# Show how many workers were selected for this run, followed by their ids.
print("To Be Used: [{}]: {}".format(len(workers_idx_to_be_used), workers_idx_to_be_used))

To Be Used: [30]: ['f0931_37', 'f0504_29', 'f0840_37', 'f0726_09', 'f0512_15', 'f0519_46', 'f0834_03', 'f0877_17', 'f0566_02', 'f0843_06', 'f0590_06', 'f0701_21', 'f0625_26', 'f0898_29', 'f0561_12', 'f0644_19', 'f0824_18', 'f0539_34', 'f0710_08', 'f0629_39', 'f0666_20', 'f0862_25', 'f0806_09', 'f0841_10', 'f0792_07', 'f0617_30', 'f0673_28', 'f0864_41', 'f0958_28', 'f0933_38']
