diff --git a/README.md b/README.md index ea7d8b0c7..c42279041 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,40 @@ A detailed tutorial is provided on our [website](https://federatedscope.io/). - [05-25-2022] Our paper [FederatedScope-GNN](https://arxiv.org/abs/2204.05562) has been accepted by KDD'2022! - [05-06-2022] We release FederatedScope v0.1.0! +## Code Structure +``` +FederatedScope +├── federatedscope +│   ├── core +│   | ├── workers # Behaviors of participants (i.e., server and clients) +│   | ├── trainers # Details of local training +│   | ├── aggregators # Details of federated aggregation +│   | ├── configs # Customizable configurations +│   | ├── monitors # The monitor module for logging and demonstrating +│   | ├── communication.py # Implementation of communication among participants +│   | ├── fed_runner.py # The runner for building and running an FL course +│   | ├── ... .. +│   ├── cv # Federated learning in CV +│   ├── nlp # Federated learning in NLP +│   ├── gfl # Graph federated learning +│   ├── autotune # Auto-tunning for federated learning +│   ├── contrib +│   ├── main.py +│   ├── ... ... +├── scripts # Scripts for reproducing existing algorithms +├── benchmark # We release several benchmarks for convenient and fair comparisons +├── doc # For automatic documentation +├── enviornment # Installation requirements and provided docker files +├── materials # Materials of related topics (e.g., paper lists) +│   ├── notebook +│   ├── paper_list +│   ├── tutorial +│   ├── ... ... +├── tests # Unittest modules for continuous integration +├── LICENSE +└── setup.py +``` + ## Quick Start We provide an end-to-end example for users to start running a standard FL course with FederatedScope. diff --git a/benchmark/pFL-Bench/yaml_best_runs_example/Ditto_FEMNIST-s02.yaml b/benchmark/pFL-Bench/yaml_best_runs_example/Ditto_FEMNIST-s02.yaml index 3d7864b5a..08848b015 100644 --- a/benchmark/pFL-Bench/yaml_best_runs_example/Ditto_FEMNIST-s02.yaml +++ b/benchmark/pFL-Bench/yaml_best_runs_example/Ditto_FEMNIST-s02.yaml @@ -20,6 +20,7 @@ cfg_file: '' criterion: type: CrossEntropyLoss data: + save_data: false args: [] batch_size: 32 cSBM_phi: @@ -75,7 +76,6 @@ eval: - avg - fairness - raw - save_data: false split: - test - val diff --git a/benchmark/pFL-Bench/yaml_best_runs_example/FedAvg-FT_FEMNIST-s02.yaml b/benchmark/pFL-Bench/yaml_best_runs_example/FedAvg-FT_FEMNIST-s02.yaml index 041646c69..3df1b3340 100644 --- a/benchmark/pFL-Bench/yaml_best_runs_example/FedAvg-FT_FEMNIST-s02.yaml +++ b/benchmark/pFL-Bench/yaml_best_runs_example/FedAvg-FT_FEMNIST-s02.yaml @@ -20,6 +20,7 @@ cfg_file: '' criterion: type: CrossEntropyLoss data: + save_data: false args: [] batch_size: 32 cSBM_phi: @@ -75,7 +76,6 @@ eval: - avg - fairness - raw - save_data: false split: - test - val diff --git a/benchmark/pFL-Bench/yaml_best_runs_example/FedEM-FedBN-FedOPT-FT_FEMNIST-s02.yaml b/benchmark/pFL-Bench/yaml_best_runs_example/FedEM-FedBN-FedOPT-FT_FEMNIST-s02.yaml index 7a09cceb9..f3dc7a571 100644 --- a/benchmark/pFL-Bench/yaml_best_runs_example/FedEM-FedBN-FedOPT-FT_FEMNIST-s02.yaml +++ b/benchmark/pFL-Bench/yaml_best_runs_example/FedEM-FedBN-FedOPT-FT_FEMNIST-s02.yaml @@ -20,6 +20,7 @@ cfg_file: '' criterion: type: CrossEntropyLoss data: + save_data: false args: [] batch_size: 32 cSBM_phi: @@ -76,7 +77,6 @@ eval: - avg - fairness - raw - save_data: false split: - test - val diff --git a/federatedscope/attack/worker_as_attacker/server_attacker.py b/federatedscope/attack/worker_as_attacker/server_attacker.py 
index b44677154..1d91a1b51 100644 --- a/federatedscope/attack/worker_as_attacker/server_attacker.py +++ b/federatedscope/attack/worker_as_attacker/server_attacker.py @@ -1,4 +1,4 @@ -from federatedscope.core.worker import Server +from federatedscope.core.workers import Server from federatedscope.core.message import Message from federatedscope.core.auxiliaries.criterion_builder import get_criterion diff --git a/federatedscope/autotune/fedex/client.py b/federatedscope/autotune/fedex/client.py index 62b77d269..c870cd304 100644 --- a/federatedscope/autotune/fedex/client.py +++ b/federatedscope/autotune/fedex/client.py @@ -3,7 +3,7 @@ import copy from federatedscope.core.message import Message -from federatedscope.core.worker import Client +from federatedscope.core.workers import Client logger = logging.getLogger(__name__) diff --git a/federatedscope/autotune/fedex/server.py b/federatedscope/autotune/fedex/server.py index cfbcad6fd..d3654810f 100644 --- a/federatedscope/autotune/fedex/server.py +++ b/federatedscope/autotune/fedex/server.py @@ -9,7 +9,7 @@ from scipy.special import logsumexp from federatedscope.core.message import Message -from federatedscope.core.worker import Server +from federatedscope.core.workers import Server from federatedscope.core.auxiliaries.utils import merge_dict logger = logging.getLogger(__name__) diff --git a/federatedscope/core/aggregator.py b/federatedscope/core/aggregator.py deleted file mode 100644 index 41b269a07..000000000 --- a/federatedscope/core/aggregator.py +++ /dev/null @@ -1,275 +0,0 @@ -from abc import ABC, abstractmethod -from federatedscope.core.auxiliaries.optimizer_builder import get_optimizer -from federatedscope.core.auxiliaries.utils import param2tensor - -import torch -import os -import copy - - -class Aggregator(ABC): - def __init__(self): - pass - - @abstractmethod - def aggregate(self, agg_info): - pass - - -class ClientsAvgAggregator(Aggregator): - """Implementation of vanilla FedAvg refer to `Communication-efficient - learning of deep networks from decentralized data` [McMahan et al., 2017] - (http://proceedings.mlr.press/v54/mcmahan17a.html) - """ - def __init__(self, model=None, device='cpu', config=None): - super(Aggregator, self).__init__() - self.model = model - self.device = device - self.cfg = config - - def aggregate(self, agg_info): - """ - To preform aggregation - - Arguments: - agg_info (dict): the feedbacks from clients - :returns: the aggregated results - :rtype: dict - """ - - models = agg_info["client_feedback"] - recover_fun = agg_info['recover_fun'] if ( - 'recover_fun' in agg_info and self.cfg.federate.use_ss) else None - avg_model = self._para_weighted_avg(models, recover_fun=recover_fun) - - return avg_model - - def update(self, model_parameters): - ''' - Arguments: - model_parameters (dict): PyTorch Module object's state_dict. 
- ''' - self.model.load_state_dict(model_parameters, strict=False) - - def save_model(self, path, cur_round=-1): - assert self.model is not None - - ckpt = {'cur_round': cur_round, 'model': self.model.state_dict()} - torch.save(ckpt, path) - - def load_model(self, path): - assert self.model is not None - - if os.path.exists(path): - ckpt = torch.load(path, map_location=self.device) - self.model.load_state_dict(ckpt['model']) - return ckpt['cur_round'] - else: - raise ValueError("The file {} does NOT exist".format(path)) - - def _para_weighted_avg(self, models, recover_fun=None): - training_set_size = 0 - for i in range(len(models)): - sample_size, _ = models[i] - training_set_size += sample_size - - sample_size, avg_model = models[0] - for key in avg_model: - for i in range(len(models)): - local_sample_size, local_model = models[i] - - if self.cfg.federate.ignore_weight: - weight = 1.0 / len(models) - elif self.cfg.federate.use_ss: - # When using secret sharing, what the server receives - # are sample_size * model_para - weight = 1.0 - else: - weight = local_sample_size / training_set_size - - if not self.cfg.federate.use_ss: - local_model[key] = param2tensor(local_model[key]) - if i == 0: - avg_model[key] = local_model[key] * weight - else: - avg_model[key] += local_model[key] * weight - - if self.cfg.federate.use_ss and recover_fun: - avg_model[key] = recover_fun(avg_model[key]) - # When using secret sharing, what the server receives are - # sample_size * model_para - avg_model[key] /= training_set_size - avg_model[key] = torch.FloatTensor(avg_model[key]) - - return avg_model - - -class NoCommunicationAggregator(Aggregator): - """"Clients do not communicate. Each client work locally - """ - def aggregate(self, agg_info): - # do nothing - return {} - - -class AsynClientsAvgAggregator(ClientsAvgAggregator): - """The aggregator used in asynchronous training, which discounts the - staled model updates - """ - def __init__(self, model=None, device='cpu', config=None): - super(AsynClientsAvgAggregator, self).__init__(model, device, config) - - def aggregate(self, agg_info): - """ - To preform aggregation - - Arguments: - agg_info (dict): the feedbacks from clients - :returns: the aggregated results - :rtype: dict - """ - - models = agg_info["client_feedback"] - recover_fun = agg_info['recover_fun'] if ( - 'recover_fun' in agg_info and self.cfg.federate.use_ss) else None - staleness = [x[1] - for x in agg_info['staleness']] # (client_id, staleness) - avg_model = self._para_weighted_avg(models, - recover_fun=recover_fun, - staleness=staleness) - - # When using asynchronous training, the return feedback is model delta - # rather than the model param - updated_model = copy.deepcopy(avg_model) - init_model = self.model.state_dict() - for key in avg_model: - updated_model[key] = init_model[key] + avg_model[key] - return updated_model - - def discount_func(self, staleness): - """ - Served as an example, we discount the model update with staleness \tau - as: (1.0/((1.0+\tau)**factor)), - which has been used in previous studies such as FedAsync (Asynchronous - Federated Optimization) and FedBuff - (Federated Learning with Buffered Asynchronous Aggregation). 
- """ - return (1.0 / - ((1.0 + staleness)**self.cfg.asyn.staleness_discount_factor)) - - def _para_weighted_avg(self, models, recover_fun=None, staleness=None): - training_set_size = 0 - for i in range(len(models)): - sample_size, _ = models[i] - training_set_size += sample_size - - sample_size, avg_model = models[0] - for key in avg_model: - for i in range(len(models)): - local_sample_size, local_model = models[i] - - if self.cfg.federate.ignore_weight: - weight = 1.0 / len(models) - else: - weight = local_sample_size / training_set_size - - assert staleness is not None - weight *= self.discount_func(staleness[i]) - if isinstance(local_model[key], torch.Tensor): - local_model[key] = local_model[key].float() - else: - local_model[key] = torch.FloatTensor(local_model[key]) - - if i == 0: - avg_model[key] = local_model[key] * weight - else: - avg_model[key] += local_model[key] * weight - - return avg_model - - -class OnlineClientsAvgAggregator(ClientsAvgAggregator): - def __init__(self, - model=None, - device='cpu', - src_device='cpu', - config=None): - super(OnlineClientsAvgAggregator, self).__init__(model, device, config) - self.src_device = src_device - - def reset(self): - self.maintained = self.model.state_dict() - for key in self.maintained: - self.maintained[key].data = torch.zeros_like( - self.maintained[key], device=self.src_device) - self.cnt = 0 - - def inc(self, content): - if isinstance(content, tuple): - sample_size, model_params = content - for key in self.maintained: - # if model_params[key].device != self.maintained[key].device: - # model_params[key].to(self.maintained[key].device) - self.maintained[key] = (self.cnt * self.maintained[key] + - sample_size * model_params[key]) / ( - self.cnt + sample_size) - self.cnt += sample_size - else: - raise TypeError( - "{} is not a tuple (sample_size, model_para)".format(content)) - - def aggregate(self, agg_info): - return self.maintained - - -class ServerClientsInterpolateAggregator(ClientsAvgAggregator): - """" - # conduct aggregation by interpolating global model from server and - local models from clients - """ - def __init__(self, model=None, device='cpu', config=None, beta=1.0): - super(ServerClientsInterpolateAggregator, - self).__init__(model, device, config) - self.beta = beta # the weight for local models used in interpolation - - def aggregate(self, agg_info): - models = agg_info["client_feedback"] - global_model = self.model - elem_each_client = next(iter(models)) - assert len(elem_each_client) == 2, f"Require (sample_size, " \ - f"model_para) tuple for each " \ - f"client, i.e., len=2, but got " \ - f"len={len(elem_each_client)}" - avg_model_by_clients = self._para_weighted_avg(models) - global_local_models = [((1 - self.beta), global_model.state_dict()), - (self.beta, avg_model_by_clients)] - - avg_model_by_interpolate = self._para_weighted_avg(global_local_models) - return avg_model_by_interpolate - - -class FedOptAggregator(ClientsAvgAggregator): - """Implementation of FedOpt refer to `Adaptive Federated Optimization` [ - Reddi et al., 2021] - (https://openreview.net/forum?id=LkFG3lB13U5) - - """ - def __init__(self, config, model, device='cpu'): - super(FedOptAggregator, self).__init__(model, device, config) - self.optimizer = get_optimizer(model=self.model, - **config.fedopt.optimizer) - - def aggregate(self, agg_info): - new_model = super().aggregate(agg_info) - - model = self.model.cpu().state_dict() - with torch.no_grad(): - grads = {key: model[key] - new_model[key] for key in new_model} - - 
self.optimizer.zero_grad() - for key, p in self.model.named_parameters(): - if key in new_model.keys(): - p.grad = grads[key] - self.optimizer.step() - - return self.model.state_dict() diff --git a/federatedscope/core/aggregators/__init__.py b/federatedscope/core/aggregators/__init__.py new file mode 100644 index 000000000..e65cbb398 --- /dev/null +++ b/federatedscope/core/aggregators/__init__.py @@ -0,0 +1,19 @@ +from federatedscope.core.aggregators.aggregator import Aggregator, \ + NoCommunicationAggregator +from federatedscope.core.aggregators.clients_avg_aggregator import \ + ClientsAvgAggregator, OnlineClientsAvgAggregator +from federatedscope.core.aggregators.asyn_clients_avg_aggregator import \ + AsynClientsAvgAggregator +from federatedscope.core.aggregators.server_clients_interpolate_aggregator \ + import ServerClientsInterpolateAggregator +from federatedscope.core.aggregators.fedopt_aggregator import FedOptAggregator + +__all__ = [ + 'Aggregator', + 'NoCommunicationAggregator', + 'ClientsAvgAggregator', + 'OnlineClientsAvgAggregator', + 'AsynClientsAvgAggregator', + 'ServerClientsInterpolateAggregator', + 'FedOptAggregator', +] diff --git a/federatedscope/core/aggregators/aggregator.py b/federatedscope/core/aggregators/aggregator.py new file mode 100644 index 000000000..c8e2052ac --- /dev/null +++ b/federatedscope/core/aggregators/aggregator.py @@ -0,0 +1,18 @@ +from abc import ABC, abstractmethod + + +class Aggregator(ABC): + def __init__(self): + pass + + @abstractmethod + def aggregate(self, agg_info): + pass + + +class NoCommunicationAggregator(Aggregator): + """"Clients do not communicate. Each client work locally + """ + def aggregate(self, agg_info): + # do nothing + return {} diff --git a/federatedscope/core/aggregators/asyn_clients_avg_aggregator.py b/federatedscope/core/aggregators/asyn_clients_avg_aggregator.py new file mode 100644 index 000000000..39d33a737 --- /dev/null +++ b/federatedscope/core/aggregators/asyn_clients_avg_aggregator.py @@ -0,0 +1,79 @@ +import copy +import torch +from federatedscope.core.aggregators import ClientsAvgAggregator + + +class AsynClientsAvgAggregator(ClientsAvgAggregator): + """The aggregator used in asynchronous training, which discounts the + staled model updates + """ + def __init__(self, model=None, device='cpu', config=None): + super(AsynClientsAvgAggregator, self).__init__(model, device, config) + + def aggregate(self, agg_info): + """ + To preform aggregation + + Arguments: + agg_info (dict): the feedbacks from clients + :returns: the aggregated results + :rtype: dict + """ + + models = agg_info["client_feedback"] + recover_fun = agg_info['recover_fun'] if ( + 'recover_fun' in agg_info and self.cfg.federate.use_ss) else None + staleness = [x[1] + for x in agg_info['staleness']] # (client_id, staleness) + avg_model = self._para_weighted_avg(models, + recover_fun=recover_fun, + staleness=staleness) + + # When using asynchronous training, the return feedback is model delta + # rather than the model param + updated_model = copy.deepcopy(avg_model) + init_model = self.model.state_dict() + for key in avg_model: + updated_model[key] = init_model[key] + avg_model[key] + return updated_model + + def discount_func(self, staleness): + """ + Served as an example, we discount the model update with staleness \tau + as: (1.0/((1.0+\tau)**factor)), + which has been used in previous studies such as FedAsync (Asynchronous + Federated Optimization) and FedBuff + (Federated Learning with Buffered Asynchronous Aggregation). 
+ """ + return (1.0 / + ((1.0 + staleness)**self.cfg.asyn.staleness_discount_factor)) + + def _para_weighted_avg(self, models, recover_fun=None, staleness=None): + training_set_size = 0 + for i in range(len(models)): + sample_size, _ = models[i] + training_set_size += sample_size + + sample_size, avg_model = models[0] + for key in avg_model: + for i in range(len(models)): + local_sample_size, local_model = models[i] + + if self.cfg.federate.ignore_weight: + weight = 1.0 / len(models) + else: + weight = local_sample_size / training_set_size + + assert staleness is not None + weight *= self.discount_func(staleness[i]) + if isinstance(local_model[key], torch.Tensor): + local_model[key] = local_model[key].float() + else: + local_model[key] = torch.FloatTensor(local_model[key]) + + if i == 0: + avg_model[key] = local_model[key] * weight + else: + avg_model[key] += local_model[key] * weight + + return avg_model diff --git a/federatedscope/core/aggregators/clients_avg_aggregator.py b/federatedscope/core/aggregators/clients_avg_aggregator.py new file mode 100644 index 000000000..21ac60c1e --- /dev/null +++ b/federatedscope/core/aggregators/clients_avg_aggregator.py @@ -0,0 +1,126 @@ +import os +import torch +from federatedscope.core.aggregators import Aggregator +from federatedscope.core.auxiliaries.utils import param2tensor + + +class ClientsAvgAggregator(Aggregator): + """Implementation of vanilla FedAvg refer to `Communication-efficient + learning of deep networks from decentralized data` [McMahan et al., 2017] + (http://proceedings.mlr.press/v54/mcmahan17a.html) + """ + def __init__(self, model=None, device='cpu', config=None): + super(Aggregator, self).__init__() + self.model = model + self.device = device + self.cfg = config + + def aggregate(self, agg_info): + """ + To preform aggregation + + Arguments: + agg_info (dict): the feedbacks from clients + :returns: the aggregated results + :rtype: dict + """ + + models = agg_info["client_feedback"] + recover_fun = agg_info['recover_fun'] if ( + 'recover_fun' in agg_info and self.cfg.federate.use_ss) else None + avg_model = self._para_weighted_avg(models, recover_fun=recover_fun) + + return avg_model + + def update(self, model_parameters): + ''' + Arguments: + model_parameters (dict): PyTorch Module object's state_dict. 
+ ''' + self.model.load_state_dict(model_parameters, strict=False) + + def save_model(self, path, cur_round=-1): + assert self.model is not None + + ckpt = {'cur_round': cur_round, 'model': self.model.state_dict()} + torch.save(ckpt, path) + + def load_model(self, path): + assert self.model is not None + + if os.path.exists(path): + ckpt = torch.load(path, map_location=self.device) + self.model.load_state_dict(ckpt['model']) + return ckpt['cur_round'] + else: + raise ValueError("The file {} does NOT exist".format(path)) + + def _para_weighted_avg(self, models, recover_fun=None): + training_set_size = 0 + for i in range(len(models)): + sample_size, _ = models[i] + training_set_size += sample_size + + sample_size, avg_model = models[0] + for key in avg_model: + for i in range(len(models)): + local_sample_size, local_model = models[i] + + if self.cfg.federate.ignore_weight: + weight = 1.0 / len(models) + elif self.cfg.federate.use_ss: + # When using secret sharing, what the server receives + # are sample_size * model_para + weight = 1.0 + else: + weight = local_sample_size / training_set_size + + if not self.cfg.federate.use_ss: + local_model[key] = param2tensor(local_model[key]) + if i == 0: + avg_model[key] = local_model[key] * weight + else: + avg_model[key] += local_model[key] * weight + + if self.cfg.federate.use_ss and recover_fun: + avg_model[key] = recover_fun(avg_model[key]) + # When using secret sharing, what the server receives are + # sample_size * model_para + avg_model[key] /= training_set_size + avg_model[key] = torch.FloatTensor(avg_model[key]) + + return avg_model + + +class OnlineClientsAvgAggregator(ClientsAvgAggregator): + def __init__(self, + model=None, + device='cpu', + src_device='cpu', + config=None): + super(OnlineClientsAvgAggregator, self).__init__(model, device, config) + self.src_device = src_device + + def reset(self): + self.maintained = self.model.state_dict() + for key in self.maintained: + self.maintained[key].data = torch.zeros_like( + self.maintained[key], device=self.src_device) + self.cnt = 0 + + def inc(self, content): + if isinstance(content, tuple): + sample_size, model_params = content + for key in self.maintained: + # if model_params[key].device != self.maintained[key].device: + # model_params[key].to(self.maintained[key].device) + self.maintained[key] = (self.cnt * self.maintained[key] + + sample_size * model_params[key]) / ( + self.cnt + sample_size) + self.cnt += sample_size + else: + raise TypeError( + "{} is not a tuple (sample_size, model_para)".format(content)) + + def aggregate(self, agg_info): + return self.maintained diff --git a/federatedscope/core/aggregators/fedopt_aggregator.py b/federatedscope/core/aggregators/fedopt_aggregator.py new file mode 100644 index 000000000..47e1725a3 --- /dev/null +++ b/federatedscope/core/aggregators/fedopt_aggregator.py @@ -0,0 +1,31 @@ +import torch + +from federatedscope.core.aggregators import ClientsAvgAggregator +from federatedscope.core.auxiliaries.optimizer_builder import get_optimizer + + +class FedOptAggregator(ClientsAvgAggregator): + """Implementation of FedOpt refer to `Adaptive Federated Optimization` [ + Reddi et al., 2021] + (https://openreview.net/forum?id=LkFG3lB13U5) + + """ + def __init__(self, config, model, device='cpu'): + super(FedOptAggregator, self).__init__(model, device, config) + self.optimizer = get_optimizer(model=self.model, + **config.fedopt.optimizer) + + def aggregate(self, agg_info): + new_model = super().aggregate(agg_info) + + model = self.model.cpu().state_dict() + 
with torch.no_grad(): + grads = {key: model[key] - new_model[key] for key in new_model} + + self.optimizer.zero_grad() + for key, p in self.model.named_parameters(): + if key in new_model.keys(): + p.grad = grads[key] + self.optimizer.step() + + return self.model.state_dict() diff --git a/federatedscope/core/aggregators/server_clients_interpolate_aggregator.py b/federatedscope/core/aggregators/server_clients_interpolate_aggregator.py new file mode 100644 index 000000000..200de2543 --- /dev/null +++ b/federatedscope/core/aggregators/server_clients_interpolate_aggregator.py @@ -0,0 +1,27 @@ +from federatedscope.core.aggregators import ClientsAvgAggregator + + +class ServerClientsInterpolateAggregator(ClientsAvgAggregator): + """" + # conduct aggregation by interpolating global model from server and + local models from clients + """ + def __init__(self, model=None, device='cpu', config=None, beta=1.0): + super(ServerClientsInterpolateAggregator, + self).__init__(model, device, config) + self.beta = beta # the weight for local models used in interpolation + + def aggregate(self, agg_info): + models = agg_info["client_feedback"] + global_model = self.model + elem_each_client = next(iter(models)) + assert len(elem_each_client) == 2, f"Require (sample_size, " \ + f"model_para) tuple for each " \ + f"client, i.e., len=2, but got " \ + f"len={len(elem_each_client)}" + avg_model_by_clients = self._para_weighted_avg(models) + global_local_models = [((1 - self.beta), global_model.state_dict()), + (self.beta, avg_model_by_clients)] + + avg_model_by_interpolate = self._para_weighted_avg(global_local_models) + return avg_model_by_interpolate diff --git a/federatedscope/core/auxiliaries/aggregator_builder.py b/federatedscope/core/auxiliaries/aggregator_builder.py index e9cd0b256..778ff0c6f 100644 --- a/federatedscope/core/auxiliaries/aggregator_builder.py +++ b/federatedscope/core/auxiliaries/aggregator_builder.py @@ -10,7 +10,7 @@ def get_aggregator(method, model=None, device=None, online=False, config=None): from federatedscope.cross_backends import FedAvgAggregator return FedAvgAggregator(model=model, device=device) else: - from federatedscope.core.aggregator import ClientsAvgAggregator, \ + from federatedscope.core.aggregators import ClientsAvgAggregator, \ OnlineClientsAvgAggregator, ServerClientsInterpolateAggregator, \ FedOptAggregator, NoCommunicationAggregator, \ AsynClientsAvgAggregator diff --git a/federatedscope/core/auxiliaries/data_builder.py b/federatedscope/core/auxiliaries/data_builder.py index 87faefddc..dfaf91836 100644 --- a/federatedscope/core/auxiliaries/data_builder.py +++ b/federatedscope/core/auxiliaries/data_builder.py @@ -117,7 +117,7 @@ def _generate_data(client_num=5, if generate: data = _generate_data(client_num=config.federate.client_num, - save_data=config.eval.save_data) + save_data=config.data.save_data) else: with open(config.distribute.data_file, 'rb') as f: data = pickle.load(f) diff --git a/federatedscope/core/auxiliaries/worker_builder.py b/federatedscope/core/auxiliaries/worker_builder.py index 88d1a6d99..bff5c6183 100644 --- a/federatedscope/core/auxiliaries/worker_builder.py +++ b/federatedscope/core/auxiliaries/worker_builder.py @@ -1,7 +1,7 @@ import logging from federatedscope.core.configs import constants -from federatedscope.core.worker import Server, Client +from federatedscope.core.workers import Server, Client logger = logging.getLogger(__name__) diff --git a/federatedscope/core/configs/README.md b/federatedscope/core/configs/README.md index 
4266d6eb5..9a1e6bff6 100644 --- a/federatedscope/core/configs/README.md +++ b/federatedscope/core/configs/README.md @@ -1,35 +1,136 @@ ## Configurations -We summarize all the customizable configurations here. +We summarize all the customizable configurations: +- [cfg_data.py](#data) +- [cfg_model.py](#model) +- [cfg_fl_algo.py](#federated-algorithms) +- [cfg_training.py](#federated-training) +- [cfg_fl_setting.py](#fl-setting) +- [cfg_evaluation.py](#evaluation) +- [cfg_asyn.py](#asynchronous-training-strategies) +- [cfg_differential_privacy.py](#differential-privacy) +- [cfg_hpo.py](#auto-tuning-components) +- [cfg_attack.py](#attack) ### Data The configurations related to the data/dataset are defined in `cfg_data.py`. | Name | (Type) Default Value | Description | Note | |:----:|:-----:|:---------- |:---- | -| `data.root` | (string) 'data' | The folder where the data file located. `data.root` would be used together with `data.type` to load the dataset. | - | -| `data.type` | (string) 'toy' | Dataset name | CV: 'femnist', 'celeba' ; NLP: 'shakespeare', 'subreddit', 'twitter'; Graph: 'cora', 'citeseer', 'pubmed', 'dblp_conf', 'dblp_org', 'csbm', 'epinions', 'ciao', 'fb15k-237', 'wn18', 'fb15k' , 'MUTAG', 'BZR', 'COX2', 'DHFR', 'PTC_MR', 'AIDS', 'NCI1', 'ENZYMES', 'DD', 'PROTEINS', 'COLLAB', 'IMDB-BINARY', 'IMDB-MULTI', 'REDDIT-BINARY', 'IMDB-BINARY', 'IMDB-MULTI', 'HIV', 'ESOL', 'FREESOLV', 'LIPO', 'PCBA', 'MUV', 'BACE', 'BBBP', 'TOX21', 'TOXCAST', 'SIDER', 'CLINTOX', 'graph_multi_domain_mol', 'graph_multi_domain_small', 'graph_multi_domain_mix', 'graph_multi_domain_biochem'; MF: 'vflmovielens1m', 'vflmovielens10m', 'hflmovielens1m', 'hflmovielens10m', 'vflnetflix', 'hflnetflix'; Tabular: 'toy', 'synthetic'; External dataset: 'DNAME@torchvision', 'DNAME@torchtext', 'DNAME@huggingface_datasets', 'DNAME@openml'. | -| `data.args` | (list) [] | Args for the external dataset | Used for external dataset, eg. `[{'download': False}]` | -| `data.splitter` | (string) '' | Splitter name for standalone dataset | Generic splitter: 'lda'; Graph splitter: 'louvain', 'random', 'rel_type', 'graph_type', 'scaffold', 'scaffold_lda', 'rand_chunk' | -| `data.splitter_args` | (list) [] | Args for splitter. | Used for splitter, eg. `[{'alpha': 0.5}]` | -| `data.transform` | (list) [] | Transform for x of data | Used in `get_item` in torch.dataset, eg. `[['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]]` | -| `data.target_transform` | (list) [] | Transform for y of data | Use as `data.transform` | -| `data.pre_transform` | (list) [] | Pre_transform for `torch_geometric` dataset | Use as `data.transform` | -| `data.batch_size` | (int) 64 | batch_size for DataLoader | - | -| `data.drop_last` | (bool) False | Whether drop last batch (if the number of last batch is smaller than batch_size) in DataLoader | - | -| `data.sizes` | (list) [10, 5] | Sample size for graph DataLoader | The length of `data.sizes` must meet the layer of GNN models. 
| -| `data.shuffle` | (bool) True | Shuffle train DataLoader | - | -| `data.server_holds_all` | (bool) False | Only use in global mode, whether the server (workers with idx 0) holds all data, useful in global training/evaluation case | - | -| `data.subsample` | (float) 1.0 |  Only used in LEAF datasets, subsample clients from all clients | - | -| `data.splits` | (list) [0.8, 0.1, 0.1] | Train, valid, test splits | - | -| `data.consistent_label_distribution` | (bool) False | Make label distribution of train/val/test set over clients keep consistent during splitting | - | -| `data.cSBM_phi` | (list) [0.5, 0.5, 0.5] | Phi for cSBM graph dataset | - | -| `data.loader` | (string) '' | Graph sample name, used in minibatch trainer | 'graphsaint-rw': use `GraphSAINTRandomWalkSampler` as DataLoader; 'neighbor': use `NeighborSampler` as DataLoader. | -| `data.num_workers` | (int) 0 | num_workers in DataLoader | - | -| `data.graphsaint.walk_length` | (int) 2 | The length of each random walk in graphsaint. | - | -| `data.graphsaint.num_steps` | (int) 30 | The number of iterations per epoch in graphsaint. | - | -| `cfg.data.quadratic.dim` | (int) 1 | Dim of synthetic quadratic  dataset | - | -| `cfg.data.quadratic.min_curv` | (float) 0.02 | Min_curve of synthetic quadratic dataset | - | -| `cfg.data.quadratic.max_curv` | (float) 12.5 | Max_cur of synthetic quadratic dataset | - | +| `data.root` | (string) 'data' | The folder where the data file located. `data.root` would be used together with `data.type` to load the dataset. | - | +| `data.type` | (string) 'toy' | Dataset name | CV: 'femnist', 'celeba' ; NLP: 'shakespeare', 'subreddit', 'twitter'; Graph: 'cora', 'citeseer', 'pubmed', 'dblp_conf', 'dblp_org', 'csbm', 'epinions', 'ciao', 'fb15k-237', 'wn18', 'fb15k' , 'MUTAG', 'BZR', 'COX2', 'DHFR', 'PTC_MR', 'AIDS', 'NCI1', 'ENZYMES', 'DD', 'PROTEINS', 'COLLAB', 'IMDB-BINARY', 'IMDB-MULTI', 'REDDIT-BINARY', 'IMDB-BINARY', 'IMDB-MULTI', 'HIV', 'ESOL', 'FREESOLV', 'LIPO', 'PCBA', 'MUV', 'BACE', 'BBBP', 'TOX21', 'TOXCAST', 'SIDER', 'CLINTOX', 'graph_multi_domain_mol', 'graph_multi_domain_small', 'graph_multi_domain_mix', 'graph_multi_domain_biochem'; MF: 'vflmovielens1m', 'vflmovielens10m', 'hflmovielens1m', 'hflmovielens10m', 'vflnetflix', 'hflnetflix'; Tabular: 'toy', 'synthetic'; External dataset: 'DNAME@torchvision', 'DNAME@torchtext', 'DNAME@huggingface_datasets', 'DNAME@openml'. | +| `data.args` | (list) [] | Args for the external dataset | Used for external dataset, eg. `[{'download': False}]` | +| `data.save_data` | (bool) False | Whether to save the generated toy data | - | +| `data.splitter` | (string) '' | Splitter name for standalone dataset | Generic splitter: 'lda'; Graph splitter: 'louvain', 'random', 'rel_type', 'graph_type', 'scaffold', 'scaffold_lda', 'rand_chunk' | +| `data.splitter_args` | (list) [] | Args for splitter. | Used for splitter, eg. `[{'alpha': 0.5}]` | +| `data.transform` | (list) [] | Transform for x of data | Used in `get_item` in torch.dataset, eg. 
`[['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]]` | +| `data.target_transform` | (list) [] | Transform for y of data | Use as `data.transform` | +| `data.pre_transform` | (list) [] | Pre_transform for `torch_geometric` dataset | Use as `data.transform` | +| `data.batch_size` | (int) 64 | batch_size for DataLoader | - | +| `data.drop_last` | (bool) False | Whether drop last batch (if the number of last batch is smaller than batch_size) in DataLoader | - | +| `data.sizes` | (list) [10, 5] | Sample size for graph DataLoader | The length of `data.sizes` must meet the layer of GNN models. | +| `data.shuffle` | (bool) True | Shuffle train DataLoader | - | +| `data.server_holds_all` | (bool) False | Only use in global mode, whether the server (workers with idx 0) holds all data, useful in global training/evaluation case | - | +| `data.subsample` | (float) 1.0 |  Only used in LEAF datasets, subsample clients from all clients | - | +| `data.splits` | (list) [0.8, 0.1, 0.1] | Train, valid, test splits | - | +| `data.`
`consistent_label_distribution` | (bool) False | Make label distribution of train/val/test set over clients keep consistent during splitting | - | +| `data.cSBM_phi` | (list) [0.5, 0.5, 0.5] | Phi for cSBM graph dataset | - | +| `data.loader` | (string) '' | Graph sample name, used in minibatch trainer | 'graphsaint-rw': use `GraphSAINTRandomWalkSampler` as DataLoader; 'neighbor': use `NeighborSampler` as DataLoader. | +| `data.num_workers` | (int) 0 | num_workers in DataLoader | - | +| `data.graphsaint.walk_length` | (int) 2 | The length of each random walk in graphsaint. | - | +| `data.graphsaint.num_steps` | (int) 30 | The number of iterations per epoch in graphsaint. | - | +| `data.quadratic.dim` | (int) 1 | Dim of synthetic quadratic  dataset | - | +| `data.quadratic.min_curv` | (float) 0.02 | Min_curve of synthetic quadratic dataset | - | +| `data.quadratic.max_curv` | (float) 12.5 | Max_cur of synthetic quadratic dataset | - | + + +### Model + +The configurations related to the model are defined in `cfg_model.py`. +| [General](#model-general) | [Criterion](#criterion) | [Regularization](#regularizer) | + +#### Model-General +| Name | (Type) Default Value | Description | Note | +|:--------------------------:|:--------------------:|:------------------------------------------------:|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:| +| `model.`
`model_num_per_trainer` | (int) 1 | Number of model per trainer | some methods may leverage more | +| `model.type` | (string) 'lr' | The model name used in FL | CV: 'convnet2', 'convnet5', 'vgg11', 'lr'; NLP: 'LSTM', 'MODEL@transformers'; Graph: 'gcn', 'sage', 'gpr', 'gat', 'gin', 'mpnn'; Tabular: 'mlp', 'lr', 'quadratic'; MF: 'vmfnet', 'hmfnet' | +| `model.use_bias` | (bool) True | Whether use bias in lr model | - | +| `model.task` | (string) 'node' | The task type of model, the default is `Classification` | NLP: 'PreTraining', 'QuestionAnswering', 'SequenceClassification', 'TokenClassification', 'Auto', 'WithLMHead'; Graph: 'NodeClassification', 'NodeRegression', 'LinkClassification', 'LinkRegression', 'GraphClassification', 'GraphRegression', | +| `model.hidden` | (int) 256 | Hidden layer dimension | - | +| `model.dropout` | (float) 0.5 | Dropout ratio | - | +| `model.in_channels` | (int) 0 | Input channels dimension | If 0, model will be built by `data.shape` | +| `model.out_channels` | (int) 1 | Output channels dimension | - | +| `model.layer` | (int) 2 | Model layer | - | +| `model.graph_pooling` | (string) 'mean' | Graph pooling method in graph-level task | 'add', 'mean' or 'max' | +| `model.embed_size` | (int) 8 | `embed_size` in LSTM | - | +| `model.num_item` | (int) 0 | Number of items in MF. | It will be overwritten by the real value of the dataset. | +| `model.num_user` | (int) 0 | Number of users in MF. | It will be overwritten by the real value of the dataset. | + +#### Criterion + +| Name | (Type) Default Value | Description | Note | +|:--------------------------:|:--------------------:|:------------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:| +| `criterion.type` | (string) 'MSELoss' | Criterion type | Chosen from https://pytorch.org/docs/stable/nn.html#loss-functions , eg. 'CrossEntropyLoss', 'L1Loss', etc. | + +#### Regularizer + +| Name | (Type) Default Value | Description | Note | +|:--------------------------:|:--------------------:|:------------------------------------------------:|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:| +| `regularizer.type` | (string) ' ' | The type of the regularizer | Chosen from [`proximal_regularizer`] | +| `regularizer.mu` | (float) 0 | The factor that controls the loss of the regularization term | - | + + +### Federated Algorithms +The configurations related to specific federated algorithms, which are defined in `cfg_fl_algo.py`. + +| [FedOPT](#fedopt-for-fedopt-algorithm) | [FedProx](#fedprox-for-fedprox-algorithm) | [personalization](#personalization-for-personalization-algorithms) | [fedsageplus](#fedsageplus-for-fedsageplus-algorithm) | [gcflplus](#gcflplus-for-gcflplus-algorithm) | [flitplus](#flitplus-for-flitplus-algorithm) | + +#### `fedopt`: for FedOpt algorithm +| Name | (Type) Default Value | Description | Note | +|:----:|:-----:|:---------- |:---- | +| `fedopt.use` | (bool) False | Whether to run FL courses with FedOpt algorithm. | If False, all the related configurations (cfg.fedopt.xxx) would not take effect. | +| `fedopt.optimizer.type` | (string) 'SGD' | The type of optimizer used for FedOpt algorithm. | Currently we support all optimizers build in PyTorch (The modules under torch.optim). 
| +| `fedopt.optimizer.lr` | (float) 0.1 | The learning rate used in for FedOpt optimizer. | - | +#### `fedprox`: for FedProx algorithm +| Name | (Type) Default Value | Description | Note | +|:----:|:-----:|:---------- |:---- | +| `fedprox.use` | (bool) False | Whether to run FL courses with FedProx algorithm. | If False, all the related configurations (cfg.fedprox.xxx) would not take effect. | +| `fedprox.mu` | (float) 0.0 | The hyper-parameter $\mu$ used in FedProx algorithm. | - | +#### `personalization`: for personalization algorithms +| Name | (Type) Default Value | Description | Note | +|:----:|:-----:|:---------- |:---- | +| `personalization.local_param` | (list of str) [] | The client-distinct local param names, e.g., ['pre', 'bn'] | - | +| `personalization.`
`share_non_trainable_para` | (bool) False | Whether transmit non-trainable parameters between FL participants | - | +| `personalization.`
`local_update_steps` | (int) -1 | The local training steps for personalized models | By default, -1 indicates that the local model steps will be set to be the same as the valid `train.local_update_steps` | +| `personalization.regular_weight` | (float) 0.1 | The regularization factor used for model para regularization methods such as Ditto and pFedMe. | The smaller the regular_weight is, the stronger emphasising on personalized model. | +| `personalization.lr` | (float) 0.0 | The personalized learning rate used in personalized FL algorithms. | The default value 0.0 indicates that the value will be set to be the same as `train.optimizer.lr` in case of users have not specify a valid `personalization.lr` | +| `personalization.K` | (int) 5 | The local approximation steps for pFedMe. | - | +| `personalization.beta` | (float) 5 | The average moving parameter for pFedMe. | - | +#### `fedsageplus`: for fedsageplus algorithm +| Name | (Type) Default Value | Description | Note | +|:----:|:-----:|:---------- |:---- | +| `fedsageplus.num_pred` | (int) 5 | Number of nodes generated by the generator | - | +| `fedsageplus.gen_hidden` | (int) 128 | Hidden layer dimension of generator | - | +| `fedsageplus.hide_portion` | (float) 0.5 | Hide graph portion | - | +| `fedsageplus.fedgen_epoch` | (int) 200 | Federated training round for generator | - | +| `fedsageplus.loc_epoch` | (int) 1 | Local pre-train round for generator | - | +| `fedsageplus.a` | (float) 1.0 | Coefficient for criterion number of missing node | - | +| `fedsageplus.b` | (float) 1.0 | Coefficient for criterion feature | - | +| `fedsageplus.c` | (float) 1.0 | Coefficient for criterion classification | - | +#### `gcflplus`: for gcflplus algorithm +| Name | (Type) Default Value | Description | Note | +|:----:|:-----:|:---------- |:---- | +| `gcflplus.EPS_1` | (float) 0.05 | Bound for mean_norm | - | +| `gcflplus.EPS_2` | (float) 0.1 | Bound for max_norm | - | +| `gcflplus.seq_length` | (int) 5 | Length of the gradient sequence | - | +| `gcflplus.standardize` | (bool) False | Whether standardized dtw_distances | - | +#### `flitplus`: for flitplus algorithm +| Name | (Type) Default Value | Description | Note | +|:----:|:-----:|:---------- |:---- | +| `flitplus.tmpFed` | (float) 0.5 | gamma in focal loss (Eq.4) | - | +| `flitplus.lambdavat` | (float) 0.5 | lambda in phi (Eq.10) | - | +| `flitplus.factor_ema` | (float) 0.8 | beta in omega (Eq.12) | - | +| `flitplus.weightReg` | (float) 1.0 | balance lossLocalLabel and lossLocalVAT | - | + ### Federated training The configurations related to federated training are defined in `cfg_training.py`. @@ -37,6 +138,8 @@ Considering it's infeasible to list all the potential arguments for optimizers a For example, we haven't defined the argument `train.optimizer.weight_decay` in `cfg_training.py`, but the users are allowed directly use it. If the optimizer doesn't require the argument named `weight_decay`, an error will be raised. +| [Local Training](#local-training) | [Finetune](#fine-tuning) | [Grad Clipping](#grad-clipping) | [Early Stop](#early-stop) | + #### Local training The following configurations are related to the local training. @@ -79,9 +182,13 @@ The following configurations are related to the grad clipping. 
| `early_stop.improve_indicaator_mode` | (string) 'best' | Early stop when there is no improvement within the last `early_step.patience` rounds, in ['mean', 'best'] | Chosen from 'mean' or 'best' | | `early_step.the_smaller_the_better` | (bool) True | The optimized direction of the chosen metric | - | + ### FL Setting The configurations related to FL settings are defined in `cfg_fl_setting.py`. -#### `federate`: basic fl setting + +| [General](#federate-general-fl-setting) | [Distribute](#distribute-for-distribute-mode) | [Vertical](#vertical-for-vertical-federated-learning) | + +#### `federate`: general fl setting | Name | (Type) Default Value | Description | Note | |:----:|:-----:|:---------- |:---- | | `federate.client_num` | (int) 0 | The number of clients that involves in the FL courses. | It can set to 0 to automatically specify by the partition of dataset. | @@ -117,7 +224,7 @@ The configurations related to FL settings are defined in `cfg_fl_setting.py`. | `distribute.data_idx` | (int) -1 | It is used to specify the data index in distributed mode when adopting a centralized dataset for simulation (formatted as {data_idx: data/dataloader}). | `data_idx=-1` means that the entire dataset is owned by the participant. And we randomly sample the index in simulation for other invalid values excepted for -1. | `distribute.`
`grpc_max_send_message_length` | (int) 100 * 1024 * 1024 | The maximum length of sent messages | - | | `distribute.`
`grpc_max_receive_message_length` | (int) 100 * 1024 * 1024 | The maximum length of received messages | - |
-| `distribute.`grpc_enable_http_proxy | (bool) False | Whether to enable http proxy | - |
+| `distribute.grpc_enable_http_proxy` | (bool) False | Whether to enable http proxy | - |
#### `vertical`: for vertical federated learning
| Name | (Type) Default Value | Description | Note |
|:----:|:-----:|:---------- |:---- |
@@ -126,6 +233,32 @@ The configurations related to FL settings are defined in `cfg_fl_setting.py`.
| `vertical.dims` | (list of int) [5,10] | The dimensions of the input features for participants. | - |
| `vertical.key_size` | (int) 3072 | The length (bit) of the public keys. | - |
+
+
+### Evaluation
+The configurations related to monitoring and evaluation, which are defined in `cfg_evaluation.py`.
+
+| [General](#evaluation-general) | [WandB](#wandb-for-wandb-tracking-and-visualization) |
+
+#### Evaluation General
+| Name | (Type) Default Value | Description | Note |
+|:----:|:-----:|:---------- |:---- |
+| `eval.freq` | (int) 1 | The frequency at which we conduct evaluation. | - |
+| `eval.metrics` | (list of str) [] | The names of adopted evaluation metrics. | By default, we calculate ['loss', 'avg_loss', 'total']; all the supported metrics can be found in `core/monitors/metric_calculator.py` |
+| `eval.split` | (list of str) ['test', 'val'] | The names of the data splits on which we conduct evaluation. | - |
+| `eval.report` | (list of str) ['weighted_avg', 'avg', 'fairness', 'raw'] | The forms in which results are reported to loggers | By default, we report comprehensive results: `weighted_avg` and `avg` indicate the weighted average and uniform average over all evaluated clients; `fairness` indicates reporting fairness-related results such as individual performance and std across all evaluated clients; `raw` indicates that we save and compress all clients' individual results without summarization, and users can flexibly post-process the saved results further.|
+| `eval.`
`best_res_update_round_wise_key` | (str) 'val_loss' | The metric name we used to as the primary key to check the performance improvement at each evaluation round. | - | +| `eval.monitoring` | (list of str) [] | Extended monitoring methods or metric, e.g., 'dissim' for B-local dissimilarity | - | +| `eval.count_flops` | (bool) True | Whether to count the flops during the FL courses. | - | +#### `wandb`: for wandb tracking and visualization +| Name | (Type) Default Value | Description | Note | +|:----:|:-----:|:---------- |:---- | +| `wandb.use` | (bool) False | Whether to use wandb to track and visualize the FL dynamics and results. | If `False`, all the related configurations (`wandb.xxx`) would not take effect. | +| `wandb.name_user` | (str) '' | the user name used in wandb management | - | +| `wandb.name_project` | (str) '' | the project name used in wandb management | - | +| `wandb.online_track` | (bool) True | whether to track the results in an online manner, i.e., log results at every evaluation round | - | +| `wandb.client_train_info` | (bool) True | whether to track the training info of clients | - | + + ### Asynchronous Training Strategies The configurations related to applying asynchronous training strategies in FL are defined in `cfg_asyn.py`. @@ -141,7 +274,9 @@ The configurations related to applying asynchronous training strategies in FL ar | `asyn.broadcast_manner` | (string) 'after_aggregating'
Choices: {'after_aggregating', 'after_receiving'} | The broadcasting manner of the server. | 'after_aggregating': broadcast the up-to-date global model after performing federated aggregation; 'after_receiving': broadcast the up-to-date global model after receiving the model update from clients. |
| `asyn.overselection` | (bool) False | Whether to use the overselection technique | - |
+
### Differential Privacy
+| [NbAFL](#nbafl) | [SGDMF](#sgdmf) |
#### NbAFL
The configurations related to NbAFL method.
@@ -166,11 +301,14 @@ The configurations related to SGDMF method (only used in matrix factorization ta
| `sgdmf.constant` | (float) 1. | The constant in SGDMF | - |
| `sgdmf.theta` | (int) -1 | - | -1 means per-rating privacy, otherwise per-user privacy |
+
### Auto-tuning Components
These arguments are exposed for customizing our provided auto-tuning components.
-#### General
+| [General](#auto-tuning-general) | [SHA](#successive-halving-algorithm-sha) | [FedEx](#fedex) | [Wrappers for FedEx](#wrappers-for-fedex) |
+
+#### Auto-tuning General
| Name | (Type) Default Value | Description | Note |
|:----:|:--------------------:|:-------------------------------------------|:-----|
@@ -209,4 +347,49 @@ These arguments are exposed for customizing our provided auto-tuning components.
|:----:|:--------------------:|:-------------------------------------------|:-----|
| `hpo.table.eps` | (float) 0.1 | The probability to make local perturbation. | Larger values lead to drastically different arms of the bandit FedEx attempts to solve. |
| `hpo.table.num` | (int) 27 | The number of arms of the bandit FedEx attempts to solve. | - |
-| `hpo.table.idx` | (int) 0 | The key (i.e., name) of the hyperparameter wrapper considers. | No need to change this argument. |
\ No newline at end of file
+| `hpo.table.idx` | (int) 0 | The key (i.e., name) of the hyperparameter wrapper considers. | No need to change this argument. |
+
+
+### Attack
+
+The configurations related to the attack methods are defined in `cfg_attack.py`.
+
+| [Privacy Attack](#for-privacy-attack) | [Back-door Attack](#for-back-door-attack) |
+
+
+#### For Privacy Attack
+| Name | (Type) Default Value | Description | Note |
+|:----:|:-----:|:---------- |:---- |
+`attack.attack_method` | (str) '' | Attack method name | Choices: {'gan_attack', 'GradAscent', 'PassivePIA', 'DLG', 'IG', 'backdoor'} |
+`attack.target_label_ind` | (int) -1 | The target label to attack | Used in class representative attack (GAN based method) and back-door attack; default -1 means no label to target|
+`attack.attacker_id` | (int) -1 | The id of the attack client | Default -1 means no client as attacker; used in both privacy attack and back-door attack when a client is the attacker |
+`attack.reconstruct_lr` | (float) 0.01 | The learning rate of the optimization based training data/label inference attack|-|
+`attack.reconstruct_optim` | (str) 'Adam' | The optimizer used in the optimization based training data/label inference attack|Choices: {'Adam', 'SGD', 'LBFGS'}|
+`attack.info_diff_type` | (str) 'l2' | The distance to compare the ground-truth info (gradients or model updates) and the info generated by the dummy data. | Options: 'l2', 'l1', 'sim' representing L2, L1 and cosine similarity |
+`attack.max_ite` | (int) 400 | The maximum iteration of the optimization based training data/label inference attack |-|
+`attack.alpha_TV` | (float) 0.001 | The hyperparameter of the total variation term | Used in the invert gradient method |
+`attack.inject_round` | (int) 0 | The round to start performing the attack actions |-|
+`attack.classifier_PIA` | (str) 'randomforest' | The property inference classifier name |-|
+
+#### For Back-door Attack
+| Name | (Type) Default Value | Description | Note |
+|:----:|:-----:|:---------- |:---- |
+`attack.edge_path` |(str) 'edge_data/' | The folder where the OOD data used by edge-case backdoor attacks is located |-|
+`attack.trigger_path` |(str) 'trigger/'|The folder where the trigger pictures used by pixel-wise backdoor attacks are located |-|
+`attack.setting` | (str) 'fix'| The setting about how to select the attack client. |Choices: {'fix', 'single', 'all'}; the 'single' setting means the attack client can only be selected in the predefined round (cfg.attack.insert_round); the 'all' setting means the attack client can be selected in every round; the 'fix' setting means the attack client can be selected every freq rounds, where freq is defined by the cfg.attack.freq keyword.|
+`attack.freq` | (int) 10 |This keyword is used in the 'fix' setting. The attack client can be selected every freq rounds.|-|
+`attack.insert_round` |(int) 100000 |This keyword is used in the 'single' setting. The attack client can only be selected in the insert_round round.|-|
+`attack.mean` |(list) [0.1307] |The mean value used in the normalization procedure of the poisoning data. |Notice: The length of this list must be the same as the number of channels of the used dataset.|
+`attack.std` |(list) [0.3081] |The std value used in the normalization procedure of the poisoning data.|Notice: The length of this list must be the same as the number of channels of the used dataset.|
+`attack.trigger_type`|(str) 'edge'|This keyword represents the type of the used triggers|Choices: {'edge', 'gridTrigger', 'hkTrigger', 'sigTrigger', 'wanetTrigger', 'fourCornerTrigger'}|
+`attack.label_type` |(str) 'dirty'| This keyword represents the type of the used attack.|It contains 'dirty'-label and 'clean'-label attacks. Currently, we only support the 'dirty'-label attack. |
+`attack.edge_num` |(int) 100 | This keyword represents the number of good samples used for the edge-case attack.|-|
+`attack.poison_ratio` |(float) 0.5|This keyword represents the percentage of samples with pixel-wise triggers in the local dataset of the attack client|-|
+`attack.scale_poisoning` |(bool) False| This keyword represents whether to use the model scaling attack for the attack client. |-|
+`attack.scale_para` |(float) 1.0 |This keyword represents the value to amplify the model update when conducting the model scaling attack.|-|
+`attack.pgd_poisoning` |(bool) False|This keyword represents whether to use PGD to train the local model for the attack client. 
|-| +`attack.pgd_lr` | (float) 0.1 |This keyword represents learning rate of pgd training for attack client.|-| +`attack.pgd_eps`|(int) 2 | This keyword represents perturbation budget of pgd training for attack client.|-| +`attack.self_opt` |(bool) False |This keyword represents whether to use his own training procedure for attack client.|-| +`attack.self_lr` |(float) 0.05|This keyword represents learning rate of his own training procedure for attack client.|-| +`attack.self_epoch` |(int) 6 |This keyword represents epoch number of his own training procedure for attack client.|-| diff --git a/federatedscope/core/configs/cfg_data.py b/federatedscope/core/configs/cfg_data.py index 65c7361b5..dea7c2091 100644 --- a/federatedscope/core/configs/cfg_data.py +++ b/federatedscope/core/configs/cfg_data.py @@ -10,6 +10,7 @@ def extend_data_cfg(cfg): cfg.data.root = 'data' cfg.data.type = 'toy' + cfg.data.save_data = False # whether to save the generated toy data cfg.data.args = [] # args for external dataset, eg. [{'download': True}] cfg.data.splitter = '' cfg.data.splitter_args = [] # args for splitter, eg. [{'alpha': 0.5}] diff --git a/federatedscope/core/configs/cfg_evaluation.py b/federatedscope/core/configs/cfg_evaluation.py index 065b991c5..09b9cdd48 100644 --- a/federatedscope/core/configs/cfg_evaluation.py +++ b/federatedscope/core/configs/cfg_evaluation.py @@ -10,7 +10,6 @@ def extend_evaluation_cfg(cfg): cfg.eval = CN( new_allowed=True) # allow user to add their settings under `cfg.eval` - cfg.eval.save_data = False cfg.eval.freq = 1 cfg.eval.metrics = [] cfg.eval.split = ['test', 'val'] diff --git a/federatedscope/core/configs/cfg_fl_algo.py b/federatedscope/core/configs/cfg_fl_algo.py index 6443cb83d..e1f242bf5 100644 --- a/federatedscope/core/configs/cfg_fl_algo.py +++ b/federatedscope/core/configs/cfg_fl_algo.py @@ -54,13 +54,21 @@ def extend_fl_algo_cfg(cfg): # ---------------------------------------------------------------------- # cfg.fedsageplus = CN() + # Number of nodes generated by the generator cfg.fedsageplus.num_pred = 5 + # Hidden layer dimension of generator cfg.fedsageplus.gen_hidden = 128 + # Hide graph portion cfg.fedsageplus.hide_portion = 0.5 + # Federated training round for generator cfg.fedsageplus.fedgen_epoch = 200 + # Local pre-train round for generator cfg.fedsageplus.loc_epoch = 1 + # Coefficient for criterion number of missing node cfg.fedsageplus.a = 1.0 + # Coefficient for criterion feature cfg.fedsageplus.b = 1.0 + # Coefficient for criterion classification cfg.fedsageplus.c = 1.0 # ---------------------------------------------------------------------- # @@ -68,9 +76,13 @@ def extend_fl_algo_cfg(cfg): # ---------------------------------------------------------------------- # cfg.gcflplus = CN() + # Bound for mean_norm cfg.gcflplus.EPS_1 = 0.05 + # Bound for max_norm cfg.gcflplus.EPS_2 = 0.1 + # Length of the gradient sequence cfg.gcflplus.seq_length = 5 + # Whether standardized dtw_distances cfg.gcflplus.standardize = False # ---------------------------------------------------------------------- # diff --git a/federatedscope/core/fed_runner.py b/federatedscope/core/fed_runner.py index 11db56bbb..3df4c8179 100644 --- a/federatedscope/core/fed_runner.py +++ b/federatedscope/core/fed_runner.py @@ -5,7 +5,7 @@ import numpy as np -from federatedscope.core.worker import Server, Client +from federatedscope.core.workers import Server, Client from federatedscope.core.gpu_manager import GPUManager from federatedscope.core.auxiliaries.model_builder import get_model from 
federatedscope.core.auxiliaries.data_builder import merge_data diff --git a/federatedscope/core/worker/__init__.py b/federatedscope/core/workers/__init__.py similarity index 53% rename from federatedscope/core/worker/__init__.py rename to federatedscope/core/workers/__init__.py index 05be87247..777c989da 100644 --- a/federatedscope/core/worker/__init__.py +++ b/federatedscope/core/workers/__init__.py @@ -3,8 +3,8 @@ from __future__ import division from __future__ import with_statement -from federatedscope.core.worker.base_worker import Worker -from federatedscope.core.worker.server import Server -from federatedscope.core.worker.client import Client +from federatedscope.core.workers.base_worker import Worker +from federatedscope.core.workers.server import Server +from federatedscope.core.workers.client import Client __all__ = ['Worker', 'Server', 'Client'] diff --git a/federatedscope/core/worker/base_worker.py b/federatedscope/core/workers/base_worker.py similarity index 100% rename from federatedscope/core/worker/base_worker.py rename to federatedscope/core/workers/base_worker.py diff --git a/federatedscope/core/worker/client.py b/federatedscope/core/workers/client.py similarity index 99% rename from federatedscope/core/worker/client.py rename to federatedscope/core/workers/client.py index 366885453..86a399a3a 100644 --- a/federatedscope/core/worker/client.py +++ b/federatedscope/core/workers/client.py @@ -7,7 +7,7 @@ from federatedscope.core.communication import StandaloneCommManager, \ gRPCCommManager from federatedscope.core.monitors.early_stopper import EarlyStopper -from federatedscope.core.worker import Worker +from federatedscope.core.workers import Worker from federatedscope.core.auxiliaries.trainer_builder import get_trainer from federatedscope.core.secret_sharing import AdditiveSecretSharing from federatedscope.core.auxiliaries.utils import merge_dict, \ diff --git a/federatedscope/core/worker/server.py b/federatedscope/core/workers/server.py similarity index 99% rename from federatedscope/core/worker/server.py rename to federatedscope/core/workers/server.py index ad3c5ca71..cdfb22420 100644 --- a/federatedscope/core/worker/server.py +++ b/federatedscope/core/workers/server.py @@ -10,7 +10,7 @@ from federatedscope.core.message import Message from federatedscope.core.communication import StandaloneCommManager, \ gRPCCommManager -from federatedscope.core.worker import Worker +from federatedscope.core.workers import Worker from federatedscope.core.auxiliaries.aggregator_builder import get_aggregator from federatedscope.core.auxiliaries.sampler_builder import get_sampler from federatedscope.core.auxiliaries.utils import merge_dict, Timeout, \ diff --git a/federatedscope/gfl/fedsageplus/worker.py b/federatedscope/gfl/fedsageplus/worker.py index c5bf0f26c..f1812598d 100644 --- a/federatedscope/gfl/fedsageplus/worker.py +++ b/federatedscope/gfl/fedsageplus/worker.py @@ -5,8 +5,8 @@ from torch_geometric.loader import NeighborSampler from federatedscope.core.message import Message -from federatedscope.core.worker.server import Server -from federatedscope.core.worker.client import Client +from federatedscope.core.workers.server import Server +from federatedscope.core.workers.client import Client from federatedscope.core.auxiliaries.utils import merge_dict from federatedscope.gfl.trainer.nodetrainer import NodeMiniBatchTrainer diff --git a/federatedscope/gfl/gcflplus/worker.py b/federatedscope/gfl/gcflplus/worker.py index 3daea4175..f368a911d 100644 --- 
a/federatedscope/gfl/gcflplus/worker.py +++ b/federatedscope/gfl/gcflplus/worker.py @@ -4,8 +4,8 @@ import numpy as np from federatedscope.core.message import Message -from federatedscope.core.worker.server import Server -from federatedscope.core.worker.client import Client +from federatedscope.core.workers.server import Server +from federatedscope.core.workers.client import Client from federatedscope.core.auxiliaries.utils import merge_dict from federatedscope.gfl.gcflplus.utils import compute_pairwise_distances, \ min_cut, norm diff --git a/federatedscope/vertical_fl/worker/vertical_client.py b/federatedscope/vertical_fl/worker/vertical_client.py index 50f34e3b0..b463ea82e 100644 --- a/federatedscope/vertical_fl/worker/vertical_client.py +++ b/federatedscope/vertical_fl/worker/vertical_client.py @@ -1,7 +1,7 @@ import numpy as np import logging -from federatedscope.core.worker import Client +from federatedscope.core.workers import Client from federatedscope.core.message import Message from federatedscope.vertical_fl.dataloader.utils import batch_iter diff --git a/federatedscope/vertical_fl/worker/vertical_server.py b/federatedscope/vertical_fl/worker/vertical_server.py index 03a38b114..d1e2da946 100644 --- a/federatedscope/vertical_fl/worker/vertical_server.py +++ b/federatedscope/vertical_fl/worker/vertical_server.py @@ -1,7 +1,7 @@ import numpy as np import logging -from federatedscope.core.worker import Server +from federatedscope.core.workers import Server from federatedscope.core.message import Message from federatedscope.vertical_fl.Paillier import abstract_paillier
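The renames above move `federatedscope.core.worker` to `federatedscope.core.workers` and split the monolithic `federatedscope/core/aggregator.py` into the `federatedscope/core/aggregators` package, while the config change relocates `save_data` from `eval` to `data`. Below is a minimal sketch of how downstream code would follow the new import paths, assuming a FederatedScope installation that already contains this refactoring; the `TrimmedMeanAggregator` class and its trimming rule are hypothetical and only illustrate subclassing `ClientsAvgAggregator` from its new location.

```python
import torch

# New import paths introduced by this patch; the old modules
# `federatedscope.core.worker` and `federatedscope.core.aggregator` are removed.
from federatedscope.core.workers import Server, Client  # noqa: F401
from federatedscope.core.aggregators import ClientsAvgAggregator


class TrimmedMeanAggregator(ClientsAvgAggregator):
    """Hypothetical aggregator: element-wise trimmed mean over client updates."""
    def aggregate(self, agg_info):
        # As in the built-in aggregators, `client_feedback` is a list of
        # (sample_size, model_parameters) tuples collected by the server.
        models = agg_info["client_feedback"]
        stacked = {}
        for _, model_para in models:
            for key, value in model_para.items():
                stacked.setdefault(key, []).append(torch.as_tensor(value).float())
        avg_model = {}
        for key, tensors in stacked.items():
            values = torch.stack(tensors, dim=0)
            if values.shape[0] > 2:
                # Drop the per-entry minimum and maximum before averaging.
                values, _ = torch.sort(values, dim=0)
                values = values[1:-1]
            avg_model[key] = values.mean(dim=0)
        return avg_model
```

For existing user code, the migration is mechanical (`worker` -> `workers`, `aggregator` -> `aggregators`); configs written against the old layout only need the `save_data` key moved from the `eval` section to the `data` section, as the updated pFL-Bench YAML files in this patch show.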