In [None]:
import torch
from peft import (
    get_peft_model_state_dict,
    set_peft_model_state_dict,
    PeftType,
)
from datasets import load_dataset

batch_size = 32
model_name_or_path = "roberta-large"
dataset_name_or_path = "stanfordnlp/snli"
task = "snli"
peft_type = PeftType.LORA
device = "cuda"

In [None]:
datasets = load_dataset(dataset_name_or_path, split="train[:60%]")
datasets_test = load_dataset(dataset_name_or_path, split="test")

In [4]:
from transformers import RobertaConfig, RobertaTokenizer, RobertaForSequenceClassification, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    llm_int8_skip_modules=["classifier"]
)

model_name = "FacebookAI/roberta-large"
config = RobertaConfig.from_pretrained(
    model_name,
    num_labels=3,
)
tokenizer = RobertaTokenizer.from_pretrained(
    "FacebookAI/roberta-large",
    do_lower_case=False,
)
model = RobertaForSequenceClassification.from_pretrained(
    "FacebookAI/roberta-large",
    config=config,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
)

`low_cpu_mem_usage` was None, now default to True since model is quantized.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
import torch
import torch.nn as nn
import transformers
from torch.utils.data import TensorDataset
from transformers.data.processors.utils import InputExample
from transformers.data.processors.glue import glue_convert_examples_to_features

LABEL_LIST = [0, 1, 2]

def _create_examples(dataset, set_type):
    """ Convert raw dataframe to a list of InputExample. Filter malformed examples
    """
    examples = []
    for index, item in enumerate(dataset):
        if item['label'] not in LABEL_LIST:
            continue
        if not isinstance(item['premise'], str) or not isinstance(item['hypothesis'], str):
            continue
        guid = f"{index}-{set_type}"
        examples.append(
            InputExample(guid=guid, text_a=item['premise'], text_b=item['hypothesis'], label=item['label']))
    return examples

def _dataset_to_features(dataset, set_type):
    """ Pre-process text. This method will:
    1) tokenize inputs
    2) cut or pad each sequence to MAX_SEQ_LENGHT
    3) convert tokens into ids

    The output will contain:
    `input_ids` - padded token ids sequence
    `attention mask` - mask indicating padded tokens
    `token_type_ids` - mask indicating the split between premise and hypothesis
    `label` - label
    """
    examples = _create_examples(dataset, set_type)

    #backward compatibility with older transformers versions
    legacy_kwards = {}
    from packaging import version
    if version.parse(transformers.__version__) < version.parse("2.9.0"):
        legacy_kwards = {
            "pad_on_left": False,
            "pad_token": tokenizer.convert_tokens_to_ids([tokenizer.pad_token])[0],
            "pad_token_segment_id": 0,
        }

    return glue_convert_examples_to_features(
        examples=examples,
        tokenizer=tokenizer,
        label_list=LABEL_LIST,
        max_length=128,
        output_mode="classification",
        **legacy_kwards,
    )

def _features_to_dataset(features):
    """ Convert features from `_df_to_features` into a single dataset
    """
    all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
    all_attention_mask = torch.tensor(
        [f.attention_mask for f in features], dtype=torch.long
    )
    all_labels = torch.tensor([f.label for f in features], dtype=torch.long)
    dataset = TensorDataset(
        all_input_ids, all_attention_mask, all_labels
    )

    return dataset


train_features = _dataset_to_features(datasets, "train")
test_features = _dataset_to_features(datasets_test, "test")

train_dataset = _features_to_dataset(train_features)
test_dataset = _features_to_dataset(test_features)



In [7]:
BATCH_SIZE = 256
MAX_PHYSICAL_BATCH_SIZE = 64

from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from opacus.utils.uniform_sampler import UniformWithReplacementSampler

train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE)
test_dataloader = DataLoader(test_dataset, sampler=RandomSampler(test_dataset), batch_size=BATCH_SIZE)

In [None]:
from peft import get_peft_model, LoraConfig, TaskType

total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Total parameters count: {total_params:,}")


lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,  # our particular task is sequence classification
    inference_mode=False,  # Enable training mode
    r=8,  # Low-rank dimension
    lora_alpha=8,  # Alpha scaling factor
    lora_dropout=0.05,  # Dropout for LoRA layers
    target_modules=["query", "value"],
)

model_with_lora = get_peft_model(model, lora_config)
trainable_params = sum(p.numel() for p in model_with_lora.parameters() if p.requires_grad)
print(f"Total trainable parameters with LoRA: {trainable_params:,}")

# FFA-LoRA modification: freeze all adapter A matrices so that only B matrices are trainable
for name, param in model_with_lora.named_parameters():
    if "lora_A" in name or "classifier" in name:
        param.requires_grad = False

# LoRA
# for name, param in model_with_lora.named_parameters():
#     if "classifier" in name:
#         param.requires_grad = False

trainable_params = sum(p.numel() for p in model_with_lora.parameters() if p.requires_grad)
print(f"Total trainable parameters with LoRA after freezing matrix A: {trainable_params:,}")

Total parameters count: 53,151,747
Total trainable parameters with LoRA: 1,839,107
Total trainable parameters with LoRA after freezing matrix A: 393,216


In [None]:
EPOCHS = 1
LOGGING_INTERVAL = 200 # once every how many steps we run evaluation cycle and report metrics
EPSILON = 6.0
DELTA = 1e-5 # probability of not achieving privacy guarantees
MAX_GRAD_NORM = 10.0

In [None]:
def accuracy(preds, labels):
    return (preds == labels).float().mean().item()

def evaluate(model):
    model.eval()

    loss_arr = []
    accuracy_arr = []

    for batch in test_dataloader:
        batch = tuple(t.to(device) for t in batch)

        with torch.no_grad():
            inputs = {'input_ids':      batch[0],
                      'attention_mask': batch[1],
                      'labels':         batch[2]}

            outputs = model(**inputs)
            loss, logits = outputs[:2]

            preds = torch.argmax(logits, dim=1)
            labels = inputs['labels']

            loss_arr.append(loss.item())
            accuracy_arr.append(accuracy(preds, labels))

    model.train()
    avg_loss = sum(loss_arr) / len(loss_arr)
    avg_accuracy = sum(accuracy_arr) / len(accuracy_arr)
    return avg_loss, avg_accuracy

In [None]:
import copy
import numpy as np
import torch
from torch.utils.data import DataLoader, Subset, RandomSampler
from opacus import PrivacyEngine
from opacus.utils.batch_memory_manager import BatchMemoryManager
from tqdm import tqdm

# --- PARTITIONING ---

def partition_dataset_homogeneous(dataset: TensorDataset, num_clients=3):
    indices = np.random.permutation(len(dataset))
    splits = np.array_split(indices, num_clients)
    return [Subset(dataset, split) for split in splits]

def partition_dataset_heterogeneous(dataset: TensorDataset, num_clients=3):
    """
    3-class classification task, partitioning scheme:
      - Client 0: 90% of samples have label 0, 5% label 1, 5% label 2
      - Client 1: 90% label 1, 5% label 0, 5% label 2
      - Client 2: 90% label 2, 5% label 0, 5% label 1
    """
    # Extract labels as numpy array
    labels = np.array([sample.item() for sample in dataset.tensors[2]])
    indices = np.arange(len(labels))
    partitions = []
    
    proportions = [
        {0: 0.9, 1: 0.05, 2: 0.05},
        {1: 0.9, 0: 0.05, 2: 0.05},
        {2: 0.9, 0: 0.05, 1: 0.05},
    ]
    for client in range(num_clients):
        client_indices = []
        for label in [0, 1, 2]:
            label_indices = indices[labels == label]
            # Determine number of samples to pick for this label from client partition
            num_samples = int(len(label_indices) * proportions[client][label])
            if num_samples > 0:
                selected = np.random.choice(label_indices, size=num_samples, replace=False)
                client_indices.extend(selected)
        partitions.append(Subset(dataset, client_indices))
    return partitions


# client_train_partitions = partition_dataset_homogeneous(train_dataset, num_clients=3)

client_train_partitions = partition_dataset_heterogeneous(train_dataset, num_clients=3)

# consideration
client_test_partitions = [test_dataset] * 3

# --- FEDERATED TRAINING LOOP ---

def FedAvg(w):
    w_avg = copy.deepcopy(w[0])
    for k in w_avg.keys():
        for i in range(1, len(w)):
            w_avg[k] += w[i][k]
        w_avg[k] = torch.div(w_avg[k], len(w))
    return w_avg


global_model = copy.deepcopy(model_with_lora).to(device)
global_model.train()

num_clients = 3

NUM_GLOBAL_ROUNDS = 20


for round in range(1, NUM_GLOBAL_ROUNDS + 1):
    print(f"Global Round {round}")
    client_params_list = []
    client_sample_counts = []

    for client_id in range(num_clients):
        print(f"  Client {client_id} training...")

        local_model = copy.deepcopy(global_model).to(device)
        local_model.train()
        # local dataloader
        local_train_loader = DataLoader(
            client_train_partitions[client_id],
            sampler=RandomSampler(client_train_partitions[client_id]),
            batch_size=BATCH_SIZE,
        )

        
        local_optimizer = torch.optim.SGD(local_model.parameters(), lr=0.2)
        
        privacy_engine = PrivacyEngine()

        local_model, local_optimizer, local_train_loader = privacy_engine.make_private_with_epsilon(
            module=local_model,
            optimizer=local_optimizer,
            data_loader=local_train_loader,
            target_delta=DELTA,
            target_epsilon=EPSILON,
            epochs=EPOCHS,  # local training epochs
            max_grad_norm=MAX_GRAD_NORM,
        )

        for epoch in range(1, EPOCHS+1):
            local_losses = []
            with BatchMemoryManager(
                data_loader=local_train_loader,
                max_physical_batch_size=MAX_PHYSICAL_BATCH_SIZE,
                optimizer=local_optimizer
            ) as memory_safe_loader:
                for step, batch in enumerate(tqdm(memory_safe_loader, desc=f"Client {client_id} training")):
                    local_optimizer.zero_grad()
                    batch = tuple(t.to(device) for t in batch)
                    inputs = {
                        'input_ids': batch[0],
                        'attention_mask': batch[1],
                        'labels': batch[2],
                    }
                    outputs = local_model(**inputs)
                    loss = outputs[0]
                    loss.backward()
                    local_losses.append(loss.item())
                    local_optimizer.step()
                    if step > 0 and step % LOGGING_INTERVAL == 0:
                        train_loss = np.mean(local_losses)
                        eps = privacy_engine.get_epsilon(DELTA)

                        eval_loss, eval_accuracy = evaluate(local_model)

                        print(
                            f"Step: {step} | "
                            f"Train loss: {train_loss:.3f} | "
                            f"Eval loss: {eval_loss:.3f} | "
                            f"Eval accuracy: {eval_accuracy:.3f} | "
                            f"ɛ: {eps:.2f} "
                        )

        print(f"    Local training loss: {np.mean(local_losses):.3f}")
        # get parameters
        local_params = get_peft_model_state_dict(local_model.base_model)
        client_params_list.append(local_params)
        client_sample_counts.append(len(client_train_partitions[client_id]))

    # aggregate
    aggregated_params = FedAvg(client_params_list)
    # update ~.base_model!!!
    set_peft_model_state_dict(global_model.base_model, aggregated_params)

    # eval global
    global_eval_loss, global_eval_accuracy = evaluate(global_model)

    print(
        f"After Global Round {round}: Eval loss: {global_eval_loss:.3f} | "
        f"Eval accuracy: {global_eval_accuracy:.3f} | ɛ: {eps:.2f}"
    )

Global Round 1
  Client 0 training...


Client 0 training:   7%|▋         | 201/2865 [01:05<3:13:07,  4.35s/it]

Step: 200 | Train loss: 0.639 | Eval loss: 2.126 | Eval accuracy: 0.342 | ɛ: 3.93 


Client 0 training:  14%|█▍        | 401/2865 [02:11<2:58:53,  4.36s/it]

Step: 400 | Train loss: 0.536 | Eval loss: 2.183 | Eval accuracy: 0.342 | ɛ: 4.38 


Client 0 training:  21%|██        | 601/2865 [03:16<2:45:15,  4.38s/it]

Step: 600 | Train loss: 0.490 | Eval loss: 2.212 | Eval accuracy: 0.345 | ɛ: 4.64 


Client 0 training:  28%|██▊       | 801/2865 [04:22<2:30:58,  4.39s/it]

Step: 800 | Train loss: 0.469 | Eval loss: 2.206 | Eval accuracy: 0.342 | ɛ: 4.83 


Client 0 training:  35%|███▍      | 1001/2865 [05:29<2:16:47,  4.40s/it]

Step: 1000 | Train loss: 0.455 | Eval loss: 2.248 | Eval accuracy: 0.343 | ɛ: 4.99 


Client 0 training:  42%|████▏     | 1201/2865 [06:35<2:01:27,  4.38s/it]

Step: 1200 | Train loss: 0.449 | Eval loss: 2.138 | Eval accuracy: 0.343 | ɛ: 5.12 


Client 0 training:  49%|████▉     | 1401/2865 [07:41<1:46:04,  4.35s/it]

Step: 1400 | Train loss: 0.443 | Eval loss: 2.155 | Eval accuracy: 0.344 | ɛ: 5.24 


Client 0 training:  56%|█████▌    | 1601/2865 [08:48<1:32:21,  4.38s/it]

Step: 1600 | Train loss: 0.440 | Eval loss: 2.159 | Eval accuracy: 0.343 | ɛ: 5.35 


Client 0 training:  63%|██████▎   | 1801/2865 [09:54<1:17:08,  4.35s/it]

Step: 1800 | Train loss: 0.435 | Eval loss: 2.153 | Eval accuracy: 0.344 | ɛ: 5.45 


Client 0 training:  70%|██████▉   | 2001/2865 [10:59<1:03:17,  4.40s/it]

Step: 2000 | Train loss: 0.431 | Eval loss: 2.168 | Eval accuracy: 0.343 | ɛ: 5.54 


Client 0 training:  77%|███████▋  | 2201/2865 [12:05<48:46,  4.41s/it]  

Step: 2200 | Train loss: 0.430 | Eval loss: 2.117 | Eval accuracy: 0.341 | ɛ: 5.63 


Client 0 training:  84%|████████▍ | 2401/2865 [13:12<33:58,  4.39s/it]

Step: 2400 | Train loss: 0.427 | Eval loss: 2.145 | Eval accuracy: 0.343 | ɛ: 5.71 


Client 0 training:  91%|█████████ | 2601/2865 [14:19<19:28,  4.43s/it]

Step: 2600 | Train loss: 0.425 | Eval loss: 2.203 | Eval accuracy: 0.343 | ɛ: 5.79 


Client 0 training:  98%|█████████▊| 2801/2865 [15:25<04:39,  4.37s/it]

Step: 2800 | Train loss: 0.423 | Eval loss: 2.166 | Eval accuracy: 0.344 | ɛ: 5.86 


Client 0 training: 3001it [16:31,  4.42s/it]                          

Step: 3000 | Train loss: 0.422 | Eval loss: 2.137 | Eval accuracy: 0.342 | ɛ: 5.93 


Client 0 training: 3174it [17:17,  3.06it/s]


    Local training loss: 0.421
  Client 1 training...


Client 1 training:   7%|▋         | 201/2856 [01:04<3:12:46,  4.36s/it]

Step: 200 | Train loss: 0.519 | Eval loss: 2.236 | Eval accuracy: 0.329 | ɛ: 3.92 


Client 1 training:  14%|█▍        | 401/2856 [02:08<2:57:29,  4.34s/it]

Step: 400 | Train loss: 0.455 | Eval loss: 2.298 | Eval accuracy: 0.329 | ɛ: 4.36 


Client 1 training:  21%|██        | 601/2856 [03:11<2:43:32,  4.35s/it]

Step: 600 | Train loss: 0.436 | Eval loss: 2.264 | Eval accuracy: 0.327 | ɛ: 4.63 


Client 1 training:  28%|██▊       | 801/2856 [04:16<2:29:14,  4.36s/it]

Step: 800 | Train loss: 0.429 | Eval loss: 2.258 | Eval accuracy: 0.327 | ɛ: 4.82 


Client 1 training:  35%|███▌      | 1001/2856 [05:21<2:14:59,  4.37s/it]

Step: 1000 | Train loss: 0.421 | Eval loss: 2.241 | Eval accuracy: 0.328 | ɛ: 4.98 


Client 1 training:  42%|████▏     | 1201/2856 [06:27<2:01:04,  4.39s/it]

Step: 1200 | Train loss: 0.418 | Eval loss: 2.265 | Eval accuracy: 0.329 | ɛ: 5.12 


Client 1 training:  49%|████▉     | 1401/2856 [07:33<1:46:18,  4.38s/it]

Step: 1400 | Train loss: 0.416 | Eval loss: 2.184 | Eval accuracy: 0.329 | ɛ: 5.24 


Client 1 training:  56%|█████▌    | 1601/2856 [08:39<1:32:21,  4.42s/it]

Step: 1600 | Train loss: 0.413 | Eval loss: 2.239 | Eval accuracy: 0.327 | ɛ: 5.35 


Client 1 training:  63%|██████▎   | 1801/2856 [09:45<1:16:30,  4.35s/it]

Step: 1800 | Train loss: 0.411 | Eval loss: 2.234 | Eval accuracy: 0.327 | ɛ: 5.44 


Client 1 training:  70%|███████   | 2001/2856 [10:51<1:02:25,  4.38s/it]

Step: 2000 | Train loss: 0.409 | Eval loss: 2.231 | Eval accuracy: 0.329 | ɛ: 5.53 


Client 1 training:  77%|███████▋  | 2201/2856 [11:57<48:09,  4.41s/it]  

Step: 2200 | Train loss: 0.409 | Eval loss: 2.118 | Eval accuracy: 0.328 | ɛ: 5.62 


Client 1 training:  84%|████████▍ | 2401/2856 [13:01<33:18,  4.39s/it]

Step: 2400 | Train loss: 0.407 | Eval loss: 2.202 | Eval accuracy: 0.328 | ɛ: 5.70 


Client 1 training:  91%|█████████ | 2601/2856 [14:08<18:33,  4.37s/it]

Step: 2600 | Train loss: 0.408 | Eval loss: 2.169 | Eval accuracy: 0.327 | ɛ: 5.78 


Client 1 training:  98%|█████████▊| 2801/2856 [15:13<03:59,  4.36s/it]

Step: 2800 | Train loss: 0.407 | Eval loss: 2.173 | Eval accuracy: 0.328 | ɛ: 5.85 


Client 1 training: 3001it [16:18,  4.36s/it]                          

Step: 3000 | Train loss: 0.408 | Eval loss: 2.137 | Eval accuracy: 0.329 | ɛ: 5.92 


Client 1 training: 3201it [17:23,  4.37s/it]

Step: 3200 | Train loss: 0.408 | Eval loss: 2.166 | Eval accuracy: 0.327 | ɛ: 5.99 


Client 1 training: 3221it [17:28,  3.07it/s]


    Local training loss: 0.408
  Client 2 training...


Client 2 training:   7%|▋         | 201/2862 [01:05<3:12:50,  4.35s/it]

Step: 200 | Train loss: 0.566 | Eval loss: 2.301 | Eval accuracy: 0.330 | ɛ: 3.92 


Client 2 training:  14%|█▍        | 401/2862 [02:10<2:58:14,  4.35s/it]

Step: 400 | Train loss: 0.485 | Eval loss: 2.349 | Eval accuracy: 0.329 | ɛ: 4.36 


Client 2 training:  21%|██        | 601/2862 [03:14<2:45:13,  4.38s/it]

Step: 600 | Train loss: 0.459 | Eval loss: 2.300 | Eval accuracy: 0.330 | ɛ: 4.64 


Client 2 training:  28%|██▊       | 801/2862 [04:20<2:30:37,  4.39s/it]

Step: 800 | Train loss: 0.443 | Eval loss: 2.403 | Eval accuracy: 0.329 | ɛ: 4.83 


Client 2 training:  35%|███▍      | 1001/2862 [05:25<2:14:54,  4.35s/it]

Step: 1000 | Train loss: 0.439 | Eval loss: 2.254 | Eval accuracy: 0.330 | ɛ: 4.98 


Client 2 training:  42%|████▏     | 1201/2862 [06:32<2:00:26,  4.35s/it]

Step: 1200 | Train loss: 0.431 | Eval loss: 2.295 | Eval accuracy: 0.330 | ɛ: 5.12 


Client 2 training:  49%|████▉     | 1401/2862 [07:37<1:45:56,  4.35s/it]

Step: 1400 | Train loss: 0.426 | Eval loss: 2.177 | Eval accuracy: 0.330 | ɛ: 5.23 


Client 2 training:  56%|█████▌    | 1601/2862 [08:41<1:32:07,  4.38s/it]

Step: 1600 | Train loss: 0.423 | Eval loss: 2.233 | Eval accuracy: 0.329 | ɛ: 5.34 


Client 2 training:  63%|██████▎   | 1801/2862 [09:45<1:17:40,  4.39s/it]

Step: 1800 | Train loss: 0.421 | Eval loss: 2.203 | Eval accuracy: 0.329 | ɛ: 5.43 


Client 2 training:  70%|██████▉   | 2001/2862 [10:50<1:03:07,  4.40s/it]

Step: 2000 | Train loss: 0.419 | Eval loss: 2.224 | Eval accuracy: 0.329 | ɛ: 5.53 


Client 2 training:  77%|███████▋  | 2201/2862 [11:56<48:32,  4.41s/it]  

Step: 2200 | Train loss: 0.416 | Eval loss: 2.261 | Eval accuracy: 0.330 | ɛ: 5.61 


Client 2 training:  84%|████████▍ | 2401/2862 [13:01<33:42,  4.39s/it]

Step: 2400 | Train loss: 0.414 | Eval loss: 2.252 | Eval accuracy: 0.329 | ɛ: 5.69 


Client 2 training:  91%|█████████ | 2601/2862 [14:06<19:01,  4.37s/it]

Step: 2600 | Train loss: 0.413 | Eval loss: 2.229 | Eval accuracy: 0.329 | ɛ: 5.77 


Client 2 training:  98%|█████████▊| 2801/2862 [15:11<04:26,  4.37s/it]

Step: 2800 | Train loss: 0.411 | Eval loss: 2.235 | Eval accuracy: 0.331 | ɛ: 5.84 


Client 2 training: 3001it [16:15,  4.39s/it]                          

Step: 3000 | Train loss: 0.411 | Eval loss: 2.143 | Eval accuracy: 0.329 | ɛ: 5.91 


Client 2 training: 3201it [17:19,  4.40s/it]

Step: 3200 | Train loss: 0.411 | Eval loss: 2.181 | Eval accuracy: 0.330 | ɛ: 5.98 


Client 2 training: 3249it [17:32,  3.09it/s]


    Local training loss: 0.410
After Global Round 1: Eval loss: 1.121 | Eval accuracy: 0.343 | ɛ: 5.98
Global Round 2
  Client 0 training...


Client 0 training:   7%|▋         | 201/2865 [01:04<3:11:33,  4.31s/it]

Step: 200 | Train loss: 0.434 | Eval loss: 2.218 | Eval accuracy: 0.342 | ɛ: 3.91 


Client 0 training:  14%|█▍        | 401/2865 [02:08<2:57:29,  4.32s/it]

Step: 400 | Train loss: 0.424 | Eval loss: 2.266 | Eval accuracy: 0.343 | ɛ: 4.36 


Client 0 training:  21%|██        | 601/2865 [03:11<2:44:14,  4.35s/it]

Step: 600 | Train loss: 0.416 | Eval loss: 2.185 | Eval accuracy: 0.343 | ɛ: 4.62 


Client 0 training:  28%|██▊       | 801/2865 [04:17<2:28:57,  4.33s/it]

Step: 800 | Train loss: 0.411 | Eval loss: 2.218 | Eval accuracy: 0.344 | ɛ: 4.82 


Client 0 training:  35%|███▍      | 1001/2865 [05:22<2:15:04,  4.35s/it]

Step: 1000 | Train loss: 0.407 | Eval loss: 2.212 | Eval accuracy: 0.342 | ɛ: 4.98 


Client 0 training:  42%|████▏     | 1201/2865 [06:27<2:00:55,  4.36s/it]

Step: 1200 | Train loss: 0.405 | Eval loss: 2.096 | Eval accuracy: 0.344 | ɛ: 5.11 


Client 0 training:  49%|████▉     | 1401/2865 [07:32<1:47:04,  4.39s/it]

Step: 1400 | Train loss: 0.405 | Eval loss: 2.150 | Eval accuracy: 0.341 | ɛ: 5.23 


Client 0 training:  56%|█████▌    | 1601/2865 [08:37<1:31:54,  4.36s/it]

Step: 1600 | Train loss: 0.405 | Eval loss: 2.155 | Eval accuracy: 0.343 | ɛ: 5.33 


Client 0 training:  63%|██████▎   | 1801/2865 [09:42<1:17:50,  4.39s/it]

Step: 1800 | Train loss: 0.407 | Eval loss: 2.137 | Eval accuracy: 0.343 | ɛ: 5.43 


Client 0 training:  70%|██████▉   | 2001/2865 [10:48<1:03:29,  4.41s/it]

Step: 2000 | Train loss: 0.406 | Eval loss: 2.196 | Eval accuracy: 0.344 | ɛ: 5.52 


Client 0 training:  77%|███████▋  | 2201/2865 [11:55<48:14,  4.36s/it]  

Step: 2200 | Train loss: 0.405 | Eval loss: 2.121 | Eval accuracy: 0.342 | ɛ: 5.61 


Client 0 training:  84%|████████▍ | 2401/2865 [13:00<33:47,  4.37s/it]

Step: 2400 | Train loss: 0.405 | Eval loss: 2.220 | Eval accuracy: 0.342 | ɛ: 5.69 


Client 0 training:  91%|█████████ | 2601/2865 [14:05<19:21,  4.40s/it]

Step: 2600 | Train loss: 0.405 | Eval loss: 2.076 | Eval accuracy: 0.342 | ɛ: 5.77 


Client 0 training:  98%|█████████▊| 2801/2865 [15:10<04:41,  4.39s/it]

Step: 2800 | Train loss: 0.406 | Eval loss: 2.084 | Eval accuracy: 0.342 | ɛ: 5.84 


Client 0 training: 3001it [16:14,  4.36s/it]                          

Step: 3000 | Train loss: 0.405 | Eval loss: 2.175 | Eval accuracy: 0.343 | ɛ: 5.91 


Client 0 training: 3201it [17:20,  4.41s/it]

Step: 3200 | Train loss: 0.405 | Eval loss: 2.178 | Eval accuracy: 0.342 | ɛ: 5.98 


Client 0 training: 3231it [17:28,  3.08it/s]


    Local training loss: 0.405
  Client 1 training...


Client 1 training:   7%|▋         | 201/2856 [01:05<3:09:51,  4.29s/it]

Step: 200 | Train loss: 0.439 | Eval loss: 2.366 | Eval accuracy: 0.327 | ɛ: 3.95 


Client 1 training:  14%|█▍        | 401/2856 [02:11<2:58:01,  4.35s/it]

Step: 400 | Train loss: 0.425 | Eval loss: 2.273 | Eval accuracy: 0.328 | ɛ: 4.39 


Client 1 training:  21%|██        | 601/2856 [03:15<2:44:53,  4.39s/it]

Step: 600 | Train loss: 0.417 | Eval loss: 2.316 | Eval accuracy: 0.328 | ɛ: 4.65 


Client 1 training:  28%|██▊       | 801/2856 [04:20<2:28:46,  4.34s/it]

Step: 800 | Train loss: 0.412 | Eval loss: 2.312 | Eval accuracy: 0.328 | ɛ: 4.84 


Client 1 training:  35%|███▌      | 1001/2856 [05:26<2:15:43,  4.39s/it]

Step: 1000 | Train loss: 0.406 | Eval loss: 2.337 | Eval accuracy: 0.327 | ɛ: 5.00 


Client 1 training:  42%|████▏     | 1201/2856 [06:32<2:00:33,  4.37s/it]

Step: 1200 | Train loss: 0.405 | Eval loss: 2.296 | Eval accuracy: 0.327 | ɛ: 5.13 


Client 1 training:  49%|████▉     | 1401/2856 [07:37<1:46:32,  4.39s/it]

Step: 1400 | Train loss: 0.405 | Eval loss: 2.221 | Eval accuracy: 0.328 | ɛ: 5.25 


Client 1 training:  56%|█████▌    | 1601/2856 [08:43<1:31:29,  4.37s/it]

Step: 1600 | Train loss: 0.403 | Eval loss: 2.236 | Eval accuracy: 0.327 | ɛ: 5.36 


Client 1 training:  63%|██████▎   | 1801/2856 [09:48<1:16:46,  4.37s/it]

Step: 1800 | Train loss: 0.404 | Eval loss: 2.245 | Eval accuracy: 0.327 | ɛ: 5.46 


Client 1 training:  70%|███████   | 2001/2856 [10:53<1:02:02,  4.35s/it]

Step: 2000 | Train loss: 0.403 | Eval loss: 2.194 | Eval accuracy: 0.328 | ɛ: 5.54 


Client 1 training:  77%|███████▋  | 2201/2856 [11:57<47:20,  4.34s/it]  

Step: 2200 | Train loss: 0.404 | Eval loss: 2.210 | Eval accuracy: 0.329 | ɛ: 5.63 


Client 1 training:  84%|████████▍ | 2401/2856 [13:03<33:17,  4.39s/it]

Step: 2400 | Train loss: 0.403 | Eval loss: 2.227 | Eval accuracy: 0.328 | ɛ: 5.71 


Client 1 training:  91%|█████████ | 2601/2856 [14:08<18:41,  4.40s/it]

Step: 2600 | Train loss: 0.402 | Eval loss: 2.238 | Eval accuracy: 0.328 | ɛ: 5.79 


Client 1 training:  98%|█████████▊| 2801/2856 [15:14<04:00,  4.37s/it]

Step: 2800 | Train loss: 0.402 | Eval loss: 2.237 | Eval accuracy: 0.328 | ɛ: 5.86 


Client 1 training: 3001it [16:19,  4.41s/it]                          

Step: 3000 | Train loss: 0.402 | Eval loss: 2.236 | Eval accuracy: 0.328 | ɛ: 5.93 


Client 1 training: 3186it [17:09,  3.10it/s]


    Local training loss: 0.402
  Client 2 training...


Client 2 training:   7%|▋         | 201/2862 [01:06<3:12:35,  4.34s/it]

Step: 200 | Train loss: 0.446 | Eval loss: 2.369 | Eval accuracy: 0.330 | ɛ: 3.93 


Client 2 training:  14%|█▍        | 401/2862 [02:10<2:57:33,  4.33s/it]

Step: 400 | Train loss: 0.420 | Eval loss: 2.397 | Eval accuracy: 0.331 | ɛ: 4.37 


Client 2 training:  21%|██        | 602/2862 [02:58<58:15,  1.55s/it]  

Step: 600 | Train loss: 0.413 | Eval loss: 2.316 | Eval accuracy: 0.329 | ɛ: 4.64 


Client 2 training:  28%|██▊       | 802/2862 [03:33<52:50,  1.54s/it]  

Step: 800 | Train loss: 0.412 | Eval loss: 2.299 | Eval accuracy: 0.330 | ɛ: 4.83 


Client 2 training:  35%|███▌      | 1002/2862 [04:08<47:45,  1.54s/it]  

Step: 1000 | Train loss: 0.411 | Eval loss: 2.265 | Eval accuracy: 0.329 | ɛ: 4.98 


Client 2 training:  42%|████▏     | 1202/2862 [04:43<42:57,  1.55s/it]

Step: 1200 | Train loss: 0.409 | Eval loss: 2.297 | Eval accuracy: 0.329 | ɛ: 5.12 


Client 2 training:  49%|████▉     | 1402/2862 [05:18<37:33,  1.54s/it]

Step: 1400 | Train loss: 0.408 | Eval loss: 2.308 | Eval accuracy: 0.331 | ɛ: 5.23 


Client 2 training:  56%|█████▌    | 1602/2862 [05:54<32:37,  1.55s/it]

Step: 1600 | Train loss: 0.405 | Eval loss: 2.335 | Eval accuracy: 0.330 | ɛ: 5.34 


Client 2 training:  63%|██████▎   | 1802/2862 [06:30<27:10,  1.54s/it]

Step: 1800 | Train loss: 0.404 | Eval loss: 2.308 | Eval accuracy: 0.329 | ɛ: 5.44 


Client 2 training:  70%|██████▉   | 2002/2862 [07:05<22:12,  1.55s/it]

Step: 2000 | Train loss: 0.403 | Eval loss: 2.196 | Eval accuracy: 0.330 | ɛ: 5.53 


Client 2 training:  77%|███████▋  | 2202/2862 [07:40<17:00,  1.55s/it]

Step: 2200 | Train loss: 0.403 | Eval loss: 2.225 | Eval accuracy: 0.328 | ɛ: 5.61 


Client 2 training:  84%|████████▍ | 2402/2862 [08:15<11:47,  1.54s/it]

Step: 2400 | Train loss: 0.403 | Eval loss: 2.209 | Eval accuracy: 0.329 | ɛ: 5.70 


Client 2 training:  91%|█████████ | 2602/2862 [08:50<06:45,  1.56s/it]

Step: 2600 | Train loss: 0.402 | Eval loss: 2.290 | Eval accuracy: 0.328 | ɛ: 5.77 


Client 2 training:  98%|█████████▊| 2802/2862 [09:26<01:32,  1.55s/it]

Step: 2800 | Train loss: 0.402 | Eval loss: 2.233 | Eval accuracy: 0.331 | ɛ: 5.85 


Client 2 training: 3002it [10:01,  1.55s/it]                          

Step: 3000 | Train loss: 0.402 | Eval loss: 2.200 | Eval accuracy: 0.329 | ɛ: 5.92 


Client 2 training: 3202it [10:36,  1.56s/it]

Step: 3200 | Train loss: 0.401 | Eval loss: 2.198 | Eval accuracy: 0.328 | ɛ: 5.99 


Client 2 training: 3212it [10:38,  5.03it/s]


    Local training loss: 0.401
After Global Round 2: Eval loss: 1.090 | Eval accuracy: 0.389 | ɛ: 5.99
Global Round 3
  Client 0 training...


Client 0 training:   7%|▋         | 202/2865 [00:35<1:07:53,  1.53s/it]

Step: 200 | Train loss: 0.411 | Eval loss: 2.260 | Eval accuracy: 0.343 | ɛ: 3.94 


Client 0 training:  14%|█▍        | 402/2865 [01:10<1:02:53,  1.53s/it]

Step: 400 | Train loss: 0.400 | Eval loss: 2.220 | Eval accuracy: 0.343 | ɛ: 4.37 


Client 0 training:  21%|██        | 602/2865 [01:45<57:50,  1.53s/it]  

Step: 600 | Train loss: 0.400 | Eval loss: 2.114 | Eval accuracy: 0.343 | ɛ: 4.64 


Client 0 training:  28%|██▊       | 802/2865 [02:22<52:57,  1.54s/it]  

Step: 800 | Train loss: 0.400 | Eval loss: 2.123 | Eval accuracy: 0.341 | ɛ: 4.83 


Client 0 training:  35%|███▍      | 1002/2865 [02:57<47:58,  1.54s/it]  

Step: 1000 | Train loss: 0.398 | Eval loss: 2.204 | Eval accuracy: 0.343 | ɛ: 4.99 


Client 0 training:  42%|████▏     | 1202/2865 [03:32<42:52,  1.55s/it]

Step: 1200 | Train loss: 0.396 | Eval loss: 2.099 | Eval accuracy: 0.344 | ɛ: 5.12 


Client 0 training:  49%|████▉     | 1402/2865 [04:08<37:28,  1.54s/it]

Step: 1400 | Train loss: 0.394 | Eval loss: 2.191 | Eval accuracy: 0.343 | ɛ: 5.24 


Client 0 training:  56%|█████▌    | 1602/2865 [04:43<32:24,  1.54s/it]

Step: 1600 | Train loss: 0.394 | Eval loss: 2.044 | Eval accuracy: 0.342 | ɛ: 5.35 


Client 0 training:  63%|██████▎   | 1802/2865 [05:18<27:26,  1.55s/it]

Step: 1800 | Train loss: 0.394 | Eval loss: 1.903 | Eval accuracy: 0.344 | ɛ: 5.44 


Client 0 training:  70%|██████▉   | 2002/2865 [05:53<22:18,  1.55s/it]

Step: 2000 | Train loss: 0.392 | Eval loss: 2.057 | Eval accuracy: 0.344 | ɛ: 5.53 


Client 0 training:  77%|███████▋  | 2202/2865 [06:29<17:00,  1.54s/it]

Step: 2200 | Train loss: 0.391 | Eval loss: 1.972 | Eval accuracy: 0.341 | ɛ: 5.62 


Client 0 training:  84%|████████▍ | 2402/2865 [07:04<11:57,  1.55s/it]

Step: 2400 | Train loss: 0.389 | Eval loss: 1.789 | Eval accuracy: 0.343 | ɛ: 5.70 


Client 0 training:  91%|█████████ | 2602/2865 [07:39<06:48,  1.55s/it]

Step: 2600 | Train loss: 0.387 | Eval loss: 1.766 | Eval accuracy: 0.343 | ɛ: 5.78 


Client 0 training:  98%|█████████▊| 2802/2865 [08:15<01:37,  1.55s/it]

Step: 2800 | Train loss: 0.385 | Eval loss: 1.696 | Eval accuracy: 0.343 | ɛ: 5.85 


Client 0 training: 3002it [08:50,  1.55s/it]                          

Step: 3000 | Train loss: 0.384 | Eval loss: 1.842 | Eval accuracy: 0.344 | ɛ: 5.92 


Client 0 training: 3202it [09:26,  1.56s/it]

Step: 3200 | Train loss: 0.382 | Eval loss: 1.728 | Eval accuracy: 0.344 | ɛ: 5.99 


Client 0 training: 3212it [09:27,  5.66it/s]


    Local training loss: 0.382
  Client 1 training...


Client 1 training:   7%|▋         | 202/2856 [00:35<1:07:52,  1.53s/it]

Step: 200 | Train loss: 0.419 | Eval loss: 2.280 | Eval accuracy: 0.328 | ɛ: 3.93 


Client 1 training:  14%|█▍        | 402/2856 [01:10<1:02:34,  1.53s/it]

Step: 400 | Train loss: 0.416 | Eval loss: 2.189 | Eval accuracy: 0.327 | ɛ: 4.37 


Client 1 training:  21%|██        | 602/2856 [01:46<57:43,  1.54s/it]  

Step: 600 | Train loss: 0.406 | Eval loss: 2.212 | Eval accuracy: 0.327 | ɛ: 4.65 


Client 1 training:  28%|██▊       | 802/2856 [02:21<52:51,  1.54s/it]  

Step: 800 | Train loss: 0.401 | Eval loss: 2.212 | Eval accuracy: 0.328 | ɛ: 4.84 


Client 1 training:  35%|███▌      | 1002/2856 [02:56<47:30,  1.54s/it]  

Step: 1000 | Train loss: 0.402 | Eval loss: 2.210 | Eval accuracy: 0.326 | ɛ: 5.00 


Client 1 training:  42%|████▏     | 1202/2856 [03:31<42:36,  1.55s/it]

Step: 1200 | Train loss: 0.401 | Eval loss: 2.147 | Eval accuracy: 0.326 | ɛ: 5.13 


Client 1 training:  49%|████▉     | 1402/2856 [04:07<37:34,  1.55s/it]

Step: 1400 | Train loss: 0.399 | Eval loss: 2.249 | Eval accuracy: 0.327 | ɛ: 5.25 


Client 1 training:  56%|█████▌    | 1602/2856 [04:42<32:22,  1.55s/it]

Step: 1600 | Train loss: 0.398 | Eval loss: 2.151 | Eval accuracy: 0.328 | ɛ: 5.35 


Client 1 training:  63%|██████▎   | 1802/2856 [05:19<27:11,  1.55s/it]

Step: 1800 | Train loss: 0.399 | Eval loss: 2.215 | Eval accuracy: 0.328 | ɛ: 5.45 


Client 1 training:  70%|███████   | 2002/2856 [05:55<22:09,  1.56s/it]

Step: 2000 | Train loss: 0.398 | Eval loss: 2.242 | Eval accuracy: 0.328 | ɛ: 5.55 


Client 1 training:  77%|███████▋  | 2202/2856 [06:30<16:55,  1.55s/it]

Step: 2200 | Train loss: 0.397 | Eval loss: 2.204 | Eval accuracy: 0.327 | ɛ: 5.63 


Client 1 training:  84%|████████▍ | 2402/2856 [07:06<11:46,  1.56s/it]

Step: 2400 | Train loss: 0.397 | Eval loss: 2.222 | Eval accuracy: 0.328 | ɛ: 5.71 


Client 1 training:  91%|█████████ | 2602/2856 [07:41<06:35,  1.56s/it]

Step: 2600 | Train loss: 0.396 | Eval loss: 2.124 | Eval accuracy: 0.328 | ɛ: 5.79 


Client 1 training:  98%|█████████▊| 2802/2856 [08:16<01:24,  1.56s/it]

Step: 2800 | Train loss: 0.395 | Eval loss: 2.212 | Eval accuracy: 0.327 | ɛ: 5.86 


Client 1 training: 3002it [08:52,  1.56s/it]                          

Step: 3000 | Train loss: 0.395 | Eval loss: 2.180 | Eval accuracy: 0.327 | ɛ: 5.94 


Client 1 training: 3175it [09:17,  5.70it/s]


    Local training loss: 0.395
  Client 2 training...


Client 2 training:   7%|▋         | 202/2862 [00:35<1:08:00,  1.53s/it]

Step: 200 | Train loss: 0.431 | Eval loss: 2.349 | Eval accuracy: 0.330 | ɛ: 3.93 


Client 2 training:  14%|█▍        | 402/2862 [01:10<1:02:51,  1.53s/it]

Step: 400 | Train loss: 0.411 | Eval loss: 2.320 | Eval accuracy: 0.329 | ɛ: 4.37 


Client 2 training:  21%|██        | 602/2862 [01:45<58:07,  1.54s/it]  

Step: 600 | Train loss: 0.401 | Eval loss: 2.269 | Eval accuracy: 0.329 | ɛ: 4.64 


Client 2 training:  28%|██▊       | 802/2862 [02:20<53:04,  1.55s/it]  

Step: 800 | Train loss: 0.396 | Eval loss: 2.286 | Eval accuracy: 0.330 | ɛ: 4.82 


Client 2 training:  35%|███▌      | 1002/2862 [02:55<48:01,  1.55s/it]  

Step: 1000 | Train loss: 0.392 | Eval loss: 2.296 | Eval accuracy: 0.329 | ɛ: 4.98 


Client 2 training:  42%|████▏     | 1202/2862 [03:31<42:42,  1.54s/it]

Step: 1200 | Train loss: 0.392 | Eval loss: 2.285 | Eval accuracy: 0.330 | ɛ: 5.12 


Client 2 training:  49%|████▉     | 1402/2862 [04:06<37:32,  1.54s/it]

Step: 1400 | Train loss: 0.392 | Eval loss: 2.199 | Eval accuracy: 0.330 | ɛ: 5.24 


Client 2 training:  56%|█████▌    | 1602/2862 [04:41<32:21,  1.54s/it]

Step: 1600 | Train loss: 0.393 | Eval loss: 2.181 | Eval accuracy: 0.329 | ɛ: 5.34 


Client 2 training:  63%|██████▎   | 1802/2862 [05:17<27:09,  1.54s/it]

Step: 1800 | Train loss: 0.392 | Eval loss: 2.266 | Eval accuracy: 0.329 | ɛ: 5.44 


Client 2 training:  70%|██████▉   | 2002/2862 [05:52<22:06,  1.54s/it]

Step: 2000 | Train loss: 0.390 | Eval loss: 2.277 | Eval accuracy: 0.331 | ɛ: 5.53 


Client 2 training:  77%|███████▋  | 2202/2862 [06:27<17:01,  1.55s/it]

Step: 2200 | Train loss: 0.389 | Eval loss: 2.112 | Eval accuracy: 0.330 | ɛ: 5.62 


Client 2 training:  84%|████████▍ | 2402/2862 [07:02<11:53,  1.55s/it]

Step: 2400 | Train loss: 0.388 | Eval loss: 2.174 | Eval accuracy: 0.329 | ɛ: 5.70 


Client 2 training:  91%|█████████ | 2602/2862 [07:37<06:44,  1.55s/it]

Step: 2600 | Train loss: 0.388 | Eval loss: 2.165 | Eval accuracy: 0.328 | ɛ: 5.77 


Client 2 training:  98%|█████████▊| 2802/2862 [08:14<01:48,  1.81s/it]

Step: 2800 | Train loss: 0.386 | Eval loss: 2.089 | Eval accuracy: 0.329 | ɛ: 5.85 


Client 2 training: 3002it [08:49,  1.56s/it]                          

Step: 3000 | Train loss: 0.385 | Eval loss: 2.093 | Eval accuracy: 0.329 | ɛ: 5.92 


Client 2 training: 3202it [09:25,  1.56s/it]

Step: 3200 | Train loss: 0.384 | Eval loss: 2.114 | Eval accuracy: 0.329 | ɛ: 5.99 


Client 2 training: 3219it [09:27,  5.67it/s]


    Local training loss: 0.384
After Global Round 3: Eval loss: 1.004 | Eval accuracy: 0.526 | ɛ: 5.99
Global Round 4
  Client 0 training...


Client 0 training:   7%|▋         | 202/2865 [00:35<1:08:04,  1.53s/it]

Step: 200 | Train loss: 0.383 | Eval loss: 1.722 | Eval accuracy: 0.342 | ɛ: 3.93 


Client 0 training:  14%|█▍        | 402/2865 [01:10<1:02:51,  1.53s/it]

Step: 400 | Train loss: 0.370 | Eval loss: 1.781 | Eval accuracy: 0.343 | ɛ: 4.37 


Client 0 training:  21%|██        | 602/2865 [01:45<58:07,  1.54s/it]  

Step: 600 | Train loss: 0.362 | Eval loss: 1.606 | Eval accuracy: 0.344 | ɛ: 4.63 


Client 0 training:  28%|██▊       | 802/2865 [02:20<53:02,  1.54s/it]  

Step: 800 | Train loss: 0.352 | Eval loss: 1.651 | Eval accuracy: 0.359 | ɛ: 4.83 


Client 0 training:  35%|███▍      | 1002/2865 [02:55<47:46,  1.54s/it]  

Step: 1000 | Train loss: 0.345 | Eval loss: 1.657 | Eval accuracy: 0.433 | ɛ: 4.98 


Client 0 training:  42%|████▏     | 1202/2865 [03:31<42:53,  1.55s/it]

Step: 1200 | Train loss: 0.340 | Eval loss: 1.352 | Eval accuracy: 0.519 | ɛ: 5.12 


Client 0 training:  49%|████▉     | 1402/2865 [04:06<37:45,  1.55s/it]

Step: 1400 | Train loss: 0.335 | Eval loss: 1.530 | Eval accuracy: 0.502 | ɛ: 5.24 


Client 0 training:  56%|█████▌    | 1602/2865 [04:41<32:23,  1.54s/it]

Step: 1600 | Train loss: 0.329 | Eval loss: 1.345 | Eval accuracy: 0.542 | ɛ: 5.34 


Client 0 training:  63%|██████▎   | 1802/2865 [05:16<27:25,  1.55s/it]

Step: 1800 | Train loss: 0.325 | Eval loss: 1.608 | Eval accuracy: 0.509 | ɛ: 5.44 


Client 0 training:  70%|██████▉   | 2002/2865 [05:52<22:11,  1.54s/it]

Step: 2000 | Train loss: 0.321 | Eval loss: 1.627 | Eval accuracy: 0.515 | ɛ: 5.53 


Client 0 training:  77%|███████▋  | 2202/2865 [06:27<17:03,  1.54s/it]

Step: 2200 | Train loss: 0.317 | Eval loss: 1.457 | Eval accuracy: 0.542 | ɛ: 5.62 


Client 0 training:  84%|████████▍ | 2402/2865 [07:02<11:58,  1.55s/it]

Step: 2400 | Train loss: 0.312 | Eval loss: 1.523 | Eval accuracy: 0.535 | ɛ: 5.70 


Client 0 training:  91%|█████████ | 2602/2865 [07:37<06:48,  1.55s/it]

Step: 2600 | Train loss: 0.310 | Eval loss: 1.413 | Eval accuracy: 0.558 | ɛ: 5.77 


Client 0 training:  98%|█████████▊| 2802/2865 [08:12<01:37,  1.54s/it]

Step: 2800 | Train loss: 0.308 | Eval loss: 1.380 | Eval accuracy: 0.559 | ɛ: 5.85 


Client 0 training: 3002it [08:48,  1.56s/it]                          

Step: 3000 | Train loss: 0.305 | Eval loss: 1.195 | Eval accuracy: 0.590 | ɛ: 5.92 


Client 0 training: 3202it [09:23,  1.54s/it]

Step: 3200 | Train loss: 0.302 | Eval loss: 1.230 | Eval accuracy: 0.587 | ɛ: 5.98 


Client 0 training: 3224it [09:26,  5.69it/s]


    Local training loss: 0.302
  Client 1 training...


Client 1 training:   7%|▋         | 202/2856 [00:35<1:07:56,  1.54s/it]

Step: 200 | Train loss: 0.397 | Eval loss: 2.196 | Eval accuracy: 0.329 | ɛ: 3.93 


Client 1 training:  14%|█▍        | 402/2856 [01:12<1:14:12,  1.81s/it]

Step: 400 | Train loss: 0.391 | Eval loss: 2.068 | Eval accuracy: 0.328 | ɛ: 4.37 


Client 1 training:  21%|██        | 602/2856 [01:47<57:53,  1.54s/it]  

Step: 600 | Train loss: 0.390 | Eval loss: 2.087 | Eval accuracy: 0.329 | ɛ: 4.64 


Client 1 training:  28%|██▊       | 802/2856 [02:22<52:29,  1.53s/it]  

Step: 800 | Train loss: 0.385 | Eval loss: 1.985 | Eval accuracy: 0.329 | ɛ: 4.83 


Client 1 training:  35%|███▌      | 1002/2856 [02:57<47:33,  1.54s/it]  

Step: 1000 | Train loss: 0.384 | Eval loss: 1.814 | Eval accuracy: 0.327 | ɛ: 4.99 


Client 1 training:  42%|████▏     | 1202/2856 [03:32<42:29,  1.54s/it]

Step: 1200 | Train loss: 0.380 | Eval loss: 1.924 | Eval accuracy: 0.328 | ɛ: 5.12 


Client 1 training:  49%|████▉     | 1402/2856 [04:07<37:26,  1.55s/it]

Step: 1400 | Train loss: 0.379 | Eval loss: 1.850 | Eval accuracy: 0.327 | ɛ: 5.24 


Client 1 training:  56%|█████▌    | 1602/2856 [04:42<32:24,  1.55s/it]

Step: 1600 | Train loss: 0.377 | Eval loss: 1.759 | Eval accuracy: 0.328 | ɛ: 5.34 


Client 1 training:  63%|██████▎   | 1802/2856 [05:18<27:11,  1.55s/it]

Step: 1800 | Train loss: 0.374 | Eval loss: 1.773 | Eval accuracy: 0.328 | ɛ: 5.44 


Client 1 training:  70%|███████   | 2002/2856 [05:53<21:59,  1.55s/it]

Step: 2000 | Train loss: 0.371 | Eval loss: 1.719 | Eval accuracy: 0.327 | ɛ: 5.53 


Client 1 training:  77%|███████▋  | 2202/2856 [06:28<16:54,  1.55s/it]

Step: 2200 | Train loss: 0.370 | Eval loss: 1.760 | Eval accuracy: 0.329 | ɛ: 5.62 


Client 1 training:  84%|████████▍ | 2402/2856 [07:04<11:39,  1.54s/it]

Step: 2400 | Train loss: 0.368 | Eval loss: 1.782 | Eval accuracy: 0.327 | ɛ: 5.70 


Client 1 training:  91%|█████████ | 2602/2856 [07:39<06:34,  1.55s/it]

Step: 2600 | Train loss: 0.366 | Eval loss: 1.620 | Eval accuracy: 0.330 | ɛ: 5.78 


Client 1 training:  98%|█████████▊| 2802/2856 [08:15<01:23,  1.54s/it]

Step: 2800 | Train loss: 0.365 | Eval loss: 1.408 | Eval accuracy: 0.360 | ɛ: 5.86 


Client 1 training: 3002it [08:50,  1.55s/it]                          

Step: 3000 | Train loss: 0.363 | Eval loss: 1.455 | Eval accuracy: 0.353 | ɛ: 5.93 


Client 1 training: 3202it [09:25,  5.66it/s]

Step: 3200 | Train loss: 0.362 | Eval loss: 1.540 | Eval accuracy: 0.340 | ɛ: 6.00 
    Local training loss: 0.362
  Client 2 training...



Client 2 training:   7%|▋         | 202/2862 [00:35<1:08:05,  1.54s/it]

Step: 200 | Train loss: 0.401 | Eval loss: 2.073 | Eval accuracy: 0.330 | ɛ: 3.94 


Client 2 training:  14%|█▍        | 402/2862 [01:10<1:03:01,  1.54s/it]

Step: 400 | Train loss: 0.382 | Eval loss: 2.132 | Eval accuracy: 0.331 | ɛ: 4.38 


Client 2 training:  21%|██        | 602/2862 [01:46<58:05,  1.54s/it]  

Step: 600 | Train loss: 0.371 | Eval loss: 1.997 | Eval accuracy: 0.330 | ɛ: 4.65 


Client 2 training:  28%|██▊       | 802/2862 [02:21<52:56,  1.54s/it]  

Step: 800 | Train loss: 0.367 | Eval loss: 1.704 | Eval accuracy: 0.330 | ɛ: 4.84 


Client 2 training:  35%|███▌      | 1002/2862 [02:56<47:41,  1.54s/it]  

Step: 1000 | Train loss: 0.363 | Eval loss: 1.765 | Eval accuracy: 0.330 | ɛ: 5.00 


Client 2 training:  42%|████▏     | 1202/2862 [03:31<42:38,  1.54s/it]

Step: 1200 | Train loss: 0.359 | Eval loss: 1.624 | Eval accuracy: 0.329 | ɛ: 5.13 


Client 2 training:  49%|████▉     | 1402/2862 [04:07<37:53,  1.56s/it]

Step: 1400 | Train loss: 0.355 | Eval loss: 1.692 | Eval accuracy: 0.330 | ɛ: 5.25 


Client 2 training:  56%|█████▌    | 1602/2862 [04:44<32:28,  1.55s/it]

Step: 1600 | Train loss: 0.350 | Eval loss: 1.586 | Eval accuracy: 0.331 | ɛ: 5.35 


Client 2 training:  63%|██████▎   | 1802/2862 [05:19<27:11,  1.54s/it]

Step: 1800 | Train loss: 0.346 | Eval loss: 1.468 | Eval accuracy: 0.380 | ɛ: 5.45 


Client 2 training:  70%|██████▉   | 2002/2862 [05:54<22:15,  1.55s/it]

Step: 2000 | Train loss: 0.341 | Eval loss: 1.394 | Eval accuracy: 0.517 | ɛ: 5.54 


Client 2 training:  77%|███████▋  | 2202/2862 [06:30<17:00,  1.55s/it]

Step: 2200 | Train loss: 0.336 | Eval loss: 1.275 | Eval accuracy: 0.561 | ɛ: 5.63 


Client 2 training:  84%|████████▍ | 2402/2862 [07:05<11:53,  1.55s/it]

Step: 2400 | Train loss: 0.332 | Eval loss: 1.236 | Eval accuracy: 0.575 | ɛ: 5.71 


Client 2 training:  91%|█████████ | 2602/2862 [07:39<06:41,  1.54s/it]

Step: 2600 | Train loss: 0.329 | Eval loss: 1.345 | Eval accuracy: 0.566 | ɛ: 5.79 


Client 2 training:  98%|█████████▊| 2802/2862 [08:14<01:33,  1.55s/it]

Step: 2800 | Train loss: 0.325 | Eval loss: 1.225 | Eval accuracy: 0.590 | ɛ: 5.86 


Client 2 training: 3002it [08:49,  1.54s/it]                          

Step: 3000 | Train loss: 0.322 | Eval loss: 1.048 | Eval accuracy: 0.607 | ɛ: 5.93 


Client 2 training: 3192it [09:15,  5.74it/s]


    Local training loss: 0.318
After Global Round 4: Eval loss: 0.672 | Eval accuracy: 0.729 | ɛ: 5.93
Global Round 5
  Client 0 training...


Client 0 training:   7%|▋         | 202/2865 [00:34<1:07:29,  1.52s/it]

Step: 200 | Train loss: 0.295 | Eval loss: 1.162 | Eval accuracy: 0.587 | ɛ: 3.93 


Client 0 training:  14%|█▍        | 402/2865 [01:09<1:02:36,  1.53s/it]

Step: 400 | Train loss: 0.281 | Eval loss: 1.175 | Eval accuracy: 0.588 | ɛ: 4.37 


Client 0 training:  21%|██        | 602/2865 [01:44<57:43,  1.53s/it]  

Step: 600 | Train loss: 0.274 | Eval loss: 1.097 | Eval accuracy: 0.621 | ɛ: 4.64 


Client 0 training:  28%|██▊       | 802/2865 [02:18<52:53,  1.54s/it]  

Step: 800 | Train loss: 0.270 | Eval loss: 1.167 | Eval accuracy: 0.608 | ɛ: 4.83 


Client 0 training:  35%|███▍      | 1002/2865 [02:53<47:48,  1.54s/it]  

Step: 1000 | Train loss: 0.266 | Eval loss: 1.122 | Eval accuracy: 0.627 | ɛ: 4.99 


Client 0 training:  42%|████▏     | 1202/2865 [03:28<42:37,  1.54s/it]

Step: 1200 | Train loss: 0.263 | Eval loss: 1.215 | Eval accuracy: 0.614 | ɛ: 5.12 


Client 0 training:  49%|████▉     | 1402/2865 [04:04<37:43,  1.55s/it]

Step: 1400 | Train loss: 0.261 | Eval loss: 1.080 | Eval accuracy: 0.642 | ɛ: 5.25 


Client 0 training:  56%|█████▌    | 1602/2865 [04:38<32:12,  1.53s/it]

Step: 1600 | Train loss: 0.261 | Eval loss: 0.968 | Eval accuracy: 0.662 | ɛ: 5.35 


Client 0 training:  63%|██████▎   | 1802/2865 [05:13<27:11,  1.53s/it]

Step: 1800 | Train loss: 0.259 | Eval loss: 1.200 | Eval accuracy: 0.613 | ɛ: 5.45 


Client 0 training:  70%|██████▉   | 2002/2865 [05:47<22:06,  1.54s/it]

Step: 2000 | Train loss: 0.258 | Eval loss: 0.979 | Eval accuracy: 0.660 | ɛ: 5.54 


Client 0 training:  77%|███████▋  | 2202/2865 [06:22<17:07,  1.55s/it]

Step: 2200 | Train loss: 0.257 | Eval loss: 1.196 | Eval accuracy: 0.619 | ɛ: 5.63 


Client 0 training:  84%|████████▍ | 2402/2865 [06:57<11:54,  1.54s/it]

Step: 2400 | Train loss: 0.256 | Eval loss: 1.015 | Eval accuracy: 0.644 | ɛ: 5.70 


Client 0 training:  91%|█████████ | 2602/2865 [07:33<06:43,  1.53s/it]

Step: 2600 | Train loss: 0.254 | Eval loss: 0.978 | Eval accuracy: 0.683 | ɛ: 5.78 


Client 0 training:  98%|█████████▊| 2802/2865 [08:08<01:37,  1.55s/it]

Step: 2800 | Train loss: 0.255 | Eval loss: 1.009 | Eval accuracy: 0.664 | ɛ: 5.85 


Client 0 training: 3002it [08:43,  1.56s/it]                          

Step: 3000 | Train loss: 0.253 | Eval loss: 1.019 | Eval accuracy: 0.671 | ɛ: 5.92 


Client 0 training: 3198it [09:11,  5.80it/s]


    Local training loss: 0.252
  Client 1 training...


Client 1 training:   7%|▋         | 202/2856 [00:35<1:07:43,  1.53s/it]

Step: 200 | Train loss: 0.356 | Eval loss: 1.455 | Eval accuracy: 0.357 | ɛ: 3.93 


Client 1 training:  14%|█▍        | 402/2856 [01:09<1:02:34,  1.53s/it]

Step: 400 | Train loss: 0.339 | Eval loss: 1.438 | Eval accuracy: 0.360 | ɛ: 4.37 


Client 1 training:  21%|██        | 602/2856 [01:44<57:50,  1.54s/it]  

Step: 600 | Train loss: 0.331 | Eval loss: 1.360 | Eval accuracy: 0.365 | ɛ: 4.63 


Client 1 training:  28%|██▊       | 802/2856 [02:19<52:36,  1.54s/it]  

Step: 800 | Train loss: 0.329 | Eval loss: 1.374 | Eval accuracy: 0.354 | ɛ: 4.83 


Client 1 training:  35%|███▌      | 1002/2856 [02:54<47:09,  1.53s/it]  

Step: 1000 | Train loss: 0.324 | Eval loss: 1.285 | Eval accuracy: 0.402 | ɛ: 4.99 


Client 1 training:  42%|████▏     | 1202/2856 [03:29<42:20,  1.54s/it]

Step: 1200 | Train loss: 0.320 | Eval loss: 1.243 | Eval accuracy: 0.465 | ɛ: 5.12 


Client 1 training:  49%|████▉     | 1402/2856 [04:04<37:09,  1.53s/it]

Step: 1400 | Train loss: 0.317 | Eval loss: 1.225 | Eval accuracy: 0.474 | ɛ: 5.24 


Client 1 training:  56%|█████▌    | 1602/2856 [04:39<32:04,  1.53s/it]

Step: 1600 | Train loss: 0.315 | Eval loss: 1.213 | Eval accuracy: 0.527 | ɛ: 5.34 


Client 1 training:  63%|██████▎   | 1802/2856 [05:14<26:54,  1.53s/it]

Step: 1800 | Train loss: 0.311 | Eval loss: 1.220 | Eval accuracy: 0.535 | ɛ: 5.44 


Client 1 training:  70%|███████   | 2002/2856 [05:49<21:54,  1.54s/it]

Step: 2000 | Train loss: 0.309 | Eval loss: 1.113 | Eval accuracy: 0.595 | ɛ: 5.54 


Client 1 training:  77%|███████▋  | 2202/2856 [06:24<16:54,  1.55s/it]

Step: 2200 | Train loss: 0.308 | Eval loss: 0.994 | Eval accuracy: 0.652 | ɛ: 5.62 


Client 1 training:  84%|████████▍ | 2402/2856 [06:58<11:32,  1.53s/it]

Step: 2400 | Train loss: 0.306 | Eval loss: 1.082 | Eval accuracy: 0.615 | ɛ: 5.70 


Client 1 training:  91%|█████████ | 2602/2856 [07:33<06:34,  1.56s/it]

Step: 2600 | Train loss: 0.304 | Eval loss: 1.023 | Eval accuracy: 0.639 | ɛ: 5.78 


Client 1 training:  98%|█████████▊| 2802/2856 [08:08<01:22,  1.53s/it]

Step: 2800 | Train loss: 0.303 | Eval loss: 0.961 | Eval accuracy: 0.682 | ɛ: 5.85 


Client 1 training: 3002it [08:43,  1.53s/it]                          

Step: 3000 | Train loss: 0.301 | Eval loss: 1.024 | Eval accuracy: 0.671 | ɛ: 5.92 


Client 1 training: 3202it [09:17,  1.55s/it]

Step: 3200 | Train loss: 0.301 | Eval loss: 0.908 | Eval accuracy: 0.697 | ɛ: 5.99 


Client 1 training: 3214it [09:19,  5.74it/s]


    Local training loss: 0.301
  Client 2 training...


Client 2 training:   7%|▋         | 202/2862 [00:36<1:07:25,  1.52s/it]

Step: 200 | Train loss: 0.327 | Eval loss: 1.079 | Eval accuracy: 0.591 | ɛ: 3.93 


Client 2 training:  14%|█▍        | 402/2862 [01:10<1:03:13,  1.54s/it]

Step: 400 | Train loss: 0.305 | Eval loss: 1.045 | Eval accuracy: 0.597 | ɛ: 4.37 


Client 2 training:  21%|██        | 602/2862 [01:45<57:49,  1.54s/it]  

Step: 600 | Train loss: 0.289 | Eval loss: 0.981 | Eval accuracy: 0.612 | ɛ: 4.64 


Client 2 training:  28%|██▊       | 802/2862 [02:20<52:34,  1.53s/it]  

Step: 800 | Train loss: 0.283 | Eval loss: 1.143 | Eval accuracy: 0.600 | ɛ: 4.83 


Client 2 training:  35%|███▌      | 1002/2862 [02:55<47:12,  1.52s/it]  

Step: 1000 | Train loss: 0.278 | Eval loss: 1.004 | Eval accuracy: 0.614 | ɛ: 4.99 


Client 2 training:  42%|████▏     | 1202/2862 [03:29<42:30,  1.54s/it]

Step: 1200 | Train loss: 0.272 | Eval loss: 0.925 | Eval accuracy: 0.640 | ɛ: 5.12 


Client 2 training:  49%|████▉     | 1402/2862 [04:04<37:11,  1.53s/it]

Step: 1400 | Train loss: 0.269 | Eval loss: 0.943 | Eval accuracy: 0.636 | ɛ: 5.23 


Client 2 training:  56%|█████▌    | 1602/2862 [04:38<32:19,  1.54s/it]

Step: 1600 | Train loss: 0.266 | Eval loss: 0.999 | Eval accuracy: 0.628 | ɛ: 5.34 


Client 2 training:  63%|██████▎   | 1802/2862 [05:14<27:06,  1.53s/it]

Step: 1800 | Train loss: 0.265 | Eval loss: 1.056 | Eval accuracy: 0.627 | ɛ: 5.44 


Client 2 training:  70%|██████▉   | 2002/2862 [05:49<21:59,  1.53s/it]

Step: 2000 | Train loss: 0.262 | Eval loss: 0.965 | Eval accuracy: 0.648 | ɛ: 5.53 


Client 2 training:  77%|███████▋  | 2202/2862 [06:23<17:02,  1.55s/it]

Step: 2200 | Train loss: 0.259 | Eval loss: 1.010 | Eval accuracy: 0.644 | ɛ: 5.62 


Client 2 training:  84%|████████▍ | 2402/2862 [06:58<11:49,  1.54s/it]

Step: 2400 | Train loss: 0.258 | Eval loss: 0.927 | Eval accuracy: 0.660 | ɛ: 5.70 


Client 2 training:  91%|█████████ | 2602/2862 [07:33<06:42,  1.55s/it]

Step: 2600 | Train loss: 0.256 | Eval loss: 0.912 | Eval accuracy: 0.684 | ɛ: 5.77 


Client 2 training:  98%|█████████▊| 2802/2862 [08:07<01:32,  1.55s/it]

Step: 2800 | Train loss: 0.255 | Eval loss: 1.018 | Eval accuracy: 0.664 | ɛ: 5.84 


Client 2 training: 3002it [08:42,  1.53s/it]                          

Step: 3000 | Train loss: 0.255 | Eval loss: 0.868 | Eval accuracy: 0.686 | ɛ: 5.92 


Client 2 training: 3202it [09:17,  1.55s/it]

Step: 3200 | Train loss: 0.253 | Eval loss: 0.903 | Eval accuracy: 0.674 | ɛ: 5.98 


Client 2 training: 3234it [09:22,  5.75it/s]


    Local training loss: 0.254
After Global Round 5: Eval loss: 0.512 | Eval accuracy: 0.809 | ɛ: 5.98


In [15]:
evaluate(global_model)

(0.5127203525641025, 0.8088274582838401)