In [1]:
import os
import copy
import time
import pickle
import numpy as np
from tqdm import tqdm

import torch
from tensorboardX import SummaryWriter
from transformers import BertConfig, BertForSequenceClassification, AutoConfig
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
from peft import LoraConfig, get_peft_model


from options import args_parser
from update import LocalUpdate, LocalUpdate_BD, test_inference, global_model_KD, pre_train_global_model
from utils import get_dataset, get_attack_test_set, get_attack_syn_set, get_clean_syn_set, average_weights, exp_details, load_params
from defense import krum, multi_krum, detect_anomalies_by_distance
from defense_utils import extract_lora_matrices, compute_wa_distances

In [2]:
class Args:
    """Notebook stand-in for the command-line options object.

    Every option is a plain instance attribute populated with a fixed default,
    so the rest of the notebook can read ``args.<name>`` directly.
    """

    def __init__(self):
        # Federated-learning options.
        federated_defaults = {
            'mode': 'ours',            # 'clean', 'BD_baseline', 'ours'
            'epochs': 3,               # Number of rounds of training
            'num_users': 20,           # Number of users: K
            'frac': 0.25,              # The fraction of clients: C
            'local_ep': 5,             # The number of local epochs: E
            'local_bs': 10,            # Local batch size: B
            'pre_lr': 0.01,            # Learning rate for pre-training
            'lr': 0.01,                # Learning rate for FL
            'momentum': 0.5,           # SGD momentum
            'attackers': 0.3,          # Portion of compromised clients in the backdoor attack
            'attack_type': 'addWord',  # Type of attack, e.g. 'addWord'
            'defense': 'ours',         # Defense applied before aggregation
        }

        # Model options.
        model_defaults = {
            'model': 'bert',           # Model name
            'tuning': 'lora',          # Type of model tuning: 'full' or 'lora'
            'kernel_num': 9,           # Number of each kind of kernel
            'kernel_sizes': '3,4,5',   # Comma-separated kernel sizes for convolution
            'num_channels': 1,         # Number of channels of imgs
            'norm': 'batch_norm',      # 'batch_norm', 'layer_norm', or None
            'num_filters': 32,         # Number of filters for conv nets
            'max_pool': 'True',        # Whether to use max pooling (stored as a string)
        }

        # Everything else: data, hardware, optimisation, logging.
        other_defaults = {
            'dataset': 'sst2',         # Name of the dataset
            'num_classes': 10,         # Number of classes
            'gpu': True,               # Set to True to use an accelerator
            'gpu_id': 0,               # Specific GPU ID
            'optimizer': 'adamw',      # Type of optimizer
            'iid': True,               # True for IID, False for non-IID
            'unequal': 0,              # Use unequal data splits for the non-IID setting
            'stopping_rounds': 10,     # Rounds of early stopping
            'verbose': 1,              # Verbose level
            'seed': 1,                 # Random seed
        }

        # Apply the groups in declaration order so vars(self) keeps a stable layout.
        for option_group in (federated_defaults, model_defaults, other_defaults):
            for option_name, option_value in option_group.items():
                setattr(self, option_name, option_value)


def divide_lora_params(state_dict):
    """
    Split a state_dict into its LoRA A entries and its LoRA B entries.

    An entry is classified by substring match on its key: keys containing
    'lora_A' go to the first dictionary; of the remaining keys, those
    containing 'lora_B' go to the second. All other entries are dropped.

    :param state_dict: Mapping from parameter name to value.
    :return: Tuple ``(A_params, B_params)`` of dictionaries holding the matching entries.
    """
    lora_a_entries = {
        name: tensor
        for name, tensor in state_dict.items()
        if 'lora_A' in name
    }
    # 'lora_A' takes precedence, so a key carrying both markers is never placed in B.
    lora_b_entries = {
        name: tensor
        for name, tensor in state_dict.items()
        if 'lora_A' not in name and 'lora_B' in name
    }
    return lora_a_entries, lora_b_entries


# Module-level configuration instance; the cells below read their settings from it.
args = Args()

In [3]:
# Setup cell: choose the device, load data, build the base model, wrap it with
# LoRA adapters and report the baseline accuracy / attack success rate.

# True: load the fine-tuned base model from 'save/base_model'.
# False: pre-train from 'bert-base-uncased' and save the result there.
LOAD_MODEL = True

# Apply the configured seed so data partitioning and the random LoRA
# initialisation are repeatable after a kernel restart.
np.random.seed(args.seed)
torch.manual_seed(args.seed)

# Prefer CUDA, then Apple MPS, and fall back to CPU when no accelerator is
# actually available (older torch builds have no torch.backends.mps at all).
mps_backend = getattr(torch.backends, 'mps', None)
if args.gpu and torch.cuda.is_available():
    device = 'cuda'
elif args.gpu and mps_backend is not None and mps_backend.is_available():
    device = 'mps'
else:
    device = 'cpu'
print(device)

# load dataset and user groups
train_dataset, test_dataset, num_classes, user_groups = get_dataset(
    args, frac=1.0)

# Select the backdoor trigger from the attack type. Fail loudly for an
# unsupported value instead of hitting a NameError on `trigger` below.
if args.attack_type == 'addWord':
    trigger = 'cf'
elif args.attack_type == 'addSent':
    trigger = 'I watched this 3D movie.'
else:
    raise ValueError(
        f"No trigger is defined for attack_type={args.attack_type!r}; "
        "expected 'addWord' or 'addSent'")

# load synthetic datasets and the triggered test set
# NOTE(review): the clean set receives the trigger while the attack set does
# not -- confirm against the helper signatures in utils.
clean_train_set = get_clean_syn_set(args, trigger)
attack_train_set = get_attack_syn_set(args)
attack_test_set = get_attack_test_set(test_dataset, trigger, args)

# BUILD MODEL
# num_layers is forwarded to extract_lora_matrices, so every branch must set it.
if args.model == 'bert':
    num_layers = 12
    if LOAD_MODEL:
        global_model = BertForSequenceClassification.from_pretrained('save/base_model')
    else:
        config = AutoConfig.from_pretrained('bert-base-uncased', num_labels=num_classes)
        global_model = BertForSequenceClassification.from_pretrained(
            'bert-base-uncased', config=config)
elif args.model == 'distill_bert':
    # distilbert-base-uncased has 6 transformer layers.
    # NOTE(review): confirm extract_lora_matrices handles DistilBERT parameter names.
    num_layers = 6
    global_model = DistilBertForSequenceClassification.from_pretrained(
        'distilbert-base-uncased', num_labels=num_classes)
else:
    # Raise instead of exit(): exit() inside a notebook targets the kernel
    # rather than reporting a configuration error.
    raise ValueError(f'Error: unrecognized model {args.model!r}')

global_model.to(device)

# Metric accumulators shared with the training cell.
train_loss, train_accuracy = [], []
val_acc_list, net_list = [], []
cv_loss, cv_acc = [], []
print_every = 2
val_loss_pre, counter = 0, 0
test_acc_list, test_asr_list = [], []

# LoRA adapter configuration. target_modules is intentionally left unset
# (alternatives tried: ["query", "key", "value"] and ['query']).
lora_config = LoraConfig(
        r=4,                       # Rank of the low-rank matrices
        lora_alpha=32,             # Scaling factor for the LoRA updates
        lora_dropout=0.01,         # Dropout rate for LoRA layers
        task_type="SEQ_CLS",       # Sequence-classification task type
    )

# pre-train
if not LOAD_MODEL:
    global_model = pre_train_global_model(global_model, clean_train_set, args)

    # save fine-tuned base model
    global_model.save_pretrained('save/base_model')

global_model = get_peft_model(global_model, lora_config)
global_model.print_trainable_parameters()

# Reference LoRA B matrices of the freshly wrapped global model; the 'ours'
# defense in the training cell measures client updates against these.
clean_B_matrices = extract_lora_matrices([global_model.state_dict()], num_layers)[1]

test_acc, test_loss = test_inference(args, global_model, test_dataset)
test_asr, _ = test_inference(args, global_model, attack_test_set)

print(' \n Results before FL training:')
print("|---- Test ACC: {:.2f}%".format(100 * test_acc))
print("|---- Test ASR: {:.2f}%".format(100 * test_asr))

mps
trainable params: 148,994 || all params: 109,632,772 || trainable%: 0.1359


Map:   0%|          | 0/444 [00:00<?, ? examples/s]

 
 Results before FL training:
|---- Test ACC: 85.09%
|---- Test ASR: 9.91%


In [4]:
# Federated training cell: each round samples clients, trains them locally
# (backdoored clients train on poisoned data), filters suspected attackers with
# the configured defense, aggregates the survivors and evaluates the result.
logger = SummaryWriter('./logs')
num_attackers = int(args.num_users * args.attackers)

# Draw the compromised clients from a dedicated generator seeded with
# args.seed. The global NumPy RNG is unseeded at this point, so using it made
# the attacker set differ on every run.
attacker_rng = np.random.RandomState(args.seed)
BD_users = attacker_rng.choice(
    np.arange(args.num_users), num_attackers, replace=False)

# log[round]['global'] holds round-level results; log[round][client_id] holds
# each participating client's status, loss and weights.
log = {}

# NOTE(review): the round count is hard-coded to 5 while args.epochs is 3 --
# confirm which one is intended.
for epoch in tqdm(range(5)):
    # Re-seed per round so the client sample for a given round is repeatable.
    np.random.seed(epoch)

    log[epoch] = {}
    log[epoch]['global'] = {}
    attacked = False  # becomes True when any sampled client is compromised

    local_weights, local_losses = [], []
    print(f'\n | Global Training Round : {epoch + 1} |\n')

    # Sample m = max(frac * num_users, 1) distinct clients for this round.
    m = max(int(args.frac * args.num_users), 1)
    idxs_users = np.random.choice(range(args.num_users), m, replace=False)

    for idx in idxs_users:
        if idx in BD_users:
            poison_ratio = 0.3
            attacked = True
        else:
            poison_ratio = 0
        local_model = LocalUpdate_BD(local_id=idx, args=args, dataset=train_dataset,
                                        idxs=user_groups[idx], logger=logger, poison_ratio=poison_ratio, lora_config=lora_config)
        local_model.device = device
        # Train on a private copy so clients never mutate the shared global model.
        w, loss = local_model.update_weights(
            model=copy.deepcopy(global_model), global_round=epoch)
        local_weights.append(copy.deepcopy(w))
        local_losses.append(copy.deepcopy(loss))

        log[epoch][idx] = {}
        log[epoch][idx]['status'] = 'malicious' if poison_ratio > 0 else 'clean'
        log[epoch][idx]['loss'] = loss
        log[epoch][idx]['weights'] = w

    # defense: `attackers` is used below as positions into local_weights
    if args.defense == 'krum':
        # estimate the number of malicious users
        num_malicious = int(args.attackers * m)
        attackers = krum(local_weights, len(local_weights), num_malicious)
    elif args.defense == 'multi_krum':
        num_malicious = int(args.attackers * m)
        n = int(m * 0.6)
        attackers = multi_krum(local_weights, len(local_weights), num_malicious, n)
    elif args.defense == 'ours':
        # Compare each client's LoRA B matrices with the clean reference ones.
        client_B_matrices = extract_lora_matrices(local_weights, num_layers)[1]
        distances = compute_wa_distances(clean_B_matrices, client_B_matrices)
        attackers = detect_anomalies_by_distance(distances, method='sum', base_threshold=0.002, threshold_increase=0.0005)
    else:
        attackers = []

    print(f"Attackers: {attackers}")
    clean_weights = [local_weights[i] for i in range(len(local_weights)) if i not in attackers]
    poison_weights = [local_weights[i] for i in range(len(local_weights)) if i in attackers]

    # update global weights
    if len(clean_weights) != 0:
        global_weights = average_weights(clean_weights)
        global_model = load_params(global_model, global_weights)
    else:
        # Every update was rejected: keep the current model. Snapshot the state
        # dict rather than alias it -- state_dict() tensors share storage with
        # the live model, so a later in-place update would silently rewrite
        # the weights logged for this round.
        global_weights = copy.deepcopy(global_model.state_dict())

    # Disabled experiment: also aggregate the LoRA A matrices of rejected clients.
    # if args.defense == 'ours':
    #     if len(poison_weights) != 0:
    #         A_weights = average_weights([divide_lora_params(w)[0] for w in poison_weights])
    #         global_model = load_params(global_model, A_weights)

    loss_avg = sum(local_losses) / len(local_losses)
    train_loss.append(loss_avg)

    log[epoch]['global']['status'] = 'malicious' if attacked else 'clean'
    log[epoch]['global']['loss'] = loss_avg
    log[epoch]['global']['weights'] = global_weights

    print(f' \nAvg Training Stats after {epoch + 1} global rounds:')
    print(f'Training Loss : {np.mean(np.array(train_loss))}')
    test_acc, _ = test_inference(args, global_model, test_dataset)
    test_asr, _ = test_inference(args, global_model, attack_test_set)
    print("|---- Test ACC: {:.2f}%".format(100 * test_acc))
    print("|---- Test ASR: {:.2f}%".format(100 * test_asr))
    test_acc_list.append(test_acc)
    test_asr_list.append(test_asr)

  0%|          | 0/5 [00:00<?, ?it/s]


 | Global Training Round : 1 |



Map:   0%|          | 0/2693 [00:00<?, ? examples/s]

Map:   0%|          | 0/2693 [00:00<?, ? examples/s]

Map:   0%|          | 0/337 [00:00<?, ? examples/s]

Map:   0%|          | 0/337 [00:00<?, ? examples/s]

| Global Round : 0 | Local # 18 	Malicious: False


  0%|          | 0/810 [00:00<?, ?it/s]

  0%|          | 0/5 [00:05<?, ?it/s]

{'loss': 0.5654, 'grad_norm': 13.975295066833496, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.04}


  0%|          | 0/5 [00:07<?, ?it/s]

{'loss': 0.5331, 'grad_norm': 0.4923739731311798, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.07}


  0%|          | 0/5 [00:09<?, ?it/s]

{'loss': 0.6773, 'grad_norm': 3.1481401920318604, 'learning_rate': 3e-06, 'epoch': 0.11}


  0%|          | 0/5 [00:12<?, ?it/s]

{'loss': 0.5518, 'grad_norm': 9.650077819824219, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.15}


  0%|          | 0/5 [00:14<?, ?it/s]

{'loss': 0.363, 'grad_norm': 7.912322521209717, 'learning_rate': 5e-06, 'epoch': 0.19}


  0%|          | 0/5 [00:16<?, ?it/s]

{'loss': 0.4549, 'grad_norm': 4.738059997558594, 'learning_rate': 6e-06, 'epoch': 0.22}


  0%|          | 0/5 [00:19<?, ?it/s]

{'loss': 0.7454, 'grad_norm': 11.824291229248047, 'learning_rate': 7.000000000000001e-06, 'epoch': 0.26}


  0%|          | 0/5 [00:21<?, ?it/s]

{'loss': 0.7393, 'grad_norm': 10.362308502197266, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.3}


  0%|          | 0/5 [00:24<?, ?it/s]

{'loss': 0.41, 'grad_norm': 5.947573661804199, 'learning_rate': 9e-06, 'epoch': 0.33}


  0%|          | 0/5 [00:26<?, ?it/s]

{'loss': 0.4639, 'grad_norm': 0.7215179800987244, 'learning_rate': 1e-05, 'epoch': 0.37}


  0%|          | 0/5 [00:28<?, ?it/s]

{'loss': 0.4063, 'grad_norm': 4.4936323165893555, 'learning_rate': 1.1000000000000001e-05, 'epoch': 0.41}


  0%|          | 0/5 [00:31<?, ?it/s]

{'loss': 0.5068, 'grad_norm': 5.59805154800415, 'learning_rate': 1.2e-05, 'epoch': 0.44}


  0%|          | 0/5 [00:33<?, ?it/s]

{'loss': 0.4151, 'grad_norm': 3.1176586151123047, 'learning_rate': 1.3000000000000001e-05, 'epoch': 0.48}


  0%|          | 0/5 [00:36<?, ?it/s]

{'loss': 0.4256, 'grad_norm': 5.7545013427734375, 'learning_rate': 1.4000000000000001e-05, 'epoch': 0.52}


  0%|          | 0/5 [00:38<?, ?it/s]

{'loss': 0.7164, 'grad_norm': 0.5490224361419678, 'learning_rate': 1.5e-05, 'epoch': 0.56}


  0%|          | 0/5 [00:41<?, ?it/s]

{'loss': 0.6191, 'grad_norm': 1.5317797660827637, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.59}


  0%|          | 0/5 [00:43<?, ?it/s]

{'loss': 0.4049, 'grad_norm': 11.483296394348145, 'learning_rate': 1.7000000000000003e-05, 'epoch': 0.63}


  0%|          | 0/5 [00:46<?, ?it/s]

{'loss': 0.3891, 'grad_norm': 0.9114676713943481, 'learning_rate': 1.8e-05, 'epoch': 0.67}


  0%|          | 0/5 [00:48<?, ?it/s]

{'loss': 0.6017, 'grad_norm': 4.208825588226318, 'learning_rate': 1.9e-05, 'epoch': 0.7}


  0%|          | 0/5 [00:50<?, ?it/s]

{'loss': 0.5838, 'grad_norm': 2.558014154434204, 'learning_rate': 2e-05, 'epoch': 0.74}


  0%|          | 0/5 [00:53<?, ?it/s]

{'loss': 0.6403, 'grad_norm': 3.744830369949341, 'learning_rate': 2.1e-05, 'epoch': 0.78}


  0%|          | 0/5 [00:55<?, ?it/s]

{'loss': 0.445, 'grad_norm': 3.369546413421631, 'learning_rate': 2.2000000000000003e-05, 'epoch': 0.81}


  0%|          | 0/5 [00:58<?, ?it/s]

{'loss': 0.3514, 'grad_norm': 10.321866989135742, 'learning_rate': 2.3000000000000003e-05, 'epoch': 0.85}


  0%|          | 0/5 [01:00<?, ?it/s]

{'loss': 0.3479, 'grad_norm': 4.709089756011963, 'learning_rate': 2.4e-05, 'epoch': 0.89}


  0%|          | 0/5 [01:02<?, ?it/s]

{'loss': 0.4078, 'grad_norm': 5.16348934173584, 'learning_rate': 2.5e-05, 'epoch': 0.93}


  0%|          | 0/5 [01:05<?, ?it/s]

{'loss': 0.4692, 'grad_norm': 3.029186725616455, 'learning_rate': 2.6000000000000002e-05, 'epoch': 0.96}


  0%|          | 0/5 [01:07<?, ?it/s]

{'loss': 0.5084, 'grad_norm': 10.236637115478516, 'learning_rate': 2.7000000000000002e-05, 'epoch': 1.0}


  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/5 [01:10<?, ?it/s]

{'eval_loss': 0.35620126128196716, 'eval_runtime': 3.345, 'eval_samples_per_second': 100.746, 'eval_steps_per_second': 10.164, 'epoch': 1.0}


  0%|          | 0/5 [01:13<?, ?it/s]

{'loss': 0.3572, 'grad_norm': 11.765689849853516, 'learning_rate': 2.8000000000000003e-05, 'epoch': 1.04}


  0%|          | 0/5 [01:15<?, ?it/s]

{'loss': 0.4725, 'grad_norm': 4.450953960418701, 'learning_rate': 2.9e-05, 'epoch': 1.07}


  0%|          | 0/5 [01:18<?, ?it/s]

{'loss': 0.5184, 'grad_norm': 8.490580558776855, 'learning_rate': 3e-05, 'epoch': 1.11}


  0%|          | 0/5 [01:20<?, ?it/s]

{'loss': 0.3169, 'grad_norm': 4.099328994750977, 'learning_rate': 3.1e-05, 'epoch': 1.15}


  0%|          | 0/5 [01:22<?, ?it/s]

{'loss': 0.2434, 'grad_norm': 7.24547815322876, 'learning_rate': 3.2000000000000005e-05, 'epoch': 1.19}


  0%|          | 0/5 [01:25<?, ?it/s]

{'loss': 0.2854, 'grad_norm': 2.0781915187835693, 'learning_rate': 3.3e-05, 'epoch': 1.22}


  0%|          | 0/5 [01:27<?, ?it/s]

{'loss': 0.4532, 'grad_norm': 5.450042247772217, 'learning_rate': 3.4000000000000007e-05, 'epoch': 1.26}


  0%|          | 0/5 [01:29<?, ?it/s]

{'loss': 0.2106, 'grad_norm': 3.356104850769043, 'learning_rate': 3.5e-05, 'epoch': 1.3}


  0%|          | 0/5 [01:32<?, ?it/s]

{'loss': 0.4108, 'grad_norm': 3.3664283752441406, 'learning_rate': 3.6e-05, 'epoch': 1.33}


  0%|          | 0/5 [01:34<?, ?it/s]

{'loss': 0.5595, 'grad_norm': 7.951777458190918, 'learning_rate': 3.7e-05, 'epoch': 1.37}


  0%|          | 0/5 [01:36<?, ?it/s]

{'loss': 0.5005, 'grad_norm': 6.5788679122924805, 'learning_rate': 3.8e-05, 'epoch': 1.41}


  0%|          | 0/5 [01:39<?, ?it/s]

{'loss': 0.4329, 'grad_norm': 2.540619134902954, 'learning_rate': 3.9000000000000006e-05, 'epoch': 1.44}


  0%|          | 0/5 [01:41<?, ?it/s]

{'loss': 0.3505, 'grad_norm': 5.924145698547363, 'learning_rate': 4e-05, 'epoch': 1.48}


  0%|          | 0/5 [01:43<?, ?it/s]

{'loss': 0.2811, 'grad_norm': 3.1486871242523193, 'learning_rate': 4.1e-05, 'epoch': 1.52}


  0%|          | 0/5 [01:46<?, ?it/s]

{'loss': 0.3646, 'grad_norm': 5.366673469543457, 'learning_rate': 4.2e-05, 'epoch': 1.56}


  0%|          | 0/5 [01:48<?, ?it/s]

{'loss': 0.3896, 'grad_norm': 3.14082670211792, 'learning_rate': 4.3e-05, 'epoch': 1.59}


  0%|          | 0/5 [01:50<?, ?it/s]

{'loss': 0.2897, 'grad_norm': 3.7832047939300537, 'learning_rate': 4.4000000000000006e-05, 'epoch': 1.63}


  0%|          | 0/5 [01:53<?, ?it/s]

{'loss': 0.2779, 'grad_norm': 4.269077301025391, 'learning_rate': 4.5e-05, 'epoch': 1.67}


  0%|          | 0/5 [01:55<?, ?it/s]

{'loss': 0.2759, 'grad_norm': 6.395970344543457, 'learning_rate': 4.600000000000001e-05, 'epoch': 1.7}


  0%|          | 0/5 [01:57<?, ?it/s]

{'loss': 0.3276, 'grad_norm': 5.842487335205078, 'learning_rate': 4.7e-05, 'epoch': 1.74}


  0%|          | 0/5 [02:00<?, ?it/s]

{'loss': 0.2491, 'grad_norm': 3.303900718688965, 'learning_rate': 4.8e-05, 'epoch': 1.78}


  0%|          | 0/5 [02:02<?, ?it/s]

{'loss': 0.3879, 'grad_norm': 7.116092681884766, 'learning_rate': 4.9e-05, 'epoch': 1.81}


  0%|          | 0/5 [02:04<?, ?it/s]

{'loss': 0.3697, 'grad_norm': 2.928828477859497, 'learning_rate': 5e-05, 'epoch': 1.85}


  0%|          | 0/5 [02:07<?, ?it/s]

{'loss': 0.3467, 'grad_norm': 3.616410493850708, 'learning_rate': 4.8387096774193554e-05, 'epoch': 1.89}


  0%|          | 0/5 [02:09<?, ?it/s]

{'loss': 0.3526, 'grad_norm': 1.92893648147583, 'learning_rate': 4.67741935483871e-05, 'epoch': 1.93}


  0%|          | 0/5 [02:11<?, ?it/s]

{'loss': 0.2985, 'grad_norm': 4.765471935272217, 'learning_rate': 4.516129032258064e-05, 'epoch': 1.96}


  0%|          | 0/5 [02:14<?, ?it/s]

{'loss': 0.3897, 'grad_norm': 1.3667163848876953, 'learning_rate': 4.3548387096774194e-05, 'epoch': 2.0}


  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/5 [02:17<?, ?it/s]

{'eval_loss': 0.2969311773777008, 'eval_runtime': 3.2729, 'eval_samples_per_second': 102.966, 'eval_steps_per_second': 10.388, 'epoch': 2.0}


  0%|          | 0/5 [02:19<?, ?it/s]

{'loss': 0.4259, 'grad_norm': 2.8301846981048584, 'learning_rate': 4.1935483870967746e-05, 'epoch': 2.04}


  0%|          | 0/5 [02:22<?, ?it/s]

{'loss': 0.3269, 'grad_norm': 2.7271952629089355, 'learning_rate': 4.032258064516129e-05, 'epoch': 2.07}


  0%|          | 0/5 [02:24<?, ?it/s]

{'loss': 0.3804, 'grad_norm': 2.607959508895874, 'learning_rate': 3.870967741935484e-05, 'epoch': 2.11}


  0%|          | 0/5 [02:27<?, ?it/s]

{'loss': 0.3264, 'grad_norm': 10.246824264526367, 'learning_rate': 3.7096774193548386e-05, 'epoch': 2.15}


  0%|          | 0/5 [02:29<?, ?it/s]

{'loss': 0.3109, 'grad_norm': 2.581961154937744, 'learning_rate': 3.548387096774194e-05, 'epoch': 2.19}


  0%|          | 0/5 [02:31<?, ?it/s]

{'loss': 0.4522, 'grad_norm': 4.203260898590088, 'learning_rate': 3.387096774193548e-05, 'epoch': 2.22}


  0%|          | 0/5 [02:34<?, ?it/s]

{'loss': 0.3327, 'grad_norm': 3.935622453689575, 'learning_rate': 3.2258064516129034e-05, 'epoch': 2.26}


  0%|          | 0/5 [02:36<?, ?it/s]

{'loss': 0.1728, 'grad_norm': 2.9639484882354736, 'learning_rate': 3.0645161290322585e-05, 'epoch': 2.3}


  0%|          | 0/5 [02:39<?, ?it/s]

{'loss': 0.3683, 'grad_norm': 2.6927032470703125, 'learning_rate': 2.9032258064516133e-05, 'epoch': 2.33}


  0%|          | 0/5 [02:41<?, ?it/s]

{'loss': 0.3048, 'grad_norm': 6.610137939453125, 'learning_rate': 2.7419354838709678e-05, 'epoch': 2.37}


  0%|          | 0/5 [02:43<?, ?it/s]

{'loss': 0.2858, 'grad_norm': 4.952713489532471, 'learning_rate': 2.5806451612903226e-05, 'epoch': 2.41}


  0%|          | 0/5 [02:46<?, ?it/s]

{'loss': 0.3438, 'grad_norm': 7.574625492095947, 'learning_rate': 2.4193548387096777e-05, 'epoch': 2.44}


  0%|          | 0/5 [02:48<?, ?it/s]

{'loss': 0.4621, 'grad_norm': 3.901423454284668, 'learning_rate': 2.258064516129032e-05, 'epoch': 2.48}


  0%|          | 0/5 [02:50<?, ?it/s]

{'loss': 0.243, 'grad_norm': 5.986388683319092, 'learning_rate': 2.0967741935483873e-05, 'epoch': 2.52}


  0%|          | 0/5 [02:53<?, ?it/s]

{'loss': 0.3281, 'grad_norm': 4.467129707336426, 'learning_rate': 1.935483870967742e-05, 'epoch': 2.56}


  0%|          | 0/5 [02:55<?, ?it/s]

{'loss': 0.3295, 'grad_norm': 7.87757682800293, 'learning_rate': 1.774193548387097e-05, 'epoch': 2.59}


  0%|          | 0/5 [02:58<?, ?it/s]

{'loss': 0.2982, 'grad_norm': 3.3257689476013184, 'learning_rate': 1.6129032258064517e-05, 'epoch': 2.63}


  0%|          | 0/5 [03:00<?, ?it/s]

{'loss': 0.2936, 'grad_norm': 2.4292891025543213, 'learning_rate': 1.4516129032258066e-05, 'epoch': 2.67}


  0%|          | 0/5 [03:02<?, ?it/s]

{'loss': 0.3096, 'grad_norm': 7.123329162597656, 'learning_rate': 1.2903225806451613e-05, 'epoch': 2.7}


  0%|          | 0/5 [03:05<?, ?it/s]

{'loss': 0.3044, 'grad_norm': 1.9563710689544678, 'learning_rate': 1.129032258064516e-05, 'epoch': 2.74}


  0%|          | 0/5 [03:07<?, ?it/s]

{'loss': 0.3741, 'grad_norm': 4.416033744812012, 'learning_rate': 9.67741935483871e-06, 'epoch': 2.78}


  0%|          | 0/5 [03:09<?, ?it/s]

{'loss': 0.2763, 'grad_norm': 4.132253646850586, 'learning_rate': 8.064516129032258e-06, 'epoch': 2.81}


  0%|          | 0/5 [03:12<?, ?it/s]

{'loss': 0.3152, 'grad_norm': 2.735025644302368, 'learning_rate': 6.451612903225806e-06, 'epoch': 2.85}


  0%|          | 0/5 [03:14<?, ?it/s]

{'loss': 0.3048, 'grad_norm': 4.980059623718262, 'learning_rate': 4.838709677419355e-06, 'epoch': 2.89}


  0%|          | 0/5 [03:16<?, ?it/s]

{'loss': 0.3051, 'grad_norm': 3.376816511154175, 'learning_rate': 3.225806451612903e-06, 'epoch': 2.93}


  0%|          | 0/5 [03:19<?, ?it/s]

{'loss': 0.2975, 'grad_norm': 5.687441825866699, 'learning_rate': 1.6129032258064516e-06, 'epoch': 2.96}


  0%|          | 0/5 [03:21<?, ?it/s]

{'loss': 0.2766, 'grad_norm': 4.393670082092285, 'learning_rate': 0.0, 'epoch': 3.0}


  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/5 [03:25<?, ?it/s]

{'eval_loss': 0.29048725962638855, 'eval_runtime': 3.231, 'eval_samples_per_second': 104.303, 'eval_steps_per_second': 10.523, 'epoch': 3.0}
{'train_runtime': 203.1785, 'train_samples_per_second': 39.763, 'train_steps_per_second': 3.987, 'train_loss': 0.3975837433779681, 'epoch': 3.0}


Map:   0%|          | 0/2693 [00:00<?, ? examples/s]

Map:   0%|          | 0/2693 [00:00<?, ? examples/s]

Map:   0%|          | 0/337 [00:00<?, ? examples/s]

Map:   0%|          | 0/337 [00:00<?, ? examples/s]

| Global Round : 0 | Local # 1 	Malicious: True


  0%|          | 0/810 [00:00<?, ?it/s]

  0%|          | 0/5 [03:29<?, ?it/s]

{'loss': 1.7455, 'grad_norm': 8.86871337890625, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.04}


  0%|          | 0/5 [03:31<?, ?it/s]

{'loss': 2.0178, 'grad_norm': 12.155035972595215, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.07}


  0%|          | 0/5 [03:34<?, ?it/s]

{'loss': 1.7952, 'grad_norm': 15.344247817993164, 'learning_rate': 3e-06, 'epoch': 0.11}


  0%|          | 0/5 [03:36<?, ?it/s]

{'loss': 2.3107, 'grad_norm': 14.734217643737793, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.15}


  0%|          | 0/5 [03:39<?, ?it/s]

{'loss': 1.8257, 'grad_norm': 9.605352401733398, 'learning_rate': 5e-06, 'epoch': 0.19}


  0%|          | 0/5 [03:41<?, ?it/s]

{'loss': 1.9682, 'grad_norm': 7.933975696563721, 'learning_rate': 6e-06, 'epoch': 0.22}


  0%|          | 0/5 [03:43<?, ?it/s]

{'loss': 1.9542, 'grad_norm': 20.194433212280273, 'learning_rate': 7.000000000000001e-06, 'epoch': 0.26}


  0%|          | 0/5 [03:46<?, ?it/s]

{'loss': 1.457, 'grad_norm': 5.761983394622803, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.3}


  0%|          | 0/5 [03:48<?, ?it/s]

{'loss': 1.7248, 'grad_norm': 10.412286758422852, 'learning_rate': 9e-06, 'epoch': 0.33}


  0%|          | 0/5 [03:51<?, ?it/s]

{'loss': 1.3583, 'grad_norm': 12.283588409423828, 'learning_rate': 1e-05, 'epoch': 0.37}


  0%|          | 0/5 [03:53<?, ?it/s]

{'loss': 1.6029, 'grad_norm': 14.856241226196289, 'learning_rate': 1.1000000000000001e-05, 'epoch': 0.41}


  0%|          | 0/5 [03:55<?, ?it/s]

{'loss': 1.3366, 'grad_norm': 6.943207740783691, 'learning_rate': 1.2e-05, 'epoch': 0.44}


  0%|          | 0/5 [03:58<?, ?it/s]

{'loss': 1.8959, 'grad_norm': 13.804929733276367, 'learning_rate': 1.3000000000000001e-05, 'epoch': 0.48}


  0%|          | 0/5 [04:00<?, ?it/s]

{'loss': 1.9528, 'grad_norm': 12.394253730773926, 'learning_rate': 1.4000000000000001e-05, 'epoch': 0.52}


  0%|          | 0/5 [04:02<?, ?it/s]

{'loss': 1.6666, 'grad_norm': 9.92227840423584, 'learning_rate': 1.5e-05, 'epoch': 0.56}


  0%|          | 0/5 [04:05<?, ?it/s]

{'loss': 1.0309, 'grad_norm': 7.133310794830322, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.59}


  0%|          | 0/5 [04:07<?, ?it/s]

{'loss': 1.0238, 'grad_norm': 9.951491355895996, 'learning_rate': 1.7000000000000003e-05, 'epoch': 0.63}


  0%|          | 0/5 [04:09<?, ?it/s]

{'loss': 1.3426, 'grad_norm': 9.70765495300293, 'learning_rate': 1.8e-05, 'epoch': 0.67}


  0%|          | 0/5 [04:12<?, ?it/s]

{'loss': 0.8565, 'grad_norm': 5.431549549102783, 'learning_rate': 1.9e-05, 'epoch': 0.7}


  0%|          | 0/5 [04:14<?, ?it/s]

{'loss': 0.6231, 'grad_norm': 1.5009465217590332, 'learning_rate': 2e-05, 'epoch': 0.74}


  0%|          | 0/5 [04:17<?, ?it/s]

{'loss': 0.7831, 'grad_norm': 3.388688087463379, 'learning_rate': 2.1e-05, 'epoch': 0.78}


  0%|          | 0/5 [04:19<?, ?it/s]

{'loss': 0.6171, 'grad_norm': 10.188457489013672, 'learning_rate': 2.2000000000000003e-05, 'epoch': 0.81}


  0%|          | 0/5 [04:21<?, ?it/s]

{'loss': 0.6079, 'grad_norm': 4.596279144287109, 'learning_rate': 2.3000000000000003e-05, 'epoch': 0.85}


  0%|          | 0/5 [04:24<?, ?it/s]

{'loss': 0.5043, 'grad_norm': 7.373584270477295, 'learning_rate': 2.4e-05, 'epoch': 0.89}


  0%|          | 0/5 [04:26<?, ?it/s]

{'loss': 0.504, 'grad_norm': 4.18381929397583, 'learning_rate': 2.5e-05, 'epoch': 0.93}


  0%|          | 0/5 [04:29<?, ?it/s]

{'loss': 0.482, 'grad_norm': 5.24957799911499, 'learning_rate': 2.6000000000000002e-05, 'epoch': 0.96}


  0%|          | 0/5 [04:31<?, ?it/s]

{'loss': 0.3395, 'grad_norm': 3.9877045154571533, 'learning_rate': 2.7000000000000002e-05, 'epoch': 1.0}


  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/5 [04:34<?, ?it/s]

{'eval_loss': 0.5268201231956482, 'eval_runtime': 3.2519, 'eval_samples_per_second': 103.632, 'eval_steps_per_second': 10.455, 'epoch': 1.0}


  0%|          | 0/5 [04:37<?, ?it/s]

{'loss': 0.5512, 'grad_norm': 4.523012161254883, 'learning_rate': 2.8000000000000003e-05, 'epoch': 1.04}


  0%|          | 0/5 [04:39<?, ?it/s]

{'loss': 0.4202, 'grad_norm': 1.656363844871521, 'learning_rate': 2.9e-05, 'epoch': 1.07}


  0%|          | 0/5 [04:41<?, ?it/s]

{'loss': 0.498, 'grad_norm': 9.135717391967773, 'learning_rate': 3e-05, 'epoch': 1.11}


  0%|          | 0/5 [04:44<?, ?it/s]

{'loss': 0.5284, 'grad_norm': 4.162862300872803, 'learning_rate': 3.1e-05, 'epoch': 1.15}


  0%|          | 0/5 [04:46<?, ?it/s]

{'loss': 0.4724, 'grad_norm': 3.49942946434021, 'learning_rate': 3.2000000000000005e-05, 'epoch': 1.19}


  0%|          | 0/5 [04:49<?, ?it/s]

{'loss': 0.3945, 'grad_norm': 4.801206111907959, 'learning_rate': 3.3e-05, 'epoch': 1.22}


  0%|          | 0/5 [04:51<?, ?it/s]

{'loss': 0.4553, 'grad_norm': 5.658502578735352, 'learning_rate': 3.4000000000000007e-05, 'epoch': 1.26}


  0%|          | 0/5 [04:53<?, ?it/s]

{'loss': 0.3699, 'grad_norm': 5.761878490447998, 'learning_rate': 3.5e-05, 'epoch': 1.3}


  0%|          | 0/5 [04:56<?, ?it/s]

{'loss': 0.356, 'grad_norm': 1.8354246616363525, 'learning_rate': 3.6e-05, 'epoch': 1.33}


  0%|          | 0/5 [04:58<?, ?it/s]

{'loss': 0.3585, 'grad_norm': 1.8417330980300903, 'learning_rate': 3.7e-05, 'epoch': 1.37}


  0%|          | 0/5 [05:01<?, ?it/s]

{'loss': 0.3662, 'grad_norm': 2.664607286453247, 'learning_rate': 3.8e-05, 'epoch': 1.41}


  0%|          | 0/5 [05:03<?, ?it/s]

{'loss': 0.4287, 'grad_norm': 2.324766159057617, 'learning_rate': 3.9000000000000006e-05, 'epoch': 1.44}


  0%|          | 0/5 [05:05<?, ?it/s]

{'loss': 0.344, 'grad_norm': 4.536752700805664, 'learning_rate': 4e-05, 'epoch': 1.48}


  0%|          | 0/5 [05:08<?, ?it/s]

{'loss': 0.3196, 'grad_norm': 4.098928928375244, 'learning_rate': 4.1e-05, 'epoch': 1.52}


  0%|          | 0/5 [05:10<?, ?it/s]

{'loss': 0.2875, 'grad_norm': 3.5404372215270996, 'learning_rate': 4.2e-05, 'epoch': 1.56}


  0%|          | 0/5 [05:13<?, ?it/s]

{'loss': 0.2682, 'grad_norm': 5.460039138793945, 'learning_rate': 4.3e-05, 'epoch': 1.59}


  0%|          | 0/5 [05:15<?, ?it/s]

{'loss': 0.2471, 'grad_norm': 4.5903449058532715, 'learning_rate': 4.4000000000000006e-05, 'epoch': 1.63}


  0%|          | 0/5 [05:17<?, ?it/s]

{'loss': 0.1565, 'grad_norm': 2.977954149246216, 'learning_rate': 4.5e-05, 'epoch': 1.67}


  0%|          | 0/5 [05:20<?, ?it/s]

{'loss': 0.3169, 'grad_norm': 5.0250349044799805, 'learning_rate': 4.600000000000001e-05, 'epoch': 1.7}


  0%|          | 0/5 [05:22<?, ?it/s]

{'loss': 0.2455, 'grad_norm': 8.097275733947754, 'learning_rate': 4.7e-05, 'epoch': 1.74}


  0%|          | 0/5 [05:25<?, ?it/s]

{'loss': 0.2194, 'grad_norm': 1.0411593914031982, 'learning_rate': 4.8e-05, 'epoch': 1.78}


  0%|          | 0/5 [05:27<?, ?it/s]

{'loss': 0.1924, 'grad_norm': 0.926899254322052, 'learning_rate': 4.9e-05, 'epoch': 1.81}


  0%|          | 0/5 [05:29<?, ?it/s]

{'loss': 0.3594, 'grad_norm': 3.2413902282714844, 'learning_rate': 5e-05, 'epoch': 1.85}


  0%|          | 0/5 [05:32<?, ?it/s]

{'loss': 0.1876, 'grad_norm': 0.2552672028541565, 'learning_rate': 4.8387096774193554e-05, 'epoch': 1.89}


  0%|          | 0/5 [05:34<?, ?it/s]

{'loss': 0.1137, 'grad_norm': 3.2934041023254395, 'learning_rate': 4.67741935483871e-05, 'epoch': 1.93}


  0%|          | 0/5 [05:37<?, ?it/s]

{'loss': 0.1251, 'grad_norm': 7.592766761779785, 'learning_rate': 4.516129032258064e-05, 'epoch': 1.96}


  0%|          | 0/5 [05:39<?, ?it/s]

{'loss': 0.1371, 'grad_norm': 0.9876089692115784, 'learning_rate': 4.3548387096774194e-05, 'epoch': 2.0}


  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/5 [05:42<?, ?it/s]

{'eval_loss': 0.36316269636154175, 'eval_runtime': 3.2867, 'eval_samples_per_second': 102.535, 'eval_steps_per_second': 10.345, 'epoch': 2.0}


  0%|          | 0/5 [05:45<?, ?it/s]

{'loss': 0.2696, 'grad_norm': 0.30185726284980774, 'learning_rate': 4.1935483870967746e-05, 'epoch': 2.04}


  0%|          | 0/5 [05:47<?, ?it/s]

{'loss': 0.1157, 'grad_norm': 2.244593858718872, 'learning_rate': 4.032258064516129e-05, 'epoch': 2.07}


  0%|          | 0/5 [05:50<?, ?it/s]

{'loss': 0.1001, 'grad_norm': 7.214170455932617, 'learning_rate': 3.870967741935484e-05, 'epoch': 2.11}


  0%|          | 0/5 [05:52<?, ?it/s]

{'loss': 0.2203, 'grad_norm': 0.33076950907707214, 'learning_rate': 3.7096774193548386e-05, 'epoch': 2.15}


  0%|          | 0/5 [05:54<?, ?it/s]

{'loss': 0.1505, 'grad_norm': 0.6634002923965454, 'learning_rate': 3.548387096774194e-05, 'epoch': 2.19}


  0%|          | 0/5 [05:57<?, ?it/s]

{'loss': 0.3028, 'grad_norm': 0.11631622910499573, 'learning_rate': 3.387096774193548e-05, 'epoch': 2.22}


  0%|          | 0/5 [05:59<?, ?it/s]

{'loss': 0.2191, 'grad_norm': 29.33046531677246, 'learning_rate': 3.2258064516129034e-05, 'epoch': 2.26}


  0%|          | 0/5 [06:02<?, ?it/s]

{'loss': 0.1291, 'grad_norm': 5.087167263031006, 'learning_rate': 3.0645161290322585e-05, 'epoch': 2.3}


  0%|          | 0/5 [06:04<?, ?it/s]

{'loss': 0.2676, 'grad_norm': 8.442521095275879, 'learning_rate': 2.9032258064516133e-05, 'epoch': 2.33}


  0%|          | 0/5 [06:06<?, ?it/s]

{'loss': 0.2417, 'grad_norm': 2.5305516719818115, 'learning_rate': 2.7419354838709678e-05, 'epoch': 2.37}


  0%|          | 0/5 [06:09<?, ?it/s]

{'loss': 0.2574, 'grad_norm': 1.0260019302368164, 'learning_rate': 2.5806451612903226e-05, 'epoch': 2.41}


  0%|          | 0/5 [06:11<?, ?it/s]

{'loss': 0.3291, 'grad_norm': 7.605248928070068, 'learning_rate': 2.4193548387096777e-05, 'epoch': 2.44}


  0%|          | 0/5 [06:13<?, ?it/s]

{'loss': 0.1349, 'grad_norm': 9.692727088928223, 'learning_rate': 2.258064516129032e-05, 'epoch': 2.48}


  0%|          | 0/5 [06:16<?, ?it/s]

{'loss': 0.4547, 'grad_norm': 8.870518684387207, 'learning_rate': 2.0967741935483873e-05, 'epoch': 2.52}


  0%|          | 0/5 [06:18<?, ?it/s]

{'loss': 0.4197, 'grad_norm': 9.058365821838379, 'learning_rate': 1.935483870967742e-05, 'epoch': 2.56}


  0%|          | 0/5 [06:21<?, ?it/s]

{'loss': 0.1645, 'grad_norm': 4.8433709144592285, 'learning_rate': 1.774193548387097e-05, 'epoch': 2.59}


  0%|          | 0/5 [06:23<?, ?it/s]

{'loss': 0.2565, 'grad_norm': 5.99477481842041, 'learning_rate': 1.6129032258064517e-05, 'epoch': 2.63}


  0%|          | 0/5 [06:25<?, ?it/s]

{'loss': 0.2962, 'grad_norm': 11.13605785369873, 'learning_rate': 1.4516129032258066e-05, 'epoch': 2.67}


  0%|          | 0/5 [06:28<?, ?it/s]

{'loss': 0.3327, 'grad_norm': 10.204373359680176, 'learning_rate': 1.2903225806451613e-05, 'epoch': 2.7}


  0%|          | 0/5 [06:30<?, ?it/s]

{'loss': 0.3, 'grad_norm': 5.611379146575928, 'learning_rate': 1.129032258064516e-05, 'epoch': 2.74}


  0%|          | 0/5 [06:33<?, ?it/s]

{'loss': 0.1702, 'grad_norm': 0.23392394185066223, 'learning_rate': 9.67741935483871e-06, 'epoch': 2.78}


  0%|          | 0/5 [06:35<?, ?it/s]

{'loss': 0.2283, 'grad_norm': 6.496188163757324, 'learning_rate': 8.064516129032258e-06, 'epoch': 2.81}


  0%|          | 0/5 [06:37<?, ?it/s]

{'loss': 0.2414, 'grad_norm': 4.404123306274414, 'learning_rate': 6.451612903225806e-06, 'epoch': 2.85}


  0%|          | 0/5 [06:40<?, ?it/s]

{'loss': 0.2211, 'grad_norm': 4.39598274230957, 'learning_rate': 4.838709677419355e-06, 'epoch': 2.89}


  0%|          | 0/5 [06:42<?, ?it/s]

{'loss': 0.2194, 'grad_norm': 8.122586250305176, 'learning_rate': 3.225806451612903e-06, 'epoch': 2.93}


  0%|          | 0/5 [06:44<?, ?it/s]

{'loss': 0.2344, 'grad_norm': 0.3685893714427948, 'learning_rate': 1.6129032258064516e-06, 'epoch': 2.96}


  0%|          | 0/5 [06:47<?, ?it/s]

{'loss': 0.2137, 'grad_norm': 6.003561019897461, 'learning_rate': 0.0, 'epoch': 3.0}


  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/5 [06:50<?, ?it/s]

{'eval_loss': 0.3583987057209015, 'eval_runtime': 3.2365, 'eval_samples_per_second': 104.124, 'eval_steps_per_second': 10.505, 'epoch': 3.0}
{'train_runtime': 203.5774, 'train_samples_per_second': 39.685, 'train_steps_per_second': 3.979, 'train_loss': 0.6239154630237156, 'epoch': 3.0}


Map:   0%|          | 0/2693 [00:00<?, ? examples/s]

Map:   0%|          | 0/2693 [00:00<?, ? examples/s]

Map:   0%|          | 0/337 [00:00<?, ? examples/s]

Map:   0%|          | 0/337 [00:00<?, ? examples/s]

| Global Round : 0 | Local # 19 	Malicious: True


  0%|          | 0/810 [00:00<?, ?it/s]

  0%|          | 0/5 [06:54<?, ?it/s]

{'loss': 1.9858, 'grad_norm': 18.868383407592773, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.04}


  0%|          | 0/5 [06:57<?, ?it/s]

{'loss': 2.2391, 'grad_norm': 12.467474937438965, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.07}


  0%|          | 0/5 [06:59<?, ?it/s]

{'loss': 2.5036, 'grad_norm': 5.831450939178467, 'learning_rate': 3e-06, 'epoch': 0.11}


  0%|          | 0/5 [07:02<?, ?it/s]

{'loss': 1.7986, 'grad_norm': 12.453160285949707, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.15}


  0%|          | 0/5 [07:04<?, ?it/s]

{'loss': 2.0697, 'grad_norm': 5.963820934295654, 'learning_rate': 5e-06, 'epoch': 0.19}


  0%|          | 0/5 [07:06<?, ?it/s]

{'loss': 1.9583, 'grad_norm': 14.863024711608887, 'learning_rate': 6e-06, 'epoch': 0.22}


  0%|          | 0/5 [07:09<?, ?it/s]

{'loss': 1.4378, 'grad_norm': 15.304021835327148, 'learning_rate': 7.000000000000001e-06, 'epoch': 0.26}


  0%|          | 0/5 [07:11<?, ?it/s]

{'loss': 1.8047, 'grad_norm': 20.15566635131836, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.3}


  0%|          | 0/5 [07:14<?, ?it/s]

{'loss': 1.726, 'grad_norm': 5.179825305938721, 'learning_rate': 9e-06, 'epoch': 0.33}


  0%|          | 0/5 [07:16<?, ?it/s]

{'loss': 1.5453, 'grad_norm': 2.9289960861206055, 'learning_rate': 1e-05, 'epoch': 0.37}


  0%|          | 0/5 [07:18<?, ?it/s]

{'loss': 1.7057, 'grad_norm': 7.878516674041748, 'learning_rate': 1.1000000000000001e-05, 'epoch': 0.41}


  0%|          | 0/5 [07:21<?, ?it/s]

{'loss': 1.569, 'grad_norm': 13.550840377807617, 'learning_rate': 1.2e-05, 'epoch': 0.44}


  0%|          | 0/5 [07:23<?, ?it/s]

{'loss': 1.8582, 'grad_norm': 11.699688911437988, 'learning_rate': 1.3000000000000001e-05, 'epoch': 0.48}


  0%|          | 0/5 [07:26<?, ?it/s]

{'loss': 1.6824, 'grad_norm': 16.96879768371582, 'learning_rate': 1.4000000000000001e-05, 'epoch': 0.52}


  0%|          | 0/5 [07:28<?, ?it/s]

{'loss': 1.345, 'grad_norm': 10.30994987487793, 'learning_rate': 1.5e-05, 'epoch': 0.56}


  0%|          | 0/5 [07:30<?, ?it/s]

{'loss': 1.2013, 'grad_norm': 6.206721305847168, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.59}


  0%|          | 0/5 [07:33<?, ?it/s]

{'loss': 1.0151, 'grad_norm': 11.961939811706543, 'learning_rate': 1.7000000000000003e-05, 'epoch': 0.63}


  0%|          | 0/5 [07:35<?, ?it/s]

{'loss': 1.3348, 'grad_norm': 9.21733283996582, 'learning_rate': 1.8e-05, 'epoch': 0.67}


  0%|          | 0/5 [07:38<?, ?it/s]

{'loss': 0.8935, 'grad_norm': 7.100688457489014, 'learning_rate': 1.9e-05, 'epoch': 0.7}


  0%|          | 0/5 [07:40<?, ?it/s]

{'loss': 0.7382, 'grad_norm': 8.197178840637207, 'learning_rate': 2e-05, 'epoch': 0.74}


  0%|          | 0/5 [07:43<?, ?it/s]

{'loss': 0.6279, 'grad_norm': 7.941969871520996, 'learning_rate': 2.1e-05, 'epoch': 0.78}


  0%|          | 0/5 [07:45<?, ?it/s]

{'loss': 0.6067, 'grad_norm': 6.96771240234375, 'learning_rate': 2.2000000000000003e-05, 'epoch': 0.81}


  0%|          | 0/5 [07:47<?, ?it/s]

{'loss': 0.7037, 'grad_norm': 5.092316150665283, 'learning_rate': 2.3000000000000003e-05, 'epoch': 0.85}


  0%|          | 0/5 [07:50<?, ?it/s]

{'loss': 0.6574, 'grad_norm': 9.88990592956543, 'learning_rate': 2.4e-05, 'epoch': 0.89}


  0%|          | 0/5 [07:52<?, ?it/s]

{'loss': 0.4127, 'grad_norm': 3.820744037628174, 'learning_rate': 2.5e-05, 'epoch': 0.93}


  0%|          | 0/5 [07:55<?, ?it/s]

{'loss': 0.4938, 'grad_norm': 5.024142265319824, 'learning_rate': 2.6000000000000002e-05, 'epoch': 0.96}


  0%|          | 0/5 [07:57<?, ?it/s]

{'loss': 0.64, 'grad_norm': 13.622983932495117, 'learning_rate': 2.7000000000000002e-05, 'epoch': 1.0}


  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/5 [08:00<?, ?it/s]

{'eval_loss': 0.582572877407074, 'eval_runtime': 3.2447, 'eval_samples_per_second': 103.861, 'eval_steps_per_second': 10.479, 'epoch': 1.0}


  0%|          | 0/5 [08:03<?, ?it/s]

{'loss': 0.4803, 'grad_norm': 1.2752463817596436, 'learning_rate': 2.8000000000000003e-05, 'epoch': 1.04}


  0%|          | 0/5 [08:05<?, ?it/s]

{'loss': 0.6248, 'grad_norm': 4.4515061378479, 'learning_rate': 2.9e-05, 'epoch': 1.07}


  0%|          | 0/5 [08:08<?, ?it/s]

{'loss': 0.4562, 'grad_norm': 4.950141906738281, 'learning_rate': 3e-05, 'epoch': 1.11}


  0%|          | 0/5 [08:10<?, ?it/s]

{'loss': 0.4681, 'grad_norm': 3.2864186763763428, 'learning_rate': 3.1e-05, 'epoch': 1.15}


  0%|          | 0/5 [08:12<?, ?it/s]

{'loss': 0.4558, 'grad_norm': 4.145405292510986, 'learning_rate': 3.2000000000000005e-05, 'epoch': 1.19}


  0%|          | 0/5 [08:15<?, ?it/s]

{'loss': 0.4032, 'grad_norm': 3.2564358711242676, 'learning_rate': 3.3e-05, 'epoch': 1.22}


  0%|          | 0/5 [08:17<?, ?it/s]

{'loss': 0.411, 'grad_norm': 1.9964957237243652, 'learning_rate': 3.4000000000000007e-05, 'epoch': 1.26}


  0%|          | 0/5 [08:20<?, ?it/s]

{'loss': 0.4546, 'grad_norm': 3.230574369430542, 'learning_rate': 3.5e-05, 'epoch': 1.3}


  0%|          | 0/5 [08:22<?, ?it/s]

{'loss': 0.4129, 'grad_norm': 5.896097660064697, 'learning_rate': 3.6e-05, 'epoch': 1.33}


  0%|          | 0/5 [08:25<?, ?it/s]

{'loss': 0.4174, 'grad_norm': 7.295165061950684, 'learning_rate': 3.7e-05, 'epoch': 1.37}


  0%|          | 0/5 [08:27<?, ?it/s]

{'loss': 0.4424, 'grad_norm': 2.5903589725494385, 'learning_rate': 3.8e-05, 'epoch': 1.41}


  0%|          | 0/5 [08:29<?, ?it/s]

{'loss': 0.2655, 'grad_norm': 5.364800453186035, 'learning_rate': 3.9000000000000006e-05, 'epoch': 1.44}


  0%|          | 0/5 [08:32<?, ?it/s]

{'loss': 0.2863, 'grad_norm': 4.584362506866455, 'learning_rate': 4e-05, 'epoch': 1.48}


  0%|          | 0/5 [08:34<?, ?it/s]

{'loss': 0.2658, 'grad_norm': 1.773804783821106, 'learning_rate': 4.1e-05, 'epoch': 1.52}


  0%|          | 0/5 [08:37<?, ?it/s]

{'loss': 0.3711, 'grad_norm': 4.311235427856445, 'learning_rate': 4.2e-05, 'epoch': 1.56}


  0%|          | 0/5 [08:39<?, ?it/s]

{'loss': 0.3055, 'grad_norm': 4.305891513824463, 'learning_rate': 4.3e-05, 'epoch': 1.59}


  0%|          | 0/5 [08:41<?, ?it/s]

{'loss': 0.2794, 'grad_norm': 2.5166420936584473, 'learning_rate': 4.4000000000000006e-05, 'epoch': 1.63}


  0%|          | 0/5 [08:44<?, ?it/s]

{'loss': 0.2681, 'grad_norm': 2.6537671089172363, 'learning_rate': 4.5e-05, 'epoch': 1.67}


  0%|          | 0/5 [08:46<?, ?it/s]

{'loss': 0.4148, 'grad_norm': 3.9854166507720947, 'learning_rate': 4.600000000000001e-05, 'epoch': 1.7}


  0%|          | 0/5 [08:49<?, ?it/s]

{'loss': 0.2325, 'grad_norm': 1.4062304496765137, 'learning_rate': 4.7e-05, 'epoch': 1.74}


  0%|          | 0/5 [08:51<?, ?it/s]

{'loss': 0.3791, 'grad_norm': 6.769145965576172, 'learning_rate': 4.8e-05, 'epoch': 1.78}


  0%|          | 0/5 [08:54<?, ?it/s]

{'loss': 0.1747, 'grad_norm': 2.028505325317383, 'learning_rate': 4.9e-05, 'epoch': 1.81}


  0%|          | 0/5 [08:56<?, ?it/s]

{'loss': 0.2145, 'grad_norm': 4.172044277191162, 'learning_rate': 5e-05, 'epoch': 1.85}


  0%|          | 0/5 [08:58<?, ?it/s]

{'loss': 0.1561, 'grad_norm': 3.302455186843872, 'learning_rate': 4.8387096774193554e-05, 'epoch': 1.89}


  0%|          | 0/5 [09:01<?, ?it/s]

{'loss': 0.2059, 'grad_norm': 0.28539159893989563, 'learning_rate': 4.67741935483871e-05, 'epoch': 1.93}


  0%|          | 0/5 [09:03<?, ?it/s]

{'loss': 0.311, 'grad_norm': 5.033400535583496, 'learning_rate': 4.516129032258064e-05, 'epoch': 1.96}


  0%|          | 0/5 [09:06<?, ?it/s]

{'loss': 0.3221, 'grad_norm': 17.635517120361328, 'learning_rate': 4.3548387096774194e-05, 'epoch': 2.0}


  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/5 [09:09<?, ?it/s]

{'eval_loss': 0.3766784071922302, 'eval_runtime': 3.2369, 'eval_samples_per_second': 104.111, 'eval_steps_per_second': 10.504, 'epoch': 2.0}


  0%|          | 0/5 [09:11<?, ?it/s]

{'loss': 0.1703, 'grad_norm': 3.554600477218628, 'learning_rate': 4.1935483870967746e-05, 'epoch': 2.04}


  0%|          | 0/5 [09:14<?, ?it/s]

{'loss': 0.3533, 'grad_norm': 4.941967964172363, 'learning_rate': 4.032258064516129e-05, 'epoch': 2.07}


  0%|          | 0/5 [09:16<?, ?it/s]

{'loss': 0.3224, 'grad_norm': 5.975667953491211, 'learning_rate': 3.870967741935484e-05, 'epoch': 2.11}


  0%|          | 0/5 [09:19<?, ?it/s]

{'loss': 0.1844, 'grad_norm': 7.9459404945373535, 'learning_rate': 3.7096774193548386e-05, 'epoch': 2.15}


  0%|          | 0/5 [09:21<?, ?it/s]

{'loss': 0.1193, 'grad_norm': 2.083036422729492, 'learning_rate': 3.548387096774194e-05, 'epoch': 2.19}


  0%|          | 0/5 [09:23<?, ?it/s]

{'loss': 0.3134, 'grad_norm': 0.5748373866081238, 'learning_rate': 3.387096774193548e-05, 'epoch': 2.22}


  0%|          | 0/5 [09:26<?, ?it/s]

{'loss': 0.2808, 'grad_norm': 4.694491863250732, 'learning_rate': 3.2258064516129034e-05, 'epoch': 2.26}


  0%|          | 0/5 [09:28<?, ?it/s]

{'loss': 0.2552, 'grad_norm': 3.4507369995117188, 'learning_rate': 3.0645161290322585e-05, 'epoch': 2.3}


  0%|          | 0/5 [09:31<?, ?it/s]

{'loss': 0.2124, 'grad_norm': 0.20137080550193787, 'learning_rate': 2.9032258064516133e-05, 'epoch': 2.33}


  0%|          | 0/5 [09:33<?, ?it/s]

{'loss': 0.2629, 'grad_norm': 5.137575149536133, 'learning_rate': 2.7419354838709678e-05, 'epoch': 2.37}


  0%|          | 0/5 [09:35<?, ?it/s]

{'loss': 0.1186, 'grad_norm': 3.1805241107940674, 'learning_rate': 2.5806451612903226e-05, 'epoch': 2.41}


  0%|          | 0/5 [09:38<?, ?it/s]

{'loss': 0.1528, 'grad_norm': 0.9968839287757874, 'learning_rate': 2.4193548387096777e-05, 'epoch': 2.44}


  0%|          | 0/5 [09:40<?, ?it/s]

{'loss': 0.1826, 'grad_norm': 8.934455871582031, 'learning_rate': 2.258064516129032e-05, 'epoch': 2.48}


  0%|          | 0/5 [09:43<?, ?it/s]

{'loss': 0.3121, 'grad_norm': 9.832908630371094, 'learning_rate': 2.0967741935483873e-05, 'epoch': 2.52}


  0%|          | 0/5 [09:45<?, ?it/s]

{'loss': 0.3755, 'grad_norm': 5.682736396789551, 'learning_rate': 1.935483870967742e-05, 'epoch': 2.56}


  0%|          | 0/5 [09:48<?, ?it/s]

{'loss': 0.2366, 'grad_norm': 4.122993469238281, 'learning_rate': 1.774193548387097e-05, 'epoch': 2.59}


  0%|          | 0/5 [09:50<?, ?it/s]

{'loss': 0.3147, 'grad_norm': 0.6576871871948242, 'learning_rate': 1.6129032258064517e-05, 'epoch': 2.63}


  0%|          | 0/5 [09:52<?, ?it/s]

{'loss': 0.2411, 'grad_norm': 3.9294593334198, 'learning_rate': 1.4516129032258066e-05, 'epoch': 2.67}


  0%|          | 0/5 [09:55<?, ?it/s]

{'loss': 0.2498, 'grad_norm': 2.9039454460144043, 'learning_rate': 1.2903225806451613e-05, 'epoch': 2.7}


  0%|          | 0/5 [09:57<?, ?it/s]

{'loss': 0.1741, 'grad_norm': 2.027338981628418, 'learning_rate': 1.129032258064516e-05, 'epoch': 2.74}


  0%|          | 0/5 [10:00<?, ?it/s]

{'loss': 0.1817, 'grad_norm': 1.225172758102417, 'learning_rate': 9.67741935483871e-06, 'epoch': 2.78}


  0%|          | 0/5 [10:02<?, ?it/s]

{'loss': 0.1251, 'grad_norm': 3.0681042671203613, 'learning_rate': 8.064516129032258e-06, 'epoch': 2.81}


  0%|          | 0/5 [10:04<?, ?it/s]

{'loss': 0.2358, 'grad_norm': 9.882058143615723, 'learning_rate': 6.451612903225806e-06, 'epoch': 2.85}


  0%|          | 0/5 [10:07<?, ?it/s]

{'loss': 0.1949, 'grad_norm': 0.9263876676559448, 'learning_rate': 4.838709677419355e-06, 'epoch': 2.89}


  0%|          | 0/5 [10:09<?, ?it/s]

{'loss': 0.1998, 'grad_norm': 2.8257527351379395, 'learning_rate': 3.225806451612903e-06, 'epoch': 2.93}


  0%|          | 0/5 [10:11<?, ?it/s]

{'loss': 0.2058, 'grad_norm': 5.1844072341918945, 'learning_rate': 1.6129032258064516e-06, 'epoch': 2.96}


  0%|          | 0/5 [10:14<?, ?it/s]

{'loss': 0.1821, 'grad_norm': 0.5698915123939514, 'learning_rate': 0.0, 'epoch': 3.0}


  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/5 [10:17<?, ?it/s]

{'eval_loss': 0.3756548762321472, 'eval_runtime': 3.2612, 'eval_samples_per_second': 103.336, 'eval_steps_per_second': 10.426, 'epoch': 3.0}
{'train_runtime': 205.1647, 'train_samples_per_second': 39.378, 'train_steps_per_second': 3.948, 'train_loss': 0.6443315507453165, 'epoch': 3.0}


Map:   0%|          | 0/2693 [00:00<?, ? examples/s]

Map:   0%|          | 0/2693 [00:00<?, ? examples/s]

Map:   0%|          | 0/337 [00:00<?, ? examples/s]

Map:   0%|          | 0/337 [00:00<?, ? examples/s]

| Global Round : 0 | Local # 8 	Malicious: False


  0%|          | 0/810 [00:00<?, ?it/s]

  0%|          | 0/5 [10:22<?, ?it/s]

{'loss': 0.4425, 'grad_norm': 2.8867251873016357, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.04}


  0%|          | 0/5 [10:24<?, ?it/s]

{'loss': 0.3958, 'grad_norm': 2.277596950531006, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.07}


  0%|          | 0/5 [10:26<?, ?it/s]

{'loss': 0.4755, 'grad_norm': 8.03559684753418, 'learning_rate': 3e-06, 'epoch': 0.11}


  0%|          | 0/5 [10:29<?, ?it/s]

{'loss': 0.57, 'grad_norm': 6.122886657714844, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.15}


  0%|          | 0/5 [10:31<?, ?it/s]

{'loss': 0.393, 'grad_norm': 8.883035659790039, 'learning_rate': 5e-06, 'epoch': 0.19}


  0%|          | 0/5 [10:34<?, ?it/s]

{'loss': 0.688, 'grad_norm': 5.397218704223633, 'learning_rate': 6e-06, 'epoch': 0.22}


  0%|          | 0/5 [10:36<?, ?it/s]

{'loss': 0.3486, 'grad_norm': 1.890533208847046, 'learning_rate': 7.000000000000001e-06, 'epoch': 0.26}


  0%|          | 0/5 [10:38<?, ?it/s]

{'loss': 0.513, 'grad_norm': 4.543798923492432, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.3}


  0%|          | 0/5 [10:41<?, ?it/s]

{'loss': 0.7145, 'grad_norm': 9.71297550201416, 'learning_rate': 9e-06, 'epoch': 0.33}


  0%|          | 0/5 [10:43<?, ?it/s]

{'loss': 0.6358, 'grad_norm': 8.459402084350586, 'learning_rate': 1e-05, 'epoch': 0.37}


  0%|          | 0/5 [10:46<?, ?it/s]

{'loss': 0.5914, 'grad_norm': 9.584884643554688, 'learning_rate': 1.1000000000000001e-05, 'epoch': 0.41}


  0%|          | 0/5 [10:48<?, ?it/s]

{'loss': 0.3451, 'grad_norm': 4.200867652893066, 'learning_rate': 1.2e-05, 'epoch': 0.44}


  0%|          | 0/5 [10:50<?, ?it/s]

{'loss': 0.4989, 'grad_norm': 1.338786005973816, 'learning_rate': 1.3000000000000001e-05, 'epoch': 0.48}


  0%|          | 0/5 [10:53<?, ?it/s]

{'loss': 0.6706, 'grad_norm': 5.331663131713867, 'learning_rate': 1.4000000000000001e-05, 'epoch': 0.52}


  0%|          | 0/5 [10:55<?, ?it/s]

{'loss': 0.4755, 'grad_norm': 4.435472011566162, 'learning_rate': 1.5e-05, 'epoch': 0.56}


  0%|          | 0/5 [10:58<?, ?it/s]

{'loss': 0.4525, 'grad_norm': 11.521651268005371, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.59}


  0%|          | 0/5 [11:00<?, ?it/s]

{'loss': 0.4857, 'grad_norm': 8.951929092407227, 'learning_rate': 1.7000000000000003e-05, 'epoch': 0.63}


  0%|          | 0/5 [11:02<?, ?it/s]

{'loss': 0.2797, 'grad_norm': 4.183540344238281, 'learning_rate': 1.8e-05, 'epoch': 0.67}


  0%|          | 0/5 [11:05<?, ?it/s]

{'loss': 0.5062, 'grad_norm': 1.4261466264724731, 'learning_rate': 1.9e-05, 'epoch': 0.7}


  0%|          | 0/5 [11:07<?, ?it/s]

{'loss': 0.3991, 'grad_norm': 8.005682945251465, 'learning_rate': 2e-05, 'epoch': 0.74}


  0%|          | 0/5 [11:10<?, ?it/s]

{'loss': 0.3243, 'grad_norm': 2.9495162963867188, 'learning_rate': 2.1e-05, 'epoch': 0.78}


  0%|          | 0/5 [11:12<?, ?it/s]

{'loss': 0.4679, 'grad_norm': 8.232454299926758, 'learning_rate': 2.2000000000000003e-05, 'epoch': 0.81}


  0%|          | 0/5 [11:14<?, ?it/s]

{'loss': 0.3752, 'grad_norm': 1.334458589553833, 'learning_rate': 2.3000000000000003e-05, 'epoch': 0.85}


  0%|          | 0/5 [11:17<?, ?it/s]

{'loss': 0.4828, 'grad_norm': 6.607821464538574, 'learning_rate': 2.4e-05, 'epoch': 0.89}


  0%|          | 0/5 [11:19<?, ?it/s]

{'loss': 0.4589, 'grad_norm': 1.8923646211624146, 'learning_rate': 2.5e-05, 'epoch': 0.93}


  0%|          | 0/5 [11:22<?, ?it/s]

{'loss': 0.2975, 'grad_norm': 0.8808234930038452, 'learning_rate': 2.6000000000000002e-05, 'epoch': 0.96}


  0%|          | 0/5 [11:24<?, ?it/s]

{'loss': 0.3468, 'grad_norm': 10.01193904876709, 'learning_rate': 2.7000000000000002e-05, 'epoch': 1.0}


  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/5 [11:27<?, ?it/s]

{'eval_loss': 0.44964301586151123, 'eval_runtime': 3.2455, 'eval_samples_per_second': 103.835, 'eval_steps_per_second': 10.476, 'epoch': 1.0}


  0%|          | 0/5 [11:30<?, ?it/s]

{'loss': 0.2961, 'grad_norm': 5.613407611846924, 'learning_rate': 2.8000000000000003e-05, 'epoch': 1.04}


  0%|          | 0/5 [11:32<?, ?it/s]

{'loss': 0.3701, 'grad_norm': 9.536311149597168, 'learning_rate': 2.9e-05, 'epoch': 1.07}


  0%|          | 0/5 [11:34<?, ?it/s]

{'loss': 0.3334, 'grad_norm': 3.781561851501465, 'learning_rate': 3e-05, 'epoch': 1.11}


  0%|          | 0/5 [11:37<?, ?it/s]

{'loss': 0.4745, 'grad_norm': 0.3858354687690735, 'learning_rate': 3.1e-05, 'epoch': 1.15}


  0%|          | 0/5 [11:39<?, ?it/s]

{'loss': 0.3974, 'grad_norm': 1.8893115520477295, 'learning_rate': 3.2000000000000005e-05, 'epoch': 1.19}


  0%|          | 0/5 [11:41<?, ?it/s]

{'loss': 0.3138, 'grad_norm': 3.165783643722534, 'learning_rate': 3.3e-05, 'epoch': 1.22}


  0%|          | 0/5 [11:44<?, ?it/s]

{'loss': 0.2991, 'grad_norm': 5.533247470855713, 'learning_rate': 3.4000000000000007e-05, 'epoch': 1.26}


  0%|          | 0/5 [11:46<?, ?it/s]

{'loss': 0.3251, 'grad_norm': 2.1862106323242188, 'learning_rate': 3.5e-05, 'epoch': 1.3}


  0%|          | 0/5 [11:49<?, ?it/s]

{'loss': 0.3354, 'grad_norm': 2.2594621181488037, 'learning_rate': 3.6e-05, 'epoch': 1.33}


  0%|          | 0/5 [11:51<?, ?it/s]

{'loss': 0.2675, 'grad_norm': 2.325186014175415, 'learning_rate': 3.7e-05, 'epoch': 1.37}


  0%|          | 0/5 [11:53<?, ?it/s]

{'loss': 0.3226, 'grad_norm': 1.3320019245147705, 'learning_rate': 3.8e-05, 'epoch': 1.41}


  0%|          | 0/5 [11:56<?, ?it/s]

{'loss': 0.2906, 'grad_norm': 1.5709127187728882, 'learning_rate': 3.9000000000000006e-05, 'epoch': 1.44}


  0%|          | 0/5 [11:58<?, ?it/s]

{'loss': 0.2785, 'grad_norm': 5.638943672180176, 'learning_rate': 4e-05, 'epoch': 1.48}


  0%|          | 0/5 [12:00<?, ?it/s]

{'loss': 0.3191, 'grad_norm': 6.345952033996582, 'learning_rate': 4.1e-05, 'epoch': 1.52}


  0%|          | 0/5 [12:03<?, ?it/s]

{'loss': 0.3713, 'grad_norm': 1.6214442253112793, 'learning_rate': 4.2e-05, 'epoch': 1.56}


  0%|          | 0/5 [12:05<?, ?it/s]

{'loss': 0.3242, 'grad_norm': 2.530877113342285, 'learning_rate': 4.3e-05, 'epoch': 1.59}


  0%|          | 0/5 [12:07<?, ?it/s]

{'loss': 0.2601, 'grad_norm': 1.776287317276001, 'learning_rate': 4.4000000000000006e-05, 'epoch': 1.63}


  0%|          | 0/5 [12:10<?, ?it/s]

{'loss': 0.3868, 'grad_norm': 4.011152744293213, 'learning_rate': 4.5e-05, 'epoch': 1.67}


  0%|          | 0/5 [12:12<?, ?it/s]

{'loss': 0.2668, 'grad_norm': 1.6576735973358154, 'learning_rate': 4.600000000000001e-05, 'epoch': 1.7}


  0%|          | 0/5 [12:15<?, ?it/s]

{'loss': 0.3126, 'grad_norm': 2.485103130340576, 'learning_rate': 4.7e-05, 'epoch': 1.74}


  0%|          | 0/5 [12:17<?, ?it/s]

{'loss': 0.3129, 'grad_norm': 7.127841949462891, 'learning_rate': 4.8e-05, 'epoch': 1.78}


  0%|          | 0/5 [12:20<?, ?it/s]

{'loss': 0.4451, 'grad_norm': 8.131647109985352, 'learning_rate': 4.9e-05, 'epoch': 1.81}


  0%|          | 0/5 [12:22<?, ?it/s]

{'loss': 0.3175, 'grad_norm': 2.681819438934326, 'learning_rate': 5e-05, 'epoch': 1.85}


  0%|          | 0/5 [12:24<?, ?it/s]

{'loss': 0.2623, 'grad_norm': 3.18853497505188, 'learning_rate': 4.8387096774193554e-05, 'epoch': 1.89}


  0%|          | 0/5 [12:27<?, ?it/s]

{'loss': 0.4111, 'grad_norm': 3.4240565299987793, 'learning_rate': 4.67741935483871e-05, 'epoch': 1.93}


  0%|          | 0/5 [12:29<?, ?it/s]

{'loss': 0.4763, 'grad_norm': 7.308559417724609, 'learning_rate': 4.516129032258064e-05, 'epoch': 1.96}


  0%|          | 0/5 [12:31<?, ?it/s]

{'loss': 0.3784, 'grad_norm': 4.708737373352051, 'learning_rate': 4.3548387096774194e-05, 'epoch': 2.0}


  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/5 [12:35<?, ?it/s]

{'eval_loss': 0.37268123030662537, 'eval_runtime': 3.2639, 'eval_samples_per_second': 103.25, 'eval_steps_per_second': 10.417, 'epoch': 2.0}


  0%|          | 0/5 [12:37<?, ?it/s]

{'loss': 0.2281, 'grad_norm': 4.95565128326416, 'learning_rate': 4.1935483870967746e-05, 'epoch': 2.04}


  0%|          | 0/5 [12:40<?, ?it/s]

{'loss': 0.2879, 'grad_norm': 1.8123525381088257, 'learning_rate': 4.032258064516129e-05, 'epoch': 2.07}


  0%|          | 0/5 [12:42<?, ?it/s]

{'loss': 0.3268, 'grad_norm': 2.455737352371216, 'learning_rate': 3.870967741935484e-05, 'epoch': 2.11}


  0%|          | 0/5 [12:45<?, ?it/s]

{'loss': 0.3456, 'grad_norm': 0.9207210540771484, 'learning_rate': 3.7096774193548386e-05, 'epoch': 2.15}


  0%|          | 0/5 [12:47<?, ?it/s]

{'loss': 0.3666, 'grad_norm': 5.24532413482666, 'learning_rate': 3.548387096774194e-05, 'epoch': 2.19}


  0%|          | 0/5 [12:49<?, ?it/s]

{'loss': 0.2946, 'grad_norm': 2.8550312519073486, 'learning_rate': 3.387096774193548e-05, 'epoch': 2.22}


  0%|          | 0/5 [12:52<?, ?it/s]

{'loss': 0.2812, 'grad_norm': 0.5724469423294067, 'learning_rate': 3.2258064516129034e-05, 'epoch': 2.26}


  0%|          | 0/5 [12:54<?, ?it/s]

{'loss': 0.3254, 'grad_norm': 2.57489275932312, 'learning_rate': 3.0645161290322585e-05, 'epoch': 2.3}


  0%|          | 0/5 [12:56<?, ?it/s]

{'loss': 0.2704, 'grad_norm': 7.366105556488037, 'learning_rate': 2.9032258064516133e-05, 'epoch': 2.33}


  0%|          | 0/5 [12:59<?, ?it/s]

{'loss': 0.2696, 'grad_norm': 2.781035900115967, 'learning_rate': 2.7419354838709678e-05, 'epoch': 2.37}


  0%|          | 0/5 [13:01<?, ?it/s]

{'loss': 0.3881, 'grad_norm': 3.5866456031799316, 'learning_rate': 2.5806451612903226e-05, 'epoch': 2.41}


  0%|          | 0/5 [13:03<?, ?it/s]

{'loss': 0.3812, 'grad_norm': 4.096035480499268, 'learning_rate': 2.4193548387096777e-05, 'epoch': 2.44}


  0%|          | 0/5 [13:06<?, ?it/s]

{'loss': 0.3082, 'grad_norm': 2.347102403640747, 'learning_rate': 2.258064516129032e-05, 'epoch': 2.48}


  0%|          | 0/5 [13:08<?, ?it/s]

{'loss': 0.2172, 'grad_norm': 6.091243743896484, 'learning_rate': 2.0967741935483873e-05, 'epoch': 2.52}


  0%|          | 0/5 [13:10<?, ?it/s]

{'loss': 0.444, 'grad_norm': 2.583977699279785, 'learning_rate': 1.935483870967742e-05, 'epoch': 2.56}


  0%|          | 0/5 [13:13<?, ?it/s]

{'loss': 0.1882, 'grad_norm': 3.7491588592529297, 'learning_rate': 1.774193548387097e-05, 'epoch': 2.59}


  0%|          | 0/5 [13:15<?, ?it/s]

{'loss': 0.4184, 'grad_norm': 3.3540496826171875, 'learning_rate': 1.6129032258064517e-05, 'epoch': 2.63}


  0%|          | 0/5 [13:18<?, ?it/s]

{'loss': 0.3725, 'grad_norm': 4.351670265197754, 'learning_rate': 1.4516129032258066e-05, 'epoch': 2.67}


  0%|          | 0/5 [13:20<?, ?it/s]

{'loss': 0.2053, 'grad_norm': 3.412768840789795, 'learning_rate': 1.2903225806451613e-05, 'epoch': 2.7}


  0%|          | 0/5 [13:22<?, ?it/s]

{'loss': 0.2766, 'grad_norm': 3.1160435676574707, 'learning_rate': 1.129032258064516e-05, 'epoch': 2.74}


  0%|          | 0/5 [13:25<?, ?it/s]

{'loss': 0.329, 'grad_norm': 7.027251243591309, 'learning_rate': 9.67741935483871e-06, 'epoch': 2.78}


  0%|          | 0/5 [13:27<?, ?it/s]

{'loss': 0.3204, 'grad_norm': 2.851489305496216, 'learning_rate': 8.064516129032258e-06, 'epoch': 2.81}


  0%|          | 0/5 [13:29<?, ?it/s]

{'loss': 0.2202, 'grad_norm': 2.890045642852783, 'learning_rate': 6.451612903225806e-06, 'epoch': 2.85}


  0%|          | 0/5 [13:32<?, ?it/s]

{'loss': 0.2796, 'grad_norm': 2.5084762573242188, 'learning_rate': 4.838709677419355e-06, 'epoch': 2.89}


  0%|          | 0/5 [13:34<?, ?it/s]

{'loss': 0.3503, 'grad_norm': 4.599698066711426, 'learning_rate': 3.225806451612903e-06, 'epoch': 2.93}


  0%|          | 0/5 [13:36<?, ?it/s]

{'loss': 0.2945, 'grad_norm': 4.615706920623779, 'learning_rate': 1.6129032258064516e-06, 'epoch': 2.96}


  0%|          | 0/5 [13:39<?, ?it/s]

{'loss': 0.2419, 'grad_norm': 0.21458451449871063, 'learning_rate': 0.0, 'epoch': 3.0}


  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/5 [13:42<?, ?it/s]

{'eval_loss': 0.36220431327819824, 'eval_runtime': 3.2277, 'eval_samples_per_second': 104.409, 'eval_steps_per_second': 10.534, 'epoch': 3.0}
{'train_runtime': 202.9405, 'train_samples_per_second': 39.81, 'train_steps_per_second': 3.991, 'train_loss': 0.3705544117056293, 'epoch': 3.0}


Map:   0%|          | 0/2693 [00:00<?, ? examples/s]

Map:   0%|          | 0/2693 [00:00<?, ? examples/s]

Map:   0%|          | 0/337 [00:00<?, ? examples/s]

Map:   0%|          | 0/337 [00:00<?, ? examples/s]

| Global Round : 0 | Local # 10 	Malicious: True


  0%|          | 0/810 [00:00<?, ?it/s]

  0%|          | 0/5 [13:46<?, ?it/s]

{'loss': 2.1495, 'grad_norm': 16.21196937561035, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.04}


  0%|          | 0/5 [13:49<?, ?it/s]

{'loss': 1.9945, 'grad_norm': 3.674197196960449, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.07}


  0%|          | 0/5 [13:51<?, ?it/s]

{'loss': 1.5713, 'grad_norm': 8.965267181396484, 'learning_rate': 3e-06, 'epoch': 0.11}


  0%|          | 0/5 [13:54<?, ?it/s]

{'loss': 2.3344, 'grad_norm': 6.42370080947876, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.15}


  0%|          | 0/5 [13:56<?, ?it/s]

{'loss': 1.9411, 'grad_norm': 13.045049667358398, 'learning_rate': 5e-06, 'epoch': 0.19}


  0%|          | 0/5 [13:58<?, ?it/s]

{'loss': 2.1229, 'grad_norm': 11.85414981842041, 'learning_rate': 6e-06, 'epoch': 0.22}


  0%|          | 0/5 [14:01<?, ?it/s]

{'loss': 2.258, 'grad_norm': 5.024021148681641, 'learning_rate': 7.000000000000001e-06, 'epoch': 0.26}


  0%|          | 0/5 [14:03<?, ?it/s]

{'loss': 2.0292, 'grad_norm': 12.46856689453125, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.3}


  0%|          | 0/5 [14:05<?, ?it/s]

{'loss': 1.7643, 'grad_norm': 3.6721067428588867, 'learning_rate': 9e-06, 'epoch': 0.33}


  0%|          | 0/5 [14:08<?, ?it/s]

{'loss': 1.6414, 'grad_norm': 13.296286582946777, 'learning_rate': 1e-05, 'epoch': 0.37}


  0%|          | 0/5 [14:10<?, ?it/s]

{'loss': 1.9298, 'grad_norm': 9.884248733520508, 'learning_rate': 1.1000000000000001e-05, 'epoch': 0.41}


  0%|          | 0/5 [14:12<?, ?it/s]

{'loss': 1.6222, 'grad_norm': 2.960932731628418, 'learning_rate': 1.2e-05, 'epoch': 0.44}


  0%|          | 0/5 [14:15<?, ?it/s]

{'loss': 1.2212, 'grad_norm': 11.83984088897705, 'learning_rate': 1.3000000000000001e-05, 'epoch': 0.48}


  0%|          | 0/5 [14:17<?, ?it/s]

{'loss': 1.5654, 'grad_norm': 16.817279815673828, 'learning_rate': 1.4000000000000001e-05, 'epoch': 0.52}


  0%|          | 0/5 [14:20<?, ?it/s]

{'loss': 1.114, 'grad_norm': 3.213843584060669, 'learning_rate': 1.5e-05, 'epoch': 0.56}


  0%|          | 0/5 [14:22<?, ?it/s]

{'loss': 1.3563, 'grad_norm': 6.895436763763428, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.59}


  0%|          | 0/5 [14:24<?, ?it/s]

{'loss': 0.8317, 'grad_norm': 8.65213680267334, 'learning_rate': 1.7000000000000003e-05, 'epoch': 0.63}


  0%|          | 0/5 [14:27<?, ?it/s]

{'loss': 1.241, 'grad_norm': 10.572895050048828, 'learning_rate': 1.8e-05, 'epoch': 0.67}


  0%|          | 0/5 [14:29<?, ?it/s]

{'loss': 0.9851, 'grad_norm': 4.44859504699707, 'learning_rate': 1.9e-05, 'epoch': 0.7}


  0%|          | 0/5 [14:31<?, ?it/s]

{'loss': 0.6875, 'grad_norm': 5.097440719604492, 'learning_rate': 2e-05, 'epoch': 0.74}


  0%|          | 0/5 [14:34<?, ?it/s]

{'loss': 0.855, 'grad_norm': 7.801229000091553, 'learning_rate': 2.1e-05, 'epoch': 0.78}


  0%|          | 0/5 [14:36<?, ?it/s]

{'loss': 0.6244, 'grad_norm': 6.903042316436768, 'learning_rate': 2.2000000000000003e-05, 'epoch': 0.81}


  0%|          | 0/5 [14:38<?, ?it/s]

{'loss': 0.5889, 'grad_norm': 11.092957496643066, 'learning_rate': 2.3000000000000003e-05, 'epoch': 0.85}


  0%|          | 0/5 [14:41<?, ?it/s]

{'loss': 0.6622, 'grad_norm': 6.0078206062316895, 'learning_rate': 2.4e-05, 'epoch': 0.89}


  0%|          | 0/5 [14:43<?, ?it/s]

{'loss': 0.5298, 'grad_norm': 4.898341655731201, 'learning_rate': 2.5e-05, 'epoch': 0.93}


  0%|          | 0/5 [14:45<?, ?it/s]

{'loss': 0.2646, 'grad_norm': 1.6204240322113037, 'learning_rate': 2.6000000000000002e-05, 'epoch': 0.96}


  0%|          | 0/5 [14:48<?, ?it/s]

{'loss': 0.5445, 'grad_norm': 2.001826047897339, 'learning_rate': 2.7000000000000002e-05, 'epoch': 1.0}


  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/5 [14:51<?, ?it/s]

{'eval_loss': 0.5906158089637756, 'eval_runtime': 3.2198, 'eval_samples_per_second': 104.665, 'eval_steps_per_second': 10.56, 'epoch': 1.0}


  0%|          | 0/5 [14:53<?, ?it/s]

{'loss': 0.3874, 'grad_norm': 3.7816877365112305, 'learning_rate': 2.8000000000000003e-05, 'epoch': 1.04}


  0%|          | 0/5 [14:56<?, ?it/s]

{'loss': 0.6197, 'grad_norm': 2.587766408920288, 'learning_rate': 2.9e-05, 'epoch': 1.07}


  0%|          | 0/5 [14:58<?, ?it/s]

{'loss': 0.5861, 'grad_norm': 2.7217609882354736, 'learning_rate': 3e-05, 'epoch': 1.11}


  0%|          | 0/5 [15:00<?, ?it/s]

{'loss': 0.3865, 'grad_norm': 5.479470729827881, 'learning_rate': 3.1e-05, 'epoch': 1.15}


  0%|          | 0/5 [15:03<?, ?it/s]

{'loss': 0.4535, 'grad_norm': 3.613007068634033, 'learning_rate': 3.2000000000000005e-05, 'epoch': 1.19}


  0%|          | 0/5 [15:05<?, ?it/s]

{'loss': 0.4844, 'grad_norm': 6.080594539642334, 'learning_rate': 3.3e-05, 'epoch': 1.22}


  0%|          | 0/5 [15:07<?, ?it/s]

{'loss': 0.3909, 'grad_norm': 5.829108238220215, 'learning_rate': 3.4000000000000007e-05, 'epoch': 1.26}


  0%|          | 0/5 [15:10<?, ?it/s]

{'loss': 0.3934, 'grad_norm': 3.034848690032959, 'learning_rate': 3.5e-05, 'epoch': 1.3}


  0%|          | 0/5 [15:12<?, ?it/s]

{'loss': 0.3299, 'grad_norm': 2.724651575088501, 'learning_rate': 3.6e-05, 'epoch': 1.33}


  0%|          | 0/5 [15:15<?, ?it/s]

{'loss': 0.349, 'grad_norm': 4.7343430519104, 'learning_rate': 3.7e-05, 'epoch': 1.37}


  0%|          | 0/5 [15:17<?, ?it/s]

{'loss': 0.4565, 'grad_norm': 3.266690969467163, 'learning_rate': 3.8e-05, 'epoch': 1.41}


  0%|          | 0/5 [15:19<?, ?it/s]

{'loss': 0.3583, 'grad_norm': 2.7420454025268555, 'learning_rate': 3.9000000000000006e-05, 'epoch': 1.44}


  0%|          | 0/5 [15:22<?, ?it/s]

{'loss': 0.291, 'grad_norm': 2.050591230392456, 'learning_rate': 4e-05, 'epoch': 1.48}


  0%|          | 0/5 [15:24<?, ?it/s]

{'loss': 0.3626, 'grad_norm': 2.8588082790374756, 'learning_rate': 4.1e-05, 'epoch': 1.52}


  0%|          | 0/5 [15:26<?, ?it/s]

{'loss': 0.3603, 'grad_norm': 3.2349355220794678, 'learning_rate': 4.2e-05, 'epoch': 1.56}


  0%|          | 0/5 [15:29<?, ?it/s]

{'loss': 0.4342, 'grad_norm': 3.5291695594787598, 'learning_rate': 4.3e-05, 'epoch': 1.59}


  0%|          | 0/5 [15:31<?, ?it/s]

{'loss': 0.3668, 'grad_norm': 4.305304050445557, 'learning_rate': 4.4000000000000006e-05, 'epoch': 1.63}


  0%|          | 0/5 [15:33<?, ?it/s]

{'loss': 0.1488, 'grad_norm': 4.172146797180176, 'learning_rate': 4.5e-05, 'epoch': 1.67}


  0%|          | 0/5 [15:36<?, ?it/s]

{'loss': 0.1853, 'grad_norm': 2.4564173221588135, 'learning_rate': 4.600000000000001e-05, 'epoch': 1.7}


  0%|          | 0/5 [15:38<?, ?it/s]

{'loss': 0.1827, 'grad_norm': 2.644777536392212, 'learning_rate': 4.7e-05, 'epoch': 1.74}


  0%|          | 0/5 [15:40<?, ?it/s]

{'loss': 0.1253, 'grad_norm': 1.7789711952209473, 'learning_rate': 4.8e-05, 'epoch': 1.78}


  0%|          | 0/5 [15:43<?, ?it/s]

{'loss': 0.2586, 'grad_norm': 6.456143379211426, 'learning_rate': 4.9e-05, 'epoch': 1.81}


  0%|          | 0/5 [15:45<?, ?it/s]

{'loss': 0.266, 'grad_norm': 1.8620375394821167, 'learning_rate': 5e-05, 'epoch': 1.85}


  0%|          | 0/5 [15:47<?, ?it/s]

{'loss': 0.2014, 'grad_norm': 4.190548896789551, 'learning_rate': 4.8387096774193554e-05, 'epoch': 1.89}


  0%|          | 0/5 [15:50<?, ?it/s]

{'loss': 0.314, 'grad_norm': 6.206932067871094, 'learning_rate': 4.67741935483871e-05, 'epoch': 1.93}


  0%|          | 0/5 [15:52<?, ?it/s]

{'loss': 0.2679, 'grad_norm': 10.512054443359375, 'learning_rate': 4.516129032258064e-05, 'epoch': 1.96}


  0%|          | 0/5 [15:54<?, ?it/s]

{'loss': 0.1878, 'grad_norm': 14.765138626098633, 'learning_rate': 4.3548387096774194e-05, 'epoch': 2.0}


  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/5 [15:57<?, ?it/s]

{'eval_loss': 0.29320088028907776, 'eval_runtime': 3.2056, 'eval_samples_per_second': 105.13, 'eval_steps_per_second': 10.607, 'epoch': 2.0}


  0%|          | 0/5 [16:00<?, ?it/s]

{'loss': 0.2536, 'grad_norm': 3.0760374069213867, 'learning_rate': 4.1935483870967746e-05, 'epoch': 2.04}


  0%|          | 0/5 [16:02<?, ?it/s]

{'loss': 0.246, 'grad_norm': 5.486027240753174, 'learning_rate': 4.032258064516129e-05, 'epoch': 2.07}


  0%|          | 0/5 [16:05<?, ?it/s]

{'loss': 0.5656, 'grad_norm': 4.453985214233398, 'learning_rate': 3.870967741935484e-05, 'epoch': 2.11}


  0%|          | 0/5 [16:07<?, ?it/s]

{'loss': 0.3349, 'grad_norm': 6.021759986877441, 'learning_rate': 3.7096774193548386e-05, 'epoch': 2.15}


  0%|          | 0/5 [16:09<?, ?it/s]

{'loss': 0.1771, 'grad_norm': 2.0135326385498047, 'learning_rate': 3.548387096774194e-05, 'epoch': 2.19}


  0%|          | 0/5 [16:12<?, ?it/s]

{'loss': 0.0776, 'grad_norm': 2.412263870239258, 'learning_rate': 3.387096774193548e-05, 'epoch': 2.22}


  0%|          | 0/5 [16:14<?, ?it/s]

{'loss': 0.2231, 'grad_norm': 0.3792002499103546, 'learning_rate': 3.2258064516129034e-05, 'epoch': 2.26}


  0%|          | 0/5 [16:16<?, ?it/s]

{'loss': 0.3727, 'grad_norm': 8.269950866699219, 'learning_rate': 3.0645161290322585e-05, 'epoch': 2.3}


  0%|          | 0/5 [16:19<?, ?it/s]

{'loss': 0.2236, 'grad_norm': 0.6616917252540588, 'learning_rate': 2.9032258064516133e-05, 'epoch': 2.33}


  0%|          | 0/5 [16:21<?, ?it/s]

{'loss': 0.1617, 'grad_norm': 0.5371271967887878, 'learning_rate': 2.7419354838709678e-05, 'epoch': 2.37}


  0%|          | 0/5 [16:23<?, ?it/s]

{'loss': 0.1317, 'grad_norm': 3.1724131107330322, 'learning_rate': 2.5806451612903226e-05, 'epoch': 2.41}


  0%|          | 0/5 [16:26<?, ?it/s]

{'loss': 0.1592, 'grad_norm': 0.9077950119972229, 'learning_rate': 2.4193548387096777e-05, 'epoch': 2.44}


  0%|          | 0/5 [16:28<?, ?it/s]

{'loss': 0.192, 'grad_norm': 2.361091136932373, 'learning_rate': 2.258064516129032e-05, 'epoch': 2.48}


  0%|          | 0/5 [16:30<?, ?it/s]

{'loss': 0.2276, 'grad_norm': 4.148543834686279, 'learning_rate': 2.0967741935483873e-05, 'epoch': 2.52}


  0%|          | 0/5 [16:33<?, ?it/s]

{'loss': 0.2009, 'grad_norm': 0.244474858045578, 'learning_rate': 1.935483870967742e-05, 'epoch': 2.56}


  0%|          | 0/5 [16:35<?, ?it/s]

{'loss': 0.3206, 'grad_norm': 7.21647834777832, 'learning_rate': 1.774193548387097e-05, 'epoch': 2.59}


  0%|          | 0/5 [16:37<?, ?it/s]

{'loss': 0.2262, 'grad_norm': 6.978596210479736, 'learning_rate': 1.6129032258064517e-05, 'epoch': 2.63}


  0%|          | 0/5 [16:40<?, ?it/s]

{'loss': 0.2147, 'grad_norm': 3.048757314682007, 'learning_rate': 1.4516129032258066e-05, 'epoch': 2.67}


  0%|          | 0/5 [16:42<?, ?it/s]

{'loss': 0.1641, 'grad_norm': 2.1551568508148193, 'learning_rate': 1.2903225806451613e-05, 'epoch': 2.7}


  0%|          | 0/5 [16:44<?, ?it/s]

{'loss': 0.2706, 'grad_norm': 5.107598304748535, 'learning_rate': 1.129032258064516e-05, 'epoch': 2.74}


  0%|          | 0/5 [16:47<?, ?it/s]

{'loss': 0.1483, 'grad_norm': 0.377622127532959, 'learning_rate': 9.67741935483871e-06, 'epoch': 2.78}


  0%|          | 0/5 [16:49<?, ?it/s]

{'loss': 0.2504, 'grad_norm': 7.171419620513916, 'learning_rate': 8.064516129032258e-06, 'epoch': 2.81}


  0%|          | 0/5 [16:51<?, ?it/s]

{'loss': 0.1041, 'grad_norm': 1.0410081148147583, 'learning_rate': 6.451612903225806e-06, 'epoch': 2.85}


  0%|          | 0/5 [16:54<?, ?it/s]

{'loss': 0.4218, 'grad_norm': 10.839933395385742, 'learning_rate': 4.838709677419355e-06, 'epoch': 2.89}


  0%|          | 0/5 [16:56<?, ?it/s]

{'loss': 0.2529, 'grad_norm': 10.332358360290527, 'learning_rate': 3.225806451612903e-06, 'epoch': 2.93}


  0%|          | 0/5 [16:58<?, ?it/s]

{'loss': 0.1932, 'grad_norm': 1.2197967767715454, 'learning_rate': 1.6129032258064516e-06, 'epoch': 2.96}


  0%|          | 0/5 [17:01<?, ?it/s]

{'loss': 0.419, 'grad_norm': 0.334231436252594, 'learning_rate': 0.0, 'epoch': 3.0}


  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/5 [17:04<?, ?it/s]

{'eval_loss': 0.31343621015548706, 'eval_runtime': 3.2225, 'eval_samples_per_second': 104.576, 'eval_steps_per_second': 10.551, 'epoch': 3.0}
{'train_runtime': 200.1838, 'train_samples_per_second': 40.358, 'train_steps_per_second': 4.046, 'train_loss': 0.6433492280818798, 'epoch': 3.0}


  0%|          | 0/5 [17:04<?, ?it/s]


TypeError: detect_anomalies_by_distance() got an unexpected keyword argument 'threshold'

In [24]:
# Evaluate the current global model on the clean test set (reported as ACC)
# and on the attack test set (reported as ASR).
test_acc, _ = test_inference(args, global_model, test_dataset)
test_asr, _ = test_inference(args, global_model, attack_test_set)
print(
    "|---- Test ACC: {:.2f}%".format(100 * test_acc),
    "|---- Test ASR: {:.2f}%".format(100 * test_asr),
    sep="\n",
)
# NOTE: these appends make the cell non-idempotent; every re-run adds one more
# entry to each history list.
test_acc_list.append(test_acc)
test_asr_list.append(test_asr)

Map:   0%|          | 0/444 [00:00<?, ? examples/s]

|---- Test ACC: 87.96%
|---- Test ASR: 19.82%


In [61]:
# Collect the 'weights' entry of every record in log[0] except the one keyed
# 'global'. List positions follow the dict's iteration order.
weights = [data['weights'] for user, data in log[0].items() if user != 'global']

# Print each record's 'status' label in that same order, so line k of the
# output corresponds to weights[k].
for user, data in log[0].items():
    if user != 'global':
        print(f"User {user}: {data['status']}")

User 18: clean
User 1: malicious
User 19: clean
User 8: malicious
User 10: clean


In [62]:
# Only element [1] of extract_lora_matrices' result is used here; the variable
# name suggests it holds the LoRA B matrices — TODO confirm in defense_utils.
client_B_matrices = extract_lora_matrices(weights, num_layers)[1]
# NOTE(review): clean_B_matrices is not assigned in this cell; it must already
# exist in the kernel from an earlier cell.
distances = compute_wa_distances(clean_B_matrices, client_B_matrices)

# Per-client total: for each position i, add up the i-th distance stored under
# every key of `distances`. The list length is taken from the first entry.
client_distance = [0.0] * len(next(iter(distances.values())))
for layer_key in distances:
    for i, distance in enumerate(distances[layer_key]):
        client_distance[i] += distance

# `attackers` is used below as a collection of positions into `weights`.
# The 0.015 threshold is a hard-coded value.
attackers = detect_anomalies_by_distance(distances, method='sum', threshold=0.015)
print(f"Attackers: {attackers}", client_distance, sep="\n")

# Split the client weights by whether their position was flagged.
clean_weights = [w for idx, w in enumerate(weights) if idx not in attackers]
poison_weights = [w for idx, w in enumerate(weights) if idx in attackers]
# clean_weights feeds the currently disabled aggregation:
#   global_weights = average_weights(clean_weights)
# NOTE(review): despite its name, A_weights is a list with one entry per flagged
# client (element [0] of divide_lora_params), not an averaged state.
A_weights = [divide_lora_params(w)[0] for w in poison_weights]

Attackers: [1, 3]
[0.008584293380355342, 0.019813877884109918, 0.011560925038859292, 0.01955447278452452, 0.011647501449801203]


In [66]:
# Build a fresh PEFT-wrapped classifier from the checkpoint in 'save/base_model'.
base_model = BertForSequenceClassification.from_pretrained('save/base_model')
base_model = base_model.to(device)

# NOTE(review): global_weights is not assigned in this cell; it must already
# exist in the kernel — confirm which aggregation produced it before trusting
# the numbers below. To evaluate other parameters instead, pass them in place
# of global_weights (e.g. A_weights).
new_model = load_params(get_peft_model(base_model, lora_config), global_weights)

# Same reporting as for the global model: clean test set, then attack test set.
test_acc, _ = test_inference(args, new_model, test_dataset)
test_asr, _ = test_inference(args, new_model, attack_test_set)
print(
    "|---- Test ACC: {:.2f}%".format(100 * test_acc),
    "|---- Test ASR: {:.2f}%".format(100 * test_asr),
    sep="\n",
)

Map:   0%|          | 0/444 [00:00<?, ? examples/s]

|---- Test ACC: 86.24%
|---- Test ASR: 15.99%


In [44]:
# Parameters are loaded into a deep copy rather than into global_model itself.
new_global_model = copy.deepcopy(global_model)

# Collect the 'weights' entry of every record in log[2] except the one keyed
# 'global'.
weights = []
for user, data in log[2].items():
    if user == 'global':
        continue
    weights.append(data['weights'])
print(len(weights))

# Hard-coded positions. When len(weights) is 5, every entry lands in
# poison_weights and clean_weights is empty.
attackers = [0, 1, 2, 3, 4]
clean_weights = [w for idx, w in enumerate(weights) if idx not in attackers]
poison_weights = [w for idx, w in enumerate(weights) if idx in attackers]

# Average element [0] of divide_lora_params(...) over the selected entries and
# load that average into the copied model. The disabled alternative is
#   global_weights = average_weights(clean_weights)
# loaded the same way.
A_params = [divide_lora_params(w)[0] for w in poison_weights]
A_weights = average_weights(A_params)
new_global_model = load_params(new_global_model, A_weights)

test_acc, _ = test_inference(args, new_global_model, test_dataset)
test_asr, _ = test_inference(args, new_global_model, attack_test_set)
print(
    "|---- Test ACC: {:.2f}%".format(100 * test_acc),
    "|---- Test ASR: {:.2f}%".format(100 * test_asr),
    sep="\n",
)
# NOTE: these appends make the cell non-idempotent; every re-run adds one more
# entry to each history list.
test_acc_list.append(test_acc)
test_asr_list.append(test_asr)


5


Map:   0%|          | 0/444 [00:00<?, ? examples/s]

|---- Test ACC: 86.12%
|---- Test ASR: 12.39%
