### Standard FL Implementation

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

# Both Server and Client use Torch-type model

class Server:
    # use a pretained base model for initial weights, such as falcon-7b
    # client_fraction we use 1 as default
    def __init__(self, initial_weights, num_clients, client_fraction=1):
        self.global_weights = initial_weights  # PyTorch state_dict
        self.client_fraction = client_fraction
        self.num_clients = num_clients # assuming each client has a unique index

    def select_clients(self):
        if self.client_fraction == 1:
            return list(range(self.num_clients))
        m = max(int(self.client_fraction * self.num_clients), 1)
        selected_clients = np.random.choice(self.num_clients, m, replace=False)
        return selected_clients

    def aggregate_updates(self, client_updates, client_data_counts):
        total_data_count = np.sum(client_data_counts)
        # Initialize a new state_dict for the aggregated weights
        aggregated_weights = {key: torch.zeros_like(val) for key, val in self.global_weights.items()}

        for client_weights, data_count in zip(client_updates, client_data_counts):
            for key in self.global_weights.keys():
                # Aggregate the weights for each layer
                aggregated_weights[key] += client_weights[key] * (data_count / total_data_count)

        self.global_weights = aggregated_weights

    def send_global_weights(self):
        return self.global_weights

class Client:
    # data is a list of (x, y) pairs
    # initial_weights is a numpy array, could be a pretained language model weights
    def __init__(self, data_loader, model, epochs, learning_rate):
        self.data_loader = data_loader  # DataLoader object containing the client's data
        self.model = model  # PyTorch model
        self.epochs = epochs
        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)
        self.criterion = nn.MSELoss()

    def train(self):
        self.model.train()  # Set the model to training mode
        for epoch in range(self.epochs):
            for batch in self.data_loader:
                features, labels = batch
                self.optimizer.zero_grad()  # Clear gradients from the previous step
                outputs = self.model(features)
                loss = self.criterion(outputs, labels)
                loss.backward()  # Backpropagate the loss
                self.optimizer.step()  # Update the weights
        return self.model.state_dict()  # Return the updated model weights

    # Update the local model with the global model
    def update_local_model(self, global_weights):
        self.model.load_state_dict(global_weights)

    # for weighted avrage use
    def get_data_count(self):
        return len(self.data_loader)

### Integrate the falcon-7b base model as the pre-trained model into our federated learning setting

In [1]:
!pip install -Uqqq pip --progress-bar off
!pip install -qqq bitsandbytes==0.39.0
!pip install -qqq torch==2.0.1
!pip install -qqq -U git+https://github.com/huggingface/transformers.git@e03a9cc
!pip install -qqq -U git+https://github.com/huggingface/peft.git@42a184f
!pip install -qqq -U git+https://github.com/huggingface/accelerate.git@c9fbb71
!pip install -qqq datasets==2.12.0
!pip install -qqq loralib==0.1.1
!pip install -qqq einops==0.6.1

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.2/92.2 MB[0m [31m25.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m619.9/619.9 MB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.0/21.0 MB[0m [31m78.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m849.3/849.3 kB[0m [31m50.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.8/11.8 MB[0m [31m99.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m557.1/557.1 MB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m317.1/317.1 MB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m168.4/168.4 MB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━

In [2]:
import json
import os
from pprint import pprint

import bitsandbytes as bnb
import pandas as pd
import torch
import torch.nn as nn
import transformers
from datasets import load_dataset
from huggingface_hub import notebook_login
from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training,
)

from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)
import copy

os.environ["CUDA_VISIBLE_DEVICES"] = "0"


Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cpu.so
/usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cpu.so: undefined symbol: cadam32bit_grad_fp32
CUDA SETUP: Loading binary /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cpu.so...


  warn("The installed version of bitsandbytes was compiled without GPU support. "


In [27]:
# # Configuration for QLoRA
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    # load_in_8bit_fp32_cpu_offload = True,
)

# USE CPU FOR WEIGHTS UPDATING, CANNOT USE QUANTIZATION


In [10]:
PEFT_MODEL_1 = "babel-painter/Client_Node1_Ecommerce"

config = PeftConfig.from_pretrained(PEFT_MODEL_1)
client_node_1 = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    # quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
    offload_folder = "offload/"
)

tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
tokenizer.pad_token = tokenizer.eos_token

client_node_1 = PeftModel.from_pretrained(client_node_1, PEFT_MODEL_1)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [13]:
PEFT_MODEL_2 = "babel-painter/Client_Node2_Covid"

config = PeftConfig.from_pretrained(PEFT_MODEL_2)
client_node_2 = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    # quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
    offload_folder = "offload/"
)

tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
tokenizer.pad_token = tokenizer.eos_token

client_node_2 = PeftModel.from_pretrained(client_node_2, PEFT_MODEL_2)

adapter_config.json:   0%|          | 0.00/410 [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

adapter_model.bin:   0%|          | 0.00/18.9M [00:00<?, ?B/s]

In [15]:
# Retrieve the state dictionaries of the trained models
state_dict_node_1 = client_node_1.state_dict()
state_dict_node_2 = client_node_2.state_dict()

In [16]:
# Get the number of data points used for training in each node
data_points_node_1 = 79
data_points_node_2 = 61
total_data_points = data_points_node_1 + data_points_node_2

In [17]:
# Initialize a dictionary to hold the averaged weights
avg_state_dict = {}

In [10]:
assert set(state_dict_node_1.keys()) == set(state_dict_node_2.keys()), "State dictionaries have different keys"

In [18]:
# Compute the weighted average of the model weights
for key in state_dict_node_1:
    avg_state_dict[key] = (state_dict_node_1[key] * data_points_node_1 + state_dict_node_2[key] * data_points_node_2) / total_data_points

In [46]:
# To save RAM

import gc

# Clear large variables
del client_node_2, state_dict_node_2
gc.collect()

# Then instantiate and load the central model as shown above


In [3]:
# Save the calculated avg_state_dict

save_path = 'avg_state_dict.pt'

In [28]:

# Save the avg_state_dict
torch.save(avg_state_dict, save_path)

In [4]:
# Load the saved state dictionary
avg_state_dict = torch.load(save_path)

In [6]:
PEFT_MODEL_1 = "babel-painter/Client_Node1_Ecommerce"

config = PeftConfig.from_pretrained(PEFT_MODEL_1)

In [7]:
# Create a new model instance
central_model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    # quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
    offload_folder = "offload/"
)





Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [9]:
# Compare keys in avg_state_dict with those in the central model
central_model_keys = set(central_model.state_dict().keys())
avg_state_dict_keys = set(avg_state_dict.keys())

missing_keys = central_model_keys - avg_state_dict_keys
extra_keys = avg_state_dict_keys - central_model_keys

print("Missing keys:", missing_keys)
print("Extra keys:", extra_keys)


Missing keys: {'transformer.h.2.self_attention.dense.weight', 'transformer.h.12.input_layernorm.weight', 'transformer.h.4.mlp.dense_4h_to_h.weight', 'transformer.h.6.input_layernorm.bias', 'transformer.h.26.self_attention.query_key_value.weight', 'transformer.h.29.input_layernorm.weight', 'transformer.h.3.mlp.dense_h_to_4h.weight', 'transformer.h.31.input_layernorm.weight', 'transformer.h.21.input_layernorm.bias', 'transformer.h.0.input_layernorm.bias', 'transformer.h.12.mlp.dense_4h_to_h.weight', 'transformer.h.21.input_layernorm.weight', 'transformer.h.13.input_layernorm.bias', 'transformer.ln_f.weight', 'transformer.h.11.mlp.dense_h_to_4h.weight', 'transformer.h.3.input_layernorm.weight', 'transformer.h.10.input_layernorm.weight', 'transformer.h.2.self_attention.query_key_value.weight', 'transformer.h.10.mlp.dense_h_to_4h.weight', 'transformer.h.8.self_attention.dense.weight', 'transformer.h.20.self_attention.dense.weight', 'transformer.h.1.mlp.dense_4h_to_h.weight', 'transformer.h.

In [8]:
# Load the state dictionary into the model
central_model.load_state_dict(avg_state_dict)

## Save Trained Model

In [None]:
central_model.save_pretrained("central_model_FL")

In [None]:
notebook_login()

In [None]:
client_node_1.push_to_hub("babel-painter/Client_Node1_Ecommerce", use_auth_token=True)

## Inference

In [None]:
# Configuration for text generation
generation_config = central_model.generation_config
generation_config.max_new_tokens = 200
generation_config.temperature = 0.7
generation_config.top_p = 0.7
generation_config.num_return_sequences = 1
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id
generation_config.bos_token_id = 1

# Setting the device to CUDA (GPU)
DEVICE = "cuda:0"

In [None]:
generation_config