In [139]:
# we naturally first need to import torch and torchvision
import argparse
import warnings
from collections import OrderedDict

from flwr.client import NumPyClient, ClientApp
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from tqdm import tqdm
NUM_CLIENTS=2
BATCH_SIZE=32
class CustomDataset(Dataset):
    def __init__(self, dataframe):
        self.dataframe = dataframe
        self.features = dataframe.drop(columns=["targetTput"]).values
        self.labels = dataframe["targetTput"].values

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        return torch.tensor(self.features[idx], dtype=torch.float32), torch.tensor(self.labels[idx], dtype=torch.float32)
def load_csv_data(filepath, target_column):
    df = pd.read_csv(filepath)

    # Identify categorical columns (columns with dtype object)
    categorical_columns = df.select_dtypes(include=['object']).columns.tolist()
    if 'measTimeStampRf' in categorical_columns:
        categorical_columns.remove('measTimeStampRf')

    # One-hot encode categorical columns
    ohe = OneHotEncoder(sparse_output=False, drop='first')
    ohe_features = ohe.fit_transform(df[categorical_columns])
    ohe_feature_names = ohe.get_feature_names_out(categorical_columns)

    # Create a new DataFrame with one-hot encoded features
    ohe_df = pd.DataFrame(ohe_features, columns=ohe_feature_names)

    # Drop original categorical columns and 'measTimeStampRf', then concatenate with the one-hot encoded features
    df.drop(columns=categorical_columns + ['measTimeStampRf'], inplace=True)
    df = pd.concat([df.reset_index(drop=True), ohe_df.reset_index(drop=True)], axis=1)

    # Split dataset into train, validation, and test sets
    train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
    train_df, val_df = train_test_split(train_df, test_size=0.2, random_state=42)

    # Split train set into NUM_CLIENTS partitions
    train_partitions = np.array_split(train_df, NUM_CLIENTS)

    trainloaders = [DataLoader(CustomDataset(partition), batch_size=BATCH_SIZE, shuffle=True) for partition in train_partitions]
    testloader = DataLoader(CustomDataset(test_df), batch_size=BATCH_SIZE, shuffle=False)

    return trainloaders, testloader, train_df.shape[1] - 1  # Subtract 1 to exclude the target column from input dimensions

In [140]:
def show_data_from_loader(loader, num_batches=1):
    for i, (inputs, targets) in enumerate(loader):
        if i >= num_batches:
            break
        print(f"Batch {i + 1}")
        print("Inputs:")
        print(inputs)
        print("Targets:")
        print(targets)

# Example usage:
trainloaders, testloader, input_dim = load_csv_data('src_ue.csv','targetTput')
#Show dimension of trainloaders
print(f"Dimension: {len(trainloaders)}")

# Show data from the first train loader
print("\nShowing data from the first train loader:")
show_data_from_loader(trainloaders[0])
# Inspect the data from the DataLoader
for batch in trainloaders[0]:
    print("batch:",batch,len(batch))
    break  # Remove break to see more examples if needed


Dimension: 2

Showing data from the first train loader:
Batch 1
Inputs:
tensor([[1.0130e+03, 4.5000e+01, 1.6401e-01,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        [1.0130e+03, 6.8000e+01, 3.0000e-01,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        [1.0050e+03, 9.1000e+01, 3.5224e-01,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        ...,
        [1.0030e+03, 5.6000e+01, 1.0000e-01,  ..., 0.0000e+00, 0.0000e+00,
         1.0000e+00],
        [1.0060e+03, 6.2000e+01, 2.5000e-01,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        [1.0050e+03, 9.1000e+01, 3.7300e-01,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00]])
Targets:
tensor([0.3000, 0.3000, 0.7500, 0.7500, 0.2500, 0.2500, 0.7500, 0.1000, 0.1000,
        0.7500, 0.3000, 0.1000, 0.2500, 0.1000, 0.1000, 0.2500, 0.1000, 0.7500,
        0.1000, 0.1000, 0.1000, 0.1000, 0.3000, 0.1000, 0.7500, 0.7500, 0.3000,
        0.1000, 0.1000, 0.1000, 0.2500, 0.7500])
batch: [tensor([[1.0090e+03, 1.360

  return bound(*args, **kwds)


In [141]:
DEVICE = torch.device("cpu")  # Try "cuda" to train on GPU

class Net(nn.Module):
    def __init__(self, input_dim):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 1)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

input_size = next(iter(trainloaders[0]))[0].shape[1]

# Initialize the network
net = Net(input_size).to(DEVICE)

num_parameters = sum(value.numel() for value in net.state_dict().values())
print(f"{num_parameters = }")



num_parameters = 24705


In [150]:
def train(net, trainloaders, epochs: int, verbose=False):
    """Train the network on the training set."""
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(net.parameters())
    net.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for trainloader in trainloaders:  # Iterate over each client's data
            for features, targets in trainloader:
                features, targets = features.to(DEVICE), targets.to(DEVICE)
                optimizer.zero_grad()
                outputs = net(features)
                loss = criterion(outputs, targets.view(-1, 1))
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
        running_loss /= len(trainloaders) * len(trainloader.dataset)
        if verbose:
            print(f"Epoch {epoch+1}: train loss {running_loss}")
    return net

def test(net, testloader):
    """Evaluate the network on the entire test set."""
    criterion = torch.nn.MSELoss()
    total_loss = 0.0
    net.eval()
    with torch.no_grad():
        for features, targets in testloader:
            features, targets = features.to(DEVICE), targets.to(DEVICE)
            outputs = net(features)
            loss = criterion(outputs, targets.view(-1, 1))
            total_loss += loss.item()
    total_loss /= len(testloader.dataset)
    return total_loss

In [159]:

def run_centralised(epochs: int, lr: float, momentum: float = 0.9):
    """A minimal (but complete) training loop"""

    # instantiate the model
    model = Net(input_size).to(DEVICE)

    # define optimiser with hyperparameters supplied
    optim = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum)

    # get dataset and construct a dataloaders
    trainset, testset, input_dim = load_csv_data('src_ue.csv','targetTput')
    #trainloader = DataLoader(trainset, batch_size=32, shuffle=True, num_workers=2)
    testloader = DataLoader(testset, batch_size=32)

    # train for the specified number of epochs
    trained_model = train(model, trainset, epochs)

    # training is completed, then evaluate model on the test set
    loss = test(trained_model, testset)
    print(f"Loss: {loss}")
    

In [160]:
run_centralised(epochs=5, lr=0.01)

  return bound(*args, **kwds)


Loss: 0.00012120244570542127


In [161]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

DEVICE = torch.device("cpu")  # Change to "cuda" for GPU support
NUM_CLIENTS = 10
BATCH_SIZE = 32

class CustomDataset(Dataset):
    def __init__(self, dataframe):
        self.data = dataframe
        self.feature_columns = [col for col in dataframe.columns if col != 'targetTput' and col != 'measTimeStampRf']
        self.target_column = 'targetTput'

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        features = torch.tensor(row[self.feature_columns].values.astype(np.float32), dtype=torch.float32)
        target = torch.tensor(row[self.target_column].astype(np.float32), dtype=torch.float32)
        return features, target
def load_csv_dataset(filepath):
    df = pd.read_csv(filepath)

    # Identify categorical columns
    categorical_columns = df.select_dtypes(include=['object']).columns.tolist()
    # Remove 'measTimeStampRf' from categorical columns as it's not relevant for one-hot encoding
    if 'measTimeStampRf' in categorical_columns:
        categorical_columns.remove('measTimeStampRf')

    # One-hot encode categorical columns
    ohe = OneHotEncoder(sparse_output=False, drop='first')  # Updated parameter name
    ohe_features = ohe.fit_transform(df[categorical_columns])
    ohe_feature_names = ohe.get_feature_names_out(categorical_columns)

    # Create a DataFrame with one-hot encoded features
    ohe_df = pd.DataFrame(ohe_features, columns=ohe_feature_names)

    # Drop original categorical columns and 'measTimeStampRf'
    df.drop(columns=categorical_columns + ['measTimeStampRf'], inplace=True)
    df = pd.concat([df.reset_index(drop=True), ohe_df.reset_index(drop=True)], axis=1)

    # Split dataset
    train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
    train_df, val_df = train_test_split(train_df, test_size=0.2, random_state=42)

    # Split train set into NUM_CLIENTS partitions
    train_partitions = np.array_split(train_df, NUM_CLIENTS)

    trainloaders = [DataLoader(CustomDataset(partition), batch_size=BATCH_SIZE, shuffle=True) for partition in train_partitions]
    testloader = DataLoader(CustomDataset(test_df), batch_size=BATCH_SIZE, shuffle=False)

    return trainloaders, testloader, len(train_df.columns) - 1  # Subtract 1 for the target column

class Net(nn.Module):
    def __init__(self, input_size: int) -> None:
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return x

def train(net, trainloaders, epochs: int, verbose=False):
    """Train the network on the training set."""
    criterion = nn.MSELoss()
    optimizer = optim.Adam(net.parameters())
    net.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for trainloader in trainloaders:  # Iterate over each client's data
            for features, targets in trainloader:
                features, targets = features.to(DEVICE), targets.to(DEVICE)
                optimizer.zero_grad()
                outputs = net(features)
                loss = criterion(outputs, targets.view(-1, 1))  # Ensure targets have the correct shape
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
        average_loss = running_loss / (len(trainloaders) * len(trainloader.dataset))
        if verbose:
            print(f"Epoch {epoch+1}: train loss {average_loss:.4f}")

def test(net, testloader):
    """Evaluate the network on the test set."""
    criterion = nn.MSELoss()
    total_loss = 0.0
    net.eval()
    with torch.no_grad():
        for features, targets in testloader:
            features, targets = features.to(DEVICE), targets.to(DEVICE)
            outputs = net(features)
            loss = criterion(outputs, targets.view(-1, 1))  # Ensure targets have the correct shape
            total_loss += loss.item()
    average_loss = total_loss / len(testloader.dataset)
    return average_loss

def run_centralized(epochs: int, lr: float, batch_size: int):
    """Perform centralized training and evaluation."""
    # Load dataset and construct DataLoaders
    trainloaders, testloader, input_size = load_csv_dataset('src_ue.csv')

    # Initialize the network
    net = Net(input_size).to(DEVICE)

    # Train the model
    train(net, trainloaders, epochs=epochs, verbose=True)

    # Test the model
    test_loss = test(net, testloader)
    print(f"Test Loss: {test_loss:.4f}")

# Example usage
run_centralized(epochs=10, lr=0.001, batch_size=BATCH_SIZE)


  return bound(*args, **kwds)


Epoch 1: train loss 0.0115
Epoch 2: train loss 0.0002
Epoch 3: train loss 0.0002
Epoch 4: train loss 0.0001
Epoch 5: train loss 0.0001
Epoch 6: train loss 0.0000
Epoch 7: train loss 0.0001
Epoch 8: train loss 0.0001
Epoch 9: train loss 0.0000
Epoch 10: train loss 0.0000
Test Loss: 0.0000


In [162]:
from collections import OrderedDict
from typing import Dict, Tuple

import torch
from flwr.common import NDArrays, Scalar

trainloaders, testloader, input_size = load_csv_dataset('src_ue.csv')
class FlowerClient(fl.client.NumPyClient):
    def __init__(self, trainloader, valloader,input_size) -> None:
        super().__init__()
        self.input_size= input_size
        self.trainloader = trainloader
        self.valloader = valloader
        self.model = Net(input_size).to(DEVICE)

    def set_parameters(self, parameters):
        """With the model parameters received from the server,
        overwrite the uninitialise model in this class with them."""

        params_dict = zip(self.model.state_dict().keys(), parameters)
        state_dict = OrderedDict({k: torch.Tensor(v) for k, v in params_dict})
        # now replace the parameters
        self.model.load_state_dict(state_dict, strict=True)

    def get_parameters(self, config: Dict[str, Scalar]):
        """Extract all model parameters and convert them to a list of
        NumPy arrays. The server doesn't work with PyTorch/TF/etc."""
        return [val.cpu().numpy() for _, val in self.model.state_dict().items()]

    def fit(self, parameters, config):
        """This method train the model using the parameters sent by the
        server on the dataset of this client. At then end, the parameters
        of the locally trained model are communicated back to the server"""

        # copy parameters sent by the server into client's local model
        self.set_parameters(parameters)

        # Define the optimizer -------------------------------------------------------------- Essentially the same as in the centralised example above
        optim = torch.optim.SGD(self.model.parameters(), lr=0.01, momentum=0.9)

        # do local training  -------------------------------------------------------------- Essentially the same as in the centralised example above (but now using the client's data instead of the whole dataset)
        train(self.model, self.trainloader, optim, epochs=1)

        # return the model parameters to the server as well as extra info (number of training examples in this case)
        return self.get_parameters({}), len(self.trainloader), {}

    def evaluate(self, parameters: NDArrays, config: Dict[str, Scalar]):
        """Evaluate the model sent by the server on this client's
        local validation set. Then return performance metrics."""

        self.set_parameters(parameters)
        loss = test(
            self.model, self.valloader
        )  # <-------------------------- calls the `test` function, just what we did in the centralised setting (but this time using the client's local validation set)
        # send statistics back to the server
        return float(loss), len(self.valloader), {"accuracy": accuracy}
        

In [163]:
def get_evaluate_fn(testloader):
    """This is a function that returns a function. The returned
    function (i.e. `evaluate_fn`) will be executed by the strategy
    at the end of each round to evaluate the stat of the global
    model."""

    def evaluate_fn(server_round: int, parameters, config):
        """This function is executed by the strategy it will instantiate
        a model and replace its parameters with those from the global model.
        The, the model will be evaluate on the test set (recall this is the
        whole MNIST test set)."""

        model = Net(input_size).to(DEVICE)

        # set parameters to the model
        params_dict = zip(model.state_dict().keys(), parameters)
        state_dict = OrderedDict({k: torch.Tensor(v) for k, v in params_dict})
        model.load_state_dict(state_dict, strict=True)

        # call test
        loss, accuracy = test(
            model, testloader
        )  # <-------------------------- calls the `test` function, just what we did in the centralised setting
        return loss, {"accuracy": accuracy}

    return evaluate_fn


# now we can define the strategy
strategy = fl.server.strategy.FedAvg(
    fraction_fit=0.1,  # let's sample 10% of the client each round to do local training
    fraction_evaluate=0.1,  # after each round, let's sample 20% of the clients to asses how well the global model is doing
    min_available_clients=100,  # total number of clients available in the experiment
    evaluate_fn=get_evaluate_fn(testloader),
)  # a callback to a function that the strategy can execute to evaluate the state of the global model on a centralised dataset

In [165]:
def generate_client_fn(trainloaders, valloaders,input_size):
    def client_fn(cid: str):
        """Returns a FlowerClient containing the cid-th data partition"""

        return FlowerClient(
            trainloader=trainloaders[int(cid)], valloader=valloaders[int(cid)], input_size
        ).to_client()

    return client_fn


client_fn_callback = generate_client_fn(trainloaders, valloaders,input_size)

SyntaxError: positional argument follows keyword argument (20893064.py, line 7)