In [1]:
import copy
import os

import numpy as np
import pandas as pd
import torch
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import TensorDataset, DataLoader

from data import fetch_dataset
from util import move_sliding_window, num_params
from model import LSTMModel
from algorithm import fedcsa

# Notebook-wide display settings.
# Show every DataFrame column instead of eliding the middle ones.
pd.set_option('display.max_columns', None)
# Print NumPy floats in fixed-point notation and suppress scientific notation.
np.set_printoptions(suppress=True, floatmode='fixed')

In [2]:
# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    print('running on gpu')

running on gpu


# Parameters

In [3]:
# --- Windowing / batching ---
window_size = 90       # length of each sliding window; also used to split inputs/labels
label_col_index = 0    # index of the label column handed to move_sliding_window
batch_size = 1024

# --- Model ---
# seq_len = 90  # (timestamps)
hidden_dim = 50
n_layers = 2
output_dim = 1

# --- Optimisation ---
lr = 0.0001

# --- Federated training ---
num_local_epochs = 1
max_rounds = 100       # total number of training rounds


# Data Preparation

In [4]:
# Load the per-client data from ./heterog. The result is iterated with .items()
# further down and each value exposes .columns/.shape/.values, so it is used as a
# mapping of client key -> DataFrame (presumably one entry per "FloorN" name
# printed by the loader -- TODO confirm in data.fetch_dataset).
dataframes = fetch_dataset("./heterog")

Floor0
Floor1
Floor10
Floor11
Floor12
Floor13
Floor14
Floor15
Floor16
Floor17
Floor18
Floor19
Floor2
Floor20
Floor21
Floor22
Floor23
Floor24
Floor25
Floor26
Floor27
Floor28
Floor29
Floor3
Floor30
Floor31
Floor32
Floor33
Floor34
Floor35
Floor36
Floor37
Floor38
Floor39
Floor4
Floor40
Floor41
Floor42
Floor43
Floor44
Floor45
Floor46
Floor47
Floor48
Floor49
Floor5
Floor50
Floor51
Floor52
Floor53
Floor54
Floor55
Floor56
Floor57
Floor58
Floor59
Floor6
Floor60
Floor61
Floor62
Floor63
Floor64
Floor65
Floor66
Floor67
Floor68
Floor69
Floor7
Floor70
Floor71
Floor72
Floor73
Floor74
Floor75
Floor76
Floor77
Floor78
Floor79
Floor8
Floor80
Floor81
Floor82
Floor83
Floor84
Floor85
Floor86
Floor87
Floor88
Floor89
Floor9


In [5]:
# n_clients = len(dataframes) #total number of clients to partition data into

In [6]:
# Inspect every client's frame and derive the model's input width from it.
feature_counts = set()
for _, df in dataframes.items():
    print(df.columns)
    print(df.shape)
    feature_counts.add(df.shape[1])

# A single model with one input_dim is shared by all clients, so every frame must
# have the same number of columns. Fail here with a clear message instead of
# silently using the last frame's width (or leaving input_dim undefined when no
# data was loaded).
if len(feature_counts) != 1:
    raise ValueError(
        f"expected all clients to share one column count, got {sorted(feature_counts)}"
    )
input_dim = feature_counts.pop()

Index(['total_demand', 'AC', 'Light', 'Plug', 'Year', 'Month', 'Day', 'Hour',
       'Minute'],
      dtype='object')
(68183, 9)
Index(['total_demand', 'AC', 'Light', 'Plug', 'Year', 'Month', 'Day', 'Hour',
       'Minute'],
      dtype='object')
(58618, 9)
Index(['total_demand', 'AC', 'Light', 'Plug', 'Year', 'Month', 'Day', 'Hour',
       'Minute'],
      dtype='object')
(46840, 9)
Index(['total_demand', 'AC', 'Light', 'Plug', 'Year', 'Month', 'Day', 'Hour',
       'Minute'],
      dtype='object')
(65390, 9)
Index(['total_demand', 'AC', 'Light', 'Plug', 'Year', 'Month', 'Day', 'Hour',
       'Minute'],
      dtype='object')
(57176, 9)
Index(['total_demand', 'AC', 'Light', 'Plug', 'Year', 'Month', 'Day', 'Hour',
       'Minute'],
      dtype='object')
(51707, 9)
Index(['total_demand', 'AC', 'Light', 'Plug', 'Year', 'Month', 'Day', 'Hour',
       'Minute'],
      dtype='object')
(47453, 9)
Index(['total_demand', 'AC', 'Light', 'Plug', 'Year', 'Month', 'Day', 'Hour',
       'Minute'],
 

# LSTM model

In [7]:
# Base network shared by all experiments; input_dim is the column count of the
# client frames, the remaining sizes come from the Parameters cell.
model_type = 'LSTM'
lstm = LSTMModel(input_dim, hidden_dim, output_dim, n_layers)

# Show the layer layout, then the value reported by num_params for this model.
print(lstm)
print(num_params(lstm))

LSTMModel(
  (lstm): LSTM(9, 50, num_layers=2, batch_first=True)
  (fc): Linear(in_features=50, out_features=1, bias=True)
  (tanh): Tanh()
)
32651


# Build the training set

## FedCSA

## 10 clients

In [8]:
# Number of clients in this run; the loader-building loop uses this many frames.
n_clients = 10

In [9]:
train_loader = []
test_loader = []
label_scalers = []

# Only the first n_clients frames (in the mapping's iteration order) take part in this run.
for df in list(dataframes.values())[:n_clients]:
    n_features = df.shape[1]
    # Every column of the frame is fed to the model as an input feature.
    inputs_cols_indices = range(n_features)

    # Cut the frame into sliding windows and the label that belongs to each window.
    inputs, labels = move_sliding_window(
        df.values,
        window_size,
        inputs_cols_indices=inputs_cols_indices,
        label_col_index=label_col_index,
    )

    # Hold out the trailing 20% of the windows for testing. An explicit split index is
    # used instead of negative slicing, because `[:-k]` selects nothing when k == 0.
    split = len(inputs) - int(0.2 * len(inputs))

    # Both scalers are fitted on the training part only and then applied to the test part.
    sc = MinMaxScaler()
    # The label scaler is kept so predictions can be mapped back to real values at evaluation.
    label_sc = MinMaxScaler()

    # MinMaxScaler expects 2-D input: flatten each window for scaling (one scaled column
    # per time-step/feature position) and restore the 3-D layout afterwards.
    train_x = sc.fit_transform(inputs[:split].reshape(-1, window_size * n_features))
    train_x = train_x.reshape(-1, window_size, n_features)
    train_y = label_sc.fit_transform(labels[:split])

    test_x = sc.transform(inputs[split:].reshape(-1, window_size * n_features))
    test_x = test_x.reshape(-1, window_size, n_features)
    test_y = label_sc.transform(labels[split:])

    label_scalers.append(label_sc)

    # Keep the datasets in host memory: placing every client's full dataset on the GPU
    # would make GPU memory use grow with n_clients.
    train_data = TensorDataset(torch.from_numpy(train_x), torch.from_numpy(train_y))
    # Training drops the last incomplete batch ...
    train_loader.append(DataLoader(train_data, batch_size=batch_size, drop_last=True))
    test_data = TensorDataset(torch.from_numpy(test_x), torch.from_numpy(test_y))
    # ... while evaluation keeps every sample, including the final partial batch.
    test_loader.append(DataLoader(test_data, batch_size=batch_size))

    # Drop the loop-local names. The tensors built by torch.from_numpy share memory with
    # these arrays, so the data itself stays alive inside the datasets.
    del train_x, train_y

(68093, 90, 9) (68093, 1)
(58528, 90, 9) (58528, 1)
(46750, 90, 9) (46750, 1)
(65300, 90, 9) (65300, 1)
(57086, 90, 9) (57086, 1)
(51617, 90, 9) (51617, 1)
(47363, 90, 9) (47363, 1)
(68901, 90, 9) (68901, 1)
(67168, 90, 9) (67168, 1)
(69578, 90, 9) (69578, 1)


In [10]:
%%time
# Train on a private copy so the base `lstm` is left untouched for other runs
# (assumes fedcsa updates global_model in place -- TODO confirm).
lstm_csa = copy.deepcopy(lstm)

# The loaders and label scalers are per-client lists built in the same order.
(
    outputs_csa,
    targets_csa,
    loss_csa,
    smape_csa,
    mae_csa,
    rmse_csa,
) = fedcsa(
    global_model=lstm_csa,
    client_train_loader=train_loader,
    test_loader=test_loader,
    label_sc=label_scalers,
    n_clients=n_clients,
    batch_size=batch_size,
    num_local_epochs=num_local_epochs,
    lr=lr,
    max_rounds=max_rounds,
    model_type=model_type,
    device=device,
)


starting round 0
clients:  [0 1 2 3 4 5 6 7 8 9]
round 0, starting client 0/9, id: 0
Epoch [1/1], Train Loss: 0.10247224664210147
loss error: 0.0438
round 0, starting client 1/9, id: 1
Epoch [1/1], Train Loss: 0.09072739904125532
loss error: 0.0445
round 0, starting client 2/9, id: 2
Epoch [1/1], Train Loss: 0.07756391447037458
loss error: 0.0401
round 0, starting client 3/9, id: 3
Epoch [1/1], Train Loss: 0.07982063527200735
loss error: 0.0357
round 0, starting client 4/9, id: 4
Epoch [1/1], Train Loss: 0.0962603839791634
loss error: 0.0292
round 0, starting client 5/9, id: 5
Epoch [1/1], Train Loss: 0.08579337066039446
loss error: 0.0363
round 0, starting client 6/9, id: 6
Epoch [1/1], Train Loss: 0.10473021794412588
loss error: 0.0347
round 0, starting client 7/9, id: 7
Epoch [1/1], Train Loss: 0.08719996180174487
loss error: 0.0513
round 0, starting client 8/9, id: 8
Epoch [1/1], Train Loss: 0.08682045709485045
loss error: 0.0368
round 0, starting client 9/9, id: 9
Epoch [1/1], Tr

In [11]:
# Persist this run's results under metrics/, tagged with the client count.
# np.save does not create missing directories, so make sure the target exists first;
# otherwise the results of a long training run would be lost to a FileNotFoundError.
os.makedirs('metrics', exist_ok=True)

# outputs/targets are stored as object arrays, so they have to be read back with
# np.load(..., allow_pickle=True).
np.save(f'metrics/fedcsa_outputs_C{n_clients}.npy', np.array(outputs_csa, dtype=object))
np.save(f'metrics/fedcsa_targets_C{n_clients}.npy', np.array(targets_csa, dtype=object))
np.save(f'metrics/fedcsa_loss_C{n_clients}.npy', loss_csa)
np.save(f'metrics/fedcsa_smape_C{n_clients}.npy', smape_csa)
np.save(f'metrics/fedcsa_mae_C{n_clients}.npy', mae_csa)
np.save(f'metrics/fedcsa_rmse_C{n_clients}.npy', rmse_csa)

## 20 clients

In [14]:
# Number of clients in this run; the loader-building loop uses this many frames.
n_clients = 20

In [15]:
train_loader = []
test_loader = []
label_scalers = []

# Only the first n_clients frames (in the mapping's iteration order) take part in this run.
for df in list(dataframes.values())[:n_clients]:
    n_features = df.shape[1]
    # Every column of the frame is fed to the model as an input feature.
    inputs_cols_indices = range(n_features)

    # Cut the frame into sliding windows and the label that belongs to each window.
    inputs, labels = move_sliding_window(
        df.values,
        window_size,
        inputs_cols_indices=inputs_cols_indices,
        label_col_index=label_col_index,
    )

    # Hold out the trailing 20% of the windows for testing. An explicit split index is
    # used instead of negative slicing, because `[:-k]` selects nothing when k == 0.
    split = len(inputs) - int(0.2 * len(inputs))

    # Both scalers are fitted on the training part only and then applied to the test part.
    sc = MinMaxScaler()
    # The label scaler is kept so predictions can be mapped back to real values at evaluation.
    label_sc = MinMaxScaler()

    # MinMaxScaler expects 2-D input: flatten each window for scaling (one scaled column
    # per time-step/feature position) and restore the 3-D layout afterwards.
    train_x = sc.fit_transform(inputs[:split].reshape(-1, window_size * n_features))
    train_x = train_x.reshape(-1, window_size, n_features)
    train_y = label_sc.fit_transform(labels[:split])

    test_x = sc.transform(inputs[split:].reshape(-1, window_size * n_features))
    test_x = test_x.reshape(-1, window_size, n_features)
    test_y = label_sc.transform(labels[split:])

    label_scalers.append(label_sc)

    # Keep the datasets in host memory: placing every client's full dataset on the GPU
    # would make GPU memory use grow with n_clients.
    train_data = TensorDataset(torch.from_numpy(train_x), torch.from_numpy(train_y))
    # Training drops the last incomplete batch ...
    train_loader.append(DataLoader(train_data, batch_size=batch_size, drop_last=True))
    test_data = TensorDataset(torch.from_numpy(test_x), torch.from_numpy(test_y))
    # ... while evaluation keeps every sample, including the final partial batch.
    test_loader.append(DataLoader(test_data, batch_size=batch_size))

    # Drop the loop-local names. The tensors built by torch.from_numpy share memory with
    # these arrays, so the data itself stays alive inside the datasets.
    del train_x, train_y

(58528, 90, 9) (58528, 1)
(64807, 90, 9) (64807, 1)
(52888, 90, 9) (52888, 1)
(64836, 90, 9) (64836, 1)
(46055, 90, 9) (46055, 1)
(67334, 90, 9) (67334, 1)
(61853, 90, 9) (61853, 1)
(60219, 90, 9) (60219, 1)
(58120, 90, 9) (58120, 1)
(62212, 90, 9) (62212, 1)
(65910, 90, 9) (65910, 1)
(49050, 90, 9) (49050, 1)
(61800, 90, 9) (61800, 1)
(68093, 90, 9) (68093, 1)
(68901, 90, 9) (68901, 1)
(49996, 90, 9) (49996, 1)
(65300, 90, 9) (65300, 1)
(47544, 90, 9) (47544, 1)
(57086, 90, 9) (57086, 1)
(55024, 90, 9) (55024, 1)


In [16]:
%%time
# Train on a private copy so the base `lstm` is left untouched for other runs
# (assumes fedcsa updates global_model in place -- TODO confirm).
lstm_csa = copy.deepcopy(lstm)

# The loaders and label scalers are per-client lists built in the same order.
(
    outputs_csa,
    targets_csa,
    loss_csa,
    smape_csa,
    mae_csa,
    rmse_csa,
) = fedcsa(
    global_model=lstm_csa,
    client_train_loader=train_loader,
    test_loader=test_loader,
    label_sc=label_scalers,
    n_clients=n_clients,
    batch_size=batch_size,
    num_local_epochs=num_local_epochs,
    lr=lr,
    max_rounds=max_rounds,
    model_type=model_type,
    device=device,
)


starting round 0
clients:  [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
round 0, starting client 0/19, id: 0
Epoch [1/1], Train Loss: 0.0532392820964257
loss error: 0.0370
round 0, starting client 1/19, id: 1
Epoch [1/1], Train Loss: 0.05795219575986268
loss error: 0.0236
round 0, starting client 2/19, id: 2
Epoch [1/1], Train Loss: 0.05163836978921076
loss error: 0.0427
round 0, starting client 3/19, id: 3
Epoch [1/1], Train Loss: 0.05724650308489798
loss error: 0.0450
round 0, starting client 4/19, id: 4
Epoch [1/1], Train Loss: 0.04961939434121763
loss error: 0.0363
round 0, starting client 5/19, id: 5
Epoch [1/1], Train Loss: 0.04642355198470446
loss error: 0.0312
round 0, starting client 6/19, id: 6
Epoch [1/1], Train Loss: 0.032882349992481366
loss error: 0.0280
round 0, starting client 7/19, id: 7
Epoch [1/1], Train Loss: 0.04765066258767818
loss error: 0.0321
round 0, starting client 8/19, id: 8
Epoch [1/1], Train Loss: 0.048659365706973594
loss error: 0.0376


In [17]:
# Persist this run's results under metrics/, tagged with the client count.
# np.save does not create missing directories, so make sure the target exists first;
# otherwise the results of a long training run would be lost to a FileNotFoundError.
os.makedirs('metrics', exist_ok=True)

# outputs/targets are stored as object arrays, so they have to be read back with
# np.load(..., allow_pickle=True).
np.save(f'metrics/fedcsa_outputs_C{n_clients}.npy', np.array(outputs_csa, dtype=object))
np.save(f'metrics/fedcsa_targets_C{n_clients}.npy', np.array(targets_csa, dtype=object))
np.save(f'metrics/fedcsa_loss_C{n_clients}.npy', loss_csa)
np.save(f'metrics/fedcsa_smape_C{n_clients}.npy', smape_csa)
np.save(f'metrics/fedcsa_mae_C{n_clients}.npy', mae_csa)
np.save(f'metrics/fedcsa_rmse_C{n_clients}.npy', rmse_csa)

## 30 clients

In [12]:
# Number of clients in this run; the loader-building loop uses this many frames.
n_clients = 30

In [13]:
train_loader = []
test_loader = []
label_scalers = []

# Only the first n_clients frames (in the mapping's iteration order) take part in this run.
for df in list(dataframes.values())[:n_clients]:
    n_features = df.shape[1]
    # Every column of the frame is fed to the model as an input feature.
    inputs_cols_indices = range(n_features)

    # Cut the frame into sliding windows and the label that belongs to each window.
    inputs, labels = move_sliding_window(
        df.values,
        window_size,
        inputs_cols_indices=inputs_cols_indices,
        label_col_index=label_col_index,
    )

    # Hold out the trailing 20% of the windows for testing. An explicit split index is
    # used instead of negative slicing, because `[:-k]` selects nothing when k == 0.
    split = len(inputs) - int(0.2 * len(inputs))

    # Both scalers are fitted on the training part only and then applied to the test part.
    sc = MinMaxScaler()
    # The label scaler is kept so predictions can be mapped back to real values at evaluation.
    label_sc = MinMaxScaler()

    # MinMaxScaler expects 2-D input: flatten each window for scaling (one scaled column
    # per time-step/feature position) and restore the 3-D layout afterwards.
    train_x = sc.fit_transform(inputs[:split].reshape(-1, window_size * n_features))
    train_x = train_x.reshape(-1, window_size, n_features)
    train_y = label_sc.fit_transform(labels[:split])

    test_x = sc.transform(inputs[split:].reshape(-1, window_size * n_features))
    test_x = test_x.reshape(-1, window_size, n_features)
    test_y = label_sc.transform(labels[split:])

    label_scalers.append(label_sc)

    # Keep the datasets in host memory, as the other client-count runs in this notebook
    # do: placing every client's full dataset on the GPU would make GPU memory use grow
    # with n_clients.
    train_data = TensorDataset(torch.from_numpy(train_x), torch.from_numpy(train_y))
    # Training drops the last incomplete batch ...
    train_loader.append(DataLoader(train_data, batch_size=batch_size, drop_last=True))
    test_data = TensorDataset(torch.from_numpy(test_x), torch.from_numpy(test_y))
    # ... while evaluation keeps every sample, including the final partial batch.
    test_loader.append(DataLoader(test_data, batch_size=batch_size))

    # Drop the loop-local names. The tensors built by torch.from_numpy share memory with
    # these arrays, so the data itself stays alive inside the datasets.
    del train_x, train_y

(58528, 90, 9) (58528, 1)
(64807, 90, 9) (64807, 1)
(52888, 90, 9) (52888, 1)
(64836, 90, 9) (64836, 1)
(46055, 90, 9) (46055, 1)
(67334, 90, 9) (67334, 1)
(61853, 90, 9) (61853, 1)
(60219, 90, 9) (60219, 1)
(58120, 90, 9) (58120, 1)
(62212, 90, 9) (62212, 1)
(65910, 90, 9) (65910, 1)
(49050, 90, 9) (49050, 1)
(61800, 90, 9) (61800, 1)
(68093, 90, 9) (68093, 1)
(68901, 90, 9) (68901, 1)
(49996, 90, 9) (49996, 1)
(65300, 90, 9) (65300, 1)
(47544, 90, 9) (47544, 1)
(57086, 90, 9) (57086, 1)
(55024, 90, 9) (55024, 1)
(56491, 90, 9) (56491, 1)
(69578, 90, 9) (69578, 1)
(72217, 90, 9) (72217, 1)
(47363, 90, 9) (47363, 1)
(67302, 90, 9) (67302, 1)
(62003, 90, 9) (62003, 1)
(46750, 90, 9) (46750, 1)
(69858, 90, 9) (69858, 1)
(51617, 90, 9) (51617, 1)
(63049, 90, 9) (63049, 1)


In [14]:
%%time
# Train on a private copy so the base `lstm` is left untouched for other runs
# (assumes fedcsa updates global_model in place -- TODO confirm).
lstm_csa = copy.deepcopy(lstm)

# The loaders and label scalers are per-client lists built in the same order.
(
    outputs_csa,
    targets_csa,
    loss_csa,
    smape_csa,
    mae_csa,
    rmse_csa,
) = fedcsa(
    global_model=lstm_csa,
    client_train_loader=train_loader,
    test_loader=test_loader,
    label_sc=label_scalers,
    n_clients=n_clients,
    batch_size=batch_size,
    num_local_epochs=num_local_epochs,
    lr=lr,
    max_rounds=max_rounds,
    model_type=model_type,
    device=device,
)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
starting round 1
clients:  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]
round 1, starting client 0/29, id: 0
Epoch [1/1], Train Loss: 0.05754117446227206
loss error: 0.0332
round 1, starting client 1/29, id: 1
Epoch [1/1], Train Loss: 0.06262731246650219
loss error: 0.0348
round 1, starting client 2/29, id: 2
Epoch [1/1], Train Loss: 0.05574668480492221
loss error: 0.0267
round 1, starting client 3/29, id: 3
Epoch [1/1], Train Loss: 0.0612885853648186
loss error: 0.0599
round 1, starting client 4/29, id: 4
Epoch [1/1], Train Loss: 0.05406717942761523
loss error: 0.0397
round 1, starting client 5/29, id: 5
Epoch [1/1], Train Loss: 0.04959859283497701
loss error: 0.0457
round 1, starting client 6/29, id: 6
Epoch [1/1], Train Loss: 0.03524837293662131
loss error: 0.0336
round 1, starting client 7/29, id: 7
Epoch [1/1], Train Loss: 0.051307381309093326
loss err

In [15]:
# Persist this run's results under metrics/, tagged with the client count.
# np.save does not create missing directories, so make sure the target exists first;
# otherwise the results of a long training run would be lost to a FileNotFoundError.
os.makedirs('metrics', exist_ok=True)

# outputs/targets are stored as object arrays, so they have to be read back with
# np.load(..., allow_pickle=True).
np.save(f'metrics/fedcsa_outputs_C{n_clients}.npy', np.array(outputs_csa, dtype=object))
np.save(f'metrics/fedcsa_targets_C{n_clients}.npy', np.array(targets_csa, dtype=object))
np.save(f'metrics/fedcsa_loss_C{n_clients}.npy', loss_csa)
np.save(f'metrics/fedcsa_smape_C{n_clients}.npy', smape_csa)
np.save(f'metrics/fedcsa_mae_C{n_clients}.npy', mae_csa)
np.save(f'metrics/fedcsa_rmse_C{n_clients}.npy', rmse_csa)

## 40 clients

In [None]:
# Number of clients in this run; the loader-building loop uses this many frames.
n_clients = 40

In [None]:
train_loader = []
test_loader = []
label_scalers = []

# Only the first n_clients frames (in the mapping's iteration order) take part in this run.
for df in list(dataframes.values())[:n_clients]:
    n_features = df.shape[1]
    # Every column of the frame is fed to the model as an input feature.
    inputs_cols_indices = range(n_features)

    # Cut the frame into sliding windows and the label that belongs to each window.
    inputs, labels = move_sliding_window(
        df.values,
        window_size,
        inputs_cols_indices=inputs_cols_indices,
        label_col_index=label_col_index,
    )

    # Hold out the trailing 20% of the windows for testing. An explicit split index is
    # used instead of negative slicing, because `[:-k]` selects nothing when k == 0.
    split = len(inputs) - int(0.2 * len(inputs))

    # Both scalers are fitted on the training part only and then applied to the test part.
    sc = MinMaxScaler()
    # The label scaler is kept so predictions can be mapped back to real values at evaluation.
    label_sc = MinMaxScaler()

    # MinMaxScaler expects 2-D input: flatten each window for scaling (one scaled column
    # per time-step/feature position) and restore the 3-D layout afterwards.
    train_x = sc.fit_transform(inputs[:split].reshape(-1, window_size * n_features))
    train_x = train_x.reshape(-1, window_size, n_features)
    train_y = label_sc.fit_transform(labels[:split])

    test_x = sc.transform(inputs[split:].reshape(-1, window_size * n_features))
    test_x = test_x.reshape(-1, window_size, n_features)
    test_y = label_sc.transform(labels[split:])

    label_scalers.append(label_sc)

    # Keep the datasets in host memory: placing every client's full dataset on the GPU
    # would make GPU memory use grow with n_clients.
    train_data = TensorDataset(torch.from_numpy(train_x), torch.from_numpy(train_y))
    # Training drops the last incomplete batch ...
    train_loader.append(DataLoader(train_data, batch_size=batch_size, drop_last=True))
    test_data = TensorDataset(torch.from_numpy(test_x), torch.from_numpy(test_y))
    # ... while evaluation keeps every sample, including the final partial batch.
    test_loader.append(DataLoader(test_data, batch_size=batch_size))

    # Drop the loop-local names. The tensors built by torch.from_numpy share memory with
    # these arrays, so the data itself stays alive inside the datasets.
    del train_x, train_y

In [None]:
%%time
# Train on a private copy so the base `lstm` is left untouched for other runs
# (assumes fedcsa updates global_model in place -- TODO confirm).
lstm_csa = copy.deepcopy(lstm)

# The loaders and label scalers are per-client lists built in the same order.
(
    outputs_csa,
    targets_csa,
    loss_csa,
    smape_csa,
    mae_csa,
    rmse_csa,
) = fedcsa(
    global_model=lstm_csa,
    client_train_loader=train_loader,
    test_loader=test_loader,
    label_sc=label_scalers,
    n_clients=n_clients,
    batch_size=batch_size,
    num_local_epochs=num_local_epochs,
    lr=lr,
    max_rounds=max_rounds,
    model_type=model_type,
    device=device,
)

In [None]:
# Persist this run's results under metrics/, tagged with the client count.
# np.save does not create missing directories, so make sure the target exists first;
# otherwise the results of a long training run would be lost to a FileNotFoundError.
os.makedirs('metrics', exist_ok=True)

# outputs/targets are stored as object arrays, so they have to be read back with
# np.load(..., allow_pickle=True).
np.save(f'metrics/fedcsa_outputs_C{n_clients}.npy', np.array(outputs_csa, dtype=object))
np.save(f'metrics/fedcsa_targets_C{n_clients}.npy', np.array(targets_csa, dtype=object))
np.save(f'metrics/fedcsa_loss_C{n_clients}.npy', loss_csa)
np.save(f'metrics/fedcsa_smape_C{n_clients}.npy', smape_csa)
np.save(f'metrics/fedcsa_mae_C{n_clients}.npy', mae_csa)
np.save(f'metrics/fedcsa_rmse_C{n_clients}.npy', rmse_csa)

## 50 clients

In [None]:
# Number of clients in this run; the loader-building loop uses this many frames.
n_clients = 50

In [None]:
train_loader = []
test_loader = []
label_scalers = []

# Only the first n_clients frames (in the mapping's iteration order) take part in this run.
for df in list(dataframes.values())[:n_clients]:
    n_features = df.shape[1]
    # Every column of the frame is fed to the model as an input feature.
    inputs_cols_indices = range(n_features)

    # Cut the frame into sliding windows and the label that belongs to each window.
    inputs, labels = move_sliding_window(
        df.values,
        window_size,
        inputs_cols_indices=inputs_cols_indices,
        label_col_index=label_col_index,
    )

    # Hold out the trailing 20% of the windows for testing. An explicit split index is
    # used instead of negative slicing, because `[:-k]` selects nothing when k == 0.
    split = len(inputs) - int(0.2 * len(inputs))

    # Both scalers are fitted on the training part only and then applied to the test part.
    sc = MinMaxScaler()
    # The label scaler is kept so predictions can be mapped back to real values at evaluation.
    label_sc = MinMaxScaler()

    # MinMaxScaler expects 2-D input: flatten each window for scaling (one scaled column
    # per time-step/feature position) and restore the 3-D layout afterwards.
    train_x = sc.fit_transform(inputs[:split].reshape(-1, window_size * n_features))
    train_x = train_x.reshape(-1, window_size, n_features)
    train_y = label_sc.fit_transform(labels[:split])

    test_x = sc.transform(inputs[split:].reshape(-1, window_size * n_features))
    test_x = test_x.reshape(-1, window_size, n_features)
    test_y = label_sc.transform(labels[split:])

    label_scalers.append(label_sc)

    # Keep the datasets in host memory: placing every client's full dataset on the GPU
    # would make GPU memory use grow with n_clients.
    train_data = TensorDataset(torch.from_numpy(train_x), torch.from_numpy(train_y))
    # Training drops the last incomplete batch ...
    train_loader.append(DataLoader(train_data, batch_size=batch_size, drop_last=True))
    test_data = TensorDataset(torch.from_numpy(test_x), torch.from_numpy(test_y))
    # ... while evaluation keeps every sample, including the final partial batch.
    test_loader.append(DataLoader(test_data, batch_size=batch_size))

    # Drop the loop-local names. The tensors built by torch.from_numpy share memory with
    # these arrays, so the data itself stays alive inside the datasets.
    del train_x, train_y

In [None]:
%%time
# Train on a private copy so the base `lstm` is left untouched for other runs
# (assumes fedcsa updates global_model in place -- TODO confirm).
lstm_csa = copy.deepcopy(lstm)

# The loaders and label scalers are per-client lists built in the same order.
(
    outputs_csa,
    targets_csa,
    loss_csa,
    smape_csa,
    mae_csa,
    rmse_csa,
) = fedcsa(
    global_model=lstm_csa,
    client_train_loader=train_loader,
    test_loader=test_loader,
    label_sc=label_scalers,
    n_clients=n_clients,
    batch_size=batch_size,
    num_local_epochs=num_local_epochs,
    lr=lr,
    max_rounds=max_rounds,
    model_type=model_type,
    device=device,
)

In [None]:
# Persist this run's results under metrics/, tagged with the client count.
# np.save does not create missing directories, so make sure the target exists first;
# otherwise the results of a long training run would be lost to a FileNotFoundError.
os.makedirs('metrics', exist_ok=True)

# outputs/targets are stored as object arrays, so they have to be read back with
# np.load(..., allow_pickle=True).
np.save(f'metrics/fedcsa_outputs_C{n_clients}.npy', np.array(outputs_csa, dtype=object))
np.save(f'metrics/fedcsa_targets_C{n_clients}.npy', np.array(targets_csa, dtype=object))
np.save(f'metrics/fedcsa_loss_C{n_clients}.npy', loss_csa)
np.save(f'metrics/fedcsa_smape_C{n_clients}.npy', smape_csa)
np.save(f'metrics/fedcsa_mae_C{n_clients}.npy', mae_csa)
np.save(f'metrics/fedcsa_rmse_C{n_clients}.npy', rmse_csa)

## 60 clients

In [None]:
# Number of clients in this run; the loader-building loop uses this many frames.
n_clients = 60

In [None]:
train_loader = []
test_loader = []
label_scalers = []

# Only the first n_clients frames (in the mapping's iteration order) take part in this run.
for df in list(dataframes.values())[:n_clients]:
    n_features = df.shape[1]
    # Every column of the frame is fed to the model as an input feature.
    inputs_cols_indices = range(n_features)

    # Cut the frame into sliding windows and the label that belongs to each window.
    inputs, labels = move_sliding_window(
        df.values,
        window_size,
        inputs_cols_indices=inputs_cols_indices,
        label_col_index=label_col_index,
    )

    # Hold out the trailing 20% of the windows for testing. An explicit split index is
    # used instead of negative slicing, because `[:-k]` selects nothing when k == 0.
    split = len(inputs) - int(0.2 * len(inputs))

    # Both scalers are fitted on the training part only and then applied to the test part.
    sc = MinMaxScaler()
    # The label scaler is kept so predictions can be mapped back to real values at evaluation.
    label_sc = MinMaxScaler()

    # MinMaxScaler expects 2-D input: flatten each window for scaling (one scaled column
    # per time-step/feature position) and restore the 3-D layout afterwards.
    train_x = sc.fit_transform(inputs[:split].reshape(-1, window_size * n_features))
    train_x = train_x.reshape(-1, window_size, n_features)
    train_y = label_sc.fit_transform(labels[:split])

    test_x = sc.transform(inputs[split:].reshape(-1, window_size * n_features))
    test_x = test_x.reshape(-1, window_size, n_features)
    test_y = label_sc.transform(labels[split:])

    label_scalers.append(label_sc)

    # Keep the datasets in host memory: placing every client's full dataset on the GPU
    # would make GPU memory use grow with n_clients.
    train_data = TensorDataset(torch.from_numpy(train_x), torch.from_numpy(train_y))
    # Training drops the last incomplete batch ...
    train_loader.append(DataLoader(train_data, batch_size=batch_size, drop_last=True))
    test_data = TensorDataset(torch.from_numpy(test_x), torch.from_numpy(test_y))
    # ... while evaluation keeps every sample, including the final partial batch.
    test_loader.append(DataLoader(test_data, batch_size=batch_size))

    # Drop the loop-local names. The tensors built by torch.from_numpy share memory with
    # these arrays, so the data itself stays alive inside the datasets.
    del train_x, train_y

In [None]:
%%time
# Train on a private copy so the base `lstm` is left untouched for other runs
# (assumes fedcsa updates global_model in place -- TODO confirm).
lstm_csa = copy.deepcopy(lstm)

# The loaders and label scalers are per-client lists built in the same order.
(
    outputs_csa,
    targets_csa,
    loss_csa,
    smape_csa,
    mae_csa,
    rmse_csa,
) = fedcsa(
    global_model=lstm_csa,
    client_train_loader=train_loader,
    test_loader=test_loader,
    label_sc=label_scalers,
    n_clients=n_clients,
    batch_size=batch_size,
    num_local_epochs=num_local_epochs,
    lr=lr,
    max_rounds=max_rounds,
    model_type=model_type,
    device=device,
)

In [None]:
# Persist this run's results under metrics/, tagged with the client count.
# np.save does not create missing directories, so make sure the target exists first;
# otherwise the results of a long training run would be lost to a FileNotFoundError.
os.makedirs('metrics', exist_ok=True)

# outputs/targets are stored as object arrays, so they have to be read back with
# np.load(..., allow_pickle=True).
np.save(f'metrics/fedcsa_outputs_C{n_clients}.npy', np.array(outputs_csa, dtype=object))
np.save(f'metrics/fedcsa_targets_C{n_clients}.npy', np.array(targets_csa, dtype=object))
np.save(f'metrics/fedcsa_loss_C{n_clients}.npy', loss_csa)
np.save(f'metrics/fedcsa_smape_C{n_clients}.npy', smape_csa)
np.save(f'metrics/fedcsa_mae_C{n_clients}.npy', mae_csa)
np.save(f'metrics/fedcsa_rmse_C{n_clients}.npy', rmse_csa)

## 70 clients

In [None]:
# Number of clients in this run; the loader-building loop uses this many frames.
n_clients = 70

In [None]:
train_loader = []
test_loader = []
label_scalers = []

# Only the first n_clients frames (in the mapping's iteration order) take part in this run.
for df in list(dataframes.values())[:n_clients]:
    n_features = df.shape[1]
    # Every column of the frame is fed to the model as an input feature.
    inputs_cols_indices = range(n_features)

    # Cut the frame into sliding windows and the label that belongs to each window.
    inputs, labels = move_sliding_window(
        df.values,
        window_size,
        inputs_cols_indices=inputs_cols_indices,
        label_col_index=label_col_index,
    )

    # Hold out the trailing 20% of the windows for testing. An explicit split index is
    # used instead of negative slicing, because `[:-k]` selects nothing when k == 0.
    split = len(inputs) - int(0.2 * len(inputs))

    # Both scalers are fitted on the training part only and then applied to the test part.
    sc = MinMaxScaler()
    # The label scaler is kept so predictions can be mapped back to real values at evaluation.
    label_sc = MinMaxScaler()

    # MinMaxScaler expects 2-D input: flatten each window for scaling (one scaled column
    # per time-step/feature position) and restore the 3-D layout afterwards.
    train_x = sc.fit_transform(inputs[:split].reshape(-1, window_size * n_features))
    train_x = train_x.reshape(-1, window_size, n_features)
    train_y = label_sc.fit_transform(labels[:split])

    test_x = sc.transform(inputs[split:].reshape(-1, window_size * n_features))
    test_x = test_x.reshape(-1, window_size, n_features)
    test_y = label_sc.transform(labels[split:])

    label_scalers.append(label_sc)

    # Keep the datasets in host memory: placing every client's full dataset on the GPU
    # would make GPU memory use grow with n_clients.
    train_data = TensorDataset(torch.from_numpy(train_x), torch.from_numpy(train_y))
    # Training drops the last incomplete batch ...
    train_loader.append(DataLoader(train_data, batch_size=batch_size, drop_last=True))
    test_data = TensorDataset(torch.from_numpy(test_x), torch.from_numpy(test_y))
    # ... while evaluation keeps every sample, including the final partial batch.
    test_loader.append(DataLoader(test_data, batch_size=batch_size))

    # Drop the loop-local names. The tensors built by torch.from_numpy share memory with
    # these arrays, so the data itself stays alive inside the datasets.
    del train_x, train_y

In [None]:
%%time
# Train on a private copy so the base `lstm` is left untouched for other runs
# (assumes fedcsa updates global_model in place -- TODO confirm).
lstm_csa = copy.deepcopy(lstm)

# The loaders and label scalers are per-client lists built in the same order.
(
    outputs_csa,
    targets_csa,
    loss_csa,
    smape_csa,
    mae_csa,
    rmse_csa,
) = fedcsa(
    global_model=lstm_csa,
    client_train_loader=train_loader,
    test_loader=test_loader,
    label_sc=label_scalers,
    n_clients=n_clients,
    batch_size=batch_size,
    num_local_epochs=num_local_epochs,
    lr=lr,
    max_rounds=max_rounds,
    model_type=model_type,
    device=device,
)

In [None]:
# Persist this run's results under metrics/, tagged with the client count.
# np.save does not create missing directories, so make sure the target exists first;
# otherwise the results of a long training run would be lost to a FileNotFoundError.
os.makedirs('metrics', exist_ok=True)

# outputs/targets are stored as object arrays, so they have to be read back with
# np.load(..., allow_pickle=True).
np.save(f'metrics/fedcsa_outputs_C{n_clients}.npy', np.array(outputs_csa, dtype=object))
np.save(f'metrics/fedcsa_targets_C{n_clients}.npy', np.array(targets_csa, dtype=object))
np.save(f'metrics/fedcsa_loss_C{n_clients}.npy', loss_csa)
np.save(f'metrics/fedcsa_smape_C{n_clients}.npy', smape_csa)
np.save(f'metrics/fedcsa_mae_C{n_clients}.npy', mae_csa)
np.save(f'metrics/fedcsa_rmse_C{n_clients}.npy', rmse_csa)

## 80 clients

In [None]:
# Number of clients in this run; the loader-building loop uses this many frames.
n_clients = 80

In [None]:
train_loader = []
test_loader = []
label_scalers = []

# Only the first n_clients frames (in the mapping's iteration order) take part in this run.
for df in list(dataframes.values())[:n_clients]:
    n_features = df.shape[1]
    # Every column of the frame is fed to the model as an input feature.
    inputs_cols_indices = range(n_features)

    # Cut the frame into sliding windows and the label that belongs to each window.
    inputs, labels = move_sliding_window(
        df.values,
        window_size,
        inputs_cols_indices=inputs_cols_indices,
        label_col_index=label_col_index,
    )

    # Hold out the trailing 20% of the windows for testing. An explicit split index is
    # used instead of negative slicing, because `[:-k]` selects nothing when k == 0.
    split = len(inputs) - int(0.2 * len(inputs))

    # Both scalers are fitted on the training part only and then applied to the test part.
    sc = MinMaxScaler()
    # The label scaler is kept so predictions can be mapped back to real values at evaluation.
    label_sc = MinMaxScaler()

    # MinMaxScaler expects 2-D input: flatten each window for scaling (one scaled column
    # per time-step/feature position) and restore the 3-D layout afterwards.
    train_x = sc.fit_transform(inputs[:split].reshape(-1, window_size * n_features))
    train_x = train_x.reshape(-1, window_size, n_features)
    train_y = label_sc.fit_transform(labels[:split])

    test_x = sc.transform(inputs[split:].reshape(-1, window_size * n_features))
    test_x = test_x.reshape(-1, window_size, n_features)
    test_y = label_sc.transform(labels[split:])

    label_scalers.append(label_sc)

    # Keep the datasets in host memory: placing every client's full dataset on the GPU
    # would make GPU memory use grow with n_clients.
    train_data = TensorDataset(torch.from_numpy(train_x), torch.from_numpy(train_y))
    # Training drops the last incomplete batch ...
    train_loader.append(DataLoader(train_data, batch_size=batch_size, drop_last=True))
    test_data = TensorDataset(torch.from_numpy(test_x), torch.from_numpy(test_y))
    # ... while evaluation keeps every sample, including the final partial batch.
    test_loader.append(DataLoader(test_data, batch_size=batch_size))

    # Drop the loop-local names. The tensors built by torch.from_numpy share memory with
    # these arrays, so the data itself stays alive inside the datasets.
    del train_x, train_y

In [None]:
%%time
# Train on a private copy so the base `lstm` is left untouched for other runs
# (assumes fedcsa updates global_model in place -- TODO confirm).
lstm_csa = copy.deepcopy(lstm)

# The loaders and label scalers are per-client lists built in the same order.
(
    outputs_csa,
    targets_csa,
    loss_csa,
    smape_csa,
    mae_csa,
    rmse_csa,
) = fedcsa(
    global_model=lstm_csa,
    client_train_loader=train_loader,
    test_loader=test_loader,
    label_sc=label_scalers,
    n_clients=n_clients,
    batch_size=batch_size,
    num_local_epochs=num_local_epochs,
    lr=lr,
    max_rounds=max_rounds,
    model_type=model_type,
    device=device,
)

In [None]:
# Persist this run's results under metrics/, tagged with the client count.
# np.save does not create missing directories, so make sure the target exists first;
# otherwise the results of a long training run would be lost to a FileNotFoundError.
os.makedirs('metrics', exist_ok=True)

# outputs/targets are stored as object arrays, so they have to be read back with
# np.load(..., allow_pickle=True).
np.save(f'metrics/fedcsa_outputs_C{n_clients}.npy', np.array(outputs_csa, dtype=object))
np.save(f'metrics/fedcsa_targets_C{n_clients}.npy', np.array(targets_csa, dtype=object))
np.save(f'metrics/fedcsa_loss_C{n_clients}.npy', loss_csa)
np.save(f'metrics/fedcsa_smape_C{n_clients}.npy', smape_csa)
np.save(f'metrics/fedcsa_mae_C{n_clients}.npy', mae_csa)
np.save(f'metrics/fedcsa_rmse_C{n_clients}.npy', rmse_csa)

## 90 clients

In [None]:
# Number of clients in this run; the loader-building loop uses this many frames.
n_clients = 90

In [None]:
train_loader = []
test_loader = []
label_scalers = []

# Only the first n_clients frames (in the mapping's iteration order) take part in this run.
for df in list(dataframes.values())[:n_clients]:
    n_features = df.shape[1]
    # Every column of the frame is fed to the model as an input feature.
    inputs_cols_indices = range(n_features)

    # Cut the frame into sliding windows and the label that belongs to each window.
    inputs, labels = move_sliding_window(
        df.values,
        window_size,
        inputs_cols_indices=inputs_cols_indices,
        label_col_index=label_col_index,
    )

    # Hold out the trailing 20% of the windows for testing. An explicit split index is
    # used instead of negative slicing, because `[:-k]` selects nothing when k == 0.
    split = len(inputs) - int(0.2 * len(inputs))

    # Both scalers are fitted on the training part only and then applied to the test part.
    sc = MinMaxScaler()
    # The label scaler is kept so predictions can be mapped back to real values at evaluation.
    label_sc = MinMaxScaler()

    # MinMaxScaler expects 2-D input: flatten each window for scaling (one scaled column
    # per time-step/feature position) and restore the 3-D layout afterwards.
    train_x = sc.fit_transform(inputs[:split].reshape(-1, window_size * n_features))
    train_x = train_x.reshape(-1, window_size, n_features)
    train_y = label_sc.fit_transform(labels[:split])

    test_x = sc.transform(inputs[split:].reshape(-1, window_size * n_features))
    test_x = test_x.reshape(-1, window_size, n_features)
    test_y = label_sc.transform(labels[split:])

    label_scalers.append(label_sc)

    # Keep the datasets in host memory: placing every client's full dataset on the GPU
    # would make GPU memory use grow with n_clients.
    train_data = TensorDataset(torch.from_numpy(train_x), torch.from_numpy(train_y))
    # Training drops the last incomplete batch ...
    train_loader.append(DataLoader(train_data, batch_size=batch_size, drop_last=True))
    test_data = TensorDataset(torch.from_numpy(test_x), torch.from_numpy(test_y))
    # ... while evaluation keeps every sample, including the final partial batch.
    test_loader.append(DataLoader(test_data, batch_size=batch_size))

    # Drop the loop-local names. The tensors built by torch.from_numpy share memory with
    # these arrays, so the data itself stays alive inside the datasets.
    del train_x, train_y

In [None]:
%%time
# Train on a private copy so the base `lstm` is left untouched for other runs
# (assumes fedcsa updates global_model in place -- TODO confirm).
lstm_csa = copy.deepcopy(lstm)

# The loaders and label scalers are per-client lists built in the same order.
(
    outputs_csa,
    targets_csa,
    loss_csa,
    smape_csa,
    mae_csa,
    rmse_csa,
) = fedcsa(
    global_model=lstm_csa,
    client_train_loader=train_loader,
    test_loader=test_loader,
    label_sc=label_scalers,
    n_clients=n_clients,
    batch_size=batch_size,
    num_local_epochs=num_local_epochs,
    lr=lr,
    max_rounds=max_rounds,
    model_type=model_type,
    device=device,
)

In [None]:
# Persist this run's results under metrics/, tagged with the client count.
# np.save does not create missing directories, so make sure the target exists first;
# otherwise the results of a long training run would be lost to a FileNotFoundError.
os.makedirs('metrics', exist_ok=True)

# outputs/targets are stored as object arrays, so they have to be read back with
# np.load(..., allow_pickle=True).
np.save(f'metrics/fedcsa_outputs_C{n_clients}.npy', np.array(outputs_csa, dtype=object))
np.save(f'metrics/fedcsa_targets_C{n_clients}.npy', np.array(targets_csa, dtype=object))
np.save(f'metrics/fedcsa_loss_C{n_clients}.npy', loss_csa)
np.save(f'metrics/fedcsa_smape_C{n_clients}.npy', smape_csa)
np.save(f'metrics/fedcsa_mae_C{n_clients}.npy', mae_csa)
np.save(f'metrics/fedcsa_rmse_C{n_clients}.npy', rmse_csa)