In [1]:
# Mount Google Drive into the Colab runtime at /content/drive. The working
# directory selected in the next cell lives under this mount point, so this
# cell has to run first.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Switch the working directory to the project folder on the mounted Drive.
# Relative paths used later ("./cleaned_data18", "metrics/...") are resolved
# against this directory.
# NOTE(review): the project-local modules imported below (data, util, model,
# algorithm) are presumably found via this directory as well — confirm.
# NOTE(review): this is a hard-coded absolute path tied to one Drive layout.
import os
os.chdir('/content/drive/MyDrive/PhD/extension-v-4.1')

In [6]:
!pip install kneed



In [7]:
import numpy as np
import pandas as pd
import copy

import torch
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import MinMaxScaler

from data import fetch_dataset
from util import move_sliding_window, num_params
from model import LSTMNet, GRUNet
from algorithm import fedavg

# Notebook display settings: never truncate DataFrame columns, and print numpy
# arrays in fixed-point notation instead of scientific notation.
pd.set_option('display.max_columns', None)
np.set_printoptions(suppress=True, floatmode='fixed')

In [8]:
# Pick the compute device once: CUDA when torch can see a GPU, CPU otherwise.
# Only the GPU case is announced on stdout.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    print('running on gpu')

running on gpu


# Parameters

In [9]:
# --- Data windowing -------------------------------------------------------
# Number of consecutive rows in one sliding window (the model's sequence length).
window_size = 90
# Column of each dataframe that is used as the prediction target.
label_col_index = 0
# Mini-batch size shared by every train/test DataLoader.
batch_size = 1024

# --- Network architecture -------------------------------------------------
n_layers = 2
hidden_dim = 256
output_dim = 1

# --- Optimisation ---------------------------------------------------------
lr = 1e-3

# --- Federated training ---------------------------------------------------
# Local epochs each selected client trains for within one round.
num_local_epochs = 1
# Total number of federated rounds.
max_rounds = 100
# Number of clients that take part in each round.
num_clients_per_round = 30

# Data Preparation

In [13]:
# Load the cleaned dataset from ./cleaned_data18 (relative to the working
# directory set above). The result is consumed below as a mapping — it is
# iterated with .items() and sized with len() — whose values expose .columns,
# .shape and .values.
# NOTE(review): presumably one pandas DataFrame per building floor, keyed by
# the names printed in the output — confirm against data.fetch_dataset.
dataframes = fetch_dataset("./cleaned_data18")

Floor10_p7
Floor11_p3
Floor12_p6
Floor13_p4
Floor14_p7
Floor15_p10
Floor16_p4
Floor17_p10
Floor18_p7
Floor19_p8
Floor20_p7
Floor21_p2
Floor22_p7
Floor23_p11
Floor24_p11
Floor25_p3
Floor26_p6
Floor27_p2
Floor28_p4
Floor29_p3
Floor0_p4
Floor1_p8
Floor2_p9
Floor3_p8
Floor4_p2
Floor5_p2
Floor6_p6
Floor7_p6
Floor8_p6
Floor9_p7


In [14]:
# Total number of federated clients: one client per loaded dataframe.
# This value is later handed to fedavg as `n_clients`.
n_clients = len(dataframes) #total number of clients to partition data into

In [15]:
# Sanity check: show the column index and the (rows, columns) shape of every
# client dataframe.
for client_df in dataframes.values():
    print(client_df.columns)
    print(client_df.shape)

Index(['total_demand', 'z1_AC1(kW)', 'z1_AC2(kW)', 'z1_AC3(kW)', 'z1_AC4(kW)',
       'z1_Light(kW)', 'z1_Plug(kW)', 'z2_AC1(kW)', 'z2_Light(kW)',
       'z2_Plug(kW)', 'z3_Light(kW)', 'z3_Plug(kW)', 'z4_AC1(kW)',
       'z4_Light(kW)', 'z4_Plug(kW)', 'z5_AC1(kW)', 'z5_Light(kW)',
       'z5_Plug(kW)', 'Year', 'Month', 'Day', 'Hour', 'Minute'],
      dtype='object')
(72864, 23)
Index(['total_demand', 'z1_AC1(kW)', 'z1_AC2(kW)', 'z1_AC3(kW)', 'z1_AC4(kW)',
       'z1_Light(kW)', 'z1_Plug(kW)', 'z2_AC1(kW)', 'z2_Light(kW)',
       'z2_Plug(kW)', 'z3_Light(kW)', 'z3_Plug(kW)', 'z4_AC1(kW)',
       'z4_Light(kW)', 'z4_Plug(kW)', 'z5_AC1(kW)', 'z5_Light(kW)',
       'z5_Plug(kW)', 'Year', 'Month', 'Day', 'Hour', 'Minute'],
      dtype='object')
(72864, 23)
Index(['total_demand', 'z1_AC1(kW)', 'z1_AC2(kW)', 'z1_AC3(kW)', 'z1_AC4(kW)',
       'z1_Light(kW)', 'z1_Plug(kW)', 'z2_AC1(kW)', 'z2_Light(kW)',
       'z2_Plug(kW)', 'z3_Light(kW)', 'z3_Plug(kW)', 'z4_AC1(kW)',
       'z4_Light(kW)', '

# Build the training set

In [16]:
# Build, for every client dataframe, a training DataLoader, a test DataLoader
# and the scaler that was fitted on that client's training labels.
#
# Results (all indexed by client position):
#   train_loader  - list of DataLoader over the scaled training windows
#   test_loader   - list of DataLoader over the scaled test windows
#   label_scalers - list of MinMaxScaler used to undo the label scaling
#   input_dim     - number of features per timestep fed to the model
#
# NOTE(review): n_clients (= len(dataframes)) is passed to fedavg together
# with these lists; if more than `max_clients` dataframes are ever loaded the
# two would disagree — confirm the cap below is still wanted.
max_clients = 30     # upper bound on the number of clients built here
test_fraction = 0.2  # trailing share of each client's windows held out for testing

train_loader = []
test_loader = []
label_scalers = []

for df in dataframes.values():
    if len(train_loader) >= max_clients:
        break

    n_features = df.shape[1]
    # Every column of the frame (the label column included) is an input feature.
    inputs_cols_indices = range(n_features)

    # Cut the frame into sliding windows of `window_size` rows plus their labels.
    inputs, labels = move_sliding_window(
        df.values,
        window_size,
        inputs_cols_indices=inputs_cols_indices,
        label_col_index=label_col_index
    )

    # Chronological split: the last `test_fraction` of the windows is the test set.
    test_portion = int(test_fraction * len(inputs))
    if test_portion == 0:
        # With a zero-sized test part, `inputs[:-0]` would silently select an
        # EMPTY training set; fail early with a readable message instead.
        raise ValueError(
            f"client has only {len(inputs)} window(s); too few to hold out "
            f"a {test_fraction:.0%} test split"
        )

    # MinMaxScaler expects 2-D data, so each window is flattened to a single
    # row of window_size * n_features values for scaling and reshaped back
    # afterwards. Both scalers are fitted on the training part only and then
    # applied unchanged to the test part.
    sc = MinMaxScaler()
    label_sc = MinMaxScaler()
    flat_width = window_size * n_features

    train_x = sc.fit_transform(inputs[:-test_portion].reshape(-1, flat_width))
    train_x = train_x.reshape(-1, window_size, n_features)
    train_y = label_sc.fit_transform(labels[:-test_portion])

    test_x = sc.transform(inputs[-test_portion:].reshape(-1, flat_width))
    test_x = test_x.reshape(-1, window_size, n_features)
    test_y = label_sc.transform(labels[-test_portion:])

    # Kept so model outputs can be re-scaled to actual values during evaluation.
    label_scalers.append(label_sc)

    # torch.from_numpy already yields CPU tensors that share memory with the arrays.
    train_data = TensorDataset(torch.from_numpy(train_x), torch.from_numpy(train_y))
    # drop_last=True: the final training batch is discarded when it is incomplete.
    train_loader.append(DataLoader(train_data, batch_size=batch_size, drop_last=True))
    test_data = TensorDataset(torch.from_numpy(test_x), torch.from_numpy(test_y))
    # The test loader keeps every sample, including a final partial batch.
    test_loader.append(DataLoader(test_data, batch_size=batch_size))

    # Drop the loop-local names; the datasets keep the underlying buffers alive.
    del train_x, train_y

# Features per timestep, read from the last axis of the first client's training
# tensor (23 for the frames printed above). Reading the dataset directly avoids
# materialising a batch and still works if drop_last leaves the loader empty.
input_dim = train_loader[0].dataset.tensors[0].shape[2]

(72774, 90, 23) (72774, 1)
(72774, 90, 23) (72774, 1)
(72774, 90, 23) (72774, 1)
(72774, 90, 23) (72774, 1)
(72774, 90, 23) (72774, 1)
(72774, 90, 23) (72774, 1)
(72774, 90, 23) (72774, 1)
(72774, 90, 23) (72774, 1)
(72774, 90, 23) (72774, 1)
(72774, 90, 23) (72774, 1)
(72774, 90, 23) (72774, 1)
(72774, 90, 23) (72774, 1)
(72774, 90, 23) (72774, 1)
(72774, 90, 23) (72774, 1)
(72774, 90, 23) (72774, 1)
(72774, 90, 23) (72774, 1)
(72774, 90, 23) (72774, 1)
(72774, 90, 23) (72774, 1)
(72774, 90, 23) (72774, 1)
(72774, 90, 23) (72774, 1)
(72768, 90, 23) (72768, 1)
(72725, 90, 23) (72725, 1)
(72317, 90, 23) (72317, 1)
(72744, 90, 23) (72744, 1)
(72774, 90, 23) (72774, 1)
(72774, 90, 23) (72774, 1)
(72771, 90, 23) (72771, 1)
(66476, 90, 23) (66476, 1)
(72774, 90, 23) (72774, 1)
(72736, 90, 23) (72736, 1)


# LSTM model

In [17]:
# Instantiate the LSTM forecaster from the hyper-parameters defined above and
# print its layer summary followed by the value returned by num_params.
# `model_type` is a plain string tag that is forwarded to fedavg later on.
# NOTE(review): num_params presumably returns the parameter count — confirm in util.
lstm = LSTMNet(input_dim, hidden_dim, output_dim, n_layers)
model_type = 'LSTM'
print(lstm)
print(num_params(lstm))

LSTMNet(
  (lstm): LSTM(23, 256, num_layers=2, batch_first=True, dropout=0.2)
  (fc): Linear(in_features=256, out_features=1, bias=True)
  (relu): ReLU()
)
814337


## FedAvg

In [18]:
%%time
# FedAvg run for the LSTM. Training works on a deep copy, so the freshly
# initialised `lstm` object itself is not the one handed to fedavg.
lstm_K5_avg = copy.deepcopy(lstm)

# All arguments are collected first and passed by keyword in a single call.
fedavg_kwargs = dict(
    global_model=lstm_K5_avg,
    client_train_loader=train_loader,
    test_loader=test_loader,
    label_sc=label_scalers,
    n_clients=n_clients,
    num_clients_per_round=num_clients_per_round,
    batch_size=batch_size,
    num_local_epochs=num_local_epochs,
    lr=lr,
    max_rounds=max_rounds,
    model_type=model_type,
    device=device,
)

# fedavg returns six results, unpacked here in the order it returns them.
(
    outputs_avg,
    targets_avg,
    loss_avg,
    smape_avg,
    mae_avg,
    rmse_avg,
) = fedavg(**fedavg_kwargs)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
round 26, starting client 16/30, id: 15
Epoch [1/1], Train Loss: 0.0021116530468654154
round 26, starting client 17/30, id: 16
Epoch [1/1], Train Loss: 0.0027892834914382556
round 26, starting client 18/30, id: 17
Epoch [1/1], Train Loss: 0.004327456899253386
round 26, starting client 19/30, id: 18
Epoch [1/1], Train Loss: 0.004111430607736111
round 26, starting client 20/30, id: 19
Epoch [1/1], Train Loss: 0.0007295572535700297
round 26, starting client 21/30, id: 20
Epoch [1/1], Train Loss: 0.000119184629771293
round 26, starting client 22/30, id: 21
Epoch [1/1], Train Loss: 0.00014024880748105976
round 26, starting client 23/30, id: 22
Epoch [1/1], Train Loss: 0.0001102838095253641
round 26, starting client 24/30, id: 23
Epoch [1/1], Train Loss: 0.00011500649803305608
round 26, starting client 25/30, id: 24
Epoch [1/1], Train Loss: 0.00029579082872390134
round 26, starting client 26/30, id: 25
Epoch [1/1], Train Loss: 

In [19]:
# Persist the FedAvg results as .npy files under metrics/.
# The directory is created first so that a missing folder cannot make the save
# fail after the long training run has already finished.
metrics_dir = 'metrics'
os.makedirs(metrics_dir, exist_ok=True)

# Suffix shared by every file name of this run.
run_tag = f'C{num_clients_per_round}-n5'

fedavg_results = {
    # Outputs and targets are wrapped in object arrays before saving; object
    # arrays are stored via pickle, so np.load needs allow_pickle=True to read
    # them back.
    'outputs': np.array(outputs_avg, dtype=object),
    'targets': np.array(targets_avg, dtype=object),
    'loss': loss_avg,
    'smape': smape_avg,
    'mae': mae_avg,
    'rmse': rmse_avg,
}

for metric_name, metric_values in fedavg_results.items():
    np.save(f'{metrics_dir}/fedavg_lstm_{metric_name}_{run_tag}.npy', metric_values)