In [694]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in

from torch.utils.data import Dataset, DataLoader
from torch.optim import lr_scheduler
from tqdm import tqdm
import torch.nn as nn
import gc
import torch
from numpy import array
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Any results you write to the current directory are saved as output.

Lets load the CSV file into a Pandas dataframe.

In [695]:
data = pd.read_csv('/Users/fernando/PyNetSim/tutorials/surrogate/data/data.csv')

Lets check the shape of the dataframe.


In [696]:
print(data.shape)

(76460, 9)


Lets get only the first 10% of the data to speed up the training process.

In [697]:
num_samples = len(data)
training_set = data[:int(num_samples*0.4)]
# Validation is the last 10% of samples
validation_set = data[int(num_samples*0.9):]
# reset index in both dataframes
training_set.reset_index(drop=True, inplace=True)
validation_set.reset_index(drop=True, inplace=True)
# print shapes
print('Training set shape: ', training_set.shape)
print('Validation set shape: ', validation_set.shape)

Training set shape:  (30584, 9)
Validation set shape:  (7646, 9)


Proportion of the data that will be used for training and testing.

In [698]:
print(f"Proportion of training set: {training_set.shape[0]/len(data)*100}%")
print(f"Proportion of validation set: {validation_set.shape[0]/len(data)*100}%")

Proportion of training set: 40.0%
Proportion of validation set: 10.0%


In [699]:
print(f"{training_set.head()},{training_set.shape}")
print(f"{validation_set.head()},{validation_set.shape}")

     alpha      beta     gamma  remaining_energy  alive_nodes  \
0  0.12304  7.944358  5.167098          5.118514           99   
1  0.12304  7.944358  5.167098          5.066551           99   
2  0.12304  7.944358  5.167098          5.016569           99   
3  0.12304  7.944358  5.167098          4.966186           99   
4  0.12304  7.944358  5.167098          4.917782           99   

          cluster_heads                                      energy_levels  \
0       [0, 0, 0, 0, 0]  [0.0698517842813034, 0.06605397164431653, 0.00...   
1   [2, 22, 50, 77, 86]  [0.05932810428130321, 0.06567921164431653, 0.0...   
2   [2, 22, 50, 77, 86]  [0.04882442428130327, 0.06532445164431654, 0.0...   
3  [22, 53, 58, 77, 86]  [0.04860234428130327, 0.06496485164431653, 0.0...   
4  [22, 53, 58, 77, 86]  [0.04840026428130327, 0.06462525164431654, 0.0...   

                                 dst_to_cluster_head  \
0  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...   
1  [0.0, 62.20128616033595, 

In [700]:
def split_sequence(sequence, n_steps):
    num_samples = len(sequence)
    x = []
    y = []

    alpha_values = sequence['alpha'].astype(float)
    beta_values = sequence['beta'].astype(float)
    gamma_values = sequence['gamma'].astype(float)
    remaining_energy_values = sequence['remaining_energy']
    alive_nodes_values = sequence['alive_nodes']
    energy_levels_values = np.array(
        [eval(x) for x in sequence['energy_levels']])
    # print shape of energy levels
    dst_to_cluster_head_values = np.array(
        [eval(x) for x in sequence['dst_to_cluster_head']])
    membership_values = np.array([eval(x) for x in sequence['membership']])
    cluster_heads = sequence['cluster_heads']
    cluster_heads = np.array([eval(x) for x in cluster_heads])

    for i in tqdm(range(num_samples), desc="Processing sequence"):
        alpha_val, beta_val, gamma_val = alpha_values[i], beta_values[i], gamma_values[i]
        remaining_energy = remaining_energy_values[i:i+n_steps].values
        alive_nodes = alive_nodes_values[i:i+n_steps].values
        energy_levels = energy_levels_values[i]
        dst_to_cluster_head = dst_to_cluster_head_values[i]
        membership = membership_values[i]
        end_ix = i + n_steps

        if end_ix > num_samples - 1:
            break

        chs, seq_y = cluster_heads[i:end_ix], cluster_heads[end_ix]
        chs = [item for sublist in chs for item in sublist]
        # remaining_energy = [item for sublist in remaining_energy for item in sublist]
        # alive_nodes = [item for sublist in alive_nodes for item in sublist]
        # seq_x = [alpha_val, beta_val, gamma_val, remaining_energy, alive_nodes]
        seq_x = [alpha_val/10, beta_val/10, gamma_val/10]
        assert all(
            x <= 1 and x >= -1 for x in seq_x), f"Incorrect values of alpha, beta or gamma: {seq_x}"
        # Normalize remaining energy dividing by 10
        remaining_energy = [x/10 for x in remaining_energy]
        assert all(
            x <= 1 and x >= -1 for x in remaining_energy), f"Incorrect values of remaining energy: {remaining_energy}"
        seq_x.extend(remaining_energy)
        # Normalize alive nodes dividing by 100
        alive_nodes = [x/100 for x in alive_nodes]
        assert all(
            x <= 1 and x >= -1 for x in alive_nodes), f"Incorrect values of alive nodes: {alive_nodes}"
        seq_x.extend(alive_nodes)
        # Normalize energy levels dividing by 5
        energy_levels = [x/5 for x in energy_levels]
        assert all(
            x <= 1 and x >= -1 for x in energy_levels), f"Incorrect values of energy levels: {energy_levels}"
        seq_x.extend(energy_levels)
        # Normalize distance to cluster head dividing by 100
        dst_to_cluster_head = [x/200 for x in dst_to_cluster_head]
        assert all(
            x <= 1 and x >= -1 for x in dst_to_cluster_head), f"Incorrect values of distance to cluster head: {dst_to_cluster_head}"
        seq_x.extend(dst_to_cluster_head)
        # Normalize membership dividing by 100
        membership = [x/100 for x in membership]
        assert all(
            x <= 1 and x >= -1 for x in membership), f"Incorrect values of membership: {membership}"
        seq_x.extend(membership)
        # Normalize cluster heads dividing by 100
        chs = [x/100 for x in chs]
        assert all(x <= 1 and x >= -
                   1 for x in chs), f"Incorrect values of cluster heads: {chs}"
        seq_x.extend(chs)

        if (alpha_values[end_ix] != alpha_val) or (beta_values[end_ix] != beta_val) or (gamma_values[end_ix] != gamma_val):
            continue

        x.append(seq_x)
        y.append(seq_y)

    return np.array(x), np.array(y)


n_steps = 5
x_train, y_train = split_sequence(training_set, n_steps)
y_train = np.eye(101)[y_train.astype('int')]
x_val, y_val = split_sequence(validation_set, n_steps)
y_val = np.eye(101)[y_val.astype('int')]

Processing sequence: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉| 30579/30584 [00:14<00:00, 2161.94it/s]
Processing sequence: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉| 7641/7646 [00:03<00:00, 2278.18it/s]


In [701]:
# print(x_train)
print(x_train.shape)
# print(y_train)
print(y_train.shape)

(29384, 335)
(29384, 5, 101)


Create the dataset class.

In [702]:
class ClusterHeadDataset(Dataset):
    def __init__(self, x, y):
        self.X = torch.from_numpy(x.astype(np.float32))
        self.y = torch.from_numpy(y.astype(np.float32))
        self.len = x.shape[0]

    def __len__(self):
        return self.len

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

    # Support batching
    def collate_fn(self, batch):
        X = torch.stack([x[0] for x in batch])
        y = torch.stack([x[1] for x in batch])
        return X, y

Create the network architecture.

In [703]:
class ForecastCCH(nn.Module):
    def __init__(self):
        super(ForecastCCH, self).__init__()
        self.fc1 = nn.Linear(335, 800)
        self.drop1 = nn.Dropout(0.2)
        self.fc2 = nn.Linear(800, 800)
        self.drop2 = nn.Dropout(0.4)
        self.fc3 = nn.Linear(800, 101*5)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.drop1(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.drop2(x)
        x = self.fc3(x)
        return x.view(-1, 5, 101)

In [704]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = ForecastCCH().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=5e-3)
criterion = nn.CrossEntropyLoss()
rl_scheduler = lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)

Create the dataset objects.

In [705]:
train = ClusterHeadDataset(x_train, y_train)
valid = ClusterHeadDataset(x_val, y_val)
train_loader = DataLoader(train, batch_size=2000, shuffle=False)
valid_loader = DataLoader(valid, batch_size=2000, shuffle=False)

In [706]:
train_losses = []
valid_losses = []


def Train():
    running_loss = .0

    model.train()

    # Wrap the data loader with tqdm to add a progress bar
    for idx, (inputs, labels) in enumerate(tqdm(train_loader, desc="Training")):
        # print(inputs.shape)
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        preds = model(inputs.float())
        loss = criterion(preds, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss

    train_loss = running_loss / len(train_loader)
    train_losses.append(train_loss.detach().numpy())

    print(f'train_loss {train_loss}')


def Valid():
    running_loss = .0

    model.eval()

    with torch.no_grad():
        for idx, (inputs, labels) in enumerate(valid_loader):
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            preds = model(inputs.float())
            loss = criterion(preds, labels)
            running_loss += loss

        valid_loss = running_loss/len(valid_loader)
        valid_losses.append(valid_loss.detach().numpy())
        print(f'valid_loss {valid_loss}')

In [707]:
def test_predicted():
    model.eval()
    avg_accuracy = []
    losses = []
    with torch.no_grad():
        for idx, (inputs, labels) in enumerate(valid_loader):
            # print(f"inputs: {inputs}, shape: {inputs.shape}")
            # print(f"labels: {labels}, shape: {labels.shape}")
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            preds = model(inputs.float())
            loss = criterion(preds, labels)
            losses.append(loss.item())
            y = torch.argmax(labels, dim=2)
            output = torch.argmax(preds, dim=2)
            correct = (output == y).sum().item()
            total = np.product(y.shape)
            avg_accuracy.append(correct/total*100)
    print(
        f"Average loss: {np.mean(losses)}")
    print(
        f"Average accuracy: {np.mean(avg_accuracy)}%")
    print

In [708]:
epochs = 1000
for epoch in range(epochs):
    print('epochs {}/{}'.format(epoch+1,epochs))
    Train()
    rl_scheduler.step()
    Valid()
    if epoch % 5 == 0:
        test_predicted()
    gc.collect()

epochs 1/1000


Training: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 15/15 [01:51<00:00,  7.43s/it]


train_loss 0.10068816691637039
valid_loss 0.05699867010116577
Average loss: 0.0569986691698432
Average accuracy: 1.7031133482475764%
epochs 2/1000


Training:  20%|████████████████████████████████▍                                                                                                                                 | 3/15 [00:38<02:35, 12.94s/it]


KeyboardInterrupt: 

In [None]:
valid_loader = DataLoader(valid, batch_size=1, shuffle=False)
test_predicted()