In [2]:
import argparse, time
import numpy as np
import pandas as pd
import networkx as nx
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl import DGLGraph
from dgl.data import register_data_args, load_data
from tqdm import tqdm
import scipy.sparse as sp
from datetime import datetime
import math
%matplotlib inline

from GCN import GCN
from torch.utils.data import DataLoader
from dataUtils import loadEnergyData, processData, energyDataset
from modelUtils import saveCheckpoint, loadCheckpoint, plotPredVsTrue

In [3]:
processed_dir = "data/"
validation_range = ["2014-10-01 00:00:00", "2014-12-31 23:00:00"]
validation_range = [datetime.strptime(date, '%Y-%m-%d %H:%M:%S') for date in validation_range]

In [45]:
def DGL_Process(df):
    grouped = df.groupby('time')
    inputs = []
    targets = []
    for time, group in tqdm(grouped):
        group.node = group.node.astype('int64')
        group = group.sort_values('node')


        node_targets = group.load.values
        #print(group.loc[group.time==time,['solar_ecmwf','wind_ecmwf','holiday','hour','dow','month','year',
                                          #'season','country','voltage']].values)
        node_features = group.loc[group.time==time,['solar_ecmwf','wind_ecmwf','holiday','hour','dow','month','year',
                                          'season','country','voltage']].values

        node_features = torch.FloatTensor(node_features).unsqueeze(1)
        node_targets = torch.FloatTensor(node_targets)
           
        inputs.append(node_features)
        targets.append(node_targets)
    return torch.stack(inputs).transpose(0,1).type(torch.FloatTensor), \
            torch.stack(targets).transpose(0,1).type(torch.FloatTensor)

In [46]:
def getDatasets(energy_demand, validation_range):
    energy_demand['time'] = pd.to_datetime(energy_demand['time'], format='%Y-%m-%d %H:%M:%S')
    
    # extract validation and training sets
    train_df = energy_demand[energy_demand['time'] < validation_range[0]].reset_index(drop = True)
    val_df = energy_demand[(energy_demand['time'] >= validation_range[0]) & 
                           (energy_demand['time'] <= validation_range[1])].reset_index(drop = True)
    
    train_dataset = DGL_Process(train_df)
    valid_dataset = DGL_Process(val_df)
    #print(valid_dataset)
    #train_dataset = train_dataset.type(torch.FloatTensor)
    #valid_dataset = valid_dataset.type(torch.FloatTensor)


    return train_dataset,valid_dataset

In [6]:
#load data
energy_demand, adj_mat = loadEnergyData(processed_dir, incl_nodes = 300, partial = True)
#bacis preprocessing and normalization
energy_demand = processData(energy_demand)
#train_dataset,val_dataset = getDatasets(energy_demand)
#train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
#val_loader = DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False)

In [7]:
energy_demand

Unnamed: 0,node,time,solar_ecmwf,wind_ecmwf,holiday,hour,dow,month,year,season,country,voltage,load
0,1,2012-01-01 00:00:00,0.0,0.0284,1,0.000000,1.000000,0.0,0.0,1.0,0.0,0.0,0.036393
1,1,2012-01-01 01:00:00,0.0,0.0336,1,0.043478,1.000000,0.0,0.0,1.0,0.0,0.0,0.033963
2,1,2012-01-01 02:00:00,0.0,0.0392,1,0.086957,1.000000,0.0,0.0,1.0,0.0,0.0,0.031690
3,1,2012-01-01 03:00:00,0.0,0.0424,1,0.130435,1.000000,0.0,0.0,1.0,0.0,0.0,0.029789
4,1,2012-01-01 04:00:00,0.0,0.0475,1,0.173913,1.000000,0.0,0.0,1.0,0.0,0.0,0.028787
...,...,...,...,...,...,...,...,...,...,...,...,...,...
499771,19,2014-12-31 19:00:00,0.0,0.0305,0,0.826087,0.333333,1.0,1.0,1.0,0.0,0.0,0.079104
499772,19,2014-12-31 20:00:00,0.0,0.0479,0,0.869565,0.333333,1.0,1.0,1.0,0.0,0.0,0.076119
499773,19,2014-12-31 21:00:00,0.0,0.0567,0,0.913043,0.333333,1.0,1.0,1.0,0.0,0.0,0.068698
499774,19,2014-12-31 22:00:00,0.0,0.0534,0,0.956522,0.333333,1.0,1.0,1.0,0.0,0.0,0.064993


In [47]:
train_dataset,val_dataset = getDatasets(energy_demand,validation_range)

100%|██████████| 24096/24096 [01:14<00:00, 321.32it/s]
100%|██████████| 2208/2208 [00:06<00:00, 328.01it/s]


In [48]:
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)


In [None]:
def normalizeAdjMat(adj_mat):
    # add self loop - ensures that a node's own features are included in calculations by creating an edge to itself
    n = adj_mat.shape[0]
    adj_mat = adj_mat +  np.diag(np.ones(n, dtype=np.float32))
    
    # generate node degree matrix
    D = np.zeros((n, n), float)
    np.fill_diagonal(D, np.sum(adj_mat, axis = 1))
    
    # get D^-(1/2)
    D_norm = D**(-(1/2))
    D_norm[D_norm==math.inf] =0  # handle infs 
    
    # Normalization formula is  D^(−1/2) * A * D^(−1/2)
    norm_adj_mat = np.matmul(np.matmul(D_norm, adj_mat), D_norm)  
    return torch.FloatTensor(norm_adj_mat)

In [None]:
adj_norm = normalizeAdjMat(adj_mat)
#print(adj_norm.data.numpy())
g = nx.from_numpy_matrix(adj_norm.data.numpy())

In [None]:
in_feats = 10

In [None]:
model = GCN(g,in_feats,16,1,1,F.relu,.5)

In [None]:
criterion = torch.nn.MSELoss()

# use optimizer
optimizer = torch.optim.Adam(model.parameters(),
                                 lr=.01,
                                 weight_decay=5e-4)
train_loss = []
val_loss = []
val_best = 1
# initialize graph
for epoch in range(2):
    model.train()
    for  features, target in train_loader:
        features = features.to('cpu')
        target = target.to('cpu')
        
        optimizer.zero_grad()
        
        
        predicted = model(features)
        loss = criterion(predicted, target)
        loss.backward()
        optimizer.step()
        
        # update tracking
        np_loss = loss.detach().cpu().numpy()
        avg_trn_loss.update(np_loss, 64)
        epoch_trn_loss.append(np_loss)

In [54]:
 for batc in val_loader:
        inputs = batch[0]
        print(batch.shape)

torch.Size([1, 19, 2208])
torch.Size([1, 19, 2208])


In [52]:
for features,target in val_loader:
    print(features)

ValueError: not enough values to unpack (expected 2, got 1)

In [49]:
dataiter = iter(train_loader)
features, targets = dataiter.next()
print(features)

ValueError: not enough values to unpack (expected 2, got 1)