In [1]:
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import datetime as dt
import pickle
import os

In [2]:
import dgl
import dgl.function as fn
import torch as th
import torch.nn as nn
import torch.nn.functional as F

DGL backend not selected or invalid.  Assuming PyTorch for now.


Setting the default backend to "pytorch". You can change it in the ~/.dgl/config.json file or export the DGLBACKEND environment variable.  Valid options are: pytorch, mxnet, tensorflow (all lowercase)


Using backend: pytorch


In [3]:
# Message function: every edge carries its source node's 'h' feature as message 'm'.
# `fn.copy_u` is the current name of the deprecated `fn.copy_src` alias.
gcn_msg = fn.copy_u('h', 'm')
# Reduce function: every node sums its incoming 'm' messages and stores the result in 'h'.
gcn_reduce = fn.sum(msg='m', out='h')

In [17]:
class GCNLSTM(nn.Module):
    """Sum-aggregate node features over incoming edges, then run a stacked LSTM.

    Args:
        n_feats: size of the last dimension of the input features (LSTM input_size).
        seq_len: stored on the instance as ``self.seq_len``; it is not used by the
            layer itself (the LSTM accepts any sequence length).
        n_hidden: number of hidden units of each LSTM layer.
        n_layers: number of stacked LSTM layers.
        dropout: dropout probability passed to ``nn.LSTM``.
    """

    def __init__(self, n_feats, seq_len, n_hidden=6, n_layers=3, dropout=0.3):
        super(GCNLSTM, self).__init__()
        self.n_feats = n_feats
        self.seq_len = seq_len
        self.n_hidden = n_hidden  # number of hidden states for LSTM cell
        self.n_layers = n_layers  # number of stacked LSTM layers

        # batch_first=True: dimension 0 of the input is the batch dimension.
        self.lstm = nn.LSTM(input_size=n_feats,
                            hidden_size=self.n_hidden,
                            num_layers=self.n_layers,
                            batch_first=True,
                            dropout=dropout)

    def forward(self, g, feature):
        """Aggregate ``feature`` over the graph and return the LSTM output sequence.

        Args:
            g: graph object providing ``local_scope``, ``ndata`` and ``update_all``.
            feature: node feature tensor; assumed to be
                (num_nodes, seq_len, n_feats) -- TODO confirm against the data files.

        Returns:
            The first element of the ``nn.LSTM`` result, i.e. the output sequence
            (the final hidden/cell states are discarded).
        """
        # local_scope keeps the temporary 'h' field from leaking onto the graph.
        with g.local_scope():
            g.ndata['h'] = feature
            g.update_all(gcn_msg, gcn_reduce)
            h = g.ndata['h']
            return self.lstm(h)[0]

In [14]:
class GCNLinear(nn.Module):
    """Sum-aggregate node features over incoming edges, then apply a linear map."""

    def __init__(self, in_feats, out_feats):
        """Create the ``in_feats`` -> ``out_feats`` linear projection."""
        super().__init__()
        self.linear = nn.Linear(in_feats, out_feats)

    def forward(self, g, feature):
        """Return the linear projection of the neighbour-summed ``feature``.

        ``g`` must provide ``local_scope``, ``ndata`` and ``update_all``;
        the temporary 'h' field is written inside a local scope so the
        graph's own node data is not modified.
        """
        with g.local_scope():
            g.ndata['h'] = feature
            g.update_all(gcn_msg, gcn_reduce)
            return self.linear(g.ndata['h'])


In [18]:
class Net(nn.Module):
    """GCNLSTM -> flatten -> GCNLinear(ReLU) -> GCNLinear(sigmoid).

    Args:
        n_feats: number of features per time step fed to the LSTM layer.
        seq_len: number of time steps per node; determines the width of the
            flattened LSTM output consumed by ``layer2``.
    """

    def __init__(self, n_feats=1, seq_len=6):
        super(Net, self).__init__()
        self.seq_len = seq_len

        self.layer1 = GCNLSTM(n_feats, seq_len)
        # NOTE(review): dropout1 is registered but never applied in forward();
        # kept so the module structure is unchanged -- confirm whether it
        # should be applied to the LSTM output.
        self.dropout1 = nn.Dropout(0.3)
        # forward() flattens the LSTM output to seq_len * n_hidden values per
        # node, so layer2's input width is derived from those two numbers
        # instead of a hard-coded value that cannot match.
        self.layer2 = GCNLinear(seq_len * self.layer1.n_hidden, 50)
        self.layer3 = GCNLinear(50, 1)

    def forward(self, g, features):
        """Run the network on graph ``g`` with 3-D node ``features``.

        Args:
            g: graph object passed through to every layer.
            features: tensor whose three dimensions are unpacked as
                (batch_size, seq_len, n_feats).

        Returns:
            Tensor of sigmoid activations with one value per row of ``features``.

        Raises:
            ValueError: if ``features`` is not 3-D or its sequence length
                differs from the ``seq_len`` the network was built for.
        """
        batch_size, seq_len, _ = features.size()
        if seq_len != self.seq_len:
            raise ValueError(
                f'expected features with seq_len={self.seq_len}, got {seq_len}')

        x = self.layer1(g, features)
        x = x.contiguous().view(batch_size, -1) # flatten
        x = F.relu(self.layer2(g, x))
        # th.sigmoid replaces the deprecated F.sigmoid.
        x = th.sigmoid(self.layer3(g, x))
        return x

In [11]:
def load_data(file):
    """Load one pickled sample from ``model_ready_data`` and convert it for training.

    The pickle is expected to hold a 3-tuple ``(C, features, targets)``: ``C``
    is converted with ``dgl.from_networkx`` and the other two entries with
    ``th.FloatTensor``.

    Returns:
        ``(g, features, targets)`` -- the DGL graph and the two float tensors.
    """
    # NOTE: pickle.load can execute arbitrary code; only use on trusted files.
    with open(f'model_ready_data/{file}', 'rb') as handle:
        nx_graph, raw_features, raw_targets = pickle.load(handle)

    feature_tensor = th.FloatTensor(raw_features)
    target_tensor = th.FloatTensor(raw_targets)
    dgl_graph = dgl.from_networkx(nx_graph)
    return dgl_graph, feature_tensor, target_tensor

In [8]:
# Seed PyTorch's RNG before building the model so the run starts from a fixed seed.
th.manual_seed(0)
net = Net()
# NOTE(review): the summary recorded below this cell (LSTM(2, 10, ...),
# out_features=2) does not match the class definitions above (GCNLSTM(1, 6),
# GCNLinear(50, 1)); this cell's execution count (8) predates the class cells
# (17/18), so it must be re-run for `net` to reflect the current classes.
print(net)

Net(
  (layer1): GCNLSTM(
    (lstm): LSTM(2, 10, num_layers=3, batch_first=True, dropout=0.3)
  )
  (dropout1): Dropout(p=0.3, inplace=False)
  (layer2): GCNLinear(
    (linear): Linear(in_features=100, out_features=50, bias=True)
  )
  (layer3): GCNLinear(
    (linear): Linear(in_features=50, out_features=2, bias=True)
  )
)


In [9]:
# Split the data files into train/test by the date encoded in the file name
# (`YYYY-MM-DD.<ext>`): files dated before SPLIT_DATE go to training.
SPLIT_DATE = dt.datetime.strptime('2017-01-01', '%Y-%m-%d')  # parsed once, not per file

train_files = []
test_files = []
# os.listdir returns entries in arbitrary order; sorting makes the training
# order reproducible (ISO dates sort chronologically).
for file in sorted(os.listdir('model_ready_data')):
    # splitext drops the extension whatever its length.
    date = dt.datetime.strptime(os.path.splitext(file)[0], '%Y-%m-%d')
    if date < SPLIT_DATE:
        train_files.append(file)
    else:
        test_files.append(file)

In [19]:
# Train `net` with Adam on MSE loss, recording the mean train/test loss per epoch.
optimizer = th.optim.Adam(net.parameters(), lr=1e-3)
n_epochs = 500
train_loss = []
test_loss = []

# Load every file once up front instead of re-reading the pickle and rebuilding
# the graph on each of the n_epochs passes (trades memory for I/O).
train_sets = [load_data(file) for file in train_files]
test_sets = [load_data(file) for file in test_files]

for epoch in range(n_epochs):
    train_loss_epoch = []
    test_loss_epoch = []

    net.train()  # enable dropout for the optimisation pass
    for g, features, targets in train_sets:
        pred = net(g, features)
        # NOTE(review): F.mse_loss broadcasts when shapes differ -- confirm
        # `targets` has the same shape as `pred`.
        loss = F.mse_loss(pred, targets)
        train_loss_epoch.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Evaluate with dropout disabled and without building autograd graphs;
    # previously the model was still in train mode here.
    net.eval()
    with th.no_grad():
        for g, features, targets in test_sets:
            pred = net(g, features)
            loss = F.mse_loss(pred, targets)
            test_loss_epoch.append(loss.item())

    train_loss.append(np.mean(train_loss_epoch))
    test_loss.append(np.mean(test_loss_epoch))

    if epoch % 50 == 0:
        print(f'Epoch: {epoch}, Train Loss: {train_loss[epoch]}, Test Loss: {test_loss[epoch]}')

RuntimeError: input.size(-1) must be equal to input_size. Expected 2, got 1