In [1]:
import pandas as pd
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
from torch.autograd import Variable
from torch.optim import Adam
from fddbenchmark import FDDDataset, FDDDataloader, FDDEvaluator

In [2]:
data = FDDDataset(name='small_tep')
data.df = (data.df - data.df.mean()) / data.df.std()

Reading data/small_tep/dataset.csv: 100%|██████████| 153300/153300 [00:00<00:00, 220756.71it/s]
Reading data/small_tep/labels.csv: 100%|██████████| 153300/153300 [00:00<00:00, 3028085.97it/s]
Reading data/small_tep/train_mask.csv: 100%|██████████| 153300/153300 [00:00<00:00, 3040083.61it/s]
Reading data/small_tep/test_mask.csv: 100%|██████████| 153300/153300 [00:00<00:00, 2921405.77it/s]


In [3]:
train_dl = FDDDataloader(
    dataframe=data.df,
    mask=data.train_mask,
    labels=data.labels,
    window_size=10,
    step_size=1,
    minibatch_training=True,
    batch_size=1024,
    shuffle=True
)

test_dl = FDDDataloader(
    dataframe=data.df,
    mask=data.test_mask,
    labels=data.labels,
    window_size=10, 
    step_size=1, 
    minibatch_training=True,
    batch_size=1024
)

evaluator = FDDEvaluator(
    step_size=test_dl.step_size
    )

Creating sequence of samples: 100%|██████████| 105/105 [00:00<00:00, 978.56it/s]
Creating sequence of samples: 100%|██████████| 105/105 [00:00<00:00, 799.12it/s]


# MLP Class

In [4]:
# MLP Class

def encode_onehot(labels):
    classes = set(labels)
    classes_dict = {c: np.identity(len(classes))[i, :] for i, c in
                    enumerate(classes)}
    labels_onehot = np.array(list(map(classes_dict.get, labels)),
                             dtype=np.int32)
    return labels_onehot

class MLP(nn.Module):
    """Two-layer fully-connected ELU net with batch norm."""

    def __init__(self, n_in, n_hid, n_out, do_prob=0.):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(n_in, n_hid)
        self.fc2 = nn.Linear(n_hid, n_out)
        self.bn = nn.BatchNorm1d(n_out)
        self.dropout_prob = do_prob

        self.init_weights()

    def init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_normal(m.weight.data)
                m.bias.data.fill_(0.1)
            elif isinstance(m, nn.BatchNorm1d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def batch_norm(self, inputs):
        x = inputs.view(inputs.size(0) * inputs.size(1), -1)
        x = self.bn(x)
        return x.view(inputs.size(0), inputs.size(1), -1)

    def forward(self, inputs):
        # Input shape: [num_sims, num_things, num_features]
        x = F.elu(self.fc1(inputs))
        x = F.dropout(x, self.dropout_prob, training=self.training)
        x = F.elu(self.fc2(x))
        return self.batch_norm(x)



# Decoder Class

In [5]:
class MLPDecoder(nn.Module):
    """MLP decoder module."""

    def __init__(self, n_in_node, edge_types, msg_hid, msg_out, n_hid,
                 do_prob=0., skip_first=False):
        super(MLPDecoder, self).__init__()
        self.msg_fc1 = nn.ModuleList(
            [nn.Linear(2 * n_in_node, msg_hid) for _ in range(edge_types)])
        self.msg_fc2 = nn.ModuleList(
            [nn.Linear(msg_hid, msg_out) for _ in range(edge_types)])
        self.msg_out_shape = msg_out
        self.skip_first_edge_type = skip_first

        self.out_fc1 = nn.Linear(n_in_node + msg_out, n_hid)
        self.out_fc2 = nn.Linear(n_hid, n_hid)
        self.out_fc3 = nn.Linear(n_hid, n_in_node)

        print('Using learned interaction net decoder.')

        self.dropout_prob = do_prob

    def single_step_forward(self, single_timestep_inputs, rel_rec, rel_send,
                            single_timestep_rel_type):

        # single_timestep_inputs has shape
        # [batch_size, num_timesteps, num_atoms, num_dims]

        # single_timestep_rel_type has shape:
        # [batch_size, num_timesteps, num_atoms*(num_atoms-1), num_edge_types]

        # Node2edge
        receivers = torch.matmul(rel_rec, single_timestep_inputs)
        senders = torch.matmul(rel_send, single_timestep_inputs)
        pre_msg = torch.cat([senders, receivers], dim=-1)

        all_msgs = Variable(torch.zeros(pre_msg.size(0), pre_msg.size(1),
                                        pre_msg.size(2), self.msg_out_shape))
        if single_timestep_inputs.is_cuda:
            all_msgs = all_msgs.cuda()

        if self.skip_first_edge_type:
            start_idx = 1
        else:
            start_idx = 0

        # Run separate MLP for every edge type
        # NOTE: To exlude one edge type, simply offset range by 1
        for i in range(start_idx, len(self.msg_fc2)):
            msg = F.relu(self.msg_fc1[i](pre_msg))
            msg = F.dropout(msg, p=self.dropout_prob)
            msg = F.relu(self.msg_fc2[i](msg))
            msg = msg * single_timestep_rel_type[:, :, :, i:i + 1]
            all_msgs += msg

        # Aggregate all msgs to receiver
        agg_msgs = all_msgs.transpose(-2, -1).matmul(rel_rec).transpose(-2, -1)
        agg_msgs = agg_msgs.contiguous()

        # Skip connection
        aug_inputs = torch.cat([single_timestep_inputs, agg_msgs], dim=-1)

        # Output MLP
        pred = F.dropout(F.relu(self.out_fc1(aug_inputs)), p=self.dropout_prob)
        pred = F.dropout(F.relu(self.out_fc2(pred)), p=self.dropout_prob)
        pred = self.out_fc3(pred)

        # Predict position/velocity difference
        return single_timestep_inputs + pred

    def forward(self, inputs, rel_type, rel_rec, rel_send, pred_steps=1):
        # NOTE: Assumes that we have the same graph across all samples.

        inputs = inputs.transpose(1, 2).contiguous()

        sizes = [rel_type.size(0), inputs.size(1), rel_type.size(1),
                 rel_type.size(2)]
        rel_type = rel_type.unsqueeze(1).expand(sizes)

        time_steps = inputs.size(1)
        assert (pred_steps <= time_steps)
        preds = []

        # Only take n-th timesteps as starting points (n: pred_steps)
        last_pred = inputs[:, 0::pred_steps, :, :]
        curr_rel_type = rel_type[:, 0::pred_steps, :, :]
        # NOTE: Assumes rel_type is constant (i.e. same across all time steps).

        # Run n prediction steps
        for step in range(0, pred_steps):
            last_pred = self.single_step_forward(last_pred, rel_rec, rel_send,
                                                 curr_rel_type)
            preds.append(last_pred)

        sizes = [preds[0].size(0), preds[0].size(1) * pred_steps,
                 preds[0].size(2), preds[0].size(3)]

        output = Variable(torch.zeros(sizes))
        if inputs.is_cuda:
            output = output.cuda()

        # Re-assemble correct timeline
        for i in range(len(preds)):
            output[:, i::pred_steps, :, :] = preds[i]

        pred_all = output[:, :(inputs.size(1) - 1), :, :]

        return pred_all.transpose(1, 2).contiguous()


In [6]:
edge_types = 1

In [7]:
decoder = MLPDecoder(n_in_node=1,
                         edge_types=edge_types,
                         msg_hid=32,
                         msg_out=32,
                         n_hid=32,
                         do_prob=0.0,
                         skip_first=False)

Using learned interaction net decoder.


# Encoder Class

In [8]:
# Generate off-diagonal interaction graph
off_diag = np.ones([52, 52]) - np.eye(52)

rel_rec = np.array(encode_onehot(np.where(off_diag)[0]), dtype=np.float32)
rel_send = np.array(encode_onehot(np.where(off_diag)[1]), dtype=np.float32)
rel_rec = torch.FloatTensor(rel_rec)
rel_send = torch.FloatTensor(rel_send)

In [9]:
class MLPEncoder(nn.Module):
    def __init__(self, n_in, n_hid, n_out, do_prob=0., factor=True):
        super(MLPEncoder, self).__init__()

        self.factor = factor

        self.mlp1 = MLP(n_in, n_hid, n_hid, do_prob)
        self.mlp2 = MLP(n_hid * 2, n_hid, n_hid, do_prob)
        self.mlp3 = MLP(n_hid, n_hid, n_hid, do_prob)
        if self.factor:
            self.mlp4 = MLP(n_hid * 3, n_hid, n_hid, do_prob)
            print("Using factor graph MLP encoder.")
        else:
            self.mlp4 = MLP(n_hid * 2, n_hid, n_hid, do_prob)
            print("Using MLP encoder.")
        self.fc_out = nn.Linear(n_hid, n_out)
        self.init_weights()

    def init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_normal(m.weight.data)
                m.bias.data.fill_(0.1)

    def edge2node(self, x, rel_rec, rel_send):
        # NOTE: Assumes that we have the same graph across all samples.
        incoming = torch.matmul(rel_rec.t(), x)
        return incoming / incoming.size(1)

    def node2edge(self, x, rel_rec, rel_send):
        # NOTE: Assumes that we have the same graph across all samples.
        receivers = torch.matmul(rel_rec, x)
        senders = torch.matmul(rel_send, x)
        edges = torch.cat([senders, receivers], dim=2)
        return edges

    def forward(self, inputs, rel_rec, rel_send):
        # Input shape: [num_sims, num_atoms, num_timesteps, num_dims]
        x = inputs #.view(inputs.size(0), inputs.size(1), -1)
        # New shape: [num_sims, num_atoms, num_timesteps*num_dims]

        x = self.mlp1(x)  # 2-layer ELU net per node

        x = self.node2edge(x, rel_rec, rel_send)
        x = self.mlp2(x)
        x_skip = x

        if self.factor:
            x = self.edge2node(x, rel_rec, rel_send)
            x = self.mlp3(x)
            x = self.node2edge(x, rel_rec, rel_send)
            x = torch.cat((x, x_skip), dim=2)  # Skip connection
            x = self.mlp4(x)
        else:
            x = self.mlp3(x)
            x = torch.cat((x, x_skip), dim=2)  # Skip connection
            x = self.mlp4(x)

        return self.fc_out(x)

In [10]:
# енкодер:

def my_softmax(input, axis=1):
    trans_input = input.transpose(axis, 0).contiguous()
    soft_max_1d = F.softmax(trans_input)
    return soft_max_1d.transpose(axis, 0)

def sample_gumbel(shape, eps=1e-10):
    """
    NOTE: Stolen from https://github.com/pytorch/pytorch/pull/3341/commits/327fcfed4c44c62b208f750058d14d4dc1b9a9d3

    Sample from Gumbel(0, 1)

    based on
    https://github.com/ericjang/gumbel-softmax/blob/3c8584924603869e90ca74ac20a6a03d99a91ef9/Categorical%20VAE.ipynb ,
    (MIT license)
    """
    U = torch.rand(shape).float()
    return - torch.log(eps - torch.log(U + eps))


def gumbel_softmax_sample(logits, tau=1, eps=1e-10):
    """
    NOTE: Stolen from https://github.com/pytorch/pytorch/pull/3341/commits/327fcfed4c44c62b208f750058d14d4dc1b9a9d3

    Draw a sample from the Gumbel-Softmax distribution

    based on
    https://github.com/ericjang/gumbel-softmax/blob/3c8584924603869e90ca74ac20a6a03d99a91ef9/Categorical%20VAE.ipynb
    (MIT license)
    """
    gumbel_noise = sample_gumbel(logits.size(), eps=eps)
    if logits.is_cuda:
        gumbel_noise = gumbel_noise.cuda()
    y = logits + Variable(gumbel_noise)
    return my_softmax(y / tau, axis=-1)

def gumbel_softmax(logits, tau=1, hard=False, eps=1e-10):
    """
    NOTE: Stolen from https://github.com/pytorch/pytorch/pull/3341/commits/327fcfed4c44c62b208f750058d14d4dc1b9a9d3

    Sample from the Gumbel-Softmax distribution and optionally discretize.
    Args:
      logits: [batch_size, n_class] unnormalized log-probs
      tau: non-negative scalar temperature
      hard: if True, take argmax, but differentiate w.r.t. soft sample y
    Returns:
      [batch_size, n_class] sample from the Gumbel-Softmax distribution.
      If hard=True, then the returned sample will be one-hot, otherwise it will
      be a probability distribution that sums to 1 across classes

    Constraints:
    - this implementation only works on batch_size x num_features tensor for now

    based on
    https://github.com/ericjang/gumbel-softmax/blob/3c8584924603869e90ca74ac20a6a03d99a91ef9/Categorical%20VAE.ipynb ,
    (MIT license)
    """
    y_soft = gumbel_softmax_sample(logits, tau=tau, eps=eps)
    if hard:
        shape = logits.size()
        _, k = y_soft.data.max(-1)
        # this bit is based on
        # https://discuss.pytorch.org/t/stop-gradients-for-st-gumbel-softmax/530/5
        y_hard = torch.zeros(*shape)
        if y_soft.is_cuda:
            y_hard = y_hard.cuda()
        y_hard = y_hard.zero_().scatter_(-1, k.view(shape[:-1] + (1,)), 1.0)
        # this cool bit of code achieves two things:
        # - makes the output value exactly one-hot (since we add then
        #   subtract y_soft value)
        # - makes the gradient equal to y_soft gradient (since we strip
        #   all other gradients)
        y = Variable(y_hard - y_soft.data) + y_soft
    else:
        y = y_soft
    return y


In [11]:
encoder = MLPEncoder(10, 32, edge_types, 0.0, False)


Using MLP encoder.




In [12]:
def nll_gaussian(preds, target, variance, add_const=False):
    neg_log_p = ((preds - target) ** 2 / (2 * variance))
    if add_const:
        const = 0.5 * np.log(2 * np.pi * variance)
        neg_log_p += const
    return neg_log_p.sum() / (target.size(0) * target.size(1))

def kl_categorical(preds, log_prior, num_atoms, eps=1e-16):
    kl_div = preds * (torch.log(preds + eps) - log_prior)
    return kl_div.sum() / (num_atoms * preds.size(0))


def kl_categorical_uniform(preds, num_atoms, num_edge_types, add_const=False,
                           eps=1e-16):
    kl_div = preds * torch.log(preds + eps)
    if add_const:
        const = np.log(num_edge_types)
        kl_div += const
    return kl_div.sum() / (num_atoms * preds.size(0))

In [13]:
corr = data.df.corr(method='pearson')
coef = 0.7
A = torch.FloatTensor(corr.values)
A[(A > coef) | (A < -coef)] = 1
A[A < 1] = 0
device = torch.device("cuda")
print(device)
optimizer = Adam(list(encoder.parameters()) + list(decoder.parameters()),
                       lr=0.001)
n_epochs = 5
encoder.train()

a = None
for e in range(n_epochs):
    for train_ts, train_index, train_label in train_dl:
        ts = torch.FloatTensor(train_ts)
        ts = torch.transpose(ts, 1, 2)
        optimizer.zero_grad()
        logits = encoder(ts, rel_rec, rel_send)
        edges = gumbel_softmax(logits, tau=0.5, hard=False)
        prob = my_softmax(logits, -1)
        ts = ts.unsqueeze(dim=3)
        output = decoder(ts, edges, rel_rec, rel_send, 1)
        target = ts[:, :, 1:, :]
        loss_nll = nll_gaussian(output, target, 5e-5)
        loss_kl = kl_categorical_uniform(prob, 52, edge_types)
        loss = loss_nll + loss_kl
        loss.backward()
        optimizer.step()
#torch.save(encoder.state_dict(), 'encoder.pt')
#torch.save(decoder.state_dict(), 'decoder.pt')

cuda


  """


KeyboardInterrupt: 

In [None]:
edges

In [131]:
ts.unsqueeze(dim=3)[:, :, 1:, :].shape

torch.Size([256, 52, 9, 1])

In [None]:
ts.unsqueeze(dim=3).shape

In [69]:
ts = ts.transpose(1, 2).contiguous()
ts.shape

torch.Size([256, 10, 52])

In [79]:
edges.shape

torch.Size([256, 2652, 1])

In [72]:
ts.size(1)

10

In [74]:
edges.size(1)

2652

In [75]:
edges.size(2)

1

In [78]:
sizes = [256,10,2652,1]
rel_type = edges.unsqueeze(1).expand(sizes)
print(rel_type.shape)

torch.Size([256, 10, 2652, 1])


In [82]:
ts.shape

torch.Size([256, 10, 52])

In [85]:
x = ts[:, 0::1, :]

In [86]:
x.shape

torch.Size([256, 10, 52])

In [None]:
sizes = [256,10,2652,1]
rel_type = rel_type.unsqueeze(1).expand(sizes)

time_steps = inputs.size(1)
assert (pred_steps <= time_steps)
preds = []
print(time_steps)

# Only take n-th timesteps as starting points (n: pred_steps)
last_pred = inputs[:, 0::pred_steps, :, :]

In [12]:
edges = gumbel_softmax(logits, tau=0.5, hard=False)

  soft_max_1d = F.softmax(trans_input)


In [17]:
prob = my_softmax(logits, -1)

  """


In [95]:
print(edges)

tensor([[[1.],
         [1.],
         [1.],
         ...,
         [1.],
         [1.],
         [1.]],

        [[1.],
         [1.],
         [1.],
         ...,
         [1.],
         [1.],
         [1.]],

        [[1.],
         [1.],
         [1.],
         ...,
         [1.],
         [1.],
         [1.]],

        ...,

        [[1.],
         [1.],
         [1.],
         ...,
         [1.],
         [1.],
         [1.]],

        [[1.],
         [1.],
         [1.],
         ...,
         [1.],
         [1.],
         [1.]],

        [[1.],
         [1.],
         [1.],
         ...,
         [1.],
         [1.],
         [1.]]], grad_fn=<TransposeBackward0>)


In [15]:
a

tensor([[[-0.7154],
         [ 1.5010],
         [ 0.7967],
         ...,
         [-1.4997],
         [-2.1056],
         [-0.8520]],

        [[ 1.7129],
         [ 1.2555],
         [-1.1899],
         ...,
         [-3.4767],
         [-0.0178],
         [-1.4288]],

        [[ 2.9453],
         [ 2.5366],
         [ 2.7629],
         ...,
         [ 0.7023],
         [ 2.2458],
         [-1.6868]],

        ...,

        [[ 2.6564],
         [ 0.3357],
         [ 0.3521],
         ...,
         [ 0.4581],
         [ 0.3857],
         [ 0.5502]],

        [[ 1.0119],
         [-0.1681],
         [ 1.8196],
         ...,
         [-1.8215],
         [-1.1519],
         [-0.9417]],

        [[-0.8506],
         [-0.6669],
         [-0.1034],
         ...,
         [-0.9253],
         [ 2.7951],
         [ 1.9839]]], grad_fn=<ViewBackward0>)

In [16]:
a.shape

torch.Size([256, 2652, 1])

In [15]:
model = torch.load('A.pt')


In [17]:
model

AttributeError: 'Tensor' object has no attribute 'train'