In [1]:
import chainer
import chainer.functions as F
import chainer.links as L

import pandas as pd
import numpy  as np

import random
import itertools
import pickle
import re

In [2]:
data_path = "../logs/bpic2011.xes"
traces_finalpath = data_path.replace(".xes", "_traces_encoded.pickled")
n_sp2_features = 624
n_pfs_features = 25

In [3]:
traces = pickle.load(open(traces_finalpath, "rb"))

In [4]:
xp = np

In [5]:
# set up chainer on a single GPU for now
device_id = -1
# try:
#     chainer.cuda.check_cuda_available()
#     device_id = 0
#     chainer.cuda.get_device(device_id).use()
#     import cupy
#     print("running on GPU, switching numpy for cupy!")
#     xp = cupy
# except:
#     xp = np

## Dataset and Iterator Setup

In [11]:
# shuffle complete traces and create test and training set
random.shuffle(traces)
sep_idx = int(0.8*len(traces))

train_traces = pd.concat(traces[:sep_idx], ignore_index=True)
test_traces  = pd.concat(traces[sep_idx:], ignore_index=True)
assert(sum([len(t) for t in traces]) == len(train_traces)+len(test_traces))

In [12]:
# extract the feature indices
# data is organized like this: normal features | SP2 features | PFS features | TARGET features
trace_columns = train_traces.columns.tolist()
trace_columns = list(map(lambda e: bool(re.match('^TARGET_.+', e)), trace_columns))
target_col_start_index = trace_columns.index(True)
pfs_col_start_index = target_col_start_index - n_pfs_features
sp2_col_start_index  = pfs_col_start_index - n_sp2_features


train_x = train_traces.iloc[:, :sp2_col_start_index].values.astype(xp.int16)
test_x  =  test_traces.iloc[:, :sp2_col_start_index].values.astype(xp.int16)

# # extract only the last column and put each element into an array of its own
train_y = train_traces.iloc[:, target_col_start_index:].values.astype(xp.int8)
test_y  =  test_traces.iloc[:, target_col_start_index:].values.astype(xp.int8)

In [13]:
train_ds = chainer.datasets.TupleDataset(train_x, train_y)
test_ds  = chainer.datasets.TupleDataset(test_x, test_y)

train_iter = chainer.iterators.SerialIterator(train_ds, 131, repeat=True, shuffle=False)
test_iter  = chainer.iterators.SerialIterator(test_ds,  131, repeat=False, shuffle=False)

## Neural Network Modeling

In [None]:
class HalfLifeModel(Chain):
    def __init__(self, vocab_size, dim_embed=33*3, dim1=400, dim2=400, dim3=200, class_size=None):
        super(HalfLifeModel, self).__init__()
        if class_size is None:
            class_size = vocab_size
        
        # ss = subsequence
        # sq = sequence
        # co = concatenated
        self.sq_embed1    = L.EmbedID(vocab_size, dim_embed)
        self.sq_lstm2     = L.LSTM(dim_embed, dim1, forget_bias_init=0)
        self.sq_lstm3     = L.LSTM(dim1, dim2, forget_bias_init=0)
        
        self.ss_embed1 = L.Linear(vocab_size, dim_embed)
        self.ss_lin2   = L.Linear(dim_embed, dim1)

        self.co_lin1 = L.Linear(dim1+dim2, dim3)
        self.co_lin2 = L.Linear(dim3, class_size)
        
        self.vocab_size = vocab_size
        self.dim_embed  = dim_embed
        self.loss_var = Variable(xp.zeros((), dtype=np.float32))
#         self.reset_state()

    def __call__(self, x, train):
        print(x)
        seq_window = x[0]
        ss_vector  = x[1]
        
        x_uni = x_3gram[:,0]
        y  = Variable(x_uni, volatile = not train)
        y  = self.sq_embed1(y)     
        y2 = self.sq_lstm2(y)
        y2 = self.sq_lstm3(y2)        

        y = Variable(sp2, volatile = not train)
        y = self.ss_embed1(y)
        y = self.ss_lin2(y)
        y3 = F.relu(y)
        
        y = concat.concat((y2,y3) )
        y = self.co_lin1(F.dropout(y, train=train))
        y = F.relu(y)
        y = self.co_lin2(F.dropout(y, train=train)) 
        
        return y

    def reset_state(self):
        if self.loss_var is not None:
            self.loss_var.unchain_backward()
            
        self.loss_var = Variable(xp.zeros((), dtype=xp.float32))
        self.sq_lstm2.reset_state()
        self.sq_lstm3.reset_state()
        return
    
gordon = HalfLifeModel(vocab_size=4, class_size=len(events))
model = L.Classifier(gordon, accfun=F.accuracy)
optimizer = optimizers.MomentumSGD().setup(model)

In [14]:
gx = gy = None
class SeqDataModel(chainer.Chain):
    def __init__(self, vocab_size, dim_embed=200, dim1=400, dim2=400, dim3=200, class_size=666):
        super(SeqDataModel, self).__init__()
        self.class_size = class_size
        self.vocab_size = vocab_size
        self.dim_embed  = dim_embed
        
        # ss = subsequence
        # sq = sequence
        # co = concatenated
        self.sq_embed1    = L.EmbedID(vocab_size, dim_embed)
        self.sq_lstm2     = L.LSTM(dim_embed, dim1, forget_bias_init=0)
        self.sq_lstm3     = L.LSTM(dim1, dim2, forget_bias_init=0)

        self.co_lin1 = L.Linear(dim2, dim3)
        self.co_lin2 = L.Linear(dim3, vocab_size)
        
        self.loss_var = chainer.Variable(xp.zeros((), dtype=np.float32))
        self.reset_state()

    def __call__(self, x, train = True):
        global gx, gy
        gx = x
        y  = self.sq_embed1(x)    
        gy = y
        y  = self.sq_lstm2(y)
        y  = self.sq_lstm3(y)        
        
        y = self.co_lin1(F.dropout(y, train=train))
        y = F.relu(y)
        y = self.co_lin2(F.dropout(y, train=train))
        
        return y

    def reset_state(self):
        if self.loss_var is not None:
            self.loss_var.unchain_backward()
            
        self.loss_var = chainer.Variable(xp.zeros((), dtype=xp.float32))
        self.sq_lstm2.reset_state()
        self.sq_lstm3.reset_state()
        return
    
gordon = SeqDataModel(vocab_size=train_x.shape[0], class_size=train_x.shape[1])
model = L.Classifier(gordon, accfun=F.accuracy)
optimizer = chainer.optimizers.MomentumSGD().setup(model)

## Linking Iterator And Optimizer together

In [15]:
updater = chainer.training.StandardUpdater(train_iter, optimizer, device=device_id)
trainer = chainer.training.Trainer(updater, (1, 'epoch'), out='result')

trainer.extend(chainer.training.extensions.LogReport())
trainer.extend(chainer.training.extensions.PrintReport(
    ['epoch', 'main/loss', 'validation/main/loss',
     'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))
trainer.extend(chainer.training.extensions.Evaluator(test_iter, model, device=device_id))

if device_id != -1:
    model.to_gpu()

trainer.run()

Exception in main training loop: 
Invalid operation is performed in: LinearFunction (Forward)

Expect: in_types[0].shape[1] == in_types[1].shape[1]
Actual: 133800 != 200
Traceback (most recent call last):
  File "/home/felix.wolff2/.local/lib/python3.6/site-packages/chainer/training/trainer.py", line 316, in run
    update()
  File "/home/felix.wolff2/.local/lib/python3.6/site-packages/chainer/training/updaters/standard_updater.py", line 149, in update
    self.update_core()
  File "/home/felix.wolff2/.local/lib/python3.6/site-packages/chainer/training/updaters/standard_updater.py", line 160, in update_core
    optimizer.update(loss_func, *in_arrays)
  File "/home/felix.wolff2/.local/lib/python3.6/site-packages/chainer/optimizer.py", line 650, in update
    loss = lossfun(*args, **kwds)
  File "/home/felix.wolff2/.local/lib/python3.6/site-packages/chainer/links/model/classifier.py", line 134, in __call__
    self.y = self.predictor(*args, **kwargs)
  File "<ipython-input-14-9a3e77c45be

InvalidType: 
Invalid operation is performed in: LinearFunction (Forward)

Expect: in_types[0].shape[1] == in_types[1].shape[1]
Actual: 133800 != 200

In [66]:
gy[0].shape

(669, 99)