Control-Flow-Prediction: Comparison of Models
--

In [1]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

In [2]:
from exp.eventlog import *
from exp.dl_utils import *
from exp.control_flow_prediction import *

In [3]:
#export
def _expand_path(fpath): return Path(fpath).expanduser()

class Config():
    "Creates a default config file 'config.yml' in $FASTPM HOME (default `~/.fastpm/`)"
    DEFAULT_CONFIG_LOCATION = os.path.expanduser(os.getenv('FASTPM_HOME', '~/.fastpm'))
    DEFAULT_CONFIG_PATH = DEFAULT_CONFIG_LOCATION + '/config.yml'
    DEFAULT_CONFIG = {
        'data_path': DEFAULT_CONFIG_LOCATION + '/data',
        'model_path': DEFAULT_CONFIG_LOCATION + '/model'
    }
    
    @classmethod
    def get_key(cls, key):
        cf=cls.get()
        "Get the path to `key` in the config file."
        return cf.get(key, cls.DEFAULT_CONFIG.get(key,None))

    @classmethod
    def get_path(cls, path):
        "Get the `path` in the config file."
        return _expand_path(cls.get_key(path))

    @classmethod
    def data_path(cls):
        "Get the path to data in the config file."
        return cls.get_path('data_path')

    @classmethod
    def data_archive_path(cls):
        "Get the path to data archives in the config file."
        return cls.get_path('data_archive_path')
    
    @classmethod
    def get_model(cls, path):
        "Get the 'path' in the config file."
        return _expand_path(cls.get_key(path))
    
    @classmethod
    def model_path(cls):
        "Get the path to model in the config file."
        return cls.get_path('model_path')
    
    @classmethod
    def model_archive_path(cls):
        "Get the path to data archives in the config file."
        return cls.get_path('model_archive_path')

    @classmethod
    def get(cls, fpath=None, create_missing=True):
        "Retrieve the `Config` in `fpath`."
        fpath = _expand_path(fpath or cls.DEFAULT_CONFIG_PATH)
        if not fpath.exists() and create_missing: cls.create(fpath)
        assert fpath.exists(), f'Could not find config at: {fpath}. Please create'
        with open(fpath, 'r') as yaml_file:
            yfile=yaml.safe_load(yaml_file)
            return yfile

    @classmethod
    def create(cls, fpath):
        "Creates a `Config` from `fpath`."
        fpath = _expand_path(fpath)
        assert(fpath.suffix == '.yml')
        if fpath.exists(): return
        fpath.parent.mkdir(parents=True, exist_ok=True)
        with open(fpath, 'w') as yaml_file:
            yaml.dump(cls.DEFAULT_CONFIG, yaml_file, default_flow_style=False)

### Building Basic Model

In [4]:
class BasicModel(nn.Module):
    def __init__(self, n_in, n_out, emb_sz, nh):
        super().__init__()
        self.nh = nh
        self.emb = nn.Embedding(n_in, 7, padding_idx=1)
        self.lin1 = nn.Linear(7, nh)
        self.relu = nn.ReLU()
        self.lin2 = nn.Linear(nh, n_out)
        
    def __call__(self, x):
        x = x.long()
        x = self.emb(x)
        x = self.lin1(x)
        x = self.relu(x)
        x = self.lin2(x)
        return x.float()
    
    def reset(self):
        "Reset the hidden states."
        pass

In [5]:
sched = combine_scheds([0.3, 0.7], [sched_cos(0.3, 0.6), sched_cos(0.6, 0.2)]) 
opt_func = partial(Optimizer, steppers=[sgd_step])

### Load and Preprocess Data

In [6]:
# bpi2012 = import_xes(untar_data(URLs.BPIC_2012)).events
# bpi2017 = import_xes(untar_data(URLs.BPIC_2017)).events
# data_sets = [bpi2012, bpi2017]

In [146]:
datasets = [URLs.BPIC_2012, URLs.BPIC_2017]
models = ["awd_lstm"]

processing_params = {'seed': 42, 'bs': 64, 'bptt': 70}
training_params = {'awd_lstm': {'emb_sz': 300, 'nh': 300, 'nl': 2, 'cbs': [partial(AvgStatsCallback, accuracy_flat),
                                                                           CudaCallback, Recorder,
                                                                           partial(GradientClipping, clip=0.1),
                                                                           partial(RNNTrainer, α=2., β=1.),
                                                                           ProgressBarCallback]},
                   'basic': {'nh': 10, 'bs': 64, 'bptt': 70, 'cbfs': [partial(AvgStatsCallback, accuracy_flat), 
                                                                      CudaCallback, Recorder,
                                                                      partial(ParamScheduler, 'lr', sched),
                                                                      ProgressBarCallback]}
                  }
evaluation_params = {'bs': 64, 'bptt': 70}

### Predict Functions for Basic Model

In [147]:
def predict_next_step_basic(learner, test_dl):
    
    #def predict_next_step(model,df):
    #model.eval()
    #model.cpu()
    #preds=[]
    #for e in df.values:
    #    t=torch.stack([tensor(e[c]).float() for c in range(len(list(df)))])
    #    pred=model(t[None])
    #    preds.append(pred[0][-1].tolist())
    #return np.argmax(np.array(preds),axis=1)
    
    iter_dl = iter(test_dl)
    learner.model.cuda()
    learner.model.eval()
    acc4batch = []
    for x, y in progress_bar(iter_dl):
        x, y = x.cuda(), y.cuda()
        print(x.shape, y.shape)
        print(y)
        learner.model.reset()
        print(learner.model(x)[0].shape)
        acc4batch.append((torch.argmax(learner.model(x), dim=1)==y)).float().mean().cpu()
        
    return np.mean(acc4batch)

In [148]:
tensor = torch.tensor([[1., 2., 3., 4.], [5., 6., 7., 8.], [1., 2., 3., 4.]])
#tensor = tensor.new_full((3, 4), 3.2)
print(tensor.shape)
torch.argmax(tensor, dim=1)

torch.Size([3, 4])


tensor([3, 3, 3])

In [149]:
torch.argmax??

### Helper Functions for "run()"

In [150]:
def load_data(dataset):
    return import_xes(untar_data(dataset)).events

def func(t, data):
    return len(data[int(t)][0])

def process_data_set(data_set, processing_params):
    seed = processing_params.get("seed")
    bs = processing_params.get("bs")
    bptt = processing_params.get("bptt")
    
    cfl = ControlFlowList.from_df(data_set)
    #split = SplitData.split_by_func(cfl, partial(random_splitter, p_valid=0.1))
    
    UNK, PAD, BOT, EOT = "xxunk xxpad xxbot xxeot ".split()
    proc_tok = TokenizeProcessor()
    proc_num = NumericalizeProcessor()
    sd = SplitData.split_by_func(cfl, partial(random_splitter, p_valid=0.1))
    ll = label_by_func(sd, lambda x: 0, proc_x = [proc_tok, proc_num])
    vocab = proc_num.vocab
    data = lm_databunchify(ll, bs, bptt)
    
    return data, ll.test.x, vocab

def train_model(model, train, vocab, training_params):
    if model == "awd_lstm":
        parameters = training_params.get("awd_lstm")
        emb_sz = parameters.get("emb_sz")
        nh = parameters.get("nh")
        nl = parameters.get("nl")
        cbs = parameters.get("cbs")
        
        tok_pad = vocab.index(PAD)
        awd_lstm = get_language_model(len(vocab), emb_sz, nh, nl, tok_pad, input_p=0.6, 
                                      output_p=0.4, weight_p=0.5, embed_p=0.1, hidden_p=0.2)
        learner = Learner(awd_lstm, train, cross_entropy_flat, lr=5e-3, cb_funcs=cbs, opt_func=adam_opt())
        learner.fit(20)
        return learner
    
    elif model == "basic":
        parameters = training_params.get("basic")
        nh = parameters.get("nh")
        bs = parameters.get("bs")
        bptt = parameters.get("bptt")
        cbfs = parameters.get("cbfs")
        n_in = bs * bptt
        emb_sz = int(len(vocab) / 2)
        
        basic = BasicModel(n_in, len(vocab), emb_sz, nh)
        learner = Learner(basic, train, cross_entropy_flat, cb_funcs=cbfs, opt_func=opt_func)
        learner.fit(5)
        return learner

    else:
        pass

def measure_next_step_prediction(model, test, evaluation_params):
    bs = evaluation_params.get("bs")
    bptt = evaluation_params.get("bptt")
    
    x, y = process_data_for_next_step_prediction(test)
    pd_data = Dataset(x, y)
    test_sampler = SortSampler(pd_data.x, key=partial(func, data=pd_data))
    
    test_dl = DataLoader(pd_data, batch_size=bs*64, sampler=test_sampler, collate_fn=partial(pad_collate, pad_first=True))
    mean = predict_next_step_basic(model, test_dl)
    return mean

def measure_suffix_prediction(model, test, evaluation_params):
    bs = evaluation_params.get("bs")
    bptt = evaluation_params.get("bptt")
    
    x, y = process_data_for_suffix_prediction(test)
    pd_data = Dataset(x,y)
    test_sampler = SortSampler(pd_data.x, key=partial(func, data=pd_data))
    
    test_dl = DataLoader(pd_data, batch_size=bs*8, sampler=test_sampler, collate_fn=pad_collate_sp)
    mean = predict_suffix(model, test_dl)
    return mean

In [151]:
def run(data_sets=datasets, models=models, processing_params=processing_params,
         training_params=training_params, evaluation_params=evaluation_params):
    
    for dataset in progress_bar(datasets):
        data_set = load_data(dataset)
        train_valid, test, vocab = process_data_set(data_set, processing_params)
        for model in progress_bar(models):
            Learner = train_model(model, train_valid, vocab, training_params)
            res_df1 = measure_next_step_prediction(Learner, test, evaluation_params)
            res_df2 = measure_suffix_prediction(Learner, test, evaluation_params)
            
            print("For {} and {}:\n--> Next-Step-Prediction value = {}\n--> Suffix-Prediction value = {}".format(model, dataset, res_df1, res_df2))

In [152]:
run()

epoch,train_loss,train_accuracy_flat,valid_loss,valid_accuracy_flat,time
0,1.70872,0.502874,0.845181,0.715625,00:01
1,0.715162,0.772358,0.539267,0.82154,00:01
2,0.559081,0.813807,0.484214,0.828571,00:01
3,0.518218,0.824654,0.465942,0.837054,00:01
4,0.467986,0.83458,0.460282,0.837612,00:01
5,0.454022,0.835796,0.445014,0.839918,00:01
6,0.440516,0.838283,0.439826,0.836719,00:01
7,0.412463,0.846142,0.436441,0.841964,00:01
8,0.441041,0.839195,0.439692,0.839546,00:01
9,0.4208,0.841937,0.435276,0.840513,00:01


torch.Size([4096, 169]) torch.Size([4096])
tensor([ 6, 20, 18,  ...,  5,  5, 22], device='cuda:0')
torch.Size([692224, 28])


TypeError: argmax(): argument 'input' (position 1) must be Tensor, not tuple

In [24]:

def func1():
    print('func1')

def func2():
    print('func2')
dictc={'A':func1,'B':func2}

def defaultfunc():
    print('default')
s='...'
dictc.get(s,defaultfunc)()
    

default


In [153]:
#def random_splitter(fn, p_valid, seed): 
#    random.seed(seed)
#    return random.random() < p_valid

class Preprocessing():
    
    def __init__(self, dataset, processing_params):
        self.data_set = import_xes(untar_data(dataset)).events
        self.seed = processing_params.get("seed")
        self.bs = processing_params.get("bs")
        self.bptt = processing_params.get("bptt")
        
    def preprocess_it(self):
        cfl = ControlFlowList.from_df(self.data_set)
        UNK, PAD, BOT, EOT = "xxunk xxpad xxbot xxeot ".split()
        proc_tok = TokenizeProcessor()
        proc_num = NumericalizeProcessor()
        
        sd = SplitData.split_by_func(cfl, partial(random_splitter, p_valid=0.1))
        ll = label_by_func(sd, lambda x: 0, proc_x = [proc_tok, proc_num])
        vocab = proc_num.vocab
        data = lm_databunchify(ll, self.bs, self.bptt)
        return data, ll.test.x, vocab
    

class Basic1():
    def __init__(self, training_params, vocab):
        self.parameters = training_params.get("basic")
        self.nh = self.parameters.get("nh")
        self.n_in = self.parameters.get("bs") * self.parameters.get("bptt")
        self.cbfs = self.parameters.get("cbfs")
        self.vocab = vocab
        self.emb_sz = int(len(vocab) / 2)
    
    def trainbasic(self, train):
        basic = BasicModel(self.n_in, len(self.vocab), self.emb_sz, self.nh)
        learner = Learner(basic, train, cross_entropy_flat, cb_funcs=self.cbfs, opt_func=opt_func)
        learner.fit(5)
        return learner
    
class AwdLstm1():
    def __init__(self, training_params, vocab):
        self.parameters = training_params.get("awd_lstm")
        self.emb_sz = self.parameters.get("emb_sz")
        self.nh = self.parameters.get("nh")
        self.nl = self.parameters.get("nl")
        self.cbs = self.parameters.get("cbs")
        self.vocab = vocab
        self.tok_pad = vocab.index(PAD)

    
    def trainawd(self, train):
        awd_lstm = get_language_model(len(vocab), self.emb_sz, self.nh, self.nl, self.tok_pad, input_p=0.6, 
                                      output_p=0.4, weight_p=0.5, embed_p=0.1, hidden_p=0.2)
        learner = Learner(awd_lstm, train, cross_entropy_flat, lr=5e-3, cb_funcs=self.cbs, opt_func=adam_opt())
        learner.fit(20)
        return learner
    
    
class Basic2():
    def __init__(self, nh, bs, bptt, vocab):
        self.nh = nh
        self.n_in = bs * bptt
        self.vocab = vocab
        self.emb_sz = int(len(vocab) / 2)
        
    def buildbasic(self):
        basic = BasicModel(self.n_in, len(self.vocab), self.emb_sz, self.nh)
        return basic
    
    def trainbasic(self, model, train, training_params):
        parameters = training_params.get("basic")
        cbfs = parameters.get("cbfs")
        learner = Learner(model, train, cross_entropy_flat, cb_funcs=cbfs, opt_func=opt_func)
        learner.fit(5)
        return learner
    
    
class AwdLstm2():
    def __init__(self, emb_sz, nh, nl, vocab):
        self.emb_sz = emb_sz
        self.nh = nh
        self.nl = nl
        self.vocab = vocab
        self.tok_pad = vocab.index(PAD)
        
    def buildawd(self):
        awd_lstm = get_language_model(len(vocab), self.emb_sz, self.nh, self.nl, self.tok_pad, input_p=0.6, 
                                      output_p=0.4, weight_p=0.5, embed_p=0.1, hidden_p=0.2)
        return awd_lstm
    
    def trainawd(self, model, train, training_params):
        parameters = training_params.get("awd_lstm")
        cbs = parameters.get("cbs")
        learner = Learner(model, train, cross_entropy_flat, lr=5e-3, cb_funcs=cbs, opt_func=adam_opt())
        learner.fit(20)
        return learner

    
class ProcessPrediction(Preprocessing, Basic, AwdLstm):
    
    def training(model, train, training_params):
        parameters = training_params.get("awd_lstm")
        cbs = parameters.get("cbs")
        learner = Learner(model, train, cross_entropy_flat, lr=5e-3, cb_funcs=cbs, opt_func=adam_opt())
        learner.fit(20)
        return learner
    
    def suffixPrediction(learner, test, evaluation_params):
        bs = evaluation_params.get("bs")
        bptt = evaluation_params.get("bptt")

        x, y = process_data_for_suffix_prediction(test)
        pd_data = Dataset(x,y)
        test_sampler = SortSampler(pd_data.x, key=partial(func, data=pd_data))

        test_dl = DataLoader(pd_data, batch_size=bs*8, sampler=test_sampler, collate_fn=pad_collate_sp)
        mean = predict_suffix(learner, test_dl)
        return mean
    
    def nextStepPrediction(learner, test, evaluation_params):
        bs = evaluation_params.get("bs")
        bptt = evaluation_params.get("bptt")

        x, y = process_data_for_next_step_prediction(test)
        pd_data = Dataset(x, y)
        test_sampler = SortSampler(pd_data.x, key=partial(func, data=pd_data))

        test_dl = DataLoader(pd_data, batch_size=bs*64, sampler=test_sampler, collate_fn=partial(pad_collate, pad_first=True))
        mean = predict_next_step(learner, test_dl)
        return mean
       
    def testing(learner, test, evaluation_params):
        suffix = suffixPrediction(learner, test, evaluation_params)
        nextstep = nextStepPrediction(learner, test, evaluation_params)
        return suffix, nextstep

In [154]:
data, test, vocab = Preprocessing(URLs.BPIC_2012, processing_params).preprocess_it()

In [155]:
awdLearner1 = AwdLstm1(training_params, vocab).trainawd(data)

epoch,train_loss,train_accuracy_flat,valid_loss,valid_accuracy_flat,time
0,1.668687,0.509598,0.842297,0.740179,00:01
1,0.778453,0.756505,0.55832,0.826339,00:01
2,0.564979,0.817192,0.507524,0.826377,00:01
3,0.511239,0.824704,0.473566,0.835417,00:01
4,0.488048,0.830708,0.458698,0.840365,00:01
5,0.466366,0.834995,0.453076,0.841853,00:01
6,0.436574,0.840926,0.442732,0.838914,00:01
7,0.427685,0.843549,0.439816,0.843378,00:01
8,0.401118,0.849891,0.435787,0.843862,00:01
9,0.408733,0.844675,0.431381,0.845499,00:01


In [156]:
ProcessPrediction.nextStepPrediction(awdLearner1, test, evaluation_params)

0.8412029

In [157]:
basicLearner = Basic1(training_params, vocab).trainbasic(data)

epoch,train_loss,train_accuracy_flat,valid_loss,valid_accuracy_flat,time
0,2.136154,0.441563,1.527118,0.580952,00:00
1,1.354069,0.613962,1.253433,0.634561,00:00
2,1.21335,0.632503,1.184963,0.634561,00:00
3,1.167028,0.634917,1.15763,0.638281,00:00
4,1.145744,0.639117,1.143019,0.641257,00:00


In [158]:
ProcessPrediction.nextStepPrediction(basicLearner, test, evaluation_params)

RuntimeError: shape '[4096, 110, -1]' is invalid for input of size 3080

In [26]:
preprocesser = Preprocessing(URLs.BPIC_2012, processing_params)

In [27]:
data, test, vocab = preprocesser.preprocess_it()

In [71]:
awdlstm = AwdLstm(300, 300, 2, vocab).buildawd()

SequentialRNN(
  (0): AWD_LSTM(
    (emb): Embedding(28, 300, padding_idx=1)
    (emb_dp): EmbeddingDropout(
      (emb): Embedding(28, 300, padding_idx=1)
    )
    (rnns): ModuleList(
      (0): WeightDropout(
        (module): LSTM(300, 300, batch_first=True)
      )
      (1): WeightDropout(
        (module): LSTM(300, 300, batch_first=True)
      )
    )
    (input_dp): RNNDropout()
    (hidden_dps): ModuleList(
      (0): RNNDropout()
      (1): RNNDropout()
    )
  )
  (1): LinearDecoder(
    (output_dp): RNNDropout()
    (decoder): Linear(in_features=300, out_features=28, bias=True)
  )
)

In [78]:
learner = ProcessPrediction.training(awdlstm, data, training_params)

epoch,train_loss,train_accuracy_flat,valid_loss,valid_accuracy_flat,time
0,1.73365,0.501549,0.842395,0.729196,00:01
1,0.753355,0.759794,0.537232,0.825089,00:01
2,0.563382,0.815716,0.472045,0.838527,00:01
3,0.528647,0.821738,0.446587,0.841607,00:01
4,0.470215,0.83427,0.435952,0.843795,00:01
5,0.469708,0.833027,0.434758,0.848571,00:01
6,0.438017,0.838602,0.420035,0.849598,00:01
7,0.454278,0.835195,0.420199,0.849732,00:01
8,0.421732,0.842352,0.419329,0.849196,00:01
9,0.410793,0.845007,0.41593,0.847902,00:01


In [45]:
ProcessPrediction.suffixPrediction(learner, test, evaluation_params)
ProcessPrediction.nextStepPrediction(learner, test, evaluation_params)

0.8467781