In [1]:
from utils import *
from preprocessing import preprocess
from windowfy import windowfy
from featurizing import featurize
from training import train, do_ensemble, do_train
from eval_erisk import evaluate
from IPython.display import display, Markdown
from itertools import product
from numpy.random import seed
import tensorflow



In [2]:
# Fix the global random state before any experiment runs: `seed` is
# numpy.random.seed (imported in the first cell) and the second call seeds
# TensorFlow's global generator with the same value.
seed(42)
tensorflow.random.set_seed(42) 
# Record the seeding in the experiment log (logger presumably comes from `utils` — confirm).
logger("Initialized numpy random and tensorflow random seed at 42")

Initialized numpy random and tensorflow random seed at 42


In [3]:
def do_experiment(params):
    """Run a single experiment end to end: windowfy, featurize, train, evaluate.

    Args:
        params: dict holding the experiment settings. The keys read here are
            ``feat_window_size``, ``oversample``, ``calculate_feats``,
            ``normalize``, ``discretize``, ``include_feats``, ``cnn_model``,
            ``max_len``, ``batch_size`` and ``eval_window_size``. The whole
            dict is also forwarded to ``evaluate``.

    The start and the end of the run are reported through ``logger``.
    """
    logger("Starting experiment {}".format(params))

    # Each stage's arguments are looked up right before the stage runs, so a
    # missing key only surfaces once the earlier stages have completed.
    window_args = {
        "window_size": params["feat_window_size"],
        "is_oversample": params["oversample"],
    }
    windowfy(**window_args)

    feature_args = {
        "calculate_feats": params["calculate_feats"],
        "normalize": params["normalize"],
        "discretize": params["discretize"],
        "include_feats": params["include_feats"],
    }
    featurize(**feature_args)

    training_args = {
        "cnn_model": params["cnn_model"],
        "maxlen": params["max_len"],
        "batch_size": params["batch_size"],
    }
    train(**training_args)

    evaluate(params["eval_window_size"], params["feat_window_size"], params)

    logger("Finished experiment {}".format(params))

# obtain features and save them so we don't have to calculate them every single time
def do_experiment_first(params):
    """First stage of an experiment: window the data and compute raw features.

    Args:
        params: dict providing ``feat_window_size``, ``oversample``,
            ``include_new_data`` and ``include_feats``.

    ``featurize`` is asked to calculate the features with normalisation,
    discretisation and scaling all switched off; those options are applied
    by the second-stage functions, which call ``featurize`` again with
    ``calculate_feats=False``.
    """
    windowfy(
        window_size=params["feat_window_size"],
        is_oversample=params["oversample"],
        include_new_data=params["include_new_data"],
    )

    # Only the raw features are wanted at this point.
    raw_feature_flags = dict(
        calculate_feats=True,
        normalize=False,
        discretize=False,
        scale=False,
    )
    featurize(**raw_feature_flags, include_feats=params["include_feats"])

# obtain saved features, normalize or discretize them if necessary, and train and evaluate model
def do_experiment_second(params):
    """Second stage of an experiment: post-process features, train, evaluate.

    Args:
        params: dict providing ``normalize``, ``discretize``, ``scale``,
            ``include_feats``, ``model_name``, ``max_len``, ``batch_size``,
            ``shuffle``, ``eval_window_size`` and ``feat_window_size``. The
            whole dict is also forwarded to ``evaluate``.

    ``featurize`` is called with ``calculate_feats=False``, i.e. without
    requesting a fresh feature calculation.
    """
    # Post-processing switches, looked up in the order normalize, discretize, scale.
    post_processing = {
        flag: params[flag] for flag in ("normalize", "discretize", "scale")
    }
    featurize(
        calculate_feats=False,
        **post_processing,
        include_feats=params["include_feats"],
    )

    training_args = {
        "model_name": params["model_name"],
        "maxlen": params["max_len"],
        "batch_size": params["batch_size"],
        "shuffle": params["shuffle"],
    }
    do_train(**training_args)

    evaluate(params["eval_window_size"], params["feat_window_size"], params)
    
# legacy variant of the second stage: obtain saved features, normalize or discretize them if necessary, then train and evaluate the CNN model, the non-CNN model and the ensemble in turn
def do_experiment_second_old(params):
    """Legacy second stage: train/evaluate both model variants, then the ensemble.

    Args:
        params: dict providing ``normalize``, ``discretize``, ``scale``,
            ``include_feats``, ``max_len``, ``batch_size``, ``shuffle``,
            ``eval_window_size`` and ``feat_window_size``. The whole dict is
            also forwarded to ``evaluate``.

    Sequence: featurize (without recalculating features), then
    ``train(cnn_model=True)`` + evaluate, ``train(cnn_model=False)`` +
    evaluate, and finally ``do_ensemble`` + evaluate.
    """

    def run_evaluation():
        # Same evaluation call after every training step.
        evaluate(params["eval_window_size"], params["feat_window_size"], params)

    featurize(
        calculate_feats=False,
        normalize=params["normalize"],
        discretize=params["discretize"],
        scale=params["scale"],
        include_feats=params["include_feats"],
    )

    # CNN variant first, then the non-CNN variant.
    for use_cnn in (True, False):
        train(
            cnn_model=use_cnn,
            maxlen=params["max_len"],
            batch_size=params["batch_size"],
            shuffle=params["shuffle"],
        )
        run_evaluation()

    do_ensemble(maxlen=params["max_len"], batch_size=params["batch_size"])
    run_evaluation()
    
    
def traverse(d):
    """Yield every combination of a (possibly nested) parameter grid.

    Args:
        d: mapping from parameter name to one of
            * a ``list`` of candidate values,
            * a nested mapping (anything exposing ``.items()``), which is
              expanded recursively into its own combinations, or
            * any other object, which is treated as a single fixed value.

    Yields:
        dict: one dict per combination, with the same keys (in the same
        order) as ``d``. Combinations follow ``itertools.product`` order,
        i.e. the last key varies fastest. An empty grid yields exactly one
        empty dict.
    """
    if not d:
        # zip(*d.items()) cannot be unpacked into keys/values when there are
        # no items; an empty grid has exactly one (empty) combination.
        yield {}
        return

    def choices_for(value):
        # Lists enumerate alternatives, mappings are sub-grids, and anything
        # else is a constant that appears unchanged in every combination.
        if isinstance(value, list):
            return value
        if hasattr(value, "items"):
            return traverse(value)
        return [value]

    keys, values = zip(*d.items())
    for combination in product(*(choices_for(value) for value in values)):
        yield dict(zip(keys, combination))
        
def plot_metrics(history, metrics=('loss', 'prc', 'precision', 'recall')):
    """Plot train/validation curves for each metric, one subplot per metric.

    Args:
        history: object exposing ``.epoch`` (x values) and ``.history`` (a
            mapping from metric name to per-epoch values); presumably a Keras
            ``History`` — TODO confirm against the training module.
        metrics: names of the metrics to plot. Defaults to the four metrics
            that were previously hard-coded, so existing calls are unchanged.
            Passing ``'auc'`` makes the dedicated y-range below reachable.

    Subplots are laid out two per row. The validation curve (``'val_' +
    metric``) is drawn only when that key is present in ``history.history``.

    NOTE(review): relies on module-level ``plt`` and ``colors``, which are not
    defined in the visible cells — presumably provided by
    ``from utils import *``; confirm.
    """
    n_cols = 2
    # Enough rows for every metric; evaluates to 2 for the default four.
    n_rows = max(1, (len(metrics) + n_cols - 1) // n_cols)

    for n, metric in enumerate(metrics):
        name = metric.replace("_", " ").capitalize()
        plt.subplot(n_rows, n_cols, n + 1)
        plt.plot(history.epoch, history.history[metric], color=colors[0], label='Train')

        val_key = 'val_' + metric
        if val_key in history.history:
            # Same colour as the training curve, dashed to tell them apart.
            plt.plot(history.epoch, history.history[val_key],
                     color=colors[0], linestyle="--", label='Val')

        plt.xlabel('Epoch')
        plt.ylabel(name)
        if metric == 'loss':
            # Anchor the loss axis at zero but keep the automatic upper bound.
            plt.ylim([0, plt.ylim()[1]])
        elif metric == 'auc':
            plt.ylim([0.8, 1])
        else:
            plt.ylim([0, 1])

        plt.legend()


## Experiments

In [4]:
# Grid for the feature stage: every combination produced by traverse() is
# passed to do_experiment_first in the sweep cell.
first_part = dict(
    include_feats=[["first_prons"], ["first_prons", "sentiment", "nssi"]],
    feat_window_size=[10000, 10],  # original note: 10
    oversample=[False, True],
    include_new_data=[False, True],
)

# Grid for the training/evaluation stage: every combination is merged with the
# current feature-stage combination and passed to do_experiment_second.
second_part = dict(
    scale=[False],
    normalize=[False],
    discretize=[False],
    eval_window_size=[1],
    max_len=[1000],
    batch_size=[2],  # original note: 8, 2
    shuffle=[True, False],
    model_name=["cnn_model", "lstm_model", "ensemble_model"],
)

In [5]:
# Two-stage grid sweep. For every feature-stage combination the features are
# computed once (do_experiment_first); every training-stage combination is then
# run against them (do_experiment_second). A failure is logged and the sweep
# moves on: an error in one experiment skips only that experiment, an error in
# the feature stage skips that whole feature-stage combination.
import traceback  # local to this cell so the error handlers below are self-contained

for feature_params in traverse(first_part):
    try:
        logger("********** CALCULATING FEATURES FOR {} ***********".format(feature_params))
        display(Markdown("#### Calculating features for {}".format(feature_params)))
        do_experiment_first(feature_params)

        for model_params in traverse(second_part):
            # Full settings for this run; on a key clash the feature-stage
            # values win because they are applied last.
            params = model_params.copy()
            params.update(feature_params)
            logger("************ STARTING EXPERIMENT {} ***************".format(params))
            display(Markdown("#### Experiment {}".format(params)))
            try:
                do_experiment_second(params)
                logger("************ FINISHED EXPERIMENT {} ************* \n".format(params))
            except Exception as e:
                logger("*************************************")
                logger("Error during experiment {}: {}".format(params, e))
                # str(e) alone is often uninformative (e.g. a bare key name for
                # KeyError); keep the traceback so the failure can be diagnosed.
                logger(traceback.format_exc())
                logger("*************************************")
    except Exception as e:
        logger("*************************************")
        logger("General error during experiment {}: {}".format(feature_params, e))
        logger(traceback.format_exc())
        logger("*************************************")

********** CALCULATING FEATURES FOR {'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': False, 'include_new_data': False} ***********


#### Calculating features for {'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': False, 'include_new_data': False}

Windowfying training users
Windowfying test users

Finished windowfying
Featurizing calculate_feats=True, normalize=False, discretize=False, discretize_size=10, include_feats=['first_prons']
Initialized numpy random and tensorflow random seed at 42
Data size: 340, 340
Data size: 423, 423
Calculating first prons
Calculating first prons
************ STARTING EXPERIMENT {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': True, 'model_name': 'cnn_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': False, 'include_new_data': False} ***************


#### Experiment {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': True, 'model_name': 'cnn_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': False, 'include_new_data': False}

Featurizing calculate_feats=False, normalize=False, discretize=False, discretize_size=10, include_feats=['first_prons']
Initialized numpy random and tensorflow random seed at 42
Starting training with cnn_model=cnn_model and maxlen=1000 and batch size=2
Generating embeddings
Data size: 340
Training
Epoch 1/100

Epoch 00001: val_prc improved from -inf to 0.19118, saving model to models/cnn_model10002True.hdf5
Epoch 2/100

Epoch 00002: val_prc did not improve from 0.19118
Epoch 3/100

Epoch 00003: val_prc did not improve from 0.19118
Epoch 4/100

Epoch 00004: val_prc improved from 0.19118 to 0.21018, saving model to models/cnn_model10002True.hdf5
Epoch 5/100

Epoch 00005: val_prc improved from 0.21018 to 0.21152, saving model to models/cnn_model10002True.hdf5
Epoch 6/100

Epoch 00006: val_prc did not improve from 0.21152
Epoch 7/100

Epoch 00007: val_prc improved from 0.21152 to 0.36734, saving model to models/cnn_model10002True.hdf5
Epoch 8/100

Epoch 00008: val_prc did not improve from


Epoch 00016: val_prc did not improve from 0.48813
Epoch 17/100

Epoch 00017: val_prc did not improve from 0.48813
Epoch 18/100

Epoch 00018: val_prc did not improve from 0.48813
Epoch 19/100
Restoring model weights from the end of the best epoch.

Epoch 00019: val_prc did not improve from 0.48813
Epoch 00019: early stopping
Evaluating
Test Score: 11.126032829284668
Test Accuracy: 92.0
Entered here
              precision    recall  f1-score   support

           0       0.89      0.31      0.46       319
           1       0.29      0.88      0.44       104

    accuracy                           0.45       423
   macro avg       0.59      0.60      0.45       423
weighted avg       0.75      0.45      0.46       423

[[ 99 220]
 [ 12  92]]
Finished training and evaluation
{'precision': 0.2948717948717949, 'recall': 0.8846153846153846, 'F1': 0.4423076923076923, 'ERDE_5': 0.3737348333696606, 'ERDE_50': 0.3737348333696606, 'median_latency_tps': 10001.0, 'median_penalty_tps': 1.0, 'speed

  erdes5[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 5.0)))
  erdes50[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 50.0)))


#### Experiment {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': True, 'model_name': 'lstm_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': False, 'include_new_data': False}

Featurizing calculate_feats=False, normalize=False, discretize=False, discretize_size=10, include_feats=['first_prons']
Initialized numpy random and tensorflow random seed at 42
Starting training with cnn_model=lstm_model and maxlen=1000 and batch size=2
Generating embeddings
Data size: 340
Training
Epoch 1/100

Epoch 00001: val_prc improved from -inf to 0.19118, saving model to models/lstm_model10002True.hdf5
Epoch 2/100

Epoch 00002: val_prc did not improve from 0.19118
Epoch 3/100

Epoch 00003: val_prc did not improve from 0.19118
Epoch 4/100

Epoch 00004: val_prc did not improve from 0.19118
Epoch 5/100

Epoch 00005: val_prc did not improve from 0.19118
Epoch 6/100

Epoch 00006: val_prc did not improve from 0.19118
Epoch 7/100

Epoch 00007: val_prc did not improve from 0.19118
Epoch 8/100

Epoch 00008: val_prc did not improve from 0.19118
Epoch 9/100

Epoch 00009: val_prc did not improve from 0.19118
Epoch 10/100

Epoch 00010: val_prc did not improve from 0.19118
Epoch 11/100
Resto

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'precision': 0, 'recall': 0, 'F1': 0, 'ERDE_5': 0.2458628841607565, 'ERDE_50': 0.2458628841607565, 'median_latency_tps': nan, 'median_penalty_tps': nan, 'speed': nan, 'latency_weighted_f1': nan}
Writing results to CSV file
************ FINISHED EXPERIMENT {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': True, 'model_name': 'lstm_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': False, 'include_new_data': False} ************* 

************ STARTING EXPERIMENT {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': True, 'model_name': 'ensemble_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': False, 'include_new_data': False} ***************


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


#### Experiment {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': True, 'model_name': 'ensemble_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': False, 'include_new_data': False}

Featurizing calculate_feats=False, normalize=False, discretize=False, discretize_size=10, include_feats=['first_prons']
Initialized numpy random and tensorflow random seed at 42
Evaluating ensemble
Test Score: 0.6869466304779053
Test Accuracy: 0.7541370987892151
Entered here
              precision    recall  f1-score   support

           0       0.75      1.00      0.86       319
           1       0.00      0.00      0.00       104

    accuracy                           0.75       423
   macro avg       0.38      0.50      0.43       423
weighted avg       0.57      0.75      0.65       423

[[319   0]
 [104   0]]
Finished ensemble evaluation


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'precision': 0, 'recall': 0, 'F1': 0, 'ERDE_5': 0.2458628841607565, 'ERDE_50': 0.2458628841607565, 'median_latency_tps': nan, 'median_penalty_tps': nan, 'speed': nan, 'latency_weighted_f1': nan}
Writing results to CSV file
************ FINISHED EXPERIMENT {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': True, 'model_name': 'ensemble_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': False, 'include_new_data': False} ************* 

************ STARTING EXPERIMENT {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': False, 'model_name': 'cnn_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': False, 'include_new_data': False} ***************


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


#### Experiment {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': False, 'model_name': 'cnn_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': False, 'include_new_data': False}

Featurizing calculate_feats=False, normalize=False, discretize=False, discretize_size=10, include_feats=['first_prons']
Initialized numpy random and tensorflow random seed at 42
Starting training with cnn_model=cnn_model and maxlen=1000 and batch size=2
Generating embeddings
Data size: 340
Training
Epoch 1/100

Epoch 00001: val_prc improved from -inf to 0.19118, saving model to models/cnn_model10002False.hdf5
Epoch 2/100

Epoch 00002: val_prc did not improve from 0.19118
Epoch 3/100

Epoch 00003: val_prc did not improve from 0.19118
Epoch 4/100

Epoch 00004: val_prc did not improve from 0.19118
Epoch 5/100

Epoch 00005: val_prc did not improve from 0.19118
Epoch 6/100

Epoch 00006: val_prc improved from 0.19118 to 0.34529, saving model to models/cnn_model10002False.hdf5
Epoch 7/100

Epoch 00007: val_prc did not improve from 0.34529
Epoch 8/100

Epoch 00008: val_prc did not improve from 0.34529
Epoch 9/100

Epoch 00009: val_prc did not improve from 0.34529
Epoch 10/100

Epoch 00010: val

Test Score: 7.294650077819824
Test Accuracy: 85.0
Entered here
              precision    recall  f1-score   support

           0       0.87      0.39      0.54       319
           1       0.31      0.82      0.45       104

    accuracy                           0.50       423
   macro avg       0.59      0.61      0.49       423
weighted avg       0.73      0.50      0.52       423

[[126 193]
 [ 19  85]]
Finished training and evaluation
{'precision': 0.3057553956834532, 'recall': 0.8173076923076923, 'F1': 0.4450261780104712, 'ERDE_5': 0.3580414577849315, 'ERDE_50': 0.3580414577849315, 'median_latency_tps': 10001.0, 'median_penalty_tps': 1.0, 'speed': 0.0, 'latency_weighted_f1': 0.0}
Writing results to CSV file
************ FINISHED EXPERIMENT {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': False, 'model_name': 'cnn_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': False, 

  erdes5[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 5.0)))
  erdes50[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 50.0)))


#### Experiment {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': False, 'model_name': 'lstm_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': False, 'include_new_data': False}

Featurizing calculate_feats=False, normalize=False, discretize=False, discretize_size=10, include_feats=['first_prons']
Initialized numpy random and tensorflow random seed at 42
Starting training with cnn_model=lstm_model and maxlen=1000 and batch size=2
Generating embeddings
Data size: 340
Training
Epoch 1/100

Epoch 00001: val_prc improved from -inf to 0.19118, saving model to models/lstm_model10002False.hdf5
Epoch 2/100

Epoch 00002: val_prc did not improve from 0.19118
Epoch 3/100

Epoch 00003: val_prc did not improve from 0.19118
Epoch 4/100

Epoch 00004: val_prc did not improve from 0.19118
Epoch 5/100

Epoch 00005: val_prc did not improve from 0.19118
Epoch 6/100

Epoch 00006: val_prc did not improve from 0.19118
Epoch 7/100

Epoch 00007: val_prc did not improve from 0.19118
Epoch 8/100

Epoch 00008: val_prc did not improve from 0.19118
Epoch 9/100

Epoch 00009: val_prc did not improve from 0.19118
Epoch 10/100

Epoch 00010: val_prc did not improve from 0.19118
Epoch 11/100
Rest

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'precision': 0, 'recall': 0, 'F1': 0, 'ERDE_5': 0.2458628841607565, 'ERDE_50': 0.2458628841607565, 'median_latency_tps': nan, 'median_penalty_tps': nan, 'speed': nan, 'latency_weighted_f1': nan}
Writing results to CSV file
************ FINISHED EXPERIMENT {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': False, 'model_name': 'lstm_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': False, 'include_new_data': False} ************* 

************ STARTING EXPERIMENT {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': False, 'model_name': 'ensemble_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': False, 'include_new_data': False} ***************


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


#### Experiment {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': False, 'model_name': 'ensemble_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': False, 'include_new_data': False}

Featurizing calculate_feats=False, normalize=False, discretize=False, discretize_size=10, include_feats=['first_prons']
Initialized numpy random and tensorflow random seed at 42
Evaluating ensemble
Test Score: 0.9676070213317871
Test Accuracy: 0.7446808218955994
Entered here
              precision    recall  f1-score   support

           0       0.75      0.99      0.85       319
           1       0.00      0.00      0.00       104

    accuracy                           0.74       423
   macro avg       0.38      0.49      0.43       423
weighted avg       0.57      0.74      0.64       423

[[315   4]
 [104   0]]
Finished ensemble evaluation
{'precision': 0, 'recall': 0, 'F1': 0, 'ERDE_5': 0.24818782869182748, 'ERDE_50': 0.24818782869182748, 'median_latency_tps': nan, 'median_penalty_tps': nan, 'speed': nan, 'latency_weighted_f1': nan}
Writing results to CSV file
************ FINISHED EXPERIMENT {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


#### Calculating features for {'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': False, 'include_new_data': True}

Windowfying training users
Windowfying test users

Finished windowfying
Featurizing calculate_feats=True, normalize=False, discretize=False, discretize_size=10, include_feats=['first_prons']
Initialized numpy random and tensorflow random seed at 42
Data size: 414, 414
Data size: 423, 423
Calculating first prons
Calculating first prons
************ STARTING EXPERIMENT {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': True, 'model_name': 'cnn_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': False, 'include_new_data': True} ***************


#### Experiment {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': True, 'model_name': 'cnn_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': False, 'include_new_data': True}

Featurizing calculate_feats=False, normalize=False, discretize=False, discretize_size=10, include_feats=['first_prons']
Initialized numpy random and tensorflow random seed at 42
Starting training with cnn_model=cnn_model and maxlen=1000 and batch size=2
Generating embeddings
Data size: 414
Training
Epoch 1/100

Epoch 00001: val_prc improved from -inf to 0.96229, saving model to models/cnn_model10002True.hdf5
Epoch 2/100

Epoch 00002: val_prc did not improve from 0.96229
Epoch 3/100

Epoch 00003: val_prc improved from 0.96229 to 0.96727, saving model to models/cnn_model10002True.hdf5
Epoch 4/100

Epoch 00004: val_prc did not improve from 0.96727
Epoch 5/100

Epoch 00005: val_prc did not improve from 0.96727
Epoch 6/100

Epoch 00006: val_prc did not improve from 0.96727
Epoch 7/100

Epoch 00007: val_prc did not improve from 0.96727
Epoch 8/100

Epoch 00008: val_prc did not improve from 0.96727
Epoch 9/100

Epoch 00009: val_prc did not improve from 0.96727
Epoch 10/100

Epoch 00010: val_p

  erdes5[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 5.0)))
  erdes50[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 50.0)))


#### Experiment {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': True, 'model_name': 'lstm_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': False, 'include_new_data': True}

Featurizing calculate_feats=False, normalize=False, discretize=False, discretize_size=10, include_feats=['first_prons']
Initialized numpy random and tensorflow random seed at 42
Starting training with cnn_model=lstm_model and maxlen=1000 and batch size=2
Generating embeddings
Data size: 414
Training
Epoch 1/100

Epoch 00001: val_prc improved from -inf to 0.91271, saving model to models/lstm_model10002True.hdf5
Epoch 2/100

Epoch 00002: val_prc improved from 0.91271 to 0.92151, saving model to models/lstm_model10002True.hdf5
Epoch 3/100

Epoch 00003: val_prc improved from 0.92151 to 0.92398, saving model to models/lstm_model10002True.hdf5
Epoch 4/100

Epoch 00004: val_prc improved from 0.92398 to 0.93256, saving model to models/lstm_model10002True.hdf5
Epoch 5/100

Epoch 00005: val_prc did not improve from 0.93256
Epoch 6/100

Epoch 00006: val_prc improved from 0.93256 to 0.95303, saving model to models/lstm_model10002True.hdf5
Epoch 7/100

Epoch 00007: val_prc did not improve from 0.95


Epoch 00016: val_prc did not improve from 0.95652
Epoch 17/100

Epoch 00017: val_prc did not improve from 0.95652
Epoch 18/100

Epoch 00018: val_prc did not improve from 0.95652
Epoch 19/100

Epoch 00019: val_prc did not improve from 0.95652
Epoch 20/100
Restoring model weights from the end of the best epoch.

Epoch 00020: val_prc did not improve from 0.95652
Epoch 00020: early stopping
Evaluating
Test Score: 8.004263877868652
Test Accuracy: 15.0
Entered here
              precision    recall  f1-score   support

           0       0.75      0.82      0.78       319
           1       0.21      0.14      0.17       104

    accuracy                           0.65       423
   macro avg       0.48      0.48      0.47       423
weighted avg       0.61      0.65      0.63       423

[[261  58]
 [ 89  15]]
Finished training and evaluation
{'precision': 0.2054794520547945, 'recall': 0.14423076923076922, 'F1': 0.1694915254237288, 'ERDE_5': 0.2795745798612858, 'ERDE_50': 0.2795745798612858, 

  erdes5[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 5.0)))
  erdes50[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 50.0)))


#### Experiment {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': True, 'model_name': 'ensemble_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': False, 'include_new_data': True}

Featurizing calculate_feats=False, normalize=False, discretize=False, discretize_size=10, include_feats=['first_prons']
Initialized numpy random and tensorflow random seed at 42
Evaluating ensemble
Test Score: 0.818117618560791
Test Accuracy: 0.6453900933265686
Entered here
              precision    recall  f1-score   support

           0       0.80      0.71      0.75       319
           1       0.34      0.46      0.39       104

    accuracy                           0.65       423
   macro avg       0.57      0.58      0.57       423
weighted avg       0.69      0.65      0.66       423

[[225  94]
 [ 56  48]]
Finished ensemble evaluation
{'precision': 0.3380281690140845, 'recall': 0.46153846153846156, 'F1': 0.39024390243902435, 'ERDE_5': 0.3004990806409246, 'ERDE_50': 0.3004990806409246, 'median_latency_tps': 10001.0, 'median_penalty_tps': 1.0, 'speed': 0.0, 'latency_weighted_f1': 0.0}
Writing results to CSV file
************ FINISHED EXPERIMENT {'scale': False, 'normalize': Fa

  erdes5[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 5.0)))
  erdes50[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 50.0)))


#### Experiment {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': False, 'model_name': 'cnn_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': False, 'include_new_data': True}

Featurizing calculate_feats=False, normalize=False, discretize=False, discretize_size=10, include_feats=['first_prons']
Initialized numpy random and tensorflow random seed at 42
Starting training with cnn_model=cnn_model and maxlen=1000 and batch size=2
Generating embeddings
Data size: 414
Training
Epoch 1/100

Epoch 00001: val_prc improved from -inf to 0.92398, saving model to models/cnn_model10002False.hdf5
Epoch 2/100

Epoch 00002: val_prc improved from 0.92398 to 0.93629, saving model to models/cnn_model10002False.hdf5
Epoch 3/100

Epoch 00003: val_prc improved from 0.93629 to 0.93805, saving model to models/cnn_model10002False.hdf5
Epoch 4/100

Epoch 00004: val_prc did not improve from 0.93805
Epoch 5/100

Epoch 00005: val_prc improved from 0.93805 to 0.94300, saving model to models/cnn_model10002False.hdf5
Epoch 6/100

Epoch 00006: val_prc improved from 0.94300 to 0.96452, saving model to models/cnn_model10002False.hdf5
Epoch 7/100

Epoch 00007: val_prc did not improve from 0.964

Test Score: 7.289518356323242
Test Accuracy: 85.0
Entered here
              precision    recall  f1-score   support

           0       0.87      0.39      0.54       319
           1       0.31      0.82      0.45       104

    accuracy                           0.50       423
   macro avg       0.59      0.61      0.49       423
weighted avg       0.73      0.50      0.52       423

[[126 193]
 [ 19  85]]
Finished training and evaluation
{'precision': 0.3057553956834532, 'recall': 0.8173076923076923, 'F1': 0.4450261780104712, 'ERDE_5': 0.3580414577849315, 'ERDE_50': 0.3580414577849315, 'median_latency_tps': 10001.0, 'median_penalty_tps': 1.0, 'speed': 0.0, 'latency_weighted_f1': 0.0}
Writing results to CSV file
************ FINISHED EXPERIMENT {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': False, 'model_name': 'cnn_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': False, 

  erdes5[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 5.0)))
  erdes50[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 50.0)))


#### Experiment {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': False, 'model_name': 'lstm_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': False, 'include_new_data': True}

Featurizing calculate_feats=False, normalize=False, discretize=False, discretize_size=10, include_feats=['first_prons']
Initialized numpy random and tensorflow random seed at 42
Starting training with cnn_model=lstm_model and maxlen=1000 and batch size=2
Generating embeddings
Data size: 414
Training
Epoch 1/100

Epoch 00001: val_prc improved from -inf to 0.92151, saving model to models/lstm_model10002False.hdf5
Epoch 2/100

Epoch 00002: val_prc improved from 0.92151 to 0.92398, saving model to models/lstm_model10002False.hdf5
Epoch 3/100

Epoch 00003: val_prc improved from 0.92398 to 0.96045, saving model to models/lstm_model10002False.hdf5
Epoch 4/100

Epoch 00004: val_prc did not improve from 0.96045
Epoch 5/100

Epoch 00005: val_prc did not improve from 0.96045
Epoch 6/100

Epoch 00006: val_prc did not improve from 0.96045
Epoch 7/100

Epoch 00007: val_prc did not improve from 0.96045
Epoch 8/100

Epoch 00008: val_prc did not improve from 0.96045
Epoch 9/100

Epoch 00009: val_prc di

  erdes5[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 5.0)))
  erdes50[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 50.0)))


#### Experiment {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': False, 'model_name': 'ensemble_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': False, 'include_new_data': True}

Featurizing calculate_feats=False, normalize=False, discretize=False, discretize_size=10, include_feats=['first_prons']
Initialized numpy random and tensorflow random seed at 42
Evaluating ensemble
Test Score: 1.0000375509262085
Test Accuracy: 0.6122931241989136
Entered here
              precision    recall  f1-score   support

           0       0.78      0.67      0.72       319
           1       0.30      0.42      0.35       104

    accuracy                           0.61       423
   macro avg       0.54      0.55      0.54       423
weighted avg       0.66      0.61      0.63       423

[[215 104]
 [ 60  44]]
Finished ensemble evaluation
{'precision': 0.2972972972972973, 'recall': 0.4230769230769231, 'F1': 0.34920634920634924, 'ERDE_5': 0.3063114419686021, 'ERDE_50': 0.3063114419686021, 'median_latency_tps': 10001.0, 'median_penalty_tps': 1.0, 'speed': 0.0, 'latency_weighted_f1': 0.0}
Writing results to CSV file
************ FINISHED EXPERIMENT {'scale': False, 'normalize': Fa

  erdes5[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 5.0)))
  erdes50[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 50.0)))


#### Calculating features for {'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': True, 'include_new_data': False}

Windowfying training users
Windowfying test users
Oversampling train users
Data size: 448

Finished windowfying
Featurizing calculate_feats=True, normalize=False, discretize=False, discretize_size=10, include_feats=['first_prons']
Initialized numpy random and tensorflow random seed at 42
Data size: 448, 448
Data size: 423, 423
Calculating first prons
Calculating first prons
************ STARTING EXPERIMENT {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': True, 'model_name': 'cnn_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': True, 'include_new_data': False} ***************


#### Experiment {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': True, 'model_name': 'cnn_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': True, 'include_new_data': False}

Featurizing calculate_feats=False, normalize=False, discretize=False, discretize_size=10, include_feats=['first_prons']
Initialized numpy random and tensorflow random seed at 42
Starting training with cnn_model=cnn_model and maxlen=1000 and batch size=2
Generating embeddings
Data size: 448
Training
Epoch 1/100

Epoch 00001: val_prc improved from -inf to 1.00000, saving model to models/cnn_model10002True.hdf5
Epoch 2/100

Epoch 00002: val_prc did not improve from 1.00000
Epoch 3/100

Epoch 00003: val_prc did not improve from 1.00000
Epoch 4/100

Epoch 00004: val_prc improved from 1.00000 to 1.00000, saving model to models/cnn_model10002True.hdf5
Epoch 5/100

Epoch 00005: val_prc did not improve from 1.00000
Epoch 6/100

Epoch 00006: val_prc did not improve from 1.00000
Epoch 7/100

Epoch 00007: val_prc did not improve from 1.00000
Epoch 8/100

Epoch 00008: val_prc did not improve from 1.00000
Epoch 9/100

Epoch 00009: val_prc did not improve from 1.00000
Epoch 10/100

Epoch 00010: val_p

  erdes5[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 5.0)))
  erdes50[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 50.0)))


#### Experiment {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': True, 'model_name': 'lstm_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': True, 'include_new_data': False}

Featurizing calculate_feats=False, normalize=False, discretize=False, discretize_size=10, include_feats=['first_prons']
Initialized numpy random and tensorflow random seed at 42
Starting training with cnn_model=lstm_model and maxlen=1000 and batch size=2
Generating embeddings
Data size: 448
Training
Epoch 1/100

Epoch 00001: val_prc improved from -inf to 1.00000, saving model to models/lstm_model10002True.hdf5
Epoch 2/100

Epoch 00002: val_prc did not improve from 1.00000
Epoch 3/100

Epoch 00003: val_prc did not improve from 1.00000
Epoch 4/100

Epoch 00004: val_prc did not improve from 1.00000
Epoch 5/100

Epoch 00005: val_prc improved from 1.00000 to 1.00000, saving model to models/lstm_model10002True.hdf5
Epoch 6/100

Epoch 00006: val_prc did not improve from 1.00000
Epoch 7/100

Epoch 00007: val_prc did not improve from 1.00000
Epoch 8/100

Epoch 00008: val_prc did not improve from 1.00000
Epoch 9/100

Epoch 00009: val_prc did not improve from 1.00000
Epoch 10/100

Epoch 00010: va

Test Score: 9.044405937194824
Test Accuracy: 10.0
Entered here
              precision    recall  f1-score   support

           0       0.74      0.84      0.79       319
           1       0.17      0.10      0.12       104

    accuracy                           0.66       423
   macro avg       0.45      0.47      0.46       423
weighted avg       0.60      0.66      0.62       423

[[269  50]
 [ 94  10]]
Finished training and evaluation
{'precision': 0.16666666666666666, 'recall': 0.09615384615384616, 'F1': 0.1219512195121951, 'ERDE_5': 0.27492469079914383, 'ERDE_50': 0.27492469079914383, 'median_latency_tps': 10001.0, 'median_penalty_tps': 1.0, 'speed': 0.0, 'latency_weighted_f1': 0.0}
Writing results to CSV file
************ FINISHED EXPERIMENT {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': True, 'model_name': 'lstm_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': Tru

  erdes5[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 5.0)))
  erdes50[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 50.0)))


#### Experiment {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': True, 'model_name': 'ensemble_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': True, 'include_new_data': False}

Featurizing calculate_feats=False, normalize=False, discretize=False, discretize_size=10, include_feats=['first_prons']
Initialized numpy random and tensorflow random seed at 42
Evaluating ensemble
Test Score: 1.7506589889526367
Test Accuracy: 0.652482271194458
Entered here
              precision    recall  f1-score   support

           0       0.76      0.78      0.77       319
           1       0.28      0.26      0.27       104

    accuracy                           0.65       423
   macro avg       0.52      0.52      0.52       423
weighted avg       0.64      0.65      0.65       423

[[249  70]
 [ 77  27]]
Finished ensemble evaluation
{'precision': 0.27835051546391754, 'recall': 0.25961538461538464, 'F1': 0.26865671641791045, 'ERDE_5': 0.2865494134544987, 'ERDE_50': 0.2865494134544987, 'median_latency_tps': 10001.0, 'median_penalty_tps': 1.0, 'speed': 0.0, 'latency_weighted_f1': 0.0}
Writing results to CSV file
************ FINISHED EXPERIMENT {'scale': False, 'normalize': F

  erdes5[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 5.0)))
  erdes50[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 50.0)))


#### Experiment {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': False, 'model_name': 'cnn_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': True, 'include_new_data': False}

Featurizing calculate_feats=False, normalize=False, discretize=False, discretize_size=10, include_feats=['first_prons']
Initialized numpy random and tensorflow random seed at 42
Starting training with cnn_model=cnn_model and maxlen=1000 and batch size=2
Generating embeddings
Data size: 448
Training
Epoch 1/100

Epoch 00001: val_prc improved from -inf to 1.00000, saving model to models/cnn_model10002False.hdf5
Epoch 2/100

Epoch 00002: val_prc did not improve from 1.00000
Epoch 3/100

Epoch 00003: val_prc did not improve from 1.00000
Epoch 4/100

Epoch 00004: val_prc did not improve from 1.00000
Epoch 5/100

Epoch 00005: val_prc did not improve from 1.00000
Epoch 6/100

Epoch 00006: val_prc did not improve from 1.00000
Epoch 7/100

Epoch 00007: val_prc did not improve from 1.00000
Epoch 8/100

Epoch 00008: val_prc improved from 1.00000 to 1.00000, saving model to models/cnn_model10002False.hdf5
Epoch 9/100

Epoch 00009: val_prc did not improve from 1.00000
Epoch 10/100

Epoch 00010: val


Epoch 00017: val_prc did not improve from 1.00000
Epoch 18/100
Restoring model weights from the end of the best epoch.

Epoch 00018: val_prc did not improve from 1.00000
Epoch 00018: early stopping
Evaluating
Test Score: 14.84710693359375
Test Accuracy: 21.0
Entered here
              precision    recall  f1-score   support

           0       0.77      0.86      0.81       319
           1       0.32      0.20      0.25       104

    accuracy                           0.70       423
   macro avg       0.54      0.53      0.53       423
weighted avg       0.66      0.70      0.67       423

[[274  45]
 [ 83  21]]
Finished training and evaluation
{'precision': 0.3181818181818182, 'recall': 0.20192307692307693, 'F1': 0.24705882352941175, 'ERDE_5': 0.27201851013530504, 'ERDE_50': 0.27201851013530504, 'median_latency_tps': 10001.0, 'median_penalty_tps': 1.0, 'speed': 0.0, 'latency_weighted_f1': 0.0}
Writing results to CSV file
************ FINISHED EXPERIMENT {'scale': False, 'normalize'

  erdes5[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 5.0)))
  erdes50[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 50.0)))


#### Experiment {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': False, 'model_name': 'lstm_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': True, 'include_new_data': False}

Featurizing calculate_feats=False, normalize=False, discretize=False, discretize_size=10, include_feats=['first_prons']
Initialized numpy random and tensorflow random seed at 42
Starting training with cnn_model=lstm_model and maxlen=1000 and batch size=2
Generating embeddings
Data size: 448
Training
Epoch 1/100

Epoch 00001: val_prc improved from -inf to 1.00000, saving model to models/lstm_model10002False.hdf5
Epoch 2/100

Epoch 00002: val_prc did not improve from 1.00000
Epoch 3/100

Epoch 00003: val_prc did not improve from 1.00000
Epoch 4/100

Epoch 00004: val_prc did not improve from 1.00000
Epoch 5/100

Epoch 00005: val_prc did not improve from 1.00000
Epoch 6/100

Epoch 00006: val_prc did not improve from 1.00000
Epoch 7/100

Epoch 00007: val_prc did not improve from 1.00000
Epoch 8/100

Epoch 00008: val_prc did not improve from 1.00000
Epoch 9/100

Epoch 00009: val_prc did not improve from 1.00000
Epoch 10/100

Epoch 00010: val_prc did not improve from 1.00000
Epoch 11/100

Epo


Epoch 00016: val_prc did not improve from 1.00000
Epoch 17/100

Epoch 00017: val_prc did not improve from 1.00000
Epoch 18/100

Epoch 00018: val_prc did not improve from 1.00000
Epoch 19/100

Epoch 00019: val_prc did not improve from 1.00000
Epoch 20/100

Epoch 00020: val_prc did not improve from 1.00000
Epoch 21/100
Restoring model weights from the end of the best epoch.

Epoch 00021: val_prc did not improve from 1.00000
Epoch 00021: early stopping
Evaluating
Test Score: 15.466588020324707
Test Accuracy: 15.0
Entered here
              precision    recall  f1-score   support

           0       0.74      0.79      0.77       319
           1       0.19      0.14      0.16       104

    accuracy                           0.63       423
   macro avg       0.46      0.47      0.46       423
weighted avg       0.60      0.63      0.62       423

[[253  66]
 [ 89  15]]
Finished training and evaluation
{'precision': 0.18518518518518517, 'recall': 0.14423076923076922, 'F1': 0.1621621621621

  erdes5[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 5.0)))
  erdes50[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 50.0)))


#### Experiment {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': False, 'model_name': 'ensemble_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': True, 'include_new_data': False}

Featurizing calculate_feats=False, normalize=False, discretize=False, discretize_size=10, include_feats=['first_prons']
Initialized numpy random and tensorflow random seed at 42
Evaluating ensemble
Test Score: 2.7070302963256836
Test Accuracy: 0.6761229038238525
Entered here
              precision    recall  f1-score   support

           0       0.76      0.84      0.80       319
           1       0.27      0.18      0.22       104

    accuracy                           0.68       423
   macro avg       0.51      0.51      0.51       423
weighted avg       0.64      0.68      0.65       423

[[267  52]
 [ 85  19]]
Finished ensemble evaluation
{'precision': 0.2676056338028169, 'recall': 0.18269230769230768, 'F1': 0.2171428571428571, 'ERDE_5': 0.2760871630646793, 'ERDE_50': 0.2760871630646793, 'median_latency_tps': 10001.0, 'median_penalty_tps': 1.0, 'speed': 0.0, 'latency_weighted_f1': 0.0}
Writing results to CSV file
************ FINISHED EXPERIMENT {'scale': False, 'normalize': Fa

  erdes5[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 5.0)))
  erdes50[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 50.0)))


#### Calculating features for {'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': True, 'include_new_data': True}

Windowfying training users
Windowfying test users
Oversampling train users
Data size: 448

Finished windowfying
Featurizing calculate_feats=True, normalize=False, discretize=False, discretize_size=10, include_feats=['first_prons']
Initialized numpy random and tensorflow random seed at 42
Data size: 448, 448
Data size: 423, 423
Calculating first prons
Calculating first prons
************ STARTING EXPERIMENT {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': True, 'model_name': 'cnn_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': True, 'include_new_data': True} ***************


#### Experiment {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': True, 'model_name': 'cnn_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': True, 'include_new_data': True}

Featurizing calculate_feats=False, normalize=False, discretize=False, discretize_size=10, include_feats=['first_prons']
Initialized numpy random and tensorflow random seed at 42
Starting training with cnn_model=cnn_model and maxlen=1000 and batch size=2
Generating embeddings
Data size: 448
Training
Epoch 1/100

Epoch 00001: val_prc improved from -inf to 1.00000, saving model to models/cnn_model10002True.hdf5
Epoch 2/100

Epoch 00002: val_prc did not improve from 1.00000
Epoch 3/100

Epoch 00003: val_prc did not improve from 1.00000
Epoch 4/100

Epoch 00004: val_prc improved from 1.00000 to 1.00000, saving model to models/cnn_model10002True.hdf5
Epoch 5/100

Epoch 00005: val_prc did not improve from 1.00000
Epoch 6/100

Epoch 00006: val_prc did not improve from 1.00000
Epoch 7/100

Epoch 00007: val_prc did not improve from 1.00000
Epoch 8/100

Epoch 00008: val_prc did not improve from 1.00000
Epoch 9/100

Epoch 00009: val_prc did not improve from 1.00000
Epoch 10/100

Epoch 00010: val_p

  erdes5[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 5.0)))
  erdes50[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 50.0)))


#### Experiment {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': True, 'model_name': 'lstm_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': True, 'include_new_data': True}

Featurizing calculate_feats=False, normalize=False, discretize=False, discretize_size=10, include_feats=['first_prons']
Initialized numpy random and tensorflow random seed at 42
Starting training with cnn_model=lstm_model and maxlen=1000 and batch size=2
Generating embeddings
Data size: 448
Training
Epoch 1/100

Epoch 00001: val_prc improved from -inf to 1.00000, saving model to models/lstm_model10002True.hdf5
Epoch 2/100

Epoch 00002: val_prc did not improve from 1.00000
Epoch 3/100

Epoch 00003: val_prc did not improve from 1.00000
Epoch 4/100

Epoch 00004: val_prc did not improve from 1.00000
Epoch 5/100

Epoch 00005: val_prc improved from 1.00000 to 1.00000, saving model to models/lstm_model10002True.hdf5
Epoch 6/100

Epoch 00006: val_prc did not improve from 1.00000
Epoch 7/100

Epoch 00007: val_prc did not improve from 1.00000
Epoch 8/100

Epoch 00008: val_prc did not improve from 1.00000
Epoch 9/100

Epoch 00009: val_prc did not improve from 1.00000
Epoch 10/100

Epoch 00010: va

{'precision': 0.15384615384615385, 'recall': 0.019230769230769232, 'F1': 0.03418803418803419, 'ERDE_5': 0.2522564816212017, 'ERDE_50': 0.2522564816212017, 'median_latency_tps': 10001.0, 'median_penalty_tps': 1.0, 'speed': 0.0, 'latency_weighted_f1': 0.0}
Writing results to CSV file
************ FINISHED EXPERIMENT {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': True, 'model_name': 'lstm_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': True, 'include_new_data': True} ************* 

************ STARTING EXPERIMENT {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': True, 'model_name': 'ensemble_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': True, 'include_new_data': True} ***************


  erdes5[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 5.0)))
  erdes50[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 50.0)))


#### Experiment {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': True, 'model_name': 'ensemble_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': True, 'include_new_data': True}

Featurizing calculate_feats=False, normalize=False, discretize=False, discretize_size=10, include_feats=['first_prons']
Initialized numpy random and tensorflow random seed at 42
Evaluating ensemble
Test Score: 3.011082410812378
Test Accuracy: 0.7234042286872864
Entered here
              precision    recall  f1-score   support

           0       0.75      0.96      0.84       319
           1       0.07      0.01      0.02       104

    accuracy                           0.72       423
   macro avg       0.41      0.48      0.43       423
weighted avg       0.58      0.72      0.64       423

[[305  14]
 [103   1]]
Finished ensemble evaluation
{'precision': 0.06666666666666667, 'recall': 0.009615384615384616, 'F1': 0.01680672268907563, 'ERDE_5': 0.25400019001950497, 'ERDE_50': 0.25400019001950497, 'median_latency_tps': 10001.0, 'median_penalty_tps': 1.0, 'speed': 0.0, 'latency_weighted_f1': 0.0}
Writing results to CSV file
************ FINISHED EXPERIMENT {'scale': False, 'normalize'

  erdes5[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 5.0)))
  erdes50[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 50.0)))


#### Experiment {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': False, 'model_name': 'cnn_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': True, 'include_new_data': True}

Featurizing calculate_feats=False, normalize=False, discretize=False, discretize_size=10, include_feats=['first_prons']
Initialized numpy random and tensorflow random seed at 42
Starting training with cnn_model=cnn_model and maxlen=1000 and batch size=2
Generating embeddings
Data size: 448
Training
Epoch 1/100

Epoch 00001: val_prc improved from -inf to 1.00000, saving model to models/cnn_model10002False.hdf5
Epoch 2/100

Epoch 00002: val_prc improved from 1.00000 to 1.00000, saving model to models/cnn_model10002False.hdf5
Epoch 3/100

Epoch 00003: val_prc did not improve from 1.00000
Epoch 4/100

Epoch 00004: val_prc did not improve from 1.00000
Epoch 5/100

Epoch 00005: val_prc did not improve from 1.00000
Epoch 6/100

Epoch 00006: val_prc did not improve from 1.00000
Epoch 7/100

Epoch 00007: val_prc did not improve from 1.00000
Epoch 8/100

Epoch 00008: val_prc did not improve from 1.00000
Epoch 9/100

Epoch 00009: val_prc did not improve from 1.00000
Epoch 10/100

Epoch 00010: val

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'precision': 0, 'recall': 0, 'F1': 0, 'ERDE_5': 0.2458628841607565, 'ERDE_50': 0.2458628841607565, 'median_latency_tps': nan, 'median_penalty_tps': nan, 'speed': nan, 'latency_weighted_f1': nan}
Writing results to CSV file
************ FINISHED EXPERIMENT {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': False, 'model_name': 'cnn_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': True, 'include_new_data': True} ************* 

************ STARTING EXPERIMENT {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': False, 'model_name': 'lstm_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': True, 'include_new_data': True} ***************


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


#### Experiment {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': False, 'model_name': 'lstm_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': True, 'include_new_data': True}

Featurizing calculate_feats=False, normalize=False, discretize=False, discretize_size=10, include_feats=['first_prons']
Initialized numpy random and tensorflow random seed at 42
Starting training with cnn_model=lstm_model and maxlen=1000 and batch size=2
Generating embeddings
Data size: 448
Training
Epoch 1/100

Epoch 00001: val_prc improved from -inf to 1.00000, saving model to models/lstm_model10002False.hdf5
Epoch 2/100

Epoch 00002: val_prc did not improve from 1.00000
Epoch 3/100

Epoch 00003: val_prc did not improve from 1.00000
Epoch 4/100

Epoch 00004: val_prc did not improve from 1.00000
Epoch 5/100

Epoch 00005: val_prc improved from 1.00000 to 1.00000, saving model to models/lstm_model10002False.hdf5
Epoch 6/100

Epoch 00006: val_prc did not improve from 1.00000
Epoch 7/100

Epoch 00007: val_prc did not improve from 1.00000
Epoch 8/100

Epoch 00008: val_prc did not improve from 1.00000
Epoch 9/100

Epoch 00009: val_prc did not improve from 1.00000
Epoch 10/100

Epoch 00010: 

Entered here
              precision    recall  f1-score   support

           0       0.75      0.89      0.81       319
           1       0.20      0.09      0.12       104

    accuracy                           0.69       423
   macro avg       0.47      0.49      0.47       423
weighted avg       0.61      0.69      0.64       423

[[283  36]
 [ 95   9]]
Finished training and evaluation
{'precision': 0.2, 'recall': 0.08653846153846154, 'F1': 0.1208053691275168, 'ERDE_5': 0.26678738494039533, 'ERDE_50': 0.26678738494039533, 'median_latency_tps': 10001.0, 'median_penalty_tps': 1.0, 'speed': 0.0, 'latency_weighted_f1': 0.0}
Writing results to CSV file
************ FINISHED EXPERIMENT {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': False, 'model_name': 'lstm_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': True, 'include_new_data': True} ************* 

************ STARTIN

  erdes5[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 5.0)))
  erdes50[ierdes] = 1.0 - (1.0 / (1.0 + np.exp((r['sequence'] + 1) - 50.0)))


#### Experiment {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': False, 'model_name': 'ensemble_model', 'include_feats': ['first_prons'], 'feat_window_size': 10000, 'oversample': True, 'include_new_data': True}

Featurizing calculate_feats=False, normalize=False, discretize=False, discretize_size=10, include_feats=['first_prons']
Initialized numpy random and tensorflow random seed at 42
Evaluating ensemble
Test Score: 2.742306709289551
Test Accuracy: 0.7446808218955994
Entered here
              precision    recall  f1-score   support

           0       0.75      0.99      0.85       319
           1       0.00      0.00      0.00       104

    accuracy                           0.74       423
   macro avg       0.38      0.49      0.43       423
weighted avg       0.57      0.74      0.64       423

[[315   4]
 [104   0]]
Finished ensemble evaluation
{'precision': 0, 'recall': 0, 'F1': 0, 'ERDE_5': 0.24818782869182748, 'ERDE_50': 0.24818782869182748, 'median_latency_tps': nan, 'median_penalty_tps': nan, 'speed': nan, 'latency_weighted_f1': nan}
Writing results to CSV file
************ FINISHED EXPERIMENT {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_l

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


#### Calculating features for {'include_feats': ['first_prons'], 'feat_window_size': 10, 'oversample': False, 'include_new_data': False}

Windowfying training users
Windowfying test users

Finished windowfying
Featurizing calculate_feats=True, normalize=False, discretize=False, discretize_size=10, include_feats=['first_prons']
Initialized numpy random and tensorflow random seed at 42
Data size: 167714, 167714
Data size: 99443, 99443
Calculating first prons
Calculating first prons
************ STARTING EXPERIMENT {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': True, 'model_name': 'cnn_model', 'include_feats': ['first_prons'], 'feat_window_size': 10, 'oversample': False, 'include_new_data': False} ***************


#### Experiment {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': True, 'model_name': 'cnn_model', 'include_feats': ['first_prons'], 'feat_window_size': 10, 'oversample': False, 'include_new_data': False}

Featurizing calculate_feats=False, normalize=False, discretize=False, discretize_size=10, include_feats=['first_prons']
Initialized numpy random and tensorflow random seed at 42
Starting training with cnn_model=cnn_model and maxlen=1000 and batch size=2
Generating embeddings
Data size: 167714
Training
Epoch 1/100

Epoch 00001: val_prc improved from -inf to 0.05981, saving model to models/cnn_model10002True.hdf5
Epoch 2/100

Epoch 00002: val_prc did not improve from 0.05981
Epoch 3/100

Epoch 00003: val_prc did not improve from 0.05981
Epoch 4/100

Epoch 00004: val_prc did not improve from 0.05981
Epoch 5/100

Epoch 00005: val_prc did not improve from 0.05981
Epoch 6/100

Epoch 00006: val_prc did not improve from 0.05981
Epoch 7/100

Epoch 00007: val_prc did not improve from 0.05981
Epoch 8/100

Epoch 00008: val_prc improved from 0.05981 to 0.29414, saving model to models/cnn_model10002True.hdf5
Epoch 9/100

Epoch 00009: val_prc did not improve from 0.29414
Epoch 10/100

Epoch 00010: va


Epoch 00016: val_prc did not improve from 0.29414
Epoch 17/100

Epoch 00017: val_prc did not improve from 0.29414
Epoch 18/100
Restoring model weights from the end of the best epoch.

Epoch 00018: val_prc did not improve from 0.29414
Epoch 00018: early stopping
Evaluating
Test Score: 0.5334315896034241
Test Accuracy: 4762.0
Entered here
              precision    recall  f1-score   support

           0       0.93      0.87      0.90     88584
           1       0.30      0.44      0.36     10859

    accuracy                           0.83     99443
   macro avg       0.61      0.66      0.63     99443
weighted avg       0.86      0.83      0.84     99443

[[77482 11102]
 [ 6097  4762]]
Finished training and evaluation
{'precision': 0.28771929824561404, 'recall': 0.7884615384615384, 'F1': 0.4215938303341903, 'ERDE_5': 0.3635509567564749, 'ERDE_50': 0.18043223769740366, 'median_latency_tps': 11.0, 'median_penalty_tps': 0.03898023902249159, 'speed': 0.9610197609775084, 'latency_weighte

#### Experiment {'scale': False, 'normalize': False, 'discretize': False, 'eval_window_size': 1, 'max_len': 1000, 'batch_size': 2, 'shuffle': True, 'model_name': 'lstm_model', 'include_feats': ['first_prons'], 'feat_window_size': 10, 'oversample': False, 'include_new_data': False}

Featurizing calculate_feats=False, normalize=False, discretize=False, discretize_size=10, include_feats=['first_prons']
Initialized numpy random and tensorflow random seed at 42
Starting training with cnn_model=lstm_model and maxlen=1000 and batch size=2
Generating embeddings
Data size: 167714
Training
Epoch 1/100

KeyboardInterrupt: 