In [2]:
from utils import *
from experiment_utils import *
from preprocessing import preprocess
from windowfy import windowfy
from featurizing import featurize
from tfidf_featurizer import combine_features, tfidf_featurize
from training import train, do_ensemble, do_train
from training_traditional import train_and_evaluate
from eval_erisk import evaluate, ensemble_vote
from IPython.display import display, Markdown
from itertools import product
import traceback
from numpy.random import seed
import tensorflow
import numpy as np

In [3]:
# Single source of truth for the seed, so the value that is applied and the
# value reported in the log message below cannot drift apart.
RANDOM_SEED = 42

seed(RANDOM_SEED)  # `seed` is imported from numpy.random
tensorflow.random.set_seed(RANDOM_SEED)
logger("Initialized numpy random and tensorflow random seed at {}".format(RANDOM_SEED))

Initialized numpy random and tensorflow random seed at 42


In [5]:
# Experiment configuration.
#
# `first_part` and `second_part` are parameter grids: every key maps to a list
# of candidate values. The experiments cell feeds each grid to `traverse()`,
# which yields one concrete {name: value} dict per combination (judging by the
# logged runs, the later keys vary fastest).

# Grid handed to `experiment.prepare_data(...)`, one combination at a time.
# NOTE(review): when "discretize" is False, "discretize_size" presumably has no
# effect, so those combinations look redundant -- confirm before long runs.
first_part = dict(
    include_feats=[["first_prons", "nssi"], ["first_prons", "sentiment", "nssi"]],
    feat_window_size=[10],
    max_size=[20],
    sample_weights_size=[20],
    oversample=[False],
    include_new_data=[False],
    tfidf_max_features=[5000, 50000],
    scale=[False, True],
    normalize=[True, False],
    discretize=[True, False],
    discretize_size=[50, 75],
    dis_strategy=["quantile"],
)

# Grid merged with each `first_part` combination to form the full parameter
# set passed to `experiment.train_and_evaluate_model(...)`.
second_part = dict(
    eval_window_size=[1],
    maxlen=[1000],
    batch_size=[32],
    epochs=[100],
    patience=[10],
    iterations=[1],
    shuffle=[True, False],
)

# Model names and the ensembles built from them, both given to `Experiment`.
models = ["svm", "bayes", "cnn_model"]
ensemble_combinations = [["svm", "bayes", "cnn_model"]]

# Candidate ensemble weightings, three numbers per entry.
# NOTE(review): presumably positional, matching the model order inside each
# ensemble combination -- confirm against `Experiment`.
weights = [
    [1, 1, 1],
    [2, 1, 1],
    [1, 2, 1],
    [2, 2, 1],
    [1, 1, 2],
    [3, 3, 1],
    [5, 5, 1],
    [1, 5, 1],
]

# File name given to `Experiment` for the evaluation results.
eval_filename = "experiments_10-nonedata-test.csv"

## Experiments

In [6]:
def _log_failure(message):
    """Log a swallowed failure, framed by separator lines, with its traceback.

    Must be called from inside an ``except`` block, because
    ``traceback.format_exc()`` formats the exception currently being handled.

    Args:
        message: One-line description of what failed (parameters and error).
    """
    separator = "*************************************"
    logger(separator)
    logger(message)
    # str(error) alone drops the exception type and the failing location (and
    # is empty for some exceptions), so record the full traceback as well.
    logger(traceback.format_exc())
    logger(separator)


# Grid-search driver.
#
# For every combination from `first_part` the data is prepared once; then every
# combination from `second_part` is merged with it and trained/evaluated.
# Failures are logged and skipped so one bad configuration does not abort the
# whole sweep. Only `Exception` is caught, so a kernel interrupt
# (KeyboardInterrupt) still stops the run.
experiment = Experiment(models, ensemble_combinations, eval_filename)

for feature_params in traverse(first_part):
    try:
        logger("********** CALCULATING FEATURES FOR {} ***********".format(feature_params))
        display(Markdown("#### Calculating features for {}".format(feature_params)))

        experiment.prepare_data(feature_params)

        for training_params in traverse(second_part):
            # Same key order as copy()+update(): training keys first, then
            # the feature keys.
            params = {**training_params, **feature_params}
            logger("************ STARTING EXPERIMENT {} ***************".format(params))
            display(Markdown("#### Experiment {}".format(params)))
            try:
                experiment.train_and_evaluate_model(params, weights)
                logger("************ FINISHED EXPERIMENT {} ************* \n".format(params))
            except Exception as error:
                # Best effort: record the failure and move on to the next
                # training configuration.
                _log_failure("Error during experiment {}: {}".format(params, error))
    except Exception as error:
        # Anything raised outside the inner try (e.g. while preparing the
        # data) skips every training run for this feature configuration.
        _log_failure("General error during experiment {}: {}".format(feature_params, error))

********** CALCULATING FEATURES FOR {'include_feats': ['first_prons', 'nssi'], 'feat_window_size': 10, 'max_size': 20, 'sample_weights_size': 20, 'oversample': False, 'include_new_data': False, 'tfidf_max_features': 5000, 'scale': False, 'normalize': True, 'discretize': True, 'discretize_size': 50, 'dis_strategy': 'quantile'} ***********


#### Calculating features for {'include_feats': ['first_prons', 'nssi'], 'feat_window_size': 10, 'max_size': 20, 'sample_weights_size': 20, 'oversample': False, 'include_new_data': False, 'tfidf_max_features': 5000, 'scale': False, 'normalize': True, 'discretize': True, 'discretize_size': 50, 'dis_strategy': 'quantile'}

PREPARING DATA FOR PARAMS {'include_feats': ['first_prons', 'nssi'], 'feat_window_size': 10, 'max_size': 20, 'sample_weights_size': 20, 'oversample': False, 'include_new_data': False, 'tfidf_max_features': 5000, 'scale': False, 'normalize': True, 'discretize': True, 'discretize_size': 50, 'dis_strategy': 'quantile'}
Windowfying training users
Windowfying test users

Finished windowfying
Featurizing calculate_feats=True, normalize=True, discretize=True, discretize_size=50, include_feats=['first_prons', 'nssi']
Initialized numpy random and tensorflow random seed at 42
Data size: 3926, 3926
Data size: 4650, 4650
Calculating first prons
Calculating NSSI words
Calculating first prons
Calculating NSSI words
Normalizing features
Discretizing


  'decreasing the number of bins.' % jj)
  'decreasing the number of bins.' % jj)
  "replaced with 0." % jj)
  'decreasing the number of bins.' % jj)
  'decreasing the number of bins.' % jj)


Is the combined the same from tfidf: False
************ STARTING EXPERIMENT {'eval_window_size': 1, 'maxlen': 1000, 'batch_size': 32, 'epochs': 100, 'patience': 10, 'iterations': 1, 'shuffle': True, 'include_feats': ['first_prons', 'nssi'], 'feat_window_size': 10, 'max_size': 20, 'sample_weights_size': 20, 'oversample': False, 'include_new_data': False, 'tfidf_max_features': 5000, 'scale': False, 'normalize': True, 'discretize': True, 'discretize_size': 50, 'dis_strategy': 'quantile'} ***************


#### Experiment {'eval_window_size': 1, 'maxlen': 1000, 'batch_size': 32, 'epochs': 100, 'patience': 10, 'iterations': 1, 'shuffle': True, 'include_feats': ['first_prons', 'nssi'], 'feat_window_size': 10, 'max_size': 20, 'sample_weights_size': 20, 'oversample': False, 'include_new_data': False, 'tfidf_max_features': 5000, 'scale': False, 'normalize': True, 'discretize': True, 'discretize_size': 50, 'dis_strategy': 'quantile'}

TRAINING AND EVALUATING TRADITIONAL MODEL svm
Starting training traditional
              precision    recall  f1-score   support

           0       0.81      0.99      0.89      3602
           1       0.90      0.20      0.32      1048

    accuracy                           0.81      4650
   macro avg       0.85      0.59      0.61      4650
weighted avg       0.83      0.81      0.76      4650

[[3579   23]
 [ 843  205]]
Evaluating after getting time 248.481100507
              precision    recall  f1-score   support

           0       0.81      0.99      0.89      3602
           1       0.90      0.20      0.32      1048

    accuracy                           0.81      4650
   macro avg       0.85      0.59      0.61      4650
weighted avg       0.83      0.81      0.76      4650

[[3579   23]
 [ 843  205]]
Evaluated with elapsed time 36.46596258599999
EVALUATING FOR WINDOW SIZES 1, 2 AND 3 MODEL svm
{'precision': 0.7833333333333333, 'recall': 0.4519230769230769, 'F1': 0.57317

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


{'precision': 0.8461538461538461, 'recall': 0.21153846153846154, 'F1': 0.3384615384615385, 'ERDE_5': 0.24812800344341096, 'ERDE_50': 0.19617837242705216, 'median_latency_tps': 12.5, 'median_penalty_tps': 0.044819781830275796, 'speed': 0.9551802181697242, 'latency_weighted_f1': 0.3232917661497528}
Writing results to CSV file
{'precision': 0.8181818181818182, 'recall': 0.08653846153846154, 'F1': 0.15652173913043477, 'ERDE_5': 0.24701859584703958, 'ERDE_50': 0.2257487606816118, 'median_latency_tps': 15.0, 'median_penalty_tps': 0.054545807509676525, 'speed': 0.9454541924903235, 'latency_weighted_f1': 0.14798413447674627}
Writing results to CSV file
{'precision': 1.0, 'recall': 0.07692307692307693, 'F1': 0.14285714285714288, 'ERDE_5': 0.2458619418069205, 'ERDE_50': 0.22695035460993082, 'median_latency_tps': 16.5, 'median_penalty_tps': 0.06037624654335805, 'speed': 0.939623753456642, 'latency_weighted_f1': 0.1342319647795203}
Writing results to CSV file
EVALUATING ENSEMBLE ['svm', 'bayes', '

#### Experiment {'eval_window_size': 1, 'maxlen': 1000, 'batch_size': 32, 'epochs': 100, 'patience': 10, 'iterations': 1, 'shuffle': False, 'include_feats': ['first_prons', 'nssi'], 'feat_window_size': 10, 'max_size': 20, 'sample_weights_size': 20, 'oversample': False, 'include_new_data': False, 'tfidf_max_features': 5000, 'scale': False, 'normalize': True, 'discretize': True, 'discretize_size': 50, 'dis_strategy': 'quantile'}

TRAINING AND EVALUATING TRADITIONAL MODEL svm
Starting training traditional
              precision    recall  f1-score   support

           0       0.81      0.99      0.89      3602
           1       0.90      0.20      0.32      1048

    accuracy                           0.81      4650
   macro avg       0.85      0.59      0.61      4650
weighted avg       0.83      0.81      0.76      4650

[[3579   23]
 [ 843  205]]
Evaluating after getting time 930.182297032
              precision    recall  f1-score   support

           0       0.81      0.99      0.89      3602
           1       0.90      0.20      0.32      1048

    accuracy                           0.81      4650
   macro avg       0.85      0.59      0.61      4650
weighted avg       0.83      0.81      0.76      4650

[[3579   23]
 [ 843  205]]
Evaluated with elapsed time 37.88156583
EVALUATING FOR WINDOW SIZES 1, 2 AND 3 MODEL svm
{'precision': 0.7833333333333333, 'recall': 0.4519230769230769, 'F1': 0.57317073170

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


Test Score: 1.0830055475234985
Test Accuracy: 0.0
Entered here
              precision    recall  f1-score   support

           0       0.77      1.00      0.87      3602
           1       0.00      0.00      0.00      1048

    accuracy                           0.77      4650
   macro avg       0.39      0.50      0.44      4650
weighted avg       0.60      0.77      0.68      4650

[[3602    0]
 [1048    0]]
Evaluated with elapsed time 64.31685532000006
EVALUATING FOR WINDOW SIZES 1, 2 AND 3 MODEL cnn_model
{'precision': 0, 'recall': 0, 'F1': 0, 'ERDE_5': 0.2458628841607565, 'ERDE_50': 0.2458628841607565, 'median_latency_tps': nan, 'median_penalty_tps': nan, 'speed': nan, 'latency_weighted_f1': nan}
Writing results to CSV file
{'precision': 0, 'recall': 0, 'F1': 0, 'ERDE_5': 0.2458628841607565, 'ERDE_50': 0.2458628841607565, 'median_latency_tps': nan, 'median_penalty_tps': nan, 'speed': nan, 'latency_weighted_f1': nan}
Writing results to CSV file
{'precision': 0, 'recall': 0, 'F1'

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


{'precision': 1.0, 'recall': 0.07692307692307693, 'F1': 0.14285714285714288, 'ERDE_5': 0.2458619418069205, 'ERDE_50': 0.22695035460993082, 'median_latency_tps': 16.5, 'median_penalty_tps': 0.06037624654335805, 'speed': 0.939623753456642, 'latency_weighted_f1': 0.1342319647795203}
Writing results to CSV file
EVALUATING ENSEMBLE ['svm', 'bayes', 'cnn_model'] with weights [2, 1, 1]
EVALUATING ENSEMBLE ['svm', 'bayes', 'cnn_model'] WITH WEIGHTS [2, 1, 1] FOR WINDOW SIZES 1, 2 AND 3
{'precision': 0.8461538461538461, 'recall': 0.21153846153846154, 'F1': 0.3384615384615385, 'ERDE_5': 0.24812800344341096, 'ERDE_50': 0.19617837242705216, 'median_latency_tps': 12.5, 'median_penalty_tps': 0.044819781830275796, 'speed': 0.9551802181697242, 'latency_weighted_f1': 0.3232917661497528}
Writing results to CSV file
{'precision': 0.8181818181818182, 'recall': 0.08653846153846154, 'F1': 0.15652173913043477, 'ERDE_5': 0.24701859584703958, 'ERDE_50': 0.2257487606816118, 'median_latency_tps': 15.0, 'median_p

#### Calculating features for {'include_feats': ['first_prons', 'nssi'], 'feat_window_size': 10, 'max_size': 20, 'sample_weights_size': 20, 'oversample': False, 'include_new_data': False, 'tfidf_max_features': 5000, 'scale': False, 'normalize': True, 'discretize': True, 'discretize_size': 75, 'dis_strategy': 'quantile'}

PREPARING DATA FOR PARAMS {'include_feats': ['first_prons', 'nssi'], 'feat_window_size': 10, 'max_size': 20, 'sample_weights_size': 20, 'oversample': False, 'include_new_data': False, 'tfidf_max_features': 5000, 'scale': False, 'normalize': True, 'discretize': True, 'discretize_size': 75, 'dis_strategy': 'quantile'}
Windowfying training users
Windowfying test users

Finished windowfying
Featurizing calculate_feats=True, normalize=True, discretize=True, discretize_size=75, include_feats=['first_prons', 'nssi']
Initialized numpy random and tensorflow random seed at 42
Data size: 3926, 3926
Data size: 4650, 4650
Calculating first prons
Calculating NSSI words
Calculating first prons
Calculating NSSI words
Normalizing features
Discretizing


  'decreasing the number of bins.' % jj)
  'decreasing the number of bins.' % jj)
  "replaced with 0." % jj)
  'decreasing the number of bins.' % jj)
  'decreasing the number of bins.' % jj)


Is the combined the same from tfidf: False
************ STARTING EXPERIMENT {'eval_window_size': 1, 'maxlen': 1000, 'batch_size': 32, 'epochs': 100, 'patience': 10, 'iterations': 1, 'shuffle': True, 'include_feats': ['first_prons', 'nssi'], 'feat_window_size': 10, 'max_size': 20, 'sample_weights_size': 20, 'oversample': False, 'include_new_data': False, 'tfidf_max_features': 5000, 'scale': False, 'normalize': True, 'discretize': True, 'discretize_size': 75, 'dis_strategy': 'quantile'} ***************


#### Experiment {'eval_window_size': 1, 'maxlen': 1000, 'batch_size': 32, 'epochs': 100, 'patience': 10, 'iterations': 1, 'shuffle': True, 'include_feats': ['first_prons', 'nssi'], 'feat_window_size': 10, 'max_size': 20, 'sample_weights_size': 20, 'oversample': False, 'include_new_data': False, 'tfidf_max_features': 5000, 'scale': False, 'normalize': True, 'discretize': True, 'discretize_size': 75, 'dis_strategy': 'quantile'}

TRAINING AND EVALUATING TRADITIONAL MODEL svm
Starting training traditional
              precision    recall  f1-score   support

           0       0.81      0.99      0.89      3602
           1       0.91      0.18      0.30      1048

    accuracy                           0.81      4650
   macro avg       0.86      0.59      0.60      4650
weighted avg       0.83      0.81      0.76      4650

[[3582   20]
 [ 857  191]]
Evaluating after getting time 1472.229396681
              precision    recall  f1-score   support

           0       0.81      0.99      0.89      3602
           1       0.91      0.18      0.30      1048

    accuracy                           0.81      4650
   macro avg       0.86      0.59      0.60      4650
weighted avg       0.83      0.81      0.76      4650

[[3582   20]
 [ 857  191]]
Evaluated with elapsed time 47.77661557500005
EVALUATING FOR WINDOW SIZES 1, 2 AND 3 MODEL svm
{'precision': 0.8135593220338984, 'recall': 0.46153846153846156, 'F1': 0.588

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


Test Score: 0.5897975564002991
Test Accuracy: 0.0
Entered here
              precision    recall  f1-score   support

           0       0.77      1.00      0.87      3602
           1       0.00      0.00      0.00      1048

    accuracy                           0.77      4650
   macro avg       0.39      0.50      0.44      4650
weighted avg       0.60      0.77      0.68      4650

[[3602    0]
 [1048    0]]
Evaluated with elapsed time 65.08561477300009
EVALUATING FOR WINDOW SIZES 1, 2 AND 3 MODEL cnn_model
{'precision': 0, 'recall': 0, 'F1': 0, 'ERDE_5': 0.2458628841607565, 'ERDE_50': 0.2458628841607565, 'median_latency_tps': nan, 'median_penalty_tps': nan, 'speed': nan, 'latency_weighted_f1': nan}
Writing results to CSV file
{'precision': 0, 'recall': 0, 'F1': 0, 'ERDE_5': 0.2458628841607565, 'ERDE_50': 0.2458628841607565, 'median_latency_tps': nan, 'median_penalty_tps': nan, 'speed': nan, 'latency_weighted_f1': nan}
Writing results to CSV file
{'precision': 0, 'recall': 0, 'F1'

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


{'precision': 1.0, 'recall': 0.0673076923076923, 'F1': 0.12612612612612611, 'ERDE_5': 0.2458612388649735, 'ERDE_50': 0.22931442080378478, 'median_latency_tps': 17.0, 'median_penalty_tps': 0.06231913573607972, 'speed': 0.9376808642639203, 'latency_weighted_f1': 0.11826605495220616}
Writing results to CSV file
EVALUATING ENSEMBLE ['svm', 'bayes', 'cnn_model'] with weights [2, 1, 1]
EVALUATING ENSEMBLE ['svm', 'bayes', 'cnn_model'] WITH WEIGHTS [2, 1, 1] FOR WINDOW SIZES 1, 2 AND 3
{'precision': 0.9130434782608695, 'recall': 0.20192307692307693, 'F1': 0.3307086614173228, 'ERDE_5': 0.24698910545911937, 'ERDE_50': 0.19737996635537286, 'median_latency_tps': 14.0, 'median_penalty_tps': 0.05065660333872746, 'speed': 0.9493433966612725, 'latency_weighted_f1': 0.31395608393522395}
Writing results to CSV file
{'precision': 0.8, 'recall': 0.07692307692307693, 'F1': 0.14035087719298248, 'ERDE_5': 0.24702076540872125, 'ERDE_50': 0.2281128268754652, 'median_latency_tps': 15.5, 'median_penalty_tps': 0

#### Experiment {'eval_window_size': 1, 'maxlen': 1000, 'batch_size': 32, 'epochs': 100, 'patience': 10, 'iterations': 1, 'shuffle': False, 'include_feats': ['first_prons', 'nssi'], 'feat_window_size': 10, 'max_size': 20, 'sample_weights_size': 20, 'oversample': False, 'include_new_data': False, 'tfidf_max_features': 5000, 'scale': False, 'normalize': True, 'discretize': True, 'discretize_size': 75, 'dis_strategy': 'quantile'}

TRAINING AND EVALUATING TRADITIONAL MODEL svm
Starting training traditional
              precision    recall  f1-score   support

           0       0.81      0.99      0.89      3602
           1       0.91      0.18      0.30      1048

    accuracy                           0.81      4650
   macro avg       0.86      0.59      0.60      4650
weighted avg       0.83      0.81      0.76      4650

[[3582   20]
 [ 857  191]]
Evaluating after getting time 2042.135082085
              precision    recall  f1-score   support

           0       0.81      0.99      0.89      3602
           1       0.91      0.18      0.30      1048

    accuracy                           0.81      4650
   macro avg       0.86      0.59      0.60      4650
weighted avg       0.83      0.81      0.76      4650

[[3582   20]
 [ 857  191]]
Evaluated with elapsed time 108.74872146199982
EVALUATING FOR WINDOW SIZES 1, 2 AND 3 MODEL svm
{'precision': 0.8135593220338984, 'recall': 0.46153846153846156, 'F1': 0.58

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


Test Score: 0.8961288332939148
Test Accuracy: 0.0
Entered here
              precision    recall  f1-score   support

           0       0.77      1.00      0.87      3602
           1       0.00      0.00      0.00      1048

    accuracy                           0.77      4650
   macro avg       0.39      0.50      0.44      4650
weighted avg       0.60      0.77      0.68      4650

[[3602    0]
 [1048    0]]
Evaluated with elapsed time 69.05809290199977
EVALUATING FOR WINDOW SIZES 1, 2 AND 3 MODEL cnn_model
{'precision': 0, 'recall': 0, 'F1': 0, 'ERDE_5': 0.2458628841607565, 'ERDE_50': 0.2458628841607565, 'median_latency_tps': nan, 'median_penalty_tps': nan, 'speed': nan, 'latency_weighted_f1': nan}
Writing results to CSV file


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


{'precision': 0, 'recall': 0, 'F1': 0, 'ERDE_5': 0.2458628841607565, 'ERDE_50': 0.2458628841607565, 'median_latency_tps': nan, 'median_penalty_tps': nan, 'speed': nan, 'latency_weighted_f1': nan}
Writing results to CSV file
{'precision': 0, 'recall': 0, 'F1': 0, 'ERDE_5': 0.2458628841607565, 'ERDE_50': 0.2458628841607565, 'median_latency_tps': nan, 'median_penalty_tps': nan, 'speed': nan, 'latency_weighted_f1': nan}
Writing results to CSV file
EVALUATING ENSEMBLE ['svm', 'bayes', 'cnn_model'] with weights [1, 1, 1]
EVALUATING ENSEMBLE ['svm', 'bayes', 'cnn_model'] WITH WEIGHTS [1, 1, 1] FOR WINDOW SIZES 1, 2 AND 3
{'precision': 0.9130434782608695, 'recall': 0.20192307692307693, 'F1': 0.3307086614173228, 'ERDE_5': 0.24698910545911937, 'ERDE_50': 0.19737996635537286, 'median_latency_tps': 14.0, 'median_penalty_tps': 0.05065660333872746, 'speed': 0.9493433966612725, 'latency_weighted_f1': 0.31395608393522395}
Writing results to CSV file
{'precision': 0.8, 'recall': 0.07692307692307693, 'F

{'precision': 0.8181818181818182, 'recall': 0.08653846153846154, 'F1': 0.15652173913043477, 'ERDE_5': 0.2470186116233917, 'ERDE_50': 0.22574876068161176, 'median_latency_tps': 15.0, 'median_penalty_tps': 0.054545807509676525, 'speed': 0.9454541924903235, 'latency_weighted_f1': 0.14798413447674627}
Writing results to CSV file
{'precision': 1.0, 'recall': 0.0673076923076923, 'F1': 0.12612612612612611, 'ERDE_5': 0.2458612388649735, 'ERDE_50': 0.22931442080378478, 'median_latency_tps': 17.0, 'median_penalty_tps': 0.06231913573607972, 'speed': 0.9376808642639203, 'latency_weighted_f1': 0.11826605495220616}
Writing results to CSV file
************ FINISHED EXPERIMENT {'eval_window_size': 3, 'maxlen': 1000, 'batch_size': 32, 'epochs': 100, 'patience': 10, 'iterations': 1, 'shuffle': False, 'include_feats': ['first_prons', 'nssi'], 'feat_window_size': 10, 'max_size': 20, 'sample_weights_size': 20, 'oversample': False, 'include_new_data': False, 'tfidf_max_features': 5000, 'scale': False, 'norm

#### Calculating features for {'include_feats': ['first_prons', 'nssi'], 'feat_window_size': 10, 'max_size': 20, 'sample_weights_size': 20, 'oversample': False, 'include_new_data': False, 'tfidf_max_features': 5000, 'scale': False, 'normalize': True, 'discretize': False, 'discretize_size': 50, 'dis_strategy': 'quantile'}

PREPARING DATA FOR PARAMS {'include_feats': ['first_prons', 'nssi'], 'feat_window_size': 10, 'max_size': 20, 'sample_weights_size': 20, 'oversample': False, 'include_new_data': False, 'tfidf_max_features': 5000, 'scale': False, 'normalize': True, 'discretize': False, 'discretize_size': 50, 'dis_strategy': 'quantile'}
Windowfying training users
Windowfying test users

Finished windowfying
Featurizing calculate_feats=True, normalize=True, discretize=False, discretize_size=50, include_feats=['first_prons', 'nssi']
Initialized numpy random and tensorflow random seed at 42
Data size: 3926, 3926
Data size: 4650, 4650
Calculating first prons
Calculating NSSI words
Calculating first prons
Calculating NSSI words
Normalizing features
Is the combined the same from tfidf: False
************ STARTING EXPERIMENT {'eval_window_size': 1, 'maxlen': 1000, 'batch_size': 32, 'epochs': 100, 'patience': 10, 'iterations': 1, 'shuffle': True, 'include_feats': ['first_prons', 'nssi'], 'feat_window_size': 10, '

#### Experiment {'eval_window_size': 1, 'maxlen': 1000, 'batch_size': 32, 'epochs': 100, 'patience': 10, 'iterations': 1, 'shuffle': True, 'include_feats': ['first_prons', 'nssi'], 'feat_window_size': 10, 'max_size': 20, 'sample_weights_size': 20, 'oversample': False, 'include_new_data': False, 'tfidf_max_features': 5000, 'scale': False, 'normalize': True, 'discretize': False, 'discretize_size': 50, 'dis_strategy': 'quantile'}

TRAINING AND EVALUATING TRADITIONAL MODEL svm
Starting training traditional
              precision    recall  f1-score   support

           0       0.78      0.99      0.87      3602
           1       0.48      0.02      0.04      1048

    accuracy                           0.77      4650
   macro avg       0.63      0.51      0.46      4650
weighted avg       0.71      0.77      0.68      4650

[[3578   24]
 [1026   22]]
Evaluating after getting time 2873.120623878
              precision    recall  f1-score   support

           0       0.78      0.99      0.87      3602
           1       0.48      0.02      0.04      1048

    accuracy                           0.77      4650
   macro avg       0.63      0.51      0.46      4650
weighted avg       0.71      0.77      0.68      4650

[[3578   24]
 [1026   22]]
Evaluated with elapsed time 110.43294261999972
EVALUATING FOR WINDOW SIZES 1, 2 AND 3 MODEL svm
{'precision': 0.3333333333333333, 'recall': 0.028846153846153848, 'F1': 0.0

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.77      1.00      0.87      3602
           1       0.00      0.00      0.00      1048

    accuracy                           0.77      4650
   macro avg       0.39      0.50      0.44      4650
weighted avg       0.60      0.77      0.68      4650

[[3602    0]
 [1048    0]]
Evaluating after getting time 2989.150851314
              precision    recall  f1-score   support

           0       0.77      1.00      0.87      3602
           1       0.00      0.00      0.00      1048

    accuracy                           0.77      4650
   macro avg       0.39      0.50      0.44      4650
weighted avg       0.60      0.77      0.68      4650

[[3602    0]
 [1048    0]]
Evaluated with elapsed time 4.447577830999762
EVALUATING FOR WINDOW SIZES 1, 2 AND 3 MODEL bayes


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


{'precision': 0, 'recall': 0, 'F1': 0, 'ERDE_5': 0.2458628841607565, 'ERDE_50': 0.2458628841607565, 'median_latency_tps': nan, 'median_penalty_tps': nan, 'speed': nan, 'latency_weighted_f1': nan}
Writing results to CSV file
{'precision': 0, 'recall': 0, 'F1': 0, 'ERDE_5': 0.2458628841607565, 'ERDE_50': 0.2458628841607565, 'median_latency_tps': nan, 'median_penalty_tps': nan, 'speed': nan, 'latency_weighted_f1': nan}
Writing results to CSV file
{'precision': 0, 'recall': 0, 'F1': 0, 'ERDE_5': 0.2458628841607565, 'ERDE_50': 0.2458628841607565, 'median_latency_tps': nan, 'median_penalty_tps': nan, 'speed': nan, 'latency_weighted_f1': nan}
Writing results to CSV file
TRAINING AND EVALUATING DL MODEL cnn_model
STARTING ITERATION FOR DL MODEL cnn_model FOR 1 ITERATIONS
Starting training deep model cnn_model
Starting training with model_name=cnn_model and maxlen=1000 and batch size=32
Generating embeddings
Data size: 3926
Training with callback
Restoring model weights from the end of the best

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


{'precision': 0, 'recall': 0, 'F1': 0, 'ERDE_5': 0.2458628841607565, 'ERDE_50': 0.2458628841607565, 'median_latency_tps': nan, 'median_penalty_tps': nan, 'speed': nan, 'latency_weighted_f1': nan}
Writing results to CSV file
EVALUATING ENSEMBLE ['svm', 'bayes', 'cnn_model'] with weights [2, 2, 1]
EVALUATING ENSEMBLE ['svm', 'bayes', 'cnn_model'] WITH WEIGHTS [2, 2, 1] FOR WINDOW SIZES 1, 2 AND 3
{'precision': 0.3333333333333333, 'recall': 0.028846153846153848, 'F1': 0.05309734513274337, 'ERDE_5': 0.24933276462291873, 'ERDE_50': 0.2422581023758027, 'median_latency_tps': 11.0, 'median_penalty_tps': 0.03898023902249159, 'speed': 0.9610197609775084, 'latency_weighted_f1': 0.0510275979280093}
Writing results to CSV file
{'precision': 0.3333333333333333, 'recall': 0.028846153846153848, 'F1': 0.05309734513274337, 'ERDE_5': 0.24934383960137432, 'ERDE_50': 0.2422581023758027, 'median_latency_tps': 12.0, 'median_penalty_tps': 0.042873701496841665, 'speed': 0.9571262985031583, 'latency_weighted_f1

#### Experiment {'eval_window_size': 1, 'maxlen': 1000, 'batch_size': 32, 'epochs': 100, 'patience': 10, 'iterations': 1, 'shuffle': False, 'include_feats': ['first_prons', 'nssi'], 'feat_window_size': 10, 'max_size': 20, 'sample_weights_size': 20, 'oversample': False, 'include_new_data': False, 'tfidf_max_features': 5000, 'scale': False, 'normalize': True, 'discretize': False, 'discretize_size': 50, 'dis_strategy': 'quantile'}

TRAINING AND EVALUATING TRADITIONAL MODEL svm
Starting training traditional
              precision    recall  f1-score   support

           0       0.78      0.99      0.87      3602
           1       0.48      0.02      0.04      1048

    accuracy                           0.77      4650
   macro avg       0.63      0.51      0.46      4650
weighted avg       0.71      0.77      0.68      4650

[[3578   24]
 [1026   22]]
Evaluating after getting time 3877.139746118
              precision    recall  f1-score   support

           0       0.78      0.99      0.87      3602
           1       0.48      0.02      0.04      1048

    accuracy                           0.77      4650
   macro avg       0.63      0.51      0.46      4650
weighted avg       0.71      0.77      0.68      4650

[[3578   24]
 [1026   22]]
Evaluated with elapsed time 110.82039464699983
EVALUATING FOR WINDOW SIZES 1, 2 AND 3 MODEL svm
{'precision': 0.3333333333333333, 'recall': 0.028846153846153848, 'F1': 0.0

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.77      1.00      0.87      3602
           1       0.00      0.00      0.00      1048

    accuracy                           0.77      4650
   macro avg       0.39      0.50      0.44      4650
weighted avg       0.60      0.77      0.68      4650

[[3602    0]
 [1048    0]]
Evaluating after getting time 3994.326170992
              precision    recall  f1-score   support

           0       0.77      1.00      0.87      3602
           1       0.00      0.00      0.00      1048

    accuracy                           0.77      4650
   macro avg       0.39      0.50      0.44      4650
weighted avg       0.60      0.77      0.68      4650

[[3602    0]
 [1048    0]]
Evaluated with elapsed time 3.0195249960002
EVALUATING FOR WINDOW SIZES 1, 2 AND 3 MODEL bayes


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


{'precision': 0, 'recall': 0, 'F1': 0, 'ERDE_5': 0.2458628841607565, 'ERDE_50': 0.2458628841607565, 'median_latency_tps': nan, 'median_penalty_tps': nan, 'speed': nan, 'latency_weighted_f1': nan}
Writing results to CSV file
{'precision': 0, 'recall': 0, 'F1': 0, 'ERDE_5': 0.2458628841607565, 'ERDE_50': 0.2458628841607565, 'median_latency_tps': nan, 'median_penalty_tps': nan, 'speed': nan, 'latency_weighted_f1': nan}
Writing results to CSV file
{'precision': 0, 'recall': 0, 'F1': 0, 'ERDE_5': 0.2458628841607565, 'ERDE_50': 0.2458628841607565, 'median_latency_tps': nan, 'median_penalty_tps': nan, 'speed': nan, 'latency_weighted_f1': nan}
Writing results to CSV file
TRAINING AND EVALUATING DL MODEL cnn_model
STARTING ITERATION FOR DL MODEL cnn_model FOR 1 ITERATIONS
Starting training deep model cnn_model
Starting training with model_name=cnn_model and maxlen=1000 and batch size=32
Generating embeddings
Data size: 3926
Training with callback


KeyboardInterrupt: 