In [None]:
from utils import *
from experiment_utils import *
from preprocessing import preprocess
from windowfy import windowfy
from featurizing import featurize
from tfidf_featurizer import combine_features, tfidf_featurize
from training import train, do_ensemble, do_train
from training_traditional import train_and_evaluate
from eval_erisk import evaluate, ensemble_vote
from IPython.display import display, Markdown
from itertools import product
import traceback
from numpy.random import seed
import tensorflow
import numpy as np

In [None]:
# Pin NumPy's global RNG and TensorFlow's global RNG to the same fixed seed,
# then record that in the log.
np.random.seed(42)
tensorflow.random.set_seed(42)
logger("Initialized numpy random and tensorflow random seed at 42")

In [None]:
# Parameter grid for the experiments.
#
# Every key maps to the list of candidate values to try. The experiment cell
# hands each dictionary to `traverse` and merges one yielded combination of
# `first_part` with one of `second_part` to form the `params` of a single run.
# NOTE(review): `traverse` presumably yields the Cartesian product of the
# value lists -- confirm in utils. If so, `discretize_size` is still varied
# when `discretize` is False, which would repeat equivalent runs.

# Combinations of these are passed to `experiment.prepare_data`.
first_part = dict(
    include_feats=[["first_prons", "nssi"], ["first_prons", "sentiment", "nssi"]],
    feat_window_size=[10],
    max_size=[20],
    sample_weights_size=[20],
    oversample=[False],
    include_new_data=[False],
    tfidf_max_features=[5000, 50000],
    scale=[False, True],
    normalize=[True, False],
    discretize=[True, False],
    discretize_size=[50, 75],
    dis_strategy=["quantile"],
)

# Combinations of these are merged with a `first_part` combination and passed
# to `experiment.train_and_evaluate_model`.
second_part = dict(
    eval_window_size=[1],
    maxlen=[1000],
    batch_size=[32],
    epochs=[100],
    patience=[10],
    iterations=[5],
    shuffle=[True, False],
)

# Model identifiers given to `Experiment`.
models = [
    "svm",
    "bayes",
    "cnn_model",
    "lstm_model",
    "lstm_model_32",
    "lstm_model_16",
]

# Three-member ensembles: "svm", then either "cnn_model" or "bayes", then one
# of the LSTM variants (all "cnn_model" ensembles first, then all "bayes").
ensemble_combinations = [
    ["svm", middle_model, lstm_variant]
    for middle_model in ("cnn_model", "bayes")
    for lstm_variant in ("lstm_model", "lstm_model_32", "lstm_model_16")
]

# Candidate weight triples, three values each like the ensembles above.
# NOTE(review): presumably matched positionally to the ensemble members -- confirm.
weights = [
    [1, 1, 1],
    [2, 1, 1],
    [1, 2, 1],
    [2, 2, 1],
    [1, 1, 2],
    [3, 3, 1],
    [5, 5, 1],
    [1, 5, 1],
]

# File name given to `Experiment`.
eval_filename = "experiments_20-nonedata.csv"

## Experiments

In [None]:
# Run the full grid of experiments.
#
# For every combination yielded by traverse(first_part) the data is prepared
# once via experiment.prepare_data; then, for every combination yielded by
# traverse(second_part), the two are merged and one model run is trained and
# evaluated. Failures are logged (message plus full traceback) and the loop
# moves on, so a single bad configuration does not abort the remaining runs.
experiment = Experiment(models, ensemble_combinations, eval_filename)

firstpart_generator = traverse(first_part)

for feature_params in firstpart_generator:
    try:
        logger("********** CALCULATING FEATURES FOR {} ***********".format(feature_params))
        display(Markdown("#### Calculating features for {}".format(feature_params)))

        experiment.prepare_data(feature_params)

        for training_params in traverse(second_part):
            # Merge both halves; on a key collision the first-part value wins
            # because it is applied last.
            params = training_params.copy()
            params.update(feature_params)
            logger("************ STARTING EXPERIMENT {} ***************".format(params))
            display(Markdown("#### Experiment {}".format(params)))
            try:
                experiment.train_and_evaluate_model(params, weights)
                logger("************ FINISHED EXPERIMENT {} ************* \n".format(params))
            except Exception as e:
                # Deliberate best-effort: keep going with the next configuration,
                # but keep the traceback so the failure can be diagnosed later.
                logger("*************************************")
                logger("Error during experiment {}: {}".format(params, e))
                logger(traceback.format_exc())
                logger("*************************************")
    except Exception as e:
        # Reached when feature preparation (or anything outside the inner
        # try) fails; every run for this first-part combination is skipped.
        logger("*************************************")
        logger("General error during experiment {}: {}".format(feature_params, e))
        logger(traceback.format_exc())
        logger("*************************************")