### Kept as an example of how to run experiments with this code

In [None]:
from utils import *
from experiment_utils import *
from preprocessing import preprocess
from windowfy import windowfy
from featurizing import featurize
from tfidf_featurizer import combine_features, tfidf_featurize
from training import train, do_ensemble, do_train
from training_traditional import train_and_evaluate
from eval_erisk import evaluate, ensemble_vote
from IPython.display import display, Markdown
from itertools import product
import tensorflow
import numpy as np

In [None]:
# Experiment params. This is the part that must be modified for a different experiment.
#
# Every value in `first_part` / `second_part` is a list of candidate settings;
# the experiment cell passes each dict to `traverse(...)`
# (presumably yielding one dict per combination of values -- confirm in utils).

# Ten *distinct* random seeds in [1, 100]. Sampling without replacement avoids
# drawing the same seed twice, which would re-run an identical configuration.
seed_part = np.random.choice(np.arange(1, 101), size=10, replace=False).tolist()

# Outer grid: each combination is handed to `experiment.prepare_data(...)`.
first_part = {
    "random_seed": seed_part,
    "include_feats": [["first_prons", "nssi"], ["first_prons", "sentiment", "nssi"]],
    "feat_window_size": [10],  # 10
    "max_size": [50],
    "sample_weights_size": [50],
    "oversample": [True],
    "sampling_strategy": [0.8, 1, 1.5, 2],
    "include_new_data": [True],
    "tfidf_max_features": [5000],
    "scale": [False],
    "normalize": [True],
    "discretize": [True],
    "discretize_size": [50, 75],
    "dis_strategy": ["quantile"],
}

# Inner grid: each combination is merged with the current outer combination
# and handed to `experiment.train_and_evaluate_model(...)`.
second_part = {
    "eval_window_size": [1],
    "maxlen": [1000],
    "batch_size": [32],
    "epochs": [100],
    "patience": [10],
    "iterations": [5],
    "shuffle": [True],
}

# Model identifiers and the ensemble(s) built from them, passed to `Experiment`.
models = ["svm", "bayes", "cnn_model"]
ensemble_combinations = [["svm", "bayes", "cnn_model"]]

# Candidate weightings passed to `train_and_evaluate_model`
# (presumably one weight per ensemble member, in the order above -- confirm).
weights = [
    [1, 1, 1],
    [2, 1, 1],
    [1, 2, 1],
    [2, 2, 1],
    [1, 1, 2],
    [3, 3, 1],
    [5, 5, 1],
    [1, 5, 1],
]

# File name given to `Experiment` for the evaluation results.
eval_filename = "experiments_50-bothdata-seed.csv"

## Experiments

In [None]:
# Experiment driver: for every combination in `first_part`, prepare the data
# once, then train and evaluate for every combination in `second_part`.
# Failures are logged (with full stack trace) and the sweep continues, so one
# bad configuration does not abort the whole run.
import traceback  # cell-local import; only needed for the error logging below

for feature_params in traverse(first_part):
    try:
        # experiment object defined in experiment_utils.py
        experiment = Experiment(
            models,
            ensemble_combinations,
            eval_filename,
            random_seed=feature_params["random_seed"],
        )

        logger("********** CALCULATING FEATURES FOR {} ***********".format(feature_params))
        display(Markdown("#### Calculating features for {}".format(feature_params)))

        experiment.prepare_data(feature_params)

        for training_params in traverse(second_part):
            # Merge both parameter sets; on a key clash the outer
            # (feature) parameters take precedence.
            params = training_params.copy()
            params.update(feature_params)

            logger("************ STARTING EXPERIMENT {} ***************".format(params))
            display(Markdown("#### Experiment {}".format(params)))
            try:
                experiment.train_and_evaluate_model(params, weights)
                logger("************ FINISHED EXPERIMENT {} ************* \n".format(params))
            except Exception as e:
                # Deliberately broad: keep the sweep going, but keep the
                # stack trace so the failure can be diagnosed afterwards.
                logger("*************************************")
                logger("Error during experiment {}: {}".format(params, e))
                logger(traceback.format_exc())
                logger("*************************************")
    except Exception as e:
        # Failure while building the experiment or preparing its data:
        # skip this outer combination and move on to the next one.
        logger("*************************************")
        logger("General error during experiment {}: {}".format(feature_params, e))
        logger(traceback.format_exc())
        logger("*************************************")