In [None]:
from utils import *
from preprocessing import preprocess
from windowfy import windowfy
from featurizing import featurize
from tfidf_featurizer import combine_features, tfidf_featurize
from training import train, do_ensemble, do_train
from training_traditional import train_and_evaluate
from eval_erisk import evaluate, ensemble_vote
from IPython.display import display, Markdown
from itertools import product
from numpy.random import seed
import tensorflow
import numpy as np
import pandas as pd

In [None]:
# Seed numpy's and TensorFlow's global random number generators.
# One constant feeds both seeding calls and the log message so the three can
# never disagree when the seed is changed.
SEED = 42
seed(SEED)  # `seed` is numpy.random.seed (imported at the top of the notebook)
tensorflow.random.set_seed(SEED)
logger(f"Initialized numpy random and tensorflow random seed at {SEED}")

### With window_size = 10, max_size = 100, sample_weights_size = 100, is_oversample = False, include_new_data = True

In [None]:
# Collects the test-set predictions of every classifier, keyed by the
# classifier / model name, so the ensemble cells can combine them.
y_preds = dict()

In [None]:
# prepare training data


In [None]:
# 1) Window the data set into train / test users, labels, sample weights and
#    the model inputs X_train / X_test.
(
    train_users,
    y_train,
    test_users,
    y_test,
    train_samples,
    X_train,
    X_test,
) = windowfy(
    window_size=10,
    max_size=100,
    sample_weights_size=100,
    is_oversample=False,
    include_new_data=True,
)

# 2) Hand-crafted features (first-person pronouns and NSSI terms), discretized
#    into 50 quantile bins and normalized.
feats_train, feats_test = featurize(
    calculate_feats=True,
    include_feats=["first_prons", "nssi"],
    train_users=train_users,
    test_users=test_users,
    discretize=True,
    discretize_size=50,
    dis_strategy="quantile",
    normalize=True,
    scale=False,
)

# 3) TF-IDF features over the same users.
tfidf_train, tfidf_test = tfidf_featurize(train_users, test_users, max_features=50000)

# 4) Join TF-IDF with the hand-crafted features and convert both results to
#    dense arrays via .toarray() (train first, then test).
#    NOTE(review): presumably the traditional classifiers need dense input;
#    with 50000 TF-IDF columns this can be memory hungry -- confirm.
feats_train_comb, feats_test_comb = (
    combined.toarray()
    for combined in combine_features([tfidf_train, feats_train], [tfidf_test, feats_test])
)

In [None]:
# traditional classifiers

In [None]:
# SVM on the combined (TF-IDF + hand-crafted) features, using the "weights"
# strategy with train_samples as the per-sample weights argument.
y_pred, classifier = train_and_evaluate(
    feats_train_comb,
    y_train,
    feats_test_comb,
    y_test,
    train_samples,
    classifier_name="svm",
    strategy="weights",
)
# Score the predictions against the test users and remember them for the ensemble.
eval_resul = evaluate(1, 10, {"test": "test"}, y_pred=y_pred, test_users=test_users)
y_preds["svm"] = y_pred

In [None]:
# Bayes classifier on the combined (TF-IDF + hand-crafted) features, using the
# "weights" strategy with train_samples as the per-sample weights argument.
y_pred, classifier = train_and_evaluate(
    feats_train_comb,
    y_train,
    feats_test_comb,
    y_test,
    train_samples,
    classifier_name="bayes",
    strategy="weights",
)
# Score the predictions against the test users and remember them for the ensemble.
eval_resul = evaluate(1, 10, {"test": "test"}, y_pred=y_pred, test_users=test_users)
y_preds["bayes"] = y_pred

In [None]:
# deep learning classifiers -- settings read by the CNN training cell
model_name = "cnn_model"  # also the key under which predictions are stored in y_preds
batch_size = 32
iterations = 3  # number of training runs performed by the training loop

In [None]:
# Train the configured model `iterations` times and keep the predictions of
# the run with the highest latency-weighted F1.
# Predictions are stored under their score, so two runs with exactly the same
# score overwrite each other (the later run wins).
# NOTE(review): the score used for picking the best run is computed on
# test_users -- confirm this selection-on-test is intended.
model_resuls = dict()
for run in range(iterations):
    y_pred = do_train(
        model_name=model_name,
        maxlen=1000,
        epochs=100,
        batch_size=batch_size,
        shuffle=True,
        patience=30,
        feats_train=feats_train,
        feats_test=feats_test,
        X_train=X_train,
        X_test=X_test,
        y_train=y_train,
        y_test=y_test,
        train_sample_weights=train_samples,
    )
    eval_resul = evaluate(1, 10, {"test": "test"}, y_pred=y_pred, test_users=test_users)
    run_score = eval_resul["latency_weighted_f1"]
    model_resuls[run_score] = y_pred

best_score = max(model_resuls)
y_preds[model_name] = model_resuls[best_score]

In [None]:
# deep learning classifiers -- settings read by the LSTM training cell
model_name = "lstm_model_32"  # also the key under which predictions are stored in y_preds
batch_size = 32
iterations = 1  # number of training runs performed by the training loop

In [None]:
# Train the configured model `iterations` times and keep the predictions of
# the run with the highest latency-weighted F1.
# Predictions are stored under their score, so two runs with exactly the same
# score overwrite each other (the later run wins).
# NOTE(review): the score used for picking the best run is computed on
# test_users -- confirm this selection-on-test is intended.
model_resuls = dict()
for run in range(iterations):
    y_pred = do_train(
        model_name=model_name,
        maxlen=1000,
        epochs=100,
        batch_size=batch_size,
        shuffle=True,
        patience=100,
        feats_train=feats_train,
        feats_test=feats_test,
        X_train=X_train,
        X_test=X_test,
        y_train=y_train,
        y_test=y_test,
        train_sample_weights=train_samples,
    )
    eval_resul = evaluate(1, 10, {"test": "test"}, y_pred=y_pred, test_users=test_users)
    run_score = eval_resul["latency_weighted_f1"]
    model_resuls[run_score] = y_pred

best_score = max(model_resuls)
y_preds[model_name] = model_resuls[best_score]

In [None]:
# Quick visual check of the stored CNN predictions, flattened to 1-D.
cnn_pred_flat = y_preds["cnn_model"].flatten()
print(cnn_pred_flat)

In [None]:
# ensemble

In [None]:
# Ensemble of Bayes + CNN + LSTM; the deep-learning predictions are flattened
# before being stacked with the Bayes predictions.
votes = [
    y_preds["bayes"],
    y_preds["cnn_model"].flatten(),
    y_preds["lstm_model_32"].flatten(),
]
y_pred = ensemble_vote(np.array(votes))
evaluate(1, 10, {"test": "test"}, y_pred=y_pred, test_users=test_users)

In [None]:
# Ensemble of Bayes + SVM + LSTM; the LSTM predictions are flattened before
# being stacked with the traditional classifiers' predictions.
votes = [
    y_preds["bayes"],
    y_preds["svm"],
    y_preds["lstm_model_32"].flatten(),
]
y_pred = ensemble_vote(np.array(votes))
evaluate(1, 10, {"test": "test"}, y_pred=y_pred, test_users=test_users)

In [None]:
# Ensemble of Bayes + SVM + CNN; the CNN predictions are flattened before
# being stacked with the traditional classifiers' predictions.
votes = [
    y_preds["bayes"],
    y_preds["svm"],
    y_preds["cnn_model"].flatten(),
]
y_pred = ensemble_vote(np.array(votes))
evaluate(1, 10, {"test": "test"}, y_pred=y_pred, test_users=test_users)

In [None]:
# Ensemble of SVM + CNN + LSTM.
# Both deep-learning outputs are flattened, as in every other cell that uses
# them. Without .flatten() on the CNN predictions the three voters would not
# share a shape, and np.array() would build a ragged array (an error on
# recent NumPy) instead of a regular (3, n_samples) matrix.
# NOTE(review): assumes the SVM predictions are already 1-D, as the other
# ensemble cells do -- confirm.
y_pred = ensemble_vote(
    np.array(
        [
            y_preds["svm"],
            y_preds["cnn_model"].flatten(),
            y_preds["lstm_model_32"].flatten(),
        ]
    )
)
evaluate(1, 10, {"test": "test"}, y_pred=y_pred, test_users=test_users)