# Phase 4: Fine-tune the pretrained models

In [None]:
########################################
## import packages
########################################
import os
import re
import csv
import codecs
import numpy as np
np.random.seed(1337)

import pandas as pd
import operator
import sys

import tensorflow as tf
from keras.backend.tensorflow_backend import set_session

from string import punctuation
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from iwillwin.trainer.supervised_trainer import KerasModelTrainer
from iwillwin.data_utils.data_helpers import DataTransformer, DataLoader, CharDataTransformer
from iwillwin.model.sim_zoos import *
import tensorflow as tf
from iwillwin.config import dataset_config, model_config
import keras.backend as K

from sklearn.metrics import roc_auc_score, log_loss
from keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import log_loss

In [None]:
# Vocabulary cap: passed to DataTransformer as max_num_words and used as the
# default row count of the embedding matrices.
NB_WORDS = 100000
# Initial embedding width; each model cell below reassigns it (200 or 300).
EMBEDDING_DIM = 300
# Passed to DataTransformer as max_sequence_length and to the model builders.
MAX_SEQUENCE_LENGTH = 30
# Passed as the last positional argument to every model builder.
OUT_SIZE = 1

## Load and prepare the data

In [None]:
# Word-level transformer (char_level=False) configured with the vocabulary cap
# and sequence length defined above.
data_transformer = DataTransformer(max_num_words=NB_WORDS, max_sequence_length=MAX_SEQUENCE_LENGTH, char_level=False,
                                   normalization=True, features_processed=True)
# trains / tests are indexed [0], [1], [2] later in this notebook
# (first sentences, second sentences, extra features); labels are the targets.
trains, tests, labels = data_transformer.prepare_data(dual=False)
print("Number of unique words", len(data_transformer.tokenizer.index_docs))

In [None]:
from keras.utils import to_categorical
from sklearn.utils import class_weight
# Convert the class labels to a one-hot matrix.
# NOTE: not idempotent -- re-running this cell applies to_categorical to the
# already-encoded matrix.
labels = to_categorical(labels)

## Prepare word embedding

In [None]:
# Show which pretrained embedding files are available on disk.
print("Embeddings")
print(os.listdir("../data/wordvec"))

In [None]:
# Load the two pretrained embeddings. The results are queried with
# .get(word) in build_embedding_matrix, so they presumably map word -> vector
# (confirm against DataLoader.load_embedding).
data_loader = DataLoader()
sgns_bigram_embedding = data_loader.load_embedding('../data/wordvec/sgns.merge.bigram')
tencent_ai_embedding = data_loader.load_embedding('../data/wordvec/Tencent_AILab_ChineseEmbedding.txt')

In [None]:
def build_embedding_matrix(embeddings_index, embedding_size, nb_words=NB_WORDS, word_index=None):
    """Build an ``(nb_words, embedding_size)`` matrix of pretrained vectors.

    Rows are initialised with ``np.random.rand`` and overwritten with the
    pretrained vector whenever ``embeddings_index`` has one for the word.
    Words without a vector, and words whose index is ``>= nb_words``, are
    written to ``null-word.txt``. The file is opened in ``'w'`` mode, so each
    call overwrites the log from the previous call.

    Args:
        embeddings_index: Mapping queried with ``.get(word)`` for a vector.
        embedding_size: Width of every embedding vector.
        nb_words: Number of rows in the matrix; larger indices are skipped.
        word_index: Mapping of word -> integer index. When ``None`` the
            notebook tokenizer's ``word_index`` is looked up at call time.

    Returns:
        The filled embedding matrix as a NumPy array.
    """
    if word_index is None:
        # Resolved at call time so an explicitly passed mapping is honoured
        # and the default always reflects the tokenizer's current vocabulary.
        word_index = data_transformer.tokenizer.word_index

    embedding_matrix = np.random.rand(nb_words, embedding_size)
    null_ctr = 0
    # The context manager guarantees the log is flushed and closed, even if
    # the loop raises part-way through.
    with open('null-word.txt', 'w', encoding='utf-8') as null_words:
        for word, i in word_index.items():
            if i >= nb_words:
                # Outside the vocabulary cap: logged, but not counted below.
                null_words.write(word + '\n')
                continue
            embedding_vector = embeddings_index.get(word)
            if embedding_vector is not None:
                embedding_matrix[i] = embedding_vector
            else:
                print(word)
                null_ctr += 1
                null_words.write(word + '\n')
    print('Null word embeddings: %d' % null_ctr)
    return embedding_matrix

In [None]:
# One matrix per pretrained embedding; the widths differ (300 vs 200), which is
# why the model cells below reassign EMBEDDING_DIM before building a model.
sgns_bigram_matrix = build_embedding_matrix(sgns_bigram_embedding, embedding_size=300)
tencent_ai_matrix = build_embedding_matrix(tencent_ai_embedding, embedding_size=200)

# Trick Features

In [None]:
# Raw CSVs, used below to derive hand-crafted features from the 'title2_zh' column.
train_df = pd.read_csv('../data/dataset/train.csv')
test_df = pd.read_csv('../data/dataset/test.csv')

In [None]:
# Keywords whose presence in a title sets the rumor flag.
rumor_words_list = [
    '辟谣', '谣言', '谣传', '传谣', '澄清', '真相', '假新闻', '传言', '造谣',
    '假消息', '不实', '勿传', '假的', '子虚乌有', '诈骗', '骗局', '以讹传讹',
]


def is_rumor(text):
    """Return 1 if ``text`` contains any entry of ``rumor_words_list``, else 0.

    Input that is not exactly a ``str`` is printed together with its type and
    scored 0.
    """
    if type(text) is not str:
        print(text, type(text))
        return 0
    return int(any(keyword in text for keyword in rumor_words_list))

def has_split_symbol(text):
    """Return 1 when ``text`` is a ``str`` containing ``'|'``; otherwise 0."""
    return 1 if type(text) is str and '|' in text else 0

# Add both flag columns, derived from the second title, to train and test frames.
for df in [train_df, test_df]:
    df['has_|'] = df['title2_zh'].apply(has_split_symbol)
    df['has_rumor_words'] = df['title2_zh'].apply(is_rumor)

In [None]:
# Only the rumor-word flag is used here; the 'has_|' column is not.
train_has_rumor = train_df.has_rumor_words.values
test_has_rumor = test_df.has_rumor_words.values

# Append the flag as one extra column to the existing feature arrays
# (third element of trains / tests).
trick_trains_features = np.concatenate((trains[2], train_has_rumor.reshape((-1, 1))), axis=1)
trick_tests_features = np.concatenate((tests[2], test_has_rumor.reshape((-1, 1))), axis=1)

In [None]:
def _build_exact_match_sequences(sent_1, sent_2):
    sent_1_char_set = set(sent_1)
    sent_2_char_set = set(sent_2)
    intersection = sent_1_char_set & sent_2_char_set
    
    sent_1_em = np.zeros_like(sent_1)
    sent_2_em = np.zeros_like(sent_2)

    for i in range(len(sent_1)):
        if sent_1[i] == 0:
            continue
        if sent_1[i] in intersection:
            sent_1_em[i] = 1
    
    for i in range(len(sent_2)):
        if sent_2[i] == 0:
            continue        
        if sent_2[i] in intersection:
            sent_2_em[i] = 1
    
    return sent_1_em, sent_2_em

def build_exact_match_sequences(sents_1, sents_2):
    """Compute exact-match flags for every aligned pair of sequences.

    Pairs are formed with ``zip``, so the shorter input decides how many rows
    are produced. Returns two arrays: flags for ``sents_1`` and for ``sents_2``.
    """
    flag_pairs = [
        _build_exact_match_sequences(first, second)
        for first, second in zip(sents_1, sents_2)
    ]
    first_flags = [pair[0] for pair in flag_pairs]
    second_flags = [pair[1] for pair in flag_pairs]
    return np.array(first_flags), np.array(second_flags)

In [None]:
%%time
# Exact-match flags between the first and second sentence of every pair.
trains_1_ems, trains_2_ems = build_exact_match_sequences(trains[0], trains[1])
tests_1_ems, tests_2_ems = build_exact_match_sequences(tests[0], tests[1])

In [None]:
# Sanity check: each flag array should have the same shape as its token sequences.
print("Shape of train em", trains_1_ems.shape, trains_2_ems.shape)
print("Shape of test em", tests_1_ems.shape, tests_2_ems.shape)

In [None]:
# (first-sentence flags, second-sentence flags) pairs, in the order train_folds unpacks them.
em_train_features = (trains_1_ems, trains_2_ems)
em_test_features = (tests_1_ems, tests_2_ems)

# Use the trick features?

In [None]:
# When enabled, the third element of trains / tests is replaced by the feature
# arrays that carry the extra rumor-word column; otherwise they stay unchanged.
use_tricky = True

if use_tricky:
    trains = (trains[0], trains[1], trick_trains_features)
    tests = (tests[0], tests[1], trick_tests_features)

# Get Ensemble Labels

In [None]:
# Pseudo-labels for the test set from the first-level ensemble (presumably
# per-class probabilities -- confirm against the file). The column order
# matches the unrelated / agreed / disagreed order used in make_predictions.
ensemble_submission = pd.read_csv('../data/ensemble/second_level/FirstLevelPseudoLabels.csv')
pseudo_labels = ensemble_submission[['unrelated', 'agreed', 'disagreed']].values

In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
import importlib

from sklearn.metrics import roc_auc_score, log_loss
from keras.callbacks import EarlyStopping, ModelCheckpoint

from iwillwin.config import model_config

class ModelTrainer(object):
    """K-fold trainer that adds pseudo-labelled test rows to every training split.

    Subclasses supply the framework-specific fitting logic by overriding
    ``_train_model_by_logloss``.
    """

    def __init__(self, model_stamp, epoch_num, learning_rate=1e-3,
                 shuffle_inputs=False, verbose_round=40, early_stopping_round=8):
        """Record the training configuration.

        Args:
            model_stamp: Prefix subclasses use to build weight-file paths.
            epoch_num: Maximum number of epochs per fold.
            learning_rate: Stored on the instance; not read by ``train_folds``.
            shuffle_inputs: Stored on the instance; not read by ``train_folds``.
            verbose_round: Stored on the instance; not read by ``train_folds``.
            early_stopping_round: Stored on the instance; not read by ``train_folds``.
        """
        self.models = []
        self.model_stamp = model_stamp
        self.val_loss = -1
        self.auc = -1
        self.epoch_num = epoch_num
        self.learning_rate = learning_rate
        self.eps = 1e-10
        self.verbose_round = verbose_round
        self.early_stopping_round = early_stopping_round
        self.shuffle_inputs = shuffle_inputs

    @staticmethod
    def _train_split(values, fold_start, fold_end, extra):
        """Return ``values`` without rows ``fold_start:fold_end``, followed by ``extra``."""
        return np.concatenate([values[:fold_start], values[fold_end:], extra])

    def train_folds(self, X, y, fold_count, em_train_features, tests, em_test_features, pseudo_labels, batch_size, get_model_func, augments=None, skip_fold=0, patience=10, scale_sample_weight=False,
                    class_weight=None, self_aware=False, swap_input=False):
        """Train one model per fold on (training rows + pseudo-labelled test rows).

        Folds are contiguous slices taken in the given row order (no
        shuffling); the last fold also takes the remainder rows. The held-out
        slice is used for validation only, and the complete test set with
        ``pseudo_labels`` is appended to every training split.

        Args:
            X: ``(first_sentences, second_sentences, features)`` for training.
            y: Training targets, row-aligned with ``X``.
            fold_count: Number of folds.
            em_train_features: ``(first, second)`` exact-match arrays for ``X``.
            tests: ``(first_sentences, second_sentences, features)`` for test.
            em_test_features: ``(first, second)`` exact-match arrays for ``tests``.
            pseudo_labels: Targets for the test rows, row-aligned with ``tests``.
            batch_size: Forwarded to ``_train_model_by_logloss``.
            get_model_func: Zero-argument callable returning a fresh model.
            patience: Forwarded to ``_train_model_by_logloss``.
            scale_sample_weight: Forwarded as ``weight_val``.
            class_weight: Forwarded to ``_train_model_by_logloss``.
            augments, skip_fold, self_aware, swap_input: Accepted for
                signature compatibility; currently unused.

        Returns:
            ``(models, mean_score, fold_predictions)`` where ``mean_score`` is
            the average of the per-fold best validation scores (also stored in
            ``self.val_loss``) and ``fold_predictions`` holds one prediction
            array per validation slice, in fold order.

        Raises:
            ValueError: If ``pseudo_labels`` and ``tests`` differ in row count.
        """
        X1, X2, features = X
        em1, em2 = em_train_features

        # A mismatch would otherwise only surface deep inside model fitting.
        if len(pseudo_labels) != len(tests[0]):
            raise ValueError(
                "pseudo_labels has %d rows but tests has %d rows"
                % (len(pseudo_labels), len(tests[0])))

        fold_size = len(X1) // fold_count
        models = []
        fold_predictions = []
        score = 0

        for fold_id in range(fold_count):
            fold_start = fold_size * fold_id
            # The last fold absorbs the rows left over by the integer division.
            fold_end = len(X1) if fold_id == fold_count - 1 else fold_start + fold_size

            # The dictionary keys are the input names the models expect.
            train_data = {
                "first_sentences": self._train_split(X1, fold_start, fold_end, tests[0]),
                "second_sentences": self._train_split(X2, fold_start, fold_end, tests[1]),
                "mata-features": self._train_split(features, fold_start, fold_end, tests[2]),
                "first_exact_match": self._train_split(em1, fold_start, fold_end, em_test_features[0]),
                "second_exact_match": self._train_split(em2, fold_start, fold_end, em_test_features[1]),
            }
            train_y = self._train_split(y, fold_start, fold_end, pseudo_labels)

            val_data = {
                "first_sentences": X1[fold_start:fold_end],
                "second_sentences": X2[fold_start:fold_end],
                "mata-features": features[fold_start:fold_end],
                "first_exact_match": em1[fold_start:fold_end],
                "second_exact_match": em2[fold_start:fold_end],
            }
            val_y = y[fold_start:fold_end]

            model, bst_val_score, fold_prediction = self._train_model_by_logloss(
                get_model_func(), batch_size, train_data, train_y, val_data, val_y, fold_id, patience, class_weight,
                weight_val=scale_sample_weight)

            score += bst_val_score
            models.append(model)
            fold_predictions.append(fold_prediction)

        self.models = models
        self.val_loss = score / fold_count
        return models, self.val_loss, fold_predictions

    def _train_model_by_logloss(self, model, batch_size, train_x, train_y, val_x, val_y, fold_id, patience,
                                class_weight=None, weight_val=False):
        """Fit ``model`` on one fold; must be overridden.

        The signature matches the call made by ``train_folds``.
        Implementations return ``(model, best_validation_score, validation_predictions)``.
        """
        raise NotImplementedError

class KerasModelTrainer(ModelTrainer):
    """Keras implementation of the per-fold fine-tuning step.

    Each fold starts from ``<model_stamp>-pseudo-scaled-<fold>.h5`` and writes
    its fine-tuned weights to ``<model_stamp>sec-pseudo-scaled-<fold>.h5``.
    """

    # Class weights applied when the caller does not pass ``class_weight``.
    DEFAULT_CLASS_WEIGHT = {0: 1/16, 1: 1/15, 2: 1/5}

    def __init__(self, *args, **kwargs):
        super(KerasModelTrainer, self).__init__(*args, **kwargs)

    def _train_model_by_logloss(self, model, batch_size, train_x, train_y, val_x, val_y, fold_id, patience,
                                class_weight=None, weight_val=False):
        """Fine-tune ``model`` on one fold, starting from previously saved weights.

        Args:
            model: Compiled Keras model; it must report ``val_weighted_accuracy``.
            batch_size: Training batch size.
            train_x, train_y: Training inputs (dict keyed by input name) and targets.
            val_x, val_y: Validation inputs and targets.
            fold_id: Fold index, used to build the weight-file names.
            patience: Early-stopping patience, in epochs.
            class_weight: Mapping of class index -> weight; ``None`` selects
                ``DEFAULT_CLASS_WEIGHT``.
            weight_val: Accepted for interface compatibility; unused.

        Returns:
            ``(model, best_val_weighted_accuracy, validation_predictions)``.
        """
        if class_weight is None:
            class_weight = self.DEFAULT_CLASS_WEIGHT

        early_stopping = EarlyStopping(monitor='val_weighted_accuracy', patience=patience)

        pretrained_path = self.model_stamp + "-pseudo-scaled-" + str(fold_id) + '.h5'
        print("Load weights from", pretrained_path)
        model.load_weights(pretrained_path)

        finetuned_path = self.model_stamp + "sec-pseudo-scaled-" + str(fold_id) + '.h5'
        # NOTE(review): no monitor is given, so the checkpoint keeps the epoch
        # with the best default metric (val_loss in Keras), while early
        # stopping and the returned score use val_weighted_accuracy. The
        # restored weights may therefore come from a different epoch than the
        # reported score -- confirm this is intended.
        model_checkpoint = ModelCheckpoint(finetuned_path, save_best_only=True, save_weights_only=True)
        hist = model.fit(train_x, train_y,
                         validation_data=(val_x, val_y),
                         epochs=self.epoch_num, batch_size=batch_size, shuffle=True,
                         class_weight=class_weight,
                         callbacks=[early_stopping, model_checkpoint],)
        bst_val_score = max(hist.history['val_weighted_accuracy'])

        # Restore the checkpointed weights before predicting the validation slice.
        model.load_weights(finetuned_path)
        predictions = model.predict(val_x)

        return model, bst_val_score, predictions

# Path Setting

In [None]:
oofs_dir = "../data/pseudo/oofs/"
output_dir = "../data/pseudo/output/"
onehot_pred_dir = "../data/pseudo/one_hot_pred/"

In [None]:
def make_predictions():
    """Write out-of-fold, averaged-test and hard-label predictions to CSV files.

    Takes no arguments. It reads notebook globals set by the most recent
    training cell and by earlier cells: ``folds_preds``, ``models``,
    ``model_submit_prefix``, ``labels``, ``tests``, ``tests_1_ems``,
    ``tests_2_ems``, ``NB_WORDS`` and the three output directories.
    ``numpy_weighted_accuracy`` is not defined in this notebook (presumably
    provided by the ``sim_zoos`` star import).
    """
    # REFACTOR: remove this lazy solution (the function depends on notebook globals)

    # Create the output directories up front so that a missing folder cannot
    # make the CSV writes fail after a long training run.
    for directory in (oofs_dir, output_dir, onehot_pred_dir):
        os.makedirs(directory, exist_ok=True)

    oofs_path = oofs_dir + model_submit_prefix
    output_path = output_dir + model_submit_prefix
    one_hot_pred_path = onehot_pred_dir + "One-Hot" + model_submit_prefix

    # NOTE(review): '{:4f}' means minimum width 4 with the default six
    # decimals; '{:.4f}' may have been intended. Left unchanged so that file
    # names stay compatible with files already on disk.
    print("Predicting training results...")
    # Folds are contiguous slices in row order, so concatenating the per-fold
    # validation predictions restores the original training row order.
    train_predicts = np.concatenate(folds_preds, axis=0)
    oofs = pd.DataFrame({"unrelated": train_predicts[:, 0], "agreed": train_predicts[:, 1], "disagreed": train_predicts[:, 2]})
    score = numpy_weighted_accuracy(labels, oofs[['unrelated', 'agreed', 'disagreed']].values)
    submit_path = oofs_path + "-Train-L{:4f}-NB{:d}.csv".format(score, NB_WORDS)
    oofs.to_csv(submit_path, index=False)

    print("Predicting testing results...")
    test_inputs = {"first_sentences": tests[0],
                   "second_sentences": tests[1],
                   "mata-features": tests[2],
                   "first_exact_match": tests_1_ems,
                   "second_exact_match": tests_2_ems,
                   }
    test_predicts_list = [
        model.predict(test_inputs, batch_size=128, verbose=1)
        for model in models
    ]

    # Average the per-fold test predictions.
    test_predicts = np.zeros(test_predicts_list[0].shape)
    for fold_predict in test_predicts_list:
        test_predicts += fold_predict
    test_predicts /= len(test_predicts_list)

    test_predicts = pd.DataFrame({"unrelated": test_predicts[:, 0], "agreed": test_predicts[:, 1], "disagreed": test_predicts[:, 2]})
    submit_path = output_path + "-L{:4f}-NB{:d}.csv".format(score, NB_WORDS)
    test_predicts.to_csv(submit_path, index=False) # 0.3343

    print("Predicting labeled testing results...")
    ids = pd.read_csv("../data/dataset/test.csv")
    # idxmax over the columns yields the name of the highest-scoring class.
    pred_labels = test_predicts.idxmax(axis=1)
    sub = pd.DataFrame({"Id": ids['id'].values, "Category": pred_labels})
    submit_path = one_hot_pred_path + "-L{:4f}-NB{:d}.csv".format(score, NB_WORDS)
    sub.to_csv(submit_path, index=False)

# DenseCNN

## Tencent

In [None]:
# Fine-tune the DenseCNN fold models (Tencent embedding) on training data plus
# the pseudo-labelled test set, then write the predictions.
fold_count = 8
embedding_matrix = tencent_ai_matrix
EMBEDDING_DIM = 200

models_checkpoints_path = "WordTC-DenseCNN5Layers-NoMeta-3P-NoEM-NoClassWeighted-3Layers"
# Prefix uses "TC" to match the Tencent checkpoints above; it previously
# duplicated the SGNS prefix, which mislabelled the output files.
model_submit_prefix = "PSWordTC-DenseCNN5Layers-NoMeta-3P-NoEM-NoClassWeighted-3Layers"


def _agent_get_model():
    """Return a fresh DenseCNN model for one fold."""
    return get_dense_cnn(NB_WORDS, EMBEDDING_DIM, embedding_matrix, MAX_SEQUENCE_LENGTH, OUT_SIZE)


trainer = KerasModelTrainer(model_stamp=models_checkpoints_path, epoch_num=500)
# train_folds requires the test inputs and their pseudo-labels; omitting them
# raises TypeError, so they are passed explicitly.
models, score, folds_preds = trainer.train_folds(
    X=trains, y=labels, augments=None, fold_count=fold_count, batch_size=1024,
    em_train_features=em_train_features, tests=tests, em_test_features=em_test_features,
    pseudo_labels=pseudo_labels, get_model_func=_agent_get_model, patience=20)

print("score", score)
make_predictions()

## SGNS

In [None]:
# Fine-tune the DenseCNN fold models (SGNS embedding) on training data plus
# the pseudo-labelled test set, then write the predictions.
fold_count = 8
embedding_matrix = sgns_bigram_matrix
EMBEDDING_DIM = 300

models_checkpoints_path = "WordSGNS-DenseCNN5Layers-NoMeta-3P-NoEM-NoClassWeighted-3Layers"
model_submit_prefix = "PSWordSGNS-DenseCNN5Layers-NoMeta-3P-NoEM-NoClassWeighted-3Layers"


def _agent_get_model():
    """Return a fresh DenseCNN model for one fold."""
    return get_dense_cnn(NB_WORDS, EMBEDDING_DIM, embedding_matrix, MAX_SEQUENCE_LENGTH, OUT_SIZE)


trainer = KerasModelTrainer(model_stamp=models_checkpoints_path, epoch_num=500)
# train_folds requires the test inputs and their pseudo-labels; omitting them
# raises TypeError, so they are passed explicitly.
models, score, folds_preds = trainer.train_folds(
    X=trains, y=labels, augments=None, fold_count=fold_count, batch_size=1024,
    em_train_features=em_train_features, tests=tests, em_test_features=em_test_features,
    pseudo_labels=pseudo_labels, get_model_func=_agent_get_model, patience=20)

print("score", score)
make_predictions()

# ESIM
## Tencent

In [None]:
# Fine-tune the ESIM fold models (Tencent embedding) on training data plus the
# pseudo-labelled test set, then write the predictions.
fold_count = 8
embedding_matrix = tencent_ai_matrix
EMBEDDING_DIM = 200

models_checkpoints_path = "WordTC-ESIM-NoMeta-3P-NoEM-NoClassWeighted-3Layers"
model_submit_prefix = "PSWordTC-ESIM-NoMeta-3P-NoEM-NoClassWeighted-3Layers"


def _agent_get_model():
    """Return a fresh ESIM model for one fold (built with ``lr=4e-4``)."""
    return get_ESIM(NB_WORDS, EMBEDDING_DIM, embedding_matrix, MAX_SEQUENCE_LENGTH, OUT_SIZE, lr=4e-4)


trainer = KerasModelTrainer(model_stamp=models_checkpoints_path, epoch_num=500)
# train_folds requires the test inputs and their pseudo-labels; omitting them
# raises TypeError, so they are passed explicitly.
models, score, folds_preds = trainer.train_folds(
    X=trains, y=labels, augments=None, fold_count=fold_count, batch_size=64,
    em_train_features=em_train_features, tests=tests, em_test_features=em_test_features,
    pseudo_labels=pseudo_labels, get_model_func=_agent_get_model, patience=7)

print("score", score)
make_predictions()

## SGNS

In [None]:
# Fine-tune the ESIM fold models (SGNS embedding) on training data plus the
# pseudo-labelled test set, then write the predictions.
fold_count = 8
embedding_matrix = sgns_bigram_matrix
EMBEDDING_DIM = 300

models_checkpoints_path = "WordSGNS-ESIM-NoMeta-3P-NoEM-NoClassWeighted-3Layers"
model_submit_prefix = "PSWordSGNS-ESIM-NoMeta-3P-NoEM-NoClassWeighted-3Layers"


def _agent_get_model():
    """Return a fresh ESIM model for one fold."""
    return get_ESIM(NB_WORDS, EMBEDDING_DIM, embedding_matrix, MAX_SEQUENCE_LENGTH, OUT_SIZE)


trainer = KerasModelTrainer(model_stamp=models_checkpoints_path, epoch_num=500)
# train_folds requires the test inputs and their pseudo-labels; omitting them
# raises TypeError, so they are passed explicitly.
models, score, folds_preds = trainer.train_folds(
    X=trains, y=labels, augments=None, fold_count=fold_count, batch_size=64,
    em_train_features=em_train_features, tests=tests, em_test_features=em_test_features,
    pseudo_labels=pseudo_labels, get_model_func=_agent_get_model, patience=10)

print("score", score)
make_predictions()

# DenseRNN

## Tencent

In [None]:
# Fine-tune the DenseRNN fold models (Tencent embedding) on training data plus
# the pseudo-labelled test set, then write the predictions.
fold_count = 8
embedding_matrix = tencent_ai_matrix
EMBEDDING_DIM = 200

models_checkpoints_path = "WordTC-DenseRNN-NoMeta-3P-NoEM-NoClassWeighted-3Layers"
model_submit_prefix = "PSWordTC-DenseRNN-NoMeta-3P-NoEM-NoClassWeighted-3Layers"


def _agent_get_model():
    """Return a fresh model from ``get_darnn`` for one fold."""
    return get_darnn(NB_WORDS, EMBEDDING_DIM, embedding_matrix, MAX_SEQUENCE_LENGTH, OUT_SIZE)


trainer = KerasModelTrainer(model_stamp=models_checkpoints_path, epoch_num=500)
# train_folds requires the test inputs and their pseudo-labels; omitting them
# raises TypeError, so they are passed explicitly.
models, score, folds_preds = trainer.train_folds(
    X=trains, y=labels, augments=None, fold_count=fold_count, batch_size=1024,
    em_train_features=em_train_features, tests=tests, em_test_features=em_test_features,
    pseudo_labels=pseudo_labels, get_model_func=_agent_get_model, patience=20)

print("score", score)
make_predictions()

## SGNS

In [None]:
# Fine-tune the DenseRNN fold models (SGNS embedding) on training data plus
# the pseudo-labelled test set, then write the predictions.
fold_count = 8
embedding_matrix = sgns_bigram_matrix
EMBEDDING_DIM = 300

models_checkpoints_path = "WordSGNS-DenseRNN-NoMeta-3P-NoEM-NoClassWeighted-3Layers"
model_submit_prefix = "PSWordSGNS-DenseRNN-NoMeta-3P-NoEM-NoClassWeighted-3Layers"


def _agent_get_model():
    """Return a fresh model from ``get_darnn`` for one fold."""
    return get_darnn(NB_WORDS, EMBEDDING_DIM, embedding_matrix, MAX_SEQUENCE_LENGTH, OUT_SIZE)


trainer = KerasModelTrainer(model_stamp=models_checkpoints_path, epoch_num=500)
# train_folds requires the test inputs and their pseudo-labels; omitting them
# raises TypeError, so they are passed explicitly.
models, score, folds_preds = trainer.train_folds(
    X=trains, y=labels, augments=None, fold_count=fold_count, batch_size=1024,
    em_train_features=em_train_features, tests=tests, em_test_features=em_test_features,
    pseudo_labels=pseudo_labels, get_model_func=_agent_get_model, patience=20)

print("score", score)
make_predictions()

# Decomposable attention

In [None]:
class ModelTrainer(object):
    """K-fold trainer that adds pseudo-labelled test rows to every training split.

    Subclasses supply the framework-specific fitting logic by overriding
    ``_train_model_by_logloss``.
    """

    def __init__(self, model_stamp, epoch_num, learning_rate=1e-3,
                 shuffle_inputs=False, verbose_round=40, early_stopping_round=8):
        """Record the training configuration.

        Args:
            model_stamp: Prefix subclasses use to build weight-file paths.
            epoch_num: Maximum number of epochs per fold.
            learning_rate: Stored on the instance; not read by ``train_folds``.
            shuffle_inputs: Stored on the instance; not read by ``train_folds``.
            verbose_round: Stored on the instance; not read by ``train_folds``.
            early_stopping_round: Stored on the instance; not read by ``train_folds``.
        """
        self.models = []
        self.model_stamp = model_stamp
        self.val_loss = -1
        self.auc = -1
        self.epoch_num = epoch_num
        self.learning_rate = learning_rate
        self.eps = 1e-10
        self.verbose_round = verbose_round
        self.early_stopping_round = early_stopping_round
        self.shuffle_inputs = shuffle_inputs

    @staticmethod
    def _train_split(values, fold_start, fold_end, extra):
        """Return ``values`` without rows ``fold_start:fold_end``, followed by ``extra``."""
        return np.concatenate([values[:fold_start], values[fold_end:], extra])

    def train_folds(self, X, y, fold_count, em_train_features, tests, em_test_features, pseudo_labels, batch_size, get_model_func, augments=None, skip_fold=0, patience=10, scale_sample_weight=False,
                    class_weight=None, self_aware=False, swap_input=False):
        """Train one model per fold on (training rows + pseudo-labelled test rows).

        Folds are contiguous slices taken in the given row order (no
        shuffling); the last fold also takes the remainder rows. The held-out
        slice is used for validation only, and the complete test set with
        ``pseudo_labels`` is appended to every training split.

        Args:
            X: ``(first_sentences, second_sentences, features)`` for training.
            y: Training targets, row-aligned with ``X``.
            fold_count: Number of folds.
            em_train_features: ``(first, second)`` exact-match arrays for ``X``.
            tests: ``(first_sentences, second_sentences, features)`` for test.
            em_test_features: ``(first, second)`` exact-match arrays for ``tests``.
            pseudo_labels: Targets for the test rows, row-aligned with ``tests``.
            batch_size: Forwarded to ``_train_model_by_logloss``.
            get_model_func: Zero-argument callable returning a fresh model.
            patience: Forwarded to ``_train_model_by_logloss``.
            scale_sample_weight: Forwarded as ``weight_val``.
            class_weight: Forwarded to ``_train_model_by_logloss``.
            augments, skip_fold, self_aware, swap_input: Accepted for
                signature compatibility; currently unused.

        Returns:
            ``(models, mean_score, fold_predictions)`` where ``mean_score`` is
            the average of the per-fold best validation scores (also stored in
            ``self.val_loss``) and ``fold_predictions`` holds one prediction
            array per validation slice, in fold order.

        Raises:
            ValueError: If ``pseudo_labels`` and ``tests`` differ in row count.
        """
        X1, X2, features = X
        em1, em2 = em_train_features

        # A mismatch would otherwise only surface deep inside model fitting.
        if len(pseudo_labels) != len(tests[0]):
            raise ValueError(
                "pseudo_labels has %d rows but tests has %d rows"
                % (len(pseudo_labels), len(tests[0])))

        fold_size = len(X1) // fold_count
        models = []
        fold_predictions = []
        score = 0

        for fold_id in range(fold_count):
            fold_start = fold_size * fold_id
            # The last fold absorbs the rows left over by the integer division.
            fold_end = len(X1) if fold_id == fold_count - 1 else fold_start + fold_size

            # The dictionary keys are the input names the models expect.
            train_data = {
                "first_sentences": self._train_split(X1, fold_start, fold_end, tests[0]),
                "second_sentences": self._train_split(X2, fold_start, fold_end, tests[1]),
                "mata-features": self._train_split(features, fold_start, fold_end, tests[2]),
                "first_exact_match": self._train_split(em1, fold_start, fold_end, em_test_features[0]),
                "second_exact_match": self._train_split(em2, fold_start, fold_end, em_test_features[1]),
            }
            train_y = self._train_split(y, fold_start, fold_end, pseudo_labels)

            val_data = {
                "first_sentences": X1[fold_start:fold_end],
                "second_sentences": X2[fold_start:fold_end],
                "mata-features": features[fold_start:fold_end],
                "first_exact_match": em1[fold_start:fold_end],
                "second_exact_match": em2[fold_start:fold_end],
            }
            val_y = y[fold_start:fold_end]

            model, bst_val_score, fold_prediction = self._train_model_by_logloss(
                get_model_func(), batch_size, train_data, train_y, val_data, val_y, fold_id, patience, class_weight,
                weight_val=scale_sample_weight)

            score += bst_val_score
            models.append(model)
            fold_predictions.append(fold_prediction)

        self.models = models
        self.val_loss = score / fold_count
        return models, self.val_loss, fold_predictions

    def _train_model_by_logloss(self, model, batch_size, train_x, train_y, val_x, val_y, fold_id, patience,
                                class_weight=None, weight_val=False):
        """Fit ``model`` on one fold; must be overridden.

        The signature matches the call made by ``train_folds``.
        Implementations return ``(model, best_validation_score, validation_predictions)``.
        """
        raise NotImplementedError

class KerasModelTrainer(ModelTrainer):
    """Keras implementation of the per-fold fine-tuning step.

    Each fold starts from ``<model_stamp><fold>.h5`` and writes its fine-tuned
    weights to ``<model_stamp>-pseudo-scaled-<fold>.h5``.
    """

    # Class weights applied when the caller does not pass ``class_weight``.
    DEFAULT_CLASS_WEIGHT = {0: 1/16, 1: 1/15, 2: 1/5}

    def __init__(self, *args, **kwargs):
        super(KerasModelTrainer, self).__init__(*args, **kwargs)

    def _train_model_by_logloss(self, model, batch_size, train_x, train_y, val_x, val_y, fold_id, patience,
                                class_weight=None, weight_val=False):
        """Fine-tune ``model`` on one fold, starting from previously saved weights.

        Args:
            model: Compiled Keras model; it must report ``val_weighted_accuracy``.
            batch_size: Training batch size.
            train_x, train_y: Training inputs (dict keyed by input name) and targets.
            val_x, val_y: Validation inputs and targets.
            fold_id: Fold index, used to build the weight-file names.
            patience: Early-stopping patience, in epochs.
            class_weight: Mapping of class index -> weight; ``None`` selects
                ``DEFAULT_CLASS_WEIGHT``.
            weight_val: Accepted for interface compatibility; unused.

        Returns:
            ``(model, best_val_weighted_accuracy, validation_predictions)``.
        """
        if class_weight is None:
            class_weight = self.DEFAULT_CLASS_WEIGHT

        early_stopping = EarlyStopping(monitor='val_weighted_accuracy', patience=patience)

        pretrained_path = self.model_stamp + str(fold_id) + '.h5'
        print("Load weights from", pretrained_path)
        model.load_weights(pretrained_path)

        finetuned_path = self.model_stamp + "-pseudo-scaled-" + str(fold_id) + '.h5'
        # NOTE(review): no monitor is given, so the checkpoint keeps the epoch
        # with the best default metric (val_loss in Keras), while early
        # stopping and the returned score use val_weighted_accuracy. The
        # restored weights may therefore come from a different epoch than the
        # reported score -- confirm this is intended.
        model_checkpoint = ModelCheckpoint(finetuned_path, save_best_only=True, save_weights_only=True)
        hist = model.fit(train_x, train_y,
                         validation_data=(val_x, val_y),
                         epochs=self.epoch_num, batch_size=batch_size, shuffle=True,
                         class_weight=class_weight,
                         callbacks=[early_stopping, model_checkpoint],)
        bst_val_score = max(hist.history['val_weighted_accuracy'])

        # Restore the checkpointed weights before predicting the validation slice.
        model.load_weights(finetuned_path)
        predictions = model.predict(val_x)

        return model, bst_val_score, predictions

## Tencent

In [None]:
# Fine-tune the decomposable-attention fold models (Tencent embedding) on
# training data plus the pseudo-labelled test set, then write the predictions.
fold_count = 8
embedding_matrix = tencent_ai_matrix
EMBEDDING_DIM = 200

models_checkpoints_path = "WordTC-DAttn-NoMeta-3P-NoEM-NoClassWeighted-3Layers"
model_submit_prefix = "PSWordTC-DAttn-NoMeta-3P-NoEM-NoClassWeighted-3Layers"


def _agent_get_model():
    """Return a fresh decomposable-attention model for one fold."""
    return get_decomposable_attention(NB_WORDS, EMBEDDING_DIM, embedding_matrix, MAX_SEQUENCE_LENGTH, OUT_SIZE)


trainer = KerasModelTrainer(model_stamp=models_checkpoints_path, epoch_num=500)
# train_folds requires the test inputs and their pseudo-labels; omitting them
# raises TypeError, so they are passed explicitly.
models, score, folds_preds = trainer.train_folds(
    X=trains, y=labels, augments=None, fold_count=fold_count, batch_size=256,
    em_train_features=em_train_features, tests=tests, em_test_features=em_test_features,
    pseudo_labels=pseudo_labels, get_model_func=_agent_get_model, patience=20)

print("score", score)
make_predictions()

## SGNS

In [None]:
# Fine-tune the decomposable-attention fold models (SGNS embedding) on
# training data plus the pseudo-labelled test set, then write the predictions.
fold_count = 8
embedding_matrix = sgns_bigram_matrix
EMBEDDING_DIM = 300

models_checkpoints_path = "WordSGNS-DAttn-NoMeta-3P-NoEM-NoClassWeighted-3Layers"
model_submit_prefix = "PSWordSGNS-DAttn-NoMeta-3P-NoEM-NoClassWeighted-3Layers"


def _agent_get_model():
    """Return a fresh decomposable-attention model for one fold."""
    return get_decomposable_attention(NB_WORDS, EMBEDDING_DIM, embedding_matrix, MAX_SEQUENCE_LENGTH, OUT_SIZE)


trainer = KerasModelTrainer(model_stamp=models_checkpoints_path, epoch_num=500)
# train_folds requires the test inputs and their pseudo-labels; omitting them
# raises TypeError, so they are passed explicitly.
models, score, folds_preds = trainer.train_folds(
    X=trains, y=labels, augments=None, fold_count=fold_count, batch_size=256,
    em_train_features=em_train_features, tests=tests, em_test_features=em_test_features,
    pseudo_labels=pseudo_labels, get_model_func=_agent_get_model, patience=20)

print("score", score)
make_predictions()