In [1]:
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

# Data Encoding

In [2]:
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer

from tqdm import tqdm
import numpy as np
from nltk import word_tokenize
from nltk.corpus import stopwords
stop_words = stopwords.words('english')

In [3]:
with open('../data.nosync/ebert_test_train_data.pkl', 'rb') as f:
    train_data, test_data = pickle.load(f)

In [4]:
train_data, valid_data = train_test_split(train_data, stratify=train_data['stars'], test_size=0.1, random_state=42)

X_train = train_data['review']
y_train = train_data['stars']
X_valid = valid_data['review']
y_valid = valid_data['stars']
X_test = test_data['review']
y_test = test_data['stars']

print(len(X_train), len(y_train), len(X_valid), len(y_valid), len(X_test), len(y_test))

lbl_enc = preprocessing.LabelEncoder()
y_encoded = lbl_enc.fit_transform(list(y_train) + list(y_valid) + list(y_test))
y_train_enc = y_encoded[0:len(y_train)]
y_valid_enc = y_encoded[len(y_train):len(y_train) + len(y_valid)]
y_test_enc = y_encoded[len(y_train) + len(y_valid):len(y_train) + len(y_valid) + len(y_test)]

# Always start with these features. They work (almost) everytime!
tfv = TfidfVectorizer(min_df=3,  max_features=None, 
            strip_accents='unicode', analyzer='word',token_pattern=r'\w{1,}',
            ngram_range=(1, 3), use_idf=1,smooth_idf=1,sublinear_tf=1,
            stop_words = 'english')

# Fitting TF-IDF to both training and test sets (semi-supervised learning)
tfv.fit(list(X_train) + list(X_valid))
xtrain_tfv =  tfv.transform(X_train) 
xvalid_tfv = tfv.transform(X_valid)

# from http://www-nlp.stanford.edu/data/glove.840B.300d.zip

embeddings_index = {}
missed_values = []
with open('../../glove.840B.300d.txt') as f:
    i = 0
    for line in tqdm(f):
        try:
            values = line.split()
            word = values[0]
            coefs = np.asarray(values[1:], dtype='float32')
            embeddings_index[word] = coefs
        except Exception as e:
            missed_values.append(values)
        i += 1
        
print("Found %s words vectors" % len(embeddings_index))

5204 5204 579 579 1928 1928


2196017it [02:13, 16500.74it/s]

Found 2195884 words vectors





In [14]:
def sentence2vector(s):
    words = str(s).lower()
    words = word_tokenize(words)
    words = [w for w in words if w not in stop_words]
    words = [w for w in words if w.isalpha()]
    M = []
    for w in words:
        try:
            M.append(embeddings_index[w])
        except:
            continue
    M = np.array(M)
    v = M.sum(axis=0)
    if type(v) != np.ndarray:
        return np.zeros(300)
    else:
        return v / np.sqrt((v ** 2).sum())

In [18]:
x_train_glove = [sentence2vector(x) for x in X_train]
x_valid_glove = [sentence2vector(x) for x in X_valid]

In [19]:
x_train_glove = np.array(xtrain_glove)
x_valid_glove = np.array(xvalid_glove)

In [5]:
set(train_data['stars']), set(test_data['stars'])

({0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0},
 {0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0})

# Deep Learning

In [42]:
from keras import utils
from keras.models import Sequential
from keras.layers import LSTM, GRU, Dense, Activation, Dropout, Embedding, BatchNormalization, GlobalMaxPooling1D, Conv1D, MaxPooling1D, Flatten, Bidirectional, SpatialDropout1D
from keras.callbacks import EarlyStopping

In [21]:
scl = preprocessing.StandardScaler()
x_train_glove_scl = scl.fit_transform(x_train_glove)
x_valid_glove_scl = scl.fit_transform(y_train_glove)

In [22]:
y_train_enc = utils.to_categorical(y_train)
y_valid_enc = utils.to_categorical(y_valid)

In [29]:
# create a simple 3 layer sequential neural net
model = Sequential()

model.add(Dense(300, input_dim=300, activation='relu'))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(300, activation='relu'))
model.add(Dropout(0.3))
model.add(BatchNormalization())

model.add(Dense(5))
model.add(Activation('softmax'))

# compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [30]:
model.fit(x_train_glove_scl, y=y_train_enc, batch_size=64, 
          epochs=5, verbose=1, 
          validation_data=(x_valid_glove_scl, y_valid_enc))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x7fa2fae7e070>

## Tokenize

In [31]:
from keras.preprocessing import sequence, text

In [34]:
# using keras tokenizer here
token = text.Tokenizer(num_words=None)
max_len = 70

token.fit_on_texts(list(X_train) + list(X_valid))
x_train_seq = token.texts_to_sequences(X_train)
x_valid_seq = token.texts_to_sequences(X_valid)

# zero pad the sequences
x_train_pad = sequence.pad_sequences(x_train_seq, maxlen=max_len)
x_valid_pad = sequence.pad_sequences(x_valid_seq, maxlen=max_len)

word_index = token.word_index

In [35]:
# create an embedding matrix for the words we have in the dataset
embedding_matrix = np.zeros((len(word_index) + 1, 300))
for word, i in tqdm(word_index.items()):
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

100%|██████████████████████████████████| 85752/85752 [00:00<00:00, 86024.37it/s]


In [39]:
# A simple LSTM with glove embeddings and two dense layers
model = Sequential()
model.add(Embedding(len(word_index) + 1,
                     300,
                     weights=[embedding_matrix],
                     input_length=max_len,
                     trainable=False))
model.add(SpatialDropout1D(0.3))
model.add(LSTM(100, dropout=0.3, recurrent_dropout=0.3))

model.add(Dense(1024, activation='relu'))
model.add(Dropout(0.8))

model.add(Dense(1024, activation='relu'))
model.add(Dropout(0.8))

model.add(Dense(5))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [40]:
model.fit(
    x_train_pad, 
    y=y_train_enc, 
    batch_size=512, 
    epochs=100, 
    verbose=1, 
    validation_data=(x_valid_pad, y_valid_enc)
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x7fa300ed20a0>

### Early-Stopping

In [43]:
# A simple LSTM with glove embeddings and two dense layers
model = Sequential()
model.add(Embedding(len(word_index) + 1,
                     300,
                     weights=[embedding_matrix],
                     input_length=max_len,
                     trainable=False))
model.add(SpatialDropout1D(0.3))
model.add(LSTM(300, dropout=0.3, recurrent_dropout=0.3))

model.add(Dense(1024, activation='relu'))
model.add(Dropout(0.8))

model.add(Dense(1024, activation='relu'))
model.add(Dropout(0.8))

model.add(Dense(5))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')

# Fit the model with early stopping callback
earlystop = EarlyStopping(monitor='val_loss', min_delta=0, patience=3, verbose=0, mode='auto')
model.fit(x_train_pad, y=y_train_enc, batch_size=512, epochs=100, 
          verbose=1, validation_data=(x_valid_pad, y_valid_enc), callbacks=[earlystop])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100


<keras.src.callbacks.History at 0x7fa30e2f80d0>

In [46]:
# A simple bidirectional LSTM with glove embeddings and two dense layers
model = Sequential()
model.add(Embedding(len(word_index) + 1,
                     300,
                     weights=[embedding_matrix],
                     input_length=max_len,
                     trainable=False))
model.add(SpatialDropout1D(0.3))
model.add(Bidirectional(LSTM(300, dropout=0.3, recurrent_dropout=0.3)))

model.add(Dense(1024, activation='relu'))
model.add(Dropout(0.8))

model.add(Dense(1024, activation='relu'))
model.add(Dropout(0.8))

model.add(Dense(5))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')

# Fit the model with early stopping callback
earlystop = EarlyStopping(monitor='val_loss', min_delta=0, patience=3, verbose=0, mode='auto')
model.fit(x_train_pad, y=y_train_enc, batch_size=512, epochs=100, 
          verbose=1, validation_data=(x_valid_pad, y_valid_enc), callbacks=[earlystop])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100


<keras.src.callbacks.History at 0x7fa188603be0>

In [48]:
# GRU with glove embeddings and two dense layers
model = Sequential()
model.add(Embedding(len(word_index) + 1,
                     300,
                     weights=[embedding_matrix],
                     input_length=max_len,
                     trainable=False))
model.add(SpatialDropout1D(0.3))
model.add(GRU(300, dropout=0.3, recurrent_dropout=0.3, return_sequences=True))
model.add(GRU(300, dropout=0.3, recurrent_dropout=0.3))

model.add(Dense(1024, activation='relu'))
model.add(Dropout(0.8))

model.add(Dense(1024, activation='relu'))
model.add(Dropout(0.8))

model.add(Dense(5))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')

# Fit the model with early stopping callback
earlystop = EarlyStopping(monitor='val_loss', min_delta=0, patience=3, verbose=0, mode='auto')
model.fit(x_train_pad, y=y_train_enc, batch_size=512, epochs=100, 
          verbose=1, validation_data=(x_valid_pad, y_valid_enc), callbacks=[earlystop])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100


<keras.src.callbacks.History at 0x7fa1280dd2b0>

# Ensembling

In [49]:
# this is the main ensembling class. how to use it is in the next cell!
import numpy as np
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold, KFold
import pandas as pd
import os
import sys
import logging

logging.basicConfig(
    level=logging.DEBUG,
    format="[%(asctime)s] %(levelname)s %(message)s",
    datefmt="%H:%M:%S", stream=sys.stdout)
logger = logging.getLogger(__name__)


class Ensembler(object):
    def __init__(self, model_dict, num_folds=3, task_type='classification', optimize=roc_auc_score,
                 lower_is_better=False, save_path=None):
        """
        Ensembler init function
        :param model_dict: model dictionary, see README for its format
        :param num_folds: the number of folds for ensembling
        :param task_type: classification or regression
        :param optimize: the function to optimize for, e.g. AUC, logloss, etc. Must have two arguments y_test and y_pred
        :param lower_is_better: is lower value of optimization function better or higher
        :param save_path: path to which model pickles will be dumped to along with generated predictions, or None
        """

        self.model_dict = model_dict
        self.levels = len(self.model_dict)
        self.num_folds = num_folds
        self.task_type = task_type
        self.optimize = optimize
        self.lower_is_better = lower_is_better
        self.save_path = save_path

        self.training_data = None
        self.test_data = None
        self.y = None
        self.lbl_enc = None
        self.y_enc = None
        self.train_prediction_dict = None
        self.test_prediction_dict = None
        self.num_classes = None

    def fit(self, training_data, y, lentrain):
        """
        :param training_data: training data in tabular format
        :param y: binary, multi-class or regression
        :return: chain of models to be used in prediction
        """

        self.training_data = training_data
        self.y = y

        if self.task_type == 'classification':
            self.num_classes = len(np.unique(self.y))
            logger.info("Found %d classes", self.num_classes)
            self.lbl_enc = LabelEncoder()
            self.y_enc = self.lbl_enc.fit_transform(self.y)
            kf = StratifiedKFold(n_splits=self.num_folds)
            train_prediction_shape = (lentrain, self.num_classes)
        else:
            self.num_classes = -1
            self.y_enc = self.y
            kf = KFold(n_splits=self.num_folds)
            train_prediction_shape = (lentrain, 1)

        self.train_prediction_dict = {}
        for level in range(self.levels):
            self.train_prediction_dict[level] = np.zeros((train_prediction_shape[0],
                                                          train_prediction_shape[1] * len(self.model_dict[level])))

        for level in range(self.levels):

            if level == 0:
                temp_train = self.training_data
            else:
                temp_train = self.train_prediction_dict[level - 1]

            for model_num, model in enumerate(self.model_dict[level]):
                validation_scores = []
                foldnum = 1
                for train_index, valid_index in kf.split(self.train_prediction_dict[0], self.y_enc):
                    logger.info("Training Level %d Fold # %d. Model # %d", level, foldnum, model_num)

                    if level != 0:
                        l_training_data = temp_train[train_index]
                        l_validation_data = temp_train[valid_index]
                        model.fit(l_training_data, self.y_enc[train_index])
                    else:
                        l0_training_data = temp_train[0][model_num]
                        if type(l0_training_data) == list:
                            l_training_data = [x[train_index] for x in l0_training_data]
                            l_validation_data = [x[valid_index] for x in l0_training_data]
                        else:
                            l_training_data = l0_training_data[train_index]
                            l_validation_data = l0_training_data[valid_index]
                        model.fit(l_training_data, self.y_enc[train_index])

                    logger.info("Predicting Level %d. Fold # %d. Model # %d", level, foldnum, model_num)

                    if self.task_type == 'classification':
                        temp_train_predictions = model.predict_proba(l_validation_data)
                        self.train_prediction_dict[level][valid_index,
                        (model_num * self.num_classes):(model_num * self.num_classes) +
                                                       self.num_classes] = temp_train_predictions

                    else:
                        temp_train_predictions = model.predict(l_validation_data)
                        self.train_prediction_dict[level][valid_index, model_num] = temp_train_predictions
                    validation_score = self.optimize(self.y_enc[valid_index], temp_train_predictions)
                    validation_scores.append(validation_score)
                    logger.info("Level %d. Fold # %d. Model # %d. Validation Score = %f", level, foldnum, model_num,
                                validation_score)
                    foldnum += 1
                avg_score = np.mean(validation_scores)
                std_score = np.std(validation_scores)
                logger.info("Level %d. Model # %d. Mean Score = %f. Std Dev = %f", level, model_num,
                            avg_score, std_score)

            logger.info("Saving predictions for level # %d", level)
            train_predictions_df = pd.DataFrame(self.train_prediction_dict[level])
            train_predictions_df.to_csv(os.path.join(self.save_path, "train_predictions_level_" + str(level) + ".csv"),
                                        index=False, header=None)

        return self.train_prediction_dict

    def predict(self, test_data, lentest):
        self.test_data = test_data
        if self.task_type == 'classification':
            test_prediction_shape = (lentest, self.num_classes)
        else:
            test_prediction_shape = (lentest, 1)

        self.test_prediction_dict = {}
        for level in range(self.levels):
            self.test_prediction_dict[level] = np.zeros((test_prediction_shape[0],
                                                         test_prediction_shape[1] * len(self.model_dict[level])))
        self.test_data = test_data
        for level in range(self.levels):
            if level == 0:
                temp_train = self.training_data
                temp_test = self.test_data
            else:
                temp_train = self.train_prediction_dict[level - 1]
                temp_test = self.test_prediction_dict[level - 1]

            for model_num, model in enumerate(self.model_dict[level]):

                logger.info("Training Fulldata Level %d. Model # %d", level, model_num)
                if level == 0:
                    model.fit(temp_train[0][model_num], self.y_enc)
                else:
                    model.fit(temp_train, self.y_enc)

                logger.info("Predicting Test Level %d. Model # %d", level, model_num)

                if self.task_type == 'classification':
                    if level == 0:
                        temp_test_predictions = model.predict_proba(temp_test[0][model_num])
                    else:
                        temp_test_predictions = model.predict_proba(temp_test)
                    self.test_prediction_dict[level][:, (model_num * self.num_classes): (model_num * self.num_classes) +
                                                                                        self.num_classes] = temp_test_predictions

                else:
                    if level == 0:
                        temp_test_predictions = model.predict(temp_test[0][model_num])
                    else:
                        temp_test_predictions = model.predict(temp_test)
                    self.test_prediction_dict[level][:, model_num] = temp_test_predictions

            test_predictions_df = pd.DataFrame(self.test_prediction_dict[level])
            test_predictions_df.to_csv(os.path.join(self.save_path, "test_predictions_level_" + str(level) + ".csv"),
                                       index=False, header=None)

        return self.test_prediction_dict

### Count Vectorizer

In [50]:
from sklearn.feature_extraction.text import CountVectorizer

In [51]:
ctv = CountVectorizer(analyzer='word',token_pattern=r'\w{1,}',
            ngram_range=(1, 3), stop_words = 'english')

# Fitting Count Vectorizer to both training and test sets (semi-supervised learning)
ctv.fit(list(X_train) + list(X_valid))
xtrain_ctv =  ctv.transform(X_train) 
xvalid_ctv = ctv.transform(X_valid)

## Ensemble Training

In [56]:
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
import xgboost as xgb
from nep_loss import multiclass_logloss
from sklearn.metrics import accuracy_score

In [None]:
# specify the data to be used for every level of ensembling:
train_data_dict = {0: [xtrain_tfv, xtrain_ctv, xtrain_tfv, xtrain_ctv], 1: [xtrain_glove]}
test_data_dict = {0: [xvalid_tfv, xvalid_ctv, xvalid_tfv, xvalid_ctv], 1: [xvalid_glove]}

model_dict = {0: [LogisticRegression(), LogisticRegression(), MultinomialNB(alpha=0.1), MultinomialNB()],

              1: [xgb.XGBClassifier(silent=True, n_estimators=120, max_depth=7)]}

ens = Ensembler(model_dict=model_dict, num_folds=3, task_type='classification',
                optimize=multiclass_logloss, lower_is_better=True, save_path='')

ens.fit(train_data_dict, y_train, lentrain=x_train_glove.shape[0])
preds = ens.predict(test_data_dict, lentest=x_valid_glove.shape[0])

[16:34:55] INFO Found 9 classes
[16:34:55] INFO Training Level 0 Fold # 1. Model # 0
[16:35:13] INFO Predicting Level 0. Fold # 1. Model # 0
[16:35:14] INFO Level 0. Fold # 1. Model # 0. Validation Score = 1.732681
[16:35:14] INFO Training Level 0 Fold # 2. Model # 0
[16:35:33] INFO Predicting Level 0. Fold # 2. Model # 0
[16:35:33] INFO Level 0. Fold # 2. Model # 0. Validation Score = 1.726837
[16:35:33] INFO Training Level 0 Fold # 3. Model # 0
[16:35:49] INFO Predicting Level 0. Fold # 3. Model # 0
[16:35:49] INFO Level 0. Fold # 3. Model # 0. Validation Score = 1.727804
[16:35:49] INFO Level 0. Model # 0. Mean Score = 1.729107. Std Dev = 0.002558
[16:35:49] INFO Training Level 0 Fold # 1. Model # 1


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[16:43:59] INFO Predicting Level 0. Fold # 1. Model # 1
[16:43:59] INFO Level 0. Fold # 1. Model # 1. Validation Score = 1.844908
[16:43:59] INFO Training Level 0 Fold # 2. Model # 1


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[16:52:40] INFO Predicting Level 0. Fold # 2. Model # 1
[16:52:40] INFO Level 0. Fold # 2. Model # 1. Validation Score = 1.814251
[16:52:40] INFO Training Level 0 Fold # 3. Model # 1


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[17:00:44] INFO Predicting Level 0. Fold # 3. Model # 1
[17:00:45] INFO Level 0. Fold # 3. Model # 1. Validation Score = 1.841146
[17:00:45] INFO Level 0. Model # 1. Mean Score = 1.833435. Std Dev = 0.013652
[17:00:45] INFO Training Level 0 Fold # 1. Model # 2
[17:00:45] INFO Predicting Level 0. Fold # 1. Model # 2
[17:00:45] INFO Level 0. Fold # 1. Model # 2. Validation Score = 2.475651
[17:00:45] INFO Training Level 0 Fold # 2. Model # 2
[17:00:45] INFO Predicting Level 0. Fold # 2. Model # 2
[17:00:45] INFO Level 0. Fold # 2. Model # 2. Validation Score = 2.457662
[17:00:45] INFO Training Level 0 Fold # 3. Model # 2
[17:00:45] INFO Predicting Level 0. Fold # 3. Model # 2
[17:00:45] INFO Level 0. Fold # 3. Model # 2. Validation Score = 2.465671
[17:00:45] INFO Level 0. Model # 2. Mean Score = 2.466328. Std Dev = 0.007359
[17:00:45] INFO Training Level 0 Fold # 1. Model # 3
[17:00:47] INFO Predicting Level 0. Fold # 1. Model # 3
[17:00:47] INFO Level 0. Fold # 1. Model # 3. Validation

Parameters: { "silent" } are not used.



[17:00:56] INFO Predicting Level 1. Fold # 1. Model # 0
[17:00:56] INFO Level 1. Fold # 1. Model # 0. Validation Score = 2.056281
[17:00:56] INFO Training Level 1 Fold # 2. Model # 0


Parameters: { "silent" } are not used.



[17:01:01] INFO Predicting Level 1. Fold # 2. Model # 0
[17:01:01] INFO Level 1. Fold # 2. Model # 0. Validation Score = 1.974962
[17:01:01] INFO Training Level 1 Fold # 3. Model # 0


Parameters: { "silent" } are not used.



[17:01:06] INFO Predicting Level 1. Fold # 3. Model # 0
[17:01:06] INFO Level 1. Fold # 3. Model # 0. Validation Score = 1.891059
[17:01:06] INFO Level 1. Model # 0. Mean Score = 1.974101. Std Dev = 0.067454
[17:01:06] INFO Saving predictions for level # 1
[17:01:06] INFO Training Fulldata Level 0. Model # 0
[17:01:34] INFO Predicting Test Level 0. Model # 0
[17:01:34] INFO Training Fulldata Level 0. Model # 1


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [None]:
# check error:
multiclass_logloss(y_valid, preds[1])

In [None]:
accuracy_score(y_valid, preds[1])