In [1]:
import gc
import numpy as np
import tensorflow as tf
import yaml
import joblib
import random
import xgboost
from pprint import pprint
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier

from tensorflow.python.keras import backend as K
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from keras import optimizers
from keras import regularizers
from keras.layers import Bidirectional
from keras.layers import Convolution1D
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Embedding
from keras.layers import GRU
from keras.layers import GlobalMaxPooling1D
from keras.layers import MaxPooling1D
from keras.layers import GlobalAveragePooling1D
from keras.layers import AveragePooling1D
from keras.layers import Input
from keras.layers import SpatialDropout1D
from keras.layers import BatchNormalization
from keras.models import Model

In [2]:
# load constants and config
config_path = '../config/20news.yaml'
with open(config_path, 'r') as f:
    config = yaml.safe_load(f)
# end with
pprint('=' * 20 + 'Configs' + '=' * 20)


LB, L, T, TEXT_EMBEDDING_MATRIX = None, None, None, None
LR = 0.001 # learning rate
random.seed(config['seed'])
np.random.seed(config['seed'])
tf.random.set_seed(config['seed'])
LB = joblib.load(config['binarizer_out'])
LE = joblib.load(config['encoder_out'])
L = joblib.load(config['labeled_train_out'])
T = joblib.load(config['test_out'])
random.shuffle(L)



In [3]:
def build_model(
    text_input_shape,
    output_shape,
    dropout_rate=0,
    kernel_regularizer=0,
    activity_regularizer=0,
    bias_regularizer=0,
    **kwargs):

    text_input = Input(text_input_shape, name='text_input')
    
    text = Dense(
        512,
        activation='relu',
        kernel_regularizer=regularizers.l2(kernel_regularizer),
        bias_regularizer=regularizers.l2(bias_regularizer))(text_input)  # down size the learnt representation
    # text = BatchNormalization()(text)
    text = Dropout(dropout_rate)(text)
    # x = Dense(
    #     64,
    #     activation='relu',
    #     kernel_regularizer=regularizers.l2(kernel_regularizer),
    #     bias_regularizer=regularizers.l2(bias_regularizer))(text)  # down size the learnt representation
    # x = BatchNormalization()(x)
    # x = Dropout(dropout_rate)(x)
    # x = Dense(
    #     64,
    #     activation='relu',
    #     kernel_regularizer=regularizers.l2(kernel_regularizer),
    #     bias_regularizer=regularizers.l2(bias_regularizer))(x)  # down size the learnt representation
    # x = Dropout(dropout_rate)(x)

    output = Dense(output_shape, activation='softmax', name='output')(text)

    model = Model(inputs=[text_input], outputs=[output])
    model.compile(
        optimizer=optimizers.Adam(LR, decay=1e-6),
        loss='categorical_crossentropy',
        metrics=['categorical_accuracy'])

    return model
# end def

In [4]:
def build_conv_model(
    text_input_shape,
    output_shape,
    dropout_rate=0,
    kernel_regularizer=0,
    activity_regularizer=0,
    bias_regularizer=0,
    **kwargs):

    text_input = Input(text_input_shape, name='text_input')
    text = Convolution1D(256, 3, activation="relu")(text_input)
    # text = MaxPooling1D()(text)
    # text = Convolution1D(512, 3, activation="relu")(text)
    text = GlobalMaxPooling1D()(text)

    # x = Dense(
    #     64,
    #     activation='relu',
    #     kernel_regularizer=regularizers.l2(kernel_regularizer),
    #     bias_regularizer=regularizers.l2(bias_regularizer))(text)  # down size the learnt representation
    # # x = BatchNormalization()(x)
    # x = Dropout(dropout_rate)(x)
    # x = Dense(
    #     64,
    #     activation='relu',
    #     kernel_regularizer=regularizers.l2(kernel_regularizer),
    #     bias_regularizer=regularizers.l2(bias_regularizer))(x)  # down size the learnt representation
    # # x = BatchNormalization()(x)
    # x = Dropout(dropout_rate)(x)

    output = Dense(output_shape, activation='softmax', name='output')(text)

    model = Model(inputs=[text_input], outputs=[output])
    model.compile(
        optimizer=optimizers.Adam(LR, decay=1e-6),
        loss='categorical_crossentropy',
        metrics=['categorical_accuracy'])

    return model
# end def

In [5]:
def train_svm(rep):
    print('='*20 + rep + 'svm' + '='*20, flush=True)
    global LE, LB, U, L, U_prime, n, k, u, config
    train_train = np.array([np.array(item[rep]) for item in L])
    y_train = np.array([np.array(item['cat_en']) for item in L])

    # Use L1 to train a classifier h1 that considers only the use representation of doc
    h1 = LinearSVC(random_state=0, tol=1e-5)
    h1.fit(train_train, y_train)

    # define the third combined classifier from h1 and h2,
    # and test the performance of h1 and h2 on test set
    print('=' * 50 + 'Predicting on Test Set....' + '=' * 50, flush=True)

    T_test = np.array([np.array(item[rep]) for item in T])
    pprint(T[0]['pooledbert'].shape)
    pprint(T[0]['seqbert'].shape)
    pprint(T[0]['doc2vec'].shape)
    h1_y_pred = h1.predict(T_test)
    del T_test
    gc.collect()

    h1_y_pred = LE.inverse_transform(h1_y_pred)

    y_true = np.array([item['cat_bin'] for item in T])
    y_true = LB.inverse_transform(y_true)

    print(accuracy_score(y_true, h1_y_pred), flush=True)

    print(classification_report(y_true, h1_y_pred), flush=True)
# end def

In [6]:
def train_mlp(rep):
    print('='*20 + rep + 'mlp' + '='*20, flush=True)
    global LB, L, T, TEXT_EMBEDDING_MATRIX, config
    T_test = np.array([np.array(item[rep]).flatten() for item in T])

    train = np.array([np.array(item[rep]).flatten() for item in L])
    y_train = np.array([np.array(item['cat_bin']) for item in L])

    h1 = build_model(
        text_input_shape=train.shape[1:],
        output_shape=LB.classes_.shape[0],
        **config[rep + '_kwargs'])
    print(h1.summary(), flush=True)

    h1_train_input_dict = dict(
        name=rep,
        model=h1,
        X_text_train=train,
        y_train=y_train,
        class_weight=None,
        batch_size=config['normal_batch_size'],
        epochs=config['normal_' + rep + '_epochs'],
        validation_split=config['normal_val_split'])

    h1 = train_nn(**h1_train_input_dict)

    test_input_dict = dict(
        model=h1,
        X_text_test=T_test,
        batch_size=config['normal_batch_size'])
    h1_y_pred = test_nn(**test_input_dict)
    del T_test
    gc.collect()

    # test the performance of h1 on test set
    print('=' * 50 + 'Predicting on Test Set....' + '=' * 50, flush=True)

    h1_y_pred = np.array([[1 if j >= max(y) else 0 for j in y] for y in h1_y_pred])
    h1_y_pred = LB.inverse_transform(h1_y_pred)

    y_true = np.array([item['cat_bin'] for item in T])
    y_true = LB.inverse_transform(y_true)

    print(accuracy_score(y_true, h1_y_pred), flush=True)

    print(classification_report(y_true, h1_y_pred), flush=True)
#end def

In [7]:
def train_rfc(rep):
    pprint('='*20 + rep + 'rfc' + '='*20)
    global LE, L, config
    train = np.array([np.array(item[rep]).flatten() for item in L])
    y_train = np.array([np.array(item['cat_en']) for item in L])

    rfc_param = dict(
        n_jobs=8,
        criterion='entropy',
        max_depth=40,
        n_estimators=100,
        max_features='auto',
        random_state=config['seed'],
        min_impurity_decrease=0.001,
        )

    h1 = RandomForestClassifier(**rfc_param)
    h1.fit(train, y_train)

    # define the third combined classifier from h1 and h2,
    # and test the performance of h1 and h2 on test set
    print('=' * 50 + 'Predicting on Test Set....' + '=' * 50)
    T_test = np.array([np.array(item[rep]).flatten() for item in T])
    h1_y_pred = h1.predict_proba(T_test)
    del T_test
    gc.collect()

    h1_y_pred = np.array([y.argmax() for y in h1_y_pred])
    h1_y_pred = LE.inverse_transform(h1_y_pred)

    y_true = np.array([item['cat_en'] for item in T])
    y_true = LE.inverse_transform(y_true)

    print(accuracy_score(y_true, h1_y_pred), flush=True)

    print(classification_report(y_true, h1_y_pred), flush=True)
# end def

In [8]:
def train_xgb(rep):
    pprint('='*20 + rep + 'xgb' + '='*20)
    global LE, L, config
    params_xgb = {
        'eta': 0.01,
        'max_depth': 15,
        'subsample': 0.9,
        'colsample_bytree': 0.75,
        'min_child_weight': 3,
        'objective': 'multi:softprob',
        'num_class': LE.classes_.shape[0],
        'eval_metric': 'mlogloss',
        'seed': config['seed'],
        'silent': 1,
    }

    train = np.array([np.array(item[rep]).flatten() for item in L])
    y_train = np.array([np.array(item['cat_en']) for item in L])

    train = xgboost.DMatrix(train, label=y_train)
    h1 = xgboost.train(params_xgb, train, 30)

    # define the third combined classifier from h1 and h2,
    # and test the performance of h1 and h2 on test set
    print('=' * 50 + 'Predicting on Test Set....' + '=' * 50)
    T_test = np.array([np.array(item[rep]).flatten() for item in T])
    T_test = xgboost.DMatrix(T_test)
    h1_y_pred = h1.predict(T_test)
    del T_test
    gc.collect()

    h1_y_pred = np.array([y.argmax() for y in h1_y_pred])
    h1_y_pred = LE.inverse_transform(h1_y_pred)

    y_true = np.array([item['cat_en'] for item in T])
    y_true = LE.inverse_transform(y_true)

    print(accuracy_score(y_true, h1_y_pred), flush=True)

    print(classification_report(y_true, h1_y_pred), flush=True)
# end def

In [9]:
def train_conv(rep):
    print('='*20 + rep + 'conv' + '='*20, flush=True)
    global LB, L, T, TEXT_EMBEDDING_MATRIX, config
    T_test = np.array([np.asarray(item[rep]) for item in T])

    train = np.array([np.asarray(item[rep]) for item in L])

    y_train = np.array([np.array(item['cat_bin']) for item in L])

    h1 = build_conv_model(
        text_input_shape=train.shape[1:],
        output_shape=LB.classes_.shape[0],
        **config[rep + '_kwargs'])
    print(h1.summary(), flush=True)

    h1_train_input_dict = dict(
        name=rep,
        model=h1,
        X_text_train=train,
        y_train=y_train,
        class_weight=None,
        batch_size=config['normal_batch_size'],
        epochs=config['normal_' + rep + '_epochs'],
        validation_split=config['normal_val_split'])

    h1 = train_nn(**h1_train_input_dict)

    test_input_dict = dict(
        model=h1,
        X_text_test=T_test,
        batch_size=config['normal_batch_size'])
    h1_y_pred = test_nn(**test_input_dict)
    del T_test
    gc.collect()

    # test the performance of h1 on test set
    print('=' * 50 + 'Predicting on Test Set....' + '=' * 50)

    h1_y_pred = np.array([[1 if j >= max(y) else 0 for j in y] for y in h1_y_pred])
    h1_y_pred = LB.inverse_transform(h1_y_pred)

    y_true = np.array([item['cat_bin'] for item in T])
    y_true = LB.inverse_transform(y_true)

    print(accuracy_score(y_true, h1_y_pred), flush=True)

    print(classification_report(y_true, h1_y_pred), flush=True)
#end def

In [14]:
# support function for training nn-related algorithms
def train_nn(
    name,
    model,
    X_text_train,
    y_train,
    batch_size=128,
    epochs=32,
    validation_split=0.1,
    **kwargs):
    # define early stopping callback
    callbacks_list = []
    early_stopping = dict(monitor='val_loss',
                            patience=3,
                            min_delta=0.001, 
                            verbose=0,
                            restore_best_weights=True)
    model_checkpoint = dict(filepath='../models/20news/' + name + '_{val_loss:.5f}_{epoch:04d}.weights.h5',
                            save_best_only=True,
                            save_weights_only=True,
                            mode='auto',
                            period=1,
                            verbose=0)

    earlystop = EarlyStopping(**early_stopping)
    callbacks_list.append(earlystop)

    checkpoint = ModelCheckpoint(**model_checkpoint)
    callbacks_list.append(checkpoint)

    x_dict = dict(text_input=X_text_train)
    y_dict = dict(output=y_train)
    model.fit(x_dict, y_dict,
        epochs=epochs,
        batch_size=batch_size,
        callbacks=callbacks_list,
        validation_split=validation_split,
        verbose=0)
    return model
# end def

In [15]:
def test_nn(
    model,
    X_text_test,
    batch_size=128,
    **kwargs):

    x_dict = dict(text_input=X_text_test)
    return model.predict(x_dict, batch_size=batch_size)
# end def

In [15]:
train_svm('doc2vec')

0.4318906001062135
                          precision    recall  f1-score   support

             alt.atheism       0.26      0.32      0.29       319
           comp.graphics       0.33      0.40      0.36       389
 comp.os.ms-windows.misc       0.30      0.31      0.30       394
comp.sys.ibm.pc.hardware       0.34      0.36      0.35       392
   comp.sys.mac.hardware       0.29      0.25      0.26       385
          comp.windows.x       0.44      0.42      0.43       395
            misc.forsale       0.55      0.57      0.56       390
               rec.autos       0.53      0.47      0.50       396
         rec.motorcycles       0.53      0.54      0.53       398
      rec.sport.baseball       0.39      0.62      0.48       397
        rec.sport.hockey       0.62      0.64      0.63       399
               sci.crypt       0.71      0.51      0.59       396
         sci.electronics       0.34      0.30      0.32       393
                 sci.med       0.66      0.59      0.62 

In [35]:
train_mlp('doc2vec')

Model: "functional_23"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
text_input (InputLayer)      [(None, 300)]             0         
_________________________________________________________________
dense_11 (Dense)             (None, 512)               154112    
_________________________________________________________________
dropout_11 (Dropout)         (None, 512)               0         
_________________________________________________________________
output (Dense)               (None, 20)                10260     
Total params: 164,372
Trainable params: 164,372
Non-trainable params: 0
_________________________________________________________________
None
0.5047796070100903
                          precision    recall  f1-score   support

             alt.atheism       0.23      0.53      0.32       319
           comp.graphics       0.39      0.45      0.42       389
 comp.os.ms-windows.mi

In [37]:
train_rfc('doc2vec')

0.34572490706319703
                          precision    recall  f1-score   support

             alt.atheism       0.17      0.19      0.18       319
           comp.graphics       0.22      0.23      0.22       389
 comp.os.ms-windows.misc       0.21      0.30      0.25       394
comp.sys.ibm.pc.hardware       0.26      0.37      0.30       392
   comp.sys.mac.hardware       0.20      0.13      0.16       385
          comp.windows.x       0.34      0.35      0.35       395
            misc.forsale       0.43      0.53      0.47       390
               rec.autos       0.38      0.45      0.41       396
         rec.motorcycles       0.30      0.35      0.32       398
      rec.sport.baseball       0.43      0.45      0.44       397
        rec.sport.hockey       0.41      0.58      0.48       399
               sci.crypt       0.61      0.40      0.49       396
         sci.electronics       0.31      0.24      0.27       393
                 sci.med       0.56      0.57      0.56

In [40]:
train_xgb('doc2vec')

Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


0.3056293149229952
                          precision    recall  f1-score   support

             alt.atheism       0.24      0.19      0.21       319
           comp.graphics       0.22      0.26      0.24       389
 comp.os.ms-windows.misc       0.22      0.25      0.24       394
comp.sys.ibm.pc.hardware       0.32      0.25      0.28       392
   comp.sys.mac.hardware       0.24      0.18      0.21       385
          comp.windows.x       0.28      0.21      0.24       395
            misc.forsale       0.38      0.48      0.42       390
               rec.autos       0.33      0.37      0.35       396
         rec.motorcycles       0.24      0.28      0.25       398
      rec.sport.baseball       0.36   

In [43]:
train_svm('tfidf')

0.6578597981943707
                          precision    recall  f1-score   support

             alt.atheism       0.55      0.46      0.50       319
           comp.graphics       0.53      0.51      0.52       389
 comp.os.ms-windows.misc       0.59      0.62      0.61       394
comp.sys.ibm.pc.hardware       0.52      0.48      0.50       392
   comp.sys.mac.hardware       0.68      0.65      0.66       385
          comp.windows.x       0.66      0.67      0.67       395
            misc.forsale       0.68      0.83      0.75       390
               rec.autos       0.67      0.70      0.68       396
         rec.motorcycles       0.69      0.66      0.67       398
      rec.sport.baseball       0.74      0.77      0.75       397
        rec.sport.hockey       0.81      0.85      0.83       399
               sci.crypt       0.83      0.82      0.83       396
         sci.electronics       0.50      0.47      0.48       393
                 sci.med       0.72      0.64      0.68 

In [44]:
train_mlp('tfidf')

Model: "functional_25"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
text_input (InputLayer)      [(None, 10000)]           0         
_________________________________________________________________
dense_12 (Dense)             (None, 512)               5120512   
_________________________________________________________________
dropout_12 (Dropout)         (None, 512)               0         
_________________________________________________________________
output (Dense)               (None, 20)                10260     
Total params: 5,130,772
Trainable params: 5,130,772
Non-trainable params: 0
_________________________________________________________________
None
0.6517525225703664
                          precision    recall  f1-score   support

             alt.atheism       0.66      0.42      0.51       319
           comp.graphics       0.61      0.48      0.54       389
 comp.os.ms-window

In [45]:
train_rfc('tfidf')

0.44848645778013807
                          precision    recall  f1-score   support

             alt.atheism       0.45      0.26      0.33       319
           comp.graphics       0.31      0.24      0.27       389
 comp.os.ms-windows.misc       0.41      0.61      0.49       394
comp.sys.ibm.pc.hardware       0.31      0.33      0.32       392
   comp.sys.mac.hardware       0.39      0.32      0.35       385
          comp.windows.x       0.37      0.36      0.37       395
            misc.forsale       0.58      0.77      0.66       390
               rec.autos       0.38      0.48      0.43       396
         rec.motorcycles       0.23      0.49      0.31       398
      rec.sport.baseball       0.49      0.47      0.48       397
        rec.sport.hockey       0.64      0.65      0.64       399
               sci.crypt       0.69      0.68      0.69       396
         sci.electronics       0.28      0.18      0.21       393
                 sci.med       0.28      0.19      0.23

In [46]:
train_xgb('tfidf')

Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


0.2661975570897504
                          precision    recall  f1-score   support

             alt.atheism       0.13      0.11      0.12       319
           comp.graphics       0.15      0.16      0.16       389
 comp.os.ms-windows.misc       0.52      0.53      0.52       394
comp.sys.ibm.pc.hardware       0.21      0.22      0.22       392
   comp.sys.mac.hardware       0.09      0.19      0.12       385
          comp.windows.x       0.21      0.21      0.21       395
            misc.forsale       0.64      0.63      0.63       390
               rec.autos       0.32      0.34      0.33       396
         rec.motorcycles       0.18      0.10      0.13       398
      rec.sport.baseball       0.18   

In [11]:
train_svm('use')

0.676048858204992
                          precision    recall  f1-score   support

             alt.atheism       0.51      0.45      0.47       319
           comp.graphics       0.52      0.52      0.52       389
 comp.os.ms-windows.misc       0.57      0.54      0.56       394
comp.sys.ibm.pc.hardware       0.48      0.46      0.47       392
   comp.sys.mac.hardware       0.52      0.52      0.52       385
          comp.windows.x       0.61      0.64      0.62       395
            misc.forsale       0.73      0.86      0.79       390
               rec.autos       0.80      0.82      0.81       396
         rec.motorcycles       0.77      0.75      0.76       398
      rec.sport.baseball       0.89      0.87      0.88       397
        rec.sport.hockey       0.88      0.91      0.90       399
               sci.crypt       0.71      0.69      0.70       396
         sci.electronics       0.61      0.50      0.55       393
                 sci.med       0.80      0.85      0.82  

In [10]:
train_mlp('use')

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
text_input (InputLayer)      [(None, 512)]             0         
_________________________________________________________________
dense (Dense)                (None, 512)               262656    
_________________________________________________________________
dropout (Dropout)            (None, 512)               0         
_________________________________________________________________
output (Dense)               (None, 20)                10260     
Total params: 272,916
Trainable params: 272,916
Non-trainable params: 0
_________________________________________________________________
None
0.6881306425916092
                          precision    recall  f1-score   support

             alt.atheism       0.45      0.59      0.51       319
           comp.graphics       0.56      0.57      0.56       389
 comp.os.ms-windows.mis

In [13]:
train_rfc('use')

0.641927774827403
                          precision    recall  f1-score   support

             alt.atheism       0.52      0.36      0.43       319
           comp.graphics       0.50      0.43      0.46       389
 comp.os.ms-windows.misc       0.52      0.63      0.57       394
comp.sys.ibm.pc.hardware       0.48      0.48      0.48       392
   comp.sys.mac.hardware       0.46      0.38      0.42       385
          comp.windows.x       0.57      0.61      0.59       395
            misc.forsale       0.69      0.66      0.67       390
               rec.autos       0.74      0.85      0.79       396
         rec.motorcycles       0.67      0.71      0.69       398
      rec.sport.baseball       0.87      0.86      0.86       397
        rec.sport.hockey       0.89      0.88      0.88       399
               sci.crypt       0.71      0.67      0.69       396
         sci.electronics       0.58      0.50      0.54       393
                 sci.med       0.72      0.85      0.78  

In [14]:
train_xgb('use')

Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


0.5823154540626659
                          precision    recall  f1-score   support

             alt.atheism       0.45      0.41      0.43       319
           comp.graphics       0.44      0.39      0.41       389
 comp.os.ms-windows.misc       0.54      0.47      0.50       394
comp.sys.ibm.pc.hardware       0.42      0.44      0.43       392
   comp.sys.mac.hardware       0.39      0.29      0.34       385
          comp.windows.x       0.52      0.61      0.56       395
            misc.forsale       0.50      0.70      0.58       390
               rec.autos       0.76      0.74      0.75       396
         rec.motorcycles       0.63      0.62      0.62       398
      rec.sport.baseball       0.82   

In [41]:
train_svm('pooledbert')

(768,)
(16, 768)
(300,)
0.40626659585767394




                          precision    recall  f1-score   support

             alt.atheism       0.43      0.42      0.43       319
           comp.graphics       0.25      0.24      0.24       389
 comp.os.ms-windows.misc       0.27      0.23      0.25       394
comp.sys.ibm.pc.hardware       0.23      0.24      0.23       392
   comp.sys.mac.hardware       0.25      0.18      0.21       385
          comp.windows.x       0.34      0.39      0.36       395
            misc.forsale       0.51      0.67      0.58       390
               rec.autos       0.47      0.52      0.50       396
         rec.motorcycles       0.39      0.32      0.35       398
      rec.sport.baseball       0.45      0.43      0.44       397
        rec.sport.hockey       0.53      0.54      0.54       399
               sci.crypt       0.47      0.41      0.44       396
         sci.electronics       0.25      0.21      0.23       393
                 sci.med       0.51      0.60      0.55       396
         

In [13]:
with tf.device("/cpu:0"): # to offset Blas GEMM launch failed
    train_mlp('pooledbert')

Model: "functional_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
text_input (InputLayer)      [(None, 768)]             0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               393728    
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
output (Dense)               (None, 20)                10260     
Total params: 403,988
Trainable params: 403,988
Non-trainable params: 0
_________________________________________________________________
None
0.2622145512480085
                          precision    recall  f1-score   support

             alt.atheism       0.21      0.29      0.25       319
           comp.graphics       0.00      0.00      0.00       389
 comp.os.ms-windows.mis

  _warn_prf(average, modifier, msg_start, len(result))


In [11]:
train_rfc('pooledbert')

0.20300053106744556
                          precision    recall  f1-score   support

             alt.atheism       0.26      0.22      0.24       319
           comp.graphics       0.10      0.13      0.11       389
 comp.os.ms-windows.misc       0.13      0.18      0.15       394
comp.sys.ibm.pc.hardware       0.14      0.11      0.12       392
   comp.sys.mac.hardware       0.11      0.09      0.10       385
          comp.windows.x       0.22      0.28      0.25       395
            misc.forsale       0.33      0.36      0.34       390
               rec.autos       0.18      0.15      0.16       396
         rec.motorcycles       0.11      0.11      0.11       398
      rec.sport.baseball       0.23      0.22      0.23       397
        rec.sport.hockey       0.22      0.29      0.25       399
               sci.crypt       0.19      0.20      0.20       396
         sci.electronics       0.12      0.12      0.12       393
                 sci.med       0.28      0.24      0.26

In [15]:
train_xgb('pooledbert')

Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


0.2134891131173659
                          precision    recall  f1-score   support

             alt.atheism       0.24      0.19      0.21       319
           comp.graphics       0.13      0.09      0.11       389
 comp.os.ms-windows.misc       0.18      0.17      0.17       394
comp.sys.ibm.pc.hardware       0.16      0.09      0.11       392
   comp.sys.mac.hardware       0.10      0.11      0.10       385
          comp.windows.x       0.22      0.28      0.24       395
            misc.forsale       0.35      0.38      0.36       390
               rec.autos       0.20      0.19      0.20       396
         rec.motorcycles       0.19      0.21      0.20       398
      rec.sport.baseball       0.20   

In [16]:
train_conv('seqbert')

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
text_input (InputLayer)      [(None, 16, 768)]         0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 14, 256)           590080    
_________________________________________________________________
global_max_pooling1d_1 (Glob (None, 256)               0         
_________________________________________________________________
output (Dense)               (None, 20)                5140      
Total params: 595,220
Trainable params: 595,220
Non-trainable params: 0
_________________________________________________________________
None
0.5345193839617631
                          precision    recall  f1-score   support

             alt.atheism       0.72      0.35      0.47       319
           comp.graphics       0.46      0.22      0.30       389
 comp.os.ms-windows.misc    