In [None]:
# Show the working directory and list the dataset folder.
# NOTE(review): this lists ./Dataset, while the path-definition cell loads from
# './DataSet' — confirm the directory's real casing; the two names differ on
# case-sensitive filesystems.
!pwd
!ls ./Dataset

# Text Classification on the Chinese THUCNews Dataset

In [None]:
import tensorflow as tf
# Report the TensorFlow version and the GPUs TensorFlow can see.
gpus = tf.config.list_physical_devices('GPU')
print(f'Tensorflow Version: {tf.__version__}', f'GPUs: {gpus}', sep='\n')

In [None]:
from typing import List, Dict

## Read Dataset

I used a subset of this dataset, processed it, and saved it as three split files (`cnews.train.txt`, `cnews.val.txt`, `cnews.test.txt`) plus `cnews.vocab.txt`.

### Path Definition

In [None]:
import os

# Root folder of the corpus and the three split files that live inside it.
dataset_path = './DataSet'
train_path, test_path, val_path = (
    os.path.join(dataset_path, f'cnews.{split}.txt')
    for split in ('train', 'test', 'val')
)

In [None]:
# Location of the stop-word list that get_stop_words() reads later on.
stopwords_path  = './ChineseStopWords.txt'

### Read the dataset files

In [None]:
def read_file(filename):
    """Read a ``label<TAB>content`` file into two parallel lists.

    Args:
        filename: Path of a text file with one sample per line, the label and
            the article body separated by a tab.

    Returns:
        ``(contents, labels)`` — two lists of equal length, in file order.

    Raises:
        ValueError: If a non-blank line contains no tab separator.
    """
    contents, labels = [], []
    # Explicit encoding so the result does not depend on the platform default.
    # Assumes the cnews files are UTF-8 — TODO confirm.
    with open(filename, 'r', encoding='utf-8') as f:
        for line_no, line in enumerate(f, start=1):
            record = line.strip()
            if not record:
                # Blank lines (e.g. a trailing newline at EOF) carry no sample.
                continue
            # Split on the first tab only, so tabs inside the body are kept.
            label, sep, content = record.partition('\t')
            if not sep:
                raise ValueError(
                    f'{filename}:{line_no}: expected "label<TAB>content"')
            if content:
                contents.append(content)
                labels.append(label)
    return contents, labels

In [None]:
import time
from collections import Counter

# Load the three splits, then report their sizes, label distributions and the
# time the loading took.
t = time.time()
train_contents, train_labels = read_file(train_path)
test_contents, test_labels = read_file(test_path)
val_contents, val_labels = read_file(val_path)
print(
    f'Dataset: Trainset({len(train_contents)})、Testset({len(test_contents)})、'
    f'Validationset({len(val_contents)})',
    f'Trainset     : {Counter(train_labels)}',
    f'Testset      : {Counter(test_labels)}',
    f'Validationset: {Counter(val_labels)}',
    sep='\n',
)
print(f'\nCost time: {time.time() - t: .6f}s')

## Data preprocessing

In [None]:
# Number of vocabulary entries (passed to get_vocabulary and used as the
# Embedding input_dim of every model).
VOCAB_SIZE = 5_000
# Every sentence is cut / padded to this many tokens.
SENTENCE_LEN = 512
# Batch size of the tf.data pipelines.
BATCH = 128

### remove special characters

In [None]:
import re

def clear_character(sentence):
    """Strip bracketed tags and every character that is not CJK, a letter or a digit.

    Args:
        sentence: Raw text of one sample.

    Returns:
        The text with ``[...]`` spans removed and then reduced to characters in
        ``\\u4e00-\\u9fa5``, ``a-z``, ``A-Z`` and ``0-9`` (whitespace is removed
        as well, since it is not in that set).
    """
    # Raw strings: '\[' in a normal string literal is an invalid escape sequence.
    bracketed = r'\[.*?\]'
    # '^' negates a class only as its first character. The earlier pattern
    # repeated '^' before every range, which made '^' itself an allowed
    # character that survived the cleaning.
    disallowed = r'[^\u4e00-\u9fa5a-zA-Z0-9]'
    without_tags = re.sub(bracketed, '', sentence)
    return re.sub(disallowed, '', without_tags)

# Clean every split and report the elapsed wall-clock time.
t = time.time()
train_text = [clear_character(s) for s in train_contents]
test_text  = [clear_character(s) for s in test_contents]
val_text   = [clear_character(s) for s in val_contents]
print(f'Cost time: {time.time() - t: .2f}s')

### Tokenization

In [None]:
# Folder that caches the tokenisation result of each split as a pickle file.
token_path = './token_data'
# exist_ok=True removes the check-then-create race of exists() + mkdir().
os.makedirs(token_path, exist_ok=True)

train_token_file = os.path.join(token_path, 'train_token.pkl')
test_token_file  = os.path.join(token_path, 'test_token.pkl')
val_token_file   = os.path.join(token_path, 'val_token.pkl')

train_token_file, test_token_file, val_token_file

In [None]:
import jieba, pickle

def _load_or_tokenize(texts, cache_file, split_tag, split_description):
    """Return the jieba tokenisation of ``texts``, cached in ``cache_file``.

    Args:
        texts: Cleaned sentences of one split.
        cache_file: Pickle file that stores / will store the token lists.
        split_tag: Name printed while tokenising (e.g. ``'Trainset'``).
        split_description: Name printed when the cache is reused
            (e.g. ``'training'``).

    Returns:
        A list with one token list per sentence.
    """
    if os.path.exists(cache_file):
        # NOTE(review): the cache is not invalidated when the cleaning step
        # changes; delete the file to force re-tokenisation. pickle.load must
        # only be used on cache files written by this notebook.
        with open(cache_file, 'rb') as cache:
            tokens = pickle.load(cache)
        print(f'The token file for the {split_description} set already exists, loaded!')
        return tokens

    print(f'- {split_tag} token...', end='')
    tokens = [jieba.lcut(sentence) for sentence in texts]
    # Context manager so the handle is flushed and closed deterministically.
    with open(cache_file, 'wb') as cache:
        pickle.dump(tokens, cache)
    print('Over!')
    return tokens


# No tf.device() scope here: jieba and pickle are plain Python and run no
# TensorFlow ops, so pinning a device had no effect on this cell.
t = time.time()
train_seg_text = _load_or_tokenize(train_text, train_token_file, 'Trainset', 'training')
test_seg_text = _load_or_tokenize(test_text, test_token_file, 'Testset', 'test')
val_seg_text = _load_or_tokenize(val_text, val_token_file, 'Validationset', 'validation')
print(f'\nCost time: {time.time() - t: .2f}s')

### Remove Stopwords

In [None]:
def get_stop_words(path=None):
    """Load the stop-word list as a set.

    Args:
        path: File with one stop word per line. Defaults to the module-level
            ``stopwords_path`` when omitted.

    Returns:
        A set of the non-empty, whitespace-stripped lines of the file.
    """
    if path is None:
        path = stopwords_path
    # Text mode handles both '\n' and '\r\n' line endings (splitting on '\r\n'
    # alone turns an LF-only file into one giant "word"). 'utf-8-sig' also
    # drops a leading BOM if the file has one. The with-block closes the handle.
    with open(path, 'r', encoding='utf-8-sig') as f:
        return {word for word in (line.strip() for line in f) if word}

# Load the stop-word set once; it is passed to every drop_stopwords() call below.
stopwords = get_stop_words()


def drop_stopwords(line, stopwords):
    """Return the words of ``line`` that are not in ``stopwords``, order preserved."""
    return [word for word in line if word not in stopwords]

# Filter the stop words out of every tokenised sentence of each split.
t = time.time()
train_sentence_words = [drop_stopwords(s, stopwords) for s in train_seg_text]
test_sentence_words  = [drop_stopwords(s, stopwords) for s in test_seg_text]
val_sentence_words   = [drop_stopwords(s, stopwords) for s in val_seg_text]
print(f'\nCost time: {time.time() - t: .2f}s')

### Build the vocabulary

#### Flatten the dataset

In [None]:
from tqdm.notebook import tqdm

def flatten(dataset: List[list]) -> List[str]:
    """Concatenate all sentences of ``dataset`` into one flat word list.

    A tqdm progress bar tracks the iteration over the sentences.
    """
    return [word for sentence_words in tqdm(dataset) for word in sentence_words]


# Flatten the tokenised training sentences into one word list and time the step.
t = time.time()
print(f'Flatten train set...', end='')
train_words = flatten(train_sentence_words)
print(f'Over! Cost: {time.time() - t:.2f}s。')

# Only the training split feeds the vocabulary; the test and validation splits
# are not flattened here (presumably so their words cannot leak into the
# vocabulary — confirm with the author).
dataset_words = train_words
print(f'\nCost time: {time.time() - t: .2f}s')

#### Make vocab

In [None]:
def get_vocabulary(dataset, vocab_size=5000) -> Dict:
    """Map the most frequent words of ``dataset`` to integer indices.

    Index 0 is ``'<PAD>'`` and index 1 is ``'<UNK>'``; the remaining
    ``vocab_size - 2`` slots go to the most common words, most frequent first.
    """
    specials = ['<PAD>', '<UNK>']
    frequent = [word for word, _count in Counter(dataset).most_common(vocab_size - 2)]
    return {token: index for index, token in enumerate(specials + frequent)}

# Build the vocabulary once and cache it on disk; later runs reload the cache.
# NOTE(review): the cache is reused even if VOCAB_SIZE or the training data
# changed — delete the file to rebuild. Only load pickle files you created.
vocab_path = './vocabulary.pkl'
if os.path.exists(vocab_path):
    # Context managers close the file handles, which bare open() calls
    # passed to pickle never did.
    with open(vocab_path, 'rb') as vocab_file:
        vocabulary = pickle.load(vocab_file)
    print(f'Vocab already exists, loaded!')
else:
    vocabulary = get_vocabulary(dataset_words, VOCAB_SIZE)
    with open(vocab_path, 'wb') as vocab_file:
        pickle.dump(vocabulary, vocab_file)

### Text length alignment

In [None]:
# Sanity check: display the indices assigned to the <UNK> and <PAD> tokens.
vocabulary['<UNK>'], vocabulary['<PAD>']

In [None]:
def cut_sentence(dataset: List[list], max_length) -> List[list]:
    """Truncate every sentence of ``dataset`` to at most ``max_length`` tokens.

    Sentences shorter than ``max_length`` are passed through as the same
    list objects; longer (or equally long) ones are sliced.
    """
    return [
        sentence[:max_length] if len(sentence) >= max_length else sentence
        for sentence in dataset
    ]


# Truncate each split to SENTENCE_LEN tokens, timing every split separately
# (t1 / t2) and the whole cell (t).
t = time.time()
train_dataset_cutted = cut_sentence(dataset=train_sentence_words,
                                    max_length=SENTENCE_LEN)
print(f'Trainset Align: {time.time() - t:.6f}s')

t1 = time.time()
test_dataset_cutted = cut_sentence(dataset=test_sentence_words,
                                   max_length=SENTENCE_LEN)
print(f'Testset Align: {time.time() - t1:.6f}s')

t2 = time.time()
val_dataset_cutted = cut_sentence(dataset=val_sentence_words,
                                  max_length=SENTENCE_LEN)
print(f'validationset Align: {time.time() - t2:.6f}s')

print(f'\nCost time: {time.time() - t: .6f}s')

### Word-to-index conversion

In [None]:
def preprocess_dataset(dataset: List[list], vocab: Dict,
                       max_length: int = 512) -> List[list]:
    """Convert tokenised sentences into fixed-length lists of vocabulary indices.

    Args:
        dataset: One token list per sentence.
        vocab: Word-to-index mapping; must contain ``'<PAD>'`` and ``'<UNK>'``.
        max_length: Length of every returned row (expected to be >= 0).

    Returns:
        One index list per sentence, each exactly ``max_length`` long. Words
        missing from ``vocab`` become ``vocab['<UNK>']``, short sentences are
        right-padded with ``vocab['<PAD>']`` and long ones are truncated.

    Raises:
        KeyError: If ``vocab`` lacks ``'<PAD>'`` or ``'<UNK>'``.
    """
    pad_index = vocab['<PAD>']
    unk_index = vocab['<UNK>']

    index_dataset = []
    for sentence in dataset:
        # Truncate before indexing: without this, sentences longer than
        # max_length produced ragged rows unless the caller had cut them first.
        indices = [vocab.get(word, unk_index) for word in sentence[:max_length]]
        indices.extend([pad_index] * (max_length - len(indices)))
        index_dataset.append(indices)

    return index_dataset

# Turn each (already truncated) split into padded index sequences, timing
# every split separately (t1 / t2) and the whole cell (t).
t = time.time()
train_index = preprocess_dataset(train_dataset_cutted, vocabulary, SENTENCE_LEN)
print(f'Trainset index: {time.time() - t:.2f}s')

t1 = time.time()
test_index  = preprocess_dataset(test_dataset_cutted, vocabulary, SENTENCE_LEN)
print(f'Testset index: {time.time() - t1:.2f}s')

t2 = time.time()
val_index   = preprocess_dataset(val_dataset_cutted, vocabulary, SENTENCE_LEN)
print(f'validationset index: {time.time() - t2:.2f}s')

print(f'\nCost time: {time.time() - t: .2f}s')

In [None]:
# Reverse lookup: print the vocabulary entry whose index equals word_index.
word_index = 222
key_ = (entry for entry in vocabulary.items() if entry[1] == word_index)
for key, value in key_:
    print('%s: %s' % (key, value))

## Label mapping

In [None]:
# Distinct training labels in first-seen order, each mapped to a consecutive id.
text_labels = list(dict.fromkeys(train_labels))
text_index  = list(range(len(text_labels)))
labels_map = {label: index for index, label in enumerate(text_labels)}

def label2index(dataset_labels: List, labels_map: Dict) -> List:
    """Translate each label to its id in ``labels_map`` (``None`` if unmapped).

    A tqdm progress bar tracks the iteration over the labels.
    """
    return list(map(labels_map.get, tqdm(dataset_labels)))

# Integer label ids for the three splits, all using the training label map.
train_label_index, test_label_index, val_label_index = (
    label2index(split_labels, labels_map)
    for split_labels in (train_labels, test_labels, val_labels)
)

In [None]:
# One-hot encode the label ids. The width comes from the label map rather than
# a hard-coded 10: tf.one_hot encodes an index >= depth as an all-zero row, so
# a fixed depth would silently erase any extra class instead of failing.
num_label_classes = len(labels_map)
train_labels_onehot = tf.one_hot(train_label_index, depth=num_label_classes)
test_labels_onehot = tf.one_hot(test_label_index, depth=num_label_classes)
val_labels_onehot = tf.one_hot(val_label_index, depth=num_label_classes)

## Packaging into `tf.data` datasets

In [None]:
import numpy as np

# Sanity check: shapes of the indexed training sentences and their one-hot labels.
np.array(train_index).shape, np.array(train_labels_onehot).shape

In [None]:
# Wrap the three splits in batched tf.data pipelines.
# No device pin: the cell no longer requires '/gpu:0' to exist.
t_db = time.time()
# The index lists are converted to one dense int32 array per split up front;
# handing nested Python lists to from_tensor_slices is typically much slower.
train_db = (tf.data.Dataset
            .from_tensor_slices((np.asarray(train_index, dtype=np.int32),
                                 train_labels_onehot))
            .shuffle(len(train_index))
            .batch(BATCH, drop_remainder=True))
# Evaluation splits keep every sample: drop_remainder=True discarded the last
# partial batch, so test/validation metrics were computed on a subset.
# Shuffling is unnecessary for evaluation and is omitted.
test_db  = (tf.data.Dataset
            .from_tensor_slices((np.asarray(test_index, dtype=np.int32),
                                 test_labels_onehot))
            .batch(BATCH))
val_db   = (tf.data.Dataset
            .from_tensor_slices((np.asarray(val_index, dtype=np.int32),
                                 val_labels_onehot))
            .batch(BATCH))
print(f'\nCost time: {time.time() - t_db: .2f}s')

In [None]:
# Inspect the structure (shapes and dtypes) of one training batch. The bare
# `train_db.batch` only displayed a bound-method repr.
train_db.element_spec

# Model & Learning

In [None]:
from keras  import optimizers, losses
from keras.models import Sequential
from keras.layers import Embedding, Dense, Flatten, SimpleRNN, LSTM, Bidirectional

## Hyperparameters

In [None]:
# Number of training epochs passed to every model.fit call.
EPOCHS = 10
# Learning rate handed to the RMSprop optimizer of every model.
LR = 1e-3
# Embedding output size, also used as the unit count of the RNN/LSTM layers.
STATE_DIM = 32
# Width of the final softmax layer.
CLASSES_NUM = 10

## Model1: LR

In [None]:
# Baseline: embed every token, flatten the whole sequence and classify it with
# a single softmax layer.
LR_model = Sequential([
    Embedding(input_dim=VOCAB_SIZE,
              output_dim=STATE_DIM,
              input_length=SENTENCE_LEN),
    Flatten(),
    Dense(CLASSES_NUM, activation='softmax'),
])

LR_model.summary()

In [None]:
# RMSprop on categorical cross-entropy (one-hot targets); accuracy is logged as 'acc'.
LR_model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=LR),
    metrics=['acc'],
)

In [None]:
# Train for EPOCHS epochs, validating on val_db; the History is kept for plotting.
LR_history = LR_model.fit(
    x=train_db,
    validation_data=val_db,
    epochs=EPOCHS,
)

In [None]:
import matplotlib.pyplot as plt
# Training loss and accuracy per epoch for the LR model.
fig, ax = plt.subplots(dpi=100, figsize=(6, 3))
ax.plot(LR_history.history['loss'], 'bo:', label='Loss')
ax.plot(LR_history.history['acc'], 'r', label='Acc')
ax.set_xlabel('Epoch')
ax.set_xticks(np.arange(0, EPOCHS))

ax.legend(loc=5)
ax.set_title('LR model')
plt.show()

In [None]:
# Score the trained LR model on the test batches: [loss, accuracy].
loss_and_acc = LR_model.evaluate(test_db)
print(f'loss: {loss_and_acc[0]}', f'acc : {loss_and_acc[1]}', sep='\n')

## Model2: RNN

In [None]:
#@title
# Simple RNN that classifies from its final hidden state only.
RNN_model = Sequential([
    Embedding(input_dim=VOCAB_SIZE,
              output_dim=STATE_DIM,
              input_length=SENTENCE_LEN),
    SimpleRNN(STATE_DIM, return_sequences=False),
    Dense(CLASSES_NUM, activation='softmax'),
])

RNN_model.summary()

In [None]:
# RMSprop on categorical cross-entropy (one-hot targets); accuracy is logged as 'acc'.
RNN_model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=LR),
    metrics=['acc'],
)

In [None]:
# Train for EPOCHS epochs, validating on val_db; the History is kept for plotting.
RNN_history = RNN_model.fit(
    x=train_db,
    validation_data=val_db,
    epochs=EPOCHS,
)

In [None]:
# Score the trained RNN model on the test batches: [loss, accuracy].
loss_and_acc = RNN_model.evaluate(test_db)
print(f'loss: {loss_and_acc[0]}', f'acc : {loss_and_acc[1]}', sep='\n')

In [None]:
import matplotlib.pyplot as plt
# Training loss and accuracy per epoch for the RNN model.
fig, ax = plt.subplots(dpi=100, figsize=(6, 3))
ax.plot(RNN_history.history['loss'], 'bo:', label='Loss')
ax.plot(RNN_history.history['acc'], 'r', label='Acc')
ax.set_xlabel('Epoch')
ax.set_xticks(np.arange(0, EPOCHS))

ax.legend(loc=1)
ax.set_title('RNN model')
plt.show()

## Model3: RNN-return sequences

In [None]:
# Simple RNN that returns every time step; the flattened sequence of hidden
# states feeds the softmax classifier.
RNN_r_model = Sequential([
    Embedding(input_dim=VOCAB_SIZE,
              output_dim=STATE_DIM,
              input_length=SENTENCE_LEN),
    SimpleRNN(STATE_DIM, return_sequences=True),
    Flatten(),
    Dense(CLASSES_NUM, activation='softmax'),
])

RNN_r_model.summary()

In [None]:
# RMSprop on categorical cross-entropy (one-hot targets); accuracy is logged as 'acc'.
RNN_r_model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=LR),
    metrics=['acc'],
)

In [None]:
# Train for EPOCHS epochs, validating on val_db; the History is kept for plotting.
RNN_r_history = RNN_r_model.fit(
    x=train_db,
    validation_data=val_db,
    epochs=EPOCHS,
)

In [None]:
# Score the trained RNN_r model on the test batches: [loss, accuracy].
loss_and_acc = RNN_r_model.evaluate(test_db)
print(f'loss: {loss_and_acc[0]}', f'acc : {loss_and_acc[1]}', sep='\n')

In [None]:
# Training loss and accuracy per epoch for the RNN_r model.
fig, ax = plt.subplots(dpi=100, figsize=(6, 3))
ax.plot(RNN_r_history.history['loss'], 'bo:', label='Loss')
ax.plot(RNN_r_history.history['acc'], 'r', label='Acc')
ax.set_xlabel('Epoch')
ax.set_xticks(np.arange(0, EPOCHS))

ax.legend(loc=5)
ax.set_title('RNN_r model')
plt.show()

## Model4: LSTM

In [None]:
# Single LSTM layer that classifies from its final hidden state only.
LSTM_model = Sequential([
    Embedding(input_dim=VOCAB_SIZE,
              output_dim=STATE_DIM,
              input_length=SENTENCE_LEN),
    LSTM(STATE_DIM, return_sequences=False),
    Dense(CLASSES_NUM, activation='softmax'),
])

LSTM_model.summary()

In [None]:
# RMSprop on categorical cross-entropy (one-hot targets); accuracy is logged as 'acc'.
LSTM_model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=LR),
    metrics=['acc'],
)

In [None]:
# Train for EPOCHS epochs, validating on val_db; the History is kept for plotting.
LSTM_history = LSTM_model.fit(
    x=train_db,
    validation_data=val_db,
    epochs=EPOCHS,
)

In [None]:
# Score the trained LSTM model on the test batches: [loss, accuracy].
loss_and_acc = LSTM_model.evaluate(test_db)
print(f'loss: {loss_and_acc[0]}', f'acc : {loss_and_acc[1]}', sep='\n')

In [None]:
# Training loss and accuracy per epoch for the LSTM model.
fig, ax = plt.subplots(dpi=100, figsize=(6, 3))
ax.plot(LSTM_history.history['loss'], 'bo:', label='Loss')
ax.plot(LSTM_history.history['acc'], 'r', label='Acc')
ax.set_xlabel('Epoch')
ax.set_xticks(np.arange(0, EPOCHS))

ax.legend(loc=1)
ax.set_title('LSTM model')
plt.show()

## Model5: LSTM-return sequences

In [None]:
# LSTM that returns every time step; the flattened sequence of hidden states
# feeds the softmax classifier.
LSTM_r_model = Sequential([
    Embedding(input_dim=VOCAB_SIZE,
              output_dim=STATE_DIM,
              input_length=SENTENCE_LEN),
    LSTM(STATE_DIM, return_sequences=True),
    Flatten(),
    Dense(CLASSES_NUM, activation='softmax'),
])

LSTM_r_model.summary()

In [None]:
# RMSprop on categorical cross-entropy (one-hot targets); accuracy is logged as 'acc'.
LSTM_r_model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=LR),
    metrics=['acc'],
)

In [None]:
# Train for EPOCHS epochs, validating on val_db; the History is kept for plotting.
LSTM_r_history = LSTM_r_model.fit(
    x=train_db,
    validation_data=val_db,
    epochs=EPOCHS,
)

In [None]:
# Score the trained LSTM_r model on the test batches: [loss, accuracy].
loss_and_acc = LSTM_r_model.evaluate(test_db)
print(f'loss: {loss_and_acc[0]}', f'acc : {loss_and_acc[1]}', sep='\n')

In [None]:
# Training loss and accuracy per epoch for the LSTM_r model.
fig, ax = plt.subplots(dpi=100, figsize=(6, 3))
ax.plot(LSTM_r_history.history['loss'], 'bo:', label='Loss')
ax.plot(LSTM_r_history.history['acc'], 'r', label='Acc')
ax.set_xlabel('Epoch')
ax.set_xticks(np.arange(0, EPOCHS))

ax.legend(loc=5)
ax.set_title('LSTM_r model')
plt.show()

## Model6: Stacked LSTM

In [None]:
# Three stacked LSTM layers with input dropout 0.2; the first two pass full
# sequences on, the last one returns only its final state.
sLSTM_model = Sequential([
    Embedding(input_dim=VOCAB_SIZE,
              output_dim=STATE_DIM,
              input_length=SENTENCE_LEN),
    LSTM(STATE_DIM, return_sequences=True, dropout=0.2),
    LSTM(STATE_DIM, return_sequences=True, dropout=0.2),
    LSTM(STATE_DIM, return_sequences=False, dropout=0.2),
    Dense(CLASSES_NUM, activation='softmax'),
])

sLSTM_model.summary()

In [None]:
# RMSprop on categorical cross-entropy (one-hot targets); accuracy is logged as 'acc'.
sLSTM_model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=LR),
    metrics=['acc'],
)

In [None]:
# Train for EPOCHS epochs, validating on val_db; the History is kept for plotting.
sLSTM_history = sLSTM_model.fit(
    x=train_db,
    validation_data=val_db,
    epochs=EPOCHS,
)

In [None]:
# Score the trained stacked-LSTM model on the test batches: [loss, accuracy].
loss_and_acc = sLSTM_model.evaluate(test_db)
print(f'loss: {loss_and_acc[0]}', f'acc : {loss_and_acc[1]}', sep='\n')

In [None]:
# Training loss and accuracy per epoch for the stacked LSTM model.
fig, ax = plt.subplots(dpi=100, figsize=(6, 3))
ax.plot(sLSTM_history.history['loss'], 'bo:', label='Loss')
ax.plot(sLSTM_history.history['acc'], 'r', label='Acc')
ax.set_xlabel('Epoch')
ax.set_xticks(np.arange(0, EPOCHS))

ax.legend(loc=1)
ax.set_title('Stacked LSTM model')
plt.show()

## Model7: BiLSTM

In [None]:
# Bidirectional LSTM (input dropout 0.2) that classifies from its final states.
BiLSTM_model = Sequential([
    Embedding(input_dim=VOCAB_SIZE,
              output_dim=STATE_DIM,
              input_length=SENTENCE_LEN),
    Bidirectional(LSTM(STATE_DIM, return_sequences=False, dropout=0.2)),
    Dense(CLASSES_NUM, activation='softmax'),
])

BiLSTM_model.summary()

In [None]:
# RMSprop on categorical cross-entropy (one-hot targets); accuracy is logged as 'acc'.
BiLSTM_model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=LR),
    metrics=['acc'],
)

In [None]:
# Train for EPOCHS epochs, validating on val_db; the History is kept for plotting.
BiLSTM_history = BiLSTM_model.fit(
    x=train_db,
    validation_data=val_db,
    epochs=EPOCHS,
)

In [None]:
# Score the trained BiLSTM model on the test batches: [loss, accuracy].
loss_and_acc = BiLSTM_model.evaluate(test_db)
print(f'loss: {loss_and_acc[0]}', f'acc : {loss_and_acc[1]}', sep='\n')

In [None]:
# Training loss and accuracy per epoch for the BiLSTM model.
fig, ax = plt.subplots(dpi=100, figsize=(6, 3))
ax.plot(BiLSTM_history.history['loss'], 'bo:', label='Loss')
ax.plot(BiLSTM_history.history['acc'], 'r', label='Acc')
ax.set_xlabel('Epoch')
ax.set_xticks(np.arange(0, EPOCHS))

ax.legend(loc=1)
ax.set_title('BiLSTM model')
plt.show()

# SUMMARY

In [None]:
# Compare the training-loss curves of all seven models in one figure and save it.
fig = plt.figure(dpi=100, figsize=(5.95, 3))

# (history, line style, legend label), drawn in this order.
loss_curves = (
    (LR_history, 'r', 'LR'),
    (RNN_history, 'g', 'RNN'),
    (RNN_r_history, 'b', 'RNN-r'),
    (LSTM_history, 'y', 'LSTM'),
    (LSTM_r_history, 'c', 'LSTM-r'),
    (sLSTM_history, 'm', 'LSTM-s'),
    (BiLSTM_history, 'k', 'BiLSTM'),
)
for model_history, line_style, curve_label in loss_curves:
    plt.plot(model_history.history['loss'], line_style, label=curve_label)

title_xl = {
    "fontfamily": 'times new roman',
    "fontsize": 14,
    "fontweight": 'bold',
}
title_d = {
    "fontsize": 16,
    "fontweight": 'bold',
}
plt.xlabel('Epoch', title_xl)
plt.ylabel('Loss', title_xl)
plt.xticks(np.arange(0, EPOCHS))

# Legend sits outside the axes, to the right of the plot.
plt.legend(loc=1, bbox_to_anchor=(1.25, .7))
plt.title('THUCNEWs Losses', title_d)

plt.savefig(f'./drive/MyDrive/THUCNEWs/Thucnews_{EPOCHS}Epochs_Loss_Result.png', dpi=200, bbox_inches='tight')
plt.show()

In [None]:
# Compare the training-accuracy curves of all seven models in one figure and save it.
fig = plt.figure(dpi=100, figsize=(5.95, 3))

# (history, line style, legend label), drawn in this order.
acc_curves = (
    (LR_history, 'r', 'LR'),
    (RNN_history, 'g', 'RNN'),
    (RNN_r_history, 'b', 'RNN-r'),
    (LSTM_history, 'y', 'LSTM'),
    (LSTM_r_history, 'c', 'LSTM-r'),
    (sLSTM_history, 'm', 'LSTM-s'),
    (BiLSTM_history, 'k', 'BiLSTM'),
)
for model_history, line_style, curve_label in acc_curves:
    plt.plot(model_history.history['acc'], line_style, label=curve_label)

title_xl = {
    "fontsize": 12,
    "fontweight": 'bold',
}
title_d = {
    "fontsize": 16,
    "fontweight": 'bold',
}
plt.xlabel('Epoch', title_xl)
plt.ylabel('Accuracy Rate', title_xl)
plt.xticks(np.arange(0, EPOCHS))

# Legend sits outside the axes, to the right of the plot.
plt.legend(loc=1, bbox_to_anchor=(1.25, .7))
plt.title('THUCNEWs Accuracy', title_d)

plt.savefig(f'./drive/MyDrive/THUCNEWs/Thucnews_{EPOCHS}Epochs_Acc_Result.png', dpi=200, bbox_inches='tight')
plt.show()

# SAVE

In [None]:
# Persist the training history of every model as a pickle file.
t = time.time()
# Relative folder, like every other artefact path in this notebook; '/Result'
# pointed at the filesystem root, which is usually not writable.
result_path = './Result'
# exist_ok=True removes the check-then-create race of exists() + mkdir().
os.makedirs(result_path, exist_ok=True)

lr_path = os.path.join(result_path, 'lr.pkl')
rnn_path = os.path.join(result_path, 'rnn.pkl')
rnnr_path = os.path.join(result_path, 'rnn-r.pkl')
lstm_path = os.path.join(result_path, 'lstm.pkl')
lstmr_path = os.path.join(result_path, 'lstm-r.pkl')
lstms_path = os.path.join(result_path, 'lstm-s.pkl')
bilstm_path = os.path.join(result_path, 'bilstm.pkl')

# NOTE(review): the History objects are pickled as-is to keep the file format
# unchanged. Consider dumping `history.history` (a plain dict) instead if
# pickling fails or the files must load without Keras — confirm with consumers.
history_targets = (
    (lr_path, LR_history),          # LR
    (rnn_path, RNN_history),        # RNN
    (rnnr_path, RNN_r_history),     # RNN-r
    (lstm_path, LSTM_history),      # LSTM
    (lstmr_path, LSTM_r_history),   # LSTM-r
    (lstms_path, sLSTM_history),    # LSTM-s
    (bilstm_path, BiLSTM_history),  # BiLSTM
)
for history_file, model_history in history_targets:
    # Existing files are never overwritten; delete them to save a newer run.
    if os.path.exists(history_file):
        continue
    # Context manager so each handle is flushed and closed.
    with open(history_file, 'wb') as out_file:
        pickle.dump(model_history, out_file)
print(f'Finished! Cost: {time.time() - t:.2f}s')