# Experiment Data Preprocessing

In [None]:
# import pandas as pd

# df = pd.read_csv('train.csv') ### local check not on the server

# df['text_1'] = df['text_1'].fillna('')  # detect NaN
# df['text_2'] = df['text_2'].fillna('')  # detect NaN

# df['text1_word_count'] = df['text_1'].str.split().apply(len)
# df['text2_word_count'] = df['text_2'].str.split().apply(len)

# text1_avg = df['text1_word_count'].mean()
# text2_avg = df['text2_word_count'].mean()

# print(f"Average word count for 'text1': {text1_avg}")  # 101
# print(f"Average word count for 'text2': {text2_avg}")  # 101

In [1]:
import numpy as np
import pandas as pd
import os.path as osp
import argparse
import random

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

from transformers import DistilBertModel, DistilBertTokenizer

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score


class BaseDataset(Dataset):
    def __init__(self, file_path, split, max_length=256):  
        self.max_length = max_length  # the maximum number of tokens in the sequence
        self.split = split  # from inherited classes

        df = pd.read_csv(osp.join(file_path, split + '.csv'), dtype={'text_1':str, 'text_2': str, 'label': int})  # get data in specific types
        text1 = list(df['text_1'])  # text1
        text2 = list(df['text_2'])  # text2

        if split != 'test':
            label = list(df['label'])  # "Note that the test data will follow the same format as the files in the trial dataset, except that the label column will not be provided"

        self.data = []
        for i in range(len(text1)):
            if type(text1[i]) != str or type(text2[i]) != str:
                continue  # non-str data was not considered
            if len(text1[i]) < 5 or len(text2[i]) < 5:
                continue  # sentences less than 5 were not considered, as unvalid ones
            self.data.append({
                'text1': text1[i],
                'text2': text2[i],  
            })

            if split != 'test':
                self.data[-1]['label'] = label[i]  # preserve the label column

        self.tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')  # DistilBERT tokenization

        # extract the specific index of two special tokens [UNK] and [SEP] from the token set of DistilBERT tokenization
        # prepare for the subsequent data augmentation
        self.unk_token_idx = self.tokenizer.convert_tokens_to_ids(self.tokenizer.unk_token)  
        self.sep_token_idx = self.tokenizer.convert_tokens_to_ids(self.tokenizer.sep_token)  

    def __len__(self):
        return len(self.data)  # return length

    def preprocess(self, text):
        tokens = self.tokenizer(text, padding='max_length', max_length=self.max_length, truncation=True)  # setting the tokenizer
        input_ids = tokens['input_ids']  # gain a series of index values of tokens, after tokenizing sequences
        input_ids = self.aug(input_ids) # data augmentation
        input_ids = torch.tensor(input_ids)  # convert into the tensor format that PyTorch could understand

        return input_ids

    def __getitem__(self, idx):
        # get data by random idx generated by pytorch itself
        token_idx_1 = self.preprocess(self.data[idx]['text1']) 
        token_idx_2 = self.preprocess(self.data[idx]['text2'])  

        if self.split == 'test':
            return token_idx_1, token_idx_2   # no label column, if it was the test mode

        label = self.data[idx]['label']
        return token_idx_1, token_idx_2, label  # the label column was required during training


class TestDataset(BaseDataset):
    def __init__(self, split, *args, **kwargs):
        assert split in ['dev', 'test']  # mainly for validation
        super(TestDataset, self).__init__(split=split, *args, **kwargs)

    def aug(self, input_ids, p=0.5): 
        return input_ids


# the basic class for training dataset
class TrainDataset(BaseDataset):
    def __init__(self, *args, **kwargs):
        super(TrainDataset, self).__init__(split='train', *args, **kwargs)

    # according to instructions from: Xie, Ziang, et al. "Data noising as smoothing in neural network language models." arXiv preprint arXiv:1703.02573 (2017).
    # data augmentation during training the model, using "blank nosing"
    def aug(self, input_ids, p=0.5):
        if np.random.rand() < p:  # 50% probability but with the fixed random seed
            length = input_ids.index(self.sep_token_idx)  # determine the position of [SEP]
            if length < 5:  # not necessary if the sequence less than 5 
                return input_ids
            # for the entire input sequence, except for [CLS], [SEP], and the ending [PAD]s, randomly replace
            # the rest of positions, by changing the original index to the index of [UNK],
            # artificially introducing blank noises to enhance model robustness, avoiding over-fitting
            for i in range(np.random.randint(1,5)):  # how many times for data augmentation?
                input_ids[np.random.randint(1, length - 1)] = self.unk_token_idx  # position of [SEP] was used here
        return input_ids

# Experiment Evaluation Metric on Development Set

In [2]:
class Evaluator(object):
    def __init__(self) -> None:
        super(Evaluator, self).__init__()
        self.preds = []
        self.targets = []

    def add_batch(self, preds, targets):
        # if predictions were the Tensor data type, convert them into the CPU type that NumPy could understand
        if isinstance(preds, torch.Tensor):
            preds = preds.cpu().numpy()  
            targets = targets.cpu().numpy()
        # load data preparing for the evaluation
        self.preds.append(preds)
        self.targets.append(targets)

    def run(self):
        # flatten corresponding predictions and true labels along the axis=0 direction, to facilitate comparison
        preds = np.concatenate(self.preds, axis=0)  
        targets = np.concatenate(self.targets, axis=0)  
        preds_cls = preds > 0.2  # from 0.2, 0.3, 0.4, and 0.5

        res = {
            'auc': roc_auc_score(targets, preds),
            'accuracy': accuracy_score(targets, preds_cls),
            'precision': precision_score(targets, preds_cls),
            'recall': recall_score(targets, preds_cls),
            'f1': f1_score(targets, preds_cls)
        }

        return res

    # reset predictions and targets to prepare for further evaluations
    # delete previous objects and set up new ones
    def reset(self):
        del self.preds 
        del self.targets 
        self.preds = []
        self.targets = []


# Model Architecture

In [3]:
class BertVerifier(nn.Module):
    def __init__(self, output_size, hidden_size=768, dropout=0.0):
        super(BertVerifier, self).__init__()

        self.encoder = DistilBertModel.from_pretrained('distilbert-base-uncased')
        self.pooling_fn = self.pooling_fn_cls_token  
        for p in self.encoder.embeddings.parameters():
            p.requires_grad = False  # froze the word embedding to prevent overfitting
        for m in self.encoder.transformer.layer[:3]:
            # low-level layers would pay more attention to the basic linguistics knowledge, from "https://aclanthology.org/N18-1202/"
            for p in m.parameters():  # froze parameters of the first three layers to prevent overfitting
                p.requires_grad = False  # not enough training data 

        self.fc1 = nn.Linear(768, hidden_size)
        self.bn1 = nn.BatchNorm1d(hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(dropout)

    def pooling_fn_cls_token(self, outputs):
        # extract "sequence of hidden-states at the output of the last layer of the model"
        # https://huggingface.co/docs/transformers/main/en/model_doc/distilbert#transformers.DistilBertModel
        return outputs[0][:, 0] 

    def forward(self, x1, x2):
        # Forward
        outputs = self.encoder(torch.cat([x1, x2], dim=0).long())  # (M,a) (M,a) -> (M+M, a): two paragraphs were concatenated along the batch direction
        emb = self.pooling_fn(outputs) 

        # prediction head
        emb = self.dropout(emb)
        emb = self.fc1(emb)
        emb = self.bn1(emb)
        emb = F.gelu(emb)
        emb = self.fc2(emb)
        emb = F.normalize(emb, p=2, dim=1)  # normalisation for the cosine similarity

        emb1, emb2 = emb.chunk(2)  # waist intercept, above was text1 and below was text2
        sim = (emb1 * emb2).sum(1)  # the cosine similarity dot product after normalization

        return sim


class LSTMVerifier(nn.Module):
    def __init__(self, output_size, hidden_size=768, num_layers=3, dropout=0.0):
        super(LSTMVerifier, self).__init__()
        # extract the embedding from DistilBERT tokenization
        self.emb_layer = DistilBertModel.from_pretrained('distilbert-base-uncased').embeddings  
        self.encoder = nn.LSTM(768, hidden_size, num_layers, batch_first=True, bidirectional=True, dropout=dropout)  # bi-LSTM

        self.fc1 = nn.Linear(1536, hidden_size)  # 1536=768*2  
        self.bn1 = nn.BatchNorm1d(hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x1, x2):
        # Forward
        inputs = torch.cat([x1, x2], dim=0).long() # index values of tokens
        inputs = self.emb_layer(inputs)  # the embedding layer with "Deep contextualized word representations" from "https://aclanthology.org/N18-1202/"
        outputs, _ = self.encoder(inputs)  # output
        # average features over the sequential dimension to serve as feature representations of the whole sequence
        emb = outputs.mean(1)   

        # prediction head similar to DistilBERT, BUT WITHOUT dropout layer
        emb = self.fc1(emb)
        emb = self.bn1(emb)
        emb = F.gelu(emb)
        emb = self.fc2(emb)
        emb = F.normalize(emb, p=2, dim=1)

        emb1, emb2 = emb.chunk(2)
        sim = (emb1 * emb2).sum(1)

        return sim

# Training

In [None]:
import os
from tqdm import tqdm


# guarantee STRONG experiment reproducibility
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = True

# ensure multiple GPUs didn't read the same data between each other
# and set the seed to maintain a certain level of reproducibility
# passed as an input parameter to the Dataloader
# https://pytorch.org/docs/stable/notes/randomness.html#dataloader
def worker_init_fn_seed(worker_id):
    seed = torch.initial_seed()
    # https://pytorch.org/docs/stable/data.html#data-loading-randomness
    # each worker for "base_seed + worker_id" for "Randomness in multi-process data loading"
    seed = (worker_id + seed) % (2**32)
    np.random.seed(seed)

def train(args):
    args.save_dir = os.path.join(args.save_dir, args.experiment_name)
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)  # locations of ".pth" files

    # initilize models
    if args.model_type == 'distilbert':
        model = BertVerifier(args.emb_size, args.hidden_size, args.dropout)
    elif args.model_type == 'lstm':
        model = LSTMVerifier(args.emb_size, args.hidden_size, args.num_layers, args.dropout)
    else:
        assert NotImplementedError
    model.cuda()

    criterion = nn.BCEWithLogitsLoss()  # "combined a Sigmoid layer and the BCELoss in one single class" from https://pytorch.org/docs/stable/generated/torch.nn.BCEWithLogitsLoss.html

    # load data
    train_set = TrainDataset(args.data_path, max_length=args.max_length)
    val_set = TestDataset('dev', args.data_path, max_length=args.max_length)
    # "num_workers" meant each process(GPU) created multiple subprocesses to read data, which would be faster,
    # but mainly depended on how many cores the CPU had
    # "pin_memory" accelerated the speed of transferring data to the GPU
    train_loader = DataLoader(train_set, batch_size=args.bs, num_workers=4, shuffle=True, pin_memory=True, worker_init_fn=worker_init_fn_seed)  
    val_loader = DataLoader(val_set, batch_size=args.bs, pin_memory=True)

    # setup the optimizer
    params = filter(lambda p:p.requires_grad, model.parameters())  # filter parameters that only needed gradient updates; otherwise it might result in errors

    # From Kingma, Diederik P., and Jimmy Ba. "Adam: A method for stochastic optimization." arXiv preprint arXiv:1412.6980 (2014).
    # "straightforward to implement, is computationally efficient, has little memory requirements, is invariant to diagonal rescaling of the gradients"
    # suitable and essential for fine-tuning PLMs
    optimizer = optim.Adam(params, lr=args.lr, weight_decay=args.wd)  # ADAM was less sensitive to learning rates
    evaluator = Evaluator()
    best_val_acc = 0.0
    best_val_metrics = None
    total_step = 0
    total_losses = []

    # training loops
    for epoch in range(args.epochs):
        model.train()
        # iterator = train_loader
        iterator = tqdm(train_loader)
        for text1, text2, labels in iterator:
            outputs = model(text1.cuda(), text2.cuda())  # the similarity was already calculated
            # logit scores ranged from negative infinity to positive infinity, while the range of cosine similarity was just [-1, 1]
            # by scaling, the range of cosine similarities could be smoothed to have a wider numerical range, probably being more suitable
            loss = criterion(outputs / args.temperature, labels.float().cuda()).mean() 

            # before back-propagation, reset the previously accumulated gradients to zero, manually
            # if gradients accumulated, the results became inaccurate since the direction of gradients was affected
            optimizer.zero_grad()
            loss.backward()  # back-propagation for gradients with "requires_grad=True"
            optimizer.step()  # update

            total_losses.append(loss.item())
            if total_step % 100 == 0:
                print('Step #%d   train_loss %f'%(total_step, sum(total_losses) / len(total_losses)))
                total_losses = []

            total_step += 1  # step++

        # start validation
        eval_metrics_val = test(model, val_loader, evaluator)

        # save evaluation results
        print(f"Epoch {epoch+1}/{args.epochs}:")
        print('Validation Results: ' + '  '.join(['%s: %.4f'%(k,v) for k,v in eval_metrics_val.items()]))  # output results of the big dictionary

        val_acc = eval_metrics_val['accuracy']
        # save the best model
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_val_metrics = eval_metrics_val
            torch.save(model.state_dict(), os.path.join(args.save_dir, "best_weights.pth"))
        torch.save(model.state_dict(), os.path.join(args.save_dir, "latest_weights.pth"))

    print('Best Validation Results: ' + '  '.join(['%s: %.4f'%(k,v) for k,v in best_val_metrics.items()]))


@torch.no_grad()
def test(model, loader, evaluator):
    evaluator.reset()
    model.eval()
    for text1, text2, labels in loader:
        scores = model(text1.cuda(), text2.cuda()) # calculate the cosine similarity
        evaluator.add_batch(scores, labels)  # add them to the Evaluator

    eval_metrics = evaluator.run()  # check
    return eval_metrics


def main():
    parser = argparse.ArgumentParser()  # parse some default parameters passed in, from the command line
    parser.add_argument("--seed", type=int, default=68, help="random seed")
    parser.add_argument("--hidden_size", type=int, default=768, help="embedding dimensions of hidden layers")
    parser.add_argument("--emb_size", type=int, default=256)
    parser.add_argument("--num_layers", type=int, default=3)
    parser.add_argument("--bs", type=int, default=16, help="batch size per gpu")
    parser.add_argument("--temperature", type=float, default=0.1)
    parser.add_argument("--lr", type=float, default=1e-5, help="initial learning rate")
    parser.add_argument("--wd", type=float, default=1e-5, help="weight decay")
    parser.add_argument("--dropout", type=float, default=0.1)
    parser.add_argument("--epochs", type=int, default=10, help="number of training epochs")
    parser.add_argument("--max_length", type=int, default=256, help="max length of tokens")
    parser.add_argument("--model_type", type=str, choices=['distilbert', 'lstm'], default='distilbert')
    parser.add_argument("--data_path", type=str, default='./training_data/AV')
    parser.add_argument("--save_dir", type=str, default='./save')
    parser.add_argument("--experiment_name", type=str, default='distilbert')

    args, unknown = parser.parse_known_args()  # get the namespace of all command line arguments, just like a python object
    # set the fixed random seed before experiments started, ensuring a certain level of the experimental reproducibility
    set_seed(args.seed)
    train(args)  # execute the experiment :)

main()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.


Step #0   train_loss 1.548017
Step #100   train_loss 1.031455
Step #200   train_loss 0.825282
Step #300   train_loss 0.752633
Step #400   train_loss 0.724814
Step #500   train_loss 0.714153
Step #600   train_loss 0.712097
Step #700   train_loss 0.693712
Step #800   train_loss 0.699355
Step #900   train_loss 0.674829
Step #1000   train_loss 0.680428
Step #1100   train_loss 0.674030
Step #1200   train_loss 0.649197
Step #1300   train_loss 0.650265
Step #1400   train_loss 0.622099
Step #1500   train_loss 0.621589
Step #1600   train_loss 0.627546
Step #1700   train_loss 0.631908
Step #1800   train_loss 0.608526
Epoch 1/10:
Validation Results: auc: 0.7862  accuracy: 0.5904  precision: 0.9647  recall: 0.1917  f1: 0.3199




Step #1900   train_loss 0.594292
Step #2000   train_loss 0.596427
Step #2100   train_loss 0.552258
Step #2200   train_loss 0.590541
Step #2300   train_loss 0.546810
Step #2400   train_loss 0.575323
Step #2500   train_loss 0.577394
Step #2600   train_loss 0.548857
Step #2700   train_loss 0.540851
Step #2800   train_loss 0.540957
Step #2900   train_loss 0.569226
Step #3000   train_loss 0.541439
Step #3100   train_loss 0.559453
Step #3200   train_loss 0.561013
Step #3300   train_loss 0.551761
Step #3400   train_loss 0.534250
Step #3500   train_loss 0.522121
Step #3600   train_loss 0.551658
Step #3700   train_loss 0.525289
Epoch 2/10:
Validation Results: auc: 0.8209  accuracy: 0.6134  precision: 0.9726  recall: 0.2371  f1: 0.3813




Step #3800   train_loss 0.509898
Step #3900   train_loss 0.506042
Step #4000   train_loss 0.506328
Step #4100   train_loss 0.494565
Step #4200   train_loss 0.506839
Step #4300   train_loss 0.480483
Step #4400   train_loss 0.507733
Step #4500   train_loss 0.512547
Step #4600   train_loss 0.493158
Step #4700   train_loss 0.473521
Step #4800   train_loss 0.503048
Step #4900   train_loss 0.504727
Step #5000   train_loss 0.479363
Step #5100   train_loss 0.490332
Step #5200   train_loss 0.480252
Step #5300   train_loss 0.484533
Step #5400   train_loss 0.490578
Step #5500   train_loss 0.498323
Step #5600   train_loss 0.482311
Epoch 3/10:
Validation Results: auc: 0.8350  accuracy: 0.6221  precision: 0.9831  recall: 0.2522  f1: 0.4014




Step #5700   train_loss 0.455345
Step #5800   train_loss 0.460390
Step #5900   train_loss 0.469825
Step #6000   train_loss 0.436642
Step #6100   train_loss 0.436583
Step #6200   train_loss 0.447531
Step #6300   train_loss 0.462744
Step #6400   train_loss 0.434561
Step #6500   train_loss 0.439207
Step #6600   train_loss 0.444582
Step #6700   train_loss 0.447125
Step #6800   train_loss 0.433833
Step #6900   train_loss 0.448005
Step #7000   train_loss 0.467936
Step #7100   train_loss 0.461203
Step #7200   train_loss 0.461062
Step #7300   train_loss 0.447154
Step #7400   train_loss 0.419742
Epoch 4/10:
Validation Results: auc: 0.8466  accuracy: 0.6322  precision: 0.9831  recall: 0.2725  f1: 0.4268




Step #7500   train_loss 0.436045
Step #7600   train_loss 0.388794
Step #7700   train_loss 0.419586
Step #7800   train_loss 0.402576
Step #7900   train_loss 0.416781
Step #8000   train_loss 0.396591
Step #8100   train_loss 0.393374
Step #8200   train_loss 0.390850
Step #8300   train_loss 0.399740
Step #8400   train_loss 0.406600
Step #8500   train_loss 0.402023
Step #8600   train_loss 0.382583
Step #8700   train_loss 0.400477
Step #8800   train_loss 0.409443
Step #8900   train_loss 0.411606
Step #9000   train_loss 0.403435
Step #9100   train_loss 0.405113
Step #9200   train_loss 0.401791
Step #9300   train_loss 0.433895
Epoch 5/10:
Validation Results: auc: 0.8484  accuracy: 0.6405  precision: 0.9897  recall: 0.2876  f1: 0.4457




Step #9400   train_loss 0.383803
Step #9500   train_loss 0.354144
Step #9600   train_loss 0.346751
Step #9700   train_loss 0.366205
Step #9800   train_loss 0.372480
Step #9900   train_loss 0.367942
Step #10000   train_loss 0.364139
Step #10100   train_loss 0.363920
Step #10200   train_loss 0.334903
Step #10300   train_loss 0.362889
Step #10400   train_loss 0.357184
Step #10500   train_loss 0.373546
Step #10600   train_loss 0.359351
Step #10700   train_loss 0.366093
Step #10800   train_loss 0.377083
Step #10900   train_loss 0.357659
Step #11000   train_loss 0.379929
Step #11100   train_loss 0.366045
Step #11200   train_loss 0.394485
Epoch 6/10:
Validation Results: auc: 0.8492  accuracy: 0.6431  precision: 0.9844  recall: 0.2943  f1: 0.4531




Step #11300   train_loss 0.319893
Step #11400   train_loss 0.328803
Step #11500   train_loss 0.342537
Step #11600   train_loss 0.309981
Step #11700   train_loss 0.309489
Step #11800   train_loss 0.320107
Step #11900   train_loss 0.315413
Step #12000   train_loss 0.333109
Step #12100   train_loss 0.318930
Step #12200   train_loss 0.324243
Step #12300   train_loss 0.329117
Step #12400   train_loss 0.342081
Step #12500   train_loss 0.317935
Step #12600   train_loss 0.326944
Step #12700   train_loss 0.321849
Step #12800   train_loss 0.333436
Step #12900   train_loss 0.319767
Step #13000   train_loss 0.331533
Epoch 7/10:
Validation Results: auc: 0.8563  accuracy: 0.6432  precision: 0.9876  recall: 0.2936  f1: 0.4526




Step #13100   train_loss 0.320262
Step #13200   train_loss 0.290755
Step #13300   train_loss 0.286637
Step #13400   train_loss 0.283989
Step #13500   train_loss 0.282475
Step #13600   train_loss 0.288705
Step #13700   train_loss 0.298153
Step #13800   train_loss 0.311996
Step #13900   train_loss 0.305423
Step #14000   train_loss 0.304508
Step #14100   train_loss 0.293512
Step #14200   train_loss 0.295312
Step #14300   train_loss 0.284433
Step #14400   train_loss 0.284874
Step #14500   train_loss 0.285245
Step #14600   train_loss 0.290968
Step #14700   train_loss 0.283737
Step #14800   train_loss 0.304492
Step #14900   train_loss 0.290820
Epoch 8/10:
Validation Results: auc: 0.8495  accuracy: 0.6456  precision: 0.9857  recall: 0.2989  f1: 0.4587




Step #15000   train_loss 0.289025
Step #15100   train_loss 0.257876
Step #15200   train_loss 0.249406
Step #15300   train_loss 0.260765
Step #15400   train_loss 0.257933
Step #15500   train_loss 0.257064
Step #15600   train_loss 0.257758
Step #15700   train_loss 0.264756
Step #15800   train_loss 0.250820
Step #15900   train_loss 0.273568
Step #16000   train_loss 0.275983
Step #16100   train_loss 0.271542
Step #16200   train_loss 0.258482
Step #16300   train_loss 0.262275
Step #16400   train_loss 0.262319
Step #16500   train_loss 0.280196
Step #16600   train_loss 0.268906
Step #16700   train_loss 0.252172
Step #16800   train_loss 0.281270
Epoch 9/10:
Validation Results: auc: 0.8550  accuracy: 0.6533  precision: 0.9823  recall: 0.3156  f1: 0.4778




Step #16900   train_loss 0.228906
Step #17000   train_loss 0.245109
Step #17100   train_loss 0.230716
Step #17200   train_loss 0.229782
Step #17300   train_loss 0.228735
Step #17400   train_loss 0.219611
Step #17500   train_loss 0.226684
Step #17600   train_loss 0.220339
Step #17700   train_loss 0.237403
