In [1]:
import pandas as pd
import torch
import os

from torch.utils.data import Dataset
from transformers import (BertTokenizer, BertForSequenceClassification, Trainer,
                          TrainingArguments, AutoTokenizer, AutoModel)
# from simpletransformers.language_modeling import LanguageModelingModel
from sklearn.metrics import accuracy_score, f1_score


In [2]:
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0))
print(torch.cuda.get_device_properties(0))

True
NVIDIA GeForce RTX 3060
_CudaDeviceProperties(name='NVIDIA GeForce RTX 3060', major=8, minor=6, total_memory=12050MB, multi_processor_count=28)


In [3]:
MODEL_NAME = '/home/abdullah/Code/dl/lt_bert/best_models/pruned_best_tiny_bert'
TRAIN_FILE_LOC = '/home/abdullah/Code/dl/bnlp-resources/sentiment/multichannel_bsentiment/multichannel_bsentiment_train.tsv'
TEST_FILE_LOC = '/home/abdullah/Code/dl/bnlp-resources/sentiment/multichannel_bsentiment/multichannel_bsentiment_test.tsv'
EVAL_FILE_LOC = '/home/abdullah/Code/dl/bnlp-resources/sentiment/multichannel_bsentiment/multichannel_bsentiment_dev.tsv'


In [4]:
def tsv_to_text(tsv_file_loc):
    file_name = tsv_file_loc.split("/")[-1].split(".")[0]
    txt_name = tsv_file_loc.replace(".tsv", ".txt")
    txt_name = txt_name.replace("split_merged", "texts")

    if os.path.exists(txt_name):
        return [txt_name, file_name]

    df = pd.read_csv(tsv_file_loc, sep="\t")

    for txt in df["text"]:
        with open(txt_name, "a", encoding="utf8") as f:
            f.writelines(txt + "\n")
    return [txt_name, file_name]


tsv_to_text(TRAIN_FILE_LOC)


['/home/abdullah/Code/dl/bnlp-resources/sentiment/multichannel_bsentiment/multichannel_bsentiment_train.txt',
 'multichannel_bsentiment_train']

In [5]:
def tsv_to_df(csv_file_loc):
    df = pd.read_csv(csv_file_loc, sep='\t')
    # remove id column
    df = df.drop(columns=['id'])

    # replace neutral with 0, positive with 1, negative with 2 in class_label column
    df.loc[df['class_label'] == 'Neutral', 'class_label'] = 0
    df.loc[df['class_label'] == 'Positive', 'class_label'] = 1
    df.loc[df['class_label'] == 'Negative', 'class_label'] = 2

    return df


train_df = tsv_to_df(TEST_FILE_LOC)

texts = train_df['text'].tolist()
print(train_df)


                                                   text class_label
0     এমন কোনও রেস্টুরেন্ট হবে না যা আমি ফিরে আসব,যদ...           1
1     এমন কোনও রেস্টুরেন্ট হবে না যা আমি ফিরে আসব,যদ...           1
2     একটি সন্দেহ ছাড়া Orsay, NYC মধ্যে খাঁটি ফরাসি...           1
3                           কর্মচারী কোন অর্থহীনতা হয়।           1
4     চীনা ডেজার্টগুলি পরিচিত মিষ্টি ছাড়াও স্বাস্থ্...           1
...                                                 ...         ...
1527  সেখানে কোন পরিবেশ ছিল না,সেবা ছিল অনুপযুক্ত,এব...           2
1528  সেখানে কোন পরিবেশ ছিল না,সেবা ছিল অনুপযুক্ত,এব...           2
1529  সেখানে কোন পরিবেশ ছিল না,সেবা ছিল অনুপযুক্ত,এব...           2
1530             নির্দিষ্টভাবে এই এক আমার অন্তত প্রিয়।           2
1531  মাছ দুর্ভাগ্যবশত অতীতের মত অতীত হিসাবে প্রস্তু...           2

[1532 rows x 2 columns]


In [6]:
def compute_metrics(pred):
  labels = pred.label_ids
  preds = pred.predictions.argmax(-1)
  # calculate accuracy using sklearn's function
  acc = accuracy_score(labels, preds)
  return {
      'accuracy': acc,
  }


In [7]:
def f1_calculator(pred):
  labels = pred.label_ids
  preds = pred.predictions.argmax(-1)
  # calculate accuracy using sklearn's function
  f1 = f1_score(labels, preds, average='weighted')
  return {
      'f1': f1,
  }


In [8]:
train_list = tsv_to_text(TRAIN_FILE_LOC)
test_list = tsv_to_text(TEST_FILE_LOC)
eval_list = tsv_to_text(EVAL_FILE_LOC)


In [9]:
os.environ["WANDB_DISABLED"] = "true"

train_df = tsv_to_df(TRAIN_FILE_LOC)
test_df = tsv_to_df(TEST_FILE_LOC)
eval_df = tsv_to_df(EVAL_FILE_LOC)


class TINYDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(value[idx])
                for key, value in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)


tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)

train_encodings = tokenizer(train_df['text'].tolist(
), padding=True, truncation=True, max_length=512)
test_encodings = tokenizer(test_df['text'].tolist(
), padding=True, truncation=True, max_length=512)
eval_encodings = tokenizer(eval_df['text'].tolist(
), padding=True, truncation=True, max_length=512)

train_dataset = TINYDataset(train_encodings, train_df['class_label'].tolist())
test_dataset = TINYDataset(test_encodings, test_df['class_label'].tolist())
eval_dataset = TINYDataset(eval_encodings, eval_df['class_label'].tolist())

model = BertForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=3).to('cuda')
model.manual_seed = 14
model


Some weights of the model checkpoint at /home/abdullah/Code/dl/lt_bert/best_models/pruned_best_tiny_bert were not used when initializing BertForSequenceClassification: ['bert.encoder.layer.0.attention.self.value.weight_mask', 'bert.encoder.layer.0.attention.self.query.weight_mask', 'bert.encoder.layer.1.attention.self.value.weight_orig', 'bert.encoder.layer.1.attention.output.dense.weight_mask', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'bert.encoder.layer.0.intermediate.dense.weight_mask', 'bert.encoder.layer.0.output.dense.weight_orig', 'bert.encoder.layer.0.attention.self.key.weight_mask', 'bert.encoder.layer.1.intermediate.dense.weight_orig', 'bert.encoder.layer.1.attention.self.query.weight_mask', 'cls.predictions.bias', 'bert.encoder.layer.1.intermediate.dense.weight_mask', 'bert.encoder.layer.1.output.dense.weight_orig', 'bert.encoder.layer.0.attention.self.query.weight_orig', 'bert.encoder.l

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 128, padding_idx=0)
      (position_embeddings): Embedding(512, 128)
      (token_type_embeddings): Embedding(2, 128)
      (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=128, out_features=128, bias=True)
              (key): Linear(in_features=128, out_features=128, bias=True)
              (value): Linear(in_features=128, out_features=128, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=128, out_features=128, bias=True)
              (LayerNorm): LayerNorm((128,), eps=1e-12, element

## 1 Epoch Train

In [10]:
training_args = TrainingArguments(
    output_dir=f"temp",
    num_train_epochs=1,
    per_device_train_batch_size=48,
    per_device_eval_batch_size=96,
    warmup_steps=500,
    learning_rate=5e-5,
    weight_decay=0.01,
    overwrite_output_dir=True,
    logging_dir=f"temp/logs",
    logging_steps=115,
    save_steps=115,
    load_best_model_at_end=True,
    evaluation_strategy="steps",
    seed=14,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset

)

trainer.train()


Using the `WAND_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
***** Running training *****
  Num examples = 5510
  Num Epochs = 1
  Instantaneous batch size per device = 48
  Total train batch size (w. parallel, distributed & accumulation) = 48
  Gradient Accumulation steps = 1
  Total optimization steps = 115
100%|██████████| 115/115 [00:07<00:00, 24.80it/s]***** Running Evaluation *****
  Num examples = 1378
  Batch size = 96


{'loss': 1.0514, 'learning_rate': 1.1500000000000002e-05, 'epoch': 1.0}


                                                 
100%|██████████| 115/115 [00:07<00:00, 24.80it/s]Saving model checkpoint to temp/checkpoint-115
Configuration saved in temp/checkpoint-115/config.json


{'eval_loss': 0.9983900785446167, 'eval_runtime': 0.4604, 'eval_samples_per_second': 2993.117, 'eval_steps_per_second': 32.581, 'epoch': 1.0}


Model weights saved in temp/checkpoint-115/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from temp/checkpoint-115 (score: 0.9983900785446167).
100%|██████████| 115/115 [00:09<00:00, 12.34it/s]

{'train_runtime': 9.348, 'train_samples_per_second': 589.429, 'train_steps_per_second': 12.302, 'train_loss': 1.051441822881284, 'epoch': 1.0}





TrainOutput(global_step=115, training_loss=1.051441822881284, metrics={'train_runtime': 9.348, 'train_samples_per_second': 589.429, 'train_steps_per_second': 12.302, 'train_loss': 1.051441822881284, 'epoch': 1.0})

## 100 Epoch with 1 epoch model

In [12]:
MODEL_NAME = '/home/abdullah/Code/dl/lt_bert/best_models/1/1_beng_sentiment_50'
model = BertForSequenceClassification.from_pretrained(
    MODEL_NAME, num_labels=3).to('cuda')
model.manual_seed = 14

for name, param in model.named_parameters():
    if 'bert.encoder.layer.0' in name:
        # print(name, param.requires_grad)
        param.requires_grad = False
# print("\n\n\n")
# for name, param in model.named_parameters():
#     print(name, param.requires_grad)

training_args = TrainingArguments(
    output_dir=f"temp",
    num_train_epochs=100,
    per_device_train_batch_size=192,
    per_device_eval_batch_size=384,
    warmup_steps=500,
    learning_rate=5e-5,
    weight_decay=0.01,
    overwrite_output_dir=True,
    logging_dir=f"temp/logs",
    logging_steps=400,
    save_steps=400,
    load_best_model_at_end=True,
    evaluation_strategy="steps",
    seed=14,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=f1_calculator,

)

trainer.train()

# print(cool.metrics)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,

)

trainer.train()


loading configuration file /home/abdullah/Code/dl/lt_bert/best_models/1/1_beng_sentiment_50/config.json
Model config BertConfig {
  "_name_or_path": "/home/abdullah/Code/dl/lt_bert/best_models/pruned_best_tiny_bert",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 128,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 512,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 2,
  "num_hidden_layers": 2,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.12.5",
  "type_vocab_size": 2,
  "use_cache": true,
  "voc

{'loss': 0.8551, 'learning_rate': 4e-05, 'epoch': 13.79}


                                                  
 14%|█▍        | 400/2900 [00:52<05:31,  7.53it/s]Saving model checkpoint to temp/checkpoint-400
Configuration saved in temp/checkpoint-400/config.json


{'eval_loss': 0.7117200493812561, 'eval_f1': 0.41176510095511615, 'eval_runtime': 0.3788, 'eval_samples_per_second': 3638.013, 'eval_steps_per_second': 10.56, 'epoch': 13.79}


Model weights saved in temp/checkpoint-400/pytorch_model.bin
 28%|██▊       | 800/2900 [01:46<04:33,  7.67it/s]***** Running Evaluation *****
  Num examples = 1378
  Batch size = 384


{'loss': 0.6939, 'learning_rate': 4.375e-05, 'epoch': 27.59}


                                                  
 28%|██▊       | 800/2900 [01:47<04:33,  7.67it/s]Saving model checkpoint to temp/checkpoint-800
Configuration saved in temp/checkpoint-800/config.json


{'eval_loss': 0.6577465534210205, 'eval_f1': 0.6107355574817853, 'eval_runtime': 0.3612, 'eval_samples_per_second': 3814.79, 'eval_steps_per_second': 11.073, 'epoch': 27.59}


Model weights saved in temp/checkpoint-800/pytorch_model.bin
 41%|████▏     | 1200/2900 [02:41<03:40,  7.72it/s]***** Running Evaluation *****
  Num examples = 1378
  Batch size = 384


{'loss': 0.6162, 'learning_rate': 3.541666666666667e-05, 'epoch': 41.38}


                                                   
 41%|████▏     | 1200/2900 [02:41<03:40,  7.72it/s]Saving model checkpoint to temp/checkpoint-1200
Configuration saved in temp/checkpoint-1200/config.json


{'eval_loss': 0.6204738020896912, 'eval_f1': 0.667007704987435, 'eval_runtime': 0.3691, 'eval_samples_per_second': 3733.634, 'eval_steps_per_second': 10.838, 'epoch': 41.38}


Model weights saved in temp/checkpoint-1200/pytorch_model.bin
 55%|█████▌    | 1600/2900 [03:34<02:45,  7.86it/s]***** Running Evaluation *****
  Num examples = 1378
  Batch size = 384


{'loss': 0.5579, 'learning_rate': 2.7083333333333332e-05, 'epoch': 55.17}


                                                   
 55%|█████▌    | 1600/2900 [03:35<02:45,  7.86it/s]Saving model checkpoint to temp/checkpoint-1600
Configuration saved in temp/checkpoint-1600/config.json
Model weights saved in temp/checkpoint-1600/pytorch_model.bin


{'eval_loss': 0.5906901955604553, 'eval_f1': 0.6817376628514681, 'eval_runtime': 0.3744, 'eval_samples_per_second': 3681.011, 'eval_steps_per_second': 10.685, 'epoch': 55.17}


 69%|██████▉   | 2000/2900 [04:28<01:56,  7.71it/s]***** Running Evaluation *****
  Num examples = 1378
  Batch size = 384


{'loss': 0.5299, 'learning_rate': 1.8750000000000002e-05, 'epoch': 68.97}


                                                   
 69%|██████▉   | 2000/2900 [04:28<01:56,  7.71it/s]Saving model checkpoint to temp/checkpoint-2000
Configuration saved in temp/checkpoint-2000/config.json


{'eval_loss': 0.587469220161438, 'eval_f1': 0.6972847017702838, 'eval_runtime': 0.3611, 'eval_samples_per_second': 3816.38, 'eval_steps_per_second': 11.078, 'epoch': 68.97}


Model weights saved in temp/checkpoint-2000/pytorch_model.bin
 83%|████████▎ | 2400/2900 [05:21<01:05,  7.61it/s]***** Running Evaluation *****
  Num examples = 1378
  Batch size = 384


{'loss': 0.5184, 'learning_rate': 1.0416666666666668e-05, 'epoch': 82.76}


                                                   
 83%|████████▎ | 2400/2900 [05:22<01:05,  7.61it/s]Saving model checkpoint to temp/checkpoint-2400
Configuration saved in temp/checkpoint-2400/config.json


{'eval_loss': 0.5900393128395081, 'eval_f1': 0.6984216068381424, 'eval_runtime': 0.369, 'eval_samples_per_second': 3734.324, 'eval_steps_per_second': 10.84, 'epoch': 82.76}


Model weights saved in temp/checkpoint-2400/pytorch_model.bin
 97%|█████████▋| 2800/2900 [06:16<00:13,  7.56it/s]***** Running Evaluation *****
  Num examples = 1378
  Batch size = 384


{'loss': 0.5141, 'learning_rate': 2.0833333333333334e-06, 'epoch': 96.55}


                                                   
 97%|█████████▋| 2800/2900 [06:16<00:13,  7.56it/s]Saving model checkpoint to temp/checkpoint-2800
Configuration saved in temp/checkpoint-2800/config.json


{'eval_loss': 0.5915799140930176, 'eval_f1': 0.6962001476749506, 'eval_runtime': 0.3942, 'eval_samples_per_second': 3495.545, 'eval_steps_per_second': 10.147, 'epoch': 96.55}


Model weights saved in temp/checkpoint-2800/pytorch_model.bin
100%|█████████▉| 2899/2900 [06:31<00:00,  7.68it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from temp/checkpoint-2000 (score: 0.587469220161438).
100%|██████████| 2900/2900 [06:31<00:00,  7.41it/s]
***** Running training *****
  Num examples = 5510
  Num Epochs = 100
  Instantaneous batch size per device = 192
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 1
  Total optimization steps = 2900


{'train_runtime': 391.4339, 'train_samples_per_second': 1407.645, 'train_steps_per_second': 7.409, 'train_loss': 0.60874402802566, 'epoch': 100.0}


 14%|█▍        | 400/2900 [00:52<05:26,  7.66it/s]***** Running Evaluation *****
  Num examples = 1378
  Batch size = 384


{'loss': 0.5195, 'learning_rate': 4e-05, 'epoch': 13.79}



 14%|█▍        | 400/2900 [00:52<05:26,  7.66it/s]Saving model checkpoint to temp/checkpoint-400
Configuration saved in temp/checkpoint-400/config.json


{'eval_loss': 0.5869231820106506, 'eval_accuracy': 0.7017416545718432, 'eval_runtime': 0.3677, 'eval_samples_per_second': 3747.194, 'eval_steps_per_second': 10.877, 'epoch': 13.79}


Model weights saved in temp/checkpoint-400/pytorch_model.bin
 28%|██▊       | 800/2900 [01:46<04:33,  7.67it/s]***** Running Evaluation *****
  Num examples = 1378
  Batch size = 384


{'loss': 0.5086, 'learning_rate': 4.375e-05, 'epoch': 27.59}



 28%|██▊       | 800/2900 [01:46<04:33,  7.67it/s]Saving model checkpoint to temp/checkpoint-800
Configuration saved in temp/checkpoint-800/config.json


{'eval_loss': 0.5989853143692017, 'eval_accuracy': 0.6937590711175616, 'eval_runtime': 0.3613, 'eval_samples_per_second': 3814.481, 'eval_steps_per_second': 11.073, 'epoch': 27.59}


Model weights saved in temp/checkpoint-800/pytorch_model.bin
 41%|████▏     | 1200/2900 [02:40<03:41,  7.68it/s]***** Running Evaluation *****
  Num examples = 1378
  Batch size = 384


{'loss': 0.4919, 'learning_rate': 3.541666666666667e-05, 'epoch': 41.38}



 41%|████▏     | 1200/2900 [02:40<03:41,  7.68it/s]Saving model checkpoint to temp/checkpoint-1200
Configuration saved in temp/checkpoint-1200/config.json


{'eval_loss': 0.5988609194755554, 'eval_accuracy': 0.6959361393323658, 'eval_runtime': 0.3717, 'eval_samples_per_second': 3706.974, 'eval_steps_per_second': 10.76, 'epoch': 41.38}


Model weights saved in temp/checkpoint-1200/pytorch_model.bin
 55%|█████▌    | 1600/2900 [03:34<02:45,  7.84it/s]***** Running Evaluation *****
  Num examples = 1378
  Batch size = 384


{'loss': 0.4811, 'learning_rate': 2.7083333333333332e-05, 'epoch': 55.17}



 55%|█████▌    | 1600/2900 [03:34<02:45,  7.84it/s]Saving model checkpoint to temp/checkpoint-1600
Configuration saved in temp/checkpoint-1600/config.json


{'eval_loss': 0.597662091255188, 'eval_accuracy': 0.6944847605224964, 'eval_runtime': 0.369, 'eval_samples_per_second': 3734.579, 'eval_steps_per_second': 10.841, 'epoch': 55.17}


Model weights saved in temp/checkpoint-1600/pytorch_model.bin
 69%|██████▉   | 2000/2900 [04:27<01:56,  7.70it/s]***** Running Evaluation *****
  Num examples = 1378
  Batch size = 384


{'loss': 0.4717, 'learning_rate': 1.8750000000000002e-05, 'epoch': 68.97}



 69%|██████▉   | 2000/2900 [04:27<01:56,  7.70it/s]Saving model checkpoint to temp/checkpoint-2000
Configuration saved in temp/checkpoint-2000/config.json


{'eval_loss': 0.6043750047683716, 'eval_accuracy': 0.7002902757619739, 'eval_runtime': 0.3654, 'eval_samples_per_second': 3771.137, 'eval_steps_per_second': 10.947, 'epoch': 68.97}


Model weights saved in temp/checkpoint-2000/pytorch_model.bin
 83%|████████▎ | 2400/2900 [05:21<01:05,  7.61it/s]***** Running Evaluation *****
  Num examples = 1378
  Batch size = 384


{'loss': 0.4667, 'learning_rate': 1.0416666666666668e-05, 'epoch': 82.76}



 83%|████████▎ | 2400/2900 [05:21<01:05,  7.61it/s]Saving model checkpoint to temp/checkpoint-2400
Configuration saved in temp/checkpoint-2400/config.json


{'eval_loss': 0.609807014465332, 'eval_accuracy': 0.6988388969521045, 'eval_runtime': 0.3818, 'eval_samples_per_second': 3609.457, 'eval_steps_per_second': 10.477, 'epoch': 82.76}


Model weights saved in temp/checkpoint-2400/pytorch_model.bin
 97%|█████████▋| 2800/2900 [06:15<00:13,  7.43it/s]***** Running Evaluation *****
  Num examples = 1378
  Batch size = 384


{'loss': 0.464, 'learning_rate': 2.0833333333333334e-06, 'epoch': 96.55}



 97%|█████████▋| 2800/2900 [06:15<00:13,  7.43it/s]Saving model checkpoint to temp/checkpoint-2800
Configuration saved in temp/checkpoint-2800/config.json


{'eval_loss': 0.6118476986885071, 'eval_accuracy': 0.6981132075471698, 'eval_runtime': 0.3637, 'eval_samples_per_second': 3788.828, 'eval_steps_per_second': 10.998, 'epoch': 96.55}


Model weights saved in temp/checkpoint-2800/pytorch_model.bin
100%|█████████▉| 2899/2900 [06:30<00:00,  7.60it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from temp/checkpoint-400 (score: 0.5869231820106506).
100%|██████████| 2900/2900 [06:30<00:00,  7.43it/s]

{'train_runtime': 390.3079, 'train_samples_per_second': 1411.706, 'train_steps_per_second': 7.43, 'train_loss': 0.4853704005274279, 'epoch': 100.0}





TrainOutput(global_step=2900, training_loss=0.4853704005274279, metrics={'train_runtime': 390.3079, 'train_samples_per_second': 1411.706, 'train_steps_per_second': 7.43, 'train_loss': 0.4853704005274279, 'epoch': 100.0})