In [8]:
from transformers import AutoConfig, AutoTokenizer, AutoModelForMaskedLM, BertweetTokenizer

config = AutoConfig.from_pretrained('vinai/bertweet-base')
tokenizer = AutoTokenizer.from_pretrained('vinai/bertweet-base')
model = AutoModelForMaskedLM.from_config(config)

Downloading: 100%|██████████| 558/558 [00:00<00:00, 559kB/s]
Downloading: 100%|██████████| 824k/824k [00:01<00:00, 598kB/s] 
Downloading: 100%|██████████| 1.03M/1.03M [00:01<00:00, 674kB/s] 
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [2]:
from glob import glob
from datasets import Dataset

import re
import pandas as pd
def mask_data_loading(url):
    def stock_symbol_mask(sentense):
        pattern = r'\$[A-Z]*'
        # symbol += re.findall(pattern, sentense)
        result = re.sub(pattern, tokenizer.mask_token, sentense)

        return result

    with open(url, 'r', encoding='utf-8') as f:
        df = pd.read_json(f)
        data = df.copy()
        data = data.loc[df['sentiment'].notnull()]
        data['sentiment'] = pd.Categorical(data['sentiment'])
        data['sentense'] = data['body'].map(stock_symbol_mask)
        data['labels'] = data['body']
        symbols = set()
        for symbol_list in data['body'].str.findall(r'\$[A-Z]+'):
            for symbol in symbol_list:
                symbols.add(symbol)
        return data, symbols

data_url = '../crawler/stock/data/**.json'
url = glob(data_url)[-1]
data, symbols = mask_data_loading(url)

dataset = Dataset.from_pandas(data.loc[:, ['labels', 'sentense']])
dataset = dataset.remove_columns('__index_level_0__')

special_tokens_dict = {'additional_special_tokens': list(symbols)}
tokenizer.add_special_tokens(special_tokens_dict)
model.resize_token_embeddings(len(tokenizer))

Embedding(29625, 768)

In [7]:
tokenizer.save_pretrained('./symbol-vocab')

('./symbol-vocab\\tokenizer_config.json',
 './symbol-vocab\\special_tokens_map.json',
 './symbol-vocab\\vocab.txt',
 './symbol-vocab\\added_tokens.json',
 './symbol-vocab\\tokenizer.json')

In [3]:
from parse import normalize_except_compony 

def encode(example):
    sentense = normalize_except_compony(example['sentense'])
    label = normalize_except_compony(example['labels'])

    label = tokenizer(label, padding='max_length', truncation=True)
    # 101, 51, 1234, 12541, 151
    result = tokenizer(sentense, padding='max_length', truncation=True)
    # 101, 103, 103, 103
    result['label_ids'] = label['input_ids']

    # masked_position = [i for i in range(len(result['input_ids'])) if result['input_ids'][i] == tokenizer.mask_token_id]
    # result['decoder_input_ids'] = label['input_ids']
    # result['labels'] = [-100 for i in label['input_ids']]
    # for i in range(len(result['labels'])):
    #     if not i in masked_position:
    #         result['labels'][i] = -100

    return result

encoded_dataset = dataset.map(encode, batched=True)

# print(encoded_dataset[0]['sentense'])
# print(encoded_dataset[0]['input_ids'])
# print(encoded_dataset[0]['labels'])
# print(encoded_dataset[0]['label_ids'])
# print(tokenizer.ids_to_tokens)
# print(tokenizer.decode(encoded_dataset[0]['label_ids']))

100%|██████████| 15/15 [00:05<00:00,  2.86ba/s]


In [4]:
from transformers import DataCollatorForLanguageModeling

# data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm_probability=0.15)

# samples = encoded_dataset[:2]
# for chunk in data_collator(samples)["input_ids"]:
#     print(f"\n'>>> {tokenizer.decode(chunk)}'")

# Training

In [5]:
import numpy as np
from datasets import load_metric

metric = load_metric("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

In [6]:
from transformers import TrainingArguments, Trainer, Seq2SeqTrainingArguments, Seq2SeqTrainer

# training_args = TrainingArguments(
#     output_dir="test_trainer",
#     per_device_train_batch_size=2,
# )

# trainer = Trainer(
#     model=model,
#     args=training_args,
#     train_dataset=encoded_dataset,
#     compute_metrics=compute_metrics,
# )

# Seq2SeqTrainer
training_args = Seq2SeqTrainingArguments(
    output_dir="test_trainer",
    per_device_train_batch_size=4,
    num_train_epochs=50,
    save_steps=10000
)

train_datset = encoded_dataset.shuffle().select(range(4000))

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=train_datset,
    compute_metrics=compute_metrics,
)

trainer.train()

The following columns in the training set  don't have a corresponding argument in `BertForMaskedLM.forward` and have been ignored: sentense. If sentense are not expected by `BertForMaskedLM.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 4000
  Num Epochs = 50
  Instantaneous batch size per device = 4
  Total train batch size (w. parallel, distributed & accumulation) = 4
  Gradient Accumulation steps = 1
  Total optimization steps = 50000
  1%|          | 500/50000 [02:55<4:47:32,  2.87it/s]

{'loss': 1.7203, 'learning_rate': 4.9500000000000004e-05, 'epoch': 0.5}


  2%|▏         | 1000/50000 [05:48<4:27:32,  3.05it/s]

{'loss': 0.0397, 'learning_rate': 4.9e-05, 'epoch': 1.0}


  3%|▎         | 1500/50000 [08:32<4:25:09,  3.05it/s]

{'loss': 0.0184, 'learning_rate': 4.85e-05, 'epoch': 1.5}


  4%|▍         | 2000/50000 [11:16<4:21:45,  3.06it/s]

{'loss': 0.015, 'learning_rate': 4.8e-05, 'epoch': 2.0}


  5%|▌         | 2500/50000 [14:00<4:18:44,  3.06it/s]

{'loss': 0.01, 'learning_rate': 4.75e-05, 'epoch': 2.5}


  6%|▌         | 3000/50000 [16:45<4:18:41,  3.03it/s]

{'loss': 0.0098, 'learning_rate': 4.7e-05, 'epoch': 3.0}


  7%|▋         | 3500/50000 [19:30<4:13:05,  3.06it/s]

{'loss': 0.0073, 'learning_rate': 4.6500000000000005e-05, 'epoch': 3.5}


  8%|▊         | 4000/50000 [22:14<4:10:15,  3.06it/s]

{'loss': 0.0081, 'learning_rate': 4.600000000000001e-05, 'epoch': 4.0}


  9%|▉         | 4500/50000 [24:57<4:09:04,  3.04it/s]

{'loss': 0.0067, 'learning_rate': 4.55e-05, 'epoch': 4.5}


 10%|█         | 5000/50000 [27:41<4:05:37,  3.05it/s]

{'loss': 0.0068, 'learning_rate': 4.5e-05, 'epoch': 5.0}


 11%|█         | 5500/50000 [30:26<4:06:05,  3.01it/s]

{'loss': 0.0062, 'learning_rate': 4.4500000000000004e-05, 'epoch': 5.5}


 12%|█▏        | 6000/50000 [33:11<4:02:39,  3.02it/s]

{'loss': 0.0063, 'learning_rate': 4.4000000000000006e-05, 'epoch': 6.0}


 13%|█▎        | 6500/50000 [35:56<3:58:01,  3.05it/s]

{'loss': 0.0056, 'learning_rate': 4.35e-05, 'epoch': 6.5}


 14%|█▍        | 7000/50000 [38:39<3:55:09,  3.05it/s]

{'loss': 0.0057, 'learning_rate': 4.3e-05, 'epoch': 7.0}


 15%|█▌        | 7500/50000 [41:23<3:52:51,  3.04it/s]

{'loss': 0.0047, 'learning_rate': 4.25e-05, 'epoch': 7.5}


 16%|█▌        | 8000/50000 [44:08<3:52:03,  3.02it/s]

{'loss': 0.0051, 'learning_rate': 4.2e-05, 'epoch': 8.0}


 17%|█▋        | 8500/50000 [46:53<3:49:35,  3.01it/s]

{'loss': 0.0039, 'learning_rate': 4.15e-05, 'epoch': 8.5}


 18%|█▊        | 9000/50000 [49:37<3:44:25,  3.04it/s]

{'loss': 0.0038, 'learning_rate': 4.1e-05, 'epoch': 9.0}


 19%|█▉        | 9500/50000 [52:21<3:40:07,  3.07it/s]

{'loss': 0.0029, 'learning_rate': 4.05e-05, 'epoch': 9.5}


 20%|██        | 10000/50000 [55:05<3:37:11,  3.07it/s]Saving model checkpoint to test_trainer\checkpoint-10000
Configuration saved in test_trainer\checkpoint-10000\config.json


{'loss': 0.0031, 'learning_rate': 4e-05, 'epoch': 10.0}


Model weights saved in test_trainer\checkpoint-10000\pytorch_model.bin
 21%|██        | 10500/50000 [57:52<3:37:38,  3.02it/s] 

{'loss': 0.0022, 'learning_rate': 3.9500000000000005e-05, 'epoch': 10.5}


 22%|██▏       | 11000/50000 [1:00:38<3:34:42,  3.03it/s]

{'loss': 0.0023, 'learning_rate': 3.9000000000000006e-05, 'epoch': 11.0}


 23%|██▎       | 11500/50000 [1:03:23<3:30:22,  3.05it/s]

{'loss': 0.0017, 'learning_rate': 3.85e-05, 'epoch': 11.5}


 24%|██▍       | 12000/50000 [1:06:07<3:26:17,  3.07it/s]

{'loss': 0.0017, 'learning_rate': 3.8e-05, 'epoch': 12.0}


 25%|██▌       | 12500/50000 [1:08:51<3:25:04,  3.05it/s]

{'loss': 0.0014, 'learning_rate': 3.7500000000000003e-05, 'epoch': 12.5}


 26%|██▌       | 13000/50000 [1:11:35<3:23:28,  3.03it/s]

{'loss': 0.0015, 'learning_rate': 3.7e-05, 'epoch': 13.0}


 27%|██▋       | 13500/50000 [1:14:20<3:21:42,  3.02it/s]

{'loss': 0.001, 'learning_rate': 3.65e-05, 'epoch': 13.5}


 28%|██▊       | 14000/50000 [1:17:05<3:16:50,  3.05it/s]

{'loss': 0.0012, 'learning_rate': 3.6e-05, 'epoch': 14.0}


 29%|██▉       | 14500/50000 [1:19:49<3:13:17,  3.06it/s]

{'loss': 0.0008, 'learning_rate': 3.55e-05, 'epoch': 14.5}


 30%|███       | 15000/50000 [1:22:33<3:11:21,  3.05it/s]

{'loss': 0.0008, 'learning_rate': 3.5e-05, 'epoch': 15.0}


 31%|███       | 15500/50000 [1:25:17<3:07:43,  3.06it/s]

{'loss': 0.0007, 'learning_rate': 3.45e-05, 'epoch': 15.5}


 32%|███▏      | 16000/50000 [1:28:02<3:07:41,  3.02it/s]

{'loss': 0.0008, 'learning_rate': 3.4000000000000007e-05, 'epoch': 16.0}


 33%|███▎      | 16500/50000 [1:30:47<3:02:10,  3.06it/s]

{'loss': 0.0006, 'learning_rate': 3.35e-05, 'epoch': 16.5}


 34%|███▍      | 17000/50000 [1:33:31<3:00:06,  3.05it/s]

{'loss': 0.0006, 'learning_rate': 3.3e-05, 'epoch': 17.0}


 35%|███▌      | 17500/50000 [1:36:15<2:57:54,  3.04it/s]

{'loss': 0.0004, 'learning_rate': 3.2500000000000004e-05, 'epoch': 17.5}


 36%|███▌      | 18000/50000 [1:38:59<2:54:42,  3.05it/s]

{'loss': 0.0006, 'learning_rate': 3.2000000000000005e-05, 'epoch': 18.0}


 37%|███▋      | 18500/50000 [1:41:44<2:54:40,  3.01it/s]

{'loss': 0.0005, 'learning_rate': 3.15e-05, 'epoch': 18.5}


 38%|███▊      | 19000/50000 [1:44:31<2:49:38,  3.05it/s]

{'loss': 0.0004, 'learning_rate': 3.1e-05, 'epoch': 19.0}


 39%|███▉      | 19500/50000 [1:47:15<2:47:14,  3.04it/s]

{'loss': 0.0004, 'learning_rate': 3.05e-05, 'epoch': 19.5}


 40%|████      | 20000/50000 [1:49:59<2:44:54,  3.03it/s]Saving model checkpoint to test_trainer\checkpoint-20000
Configuration saved in test_trainer\checkpoint-20000\config.json


{'loss': 0.0004, 'learning_rate': 3e-05, 'epoch': 20.0}


Model weights saved in test_trainer\checkpoint-20000\pytorch_model.bin
 41%|████      | 20500/50000 [1:52:47<2:40:49,  3.06it/s] 

{'loss': 0.0003, 'learning_rate': 2.95e-05, 'epoch': 20.5}


 42%|████▏     | 21000/50000 [1:55:31<2:39:02,  3.04it/s]

{'loss': 0.0004, 'learning_rate': 2.9e-05, 'epoch': 21.0}


 43%|████▎     | 21500/50000 [1:58:15<2:36:13,  3.04it/s]

{'loss': 0.0004, 'learning_rate': 2.8499999999999998e-05, 'epoch': 21.5}


 44%|████▍     | 22000/50000 [2:00:59<2:32:57,  3.05it/s]

{'loss': 0.0004, 'learning_rate': 2.8000000000000003e-05, 'epoch': 22.0}


 45%|████▌     | 22500/50000 [2:03:43<2:30:15,  3.05it/s]

{'loss': 0.0004, 'learning_rate': 2.7500000000000004e-05, 'epoch': 22.5}


 46%|████▌     | 23000/50000 [2:06:27<2:27:29,  3.05it/s]

{'loss': 0.0004, 'learning_rate': 2.7000000000000002e-05, 'epoch': 23.0}


 47%|████▋     | 23500/50000 [2:09:11<2:25:00,  3.05it/s]

{'loss': 0.0003, 'learning_rate': 2.6500000000000004e-05, 'epoch': 23.5}


 48%|████▊     | 24000/50000 [2:11:55<2:21:39,  3.06it/s]

{'loss': 0.0003, 'learning_rate': 2.6000000000000002e-05, 'epoch': 24.0}


 49%|████▉     | 24500/50000 [2:14:38<2:19:25,  3.05it/s]

{'loss': 0.0002, 'learning_rate': 2.5500000000000003e-05, 'epoch': 24.5}


 50%|█████     | 25000/50000 [2:17:22<2:16:53,  3.04it/s]

{'loss': 0.0003, 'learning_rate': 2.5e-05, 'epoch': 25.0}


 51%|█████     | 25500/50000 [2:20:06<2:13:48,  3.05it/s]

{'loss': 0.0003, 'learning_rate': 2.45e-05, 'epoch': 25.5}


 52%|█████▏    | 26000/50000 [2:22:50<2:11:34,  3.04it/s]

{'loss': 0.0003, 'learning_rate': 2.4e-05, 'epoch': 26.0}


 53%|█████▎    | 26500/50000 [2:25:34<2:08:36,  3.05it/s]

{'loss': 0.0004, 'learning_rate': 2.35e-05, 'epoch': 26.5}


 54%|█████▍    | 27000/50000 [2:28:18<2:05:49,  3.05it/s]

{'loss': 0.0003, 'learning_rate': 2.3000000000000003e-05, 'epoch': 27.0}


 55%|█████▌    | 27500/50000 [2:31:02<2:02:56,  3.05it/s]

{'loss': 0.0002, 'learning_rate': 2.25e-05, 'epoch': 27.5}


 56%|█████▌    | 28000/50000 [2:33:46<1:59:43,  3.06it/s]

{'loss': 0.0003, 'learning_rate': 2.2000000000000003e-05, 'epoch': 28.0}


 57%|█████▋    | 28500/50000 [2:36:29<1:57:14,  3.06it/s]

{'loss': 0.0002, 'learning_rate': 2.15e-05, 'epoch': 28.5}


 58%|█████▊    | 29000/50000 [2:39:13<1:55:05,  3.04it/s]

{'loss': 0.0002, 'learning_rate': 2.1e-05, 'epoch': 29.0}


 59%|█████▉    | 29500/50000 [2:41:57<1:52:16,  3.04it/s]

{'loss': 0.0002, 'learning_rate': 2.05e-05, 'epoch': 29.5}


 60%|██████    | 30000/50000 [2:44:41<1:49:47,  3.04it/s]Saving model checkpoint to test_trainer\checkpoint-30000
Configuration saved in test_trainer\checkpoint-30000\config.json


{'loss': 0.0002, 'learning_rate': 2e-05, 'epoch': 30.0}


Model weights saved in test_trainer\checkpoint-30000\pytorch_model.bin
 61%|██████    | 30500/50000 [2:47:30<1:45:54,  3.07it/s]

{'loss': 0.0002, 'learning_rate': 1.9500000000000003e-05, 'epoch': 30.5}


 62%|██████▏   | 31000/50000 [2:50:13<1:43:56,  3.05it/s]

{'loss': 0.0002, 'learning_rate': 1.9e-05, 'epoch': 31.0}


 63%|██████▎   | 31500/50000 [2:52:57<1:40:37,  3.06it/s]

{'loss': 0.0001, 'learning_rate': 1.85e-05, 'epoch': 31.5}


 64%|██████▍   | 32000/50000 [2:55:41<1:38:05,  3.06it/s]

{'loss': 0.0003, 'learning_rate': 1.8e-05, 'epoch': 32.0}


 65%|██████▌   | 32500/50000 [2:58:25<1:35:20,  3.06it/s]

{'loss': 0.0001, 'learning_rate': 1.75e-05, 'epoch': 32.5}


 66%|██████▌   | 33000/50000 [3:01:09<1:33:30,  3.03it/s]

{'loss': 0.0002, 'learning_rate': 1.7000000000000003e-05, 'epoch': 33.0}


 67%|██████▋   | 33500/50000 [3:03:53<1:30:09,  3.05it/s]

{'loss': 0.0002, 'learning_rate': 1.65e-05, 'epoch': 33.5}


 68%|██████▊   | 34000/50000 [3:06:37<1:27:40,  3.04it/s]

{'loss': 0.0002, 'learning_rate': 1.6000000000000003e-05, 'epoch': 34.0}


 69%|██████▉   | 34500/50000 [3:09:21<1:24:30,  3.06it/s]

{'loss': 0.0002, 'learning_rate': 1.55e-05, 'epoch': 34.5}


 70%|███████   | 35000/50000 [3:12:05<1:22:19,  3.04it/s]

{'loss': 0.0002, 'learning_rate': 1.5e-05, 'epoch': 35.0}


 71%|███████   | 35500/50000 [3:14:49<1:18:31,  3.08it/s]

{'loss': 0.0003, 'learning_rate': 1.45e-05, 'epoch': 35.5}


 72%|███████▏  | 36000/50000 [3:17:32<1:16:04,  3.07it/s]

{'loss': 0.0001, 'learning_rate': 1.4000000000000001e-05, 'epoch': 36.0}


 73%|███████▎  | 36500/50000 [3:20:16<1:13:40,  3.05it/s]

{'loss': 0.0002, 'learning_rate': 1.3500000000000001e-05, 'epoch': 36.5}


 74%|███████▍  | 37000/50000 [3:23:00<1:11:10,  3.04it/s]

{'loss': 0.0002, 'learning_rate': 1.3000000000000001e-05, 'epoch': 37.0}


 75%|███████▌  | 37500/50000 [3:25:44<1:08:36,  3.04it/s]

{'loss': 0.0001, 'learning_rate': 1.25e-05, 'epoch': 37.5}


 76%|███████▌  | 38000/50000 [3:28:28<1:05:21,  3.06it/s]

{'loss': 0.0001, 'learning_rate': 1.2e-05, 'epoch': 38.0}


 77%|███████▋  | 38500/50000 [3:31:12<1:03:02,  3.04it/s]

{'loss': 0.0001, 'learning_rate': 1.1500000000000002e-05, 'epoch': 38.5}


 78%|███████▊  | 39000/50000 [3:33:56<1:00:04,  3.05it/s]

{'loss': 0.0002, 'learning_rate': 1.1000000000000001e-05, 'epoch': 39.0}


 79%|███████▉  | 39500/50000 [3:36:40<57:19,  3.05it/s]  

{'loss': 0.0001, 'learning_rate': 1.05e-05, 'epoch': 39.5}


 80%|████████  | 40000/50000 [3:39:24<54:48,  3.04it/s]Saving model checkpoint to test_trainer\checkpoint-40000
Configuration saved in test_trainer\checkpoint-40000\config.json


{'loss': 0.0002, 'learning_rate': 1e-05, 'epoch': 40.0}


Model weights saved in test_trainer\checkpoint-40000\pytorch_model.bin
 81%|████████  | 40500/50000 [3:42:12<51:37,  3.07it/s]  

{'loss': 0.0001, 'learning_rate': 9.5e-06, 'epoch': 40.5}


 82%|████████▏ | 41000/50000 [3:44:55<49:03,  3.06it/s]

{'loss': 0.0001, 'learning_rate': 9e-06, 'epoch': 41.0}


 83%|████████▎ | 41500/50000 [3:47:39<46:11,  3.07it/s]

{'loss': 0.0001, 'learning_rate': 8.500000000000002e-06, 'epoch': 41.5}


 84%|████████▍ | 42000/50000 [3:50:23<43:50,  3.04it/s]

{'loss': 0.0001, 'learning_rate': 8.000000000000001e-06, 'epoch': 42.0}


 85%|████████▌ | 42500/50000 [3:53:07<41:01,  3.05it/s]

{'loss': 0.0001, 'learning_rate': 7.5e-06, 'epoch': 42.5}


 86%|████████▌ | 43000/50000 [3:55:50<38:09,  3.06it/s]

{'loss': 0.0002, 'learning_rate': 7.000000000000001e-06, 'epoch': 43.0}


 87%|████████▋ | 43500/50000 [3:58:34<35:31,  3.05it/s]

{'loss': 0.0001, 'learning_rate': 6.5000000000000004e-06, 'epoch': 43.5}


 88%|████████▊ | 44000/50000 [4:01:18<32:36,  3.07it/s]

{'loss': 0.0002, 'learning_rate': 6e-06, 'epoch': 44.0}


 89%|████████▉ | 44500/50000 [4:04:02<30:08,  3.04it/s]

{'loss': 0.0001, 'learning_rate': 5.500000000000001e-06, 'epoch': 44.5}


 90%|█████████ | 45000/50000 [4:06:46<27:22,  3.04it/s]

{'loss': 0.0001, 'learning_rate': 5e-06, 'epoch': 45.0}


 91%|█████████ | 45500/50000 [4:09:30<24:36,  3.05it/s]

{'loss': 0.0001, 'learning_rate': 4.5e-06, 'epoch': 45.5}


 92%|█████████▏| 46000/50000 [4:12:13<21:50,  3.05it/s]

{'loss': 0.0002, 'learning_rate': 4.000000000000001e-06, 'epoch': 46.0}


 93%|█████████▎| 46500/50000 [4:14:57<19:07,  3.05it/s]

{'loss': 0.0001, 'learning_rate': 3.5000000000000004e-06, 'epoch': 46.5}


 94%|█████████▍| 47000/50000 [4:17:41<16:28,  3.04it/s]

{'loss': 0.0001, 'learning_rate': 3e-06, 'epoch': 47.0}


 95%|█████████▌| 47500/50000 [4:20:25<13:42,  3.04it/s]

{'loss': 0.0001, 'learning_rate': 2.5e-06, 'epoch': 47.5}


 96%|█████████▌| 48000/50000 [4:23:09<10:55,  3.05it/s]

{'loss': 0.0001, 'learning_rate': 2.0000000000000003e-06, 'epoch': 48.0}


 97%|█████████▋| 48500/50000 [4:25:53<08:11,  3.05it/s]

{'loss': 0.0001, 'learning_rate': 1.5e-06, 'epoch': 48.5}


 98%|█████████▊| 49000/50000 [4:28:36<05:27,  3.05it/s]

{'loss': 0.0001, 'learning_rate': 1.0000000000000002e-06, 'epoch': 49.0}


 99%|█████████▉| 49500/50000 [4:31:20<02:43,  3.07it/s]

{'loss': 0.0001, 'learning_rate': 5.000000000000001e-07, 'epoch': 49.5}


100%|██████████| 50000/50000 [4:34:04<00:00,  3.05it/s]Saving model checkpoint to test_trainer\checkpoint-50000
Configuration saved in test_trainer\checkpoint-50000\config.json


{'loss': 0.0001, 'learning_rate': 0.0, 'epoch': 50.0}


Model weights saved in test_trainer\checkpoint-50000\pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


100%|██████████| 50000/50000 [4:34:09<00:00,  3.04it/s]

{'train_runtime': 16449.4913, 'train_samples_per_second': 12.158, 'train_steps_per_second': 3.04, 'train_loss': 0.019213099329695105, 'epoch': 50.0}





TrainOutput(global_step=50000, training_loss=0.019213099329695105, metrics={'train_runtime': 16449.4913, 'train_samples_per_second': 12.158, 'train_steps_per_second': 3.04, 'train_loss': 0.019213099329695105, 'epoch': 50.0})

In [7]:
from transformers import pipeline

fill_mask = pipeline(
    "fill-mask",
    model=model,
    tokenizer=tokenizer,
    device=0
)

s = '[MASK] to the sky!!!'
fill_mask(s)

[{'score': 0.9999961853027344,
  'token': 29304,
  'token_str': '$TSLA',
  'sequence': 'to the sky!!!'},
 {'score': 2.9771953791168926e-07,
  'token': 29623,
  'token_str': '$TSM',
  'sequence': 'to the sky!!!'},
 {'score': 2.534041811941279e-07,
  'token': 29364,
  'token_str': '$MSFT',
  'sequence': 'to the sky!!!'},
 {'score': 2.319639662573536e-07,
  'token': 29125,
  'token_str': '$FB',
  'sequence': 'to the sky!!!'},
 {'score': 1.744811584103445e-07,
  'token': 29180,
  'token_str': '$MULN',
  'sequence': 'to the sky!!!'}]

In [8]:
import torch
torch.cuda.empty_cache()
print(torch.cuda.memory_summary())

|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |    1673 MB |    4893 MB |  613870 GB |  613869 GB |
|       from large pool |    1671 MB |    4890 MB |  613778 GB |  613777 GB |
|       from small pool |       2 MB |       2 MB |      91 GB |      91 GB |
|---------------------------------------------------------------------------|
| Active memory         |    1673 MB |    4893 MB |  613870 GB |  613869 GB |
|       from large pool |    1671 MB |    4890 MB |  613778 GB |  613777 GB |
|       from small pool |       2 MB |       2 MB |      91 GB |      91 GB |
|---------------------------------------------------------------