In [6]:
from tokenizers import BertWordPieceTokenizer
import pandas as pd
import os
import string
import re

In [7]:
PATH = 'C:/Users/bill/Documents/projects/data/chatbot'

In [8]:
def cleanse(text):
    text = ''.join([ x for x in str(text).lower() if x in string.printable ])
    text = re.sub(r'\b\d+\b', '', text)
    text = text.replace('\n', ' ')
    text = re.sub(r'[#`\*\"“”\n\\…\+\-\/\=\(\)‘•:\[\]\|’\!;$,?\'%]', ' ', text)
    text = re.sub('\s+', ' ', text)
    return text

data = pd.read_csv(os.path.join(PATH, 'cnv_2019'), sep='\t', names=[
    'rpt_mnth', 'cnv_id', 'msg_id', 'agent', 'msg_type', 'channel', 
    'creat_ts', 'text', 'intent', 'score', 'resp_cd'
])
data = data[data['agent'].isin(['coremobile', 'ccp'])]['text'].dropna().apply(cleanse).drop_duplicates().reset_index(drop=True)
data.to_csv(os.path.join(PATH, 'tokenizer/sample'), index=False, header=False)

In [9]:
!head $PATH/tokenizer/sample

i ordered authorized users on my platinum card. want to see when they expect to receive it. if tracking is available
tracking for additional cards platinum
other card has not been mailed yet you will be notified once its mailed.
hi alan thanks for contacting us could you please elaborate your query so that i can help you with it
i see one of the card has been mailed on jan under rush delivery maximum time is working days however you will receive it soon as its under rush delivery.
no chat with a customer care professional
hi joshua i am sorry to hear about your decision to cancel the card i am here to honor your request please help me with the reason for the cancellation of the card.
cancel a card
hi shashikant thank you for messaging us. i can hep you with card cancellation. if you don t mind may i know the reason of cancellation 
im no longer going to use the card


In [10]:
tokenizer = BertWordPieceTokenizer()
tokenizer.train(files=os.path.join(PATH, 'tokenizer/sample'), vocab_size=30000, min_frequency=3)

In [11]:
tokenizer.save(os.path.join(PATH, 'tokenizer'))

['C:/Users/bill/Documents/projects/data/chatbot\\tokenizer\\vocab.txt']

In [12]:
tokenizer = BertWordPieceTokenizer(os.path.join(PATH, 'tokenizer/vocab.txt'))

In [24]:
from transformers import DistilBertConfig

config = DistilBertConfig(
    vocab_size=30000,
    max_position_embeddings=512,
    n_heads=6,
    n_layers=3
)

In [25]:
from transformers import LineByLineTextDataset, DistilBertTokenizer, DistilBertForMaskedLM

tokenizer = DistilBertTokenizer.from_pretrained(os.path.join(PATH, 'tokenizer/vocab.txt'), do_lower_case=True, max_len=512)
#model = DistilBertForMaskedLM.from_pretrained('distilbert-base-uncased')
model = DistilBertForMaskedLM(config=config).from_pretrained('distilbert-base-uncased')

dataset = LineByLineTextDataset(
    tokenizer=tokenizer,
    file_path=os.path.join(PATH, 'tokenizer/sample'),
    block_size=128,
)

Calling DistilBertTokenizer.from_pretrained() with the path to a single file or url is deprecated


In [26]:
from transformers import DataCollatorForLanguageModeling

data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer, mlm=True, mlm_probability=0.15
)

In [27]:
from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir=os.path.join(PATH, 'distilbert'),
    overwrite_output_dir=True,
    num_train_epochs=5,
    per_gpu_train_batch_size=32,
    save_steps=10000,
    save_total_limit=2,
)

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=dataset,
    prediction_loss_only=True,
)

In [28]:
trainer.train()

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=5.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Iteration', max=246440.0, style=ProgressStyle(description…

	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha)


{"learning_rate": 4.997971108586268e-05, "loss": 6.128949224472046, "step": 500}
{"learning_rate": 4.9959422171725374e-05, "loss": 5.802680884361267, "step": 1000}
{"learning_rate": 4.9939133257588053e-05, "loss": 5.359263319015503, "step": 1500}
{"learning_rate": 4.9918844343450746e-05, "loss": 4.659339546203613, "step": 2000}
{"learning_rate": 4.9898555429313425e-05, "loss": 4.230475105762482, "step": 2500}
{"learning_rate": 4.987826651517611e-05, "loss": 3.9149328083992003, "step": 3000}
{"learning_rate": 4.98579776010388e-05, "loss": 3.7832697257995607, "step": 3500}
{"learning_rate": 4.9837688686901476e-05, "loss": 3.568826804637909, "step": 4000}
{"learning_rate": 4.981739977276416e-05, "loss": 3.4469726157188414, "step": 4500}
{"learning_rate": 4.979711085862685e-05, "loss": 3.3241396498680116, "step": 5000}
{"learning_rate": 4.9776821944489534e-05, "loss": 3.2045674166679383, "step": 5500}
{"learning_rate": 4.975653303035222e-05, "loss": 3.1618262119293212, "step": 6000}
{"lear



{"learning_rate": 4.957393280311638e-05, "loss": 2.7053635318279268, "step": 10500}
{"learning_rate": 4.955364388897906e-05, "loss": 2.621333956718445, "step": 11000}
{"learning_rate": 4.953335497484175e-05, "loss": 2.643410466194153, "step": 11500}
{"learning_rate": 4.951306606070443e-05, "loss": 2.6186161227226257, "step": 12000}
{"learning_rate": 4.9492777146567124e-05, "loss": 2.596000167131424, "step": 12500}
{"learning_rate": 4.94724882324298e-05, "loss": 2.5665775735378267, "step": 13000}
{"learning_rate": 4.945219931829249e-05, "loss": 2.552228819847107, "step": 13500}
{"learning_rate": 4.9431910404155175e-05, "loss": 2.5118647100925444, "step": 14000}
{"learning_rate": 4.9411621490017854e-05, "loss": 2.522433589696884, "step": 14500}
{"learning_rate": 4.939133257588054e-05, "loss": 2.5040662636756896, "step": 15000}
{"learning_rate": 4.9371043661743226e-05, "loss": 2.4685007209777834, "step": 15500}
{"learning_rate": 4.935075474760591e-05, "loss": 2.432910655260086, "step": 16

{"learning_rate": 4.7585619217659474e-05, "loss": 1.9016480882167817, "step": 59500}
{"learning_rate": 4.756533030352216e-05, "loss": 1.9052011079788207, "step": 60000}
{"learning_rate": 4.754504138938484e-05, "loss": 1.9043810179233551, "step": 60500}
{"learning_rate": 4.7524752475247525e-05, "loss": 1.9105456326007844, "step": 61000}
{"learning_rate": 4.750446356111021e-05, "loss": 1.911097437620163, "step": 61500}
{"learning_rate": 4.74841746469729e-05, "loss": 1.911690155506134, "step": 62000}
{"learning_rate": 4.746388573283558e-05, "loss": 1.880244991183281, "step": 62500}
{"learning_rate": 4.744359681869827e-05, "loss": 1.866886534333229, "step": 63000}
{"learning_rate": 4.742330790456095e-05, "loss": 1.8720847388505937, "step": 63500}
{"learning_rate": 4.7403018990423634e-05, "loss": 1.8840956897735597, "step": 64000}
{"learning_rate": 4.738273007628632e-05, "loss": 1.8646244925260544, "step": 64500}
{"learning_rate": 4.7362441162149e-05, "loss": 1.8871823093891145, "step": 650

{"learning_rate": 4.559730563220257e-05, "loss": 1.7383772941827773, "step": 108500}
{"learning_rate": 4.5577016718065254e-05, "loss": 1.7687381777763367, "step": 109000}
{"learning_rate": 4.555672780392793e-05, "loss": 1.7648169734477996, "step": 109500}
{"learning_rate": 4.5536438889790626e-05, "loss": 1.7507342969179154, "step": 110000}
{"learning_rate": 4.5516149975653305e-05, "loss": 1.7386659895181655, "step": 110500}
{"learning_rate": 4.549586106151599e-05, "loss": 1.7125080479383468, "step": 111000}
{"learning_rate": 4.5475572147378677e-05, "loss": 1.754179223060608, "step": 111500}
{"learning_rate": 4.5455283233241356e-05, "loss": 1.7469014900922775, "step": 112000}
{"learning_rate": 4.543499431910405e-05, "loss": 1.7653832006454468, "step": 112500}
{"learning_rate": 4.541470540496673e-05, "loss": 1.744118652701378, "step": 113000}
{"learning_rate": 4.5394416490829414e-05, "loss": 1.7512749701738357, "step": 113500}
{"learning_rate": 4.53741275766921e-05, "loss": 1.71605557060

{"learning_rate": 4.3629280960882976e-05, "loss": 1.6722978160381317, "step": 157000}
{"learning_rate": 4.360899204674566e-05, "loss": 1.6812296562194824, "step": 157500}
{"learning_rate": 4.358870313260834e-05, "loss": 1.6579440114498138, "step": 158000}
{"learning_rate": 4.356841421847103e-05, "loss": 1.6519947255849838, "step": 158500}
{"learning_rate": 4.354812530433371e-05, "loss": 1.6671981557607651, "step": 159000}
{"learning_rate": 4.35278363901964e-05, "loss": 1.6744684401750565, "step": 159500}
{"learning_rate": 4.3507547476059084e-05, "loss": 1.6526326731443406, "step": 160000}
{"learning_rate": 4.348725856192177e-05, "loss": 1.6204775751829148, "step": 160500}
{"learning_rate": 4.346696964778445e-05, "loss": 1.6730505256652832, "step": 161000}
{"learning_rate": 4.3446680733647135e-05, "loss": 1.665782541513443, "step": 161500}
{"learning_rate": 4.342639181950982e-05, "loss": 1.652376938223839, "step": 162000}
{"learning_rate": 4.340610290537251e-05, "loss": 1.63657340586185

{"learning_rate": 4.166125628956338e-05, "loss": 1.609274302959442, "step": 205500}
{"learning_rate": 4.164096737542607e-05, "loss": 1.6341561917066574, "step": 206000}
{"learning_rate": 4.1620678461288755e-05, "loss": 1.6323397884368895, "step": 206500}
{"learning_rate": 4.1600389547151434e-05, "loss": 1.6266976351737976, "step": 207000}
{"learning_rate": 4.158010063301413e-05, "loss": 1.621362811088562, "step": 207500}
{"learning_rate": 4.1559811718876806e-05, "loss": 1.612387875199318, "step": 208000}
{"learning_rate": 4.153952280473949e-05, "loss": 1.608404631137848, "step": 208500}
{"learning_rate": 4.151923389060218e-05, "loss": 1.6109141927957535, "step": 209000}
{"learning_rate": 4.149894497646486e-05, "loss": 1.6198628281354903, "step": 209500}
{"learning_rate": 4.147865606232755e-05, "loss": 1.61674462556839, "step": 210000}
{"learning_rate": 4.145836714819023e-05, "loss": 1.578289226770401, "step": 210500}
{"learning_rate": 4.1438078234052915e-05, "loss": 1.5854062976837158,

HBox(children=(FloatProgress(value=0.0, description='Iteration', max=246440.0, style=ProgressStyle(description…

{"learning_rate": 3.999756533030352e-05, "loss": 1.557959607720375, "step": 246500}
{"learning_rate": 3.997727641616621e-05, "loss": 1.55147276699543, "step": 247000}
{"learning_rate": 3.9956987502028894e-05, "loss": 1.5901314021348953, "step": 247500}
{"learning_rate": 3.993669858789158e-05, "loss": 1.541367137312889, "step": 248000}
{"learning_rate": 3.991640967375426e-05, "loss": 1.5665299710035323, "step": 248500}
{"learning_rate": 3.9896120759616945e-05, "loss": 1.586864545106888, "step": 249000}
{"learning_rate": 3.987583184547963e-05, "loss": 1.5585404982566833, "step": 249500}
{"learning_rate": 3.985554293134232e-05, "loss": 1.569550331234932, "step": 250000}
{"learning_rate": 3.9835254017205e-05, "loss": 1.5784214890003205, "step": 250500}
{"learning_rate": 3.981496510306769e-05, "loss": 1.569969848036766, "step": 251000}
{"learning_rate": 3.979467618893037e-05, "loss": 1.5491194353103637, "step": 251500}
{"learning_rate": 3.9774387274793054e-05, "loss": 1.5666718628406524, "s

{"learning_rate": 3.802954065898393e-05, "loss": 1.5353796137571334, "step": 295000}
{"learning_rate": 3.8009251744846616e-05, "loss": 1.5185956262350082, "step": 295500}
{"learning_rate": 3.79889628307093e-05, "loss": 1.5097811735868454, "step": 296000}
{"learning_rate": 3.796867391657199e-05, "loss": 1.558483452439308, "step": 296500}
{"learning_rate": 3.7948385002434674e-05, "loss": 1.5500751608610153, "step": 297000}
{"learning_rate": 3.792809608829735e-05, "loss": 1.5625609576702117, "step": 297500}
{"learning_rate": 3.7907807174160046e-05, "loss": 1.5290316992998123, "step": 298000}
{"learning_rate": 3.7887518260022725e-05, "loss": 1.51874440741539, "step": 298500}
{"learning_rate": 3.786722934588541e-05, "loss": 1.5457076816558837, "step": 299000}
{"learning_rate": 3.78469404317481e-05, "loss": 1.5231231701374055, "step": 299500}
{"learning_rate": 3.7826651517610776e-05, "loss": 1.49819038271904, "step": 300000}
{"learning_rate": 3.780636260347347e-05, "loss": 1.5417707315683364

{"learning_rate": 3.606151598766434e-05, "loss": 1.522289174079895, "step": 343500}
{"learning_rate": 3.604122707352703e-05, "loss": 1.5141226394176484, "step": 344000}
{"learning_rate": 3.602093815938971e-05, "loss": 1.5406827812194823, "step": 344500}
{"learning_rate": 3.6000649245252396e-05, "loss": 1.507112223148346, "step": 345000}
{"learning_rate": 3.598036033111508e-05, "loss": 1.4872461782693862, "step": 345500}
{"learning_rate": 3.596007141697776e-05, "loss": 1.5090194594860078, "step": 346000}
{"learning_rate": 3.5939782502840454e-05, "loss": 1.5053401322364808, "step": 346500}
{"learning_rate": 3.591949358870313e-05, "loss": 1.4946132545471191, "step": 347000}
{"learning_rate": 3.589920467456582e-05, "loss": 1.49993820810318, "step": 347500}
{"learning_rate": 3.5878915760428505e-05, "loss": 1.5170123401880264, "step": 348000}
{"learning_rate": 3.585862684629119e-05, "loss": 1.513134134054184, "step": 348500}
{"learning_rate": 3.583833793215387e-05, "loss": 1.5309298247098924

{"learning_rate": 3.409349131634475e-05, "loss": 1.4709656341075896, "step": 392000}
{"learning_rate": 3.407320240220744e-05, "loss": 1.4912839455604554, "step": 392500}
{"learning_rate": 3.405291348807012e-05, "loss": 1.4785441794395446, "step": 393000}
{"learning_rate": 3.4032624573932804e-05, "loss": 1.4948534595966338, "step": 393500}
{"learning_rate": 3.401233565979549e-05, "loss": 1.4920565427541732, "step": 394000}
{"learning_rate": 3.3992046745658176e-05, "loss": 1.4795369819402695, "step": 394500}
{"learning_rate": 3.3971757831520855e-05, "loss": 1.4890050913095474, "step": 395000}
{"learning_rate": 3.395146891738355e-05, "loss": 1.4791620744466782, "step": 395500}
{"learning_rate": 3.3931180003246227e-05, "loss": 1.480379185795784, "step": 396000}
{"learning_rate": 3.391089108910891e-05, "loss": 1.4998876304626465, "step": 396500}
{"learning_rate": 3.38906021749716e-05, "loss": 1.4869209820032119, "step": 397000}
{"learning_rate": 3.387031326083428e-05, "loss": 1.491666363716

{"learning_rate": 3.212546664502516e-05, "loss": 1.4487463830709457, "step": 440500}
{"learning_rate": 3.210517773088784e-05, "loss": 1.4592644022703172, "step": 441000}
{"learning_rate": 3.208488881675053e-05, "loss": 1.4848800472021102, "step": 441500}
{"learning_rate": 3.206459990261321e-05, "loss": 1.4751463032960892, "step": 442000}
{"learning_rate": 3.20443109884759e-05, "loss": 1.4393612760305405, "step": 442500}
{"learning_rate": 3.202402207433858e-05, "loss": 1.4588327634334564, "step": 443000}
{"learning_rate": 3.200373316020126e-05, "loss": 1.435251164317131, "step": 443500}
{"learning_rate": 3.1983444246063955e-05, "loss": 1.462027539253235, "step": 444000}
{"learning_rate": 3.1963155331926634e-05, "loss": 1.4450304403305054, "step": 444500}
{"learning_rate": 3.194286641778932e-05, "loss": 1.4614441318511964, "step": 445000}
{"learning_rate": 3.1922577503652006e-05, "loss": 1.4763686267137528, "step": 445500}
{"learning_rate": 3.190228858951469e-05, "loss": 1.47699538195133

{"learning_rate": 3.0157441973705568e-05, "loss": 1.4520417665243148, "step": 489000}
{"learning_rate": 3.013715305956825e-05, "loss": 1.4402511203289032, "step": 489500}
{"learning_rate": 3.011686414543094e-05, "loss": 1.4508647820949554, "step": 490000}
{"learning_rate": 3.0096575231293623e-05, "loss": 1.4774880388975142, "step": 490500}
{"learning_rate": 3.0076286317156305e-05, "loss": 1.4361363503932953, "step": 491000}
{"learning_rate": 3.005599740301899e-05, "loss": 1.4335335114002228, "step": 491500}
{"learning_rate": 3.0035708488881674e-05, "loss": 1.387437010884285, "step": 492000}
{"learning_rate": 3.0015419574744363e-05, "loss": 1.4704072333574294, "step": 492500}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=246440.0, style=ProgressStyle(description…

{"learning_rate": 2.9995130660607046e-05, "loss": 1.4187353028655052, "step": 493000}
{"learning_rate": 2.9974841746469728e-05, "loss": 1.4357689129710198, "step": 493500}
{"learning_rate": 2.9954552832332418e-05, "loss": 1.4401490358114242, "step": 494000}
{"learning_rate": 2.99342639181951e-05, "loss": 1.4251235044002533, "step": 494500}
{"learning_rate": 2.9913975004057783e-05, "loss": 1.4508706914186478, "step": 495000}
{"learning_rate": 2.989368608992047e-05, "loss": 1.4584126826524735, "step": 495500}
{"learning_rate": 2.987339717578315e-05, "loss": 1.4426148283481597, "step": 496000}
{"learning_rate": 2.985310826164584e-05, "loss": 1.4342023227214813, "step": 496500}
{"learning_rate": 2.9832819347508523e-05, "loss": 1.4338972841501236, "step": 497000}
{"learning_rate": 2.9812530433371206e-05, "loss": 1.4429015947580337, "step": 497500}
{"learning_rate": 2.9792241519233895e-05, "loss": 1.4313430364131927, "step": 498000}
{"learning_rate": 2.9771952605096577e-05, "loss": 1.4175987

{"learning_rate": 2.8047394903424768e-05, "loss": 1.4196793838739394, "step": 541000}
{"learning_rate": 2.8027105989287457e-05, "loss": 1.429656055688858, "step": 541500}
{"learning_rate": 2.800681707515014e-05, "loss": 1.4180287597179413, "step": 542000}
{"learning_rate": 2.7986528161012825e-05, "loss": 1.40483101272583, "step": 542500}
{"learning_rate": 2.7966239246875508e-05, "loss": 1.4368343467712403, "step": 543000}
{"learning_rate": 2.794595033273819e-05, "loss": 1.4101314208507538, "step": 543500}
{"learning_rate": 2.792566141860088e-05, "loss": 1.4334459022283554, "step": 544000}
{"learning_rate": 2.7905372504463562e-05, "loss": 1.4426021975278855, "step": 544500}
{"learning_rate": 2.788508359032625e-05, "loss": 1.4062258239984513, "step": 545000}
{"learning_rate": 2.786479467618893e-05, "loss": 1.38975182056427, "step": 545500}
{"learning_rate": 2.7844505762051613e-05, "loss": 1.4387222310304641, "step": 546000}
{"learning_rate": 2.7824216847914303e-05, "loss": 1.410276244163

{"learning_rate": 2.6079370232105175e-05, "loss": 1.4235679978132247, "step": 589500}
{"learning_rate": 2.6059081317967865e-05, "loss": 1.423918808579445, "step": 590000}
{"learning_rate": 2.6038792403830547e-05, "loss": 1.400764684319496, "step": 590500}
{"learning_rate": 2.6018503489693237e-05, "loss": 1.3937043334245682, "step": 591000}
{"learning_rate": 2.599821457555592e-05, "loss": 1.4108518929481506, "step": 591500}
{"learning_rate": 2.5977925661418602e-05, "loss": 1.4238339796066284, "step": 592000}
{"learning_rate": 2.5957636747281288e-05, "loss": 1.3929543142318725, "step": 592500}
{"learning_rate": 2.593734783314397e-05, "loss": 1.4495368180274963, "step": 593000}
{"learning_rate": 2.5917058919006653e-05, "loss": 1.422385253548622, "step": 593500}
{"learning_rate": 2.5896770004869342e-05, "loss": 1.415796257853508, "step": 594000}
{"learning_rate": 2.5876481090732025e-05, "loss": 1.3939064801931382, "step": 594500}
{"learning_rate": 2.5856192176594714e-05, "loss": 1.39105452

{"learning_rate": 2.4131634474922904e-05, "loss": 1.3966151776313782, "step": 637500}
{"learning_rate": 2.411134556078559e-05, "loss": 1.3808111408948898, "step": 638000}
{"learning_rate": 2.4091056646648273e-05, "loss": 1.4130213742256164, "step": 638500}
{"learning_rate": 2.407076773251096e-05, "loss": 1.3955873770713807, "step": 639000}
{"learning_rate": 2.405047881837364e-05, "loss": 1.3763981648683548, "step": 639500}
{"learning_rate": 2.4030189904236327e-05, "loss": 1.410669920027256, "step": 640000}
{"learning_rate": 2.400990099009901e-05, "loss": 1.3749116407632829, "step": 640500}
{"learning_rate": 2.3989612075961695e-05, "loss": 1.3950397911071777, "step": 641000}
{"learning_rate": 2.396932316182438e-05, "loss": 1.3949546324014663, "step": 641500}
{"learning_rate": 2.3949034247687067e-05, "loss": 1.3677165193557739, "step": 642000}
{"learning_rate": 2.392874533354975e-05, "loss": 1.4090167031288148, "step": 642500}
{"learning_rate": 2.3908456419412432e-05, "loss": 1.380791756

{"learning_rate": 2.218389871774063e-05, "loss": 1.3747013164758681, "step": 685500}
{"learning_rate": 2.2163609803603312e-05, "loss": 1.381378919005394, "step": 686000}
{"learning_rate": 2.2143320889465994e-05, "loss": 1.3979718990325927, "step": 686500}
{"learning_rate": 2.212303197532868e-05, "loss": 1.4007807220220565, "step": 687000}
{"learning_rate": 2.2102743061191366e-05, "loss": 1.3660761442184448, "step": 687500}
{"learning_rate": 2.2082454147054052e-05, "loss": 1.3812437908649444, "step": 688000}
{"learning_rate": 2.2062165232916735e-05, "loss": 1.3771176652908326, "step": 688500}
{"learning_rate": 2.204187631877942e-05, "loss": 1.382931806206703, "step": 689000}
{"learning_rate": 2.2021587404642107e-05, "loss": 1.3645994728803634, "step": 689500}
{"learning_rate": 2.200129849050479e-05, "loss": 1.3799995934963227, "step": 690000}
{"learning_rate": 2.1981009576367472e-05, "loss": 1.3639313107728959, "step": 690500}
{"learning_rate": 2.1960720662230158e-05, "loss": 1.37746959

{"learning_rate": 2.023616296055835e-05, "loss": 1.365504203915596, "step": 733500}
{"learning_rate": 2.0215874046421037e-05, "loss": 1.3783961013555526, "step": 734000}
{"learning_rate": 2.019558513228372e-05, "loss": 1.3705402365922927, "step": 734500}
{"learning_rate": 2.0175296218146406e-05, "loss": 1.3530641012191773, "step": 735000}
{"learning_rate": 2.015500730400909e-05, "loss": 1.3769248238801957, "step": 735500}
{"learning_rate": 2.0134718389871778e-05, "loss": 1.3863266075849534, "step": 736000}
{"learning_rate": 2.011442947573446e-05, "loss": 1.4057585242986679, "step": 736500}
{"learning_rate": 2.0094140561597143e-05, "loss": 1.3602711684703828, "step": 737000}
{"learning_rate": 2.007385164745983e-05, "loss": 1.3676246027946473, "step": 737500}
{"learning_rate": 2.0053562733322515e-05, "loss": 1.3600405867099763, "step": 738000}
{"learning_rate": 2.0033273819185197e-05, "loss": 1.3602644058465958, "step": 738500}
{"learning_rate": 2.0012984905047883e-05, "loss": 1.34968235

HBox(children=(FloatProgress(value=0.0, description='Iteration', max=246440.0, style=ProgressStyle(description…

{"learning_rate": 1.999269599091057e-05, "loss": 1.3387557954788207, "step": 739500}
{"learning_rate": 1.997240707677325e-05, "loss": 1.3461408115625382, "step": 740000}
{"learning_rate": 1.9952118162635934e-05, "loss": 1.3523745130300522, "step": 740500}
{"learning_rate": 1.993182924849862e-05, "loss": 1.3588051514029502, "step": 741000}
{"learning_rate": 1.9911540334361306e-05, "loss": 1.3533748210668564, "step": 741500}
{"learning_rate": 1.9891251420223992e-05, "loss": 1.3523860224485398, "step": 742000}
{"learning_rate": 1.9870962506086674e-05, "loss": 1.3365569578409195, "step": 742500}
{"learning_rate": 1.985067359194936e-05, "loss": 1.3363545098304748, "step": 743000}
{"learning_rate": 1.9830384677812043e-05, "loss": 1.3496023089885711, "step": 743500}
{"learning_rate": 1.981009576367473e-05, "loss": 1.3657423913478852, "step": 744000}
{"learning_rate": 1.9789806849537415e-05, "loss": 1.3497359300851821, "step": 744500}
{"learning_rate": 1.9769517935400097e-05, "loss": 1.3580512

{"learning_rate": 1.8024671319590977e-05, "loss": 1.3578760102391243, "step": 788000}
{"learning_rate": 1.800438240545366e-05, "loss": 1.3297338378429413, "step": 788500}
{"learning_rate": 1.7984093491316345e-05, "loss": 1.342516992330551, "step": 789000}
{"learning_rate": 1.796380457717903e-05, "loss": 1.3373197013139724, "step": 789500}
{"learning_rate": 1.7943515663041717e-05, "loss": 1.3224813375473023, "step": 790000}
{"learning_rate": 1.79232267489044e-05, "loss": 1.3556777563095093, "step": 790500}
{"learning_rate": 1.7902937834767082e-05, "loss": 1.3604735733270645, "step": 791000}
{"learning_rate": 1.7882648920629768e-05, "loss": 1.349819734454155, "step": 791500}
{"learning_rate": 1.7862360006492454e-05, "loss": 1.3540546241998672, "step": 792000}
{"learning_rate": 1.784207109235514e-05, "loss": 1.3349113218784332, "step": 792500}
{"learning_rate": 1.7821782178217823e-05, "loss": 1.3422654752731322, "step": 793000}
{"learning_rate": 1.780149326408051e-05, "loss": 1.3399999147

{"learning_rate": 1.6056646648271385e-05, "loss": 1.3332064895629883, "step": 836500}
{"learning_rate": 1.603635773413407e-05, "loss": 1.3292752977609634, "step": 837000}
{"learning_rate": 1.6016068819996753e-05, "loss": 1.3397505968809127, "step": 837500}
{"learning_rate": 1.599577990585944e-05, "loss": 1.3051401509046554, "step": 838000}
{"learning_rate": 1.5975490991722125e-05, "loss": 1.3340400949716569, "step": 838500}
{"learning_rate": 1.5955202077584808e-05, "loss": 1.3375668610930442, "step": 839000}
{"learning_rate": 1.5934913163447494e-05, "loss": 1.3353790278434754, "step": 839500}
{"learning_rate": 1.591462424931018e-05, "loss": 1.306329852104187, "step": 840000}
{"learning_rate": 1.5894335335172862e-05, "loss": 1.3349035919904708, "step": 840500}
{"learning_rate": 1.5874046421035545e-05, "loss": 1.3404281212687492, "step": 841000}
{"learning_rate": 1.585375750689823e-05, "loss": 1.3121871932148934, "step": 841500}
{"learning_rate": 1.5833468592760916e-05, "loss": 1.3567345

{"learning_rate": 1.4108910891089108e-05, "loss": 1.3227641572952271, "step": 884500}
{"learning_rate": 1.4088621976951794e-05, "loss": 1.3160849746465684, "step": 885000}
{"learning_rate": 1.4068333062814478e-05, "loss": 1.3251977037191391, "step": 885500}
{"learning_rate": 1.4048044148677164e-05, "loss": 1.3236295803785325, "step": 886000}
{"learning_rate": 1.4027755234539849e-05, "loss": 1.3035841497778893, "step": 886500}
{"learning_rate": 1.4007466320402531e-05, "loss": 1.3272311924099922, "step": 887000}
{"learning_rate": 1.3987177406265217e-05, "loss": 1.3279775449037552, "step": 887500}
{"learning_rate": 1.3966888492127903e-05, "loss": 1.3046665771007537, "step": 888000}
{"learning_rate": 1.3946599577990587e-05, "loss": 1.30213351726532, "step": 888500}
{"learning_rate": 1.392631066385327e-05, "loss": 1.3365437886714935, "step": 889000}
{"learning_rate": 1.3906021749715956e-05, "loss": 1.3433624626398086, "step": 889500}
{"learning_rate": 1.388573283557864e-05, "loss": 1.332285

{"learning_rate": 1.2161175133906834e-05, "loss": 1.31106216275692, "step": 932500}
{"learning_rate": 1.214088621976952e-05, "loss": 1.3402301832437515, "step": 933000}
{"learning_rate": 1.2120597305632204e-05, "loss": 1.3150762270689011, "step": 933500}
{"learning_rate": 1.2100308391494888e-05, "loss": 1.3265742626190185, "step": 934000}
{"learning_rate": 1.2080019477357572e-05, "loss": 1.3320905768871307, "step": 934500}
{"learning_rate": 1.2059730563220258e-05, "loss": 1.3166250885725022, "step": 935000}
{"learning_rate": 1.203944164908294e-05, "loss": 1.3158378969430924, "step": 935500}
{"learning_rate": 1.2019152734945627e-05, "loss": 1.2974478902816773, "step": 936000}
{"learning_rate": 1.1998863820808311e-05, "loss": 1.3162614876031875, "step": 936500}
{"learning_rate": 1.1978574906670995e-05, "loss": 1.3155238993167877, "step": 937000}
{"learning_rate": 1.195828599253368e-05, "loss": 1.31269797539711, "step": 937500}
{"learning_rate": 1.1937997078396365e-05, "loss": 1.340927590

{"learning_rate": 1.0213439376724559e-05, "loss": 1.3029207339286804, "step": 980500}
{"learning_rate": 1.0193150462587243e-05, "loss": 1.2904956755638122, "step": 981000}
{"learning_rate": 1.0172861548449927e-05, "loss": 1.3023320500850677, "step": 981500}
{"learning_rate": 1.0152572634312613e-05, "loss": 1.2791064519882203, "step": 982000}
{"learning_rate": 1.0132283720175296e-05, "loss": 1.313620969414711, "step": 982500}
{"learning_rate": 1.0111994806037982e-05, "loss": 1.2930543764829636, "step": 983000}
{"learning_rate": 1.0091705891900666e-05, "loss": 1.2986779861450195, "step": 983500}
{"learning_rate": 1.007141697776335e-05, "loss": 1.2897263371944427, "step": 984000}
{"learning_rate": 1.0051128063626034e-05, "loss": 1.3246774456501007, "step": 984500}
{"learning_rate": 1.003083914948872e-05, "loss": 1.338060205221176, "step": 985000}
{"learning_rate": 1.0010550235351405e-05, "loss": 1.293686448097229, "step": 985500}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=246440.0, style=ProgressStyle(description…

{"learning_rate": 9.990261321214089e-06, "loss": 1.297412528216839, "step": 986000}
{"learning_rate": 9.969972407076773e-06, "loss": 1.282992022573948, "step": 986500}
{"learning_rate": 9.949683492939459e-06, "loss": 1.2940843108892441, "step": 987000}
{"learning_rate": 9.929394578802143e-06, "loss": 1.283890133857727, "step": 987500}
{"learning_rate": 9.909105664664828e-06, "loss": 1.3096230722665787, "step": 988000}
{"learning_rate": 9.888816750527512e-06, "loss": 1.2764560493230819, "step": 988500}
{"learning_rate": 9.868527836390198e-06, "loss": 1.2951607362031936, "step": 989000}
{"learning_rate": 9.84823892225288e-06, "loss": 1.2720163358449936, "step": 989500}
{"learning_rate": 9.827950008115566e-06, "loss": 1.3180661995410918, "step": 990000}
{"learning_rate": 9.80766109397825e-06, "loss": 1.2849779943823814, "step": 990500}
{"learning_rate": 9.787372179840935e-06, "loss": 1.2958510301709174, "step": 991000}
{"learning_rate": 9.76708326570362e-06, "loss": 1.3101927233934403, "s

{"learning_rate": 8.022236649894497e-06, "loss": 1.3007326887845994, "step": 1034500}
{"learning_rate": 8.001947735757183e-06, "loss": 1.296877972126007, "step": 1035000}
{"learning_rate": 7.981658821619867e-06, "loss": 1.3061036437749862, "step": 1035500}
{"learning_rate": 7.961369907482553e-06, "loss": 1.3106937417984008, "step": 1036000}
{"learning_rate": 7.941080993345235e-06, "loss": 1.2955031903386116, "step": 1036500}
{"learning_rate": 7.920792079207921e-06, "loss": 1.2886796119213104, "step": 1037000}
{"learning_rate": 7.900503165070606e-06, "loss": 1.2985824863910675, "step": 1037500}
{"learning_rate": 7.88021425093329e-06, "loss": 1.2920254402160645, "step": 1038000}
{"learning_rate": 7.859925336795974e-06, "loss": 1.3107896807193755, "step": 1038500}
{"learning_rate": 7.83963642265866e-06, "loss": 1.295791655421257, "step": 1039000}
{"learning_rate": 7.819347508521344e-06, "loss": 1.2694955052137376, "step": 1039500}
{"learning_rate": 7.799058594384029e-06, "loss": 1.2821785

{"learning_rate": 6.074500892712223e-06, "loss": 1.292912920475006, "step": 1082500}
{"learning_rate": 6.054211978574907e-06, "loss": 1.2878412790298461, "step": 1083000}
{"learning_rate": 6.033923064437591e-06, "loss": 1.2598158628344536, "step": 1083500}
{"learning_rate": 6.0136341503002765e-06, "loss": 1.263903661608696, "step": 1084000}
{"learning_rate": 5.993345236162961e-06, "loss": 1.3000698479413986, "step": 1084500}
{"learning_rate": 5.973056322025646e-06, "loss": 1.2746466190218926, "step": 1085000}
{"learning_rate": 5.95276740788833e-06, "loss": 1.2741111152768134, "step": 1085500}
{"learning_rate": 5.932478493751014e-06, "loss": 1.2962345304489136, "step": 1086000}
{"learning_rate": 5.912189579613699e-06, "loss": 1.2986977437138558, "step": 1086500}
{"learning_rate": 5.891900665476384e-06, "loss": 1.26608571267128, "step": 1087000}
{"learning_rate": 5.871611751339069e-06, "loss": 1.2718926829099655, "step": 1087500}
{"learning_rate": 5.851322837201753e-06, "loss": 1.2577878

{"learning_rate": 4.1267651355299465e-06, "loss": 1.2860627691745758, "step": 1130500}
{"learning_rate": 4.1064762213926316e-06, "loss": 1.2795404821634293, "step": 1131000}
{"learning_rate": 4.086187307255316e-06, "loss": 1.2630192734599113, "step": 1131500}
{"learning_rate": 4.065898393118001e-06, "loss": 1.2701977671980857, "step": 1132000}
{"learning_rate": 4.045609478980685e-06, "loss": 1.3126185117959976, "step": 1132500}
{"learning_rate": 4.025320564843369e-06, "loss": 1.270764562010765, "step": 1133000}
{"learning_rate": 4.0050316507060545e-06, "loss": 1.3107987071871758, "step": 1133500}
{"learning_rate": 3.984742736568739e-06, "loss": 1.249496756196022, "step": 1134000}
{"learning_rate": 3.964453822431424e-06, "loss": 1.2781783623695373, "step": 1134500}
{"learning_rate": 3.944164908294108e-06, "loss": 1.2932086094617843, "step": 1135000}
{"learning_rate": 3.923875994156793e-06, "loss": 1.2802577686309815, "step": 1135500}
{"learning_rate": 3.9035870800194774e-06, "loss": 1.2

{"learning_rate": 2.1993182924849866e-06, "loss": 1.2816464748382568, "step": 1178000}
{"learning_rate": 2.179029378347671e-06, "loss": 1.2825721714496612, "step": 1178500}
{"learning_rate": 2.1587404642103556e-06, "loss": 1.2556605181694032, "step": 1179000}
{"learning_rate": 2.1384515500730402e-06, "loss": 1.2885964673757553, "step": 1179500}
{"learning_rate": 2.118162635935725e-06, "loss": 1.249369080543518, "step": 1180000}
{"learning_rate": 2.0978737217984096e-06, "loss": 1.2701292046904564, "step": 1180500}
{"learning_rate": 2.077584807661094e-06, "loss": 1.285576274752617, "step": 1181000}
{"learning_rate": 2.0572958935237785e-06, "loss": 1.2585791773200035, "step": 1181500}
{"learning_rate": 2.037006979386463e-06, "loss": 1.2575023874044418, "step": 1182000}
{"learning_rate": 2.016718065249148e-06, "loss": 1.25357055413723, "step": 1182500}
{"learning_rate": 1.9964291511118325e-06, "loss": 1.284180515050888, "step": 1183000}
{"learning_rate": 1.976140236974517e-06, "loss": 1.25

{"learning_rate": 2.515825353027106e-07, "loss": 1.2396953089237213, "step": 1226000}
{"learning_rate": 2.3129362116539526e-07, "loss": 1.253200320005417, "step": 1226500}
{"learning_rate": 2.1100470702807986e-07, "loss": 1.2877734628915787, "step": 1227000}
{"learning_rate": 1.907157928907645e-07, "loss": 1.2784199369549751, "step": 1227500}
{"learning_rate": 1.7042687875344912e-07, "loss": 1.2779794754981995, "step": 1228000}
{"learning_rate": 1.5013796461613374e-07, "loss": 1.244050340652466, "step": 1228500}
{"learning_rate": 1.2984905047881836e-07, "loss": 1.2739269877672195, "step": 1229000}
{"learning_rate": 1.09560136341503e-07, "loss": 1.255731644630432, "step": 1229500}
{"learning_rate": 8.927122220418765e-08, "loss": 1.2779735770821572, "step": 1230000}
{"learning_rate": 6.898230806687227e-08, "loss": 1.2759363926649094, "step": 1230500}
{"learning_rate": 4.869339392955689e-08, "loss": 1.2933054921627045, "step": 1231000}
{"learning_rate": 2.8404479792241524e-08, "loss": 1.2

TrainOutput(global_step=1232200, training_loss=1.473795949356173)

In [29]:
trainer.save_model(os.path.join(PATH, 'distilbert'))