# model

In [1]:
import torch
# Use the GPU when one is available; otherwise fall back to the CPU so the
# notebook still runs (slowly) on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
import torch
import torch.nn as nn
import math

class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position encodings to a sequence, then apply dropout.

    The encoding table is precomputed once for ``max_len`` positions and stored
    as a (non-trainable) buffer of shape ``(max_len, 1, d_model)``.

    Args:
        d_model: size of the feature (embedding) dimension. Odd sizes are
            supported: the last sine column simply has no cosine partner.
        dropout: dropout probability applied to the summed result.
        max_len: number of positions for which encodings are precomputed.
    """

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        angles = position * div_term  # (max_len, ceil(d_model / 2))
        pe[:, 0::2] = torch.sin(angles)
        # There are only d_model // 2 odd-indexed columns; for an odd d_model
        # that is one fewer than the number of frequencies, so trim to fit.
        pe[:, 1::2] = torch.cos(angles[:, :d_model // 2])
        # Insert a singleton axis at dim 1 so the table broadcasts over it.
        pe = pe.unsqueeze(1)
        self.register_buffer('pe', pe)

    def forward(self, x, rat):
        """Return ``dropout(x + rat * pe[:x.size(0)])``.

        Args:
            x: tensor whose dim 0 indexes positions and whose last dim is
                ``d_model``; dim 1 is broadcast over.
            rat: scalar multiplier applied to the positional encoding before
                it is added to ``x``.
        """
        x = x + (self.pe[:x.size(0), :]) * rat
        return self.dropout(x)

class TFModule(nn.Module):
    """Causally-masked Transformer-encoder language model.

    Pipeline: token embedding -> scaled positional encoding -> stack of
    ``nn.TransformerEncoderLayer`` with a subsequent-position mask -> linear
    projection to vocabulary logits.

    Args:
        vocab_size: number of rows in the embedding / size of the output logits.
        embed_size: embedding and model width (``d_model``).
        num_layers: number of encoder layers in the stack.
        num_hiddens: width of each layer's feed-forward sub-layer.
        num_heads: number of attention heads.
        embeddropout_rate: dropout applied after adding positional encodings.
        dropout_rate: dropout used inside the encoder layers.
        pos_ratio: scalar multiplier applied to the positional encoding.
    """

    def __init__(self, vocab_size, embed_size, num_layers, num_hiddens, num_heads, embeddropout_rate, dropout_rate, pos_ratio):
        super(TFModule, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        # Positional table is precomputed for 1025 positions.
        self.pos_embed = PositionalEncoding(embed_size, embeddropout_rate, 1025)
        self.pos_ratio = pos_ratio

        # NOTE(review): this layer is the template handed to nn.TransformerEncoder.
        # forward() only runs self.transformer_encoder, so this attribute's own
        # parameters look unused; it is kept so the module's attribute names and
        # state_dict keys stay unchanged.
        self.encoder_layer = nn.TransformerEncoderLayer(
                                        d_model=embed_size,
                                        dim_feedforward=num_hiddens,
                                        nhead=num_heads,
                                        dropout=dropout_rate,
                                        activation='relu')
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)

        # Projects the hidden state of every position to vocabulary logits.
        self.decoder = nn.Linear(embed_size, vocab_size)

    def _generate_square_subsequent_mask(self, sz, device=None):
        """Build an additive ``(sz, sz)`` mask: 0 on/below the diagonal, -inf above.

        Args:
            sz: sequence length.
            device: where to allocate the mask. Defaults to the device of the
                embedding weights so the mask always matches the model.
        """
        if device is None:
            device = self.embedding.weight.device
        # triu(diagonal=1) keeps the strictly-upper -inf entries and zeroes the rest.
        return torch.triu(torch.ones(sz, sz, device=device) * float('-inf'), diagonal=1)

    def forward(self, inputs):
        """Return logits of shape ``(batch * seq_len, vocab_size)``.

        Args:
            inputs: 2-D tensor of token ids; it is permuted so that its second
                axis becomes the position axis, i.e. it is read as
                ``(batch, seq_len)``.
        """
        word_emb = self.embedding(inputs.permute(1, 0))
        embeddings = self.pos_embed(word_emb, self.pos_ratio)

        # Allocate the causal mask on the same device as the activations rather
        # than on a notebook-level global device.
        causal_mask = self._generate_square_subsequent_mask(embeddings.shape[0], device=embeddings.device)

        # outputs has shape (seq_len, batch_size, embed_size)
        outputs = self.transformer_encoder(embeddings, mask=causal_mask)

        # Back to batch-first, then flatten batch and position into one axis.
        output = outputs.permute(1, 0, 2)
        outs = output.reshape(output.size(0)*output.size(1), output.size(2))
        ret = self.decoder(outs)
        return ret

In [3]:
# import torch
# import torch.nn as nn

In [4]:
# encoder_layer = nn.TransformerEncoderLayer(d_model=1, nhead=1, dropout=0, activation='relu')
# transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=1)

In [5]:
# src = torch.ones(3,1,1)
# out = encoder_layer(src)
# print(src.shape,out.shape)
# print(src)
# print(out)

In [6]:
# # dir(encoder_layer)
# vos = 53
# emb = 22
# numl = 3
# numh = 233
# numhea = 11
# dror = 0
# model = TFModule(vos, emb, numl, numh, numhea, dror)
# src = torch.ones((2,3),dtype=int)
# print(src.shape)
# out = model(src)
# print(out)

# data

In [7]:
import torch
from torchtext.datasets import WikiText2
from torchtext.data.utils import get_tokenizer
from collections import Counter
from torchtext.vocab import Vocab

In [8]:
class Corpus(object):
    """WikiText2 data holder.

    Builds a vocabulary from the training split, converts each split into one
    flat tensor of token ids, and stores the batchified train/val/test tensors
    on the notebook-level ``device``.
    """

    def __init__(self, train_batch_size=20, eval_batch_size=10, bptt=35):
        self.bptt = bptt
        self.tokenizer = get_tokenizer('basic_english')

        # Vocabulary is counted over the training split only.
        token_counts = Counter()
        for line in WikiText2(split='train'):
            token_counts.update(self.tokenizer(line))
        self.vocab = Vocab(token_counts)

        # Fresh iterators for all three splits.
        train_iter, val_iter, test_iter = WikiText2()
        flat_train = self.data_process(train_iter)
        flat_val = self.data_process(val_iter)
        flat_test = self.data_process(test_iter)

        self.train_data = self.batchify(flat_train, train_batch_size)
        self.val_data = self.batchify(flat_val, eval_batch_size)
        self.test_data = self.batchify(flat_test, eval_batch_size)

    def data_process(self, raw_text_iter):
        """Tokenize every item, map tokens to ids, and concatenate the non-empty results."""
        encoded = []
        for item in raw_text_iter:
            ids = torch.tensor([self.vocab[tok] for tok in self.tokenizer(item)],
                               dtype=torch.long)
            # Items that tokenize to nothing are dropped.
            if ids.numel() > 0:
                encoded.append(ids)
        return torch.cat(tuple(encoded))

    def batchify(self, data, batch_size):
        """Reshape a flat tensor into ``batch_size`` columns and move it to ``device``."""
        # Number of rows each column gets; leftover elements are discarded.
        rows = data.size(0) // batch_size
        trimmed = data.narrow(0, 0, rows * batch_size)
        # Split into batch_size contiguous runs, then put each run in a column.
        columns = trimmed.view(batch_size, -1).t().contiguous()
        return columns.to(device)

    def get_batch(self, source, i):
        """Return ``(data, target)`` starting at row ``i``; target is shifted by one row and flattened."""
        seq_len = min(self.bptt, len(source) - 1 - i)
        stop = i + seq_len
        data = source[i:stop]
        target = source[i + 1:stop + 1].reshape(-1)
        return data, target

    def get_ntokens(self):
        """Return the number of entries in the vocabulary's ``stoi`` mapping."""
        return len(self.vocab.stoi)

# main

In [9]:
import argparse
import time
import math
import torch
import torch.nn as nn
from torch.autograd import Variable

import data
import model
class args:
    """Run configuration, read everywhere as plain class attributes."""

    # --- model ---
    pos_ratio = 1        # multiplier on the positional encoding (TFModule pos_ratio)
    emsize = 100         # embedding / model width
    nhid = 200           # feed-forward width inside each encoder layer
    nlayers = 3          # number of encoder layers
    nhead = 20           # attention heads
    # --- optimisation ---
    lr = 0.5             # SGD learning rate
    epochs = 2000        # number of passes of the training loop
    batch_size = 32      # used for the train and the eval splits alike
    bptt = 256           # maximum sequence length of one batch
    # --- regularisation ---
    embeddropout = 0.1   # dropout after the positional encoding
    dropout = 0.3        # dropout inside the encoder layers
    # --- not referenced by the code visible in this notebook ---
    tied = False
    seed = 1234
    # Element-wise bound handed to clip_gradient.
    clip = 0.005
    # Not referenced by the code visible in this notebook.
    save = 'model.pt'

In [10]:
# Build the corpus; the evaluation splits reuse the training batch size.
data_loader = Corpus(
    train_batch_size=args.batch_size,
    eval_batch_size=args.batch_size,
    bptt=args.bptt,
)

In [11]:
# Short aliases for the three batchified splits held by the corpus.
train_data, val_data, test_data = (
    data_loader.train_data,
    data_loader.val_data,
    data_loader.test_data,
)

In [12]:
def get_batch(source, i, evaluation=False, bptt=None):
    """Slice one batch-first (input, target) pair out of a batchified split.

    Args:
        source: 2-D tensor whose rows are consecutive positions (it is
            transposed here, so columns become the leading axis of ``data``).
        i: index of the first row of the input window.
        evaluation: unused; kept so existing callers keep working.
        bptt: maximum number of rows in the window. Defaults to ``args.bptt``.

    Returns:
        data: ``source[i:i+seq_len]`` transposed.
        target: the same window shifted down by one row, transposed and
            flattened to 1-D, so it lines up with logits flattened in the
            same (column-major over ``source``) order.
    """
    if bptt is None:
        bptt = args.bptt
    # The last window may be shorter: one row must remain for the shifted target.
    seq_len = min(bptt, len(source) - 1 - i)
    data = source[i:i + seq_len].t()
    target = source[i + 1:i + 1 + seq_len].t().reshape(-1)
    return data, target

In [13]:
# import torch
# torch.__version__
# print(train_data.size(0))
# for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
#     data, targets = get_batch(train_data, i)
#     print(data.shape,targets.shape,targets[-1])
#     if (batch>=2): break

In [14]:
# Show the vocabulary size; the same expression is used below as the model's ntokens.
len(data_loader.vocab.itos)

28783

In [15]:
# Vocabulary size fixes both the embedding table and the output layer.
ntokens = len(data_loader.vocab.itos)
model = TFModule(
    vocab_size=ntokens,
    embed_size=args.emsize,
    num_layers=args.nlayers,
    num_hiddens=args.nhid,
    num_heads=args.nhead,
    embeddropout_rate=args.embeddropout,
    dropout_rate=args.dropout,
    pos_ratio=args.pos_ratio,
)

In [16]:
# Move the model's parameters and buffers to the selected device.
model = model.to(device)

In [17]:
# Display the module tree to check the layer configuration.
model

TFModule(
  (embedding): Embedding(28783, 100)
  (pos_embed): PositionalEncoding(
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder_layer): TransformerEncoderLayer(
    (self_attn): MultiheadAttention(
      (out_proj): _LinearWithBias(in_features=100, out_features=100, bias=True)
    )
    (linear1): Linear(in_features=100, out_features=200, bias=True)
    (dropout): Dropout(p=0.3, inplace=False)
    (linear2): Linear(in_features=200, out_features=100, bias=True)
    (norm1): LayerNorm((100,), eps=1e-05, elementwise_affine=True)
    (norm2): LayerNorm((100,), eps=1e-05, elementwise_affine=True)
    (dropout1): Dropout(p=0.3, inplace=False)
    (dropout2): Dropout(p=0.3, inplace=False)
  )
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): _LinearWithBias(in_features=100, out_features=100, bias=True)
        )
        (linear1): Linear(in_features=100, out_fe

In [18]:
# Plain SGD over every model parameter; cross-entropy over the flattened logits.
optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
criterion = nn.CrossEntropyLoss()

In [19]:
def clip_gradient(optimizer, grad_clip):
    """Clamp, in place, every gradient the optimizer holds to [-grad_clip, grad_clip].

    Parameters without a gradient are skipped. The clamp is element-wise
    (value clipping), not a rescaling by norm.
    """
    low, high = -grad_clip, grad_clip
    every_param = (p for group in optimizer.param_groups for p in group["params"])
    for p in every_param:
        if p.grad is None:
            continue
        p.grad.data.clamp_(low, high)

def evaluate(data_source):
    """Compute the token-weighted mean loss and perplexity over a split.

    Args:
        data_source: batchified split, walked in windows of ``args.bptt`` rows
            via ``get_batch``.

    Returns:
        ``(mean_loss, perplexity)`` as Python floats, where
        ``perplexity = exp(mean_loss)``.

    Raises:
        ValueError: if ``data_source`` is too short to yield a single batch.
    """
    model.eval()
    total_loss = 0.0
    total_words = 0
    # No gradients are needed here; skipping the autograd graph saves memory.
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i)

            output = model(data)
            loss = criterion(output, targets)

            # criterion averages over the batch, so re-weight by its token
            # count to get a correct mean over windows of different length.
            n_words = targets.shape[0]
            total_loss += loss.item() * n_words
            total_words += n_words

    if total_words == 0:
        raise ValueError('evaluate() got a data_source too short to form any batch')

    mean_loss = total_loss / total_words
    return mean_loss, math.exp(mean_loss)

def train():
    """Run one pass over ``train_data`` and report the running training loss.

    Each step: forward, cross-entropy, backward, element-wise gradient clamp
    to ``±args.clip``, SGD update.

    Returns:
        ``(mean_loss, perplexity)`` as Python floats (token-weighted mean over
        all batches of the pass), matching what ``evaluate`` returns.

    Raises:
        ValueError: if ``train_data`` is too short to yield a single batch.
    """
    model.train()
    total_loss = 0.
    total_words = 0
    for i in range(0, train_data.size(0) - 1, args.bptt):
        data, targets = get_batch(train_data, i)
        # The optimizer was built from model.parameters(), so this clears
        # every gradient of the model.
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, targets)
        loss.backward()
        clip_gradient(optimizer, args.clip)
        optimizer.step()

        # .item() keeps the running total a host-side float instead of a
        # device tensor.
        n_words = targets.shape[0]
        total_loss += loss.item() * n_words
        total_words += n_words

    if total_words == 0:
        raise ValueError('train() got train_data too short to form any batch')

    cur_loss = total_loss / total_words
    print('train {:10d} words | loss {:5.5f} | ppl {:5.5}'.format(total_words, cur_loss, math.exp(cur_loss)))
    return cur_loss, math.exp(cur_loss)

In [20]:
import numpy as np
# Per-epoch histories; the training loop appends the perplexity of each split.
Losstrain, Lossval, Losstest = [], [], []

In [None]:
import time
for T in range(args.epochs):
    stamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
    print('Round : ', T, "  ", stamp)

    # One training pass, then score the held-out splits.
    trainl, trainp = train()
    vall, valp = evaluate(val_data)
    testl, testp = evaluate(test_data)

    # Only the perplexities are tracked.
    Losstrain.append(trainp)
    Lossval.append(valp)
    Losstest.append(testp)

    # Rewrite the full histories every epoch so an interrupted run keeps its curves.
    for out_name, history in (('model-2-try-train', Losstrain),
                              ('model-2-try-val', Lossval),
                              ('model-2-try-test', Losstest)):
        np.save(out_name, np.array(history))

    print('  valid {:5.5f} | test {:5.5f}'.format(valp, testp))
    print('-' * 80)

Round :  0    2021-05-20 19:39:00
train    2049952 words | loss 7.35754 | ppl 1568.0
  valid 798.16332 | test 749.16902
--------------------------------------------------------------------------------
Round :  1    2021-05-20 19:40:44
train    2049952 words | loss 6.81547 | ppl 911.84
  valid 661.89706 | test 623.83616
--------------------------------------------------------------------------------
Round :  2    2021-05-20 19:42:27
train    2049952 words | loss 6.67491 | ppl 792.27
  valid 596.45165 | test 563.12396
--------------------------------------------------------------------------------
Round :  3    2021-05-20 19:44:10
train    2049952 words | loss 6.58580 | ppl 724.73
  valid 554.72322 | test 523.56553
--------------------------------------------------------------------------------
Round :  4    2021-05-20 19:45:54
train    2049952 words | loss 6.51999 | ppl 678.57
  valid 524.46752 | test 494.25424
----------------------------------------------------------------------------

train    2049952 words | loss 5.79643 | ppl 329.12
  valid 285.53222 | test 259.18547
--------------------------------------------------------------------------------
Round :  42    2021-05-20 20:51:20
train    2049952 words | loss 5.78551 | ppl 325.55
  valid 284.25225 | test 258.60280
--------------------------------------------------------------------------------
Round :  43    2021-05-20 20:53:04
train    2049952 words | loss 5.77589 | ppl 322.43
  valid 282.25636 | test 256.65163
--------------------------------------------------------------------------------
Round :  44    2021-05-20 20:54:47
train    2049952 words | loss 5.76499 | ppl 318.94
  valid 280.60940 | test 254.58564
--------------------------------------------------------------------------------
Round :  45    2021-05-20 20:56:30
train    2049952 words | loss 5.75460 | ppl 315.64
  valid 279.57816 | test 253.91329
--------------------------------------------------------------------------------
Round :  46    2021-05-20

train    2049952 words | loss 5.47053 | ppl 237.59
  valid 249.68630 | test 225.65557
--------------------------------------------------------------------------------
Round :  83    2021-05-20 22:01:59
train    2049952 words | loss 5.46525 | ppl 236.34
  valid 250.09925 | test 225.82359
--------------------------------------------------------------------------------
Round :  84    2021-05-20 22:03:42
train    2049952 words | loss 5.45831 | ppl 234.7
  valid 250.57907 | test 225.65265
--------------------------------------------------------------------------------
Round :  85    2021-05-20 22:05:25
train    2049952 words | loss 5.45305 | ppl 233.47
  valid 248.82369 | test 225.59655
--------------------------------------------------------------------------------
Round :  86    2021-05-20 22:07:08
train    2049952 words | loss 5.44664 | ppl 231.98
  valid 249.19802 | test 225.08311
--------------------------------------------------------------------------------
Round :  87    2021-05-20 

train    2049952 words | loss 5.27468 | ppl 195.33
  valid 244.63672 | test 220.70508
--------------------------------------------------------------------------------
Round :  124    2021-05-20 23:12:33
train    2049952 words | loss 5.27106 | ppl 194.62
  valid 243.66805 | test 220.93611
--------------------------------------------------------------------------------
Round :  125    2021-05-20 23:14:16
train    2049952 words | loss 5.26665 | ppl 193.77
  valid 246.81406 | test 222.97058
--------------------------------------------------------------------------------
Round :  126    2021-05-20 23:16:00
train    2049952 words | loss 5.26400 | ppl 193.25
  valid 245.28492 | test 221.99536
--------------------------------------------------------------------------------
Round :  127    2021-05-20 23:17:43
train    2049952 words | loss 5.26031 | ppl 192.54
  valid 243.90872 | test 220.35968
--------------------------------------------------------------------------------
Round :  128    2021-

train    2049952 words | loss 5.14490 | ppl 171.55
  valid 243.22983 | test 221.10359
--------------------------------------------------------------------------------
Round :  165    2021-05-21 00:23:09
train    2049952 words | loss 5.14199 | ppl 171.06
  valid 244.73660 | test 222.32793
--------------------------------------------------------------------------------
Round :  166    2021-05-21 00:24:52
train    2049952 words | loss 5.13924 | ppl 170.59
  valid 242.88798 | test 221.62548
--------------------------------------------------------------------------------
Round :  167    2021-05-21 00:26:36
train    2049952 words | loss 5.13671 | ppl 170.15
  valid 245.54766 | test 223.42260
--------------------------------------------------------------------------------
Round :  168    2021-05-21 00:28:19
train    2049952 words | loss 5.13538 | ppl 169.93
  valid 245.68258 | test 223.54066
--------------------------------------------------------------------------------
Round :  169    2021-

train    2049952 words | loss 5.05030 | ppl 156.07
  valid 245.29551 | test 223.69394
--------------------------------------------------------------------------------
Round :  207    2021-05-21 01:35:30
train    2049952 words | loss 5.04929 | ppl 155.91
  valid 245.09777 | test 223.75164
--------------------------------------------------------------------------------
Round :  208    2021-05-21 01:37:13
train    2049952 words | loss 5.04509 | ppl 155.26
  valid 246.61481 | test 226.20188
--------------------------------------------------------------------------------
Round :  209    2021-05-21 01:38:56
train    2049952 words | loss 5.04392 | ppl 155.08
  valid 248.16310 | test 227.21026
--------------------------------------------------------------------------------
Round :  210    2021-05-21 01:40:39
train    2049952 words | loss 5.04200 | ppl 154.78
  valid 246.83076 | test 225.37102
--------------------------------------------------------------------------------
Round :  211    2021-

train    2049952 words | loss 4.98349 | ppl 145.98
  valid 248.67980 | test 228.00334
--------------------------------------------------------------------------------
Round :  248    2021-05-21 02:46:04
train    2049952 words | loss 4.98129 | ppl 145.66
  valid 248.79105 | test 228.99300
--------------------------------------------------------------------------------
Round :  249    2021-05-21 02:47:47
train    2049952 words | loss 4.98024 | ppl 145.51
  valid 245.88738 | test 226.32574
--------------------------------------------------------------------------------
Round :  250    2021-05-21 02:49:31
train    2049952 words | loss 4.97988 | ppl 145.46
  valid 247.48333 | test 226.96692
--------------------------------------------------------------------------------
Round :  251    2021-05-21 02:51:14
train    2049952 words | loss 4.97738 | ppl 145.09
  valid 242.68652 | test 221.99788
--------------------------------------------------------------------------------
Round :  252    2021-

train    2049952 words | loss 4.93195 | ppl 138.65
  valid 248.01684 | test 228.35353
--------------------------------------------------------------------------------
Round :  289    2021-05-21 03:56:38
train    2049952 words | loss 4.92988 | ppl 138.36
  valid 249.02545 | test 229.09364
--------------------------------------------------------------------------------
Round :  290    2021-05-21 03:58:21
train    2049952 words | loss 4.92765 | ppl 138.05
  valid 247.02658 | test 227.91992
--------------------------------------------------------------------------------
Round :  291    2021-05-21 04:00:04
train    2049952 words | loss 4.92736 | ppl 138.01
  valid 246.80384 | test 227.87186
--------------------------------------------------------------------------------
Round :  292    2021-05-21 04:01:47
train    2049952 words | loss 4.92526 | ppl 137.72
  valid 245.67828 | test 225.71810
--------------------------------------------------------------------------------
Round :  293    2021-

train    2049952 words | loss 4.89017 | ppl 132.98
  valid 247.29695 | test 227.25360
--------------------------------------------------------------------------------
Round :  330    2021-05-21 05:07:13
train    2049952 words | loss 4.88794 | ppl 132.68
  valid 248.66254 | test 228.85798
--------------------------------------------------------------------------------
Round :  331    2021-05-21 05:08:56
train    2049952 words | loss 4.88835 | ppl 132.73
  valid 247.70047 | test 228.00393
--------------------------------------------------------------------------------
Round :  332    2021-05-21 05:10:39
train    2049952 words | loss 4.88825 | ppl 132.72
  valid 251.36993 | test 231.71985
--------------------------------------------------------------------------------
Round :  333    2021-05-21 05:12:23
train    2049952 words | loss 4.88748 | ppl 132.62
  valid 249.17012 | test 229.45985
--------------------------------------------------------------------------------
Round :  334    2021-

train    2049952 words | loss 4.85729 | ppl 128.68
  valid 248.45268 | test 228.89182
--------------------------------------------------------------------------------
Round :  371    2021-05-21 06:17:46
train    2049952 words | loss 4.85577 | ppl 128.48
  valid 250.25097 | test 230.80333
--------------------------------------------------------------------------------
Round :  372    2021-05-21 06:19:29
train    2049952 words | loss 4.85595 | ppl 128.5
  valid 247.62017 | test 227.45680
--------------------------------------------------------------------------------
Round :  373    2021-05-21 06:21:13
train    2049952 words | loss 4.85393 | ppl 128.24
  valid 248.12012 | test 228.35530
--------------------------------------------------------------------------------
Round :  374    2021-05-21 06:22:56
train    2049952 words | loss 4.85296 | ppl 128.12
  valid 249.76187 | test 229.29006
--------------------------------------------------------------------------------
Round :  375    2021-0

train    2049952 words | loss 4.82882 | ppl 125.06
  valid 251.11003 | test 231.16487
--------------------------------------------------------------------------------
Round :  412    2021-05-21 07:28:21
train    2049952 words | loss 4.82660 | ppl 124.79
  valid 251.22383 | test 230.82159
--------------------------------------------------------------------------------
Round :  413    2021-05-21 07:30:04
train    2049952 words | loss 4.82737 | ppl 124.88
  valid 246.83263 | test 227.40025
--------------------------------------------------------------------------------
Round :  414    2021-05-21 07:31:48
train    2049952 words | loss 4.82669 | ppl 124.8
  valid 250.06309 | test 229.92954
--------------------------------------------------------------------------------
Round :  415    2021-05-21 07:33:31
train    2049952 words | loss 4.82641 | ppl 124.76
  valid 249.44822 | test 229.22121
--------------------------------------------------------------------------------
Round :  416    2021-0

train    2049952 words | loss 4.80515 | ppl 122.14
  valid 247.41335 | test 227.38591
--------------------------------------------------------------------------------
Round :  453    2021-05-21 08:38:56
train    2049952 words | loss 4.80411 | ppl 122.01
  valid 245.34815 | test 225.26376
--------------------------------------------------------------------------------
Round :  454    2021-05-21 08:40:39
train    2049952 words | loss 4.80220 | ppl 121.78
  valid 250.19903 | test 229.99100
--------------------------------------------------------------------------------
Round :  455    2021-05-21 08:42:23
train    2049952 words | loss 4.80348 | ppl 121.93
  valid 247.34786 | test 226.95836
--------------------------------------------------------------------------------
Round :  456    2021-05-21 08:44:06
train    2049952 words | loss 4.80371 | ppl 121.96
  valid 251.56759 | test 230.85464
--------------------------------------------------------------------------------
Round :  457    2021-

train    2049952 words | loss 4.78441 | ppl 119.63
  valid 249.89488 | test 229.60554
--------------------------------------------------------------------------------
Round :  494    2021-05-21 09:49:32
train    2049952 words | loss 4.78388 | ppl 119.57
  valid 249.77338 | test 229.74076
--------------------------------------------------------------------------------
Round :  495    2021-05-21 09:51:15
train    2049952 words | loss 4.78443 | ppl 119.63
  valid 248.08627 | test 228.14835
--------------------------------------------------------------------------------
Round :  496    2021-05-21 09:52:59
train    2049952 words | loss 4.78354 | ppl 119.53
  valid 248.01814 | test 228.38906
--------------------------------------------------------------------------------
Round :  497    2021-05-21 09:54:42
train    2049952 words | loss 4.78244 | ppl 119.39
  valid 248.92716 | test 229.55950
--------------------------------------------------------------------------------
Round :  498    2021-

train    2049952 words | loss 4.76538 | ppl 117.38
  valid 248.96069 | test 229.47194
--------------------------------------------------------------------------------
Round :  535    2021-05-21 10:55:13
train    2049952 words | loss 4.76369 | ppl 117.18
  valid 247.04358 | test 227.48384
--------------------------------------------------------------------------------
Round :  536    2021-05-21 10:56:45
train    2049952 words | loss 4.76288 | ppl 117.08
  valid 246.63725 | test 227.76081
--------------------------------------------------------------------------------
Round :  537    2021-05-21 10:58:19
train    2049952 words | loss 4.76310 | ppl 117.11
  valid 249.08092 | test 229.74528
--------------------------------------------------------------------------------
Round :  538    2021-05-21 10:59:51
train    2049952 words | loss 4.76225 | ppl 117.01
  valid 249.13104 | test 229.73934
--------------------------------------------------------------------------------
Round :  539    2021-

train    2049952 words | loss 4.74714 | ppl 115.25
  valid 250.55891 | test 231.03334
--------------------------------------------------------------------------------
Round :  576    2021-05-21 11:58:47
train    2049952 words | loss 4.74642 | ppl 115.17
  valid 248.55657 | test 229.29172
--------------------------------------------------------------------------------
Round :  577    2021-05-21 12:00:20
train    2049952 words | loss 4.74737 | ppl 115.28
  valid 248.89813 | test 228.87952
--------------------------------------------------------------------------------
Round :  578    2021-05-21 12:01:54
train    2049952 words | loss 4.74476 | ppl 114.98
  valid 249.83529 | test 229.83222
--------------------------------------------------------------------------------
Round :  579    2021-05-21 12:03:26
train    2049952 words | loss 4.74612 | ppl 115.14
  valid 249.03692 | test 229.57065
--------------------------------------------------------------------------------
Round :  580    2021-

train    2049952 words | loss 4.73269 | ppl 113.6
  valid 249.94705 | test 230.39186
--------------------------------------------------------------------------------
Round :  618    2021-05-21 13:03:55
train    2049952 words | loss 4.73187 | ppl 113.51
  valid 247.78267 | test 227.96363
--------------------------------------------------------------------------------
Round :  619    2021-05-21 13:05:27
train    2049952 words | loss 4.73100 | ppl 113.41
  valid 249.93102 | test 230.44332
--------------------------------------------------------------------------------
Round :  620    2021-05-21 13:07:00
train    2049952 words | loss 4.73013 | ppl 113.31
  valid 249.23071 | test 229.31791
--------------------------------------------------------------------------------
Round :  621    2021-05-21 13:08:32
train    2049952 words | loss 4.73038 | ppl 113.34
  valid 247.10682 | test 227.45021
--------------------------------------------------------------------------------
Round :  622    2021-0

train    2049952 words | loss 4.71645 | ppl 111.77
  valid 248.88347 | test 229.14077
--------------------------------------------------------------------------------
Round :  659    2021-05-21 14:07:32
train    2049952 words | loss 4.71659 | ppl 111.79
  valid 246.07801 | test 227.09531
--------------------------------------------------------------------------------
Round :  660    2021-05-21 14:09:06
train    2049952 words | loss 4.71682 | ppl 111.81
  valid 247.95437 | test 229.03076
--------------------------------------------------------------------------------
Round :  661    2021-05-21 14:10:39
train    2049952 words | loss 4.71637 | ppl 111.76
  valid 249.44866 | test 230.02060
--------------------------------------------------------------------------------
Round :  662    2021-05-21 14:12:12
train    2049952 words | loss 4.71667 | ppl 111.8
  valid 246.27409 | test 227.60016
--------------------------------------------------------------------------------
Round :  663    2021-0

train    2049952 words | loss 4.70532 | ppl 110.53
  valid 248.41516 | test 229.48784
--------------------------------------------------------------------------------
Round :  700    2021-05-21 15:11:06
train    2049952 words | loss 4.70598 | ppl 110.61
  valid 247.18003 | test 228.02338
--------------------------------------------------------------------------------
Round :  701    2021-05-21 15:12:39
train    2049952 words | loss 4.70616 | ppl 110.63
  valid 246.22843 | test 227.42682
--------------------------------------------------------------------------------
Round :  702    2021-05-21 15:14:13
train    2049952 words | loss 4.70475 | ppl 110.47
  valid 246.79463 | test 227.86856
--------------------------------------------------------------------------------
Round :  703    2021-05-21 15:15:46
train    2049952 words | loss 4.70500 | ppl 110.5
  valid 248.29829 | test 228.70756
--------------------------------------------------------------------------------
Round :  704    2021-0

train    2049952 words | loss 4.69434 | ppl 109.33
  valid 245.78331 | test 227.20991
--------------------------------------------------------------------------------
Round :  741    2021-05-21 16:14:46
train    2049952 words | loss 4.69369 | ppl 109.26
  valid 245.75092 | test 226.76628
--------------------------------------------------------------------------------
Round :  742    2021-05-21 16:16:19
train    2049952 words | loss 4.69426 | ppl 109.32
  valid 245.32340 | test 226.82560
--------------------------------------------------------------------------------
Round :  743    2021-05-21 16:17:53
train    2049952 words | loss 4.69380 | ppl 109.27
  valid 248.83994 | test 230.06103
--------------------------------------------------------------------------------
Round :  744    2021-05-21 16:19:26
train    2049952 words | loss 4.69401 | ppl 109.29
  valid 247.28325 | test 227.97058
--------------------------------------------------------------------------------
Round :  745    2021-

train    2049952 words | loss 4.68524 | ppl 108.34
  valid 247.18637 | test 228.08845
--------------------------------------------------------------------------------
Round :  782    2021-05-21 17:18:26
train    2049952 words | loss 4.68458 | ppl 108.26
  valid 244.74973 | test 226.18960
--------------------------------------------------------------------------------
Round :  783    2021-05-21 17:19:59
train    2049952 words | loss 4.68310 | ppl 108.1
  valid 247.67433 | test 229.50101
--------------------------------------------------------------------------------
Round :  784    2021-05-21 17:21:33
train    2049952 words | loss 4.68488 | ppl 108.3
  valid 245.83821 | test 227.14603
--------------------------------------------------------------------------------
Round :  785    2021-05-21 17:23:06
train    2049952 words | loss 4.68227 | ppl 108.02
  valid 245.87075 | test 227.31892
--------------------------------------------------------------------------------
Round :  786    2021-05

train    2049952 words | loss 4.67552 | ppl 107.29
  valid 246.86070 | test 228.17100
--------------------------------------------------------------------------------
Round :  823    2021-05-21 18:22:07
train    2049952 words | loss 4.67562 | ppl 107.3
  valid 245.05919 | test 226.96188
--------------------------------------------------------------------------------
Round :  824    2021-05-21 18:23:40
train    2049952 words | loss 4.67581 | ppl 107.32
  valid 247.43138 | test 229.01952
--------------------------------------------------------------------------------
Round :  825    2021-05-21 18:25:13
train    2049952 words | loss 4.67475 | ppl 107.21
  valid 244.15760 | test 225.81741
--------------------------------------------------------------------------------
Round :  826    2021-05-21 18:26:46
train    2049952 words | loss 4.67444 | ppl 107.17
  valid 248.35085 | test 229.89543
--------------------------------------------------------------------------------
Round :  827    2021-0

train    2049952 words | loss 4.66594 | ppl 106.26
  valid 245.04961 | test 226.83709
--------------------------------------------------------------------------------
Round :  864    2021-05-21 19:25:46
train    2049952 words | loss 4.66568 | ppl 106.24
  valid 245.33699 | test 226.99860
--------------------------------------------------------------------------------
Round :  865    2021-05-21 19:27:19
train    2049952 words | loss 4.66620 | ppl 106.29
  valid 248.19942 | test 228.87514
--------------------------------------------------------------------------------
Round :  866    2021-05-21 19:28:52
train    2049952 words | loss 4.66452 | ppl 106.11
  valid 245.93470 | test 227.71125
--------------------------------------------------------------------------------
Round :  867    2021-05-21 19:30:25
train    2049952 words | loss 4.66375 | ppl 106.03
  valid 246.23590 | test 228.23694
--------------------------------------------------------------------------------
Round :  868    2021-

train    2049952 words | loss 4.65612 | ppl 105.23
  valid 245.82574 | test 228.19860
--------------------------------------------------------------------------------
Round :  905    2021-05-21 20:29:24
train    2049952 words | loss 4.65524 | ppl 105.13
  valid 243.24089 | test 225.23407
--------------------------------------------------------------------------------
Round :  906    2021-05-21 20:30:57
train    2049952 words | loss 4.65572 | ppl 105.19
  valid 245.93757 | test 227.73514
--------------------------------------------------------------------------------
Round :  907    2021-05-21 20:32:30
train    2049952 words | loss 4.65555 | ppl 105.17
  valid 244.52206 | test 226.24912
--------------------------------------------------------------------------------
Round :  908    2021-05-21 20:34:03
train    2049952 words | loss 4.65605 | ppl 105.22
  valid 244.12742 | test 226.07131
--------------------------------------------------------------------------------
Round :  909    2021-

train    2049952 words | loss 4.64978 | ppl 104.56
  valid 246.87164 | test 228.25676
--------------------------------------------------------------------------------
Round :  946    2021-05-21 21:33:04
train    2049952 words | loss 4.64874 | ppl 104.45
  valid 245.44345 | test 227.74703
--------------------------------------------------------------------------------
Round :  947    2021-05-21 21:34:37
train    2049952 words | loss 4.64884 | ppl 104.46
  valid 244.91862 | test 227.50477
--------------------------------------------------------------------------------
Round :  948    2021-05-21 21:36:10
train    2049952 words | loss 4.64741 | ppl 104.31
  valid 245.63602 | test 228.02715
--------------------------------------------------------------------------------
Round :  949    2021-05-21 21:37:43
train    2049952 words | loss 4.64867 | ppl 104.45
  valid 243.45729 | test 225.86632
--------------------------------------------------------------------------------
Round :  950    2021-

train    2049952 words | loss 4.63946 | ppl 103.49
  valid 243.90616 | test 225.86048
--------------------------------------------------------------------------------
Round :  987    2021-05-21 22:36:43
train    2049952 words | loss 4.64056 | ppl 103.6
  valid 245.99092 | test 227.92746
--------------------------------------------------------------------------------
Round :  988    2021-05-21 22:38:15
train    2049952 words | loss 4.63876 | ppl 103.42
  valid 243.21522 | test 225.23570
--------------------------------------------------------------------------------
Round :  989    2021-05-21 22:39:49
train    2049952 words | loss 4.63837 | ppl 103.38
  valid 244.36284 | test 226.29029
--------------------------------------------------------------------------------
Round :  990    2021-05-21 22:41:22
train    2049952 words | loss 4.63899 | ppl 103.44
  valid 245.38192 | test 227.81992
----------------------------------------------------

train    2049952 words | loss 4.63253 | ppl 102.77
  valid 244.65384 | test 227.55887
--------------------------------------------------------------------------------
Round :  1029    2021-05-21 23:41:50
train    2049952 words | loss 4.63263 | ppl 102.78
  valid 244.29517 | test 227.22283
--------------------------------------------------------------------------------
Round :  1030    2021-05-21 23:43:24
train    2049952 words | loss 4.63152 | ppl 102.67
  valid 246.07714 | test 229.02792
--------------------------------------------------------------------------------
Round :  1031    2021-05-21 23:44:57
train    2049952 words | loss 4.63134 | ppl 102.65
  valid 241.28058 | test 224.12727
--------------------------------------------------------------------------------
Round :  1032    2021-05-21 23:46:31
train    2049952 words | loss 4.63127 | ppl 102.64
  valid 243.80023 | test 226.62554
--------------------------------------------------------------------------------
Round :  1033    

  valid 244.71476 | test 227.29190
--------------------------------------------------------------------------------
Round :  1069    2021-05-22 00:43:56
train    2049952 words | loss 4.62353 | ppl 101.85
  valid 242.16419 | test 224.86949
--------------------------------------------------------------------------------
Round :  1070    2021-05-22 00:45:30
train    2049952 words | loss 4.62378 | ppl 101.88
  valid 242.36282 | test 225.53301
--------------------------------------------------------------------------------
Round :  1071    2021-05-22 00:47:02
train    2049952 words | loss 4.62199 | ppl 101.7
  valid 242.84620 | test 225.20077
--------------------------------------------------------------------------------
Round :  1072    2021-05-22 00:48:36
train    2049952 words | loss 4.62353 | ppl 101.85
  valid 245.95894 | test 228.90294
--------------------------------------------------------------------------------
Round :  1073    2021-05-22 00:50:09
train    2049952 words | loss 4.

train    2049952 words | loss 4.61701 | ppl 101.19
  valid 241.74080 | test 225.40586
--------------------------------------------------------------------------------
Round :  1110    2021-05-22 01:47:37
train    2049952 words | loss 4.61595 | ppl 101.08
  valid 242.95413 | test 226.06489
--------------------------------------------------------------------------------
Round :  1111    2021-05-22 01:49:10
train    2049952 words | loss 4.61501 | ppl 100.99
  valid 242.63162 | test 225.48102
--------------------------------------------------------------------------------
Round :  1112    2021-05-22 01:50:43
train    2049952 words | loss 4.61591 | ppl 101.08
  valid 243.28146 | test 226.21860
--------------------------------------------------------------------------------
Round :  1113    2021-05-22 01:52:16
train    2049952 words | loss 4.61575 | ppl 101.06
  valid 242.04392 | test 225.34691
--------------------------------------------------------------------------------
Round :  1114    

  valid 242.70096 | test 225.89201
--------------------------------------------------------------------------------
Round :  1150    2021-05-22 02:49:38
train    2049952 words | loss 4.60878 | ppl 100.36
  valid 244.34304 | test 227.18243
--------------------------------------------------------------------------------
Round :  1151    2021-05-22 02:51:11
train    2049952 words | loss 4.60843 | ppl 100.33
  valid 242.88203 | test 225.79126
--------------------------------------------------------------------------------
Round :  1152    2021-05-22 02:52:44
train    2049952 words | loss 4.60845 | ppl 100.33
  valid 242.12565 | test 225.15980
--------------------------------------------------------------------------------
Round :  1153    2021-05-22 02:54:16
train    2049952 words | loss 4.60886 | ppl 100.37
  valid 243.53922 | test 226.65448
--------------------------------------------------------------------------------
Round :  1154    2021-05-22 02:55:49
train    2049952 words | loss 4

train    2049952 words | loss 4.60025 | ppl 99.509
  valid 243.14390 | test 225.53814
--------------------------------------------------------------------------------
Round :  1191    2021-05-22 03:53:05
train    2049952 words | loss 4.60105 | ppl 99.589
  valid 245.29379 | test 228.36333
--------------------------------------------------------------------------------
Round :  1192    2021-05-22 03:54:37
train    2049952 words | loss 4.60099 | ppl 99.583
  valid 242.60715 | test 225.50165
--------------------------------------------------------------------------------
Round :  1193    2021-05-22 03:56:11
train    2049952 words | loss 4.60123 | ppl 99.607
  valid 243.47546 | test 226.34188
--------------------------------------------------------------------------------
Round :  1194    2021-05-22 03:57:44
train    2049952 words | loss 4.59941 | ppl 99.426
  valid 239.75888 | test 222.80583
--------------------------------------------------------------------------------
Round :  1195    

  valid 240.85395 | test 224.41245
--------------------------------------------------------------------------------
Round :  1231    2021-05-22 04:55:09
train    2049952 words | loss 4.59371 | ppl 98.861
  valid 243.72575 | test 226.20761
--------------------------------------------------------------------------------
Round :  1232    2021-05-22 04:56:42
train    2049952 words | loss 4.59306 | ppl 98.796
  valid 243.87103 | test 227.43669
--------------------------------------------------------------------------------
Round :  1233    2021-05-22 04:58:15
train    2049952 words | loss 4.59501 | ppl 98.989
  valid 243.46709 | test 226.66643
--------------------------------------------------------------------------------
Round :  1234    2021-05-22 04:59:47
train    2049952 words | loss 4.59447 | ppl 98.936
  valid 240.10235 | test 223.44455
--------------------------------------------------------------------------------
Round :  1235    2021-05-22 05:01:21
train    2049952 words | loss 4

train    2049952 words | loss 4.58678 | ppl 98.178
  valid 244.05568 | test 226.38050
--------------------------------------------------------------------------------
Round :  1272    2021-05-22 05:58:40
train    2049952 words | loss 4.58694 | ppl 98.193
  valid 241.07061 | test 224.81335
--------------------------------------------------------------------------------
Round :  1273    2021-05-22 06:00:13
train    2049952 words | loss 4.58896 | ppl 98.392
  valid 243.09130 | test 226.38178
--------------------------------------------------------------------------------
Round :  1274    2021-05-22 06:01:46
train    2049952 words | loss 4.58643 | ppl 98.143
  valid 241.87376 | test 225.05112
--------------------------------------------------------------------------------
Round :  1275    2021-05-22 06:03:19
train    2049952 words | loss 4.58772 | ppl 98.27
  valid 245.80653 | test 228.76325
--------------------------------------------------------------------------------
Round :  1276    2

  valid 243.78018 | test 227.17644
--------------------------------------------------------------------------------
Round :  1312    2021-05-22 07:00:40
train    2049952 words | loss 4.57948 | ppl 97.463
  valid 241.31026 | test 224.40804
--------------------------------------------------------------------------------
Round :  1313    2021-05-22 07:02:14
train    2049952 words | loss 4.57960 | ppl 97.476
  valid 241.61749 | test 225.31162
--------------------------------------------------------------------------------
Round :  1314    2021-05-22 07:03:47
train    2049952 words | loss 4.58041 | ppl 97.554
  valid 238.33278 | test 222.25002
--------------------------------------------------------------------------------
Round :  1315    2021-05-22 07:05:20
train    2049952 words | loss 4.58071 | ppl 97.583
  valid 242.93146 | test 225.96862
--------------------------------------------------------------------------------
Round :  1316    2021-05-22 07:06:53
train    2049952 words | loss 4

train    2049952 words | loss 4.57374 | ppl 96.906
  valid 239.68507 | test 223.59151
--------------------------------------------------------------------------------
Round :  1353    2021-05-22 08:04:20
train    2049952 words | loss 4.57477 | ppl 97.006
  valid 239.91621 | test 223.84569
--------------------------------------------------------------------------------
Round :  1354    2021-05-22 08:05:53
train    2049952 words | loss 4.57377 | ppl 96.908
  valid 240.45107 | test 223.99975
--------------------------------------------------------------------------------
Round :  1355    2021-05-22 08:07:26
train    2049952 words | loss 4.57451 | ppl 96.981
  valid 240.71664 | test 224.67511
--------------------------------------------------------------------------------
Round :  1356    2021-05-22 08:09:00
train    2049952 words | loss 4.57349 | ppl 96.881
  valid 238.77583 | test 222.33976
--------------------------------------------------------------------------------
Round :  1357    

  valid 241.31435 | test 224.73783
--------------------------------------------------------------------------------
Round :  1393    2021-05-22 09:06:25
train    2049952 words | loss 4.56721 | ppl 96.275
  valid 242.81632 | test 226.03182
--------------------------------------------------------------------------------
Round :  1394    2021-05-22 09:07:58
train    2049952 words | loss 4.56739 | ppl 96.293
  valid 240.15233 | test 223.74169
--------------------------------------------------------------------------------
Round :  1395    2021-05-22 09:09:31
train    2049952 words | loss 4.56744 | ppl 96.297
  valid 241.35037 | test 224.28140
--------------------------------------------------------------------------------
Round :  1396    2021-05-22 09:11:04
train    2049952 words | loss 4.56697 | ppl 96.252
  valid 241.17803 | test 224.91343
--------------------------------------------------------------------------------
Round :  1397    2021-05-22 09:12:38
train    2049952 words | loss 4

train    2049952 words | loss 4.56152 | ppl 95.729
  valid 241.49115 | test 224.96715
--------------------------------------------------------------------------------
Round :  1434    2021-05-22 10:10:07
train    2049952 words | loss 4.56078 | ppl 95.658
  valid 241.55029 | test 224.38843
--------------------------------------------------------------------------------
Round :  1435    2021-05-22 10:11:40
train    2049952 words | loss 4.56233 | ppl 95.807
  valid 239.97735 | test 224.03680
--------------------------------------------------------------------------------
Round :  1436    2021-05-22 10:13:13
train    2049952 words | loss 4.56004 | ppl 95.587
  valid 240.32183 | test 224.09031
--------------------------------------------------------------------------------
Round :  1437    2021-05-22 10:14:47
train    2049952 words | loss 4.56123 | ppl 95.701
  valid 241.22107 | test 224.77315
--------------------------------------------------------------------------------
Round :  1438    

  valid 239.41381 | test 222.93992
--------------------------------------------------------------------------------
Round :  1474    2021-05-22 11:12:11
train    2049952 words | loss 4.55586 | ppl 95.188
  valid 240.41728 | test 223.50046
--------------------------------------------------------------------------------
Round :  1475    2021-05-22 11:13:43
train    2049952 words | loss 4.55523 | ppl 95.129
  valid 240.08569 | test 223.24136
--------------------------------------------------------------------------------
Round :  1476    2021-05-22 11:15:15
train    2049952 words | loss 4.55540 | ppl 95.145
  valid 240.81126 | test 223.94175
--------------------------------------------------------------------------------
Round :  1477    2021-05-22 11:16:46
train    2049952 words | loss 4.55548 | ppl 95.153
  valid 240.85662 | test 223.36983
--------------------------------------------------------------------------------
Round :  1478    2021-05-22 11:18:19
train    2049952 words | loss 4

In [None]:
# torch.save(model.state_dict(), 'parameter-2.pkl')
# evaluate(test_data)

In [None]:
# if 1:
#     x = torch.ones((1,1),dtype=int)
#     x[0,0] = 1
#     print(model.embedding(x))

In [None]:
# import time
# for T in range(100):
#     print('Round : ',T,"  ",time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
#     train()
#     print('  valid ',evaluate(val_data))
#     print('  test  ',evaluate(test_data))
#     print('-'*80)

In [None]:
# if 1:
#     x = torch.tensor([[1000,1],[100,1],[1000,1]],dtype=float)
#     y = torch.tensor([0,1,0])
#     print(criterion(x,y))

In [None]:
# -math.log(0.1)