# model

In [1]:
import torch
# Use the GPU when one is visible to PyTorch; otherwise fall back to the CPU so
# that every later `.to(device)` call still works on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
import torch
import torch.nn as nn
import math

class PositionalEncoding(nn.Module):
    """Add a scaled sinusoidal position table to an input, then apply dropout.

    The table is computed once in ``__init__`` and registered as the
    non-trainable buffer ``pe`` with shape ``(max_len, 1, d_model)``; the
    singleton middle axis lets it broadcast over the second axis of the input.

    Args:
        d_model: size of the last (feature) axis of the inputs.
        dropout: dropout probability applied after the addition.
        max_len: number of positions stored in the table.
    """

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        angles = position * div_term
        # Even feature columns carry the sine, odd columns the cosine.
        pe[:, 0::2] = torch.sin(angles)
        # For an odd d_model there is one fewer odd column than even columns, so
        # the cosine half must be truncated; without this the assignment raises
        # a shape-mismatch error.
        pe[:, 1::2] = torch.cos(angles[:, :d_model // 2])
        # (max_len, d_model) -> (max_len, 1, d_model)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x, rat):
        """Return ``dropout(x + pe[:x.size(0)] * rat)``.

        Args:
            x: tensor whose first axis indexes positions and whose last axis
                has size ``d_model``.
            rat: multiplier applied to the position table before it is added.
        """
        x = x + (self.pe[:x.size(0), :]) * rat
        return self.dropout(x)

class TFModule(nn.Module):
    """Causally-masked Transformer-encoder language model.

    Pipeline: token embedding -> scaled positional encoding -> stack of
    ``nn.TransformerEncoderLayer`` with a subsequent-position mask -> linear
    projection of every position to vocabulary logits.

    Args:
        vocab_size: number of rows of the embedding and width of the output.
        embed_size: embedding / model width (``d_model``).
        num_layers: number of encoder layers.
        num_hiddens: feed-forward width inside each encoder layer.
        num_heads: attention heads per layer.
        embeddropout_rate: dropout applied after the positional encoding.
        dropout_rate: dropout inside the encoder layers.
        pos_ratio: multiplier applied to the positional table before adding it.
    """

    def __init__(self, vocab_size, embed_size, num_layers, num_hiddens, num_heads, embeddropout_rate, dropout_rate, pos_ratio):
        super(TFModule, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.pos_embed = PositionalEncoding(embed_size, embeddropout_rate, 1025)
        self.pos_ratio = pos_ratio

        # NOTE(review): this template layer stays registered as a submodule in
        # addition to the stack built from it; it is kept as an attribute so
        # existing state_dict keys ("encoder_layer.*") do not change.
        self.encoder_layer = nn.TransformerEncoderLayer(
                                        d_model=embed_size,
                                        dim_feedforward=num_hiddens,
                                        nhead=num_heads,
                                        dropout=dropout_rate,
                                        activation='relu')
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)

        # Projects the hidden state of every position to vocabulary logits.
        self.decoder = nn.Linear(embed_size, vocab_size)

    def _generate_square_subsequent_mask(self, sz, device=None):
        """Build an ``(sz, sz)`` float mask: 0 on/below the diagonal, -inf above.

        Args:
            sz: sequence length.
            device: where to create the mask. Defaults to the device of this
                module's own weights, so the mask no longer depends on a
                notebook-global ``device`` variable.
        """
        if device is None:
            device = self.decoder.weight.device
        mask = (torch.triu(torch.ones(sz, sz, device=device)) == 1).transpose(0, 1)
        mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
        return mask

    def forward(self, inputs):
        """Return logits of shape ``(batch * seq_len, vocab_size)``.

        Args:
            inputs: 2-D tensor of token ids laid out ``(batch, seq_len)``; it is
                permuted to sequence-first before the embedding lookup.
        """
        word_emb = self.embedding(inputs.permute(1, 0))
        embeddings = self.pos_embed(word_emb, self.pos_ratio)

        # Create the causal mask on the same device as the activations.
        causal_mask = self._generate_square_subsequent_mask(embeddings.shape[0], device=embeddings.device)
        # outputs has shape (seq_len, batch, embed_size)
        outputs = self.transformer_encoder(embeddings, mask=causal_mask)

        # Back to batch-first, then flatten so each row is one token position.
        output = outputs.permute(1, 0, 2)
        outs = output.reshape(output.size(0)*output.size(1), output.size(2))
        ret = self.decoder(outs)
        return ret

In [3]:
# import torch
# import torch.nn as nn

In [4]:
# encoder_layer = nn.TransformerEncoderLayer(d_model=1, nhead=1, dropout=0, activation='relu')
# transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=1)

In [5]:
# src = torch.ones(3,1,1)
# out = encoder_layer(src)
# print(src.shape,out.shape)
# print(src)
# print(out)

In [6]:
# # dir(encoder_layer)
# vos = 53
# emb = 22
# numl = 3
# numh = 233
# numhea = 11
# dror = 0
# model = TFModule(vos, emb, numl, numh, numhea, dror)
# src = torch.ones((2,3),dtype=int)
# print(src.shape)
# out = model(src)
# print(out)

# data

In [7]:
import torch
from torchtext.datasets import WikiText2
from torchtext.data.utils import get_tokenizer
from collections import Counter
from torchtext.vocab import Vocab

In [8]:
class Corpus(object):
    """WikiText2 corpus turned into batchified token-id tensors.

    On construction the training split is tokenized once to build the
    vocabulary, then all three splits are encoded and reshaped into
    ``(tokens_per_column, batch_size)`` tensors stored as ``train_data``,
    ``val_data`` and ``test_data``.
    """

    def __init__(self, train_batch_size=20, eval_batch_size=10, bptt=35):
        self.bptt = bptt
        self.tokenizer = get_tokenizer('basic_english')

        # Pass 1: count token frequencies over the training split only.
        token_counts = Counter()
        for text_line in WikiText2(split='train'):
            token_counts.update(self.tokenizer(text_line))
        self.vocab = Vocab(token_counts)

        # Pass 2: fresh iterators for all splits, encoded then batchified.
        train_iter, val_iter, test_iter = WikiText2()
        train_ids = self.data_process(train_iter)
        val_ids = self.data_process(val_iter)
        test_ids = self.data_process(test_iter)

        self.train_data = self.batchify(train_ids, train_batch_size)
        self.val_data = self.batchify(val_ids, eval_batch_size)
        self.test_data = self.batchify(test_ids, eval_batch_size)

    def data_process(self, raw_text_iter):
        """Encode every item to a 1-D long tensor of ids and concatenate them.

        Items that tokenize to nothing are dropped before concatenation.
        """
        pieces = []
        for item in raw_text_iter:
            ids = torch.tensor([self.vocab[token] for token in self.tokenizer(item)],
                               dtype=torch.long)
            if ids.numel() > 0:
                pieces.append(ids)
        return torch.cat(tuple(pieces))

    def batchify(self, data, batch_size):
        """Reshape a flat id tensor into ``batch_size`` columns on ``device``.

        Trailing elements that do not fill a whole row of ``batch_size`` are
        discarded.
        """
        usable = (data.size(0) // batch_size) * batch_size
        trimmed = data.narrow(0, 0, usable)
        columns = trimmed.view(batch_size, -1).t().contiguous()
        return columns.to(device)

    def get_batch(self, source, i):
        """Return ``(data, target)`` for the window of at most ``bptt`` rows at ``i``.

        ``target`` is the same window shifted one row forward and flattened.
        """
        stop = i + min(self.bptt, len(source) - 1 - i)
        return source[i:stop], source[i + 1:stop + 1].reshape(-1)

    def get_ntokens(self):
        """Return the number of entries in the vocabulary's ``stoi`` mapping."""
        return len(self.vocab.stoi)

# main

In [9]:
import argparse
import time
import math
import torch
import torch.nn as nn
from torch.autograd import Variable

import data
import model
class args:
    """Run configuration, read as plain class attributes (``args.lr`` etc.)."""

    # --- model ---
    emsize = 100         # embedding / model width passed to TFModule
    nhid = 200           # feed-forward width inside each encoder layer
    nlayers = 3          # number of encoder layers
    nhead = 20           # attention heads per layer
    pos_ratio = 1        # multiplier on the positional encoding
    embeddropout = 0.1   # dropout after the positional encoding
    dropout = 0.3        # dropout inside the encoder layers
    tied = False

    # --- optimisation ---
    lr = 0.5             # SGD learning rate
    clip = 0.005         # element-wise gradient clamp bound
    epochs = 2000
    batch_size = 32      # used for both training and evaluation batches
    bptt = 64            # maximum sequence length per batch

    # --- misc ---
    seed = 1234
    save = 'model.pt'

In [10]:
# Build the vocabulary and the batchified splits; training and evaluation
# deliberately share the same batch size here.
data_loader = Corpus(
    bptt=args.bptt,
    train_batch_size=args.batch_size,
    eval_batch_size=args.batch_size,
)

In [11]:
# Module-level aliases for the three batchified splits used by train()/evaluate().
train_data, val_data, test_data = (
    data_loader.train_data,
    data_loader.val_data,
    data_loader.test_data,
)

In [12]:
def get_batch(source, i, evaluation=False):
    """Slice one (inputs, targets) pair out of a batchified split.

    Args:
        source: 2-D tensor whose first axis is the position in the stream.
        i: index of the first row of the window.
        evaluation: accepted for call compatibility; not used.

    Returns:
        inputs: the window of at most ``args.bptt`` rows, transposed.
        targets: the window shifted one row forward, transposed and flattened.
    """
    window = min(args.bptt, len(source) - 1 - i)
    stop = i + window
    inputs = source[i:stop].t()
    shifted = source[i + 1:stop + 1].t()
    return inputs, shifted.reshape(-1)

In [13]:
# import torch
# torch.__version__
# print(train_data.size(0))
# for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
#     data, targets = get_batch(train_data, i)
#     print(data.shape,targets.shape,targets[-1])
#     if (batch>=2): break

In [14]:
len(data_loader.vocab.itos)

28783

In [15]:
# Seed PyTorch's RNG before any weights are created so that initialisation and
# dropout are repeatable; args.seed is declared in the config but was never applied.
torch.manual_seed(args.seed)

ntokens = len(data_loader.vocab.itos)
# NOTE: this rebinds the name `model`, shadowing the module brought in by the
# earlier `import model` line.
model = TFModule(ntokens, args.emsize, args.nlayers, args.nhid, args.nhead, args.embeddropout, args.dropout, args.pos_ratio)

In [16]:
# Move the model's parameters and buffers onto the selected device; this runs
# before the optimizer is constructed from model.parameters().
model = model.to(device)

In [17]:
model

TFModule(
  (embedding): Embedding(28783, 100)
  (pos_embed): PositionalEncoding(
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder_layer): TransformerEncoderLayer(
    (self_attn): MultiheadAttention(
      (out_proj): _LinearWithBias(in_features=100, out_features=100, bias=True)
    )
    (linear1): Linear(in_features=100, out_features=200, bias=True)
    (dropout): Dropout(p=0.3, inplace=False)
    (linear2): Linear(in_features=200, out_features=100, bias=True)
    (norm1): LayerNorm((100,), eps=1e-05, elementwise_affine=True)
    (norm2): LayerNorm((100,), eps=1e-05, elementwise_affine=True)
    (dropout1): Dropout(p=0.3, inplace=False)
    (dropout2): Dropout(p=0.3, inplace=False)
  )
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): _LinearWithBias(in_features=100, out_features=100, bias=True)
        )
        (linear1): Linear(in_features=100, out_fe

In [18]:
# Plain SGD over every model parameter, and cross-entropy over the flattened
# per-token logits returned by the model.
optimizer = torch.optim.SGD(params=model.parameters(), lr=args.lr)
criterion = nn.CrossEntropyLoss()

In [19]:
def clip_gradient(optimizer, grad_clip):
    """Clamp every gradient element owned by ``optimizer`` to [-grad_clip, grad_clip].

    Operates in place; parameters without a gradient are skipped.

    Args:
        optimizer: optimizer whose ``param_groups`` hold the parameters.
        grad_clip: bound applied element-wise to each gradient.
    """
    for group in optimizer.param_groups:
        # Library helper replaces the hand-rolled clamp through the deprecated
        # `.data` accessor.
        torch.nn.utils.clip_grad_value_(group["params"], grad_clip)

def evaluate(data_source):
    """Compute the token-averaged loss and perplexity of the global model.

    Puts the model in eval mode and walks ``data_source`` in windows of
    ``args.bptt`` rows with autograd disabled.

    Args:
        data_source: batchified split, as consumed by ``get_batch``.

    Returns:
        ``(mean_loss, perplexity)`` as Python floats, where
        ``perplexity == math.exp(mean_loss)``.

    Raises:
        ValueError: if ``data_source`` yields no batches.
    """
    model.eval()
    total_loss = 0.
    total_words = 0
    # Inference only: without no_grad() an autograd graph is built for every batch.
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i)

            output = model(data)
            loss = criterion(output, targets)

            # Weight each batch loss by its token count so that the final
            # figure is a per-token average over the whole split.
            n_tokens = targets.shape[0]
            total_loss += loss * n_tokens
            total_words += n_tokens

    if total_words == 0:
        raise ValueError("evaluate(): data_source produced no batches")

    mean_loss = float(total_loss) / total_words
    return mean_loss, math.exp(mean_loss)

def train():
    """Run one optimisation pass over the global ``train_data``.

    For every window of ``args.bptt`` rows: forward, cross-entropy, backward,
    element-wise gradient clamp to ``args.clip``, optimizer step. Prints a
    one-line summary at the end.

    Returns:
        ``(mean_loss, perplexity)`` as Python floats (matching ``evaluate``),
        where ``perplexity == math.exp(mean_loss)``.

    Raises:
        ValueError: if ``train_data`` yields no batches.
    """
    model.train()
    total_loss = 0.
    total_words = 0
    for i in range(0, train_data.size(0) - 1, args.bptt):
        data, targets = get_batch(train_data, i)
        # One zero_grad is enough; the original also called model.zero_grad().
        # NOTE(review): assumes the optimizer covers every model parameter.
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, targets)
        loss.backward()
        clip_gradient(optimizer, args.clip)
        optimizer.step()

        # Accumulate a detached, token-weighted sum; it is converted to a
        # Python float once, after the loop.
        n_tokens = targets.shape[0]
        total_loss += loss.detach() * n_tokens
        total_words += n_tokens

    if total_words == 0:
        raise ValueError("train(): train_data produced no batches")

    cur_loss = float(total_loss) / total_words
    print('train {:10d} words | loss {:5.5f} | ppl {:5.5}'.format(total_words, cur_loss, math.exp(cur_loss)))
    return cur_loss, math.exp(cur_loss)

In [20]:
import numpy as np
# Per-epoch histories filled by the training loop (one value appended per epoch
# for each of the train / validation / test splits).
Losstrain, Lossval, Losstest = [], [], []

In [None]:
import time
# Main loop: one training pass, then validation and test evaluation per epoch.
# The perplexity histories are re-saved to disk after every epoch.
for T in range(args.epochs):
    print('Round : ',T,"  ",time.strftime('%Y-%m-%d %H:%M:%S'))

    trainl, trainp = train()
    vall, valp = evaluate(val_data)
    testl, testp = evaluate(test_data)

    # Note: despite the "Loss" names, the values recorded are perplexities.
    Losstrain.append(trainp)
    np.save('model-2-try2-train',np.array(Losstrain))

    Lossval.append(valp)
    np.save('model-2-try2-val',np.array(Lossval))

    Losstest.append(testp)
    np.save('model-2-try2-test',np.array(Losstest))

    print('  valid {:5.5f} | test {:5.5f}'.format(valp, testp))
    print('-'*80)

Round :  0    2021-05-21 10:23:26
train    2049952 words | loss 6.84823 | ppl 942.21
  valid 551.15593 | test 516.56571
--------------------------------------------------------------------------------
Round :  1    2021-05-21 10:24:45
train    2049952 words | loss 6.42837 | ppl 619.17
  valid 456.10070 | test 424.63366
--------------------------------------------------------------------------------
Round :  2    2021-05-21 10:26:04
train    2049952 words | loss 6.28390 | ppl 535.87
  valid 409.29662 | test 378.34212
--------------------------------------------------------------------------------
Round :  3    2021-05-21 10:27:22
train    2049952 words | loss 6.18977 | ppl 487.73
  valid 378.91049 | test 348.56095
--------------------------------------------------------------------------------
Round :  4    2021-05-21 10:28:40
train    2049952 words | loss 6.11680 | ppl 453.41
  valid 356.65362 | test 326.85929
----------------------------------------------------------------------------

train    2049952 words | loss 5.28866 | ppl 198.08
  valid 251.35190 | test 225.68683
--------------------------------------------------------------------------------
Round :  42    2021-05-21 11:18:32
train    2049952 words | loss 5.27966 | ppl 196.3
  valid 252.43397 | test 226.55949
--------------------------------------------------------------------------------
Round :  43    2021-05-21 11:19:50
train    2049952 words | loss 5.27117 | ppl 194.64
  valid 252.02362 | test 226.33100
--------------------------------------------------------------------------------
Round :  44    2021-05-21 11:21:08
train    2049952 words | loss 5.26250 | ppl 192.96
  valid 252.57723 | test 226.44112
--------------------------------------------------------------------------------
Round :  45    2021-05-21 11:22:27
train    2049952 words | loss 5.25413 | ppl 191.35
  valid 252.19739 | test 225.32722
--------------------------------------------------------------------------------
Round :  46    2021-05-21 

train    2049952 words | loss 5.07555 | ppl 160.06
  valid 257.55196 | test 232.87931
--------------------------------------------------------------------------------
Round :  83    2021-05-21 12:12:19
train    2049952 words | loss 5.07293 | ppl 159.64
  valid 257.49190 | test 231.79089
--------------------------------------------------------------------------------
Round :  84    2021-05-21 12:13:39
train    2049952 words | loss 5.07186 | ppl 159.47
  valid 259.27366 | test 234.31317
--------------------------------------------------------------------------------
Round :  85    2021-05-21 12:14:58
train    2049952 words | loss 5.06593 | ppl 158.53
  valid 258.63675 | test 233.24059
--------------------------------------------------------------------------------
Round :  87    2021-05-21 12:17:36
train    2049952 words | loss 5.06391 | ppl 158.21
  valid 258.20728 | test 232.19825
--------------------------------------------------------------------------------
Round :  88    2021-05-21

train    2049952 words | loss 4.99777 | ppl 148.08
  valid 257.70925 | test 232.36911
--------------------------------------------------------------------------------
Round :  125    2021-05-21 13:07:31
train    2049952 words | loss 4.99636 | ppl 147.87
  valid 259.75290 | test 234.50001
--------------------------------------------------------------------------------
Round :  126    2021-05-21 13:08:50
train    2049952 words | loss 4.99595 | ppl 147.81
  valid 258.85174 | test 235.57533
--------------------------------------------------------------------------------
Round :  127    2021-05-21 13:10:09
train    2049952 words | loss 4.99446 | ppl 147.59
  valid 258.05330 | test 233.66379
--------------------------------------------------------------------------------
Round :  128    2021-05-21 13:11:27
train    2049952 words | loss 4.99309 | ppl 147.39
  valid 259.48404 | test 235.78841
--------------------------------------------------------------------------------
Round :  129    2021-

train    2049952 words | loss 4.96581 | ppl 143.43
  valid 254.58420 | test 230.55913
--------------------------------------------------------------------------------
Round :  166    2021-05-21 14:01:18
train    2049952 words | loss 4.96496 | ppl 143.3
  valid 255.22214 | test 233.60015
--------------------------------------------------------------------------------
Round :  167    2021-05-21 14:02:36
train    2049952 words | loss 4.96498 | ppl 143.31
  valid 255.92346 | test 231.02164
--------------------------------------------------------------------------------
Round :  168    2021-05-21 14:03:55
train    2049952 words | loss 4.96575 | ppl 143.42
  valid 255.83707 | test 233.11211
--------------------------------------------------------------------------------
Round :  169    2021-05-21 14:05:14
train    2049952 words | loss 4.96459 | ppl 143.25
  valid 253.29432 | test 228.99916
--------------------------------------------------------------------------------
Round :  170    2021-0

train    2049952 words | loss 4.94539 | ppl 140.53
  valid 245.76296 | test 224.79627
--------------------------------------------------------------------------------
Round :  207    2021-05-21 14:55:08
train    2049952 words | loss 4.94400 | ppl 140.33
  valid 248.80178 | test 225.11814
--------------------------------------------------------------------------------
Round :  208    2021-05-21 14:56:28
train    2049952 words | loss 4.94393 | ppl 140.32
  valid 246.56492 | test 226.48057
--------------------------------------------------------------------------------
Round :  209    2021-05-21 14:57:47
train    2049952 words | loss 4.94103 | ppl 139.91
  valid 247.99168 | test 224.44818
--------------------------------------------------------------------------------
Round :  210    2021-05-21 14:59:06
train    2049952 words | loss 4.94109 | ppl 139.92
  valid 247.39907 | test 226.30655
--------------------------------------------------------------------------------
Round :  211    2021-

train    2049952 words | loss 4.88436 | ppl 132.21
  valid 249.39004 | test 226.88364
--------------------------------------------------------------------------------
Round :  248    2021-05-21 15:49:00
train    2049952 words | loss 4.88486 | ppl 132.27
  valid 249.60580 | test 229.48357
--------------------------------------------------------------------------------
Round :  249    2021-05-21 15:50:19
train    2049952 words | loss 4.88193 | ppl 131.88
  valid 248.03839 | test 225.63247
--------------------------------------------------------------------------------
Round :  250    2021-05-21 15:51:38
train    2049952 words | loss 4.88055 | ppl 131.7
  valid 248.21013 | test 228.18798
--------------------------------------------------------------------------------
Round :  251    2021-05-21 15:52:56
train    2049952 words | loss 4.87840 | ppl 131.42
  valid 247.27749 | test 224.58895
--------------------------------------------------------------------------------
Round :  252    2021-0

train    2049952 words | loss 4.83855 | ppl 126.29
  valid 248.59006 | test 229.24124
--------------------------------------------------------------------------------
Round :  289    2021-05-21 16:42:46
train    2049952 words | loss 4.83707 | ppl 126.1
  valid 245.21785 | test 223.18979
--------------------------------------------------------------------------------
Round :  290    2021-05-21 16:44:05
train    2049952 words | loss 4.83718 | ppl 126.11
  valid 249.12842 | test 229.81155
--------------------------------------------------------------------------------
Round :  291    2021-05-21 16:45:24
train    2049952 words | loss 4.83557 | ppl 125.91
  valid 244.55842 | test 222.87321
--------------------------------------------------------------------------------
Round :  292    2021-05-21 16:46:43
train    2049952 words | loss 4.83521 | ppl 125.86
  valid 249.00846 | test 228.33252
--------------------------------------------------------------------------------
Round :  293    2021-0

train    2049952 words | loss 4.81072 | ppl 122.82
  valid 245.21714 | test 223.02533
--------------------------------------------------------------------------------
Round :  330    2021-05-21 17:36:33
train    2049952 words | loss 4.80989 | ppl 122.72
  valid 246.74182 | test 227.96681
--------------------------------------------------------------------------------
Round :  331    2021-05-21 17:37:52
train    2049952 words | loss 4.80923 | ppl 122.64
  valid 242.97722 | test 222.58493
--------------------------------------------------------------------------------
Round :  332    2021-05-21 17:39:10
train    2049952 words | loss 4.80880 | ppl 122.58
  valid 248.10913 | test 229.09079
--------------------------------------------------------------------------------
Round :  333    2021-05-21 17:40:29
train    2049952 words | loss 4.80775 | ppl 122.46
  valid 244.86364 | test 223.00043
--------------------------------------------------------------------------------
Round :  334    2021-

train    2049952 words | loss 4.79128 | ppl 120.46
  valid 248.22677 | test 229.34992
--------------------------------------------------------------------------------
Round :  371    2021-05-21 18:30:22
train    2049952 words | loss 4.79128 | ppl 120.45
  valid 244.36156 | test 222.76564
--------------------------------------------------------------------------------
Round :  372    2021-05-21 18:31:41
train    2049952 words | loss 4.79076 | ppl 120.39
  valid 247.51580 | test 228.78028
--------------------------------------------------------------------------------
Round :  373    2021-05-21 18:33:00
train    2049952 words | loss 4.78950 | ppl 120.24
  valid 242.39801 | test 221.24135
--------------------------------------------------------------------------------
Round :  374    2021-05-21 18:34:17
train    2049952 words | loss 4.78920 | ppl 120.21
  valid 247.93124 | test 229.14231
--------------------------------------------------------------------------------
Round :  375    2021-

train    2049952 words | loss 4.77144 | ppl 118.09
  valid 243.85824 | test 222.75976
--------------------------------------------------------------------------------
Round :  412    2021-05-21 19:24:10
train    2049952 words | loss 4.77130 | ppl 118.07
  valid 246.23116 | test 228.14033
--------------------------------------------------------------------------------
Round :  413    2021-05-21 19:25:29
train    2049952 words | loss 4.77102 | ppl 118.04
  valid 241.91142 | test 221.10451
--------------------------------------------------------------------------------
Round :  414    2021-05-21 19:26:48
train    2049952 words | loss 4.77006 | ppl 117.93
  valid 246.35725 | test 227.93924
--------------------------------------------------------------------------------
Round :  415    2021-05-21 19:28:07
train    2049952 words | loss 4.76998 | ppl 117.92
  valid 244.04558 | test 222.52464
--------------------------------------------------------------------------------
Round :  416    2021-

train    2049952 words | loss 4.76112 | ppl 116.88
  valid 247.96652 | test 229.22927
--------------------------------------------------------------------------------
Round :  453    2021-05-21 20:18:00
train    2049952 words | loss 4.76116 | ppl 116.88
  valid 241.05051 | test 219.61055
--------------------------------------------------------------------------------
Round :  454    2021-05-21 20:19:19
train    2049952 words | loss 4.76069 | ppl 116.83
  valid 246.57628 | test 228.35636
--------------------------------------------------------------------------------
Round :  455    2021-05-21 20:20:38
train    2049952 words | loss 4.76073 | ppl 116.83
  valid 243.97672 | test 221.55481
--------------------------------------------------------------------------------
Round :  456    2021-05-21 20:21:57
train    2049952 words | loss 4.76161 | ppl 116.93
  valid 248.36548 | test 229.62145
--------------------------------------------------------------------------------
Round :  457    2021-

train    2049952 words | loss 4.74977 | ppl 115.56
  valid 242.18425 | test 220.56982
--------------------------------------------------------------------------------
Round :  494    2021-05-21 21:11:52
train    2049952 words | loss 4.74946 | ppl 115.52
  valid 247.51320 | test 229.76606
--------------------------------------------------------------------------------
Round :  495    2021-05-21 21:13:11
train    2049952 words | loss 4.74875 | ppl 115.44
  valid 241.01144 | test 219.80941
--------------------------------------------------------------------------------
Round :  496    2021-05-21 21:14:30
train    2049952 words | loss 4.74942 | ppl 115.52
  valid 245.47594 | test 227.26500
--------------------------------------------------------------------------------
Round :  497    2021-05-21 21:15:48
train    2049952 words | loss 4.74979 | ppl 115.56
  valid 243.68184 | test 221.31113
--------------------------------------------------------------------------------
Round :  498    2021-

train    2049952 words | loss 4.74092 | ppl 114.54
  valid 244.67567 | test 227.07465
--------------------------------------------------------------------------------
Round :  535    2021-05-21 22:05:43
train    2049952 words | loss 4.73912 | ppl 114.33
  valid 240.78753 | test 219.28035
--------------------------------------------------------------------------------
Round :  536    2021-05-21 22:07:01
train    2049952 words | loss 4.73947 | ppl 114.37
  valid 246.08633 | test 228.11227
--------------------------------------------------------------------------------
Round :  537    2021-05-21 22:08:19
train    2049952 words | loss 4.73980 | ppl 114.41
  valid 244.12187 | test 222.21728
--------------------------------------------------------------------------------
Round :  538    2021-05-21 22:09:38
train    2049952 words | loss 4.73891 | ppl 114.31
  valid 247.23135 | test 229.38881
--------------------------------------------------------------------------------
Round :  539    2021-

train    2049952 words | loss 4.73928 | ppl 114.35
  valid 248.99815 | test 231.87825
--------------------------------------------------------------------------------
Round :  577    2021-05-21 23:00:53
train    2049952 words | loss 4.73803 | ppl 114.21
  valid 239.27773 | test 217.40886
--------------------------------------------------------------------------------
Round :  578    2021-05-21 23:02:13
train    2049952 words | loss 4.73869 | ppl 114.28
  valid 246.93830 | test 230.41152
--------------------------------------------------------------------------------
Round :  579    2021-05-21 23:03:32
train    2049952 words | loss 4.73876 | ppl 114.29
  valid 239.34052 | test 218.30886
--------------------------------------------------------------------------------
Round :  580    2021-05-21 23:04:51
train    2049952 words | loss 4.73897 | ppl 114.32
  valid 247.49141 | test 230.27495
--------------------------------------------------------------------------------
Round :  581    2021-

train    2049952 words | loss 4.73921 | ppl 114.34
  valid 242.16928 | test 220.38280
--------------------------------------------------------------------------------
Round :  618    2021-05-21 23:54:45
train    2049952 words | loss 4.74064 | ppl 114.51
  valid 248.17409 | test 230.70481
--------------------------------------------------------------------------------
Round :  619    2021-05-21 23:56:04
train    2049952 words | loss 4.74066 | ppl 114.51
  valid 240.32071 | test 219.24963
--------------------------------------------------------------------------------
Round :  620    2021-05-21 23:57:24
train    2049952 words | loss 4.74143 | ppl 114.6
  valid 252.03685 | test 235.50947
--------------------------------------------------------------------------------
Round :  621    2021-05-21 23:58:42
train    2049952 words | loss 4.74131 | ppl 114.58
  valid 242.82978 | test 221.32405
--------------------------------------------------------------------------------
Round :  622    2021-0

train    2049952 words | loss 4.74668 | ppl 115.2
  valid 249.94326 | test 233.79464
--------------------------------------------------------------------------------
Round :  659    2021-05-22 00:48:33
train    2049952 words | loss 4.74606 | ppl 115.13
  valid 242.53982 | test 220.77902
--------------------------------------------------------------------------------
Round :  660    2021-05-22 00:49:52
train    2049952 words | loss 4.74543 | ppl 115.06
  valid 251.76570 | test 235.21383
--------------------------------------------------------------------------------
Round :  661    2021-05-22 00:51:11
train    2049952 words | loss 4.74475 | ppl 114.98
  valid 243.31494 | test 221.35128
--------------------------------------------------------------------------------
Round :  662    2021-05-22 00:52:30
train    2049952 words | loss 4.74528 | ppl 115.04
  valid 250.61648 | test 234.30481
--------------------------------------------------------------------------------
Round :  663    2021-0

train    2049952 words | loss 4.73470 | ppl 113.83
  valid 242.41709 | test 220.76190
--------------------------------------------------------------------------------
Round :  700    2021-05-22 01:42:19
train    2049952 words | loss 4.73474 | ppl 113.83
  valid 251.13141 | test 235.35686
--------------------------------------------------------------------------------
Round :  701    2021-05-22 01:43:38
train    2049952 words | loss 4.73286 | ppl 113.62
  valid 242.60191 | test 221.21700
--------------------------------------------------------------------------------
Round :  702    2021-05-22 01:44:57
train    2049952 words | loss 4.73456 | ppl 113.81
  valid 251.60955 | test 235.78853
--------------------------------------------------------------------------------
Round :  703    2021-05-22 01:46:16
train    2049952 words | loss 4.73345 | ppl 113.69
  valid 243.33112 | test 221.79395
--------------------------------------------------------------------------------
Round :  704    2021-

train    2049952 words | loss 4.72708 | ppl 112.97
  valid 248.50339 | test 233.05404
--------------------------------------------------------------------------------
Round :  741    2021-05-22 02:36:09
train    2049952 words | loss 4.72670 | ppl 112.92
  valid 244.93990 | test 223.14583
--------------------------------------------------------------------------------
Round :  742    2021-05-22 02:37:27
train    2049952 words | loss 4.72735 | ppl 113.0
  valid 253.25046 | test 237.53935
--------------------------------------------------------------------------------
Round :  743    2021-05-22 02:38:46
train    2049952 words | loss 4.72650 | ppl 112.9
  valid 243.00868 | test 221.52549
--------------------------------------------------------------------------------
Round :  744    2021-05-22 02:40:05
train    2049952 words | loss 4.72682 | ppl 112.94
  valid 250.28877 | test 234.55844
--------------------------------------------------------------------------------
Round :  745    2021-05

train    2049952 words | loss 4.71972 | ppl 112.14
  valid 246.75118 | test 223.77871
--------------------------------------------------------------------------------
Round :  782    2021-05-22 03:30:01
train    2049952 words | loss 4.72201 | ppl 112.39
  valid 255.61429 | test 238.87444
--------------------------------------------------------------------------------
Round :  783    2021-05-22 03:31:19
train    2049952 words | loss 4.72140 | ppl 112.33
  valid 246.20043 | test 223.53362
--------------------------------------------------------------------------------
Round :  784    2021-05-22 03:32:37
train    2049952 words | loss 4.72090 | ppl 112.27
  valid 255.36133 | test 239.67677
--------------------------------------------------------------------------------
Round :  785    2021-05-22 03:33:56
train    2049952 words | loss 4.72052 | ppl 112.23
  valid 246.12852 | test 224.19563
--------------------------------------------------------------------------------
Round :  786    2021-

train    2049952 words | loss 4.72409 | ppl 112.63
  valid 257.95055 | test 241.48430
--------------------------------------------------------------------------------
Round :  823    2021-05-22 04:23:46
train    2049952 words | loss 4.72278 | ppl 112.48
  valid 248.02176 | test 225.10138
--------------------------------------------------------------------------------
Round :  824    2021-05-22 04:25:04
train    2049952 words | loss 4.72321 | ppl 112.53
  valid 254.34605 | test 238.46671
--------------------------------------------------------------------------------
Round :  825    2021-05-22 04:26:23
train    2049952 words | loss 4.72326 | ppl 112.53
  valid 249.88745 | test 226.47132
--------------------------------------------------------------------------------
Round :  826    2021-05-22 04:27:43
train    2049952 words | loss 4.72450 | ppl 112.67
  valid 257.99102 | test 241.88994
--------------------------------------------------------------------------------
Round :  827    2021-

train    2049952 words | loss 4.72071 | ppl 112.25
  valid 246.54853 | test 223.50600
--------------------------------------------------------------------------------
Round :  864    2021-05-22 05:17:33
train    2049952 words | loss 4.72139 | ppl 112.32
  valid 255.94421 | test 239.43977
--------------------------------------------------------------------------------
Round :  865    2021-05-22 05:18:52
train    2049952 words | loss 4.72010 | ppl 112.18
  valid 252.21592 | test 228.62955
--------------------------------------------------------------------------------
Round :  866    2021-05-22 05:20:11
train    2049952 words | loss 4.72002 | ppl 112.17
  valid 257.75163 | test 242.32282
--------------------------------------------------------------------------------
Round :  867    2021-05-22 05:21:30
train    2049952 words | loss 4.71868 | ppl 112.02
  valid 249.58251 | test 226.81704
--------------------------------------------------------------------------------
Round :  868    2021-

train    2049952 words | loss 4.71780 | ppl 111.92
  valid 255.15176 | test 239.50537
--------------------------------------------------------------------------------
Round :  905    2021-05-22 06:11:22
train    2049952 words | loss 4.71610 | ppl 111.73
  valid 246.83695 | test 224.18798
--------------------------------------------------------------------------------
Round :  906    2021-05-22 06:12:40
train    2049952 words | loss 4.71709 | ppl 111.84
  valid 256.63569 | test 240.66261
--------------------------------------------------------------------------------
Round :  907    2021-05-22 06:13:59
train    2049952 words | loss 4.71640 | ppl 111.77
  valid 247.51320 | test 224.41651
--------------------------------------------------------------------------------
Round :  908    2021-05-22 06:15:18
train    2049952 words | loss 4.71711 | ppl 111.84
  valid 254.55184 | test 239.29216
--------------------------------------------------------------------------------
Round :  909    2021-

train    2049952 words | loss 4.71542 | ppl 111.66
  valid 246.76456 | test 223.94487
--------------------------------------------------------------------------------
Round :  946    2021-05-22 07:05:10
train    2049952 words | loss 4.71651 | ppl 111.78
  valid 256.86415 | test 240.63027
--------------------------------------------------------------------------------
Round :  947    2021-05-22 07:06:29
train    2049952 words | loss 4.71493 | ppl 111.6
  valid 244.45377 | test 222.10727
--------------------------------------------------------------------------------
Round :  948    2021-05-22 07:07:48
train    2049952 words | loss 4.71516 | ppl 111.63
  valid 255.23703 | test 239.95842
--------------------------------------------------------------------------------
Round :  949    2021-05-22 07:09:06
train    2049952 words | loss 4.71481 | ppl 111.59
  valid 249.03126 | test 225.04693
--------------------------------------------------------------------------------
Round :  950    2021-0

train    2049952 words | loss 4.72004 | ppl 112.17
  valid 257.57810 | test 241.30125
--------------------------------------------------------------------------------
Round :  987    2021-05-22 07:58:55
train    2049952 words | loss 4.71876 | ppl 112.03
  valid 245.39064 | test 223.20455
--------------------------------------------------------------------------------
Round :  988    2021-05-22 08:00:13
train    2049952 words | loss 4.72053 | ppl 112.23
  valid 256.95464 | test 241.47082
--------------------------------------------------------------------------------
Round :  989    2021-05-22 08:01:32
train    2049952 words | loss 4.71917 | ppl 112.07
  valid 244.28178 | test 222.07857
--------------------------------------------------------------------------------
Round :  990    2021-05-22 08:02:52
train    2049952 words | loss 4.72169 | ppl 112.36
  valid 258.49984 | test 243.28873
--------------------------------------------------------------------------------
Round :  991    2021-

  valid 268.42907 | test 252.33026
--------------------------------------------------------------------------------
Round :  1027    2021-05-22 08:51:23
train    2049952 words | loss 4.74230 | ppl 114.7
  valid 252.68153 | test 228.08527
--------------------------------------------------------------------------------
Round :  1028    2021-05-22 08:52:42
train    2049952 words | loss 4.74492 | ppl 115.0
  valid 263.15461 | test 246.88810
--------------------------------------------------------------------------------
Round :  1029    2021-05-22 08:54:01
train    2049952 words | loss 4.74450 | ppl 114.95
  valid 250.06149 | test 224.94156
--------------------------------------------------------------------------------
Round :  1030    2021-05-22 08:55:18
train    2049952 words | loss 4.74640 | ppl 115.17
  valid 263.10367 | test 247.21004
--------------------------------------------------------------------------------
Round :  1031    2021-05-22 08:56:37
train    2049952 words | loss 4.7

train    2049952 words | loss 4.77267 | ppl 118.23
  valid 252.08859 | test 226.97314
--------------------------------------------------------------------------------
Round :  1068    2021-05-22 09:45:07
train    2049952 words | loss 4.77534 | ppl 118.55
  valid 269.95007 | test 253.40101
--------------------------------------------------------------------------------
Round :  1069    2021-05-22 09:46:26
train    2049952 words | loss 4.77327 | ppl 118.31
  valid 250.79776 | test 225.72475
--------------------------------------------------------------------------------
Round :  1070    2021-05-22 09:47:44
train    2049952 words | loss 4.77584 | ppl 118.61
  valid 270.40206 | test 254.59762
--------------------------------------------------------------------------------
Round :  1071    2021-05-22 09:49:03
train    2049952 words | loss 4.77385 | ppl 118.37
  valid 250.57907 | test 225.92599
--------------------------------------------------------------------------------
Round :  1072    

  valid 253.41842 | test 229.51631
--------------------------------------------------------------------------------
Round :  1108    2021-05-22 10:37:34
train    2049952 words | loss 4.81450 | ppl 123.28
  valid 267.62761 | test 250.78886
--------------------------------------------------------------------------------
Round :  1109    2021-05-22 10:38:53
train    2049952 words | loss 4.81754 | ppl 123.66
  valid 266.23439 | test 239.06443
--------------------------------------------------------------------------------
Round :  1110    2021-05-22 10:40:12
train    2049952 words | loss 4.82205 | ppl 124.22
  valid 267.69956 | test 250.76968
--------------------------------------------------------------------------------
Round :  1111    2021-05-22 10:41:31
train    2049952 words | loss 4.81693 | ppl 123.58
  valid 255.86870 | test 230.21687
--------------------------------------------------------------------------------
Round :  1112    2021-05-22 10:42:50
train    2049952 words | loss 4

In [None]:
# torch.save(model.state_dict(), 'parameter-2.pkl')
# evaluate(test_data)

In [None]:
# if 1:
#     x = torch.ones((1,1),dtype=int)
#     x[0,0] = 1
#     print(model.embedding(x))

In [None]:
# import time
# for T in range(100):
#     print('Round : ',T,"  ",time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
#     train()
#     print('  valid ',evaluate(val_data))
#     print('  test  ',evaluate(test_data))
#     print('-'*80)

In [None]:
# if 1:
#     x = torch.tensor([[1000,1],[100,1],[1000,1]],dtype=float)
#     y = torch.tensor([0,1,0])
#     print(criterion(x,y))

In [None]:
# -math.log(0.1)