# 螺旋桨RNA UPP预测 #

In [1]:
# 如果需要进行持久化安装, 需要使用持久化路径, 如下方代码示例:
# For a persistent installation, install packages into a persistent path,
# as in the following example:
# !mkdir /home/aistudio/external-libraries
# !pip install beautifulsoup4 -t /home/aistudio/external-libraries

In [2]:
# The project code lives under work/code, so that directory must be on the import path first.
# The append is guarded so that re-running this cell does not keep adding duplicate entries.
import sys
CODE_DIR = '/home/aistudio/work/code'
if CODE_DIR not in sys.path:
    sys.path.append(CODE_DIR)
# fly_paddle is the only module that needs to be imported directly in interactive sessions
import fly_paddle as fp

# args is a structure storing most (if not all) parameters and is passed to most function calls.
# args is initialized by fp.parse_args2() depending on the specific task; the tasks used
# here are 'train', 'validate' and 'predict'.
args, _ = fp.parse_args2('train')
print(fp.gwio.json_str(args.__dict__))
# Attention: depending on the network etc., args may contain some parameters that are not used.

{
   "action": "train",
   "argv": "-h",
   "verbose": 1,
   "resume": false,
   "load_dir": null,
   "save_dir": null,
   "save_level": 2,
   "save_grpby": ["epoch", "batch"],
   "log": "fly_paddle-May13.log",
   "data_dir": "data",
   "data_name": "predict",
   "data_suffix": ".pkl",
   "data_size": 0,
   "test_size": 0.1,
   "split_seed": null,
   "input_genre": "Seq",
   "input_fmt": "NLC",
   "seq_length": [0, 512, -1],
   "residue_fmt": "vector",
   "residue_nn": 0,
   "residue_dbn": false,
   "residue_attr": false,
   "residue_extra": false,
   "label_genre": "upp",
   "label_fmt": "NL",
   "label_tone": "none",
   "label_ntype": 2,
   "label_smooth": false,
   "net_src_file": "/home/aistudio/work/code/paddle_nets.py",
   "net": "lazylinear",
   "resnet": false,
   "act_fn": "relu",
   "norm_fn": "none",
   "norm_axis": -1,
   "dropout": 0.2,
   "feature_dim": 1,
   "embed_dim": 32,
   "embed_num": 1,
   "linear_num": 2,
   "linear_dim": [32],
   "linear_resnet": false,
   "conv

In [3]:
# Two main ways to update values in args: 1) args.update(**dict), 2) args.key = value
args.update(data_dir='work/data', data_name='train', residue_dbn=True, residue_extra=True)

# Network parameters:
# Three main mechanisms directing RNA base pairing are taken into consideration for the
# design of the network architecture.
#   1) The combinatorial (partition-function) competition among all attainable base pairs
#      over the whole sequence, approximated by the attention mechanism
#   2) The quasi-1D nature of unbranched, continuous RNA polymers, approximated by LSTM
#   3) The cooperativity of neighboring bases for stable base pairing (e.g., an isolated
#      base pair is highly unstable), approximated by 1D convolution
# Hence the neural net comprises these three main building blocks, with an additional
# 1-3 linear layers at the input and output.
# The dimensions of all hidden layers are 32 unless noted otherwise.
# During training, larger and/or deeper nets gave similar, but no better, performance!
args.net='seq2seq_attnlstmconv1d'  # the net name defined in paddle_nets.py
# The input block is a single linear feedforward layer
args.linear_num = 1 # the number of linear feedforward layers
# The three main data-crunching blocks
args.attn_num = 1 # the number of transformer encoder layers
args.lstm_num = 1 # the number of bidirectional lstm layers
args.conv1d_num = 1 # the number of 1D convolution layers
# Three linear layers for the final output, with dimensions of 32, 32, and 2, respectively
args.output_dim = [32, 32, 2]
# If sequences are padded to the same length, the effect of such padding on normalization
# is unclear, hence batch_size=1
args.norm_fn = 'layer' # layer normalization
args.batch_size = 1 # 1 is used in consideration of the layer norm above
# The submitted results were trained with the softmax+bce loss function.
# Essentially the same results were obtained with the softmax+mse loss function.
args.loss_fn = ['softmax+bce'] # softmax is needed here as the final output has a dimension of 2
args.label_tone = 'soft' # soft label
args.loss_sqrt = True # sqrt(loss) is only necessary for softmax+mse
args.loss_padding = False # exclude padded residues from loss
# fp.autoconfig_args() must be run to resolve inconsistencies between parameters
args = fp.autoconfig_args(args)


In [4]:
# Get and inspect the model. In the recorded run below, this call logs the net source file,
# the parameter counts, the optimizer settings, the loss function and a layer summary.
model = fp.get_model(args)
# Note: the shape of the final output is [N, L, 2]

2021-05-13 10:53:48,896 - INFO - Used net definition: [0;39;46m/home/aistudio/work/code/paddle_nets.py[0m
2021-05-13 10:53:48,986 - INFO - {'total_params': 36418, 'trainable_params': 36418}
2021-05-13 10:53:48,987 - INFO - Optimizer method: adam
2021-05-13 10:53:48,988 - INFO -    learning rate: 0.003
2021-05-13 10:53:48,988 - INFO -     lr_scheduler: reduced
2021-05-13 10:53:48,989 - INFO -     weight decay: none
2021-05-13 10:53:48,989 - INFO -          l1decay: 0.0001
2021-05-13 10:53:48,991 - INFO -          l2decay: 0.0001
2021-05-13 10:53:48,991 - INFO - Getting loss function: ['softmax+bce']


-------------------------------------------------------------------------------------------------------------------------------------
      Layer (type)                          Input Shape                                  Output Shape                   Param #    
   MyEmbeddingLayer-1                      [[2, 512, 10]]                                [2, 512, 10]                      0       
        Linear-1                           [[2, 512, 10]]                                [2, 512, 32]                     352      
         ReLU-1                            [[2, 512, 32]]                                [2, 512, 32]                      0       
       LayerNorm-1                         [[2, 512, 32]]                                [2, 512, 32]                     64       
        Dropout-1                          [[2, 512, 32]]                                [2, 512, 32]                      0       
     MyLinearTower-1                       [[2, 512, 10]]                 

In [5]:
# Read in the data. The provided data were transformed into a dict, which is saved as a pickle file.
# The last two columns in the input matrix are the predictions of linear_partition_c and
# linear_partition_v.
midata = fp.get_midata(args)
# Hold out 10% of the data for validation.
# NOTE(review): no seed is passed here and args.split_seed printed as null earlier, so the
# split presumably differs between runs — confirm whether train_test_split accepts a seed.
train_data, valid_data = fp.train_test_split(midata, test_size=0.1)

2021-05-13 10:53:54,736 - INFO - Loading data: work/data/train.pkl
2021-05-13 10:53:54,799 - INFO -    # of data: 5000,  max seqlen: 500, user seq_length: [0, 512, -1]
2021-05-13 10:53:54,800 - INFO -  residue fmt: vector, nn: 0, dbn: True, attr: False, genre: upp
2021-05-13 10:53:54,825 - INFO - Selected 5000 data sets with length range: [0, 512, -1]
2021-05-13 10:54:00,860 - INFO - Processing upp data...


In [6]:
# Train the model; the final loss should fall between 0.52 and 0.53.
# One epoch takes about 5 minutes on CPU, and self-termination takes ~20 epochs.
# Bind the validation data and the save directory to the in-training validation hook first,
# then hand the prepared callback to the trainer.
validate_cb = fp.func_partial(
    fp.validate_in_train,
    midata=valid_data,
    save_dir='./',
)
train_loss, valid_loss = fp.train(
    model,
    train_data,
    num_epochs=21,
    validate_callback=validate_cb,
)
# Note: zero cross-entropy is not possible with soft labels

2021-05-13 10:54:07,183 - INFO - Training, data size: 4500
2021-05-13 10:54:07,184 - INFO -          batch size: 1
2021-05-13 10:54:07,185 - INFO -             shuffle: True
2021-05-13 10:54:07,185 - INFO -        # of batches: 4500
2021-05-13 10:54:07,186 - INFO -      recap interval: 151
2021-05-13 10:54:07,186 - INFO -   validate interval: 450
2021-05-13 10:54:07,187 - INFO -         # of epochs: 21
2021-05-13 10:54:07,187 - INFO -        loss padding: False
2021-05-13 10:54:08,028 - INFO - Epoch/batch: 0/   0, ibatch:    0, loss: [0;36m0.8334[0m, std: 0.4388
2021-05-13 10:54:16,475 - INFO - loss: [0;32m0.8304[0m, std: 0.4166
2021-05-13 10:54:22,838 - INFO - Epoch/batch: 0/ 151, ibatch:  151, loss: [0;36m0.6754[0m, std: 0.6512
2021-05-13 10:54:29,126 - INFO - Epoch/batch: 0/ 302, ibatch:  302, loss: [0;36m0.5804[0m, std: 0.6641
2021-05-13 11:01:14,115 - INFO - loss: [0;32m0.5391[0m, std: 0.6125
2021-05-13 11:01:14,636 - INFO - Epoch/batch: 1/1812, ibatch: 6312, loss: [0;3

Epoch 54: ReduceOnPlateau set learning rate to 0.0024300000000000003.


2021-05-13 11:03:08,721 - INFO - loss: [0;32m0.5360[0m, std: 0.6108
2021-05-13 11:03:09,684 - INFO - Epoch/batch: 1/3624, ibatch: 8124, loss: [0;36m0.5452[0m, std: 0.6181
2021-05-13 11:03:15,831 - INFO - Epoch/batch: 1/3775, ibatch: 8275, loss: [0;36m0.5598[0m, std: 0.6385
2021-05-13 11:03:21,895 - INFO - Epoch/batch: 1/3926, ibatch: 8426, loss: [0;36m0.5343[0m, std: 0.6129
2021-05-13 11:03:36,723 - INFO - loss: [0;32m0.5340[0m, std: 0.6057
2021-05-13 11:03:36,741 - INFO - Saved model states in: earlystop_0.5340
2021-05-13 11:03:36,743 - INFO - Saved net python code: earlystop_0.5340/paddle_nets.py
2021-05-13 11:03:36,752 - INFO - Saved best model: earlystop_0.5340
2021-05-13 11:03:36,753 - INFO - Removing earlystop model: earlystop_0.5351
2021-05-13 11:03:37,964 - INFO - Epoch/batch: 1/4077, ibatch: 8577, loss: [0;36m0.5533[0m, std: 0.6344
2021-05-13 11:03:44,011 - INFO - Epoch/batch: 1/4228, ibatch: 8728, loss: [0;36m0.5616[0m, std: 0.6299
2021-05-13 11:03:50,188 - INFO

Epoch 82: ReduceOnPlateau set learning rate to 0.002187.


2021-05-13 11:07:44,854 - INFO - Epoch/batch: 2/3322, ibatch: 12322, loss: [0;36m0.5294[0m, std: 0.6157
2021-05-13 11:07:50,888 - INFO - Epoch/batch: 2/3473, ibatch: 12473, loss: [0;36m0.5570[0m, std: 0.6235
2021-05-13 11:08:05,917 - INFO - loss: [0;32m0.5335[0m, std: 0.6285
2021-05-13 11:08:06,937 - INFO - Epoch/batch: 2/3624, ibatch: 12624, loss: [0;36m0.5400[0m, std: 0.6211
2021-05-13 11:08:12,895 - INFO - Epoch/batch: 2/3775, ibatch: 12775, loss: [0;36m0.5449[0m, std: 0.6272
2021-05-13 11:08:19,167 - INFO - Epoch/batch: 2/3926, ibatch: 12926, loss: [0;36m0.5416[0m, std: 0.6182
2021-05-13 11:08:35,285 - INFO - loss: [0;32m0.5328[0m, std: 0.6016
2021-05-13 11:08:36,422 - INFO - Epoch/batch: 2/4077, ibatch: 13077, loss: [0;36m0.5526[0m, std: 0.6340
2021-05-13 11:08:42,406 - INFO - Epoch/batch: 2/4228, ibatch: 13228, loss: [0;36m0.5318[0m, std: 0.6078
2021-05-13 11:08:48,446 - INFO - Epoch/batch: 2/4379, ibatch: 13379, loss: [0;36m0.5086[0m, std: 0.6004
2021-05-13 1

Epoch 101: ReduceOnPlateau set learning rate to 0.0019683.


2021-05-13 11:10:55,334 - INFO - Epoch/batch: 3/1661, ibatch: 15161, loss: [0;36m0.5343[0m, std: 0.6123
2021-05-13 11:11:10,813 - INFO - loss: [0;32m0.5328[0m, std: 0.6166
2021-05-13 11:11:11,292 - INFO - Epoch/batch: 3/1812, ibatch: 15312, loss: [0;36m0.5261[0m, std: 0.6101
2021-05-13 11:11:17,743 - INFO - Epoch/batch: 3/1963, ibatch: 15463, loss: [0;36m0.5487[0m, std: 0.6273
2021-05-13 11:11:23,712 - INFO - Epoch/batch: 3/2114, ibatch: 15614, loss: [0;36m0.5305[0m, std: 0.6057
2021-05-13 11:11:39,424 - INFO - loss: [0;32m0.5359[0m, std: 0.6410
2021-05-13 11:11:40,002 - INFO - Epoch/batch: 3/2265, ibatch: 15765, loss: [0;36m0.5402[0m, std: 0.6206
2021-05-13 11:11:45,710 - INFO - Epoch/batch: 3/2416, ibatch: 15916, loss: [0;36m0.5300[0m, std: 0.6058
2021-05-13 11:11:51,884 - INFO - Epoch/batch: 3/2567, ibatch: 16067, loss: [0;36m0.5408[0m, std: 0.6197
2021-05-13 11:12:07,456 - INFO - loss: [0;32m0.5425[0m, std: 0.6680
2021-05-13 11:12:08,179 - INFO - Epoch/batch: 3/

Epoch 112: ReduceOnPlateau set learning rate to 0.00177147.


2021-05-13 11:12:43,371 - INFO - Epoch/batch: 3/3322, ibatch: 16822, loss: [0;36m0.5586[0m, std: 0.6344
2021-05-13 11:12:49,513 - INFO - Epoch/batch: 3/3473, ibatch: 16973, loss: [0;36m0.5165[0m, std: 0.6051
2021-05-13 11:13:04,611 - INFO - loss: [0;32m0.5348[0m, std: 0.6447
2021-05-13 11:13:05,647 - INFO - Epoch/batch: 3/3624, ibatch: 17124, loss: [0;36m0.5303[0m, std: 0.6150
2021-05-13 11:13:11,843 - INFO - Epoch/batch: 3/3775, ibatch: 17275, loss: [0;36m0.5384[0m, std: 0.6208
2021-05-13 11:13:18,390 - INFO - Epoch/batch: 3/3926, ibatch: 17426, loss: [0;36m0.5417[0m, std: 0.6198
2021-05-13 11:13:33,354 - INFO - loss: [0;32m0.5355[0m, std: 0.6491
2021-05-13 11:13:34,433 - INFO - Epoch/batch: 3/4077, ibatch: 17577, loss: [0;36m0.5405[0m, std: 0.6250
2021-05-13 11:13:40,576 - INFO - Epoch/batch: 3/4228, ibatch: 17728, loss: [0;36m0.5387[0m, std: 0.6175
2021-05-13 11:13:46,891 - INFO - Epoch/batch: 3/4379, ibatch: 17879, loss: [0;36m0.5263[0m, std: 0.6115
2021-05-13 1

Epoch 123: ReduceOnPlateau set learning rate to 0.0015943230000000001.


2021-05-13 11:14:43,011 - INFO - loss: [0;32m0.5325[0m, std: 0.5884
2021-05-13 11:14:43,166 - INFO - Epoch/batch: 4/ 453, ibatch: 18453, loss: [0;36m0.5415[0m, std: 0.6178
2021-05-13 11:14:49,677 - INFO - Epoch/batch: 4/ 604, ibatch: 18604, loss: [0;36m0.5439[0m, std: 0.6268
2021-05-13 11:14:55,896 - INFO - Epoch/batch: 4/ 755, ibatch: 18755, loss: [0;36m0.5471[0m, std: 0.6282
2021-05-13 11:15:12,126 - INFO - loss: [0;32m0.5310[0m, std: 0.6273
2021-05-13 11:15:12,148 - INFO - Saved model states in: earlystop_0.5310
2021-05-13 11:15:12,150 - INFO - Saved net python code: earlystop_0.5310/paddle_nets.py
2021-05-13 11:15:12,161 - INFO - Saved best model: earlystop_0.5310
2021-05-13 11:15:12,163 - INFO - Removing earlystop model: earlystop_0.5315
2021-05-13 11:15:12,443 - INFO - Epoch/batch: 4/ 906, ibatch: 18906, loss: [0;36m0.5342[0m, std: 0.6113
2021-05-13 11:15:18,829 - INFO - Epoch/batch: 4/1057, ibatch: 19057, loss: [0;36m0.5393[0m, std: 0.6218
2021-05-13 11:15:25,199 -

Epoch 134: ReduceOnPlateau set learning rate to 0.0014348907.


2021-05-13 11:16:22,420 - INFO - Epoch/batch: 4/2114, ibatch: 20114, loss: [0;36m0.5516[0m, std: 0.6298
2021-05-13 11:16:38,471 - INFO - loss: [0;32m0.5335[0m, std: 0.6292
2021-05-13 11:16:39,075 - INFO - Epoch/batch: 4/2265, ibatch: 20265, loss: [0;36m0.5352[0m, std: 0.6149
2021-05-13 11:16:45,222 - INFO - Epoch/batch: 4/2416, ibatch: 20416, loss: [0;36m0.5373[0m, std: 0.6130
2021-05-13 11:16:51,466 - INFO - Epoch/batch: 4/2567, ibatch: 20567, loss: [0;36m0.5410[0m, std: 0.6187
2021-05-13 11:17:06,922 - INFO - loss: [0;32m0.5296[0m, std: 0.6092
2021-05-13 11:17:06,940 - INFO - Saved model states in: earlystop_0.5296
2021-05-13 11:17:06,942 - INFO - Saved net python code: earlystop_0.5296/paddle_nets.py
2021-05-13 11:17:06,950 - INFO - Saved best model: earlystop_0.5296
2021-05-13 11:17:06,951 - INFO - Removing earlystop model: earlystop_0.5301
2021-05-13 11:17:07,732 - INFO - Epoch/batch: 4/2718, ibatch: 20718, loss: [0;36m0.5375[0m, std: 0.6126
2021-05-13 11:17:13,861 -

Epoch 156: ReduceOnPlateau set learning rate to 0.00129140163.


2021-05-13 11:20:15,765 - INFO - loss: [0;32m0.5344[0m, std: 0.6451
2021-05-13 11:20:15,996 - INFO - Epoch/batch: 5/ 906, ibatch: 23406, loss: [0;36m0.5144[0m, std: 0.6026
2021-05-13 11:20:22,290 - INFO - Epoch/batch: 5/1057, ibatch: 23557, loss: [0;36m0.5381[0m, std: 0.6193
2021-05-13 11:20:28,435 - INFO - Epoch/batch: 5/1208, ibatch: 23708, loss: [0;36m0.5393[0m, std: 0.6179
2021-05-13 11:20:44,729 - INFO - loss: [0;32m0.5320[0m, std: 0.6055
2021-05-13 11:20:45,047 - INFO - Epoch/batch: 5/1359, ibatch: 23859, loss: [0;36m0.5320[0m, std: 0.6133
2021-05-13 11:20:51,194 - INFO - Epoch/batch: 5/1510, ibatch: 24010, loss: [0;36m0.5305[0m, std: 0.6188
2021-05-13 11:20:57,435 - INFO - Epoch/batch: 5/1661, ibatch: 24161, loss: [0;36m0.5178[0m, std: 0.6017
2021-05-13 11:21:13,254 - INFO - loss: [0;32m0.5309[0m, std: 0.6023
2021-05-13 11:21:13,745 - INFO - Epoch/batch: 5/1812, ibatch: 24312, loss: [0;36m0.5276[0m, std: 0.6033
2021-05-13 11:21:19,719 - INFO - Epoch/batch: 5/

Epoch 167: ReduceOnPlateau set learning rate to 0.001162261467.


2021-05-13 11:21:55,013 - INFO - Epoch/batch: 5/2567, ibatch: 25067, loss: [0;36m0.5332[0m, std: 0.6127
2021-05-13 11:22:11,250 - INFO - loss: [0;32m0.5298[0m, std: 0.6014
2021-05-13 11:22:12,021 - INFO - Epoch/batch: 5/2718, ibatch: 25218, loss: [0;36m0.5518[0m, std: 0.6234
2021-05-13 11:22:18,115 - INFO - Epoch/batch: 5/2869, ibatch: 25369, loss: [0;36m0.5381[0m, std: 0.6154
2021-05-13 11:22:24,309 - INFO - Epoch/batch: 5/3020, ibatch: 25520, loss: [0;36m0.5292[0m, std: 0.6113
2021-05-13 11:22:40,147 - INFO - loss: [0;32m0.5316[0m, std: 0.6178
2021-05-13 11:22:40,935 - INFO - Epoch/batch: 5/3171, ibatch: 25671, loss: [0;36m0.5342[0m, std: 0.6165
2021-05-13 11:22:47,119 - INFO - Epoch/batch: 5/3322, ibatch: 25822, loss: [0;36m0.5495[0m, std: 0.6194
2021-05-13 11:22:52,932 - INFO - Epoch/batch: 5/3473, ibatch: 25973, loss: [0;36m0.5286[0m, std: 0.6140
2021-05-13 11:23:08,955 - INFO - loss: [0;32m0.5287[0m, std: 0.5958
2021-05-13 11:23:08,977 - INFO - Saved model sta

Epoch 178: ReduceOnPlateau set learning rate to 0.0010460353203000001.


2021-05-13 11:23:44,104 - INFO - Epoch/batch: 5/4228, ibatch: 26728, loss: [0;36m0.5367[0m, std: 0.6049
2021-05-13 11:23:50,317 - INFO - Epoch/batch: 5/4379, ibatch: 26879, loss: [0;36m0.5401[0m, std: 0.6200
2021-05-13 11:24:05,806 - INFO - loss: [0;32m0.5307[0m, std: 0.6333
2021-05-13 11:24:06,950 - INFO - Epoch 5 average training loss: [0;46m0.5351[0m std: 0.6146
2021-05-13 11:24:06,955 - INFO - Epoch 5 average validate loss: [0;46m0.5310[0m std: 0.6115
2021-05-13 11:24:08,780 - INFO - Epoch/batch: 6/   0, ibatch: 27000, loss: [0;36m0.5184[0m, std: 0.6090
2021-05-13 11:24:19,183 - INFO - loss: [0;32m0.5306[0m, std: 0.6328
2021-05-13 11:24:25,185 - INFO - Epoch/batch: 6/ 151, ibatch: 27151, loss: [0;36m0.5322[0m, std: 0.6119
2021-05-13 11:24:31,590 - INFO - Epoch/batch: 6/ 302, ibatch: 27302, loss: [0;36m0.5397[0m, std: 0.6177
2021-05-13 11:24:48,147 - INFO - loss: [0;32m0.5304[0m, std: 0.5910
2021-05-13 11:24:48,323 - INFO - Epoch/batch: 6/ 453, ibatch: 27453, los

Epoch 189: ReduceOnPlateau set learning rate to 0.0009414317882700001.


2021-05-13 11:25:45,629 - INFO - loss: [0;32m0.5309[0m, std: 0.5961
2021-05-13 11:25:46,016 - INFO - Epoch/batch: 6/1359, ibatch: 28359, loss: [0;36m0.5290[0m, std: 0.6117
2021-05-13 11:25:52,482 - INFO - Epoch/batch: 6/1510, ibatch: 28510, loss: [0;36m0.5483[0m, std: 0.6182
2021-05-13 11:25:58,345 - INFO - Epoch/batch: 6/1661, ibatch: 28661, loss: [0;36m0.5233[0m, std: 0.6159
2021-05-13 11:26:14,406 - INFO - loss: [0;32m0.5296[0m, std: 0.6153
2021-05-13 11:26:14,932 - INFO - Epoch/batch: 6/1812, ibatch: 28812, loss: [0;36m0.5222[0m, std: 0.6061
2021-05-13 11:26:21,022 - INFO - Epoch/batch: 6/1963, ibatch: 28963, loss: [0;36m0.5185[0m, std: 0.6054
2021-05-13 11:26:26,839 - INFO - Epoch/batch: 6/2114, ibatch: 29114, loss: [0;36m0.5356[0m, std: 0.6108
2021-05-13 11:26:42,964 - INFO - loss: [0;32m0.5324[0m, std: 0.6383
2021-05-13 11:26:43,557 - INFO - Epoch/batch: 6/2265, ibatch: 29265, loss: [0;36m0.5366[0m, std: 0.6178
2021-05-13 11:26:50,007 - INFO - Epoch/batch: 6/

Epoch 200: ReduceOnPlateau set learning rate to 0.0008472886094430002.


2021-05-13 11:27:24,533 - INFO - Epoch/batch: 6/3020, ibatch: 30020, loss: [0;36m0.5231[0m, std: 0.6095
2021-05-13 11:27:40,259 - INFO - loss: [0;32m0.5283[0m, std: 0.6141
2021-05-13 11:27:41,328 - INFO - Epoch/batch: 6/3171, ibatch: 30171, loss: [0;36m0.5479[0m, std: 0.6252
2021-05-13 11:27:48,198 - INFO - Epoch/batch: 6/3322, ibatch: 30322, loss: [0;36m0.5316[0m, std: 0.6091
2021-05-13 11:27:55,397 - INFO - Epoch/batch: 6/3473, ibatch: 30473, loss: [0;36m0.5374[0m, std: 0.6164
2021-05-13 11:28:12,219 - INFO - loss: [0;32m0.5293[0m, std: 0.6127
2021-05-13 11:28:13,188 - INFO - Epoch/batch: 6/3624, ibatch: 30624, loss: [0;36m0.5342[0m, std: 0.6066
2021-05-13 11:28:19,267 - INFO - Epoch/batch: 6/3775, ibatch: 30775, loss: [0;36m0.5336[0m, std: 0.6107
2021-05-13 11:28:25,360 - INFO - Epoch/batch: 6/3926, ibatch: 30926, loss: [0;36m0.5432[0m, std: 0.6095
2021-05-13 11:28:41,147 - INFO - loss: [0;32m0.5319[0m, std: 0.6336
2021-05-13 11:28:42,368 - INFO - Epoch/batch: 6/

Epoch 211: ReduceOnPlateau set learning rate to 0.0007625597484987002.


2021-05-13 11:29:23,073 - INFO - loss: [0;32m0.5306[0m, std: 0.6124
2021-05-13 11:29:29,575 - INFO - Epoch/batch: 7/ 151, ibatch: 31651, loss: [0;36m0.5510[0m, std: 0.6256
2021-05-13 11:29:35,788 - INFO - Epoch/batch: 7/ 302, ibatch: 31802, loss: [0;36m0.5297[0m, std: 0.6131
2021-05-13 11:29:52,229 - INFO - loss: [0;32m0.5292[0m, std: 0.6160
2021-05-13 11:29:52,349 - INFO - Epoch/batch: 7/ 453, ibatch: 31953, loss: [0;36m0.5481[0m, std: 0.6206
2021-05-13 11:29:58,483 - INFO - Epoch/batch: 7/ 604, ibatch: 32104, loss: [0;36m0.5373[0m, std: 0.6134
2021-05-13 11:30:04,538 - INFO - Epoch/batch: 7/ 755, ibatch: 32255, loss: [0;36m0.5161[0m, std: 0.6004
2021-05-13 11:30:20,312 - INFO - loss: [0;32m0.5288[0m, std: 0.6139
2021-05-13 11:30:20,571 - INFO - Epoch/batch: 7/ 906, ibatch: 32406, loss: [0;36m0.5331[0m, std: 0.6166
2021-05-13 11:30:26,284 - INFO - Epoch/batch: 7/1057, ibatch: 32557, loss: [0;36m0.5268[0m, std: 0.6039
2021-05-13 11:30:32,373 - INFO - Epoch/batch: 7/

Epoch 222: ReduceOnPlateau set learning rate to 0.0006863037736488302.


2021-05-13 11:31:17,601 - INFO - loss: [0;32m0.5290[0m, std: 0.6157
2021-05-13 11:31:18,175 - INFO - Epoch/batch: 7/1812, ibatch: 33312, loss: [0;36m0.5230[0m, std: 0.6102
2021-05-13 11:31:24,937 - INFO - Epoch/batch: 7/1963, ibatch: 33463, loss: [0;36m0.5269[0m, std: 0.6043
2021-05-13 11:31:32,103 - INFO - Epoch/batch: 7/2114, ibatch: 33614, loss: [0;36m0.5427[0m, std: 0.6097
2021-05-13 11:31:48,951 - INFO - loss: [0;32m0.5295[0m, std: 0.6220
2021-05-13 11:31:49,546 - INFO - Epoch/batch: 7/2265, ibatch: 33765, loss: [0;36m0.5254[0m, std: 0.6121
2021-05-13 11:31:55,714 - INFO - Epoch/batch: 7/2416, ibatch: 33916, loss: [0;36m0.5388[0m, std: 0.6223
2021-05-13 11:32:01,765 - INFO - Epoch/batch: 7/2567, ibatch: 34067, loss: [0;36m0.5343[0m, std: 0.6126
2021-05-13 11:32:17,215 - INFO - loss: [0;32m0.5277[0m, std: 0.5989
2021-05-13 11:32:17,255 - INFO - Saved model states in: earlystop_0.5277
2021-05-13 11:32:17,258 - INFO - Saved net python code: earlystop_0.5277/paddle_n

Epoch 233: ReduceOnPlateau set learning rate to 0.0006176733962839472.


2021-05-13 11:32:57,239 - INFO - Epoch/batch: 7/3473, ibatch: 34973, loss: [0;36m0.5323[0m, std: 0.6052
2021-05-13 11:33:12,889 - INFO - loss: [0;32m0.5289[0m, std: 0.6235
2021-05-13 11:33:13,790 - INFO - Epoch/batch: 7/3624, ibatch: 35124, loss: [0;36m0.5341[0m, std: 0.6137
2021-05-13 11:33:19,960 - INFO - Epoch/batch: 7/3775, ibatch: 35275, loss: [0;36m0.5428[0m, std: 0.6214
2021-05-13 11:33:25,797 - INFO - Epoch/batch: 7/3926, ibatch: 35426, loss: [0;36m0.5135[0m, std: 0.5948
2021-05-13 11:33:40,901 - INFO - loss: [0;32m0.5292[0m, std: 0.6237
2021-05-13 11:33:42,042 - INFO - Epoch/batch: 7/4077, ibatch: 35577, loss: [0;36m0.5366[0m, std: 0.6131
2021-05-13 11:33:48,016 - INFO - Epoch/batch: 7/4228, ibatch: 35728, loss: [0;36m0.5240[0m, std: 0.6079
2021-05-13 11:33:53,977 - INFO - Epoch/batch: 7/4379, ibatch: 35879, loss: [0;36m0.5339[0m, std: 0.6111
2021-05-13 11:34:08,653 - INFO - loss: [0;32m0.5278[0m, std: 0.5942
2021-05-13 11:34:09,717 - INFO - Epoch 7 average

Epoch 244: ReduceOnPlateau set learning rate to 0.0005559060566555524.


2021-05-13 11:34:55,463 - INFO - Epoch/batch: 8/ 604, ibatch: 36604, loss: [0;36m0.5252[0m, std: 0.6011
2021-05-13 11:35:01,696 - INFO - Epoch/batch: 8/ 755, ibatch: 36755, loss: [0;36m0.5264[0m, std: 0.6077
2021-05-13 11:35:17,407 - INFO - loss: [0;32m0.5278[0m, std: 0.6116
2021-05-13 11:35:17,669 - INFO - Epoch/batch: 8/ 906, ibatch: 36906, loss: [0;36m0.5341[0m, std: 0.6136
2021-05-13 11:35:23,884 - INFO - Epoch/batch: 8/1057, ibatch: 37057, loss: [0;36m0.5323[0m, std: 0.6157
2021-05-13 11:35:30,053 - INFO - Epoch/batch: 8/1208, ibatch: 37208, loss: [0;36m0.5374[0m, std: 0.6234
2021-05-13 11:35:45,906 - INFO - loss: [0;32m0.5293[0m, std: 0.6269
2021-05-13 11:35:46,247 - INFO - Epoch/batch: 8/1359, ibatch: 37359, loss: [0;36m0.5292[0m, std: 0.6098
2021-05-13 11:35:52,412 - INFO - Epoch/batch: 8/1510, ibatch: 37510, loss: [0;36m0.5361[0m, std: 0.6153
2021-05-13 11:35:58,656 - INFO - Epoch/batch: 8/1661, ibatch: 37661, loss: [0;36m0.5502[0m, std: 0.6238
2021-05-13 1

Epoch 255: ReduceOnPlateau set learning rate to 0.0005003154509899972.


2021-05-13 11:36:43,402 - INFO - loss: [0;32m0.5279[0m, std: 0.6093
2021-05-13 11:36:44,056 - INFO - Epoch/batch: 8/2265, ibatch: 38265, loss: [0;36m0.5195[0m, std: 0.5992
2021-05-13 11:36:50,188 - INFO - Epoch/batch: 8/2416, ibatch: 38416, loss: [0;36m0.5108[0m, std: 0.6008
2021-05-13 11:36:56,386 - INFO - Epoch/batch: 8/2567, ibatch: 38567, loss: [0;36m0.5483[0m, std: 0.6222
2021-05-13 11:37:12,021 - INFO - loss: [0;32m0.5275[0m, std: 0.6075
2021-05-13 11:37:12,042 - INFO - Saved model states in: earlystop_0.5275
2021-05-13 11:37:12,044 - INFO - Saved net python code: earlystop_0.5275/paddle_nets.py
2021-05-13 11:37:12,055 - INFO - Saved best model: earlystop_0.5275
2021-05-13 11:37:12,057 - INFO - Removing earlystop model: earlystop_0.5277
2021-05-13 11:37:12,868 - INFO - Epoch/batch: 8/2718, ibatch: 38718, loss: [0;36m0.5315[0m, std: 0.6225
2021-05-13 11:37:18,802 - INFO - Epoch/batch: 8/2869, ibatch: 38869, loss: [0;36m0.5282[0m, std: 0.6083
2021-05-13 11:37:24,949 -

Epoch 266: ReduceOnPlateau set learning rate to 0.00045028390589099747.


2021-05-13 11:38:21,761 - INFO - Epoch/batch: 8/3926, ibatch: 39926, loss: [0;36m0.5516[0m, std: 0.6232
2021-05-13 11:38:37,370 - INFO - loss: [0;32m0.5287[0m, std: 0.6097
2021-05-13 11:38:38,708 - INFO - Epoch/batch: 8/4077, ibatch: 40077, loss: [0;36m0.5368[0m, std: 0.6148
2021-05-13 11:38:44,803 - INFO - Epoch/batch: 8/4228, ibatch: 40228, loss: [0;36m0.5332[0m, std: 0.6045
2021-05-13 11:38:51,111 - INFO - Epoch/batch: 8/4379, ibatch: 40379, loss: [0;36m0.5315[0m, std: 0.6137
2021-05-13 11:39:05,508 - INFO - loss: [0;32m0.5274[0m, std: 0.6073
2021-05-13 11:39:05,527 - INFO - Saved model states in: earlystop_0.5274
2021-05-13 11:39:05,528 - INFO - Saved net python code: earlystop_0.5274/paddle_nets.py
2021-05-13 11:39:05,536 - INFO - Saved best model: earlystop_0.5274
2021-05-13 11:39:05,537 - INFO - Removing earlystop model: earlystop_0.5275
2021-05-13 11:39:06,650 - INFO - Epoch 8 average training loss: [0;46m0.5309[0m std: 0.6100
2021-05-13 11:39:06,655 - INFO - Epoc

Epoch 277: ReduceOnPlateau set learning rate to 0.0004052555153018977.


2021-05-13 11:40:22,184 - INFO - Epoch/batch: 9/1057, ibatch: 41557, loss: [0;36m0.5424[0m, std: 0.6199
2021-05-13 11:40:28,520 - INFO - Epoch/batch: 9/1208, ibatch: 41708, loss: [0;36m0.5179[0m, std: 0.5975
2021-05-13 11:40:44,329 - INFO - loss: [0;32m0.5280[0m, std: 0.6187
2021-05-13 11:40:44,744 - INFO - Epoch/batch: 9/1359, ibatch: 41859, loss: [0;36m0.5396[0m, std: 0.6165
2021-05-13 11:40:51,050 - INFO - Epoch/batch: 9/1510, ibatch: 42010, loss: [0;36m0.5358[0m, std: 0.6128
2021-05-13 11:40:57,870 - INFO - Epoch/batch: 9/1661, ibatch: 42161, loss: [0;36m0.5461[0m, std: 0.6147
2021-05-13 11:41:14,087 - INFO - loss: [0;32m0.5297[0m, std: 0.6293
2021-05-13 11:41:14,641 - INFO - Epoch/batch: 9/1812, ibatch: 42312, loss: [0;36m0.5329[0m, std: 0.6124
2021-05-13 11:41:20,745 - INFO - Epoch/batch: 9/1963, ibatch: 42463, loss: [0;36m0.5206[0m, std: 0.6011
2021-05-13 11:41:26,743 - INFO - Epoch/batch: 9/2114, ibatch: 42614, loss: [0;36m0.5282[0m, std: 0.6049
2021-05-13 1

Epoch 288: ReduceOnPlateau set learning rate to 0.00036472996377170795.


2021-05-13 11:42:11,587 - INFO - loss: [0;32m0.5274[0m, std: 0.6123
2021-05-13 11:42:12,480 - INFO - Epoch/batch: 9/2718, ibatch: 43218, loss: [0;36m0.5435[0m, std: 0.6221
2021-05-13 11:42:19,283 - INFO - Epoch/batch: 9/2869, ibatch: 43369, loss: [0;36m0.5196[0m, std: 0.6030
2021-05-13 11:42:26,321 - INFO - Epoch/batch: 9/3020, ibatch: 43520, loss: [0;36m0.5413[0m, std: 0.6143
2021-05-13 11:42:41,838 - INFO - loss: [0;32m0.5324[0m, std: 0.6456
2021-05-13 11:42:42,670 - INFO - Epoch/batch: 9/3171, ibatch: 43671, loss: [0;36m0.5199[0m, std: 0.6046
2021-05-13 11:42:48,653 - INFO - Epoch/batch: 9/3322, ibatch: 43822, loss: [0;36m0.5185[0m, std: 0.5880
2021-05-13 11:42:54,709 - INFO - Epoch/batch: 9/3473, ibatch: 43973, loss: [0;36m0.5350[0m, std: 0.6204
2021-05-13 11:43:10,507 - INFO - loss: [0;32m0.5292[0m, std: 0.6235
2021-05-13 11:43:11,719 - INFO - Epoch/batch: 9/3624, ibatch: 44124, loss: [0;36m0.5166[0m, std: 0.6047
2021-05-13 11:43:18,359 - INFO - Epoch/batch: 9/

Epoch 299: ReduceOnPlateau set learning rate to 0.00032825696739453717.


2021-05-13 11:43:53,887 - INFO - Epoch/batch: 9/4379, ibatch: 44879, loss: [0;36m0.5167[0m, std: 0.5919
2021-05-13 11:44:08,350 - INFO - loss: [0;32m0.5274[0m, std: 0.6139
2021-05-13 11:44:09,453 - INFO - Epoch 9 average training loss: [0;46m0.5301[0m std: 0.6094
2021-05-13 11:44:09,458 - INFO - Epoch 9 average validate loss: [0;46m0.5284[0m std: 0.6177
2021-05-13 11:44:11,339 - INFO - Epoch/batch: 10/   0, ibatch: 45000, loss: [0;36m0.5243[0m, std: 0.6075
2021-05-13 11:44:21,370 - INFO - loss: [0;32m0.5274[0m, std: 0.6147
2021-05-13 11:44:27,512 - INFO - Epoch/batch: 10/ 151, ibatch: 45151, loss: [0;36m0.5381[0m, std: 0.6153
2021-05-13 11:44:33,465 - INFO - Epoch/batch: 10/ 302, ibatch: 45302, loss: [0;36m0.5085[0m, std: 0.5913
2021-05-13 11:44:49,325 - INFO - loss: [0;32m0.5275[0m, std: 0.6173
2021-05-13 11:44:49,484 - INFO - Epoch/batch: 10/ 453, ibatch: 45453, loss: [0;36m0.5221[0m, std: 0.6026
2021-05-13 11:44:55,492 - INFO - Epoch/batch: 10/ 604, ibatch: 45604

Epoch 310: ReduceOnPlateau set learning rate to 0.00029543127065508344.


2021-05-13 11:45:52,904 - INFO - Epoch/batch: 10/1510, ibatch: 46510, loss: [0;36m0.5580[0m, std: 0.6268
2021-05-13 11:45:58,935 - INFO - Epoch/batch: 10/1661, ibatch: 46661, loss: [0;36m0.5300[0m, std: 0.6046
2021-05-13 11:46:14,598 - INFO - loss: [0;32m0.5278[0m, std: 0.6165
2021-05-13 11:46:15,166 - INFO - Epoch/batch: 10/1812, ibatch: 46812, loss: [0;36m0.5200[0m, std: 0.5972
2021-05-13 11:46:21,408 - INFO - Epoch/batch: 10/1963, ibatch: 46963, loss: [0;36m0.5207[0m, std: 0.6051
2021-05-13 11:46:27,859 - INFO - Epoch/batch: 10/2114, ibatch: 47114, loss: [0;36m0.5292[0m, std: 0.6060
2021-05-13 11:46:44,167 - INFO - loss: [0;32m0.5276[0m, std: 0.6184
2021-05-13 11:46:44,680 - INFO - Epoch/batch: 10/2265, ibatch: 47265, loss: [0;36m0.5077[0m, std: 0.5905
2021-05-13 11:46:50,559 - INFO - Epoch/batch: 10/2416, ibatch: 47416, loss: [0;36m0.5111[0m, std: 0.6008
2021-05-13 11:46:56,560 - INFO - Epoch/batch: 10/2567, ibatch: 47567, loss: [0;36m0.5298[0m, std: 0.6141
2021

Epoch 321: ReduceOnPlateau set learning rate to 0.0002658881435895751.


2021-05-13 11:47:41,154 - INFO - loss: [0;32m0.5280[0m, std: 0.6130
2021-05-13 11:47:42,059 - INFO - Epoch/batch: 10/3171, ibatch: 48171, loss: [0;36m0.5399[0m, std: 0.6131
2021-05-13 11:47:48,455 - INFO - Epoch/batch: 10/3322, ibatch: 48322, loss: [0;36m0.5160[0m, std: 0.5983
2021-05-13 11:47:54,489 - INFO - Epoch/batch: 10/3473, ibatch: 48473, loss: [0;36m0.5326[0m, std: 0.6142
2021-05-13 11:48:09,610 - INFO - loss: [0;32m0.5276[0m, std: 0.6158
2021-05-13 11:48:10,426 - INFO - Epoch/batch: 10/3624, ibatch: 48624, loss: [0;36m0.5328[0m, std: 0.6066
2021-05-13 11:48:16,458 - INFO - Epoch/batch: 10/3775, ibatch: 48775, loss: [0;36m0.5385[0m, std: 0.6149
2021-05-13 11:48:22,655 - INFO - Epoch/batch: 10/3926, ibatch: 48926, loss: [0;36m0.5431[0m, std: 0.6127
2021-05-13 11:48:37,313 - INFO - loss: [0;32m0.5281[0m, std: 0.6193
2021-05-13 11:48:38,400 - INFO - Epoch/batch: 10/4077, ibatch: 49077, loss: [0;36m0.5261[0m, std: 0.6043
2021-05-13 11:48:44,497 - INFO - Epoch/ba

Epoch 332: ReduceOnPlateau set learning rate to 0.0002392993292306176.


2021-05-13 11:49:30,400 - INFO - Epoch/batch: 11/ 302, ibatch: 49802, loss: [0;36m0.5159[0m, std: 0.5955
2021-05-13 11:49:46,670 - INFO - loss: [0;32m0.5288[0m, std: 0.6276
2021-05-13 11:49:46,784 - INFO - Epoch/batch: 11/ 453, ibatch: 49953, loss: [0;36m0.5215[0m, std: 0.6003
2021-05-13 11:49:52,871 - INFO - Epoch/batch: 11/ 604, ibatch: 50104, loss: [0;36m0.5241[0m, std: 0.6031
2021-05-13 11:49:58,860 - INFO - Epoch/batch: 11/ 755, ibatch: 50255, loss: [0;36m0.5152[0m, std: 0.5911
2021-05-13 11:50:14,604 - INFO - loss: [0;32m0.5282[0m, std: 0.6196
2021-05-13 11:50:14,825 - INFO - Epoch/batch: 11/ 906, ibatch: 50406, loss: [0;36m0.5092[0m, std: 0.5953
2021-05-13 11:50:21,113 - INFO - Epoch/batch: 11/1057, ibatch: 50557, loss: [0;36m0.5230[0m, std: 0.6075
2021-05-13 11:50:27,683 - INFO - Epoch/batch: 11/1208, ibatch: 50708, loss: [0;36m0.5321[0m, std: 0.6097
2021-05-13 11:50:43,230 - INFO - loss: [0;32m0.5274[0m, std: 0.5997
2021-05-13 11:50:43,607 - INFO - Epoch/ba

Epoch 343: ReduceOnPlateau set learning rate to 0.00021536939630755584.


2021-05-13 11:51:18,404 - INFO - Epoch/batch: 11/1963, ibatch: 51463, loss: [0;36m0.5177[0m, std: 0.5964
2021-05-13 11:51:24,671 - INFO - Epoch/batch: 11/2114, ibatch: 51614, loss: [0;36m0.5161[0m, std: 0.6001
2021-05-13 11:51:40,470 - INFO - loss: [0;32m0.5275[0m, std: 0.6097
2021-05-13 11:51:41,102 - INFO - Epoch/batch: 11/2265, ibatch: 51765, loss: [0;36m0.5248[0m, std: 0.6079
2021-05-13 11:51:47,548 - INFO - Epoch/batch: 11/2416, ibatch: 51916, loss: [0;36m0.5395[0m, std: 0.6079
2021-05-13 11:51:53,503 - INFO - Epoch/batch: 11/2567, ibatch: 52067, loss: [0;36m0.5178[0m, std: 0.6063
2021-05-13 11:52:09,115 - INFO - loss: [0;32m0.5292[0m, std: 0.6274
2021-05-13 11:52:09,861 - INFO - Epoch/batch: 11/2718, ibatch: 52218, loss: [0;36m0.5281[0m, std: 0.6064
2021-05-13 11:52:16,135 - INFO - Epoch/batch: 11/2869, ibatch: 52369, loss: [0;36m0.5343[0m, std: 0.6168
2021-05-13 11:52:22,042 - INFO - Epoch/batch: 11/3020, ibatch: 52520, loss: [0;36m0.5255[0m, std: 0.6075
2021

Epoch 354: ReduceOnPlateau set learning rate to 0.00019383245667680025.


2021-05-13 11:53:05,806 - INFO - loss: [0;32m0.5272[0m, std: 0.6033
2021-05-13 11:53:05,827 - INFO - Saved model states in: earlystop_0.5272
2021-05-13 11:53:05,829 - INFO - Saved net python code: earlystop_0.5272/paddle_nets.py
2021-05-13 11:53:05,839 - INFO - Saved best model: earlystop_0.5272
2021-05-13 11:53:05,839 - INFO - Removing earlystop model: earlystop_0.5273
2021-05-13 11:53:06,867 - INFO - Epoch/batch: 11/3624, ibatch: 53124, loss: [0;36m0.5458[0m, std: 0.6286
2021-05-13 11:53:12,804 - INFO - Epoch/batch: 11/3775, ibatch: 53275, loss: [0;36m0.5177[0m, std: 0.6034
2021-05-13 11:53:18,923 - INFO - Epoch/batch: 11/3926, ibatch: 53426, loss: [0;36m0.5414[0m, std: 0.6077
2021-05-13 11:53:33,855 - INFO - loss: [0;32m0.5273[0m, std: 0.6082
2021-05-13 11:53:34,979 - INFO - Epoch/batch: 11/4077, ibatch: 53577, loss: [0;36m0.5346[0m, std: 0.6128
2021-05-13 11:53:41,186 - INFO - Epoch/batch: 11/4228, ibatch: 53728, loss: [0;36m0.5430[0m, std: 0.6122
2021-05-13 11:53:47,

Epoch 365: ReduceOnPlateau set learning rate to 0.00017444921100912022.


2021-05-13 11:54:56,360 - INFO - Epoch/batch: 12/ 755, ibatch: 54755, loss: [0;36m0.5198[0m, std: 0.6011
2021-05-13 11:55:12,321 - INFO - loss: [0;32m0.5268[0m, std: 0.6163
2021-05-13 11:55:12,339 - INFO - Saved model states in: earlystop_0.5268
2021-05-13 11:55:12,340 - INFO - Saved net python code: earlystop_0.5268/paddle_nets.py
2021-05-13 11:55:12,348 - INFO - Saved best model: earlystop_0.5268
2021-05-13 11:55:12,349 - INFO - Removing earlystop model: earlystop_0.5270.1
2021-05-13 11:55:12,544 - INFO - Epoch/batch: 12/ 906, ibatch: 54906, loss: [0;36m0.5275[0m, std: 0.6025
2021-05-13 11:55:18,703 - INFO - Epoch/batch: 12/1057, ibatch: 55057, loss: [0;36m0.5260[0m, std: 0.6000
2021-05-13 11:55:24,307 - INFO - Epoch/batch: 12/1208, ibatch: 55208, loss: [0;36m0.5353[0m, std: 0.6050
2021-05-13 11:55:39,667 - INFO - loss: [0;32m0.5276[0m, std: 0.6190
2021-05-13 11:55:40,019 - INFO - Epoch/batch: 12/1359, ibatch: 55359, loss: [0;36m0.5220[0m, std: 0.5952
2021-05-13 11:55:4

Epoch 376: ReduceOnPlateau set learning rate to 0.0001570042899082082.


2021-05-13 11:56:43,904 - INFO - Epoch/batch: 12/2416, ibatch: 56416, loss: [0;36m0.5297[0m, std: 0.6095
2021-05-13 11:56:50,166 - INFO - Epoch/batch: 12/2567, ibatch: 56567, loss: [0;36m0.5262[0m, std: 0.6066
2021-05-13 11:57:05,482 - INFO - loss: [0;32m0.5265[0m, std: 0.6096
2021-05-13 11:57:05,500 - INFO - Saved model states in: earlystop_0.5265
2021-05-13 11:57:05,502 - INFO - Saved net python code: earlystop_0.5265/paddle_nets.py
2021-05-13 11:57:05,509 - INFO - Saved best model: earlystop_0.5265
2021-05-13 11:57:05,510 - INFO - Removing earlystop model: earlystop_0.5267
2021-05-13 11:57:06,253 - INFO - Epoch/batch: 12/2718, ibatch: 56718, loss: [0;36m0.5324[0m, std: 0.6187
2021-05-13 11:57:12,537 - INFO - Epoch/batch: 12/2869, ibatch: 56869, loss: [0;36m0.5371[0m, std: 0.6172
2021-05-13 11:57:18,509 - INFO - Epoch/batch: 12/3020, ibatch: 57020, loss: [0;36m0.5273[0m, std: 0.5988
2021-05-13 11:57:33,927 - INFO - loss: [0;32m0.5269[0m, std: 0.5963
2021-05-13 11:57:34,

Epoch 387: ReduceOnPlateau set learning rate to 0.0001413038609173874.


2021-05-13 11:58:29,587 - INFO - loss: [0;32m0.5266[0m, std: 0.6081
2021-05-13 11:58:30,865 - INFO - Epoch/batch: 12/4077, ibatch: 58077, loss: [0;36m0.5413[0m, std: 0.6159
2021-05-13 11:58:37,169 - INFO - Epoch/batch: 12/4228, ibatch: 58228, loss: [0;36m0.5187[0m, std: 0.5941
2021-05-13 11:58:43,218 - INFO - Epoch/batch: 12/4379, ibatch: 58379, loss: [0;36m0.5123[0m, std: 0.6018
2021-05-13 11:58:57,957 - INFO - loss: [0;32m0.5280[0m, std: 0.6244
2021-05-13 11:58:59,165 - INFO - Epoch 12 average training loss: [0;46m0.5283[0m std: 0.6073
2021-05-13 11:58:59,170 - INFO - Epoch 12 average validate loss: [0;46m0.5271[0m std: 0.6133
2021-05-13 11:59:00,896 - INFO - Epoch/batch: 13/   0, ibatch: 58500, loss: [0;36m0.5191[0m, std: 0.6008
2021-05-13 11:59:10,783 - INFO - loss: [0;32m0.5281[0m, std: 0.6248
2021-05-13 11:59:17,067 - INFO - Epoch/batch: 13/ 151, ibatch: 58651, loss: [0;36m0.5426[0m, std: 0.6293
2021-05-13 11:59:23,359 - INFO - Epoch/batch: 13/ 302, ibatch: 58

Epoch 408: ReduceOnPlateau set learning rate to 0.00012717347482564865.


2021-05-13 12:02:04,198 - INFO - loss: [0;32m0.5272[0m, std: 0.6139
2021-05-13 12:02:04,984 - INFO - Epoch/batch: 13/2718, ibatch: 61218, loss: [0;36m0.5393[0m, std: 0.6207
2021-05-13 12:02:11,555 - INFO - Epoch/batch: 13/2869, ibatch: 61369, loss: [0;36m0.5449[0m, std: 0.6204
2021-05-13 12:02:17,590 - INFO - Epoch/batch: 13/3020, ibatch: 61520, loss: [0;36m0.5290[0m, std: 0.6145
2021-05-13 12:02:33,612 - INFO - loss: [0;32m0.5275[0m, std: 0.6159
2021-05-13 12:02:34,402 - INFO - Epoch/batch: 13/3171, ibatch: 61671, loss: [0;36m0.5225[0m, std: 0.5964
2021-05-13 12:02:40,358 - INFO - Epoch/batch: 13/3322, ibatch: 61822, loss: [0;36m0.5256[0m, std: 0.6078
2021-05-13 12:02:46,324 - INFO - Epoch/batch: 13/3473, ibatch: 61973, loss: [0;36m0.5371[0m, std: 0.6092
2021-05-13 12:03:01,568 - INFO - loss: [0;32m0.5273[0m, std: 0.6155
2021-05-13 12:03:02,616 - INFO - Epoch/batch: 13/3624, ibatch: 62124, loss: [0;36m0.5274[0m, std: 0.6054
2021-05-13 12:03:08,821 - INFO - Epoch/ba

Epoch 419: ReduceOnPlateau set learning rate to 0.00011445612734308378.


2021-05-13 12:03:43,124 - INFO - Epoch/batch: 13/4379, ibatch: 62879, loss: [0;36m0.5330[0m, std: 0.6075
2021-05-13 12:03:57,901 - INFO - loss: [0;32m0.5266[0m, std: 0.6080
2021-05-13 12:03:58,950 - INFO - Epoch 13 average training loss: [0;46m0.5282[0m std: 0.6077
2021-05-13 12:03:58,955 - INFO - Epoch 13 average validate loss: [0;46m0.5273[0m std: 0.6164
2021-05-13 12:04:00,805 - INFO - Epoch/batch: 14/   0, ibatch: 63000, loss: [0;36m0.5374[0m, std: 0.6115
2021-05-13 12:04:11,091 - INFO - loss: [0;32m0.5266[0m, std: 0.6083
2021-05-13 12:04:17,528 - INFO - Epoch/batch: 14/ 151, ibatch: 63151, loss: [0;36m0.5306[0m, std: 0.6145
2021-05-13 12:04:24,427 - INFO - Epoch/batch: 14/ 302, ibatch: 63302, loss: [0;36m0.5192[0m, std: 0.6012
2021-05-13 12:04:42,115 - INFO - loss: [0;32m0.5270[0m, std: 0.6159
2021-05-13 12:04:42,245 - INFO - Epoch/batch: 14/ 453, ibatch: 63453, loss: [0;36m0.5470[0m, std: 0.6146
2021-05-13 12:04:47,885 - INFO - Epoch/batch: 14/ 604, ibatch: 63

Epoch 430: ReduceOnPlateau set learning rate to 0.00010301051460877541.


2021-05-13 12:05:44,884 - INFO - Epoch/batch: 14/1510, ibatch: 64510, loss: [0;36m0.5412[0m, std: 0.6159
2021-05-13 12:05:50,811 - INFO - Epoch/batch: 14/1661, ibatch: 64661, loss: [0;36m0.5237[0m, std: 0.5978
2021-05-13 12:06:06,219 - INFO - loss: [0;32m0.5276[0m, std: 0.6183
2021-05-13 12:06:06,705 - INFO - Epoch/batch: 14/1812, ibatch: 64812, loss: [0;36m0.5198[0m, std: 0.5996
2021-05-13 12:06:12,560 - INFO - Epoch/batch: 14/1963, ibatch: 64963, loss: [0;36m0.5072[0m, std: 0.5924
2021-05-13 12:06:18,245 - INFO - Epoch/batch: 14/2114, ibatch: 65114, loss: [0;36m0.5323[0m, std: 0.6056
2021-05-13 12:06:34,208 - INFO - loss: [0;32m0.5268[0m, std: 0.6081
2021-05-13 12:06:34,805 - INFO - Epoch/batch: 14/2265, ibatch: 65265, loss: [0;36m0.5342[0m, std: 0.6070
2021-05-13 12:06:41,017 - INFO - Epoch/batch: 14/2416, ibatch: 65416, loss: [0;36m0.5411[0m, std: 0.6180
2021-05-13 12:06:47,179 - INFO - Epoch/batch: 14/2567, ibatch: 65567, loss: [0;36m0.5330[0m, std: 0.6068
2021

Epoch 441: ReduceOnPlateau set learning rate to 9.270946314789788e-05.


2021-05-13 12:07:30,500 - INFO - loss: [0;32m0.5268[0m, std: 0.6153
2021-05-13 12:07:31,386 - INFO - Epoch/batch: 14/3171, ibatch: 66171, loss: [0;36m0.5180[0m, std: 0.5996
2021-05-13 12:07:37,904 - INFO - Epoch/batch: 14/3322, ibatch: 66322, loss: [0;36m0.5358[0m, std: 0.6064
2021-05-13 12:07:43,807 - INFO - Epoch/batch: 14/3473, ibatch: 66473, loss: [0;36m0.5432[0m, std: 0.6276
2021-05-13 12:07:59,000 - INFO - loss: [0;32m0.5269[0m, std: 0.6114
2021-05-13 12:07:59,919 - INFO - Epoch/batch: 14/3624, ibatch: 66624, loss: [0;36m0.5245[0m, std: 0.6023
2021-05-13 12:08:06,113 - INFO - Epoch/batch: 14/3775, ibatch: 66775, loss: [0;36m0.4950[0m, std: 0.5821
2021-05-13 12:08:12,583 - INFO - Epoch/batch: 14/3926, ibatch: 66926, loss: [0;36m0.5399[0m, std: 0.6233
2021-05-13 12:08:27,698 - INFO - loss: [0;32m0.5272[0m, std: 0.6114
2021-05-13 12:08:28,938 - INFO - Epoch/batch: 14/4077, ibatch: 67077, loss: [0;36m0.5265[0m, std: 0.6105
2021-05-13 12:08:35,102 - INFO - Epoch/ba

Epoch 452: ReduceOnPlateau set learning rate to 8.343851683310809e-05.


2021-05-13 12:09:21,155 - INFO - Epoch/batch: 15/ 302, ibatch: 67802, loss: [0;36m0.5174[0m, std: 0.6015
2021-05-13 12:09:37,010 - INFO - loss: [0;32m0.5273[0m, std: 0.6177
2021-05-13 12:09:37,124 - INFO - Epoch/batch: 15/ 453, ibatch: 67953, loss: [0;36m0.5179[0m, std: 0.5923
2021-05-13 12:09:43,075 - INFO - Epoch/batch: 15/ 604, ibatch: 68104, loss: [0;36m0.5408[0m, std: 0.6145
2021-05-13 12:09:49,337 - INFO - Epoch/batch: 15/ 755, ibatch: 68255, loss: [0;36m0.5324[0m, std: 0.6048
2021-05-13 12:10:05,863 - INFO - loss: [0;32m0.5267[0m, std: 0.6006
2021-05-13 12:10:06,110 - INFO - Epoch/batch: 15/ 906, ibatch: 68406, loss: [0;36m0.5455[0m, std: 0.6161
2021-05-13 12:10:12,023 - INFO - Epoch/batch: 15/1057, ibatch: 68557, loss: [0;36m0.5156[0m, std: 0.5900
2021-05-13 12:10:18,313 - INFO - Epoch/batch: 15/1208, ibatch: 68708, loss: [0;36m0.5474[0m, std: 0.6230
2021-05-13 12:10:34,002 - INFO - loss: [0;32m0.5266[0m, std: 0.6093
2021-05-13 12:10:34,377 - INFO - Epoch/ba

Epoch 463: ReduceOnPlateau set learning rate to 7.509466514979728e-05.


2021-05-13 12:11:09,125 - INFO - Epoch/batch: 15/1963, ibatch: 69463, loss: [0;36m0.5200[0m, std: 0.5965
2021-05-13 12:11:15,737 - INFO - Epoch/batch: 15/2114, ibatch: 69614, loss: [0;36m0.5309[0m, std: 0.6096
2021-05-13 12:11:31,889 - INFO - loss: [0;32m0.5268[0m, std: 0.6156
2021-05-13 12:11:32,457 - INFO - Epoch/batch: 15/2265, ibatch: 69765, loss: [0;36m0.5309[0m, std: 0.6141
2021-05-13 12:11:38,630 - INFO - Epoch/batch: 15/2416, ibatch: 69916, loss: [0;36m0.5261[0m, std: 0.6056
2021-05-13 12:11:44,699 - INFO - Epoch/batch: 15/2567, ibatch: 70067, loss: [0;36m0.5218[0m, std: 0.6021
2021-05-13 12:11:59,726 - INFO - loss: [0;32m0.5268[0m, std: 0.6135
2021-05-13 12:12:00,582 - INFO - Epoch/batch: 15/2718, ibatch: 70218, loss: [0;36m0.5306[0m, std: 0.6165
2021-05-13 12:12:06,775 - INFO - Epoch/batch: 15/2869, ibatch: 70369, loss: [0;36m0.5340[0m, std: 0.6135
2021-05-13 12:12:13,029 - INFO - Epoch/batch: 15/3020, ibatch: 70520, loss: [0;36m0.5340[0m, std: 0.6185
2021

Epoch 474: ReduceOnPlateau set learning rate to 6.758519863481756e-05.


2021-05-13 12:12:57,414 - INFO - loss: [0;32m0.5272[0m, std: 0.6177
2021-05-13 12:12:58,404 - INFO - Epoch/batch: 15/3624, ibatch: 71124, loss: [0;36m0.5188[0m, std: 0.6026
2021-05-13 12:13:04,570 - INFO - Epoch/batch: 15/3775, ibatch: 71275, loss: [0;36m0.5355[0m, std: 0.6118
2021-05-13 12:13:10,624 - INFO - Epoch/batch: 15/3926, ibatch: 71426, loss: [0;36m0.5322[0m, std: 0.6063
2021-05-13 12:13:25,722 - INFO - loss: [0;32m0.5272[0m, std: 0.6166
2021-05-13 12:13:26,894 - INFO - Epoch/batch: 15/4077, ibatch: 71577, loss: [0;36m0.5095[0m, std: 0.5894
2021-05-13 12:13:33,034 - INFO - Epoch/batch: 15/4228, ibatch: 71728, loss: [0;36m0.5118[0m, std: 0.5973
2021-05-13 12:13:39,486 - INFO - Epoch/batch: 15/4379, ibatch: 71879, loss: [0;36m0.5391[0m, std: 0.6221
2021-05-13 12:13:54,165 - INFO - loss: [0;32m0.5270[0m, std: 0.6123
2021-05-13 12:13:55,274 - INFO - Epoch 15 average training loss: [0;46m0.5278[0m std: 0.6068
2021-05-13 12:13:55,278 - INFO - Epoch 15 average val

Epoch 485: ReduceOnPlateau set learning rate to 6.0826678771335806e-05.


2021-05-13 12:14:47,610 - INFO - Epoch/batch: 16/ 755, ibatch: 72755, loss: [0;36m0.5327[0m, std: 0.6172
2021-05-13 12:15:03,384 - INFO - loss: [0;32m0.5264[0m, std: 0.6102
2021-05-13 12:15:03,404 - INFO - Saved model states in: earlystop_0.5264
2021-05-13 12:15:03,405 - INFO - Saved net python code: earlystop_0.5264/paddle_nets.py
2021-05-13 12:15:03,415 - INFO - Saved best model: earlystop_0.5264
2021-05-13 12:15:03,416 - INFO - Removing earlystop model: earlystop_0.5265
2021-05-13 12:15:03,666 - INFO - Epoch/batch: 16/ 906, ibatch: 72906, loss: [0;36m0.5323[0m, std: 0.6194
2021-05-13 12:15:10,121 - INFO - Epoch/batch: 16/1057, ibatch: 73057, loss: [0;36m0.5324[0m, std: 0.6079
2021-05-13 12:15:16,444 - INFO - Epoch/batch: 16/1208, ibatch: 73208, loss: [0;36m0.5343[0m, std: 0.6094
2021-05-13 12:15:32,816 - INFO - loss: [0;32m0.5272[0m, std: 0.6179
2021-05-13 12:15:33,247 - INFO - Epoch/batch: 16/1359, ibatch: 73359, loss: [0;36m0.5284[0m, std: 0.5978
2021-05-13 12:15:39,

Epoch 496: ReduceOnPlateau set learning rate to 5.4744010894202224e-05.


2021-05-13 12:16:36,682 - INFO - Epoch/batch: 16/2416, ibatch: 74416, loss: [0;36m0.5329[0m, std: 0.6073
2021-05-13 12:16:42,669 - INFO - Epoch/batch: 16/2567, ibatch: 74567, loss: [0;36m0.5104[0m, std: 0.5913
2021-05-13 12:16:57,783 - INFO - loss: [0;32m0.5273[0m, std: 0.6189
2021-05-13 12:16:58,464 - INFO - Epoch/batch: 16/2718, ibatch: 74718, loss: [0;36m0.5124[0m, std: 0.5946
2021-05-13 12:17:04,470 - INFO - Epoch/batch: 16/2869, ibatch: 74869, loss: [0;36m0.5195[0m, std: 0.6108
2021-05-13 12:17:10,648 - INFO - Epoch/batch: 16/3020, ibatch: 75020, loss: [0;36m0.5395[0m, std: 0.6178
2021-05-13 12:17:25,893 - INFO - loss: [0;32m0.5265[0m, std: 0.6107
2021-05-13 12:17:26,826 - INFO - Epoch/batch: 16/3171, ibatch: 75171, loss: [0;36m0.5350[0m, std: 0.6154
2021-05-13 12:17:32,704 - INFO - Epoch/batch: 16/3322, ibatch: 75322, loss: [0;36m0.5282[0m, std: 0.6066
2021-05-13 12:17:38,780 - INFO - Epoch/batch: 16/3473, ibatch: 75473, loss: [0;36m0.5279[0m, std: 0.6065
2021

Epoch 507: ReduceOnPlateau set learning rate to 4.9269609804782e-05.


2021-05-13 12:18:22,421 - INFO - loss: [0;32m0.5266[0m, std: 0.6096
2021-05-13 12:18:23,644 - INFO - Epoch/batch: 16/4077, ibatch: 76077, loss: [0;36m0.5278[0m, std: 0.6093
2021-05-13 12:18:29,749 - INFO - Epoch/batch: 16/4228, ibatch: 76228, loss: [0;36m0.5316[0m, std: 0.6083
2021-05-13 12:18:35,870 - INFO - Epoch/batch: 16/4379, ibatch: 76379, loss: [0;36m0.5133[0m, std: 0.6013
2021-05-13 12:18:50,355 - INFO - loss: [0;32m0.5267[0m, std: 0.6126
2021-05-13 12:18:51,450 - INFO - Epoch 16 average training loss: [0;46m0.5275[0m std: 0.6069
2021-05-13 12:18:51,455 - INFO - Epoch 16 average validate loss: [0;46m0.5268[0m std: 0.6125
2021-05-13 12:18:53,241 - INFO - Epoch/batch: 17/   0, ibatch: 76500, loss: [0;36m0.5348[0m, std: 0.6073
2021-05-13 12:19:03,452 - INFO - loss: [0;32m0.5267[0m, std: 0.6126
2021-05-13 12:19:09,656 - INFO - Epoch/batch: 17/ 151, ibatch: 76651, loss: [0;36m0.5412[0m, std: 0.6231
2021-05-13 12:19:15,707 - INFO - Epoch/batch: 17/ 302, ibatch: 76

Epoch 518: ReduceOnPlateau set learning rate to 4.43426488243038e-05.


2021-05-13 12:20:11,178 - INFO - Epoch/batch: 17/1208, ibatch: 77708, loss: [0;36m0.5272[0m, std: 0.6045
2021-05-13 12:20:27,155 - INFO - loss: [0;32m0.5273[0m, std: 0.6138
2021-05-13 12:20:27,536 - INFO - Epoch/batch: 17/1359, ibatch: 77859, loss: [0;36m0.5256[0m, std: 0.6047
2021-05-13 12:20:33,445 - INFO - Epoch/batch: 17/1510, ibatch: 78010, loss: [0;36m0.5164[0m, std: 0.5847
2021-05-13 12:20:39,529 - INFO - Epoch/batch: 17/1661, ibatch: 78161, loss: [0;36m0.5238[0m, std: 0.6095
2021-05-13 12:20:54,720 - INFO - loss: [0;32m0.5268[0m, std: 0.6100
2021-05-13 12:20:55,312 - INFO - Epoch/batch: 17/1812, ibatch: 78312, loss: [0;36m0.5321[0m, std: 0.6153
2021-05-13 12:21:01,833 - INFO - Epoch/batch: 17/1963, ibatch: 78463, loss: [0;36m0.5334[0m, std: 0.6141
2021-05-13 12:21:08,242 - INFO - Epoch/batch: 17/2114, ibatch: 78614, loss: [0;36m0.5393[0m, std: 0.6178
2021-05-13 12:21:23,633 - INFO - loss: [0;32m0.5267[0m, std: 0.6092
2021-05-13 12:21:24,271 - INFO - Epoch/ba

Epoch 529: ReduceOnPlateau set learning rate to 3.990838394187342e-05.


2021-05-13 12:21:58,797 - INFO - Epoch/batch: 17/2869, ibatch: 79369, loss: [0;36m0.5292[0m, std: 0.6077
2021-05-13 12:22:04,841 - INFO - Epoch/batch: 17/3020, ibatch: 79520, loss: [0;36m0.5394[0m, std: 0.6178
2021-05-13 12:22:20,122 - INFO - loss: [0;32m0.5268[0m, std: 0.6144
2021-05-13 12:22:21,014 - INFO - Epoch/batch: 17/3171, ibatch: 79671, loss: [0;36m0.5252[0m, std: 0.5975
2021-05-13 12:22:28,070 - INFO - Epoch/batch: 17/3322, ibatch: 79822, loss: [0;36m0.5443[0m, std: 0.6171
2021-05-13 12:22:34,635 - INFO - Epoch/batch: 17/3473, ibatch: 79973, loss: [0;36m0.5155[0m, std: 0.5952
2021-05-13 12:22:49,871 - INFO - loss: [0;32m0.5266[0m, std: 0.6102
2021-05-13 12:22:50,738 - INFO - Epoch/batch: 17/3624, ibatch: 80124, loss: [0;36m0.5310[0m, std: 0.6110
2021-05-13 12:22:56,991 - INFO - Epoch/batch: 17/3775, ibatch: 80275, loss: [0;36m0.5163[0m, std: 0.5962
2021-05-13 12:23:03,891 - INFO - Epoch/batch: 17/3926, ibatch: 80426, loss: [0;36m0.5108[0m, std: 0.5880
2021

Epoch 540: ReduceOnPlateau set learning rate to 3.591754554768608e-05.


2021-05-13 12:23:46,101 - INFO - loss: [0;32m0.5269[0m, std: 0.6148
2021-05-13 12:23:47,208 - INFO - Epoch 17 average training loss: [0;46m0.5273[0m std: 0.6061
2021-05-13 12:23:47,255 - INFO - Epoch 17 average validate loss: [0;46m0.5269[0m std: 0.6122
2021-05-13 12:23:49,130 - INFO - Epoch/batch: 18/   0, ibatch: 81000, loss: [0;36m0.5255[0m, std: 0.6096
2021-05-13 12:23:59,574 - INFO - loss: [0;32m0.5269[0m, std: 0.6147
2021-05-13 12:24:05,992 - INFO - Epoch/batch: 18/ 151, ibatch: 81151, loss: [0;36m0.5405[0m, std: 0.6207
2021-05-13 12:24:12,096 - INFO - Epoch/batch: 18/ 302, ibatch: 81302, loss: [0;36m0.5284[0m, std: 0.6090
2021-05-13 12:24:28,194 - INFO - loss: [0;32m0.5265[0m, std: 0.6094
2021-05-13 12:24:28,297 - INFO - Epoch/batch: 18/ 453, ibatch: 81453, loss: [0;36m0.5179[0m, std: 0.5999
2021-05-13 12:24:34,186 - INFO - Epoch/batch: 18/ 604, ibatch: 81604, loss: [0;36m0.5056[0m, std: 0.5789
2021-05-13 12:24:40,281 - INFO - Epoch/batch: 18/ 755, ibatch: 81

Epoch 551: ReduceOnPlateau set learning rate to 3.232579099291747e-05.


2021-05-13 12:25:36,732 - INFO - Epoch/batch: 18/1661, ibatch: 82661, loss: [0;36m0.5312[0m, std: 0.6174
2021-05-13 12:25:51,722 - INFO - loss: [0;32m0.5270[0m, std: 0.6171
2021-05-13 12:25:52,264 - INFO - Epoch/batch: 18/1812, ibatch: 82812, loss: [0;36m0.5102[0m, std: 0.5861
2021-05-13 12:25:58,559 - INFO - Epoch/batch: 18/1963, ibatch: 82963, loss: [0;36m0.5394[0m, std: 0.6177
2021-05-13 12:26:04,922 - INFO - Epoch/batch: 18/2114, ibatch: 83114, loss: [0;36m0.5352[0m, std: 0.6130
2021-05-13 12:26:21,222 - INFO - loss: [0;32m0.5266[0m, std: 0.6112
2021-05-13 12:26:21,952 - INFO - Epoch/batch: 18/2265, ibatch: 83265, loss: [0;36m0.5347[0m, std: 0.6119
2021-05-13 12:26:28,627 - INFO - Epoch/batch: 18/2416, ibatch: 83416, loss: [0;36m0.5216[0m, std: 0.5965
2021-05-13 12:26:35,214 - INFO - Epoch/batch: 18/2567, ibatch: 83567, loss: [0;36m0.5506[0m, std: 0.6319
2021-05-13 12:26:50,215 - INFO - loss: [0;32m0.5265[0m, std: 0.6105
2021-05-13 12:26:51,128 - INFO - Epoch/ba

Epoch 562: ReduceOnPlateau set learning rate to 2.9093211893625727e-05.


2021-05-13 12:27:27,872 - INFO - Epoch/batch: 18/3322, ibatch: 84322, loss: [0;36m0.5324[0m, std: 0.6140
2021-05-13 12:27:34,304 - INFO - Epoch/batch: 18/3473, ibatch: 84473, loss: [0;36m0.5337[0m, std: 0.6124
2021-05-13 12:27:49,303 - INFO - loss: [0;32m0.5266[0m, std: 0.6103
2021-05-13 12:27:50,337 - INFO - Epoch/batch: 18/3624, ibatch: 84624, loss: [0;36m0.5288[0m, std: 0.6045
2021-05-13 12:27:56,604 - INFO - Epoch/batch: 18/3775, ibatch: 84775, loss: [0;36m0.5180[0m, std: 0.5981
2021-05-13 12:28:02,411 - INFO - Epoch/batch: 18/3926, ibatch: 84926, loss: [0;36m0.5175[0m, std: 0.6011
2021-05-13 12:28:17,625 - INFO - loss: [0;32m0.5266[0m, std: 0.6098
2021-05-13 12:28:18,845 - INFO - Epoch/batch: 18/4077, ibatch: 85077, loss: [0;36m0.5274[0m, std: 0.6062
2021-05-13 12:28:25,226 - INFO - Epoch/batch: 18/4228, ibatch: 85228, loss: [0;36m0.5396[0m, std: 0.6283
2021-05-13 12:28:31,749 - INFO - Epoch/batch: 18/4379, ibatch: 85379, loss: [0;36m0.5342[0m, std: 0.6067
2021

Epoch 573: ReduceOnPlateau set learning rate to 2.6183890704263157e-05.


2021-05-13 12:29:28,018 - INFO - loss: [0;32m0.5268[0m, std: 0.6124
2021-05-13 12:29:28,133 - INFO - Epoch/batch: 19/ 453, ibatch: 85953, loss: [0;36m0.5153[0m, std: 0.5848
2021-05-13 12:29:34,508 - INFO - Epoch/batch: 19/ 604, ibatch: 86104, loss: [0;36m0.5241[0m, std: 0.5981
2021-05-13 12:29:40,621 - INFO - Epoch/batch: 19/ 755, ibatch: 86255, loss: [0;36m0.5357[0m, std: 0.6073
2021-05-13 12:29:56,704 - INFO - loss: [0;32m0.5272[0m, std: 0.6182
2021-05-13 12:29:57,018 - INFO - Epoch/batch: 19/ 906, ibatch: 86406, loss: [0;36m0.5058[0m, std: 0.5889
2021-05-13 12:30:03,148 - INFO - Epoch/batch: 19/1057, ibatch: 86557, loss: [0;36m0.5175[0m, std: 0.6020
2021-05-13 12:30:09,376 - INFO - Epoch/batch: 19/1208, ibatch: 86708, loss: [0;36m0.5371[0m, std: 0.6182
2021-05-13 12:30:25,117 - INFO - loss: [0;32m0.5270[0m, std: 0.6147
2021-05-13 12:30:25,451 - INFO - Epoch/batch: 19/1359, ibatch: 86859, loss: [0;36m0.5246[0m, std: 0.6021
2021-05-13 12:30:31,921 - INFO - Epoch/ba

Epoch 584: ReduceOnPlateau set learning rate to 2.356550163383684e-05.


2021-05-13 12:31:06,092 - INFO - Epoch/batch: 19/2114, ibatch: 87614, loss: [0;36m0.5177[0m, std: 0.5887
2021-05-13 12:31:21,599 - INFO - loss: [0;32m0.5267[0m, std: 0.6118
2021-05-13 12:31:22,237 - INFO - Epoch/batch: 19/2265, ibatch: 87765, loss: [0;36m0.5352[0m, std: 0.6181
2021-05-13 12:31:29,590 - INFO - Epoch/batch: 19/2416, ibatch: 87916, loss: [0;36m0.5398[0m, std: 0.6194
2021-05-13 12:31:35,812 - INFO - Epoch/batch: 19/2567, ibatch: 88067, loss: [0;36m0.5237[0m, std: 0.5931
2021-05-13 12:31:50,617 - INFO - loss: [0;32m0.5268[0m, std: 0.6140
2021-05-13 12:31:51,407 - INFO - Epoch/batch: 19/2718, ibatch: 88218, loss: [0;36m0.5113[0m, std: 0.5903
2021-05-13 12:31:57,650 - INFO - Epoch/batch: 19/2869, ibatch: 88369, loss: [0;36m0.5301[0m, std: 0.6069
2021-05-13 12:32:03,942 - INFO - Epoch/batch: 19/3020, ibatch: 88520, loss: [0;36m0.5157[0m, std: 0.6030
2021-05-13 12:32:19,726 - INFO - loss: [0;32m0.5267[0m, std: 0.6118
2021-05-13 12:32:20,634 - INFO - Epoch/ba

Epoch 595: ReduceOnPlateau set learning rate to 2.1208951470453157e-05.


2021-05-13 12:32:55,713 - INFO - Epoch/batch: 19/3775, ibatch: 89275, loss: [0;36m0.5182[0m, std: 0.5927
2021-05-13 12:33:02,381 - INFO - Epoch/batch: 19/3926, ibatch: 89426, loss: [0;36m0.5198[0m, std: 0.5986
2021-05-13 12:33:17,754 - INFO - loss: [0;32m0.5269[0m, std: 0.6148
2021-05-13 12:33:18,889 - INFO - Epoch/batch: 19/4077, ibatch: 89577, loss: [0;36m0.5255[0m, std: 0.6075
2021-05-13 12:33:25,096 - INFO - Epoch/batch: 19/4228, ibatch: 89728, loss: [0;36m0.5392[0m, std: 0.6194
2021-05-13 12:33:31,138 - INFO - Epoch/batch: 19/4379, ibatch: 89879, loss: [0;36m0.5352[0m, std: 0.6117
2021-05-13 12:33:45,614 - INFO - loss: [0;32m0.5267[0m, std: 0.6128
2021-05-13 12:33:46,752 - INFO - Epoch 19 average training loss: [0;46m0.5273[0m std: 0.6064
2021-05-13 12:33:46,757 - INFO - Epoch 19 average validate loss: [0;46m0.5268[0m std: 0.6130
2021-05-13 12:33:48,498 - INFO - Epoch/batch: 20/   0, ibatch: 90000, loss: [0;36m0.5184[0m, std: 0.6033
2021-05-13 12:33:58,679 - IN

Epoch 606: ReduceOnPlateau set learning rate to 1.9088056323407842e-05.


2021-05-13 12:34:55,416 - INFO - loss: [0;32m0.5272[0m, std: 0.6176
2021-05-13 12:34:55,671 - INFO - Epoch/batch: 20/ 906, ibatch: 90906, loss: [0;36m0.5170[0m, std: 0.5945
2021-05-13 12:35:02,121 - INFO - Epoch/batch: 20/1057, ibatch: 91057, loss: [0;36m0.5300[0m, std: 0.6093
2021-05-13 12:35:08,647 - INFO - Epoch/batch: 20/1208, ibatch: 91208, loss: [0;36m0.5092[0m, std: 0.5901
2021-05-13 12:35:24,327 - INFO - loss: [0;32m0.5272[0m, std: 0.6179
2021-05-13 12:35:24,727 - INFO - Epoch/batch: 20/1359, ibatch: 91359, loss: [0;36m0.5227[0m, std: 0.6010
2021-05-13 12:35:30,948 - INFO - Epoch/batch: 20/1510, ibatch: 91510, loss: [0;36m0.5485[0m, std: 0.6408
2021-05-13 12:35:36,877 - INFO - Epoch/batch: 20/1661, ibatch: 91661, loss: [0;36m0.5230[0m, std: 0.6096
2021-05-13 12:35:52,153 - INFO - loss: [0;32m0.5267[0m, std: 0.6120
2021-05-13 12:35:52,644 - INFO - Epoch/batch: 20/1812, ibatch: 91812, loss: [0;36m0.5288[0m, std: 0.6013
2021-05-13 12:35:58,827 - INFO - Epoch/ba

Epoch 617: ReduceOnPlateau set learning rate to 1.717925069106706e-05.


2021-05-13 12:36:33,773 - INFO - Epoch/batch: 20/2567, ibatch: 92567, loss: [0;36m0.5118[0m, std: 0.5949
2021-05-13 12:36:48,851 - INFO - loss: [0;32m0.5266[0m, std: 0.6117
2021-05-13 12:36:49,588 - INFO - Epoch/batch: 20/2718, ibatch: 92718, loss: [0;36m0.5361[0m, std: 0.6191
2021-05-13 12:36:55,771 - INFO - Epoch/batch: 20/2869, ibatch: 92869, loss: [0;36m0.5348[0m, std: 0.6114
2021-05-13 12:37:01,712 - INFO - Epoch/batch: 20/3020, ibatch: 93020, loss: [0;36m0.5353[0m, std: 0.6062
2021-05-13 12:37:17,022 - INFO - loss: [0;32m0.5265[0m, std: 0.6087
2021-05-13 12:37:17,916 - INFO - Epoch/batch: 20/3171, ibatch: 93171, loss: [0;36m0.5280[0m, std: 0.6126
2021-05-13 12:37:23,763 - INFO - Epoch/batch: 20/3322, ibatch: 93322, loss: [0;36m0.5355[0m, std: 0.6133
2021-05-13 12:37:29,869 - INFO - Epoch/batch: 20/3473, ibatch: 93473, loss: [0;36m0.5235[0m, std: 0.5930
2021-05-13 12:37:45,210 - INFO - loss: [0;32m0.5266[0m, std: 0.6110
2021-05-13 12:37:46,192 - INFO - Epoch/ba

Epoch 628: ReduceOnPlateau set learning rate to 1.5461325621960354e-05.


2021-05-13 12:38:21,114 - INFO - Epoch/batch: 20/4228, ibatch: 94228, loss: [0;36m0.5238[0m, std: 0.6005
2021-05-13 12:38:27,253 - INFO - Epoch/batch: 20/4379, ibatch: 94379, loss: [0;36m0.5276[0m, std: 0.6061
2021-05-13 12:38:41,611 - INFO - loss: [0;32m0.5268[0m, std: 0.6126
2021-05-13 12:38:42,732 - INFO - Epoch 20 average training loss: [0;46m0.5270[0m std: 0.6064
2021-05-13 12:38:42,752 - INFO - Epoch 20 average validate loss: [0;46m0.5268[0m std: 0.6127


In [7]:
# Reload the most recently saved early-stop checkpoint, i.e. the last entry of
# model.validate_hist.saved_dirs.
# (Ignore the error reported while loading the optimizer state_dict.)
fp.state_dict_load(model, model.validate_hist.saved_dirs[-1])
# The loss function can be switched to softmax+mse to check the MSE loss on the
# validation set (the best/lowest loss obtained from a single model was ~0.20).
# NOTE: this overwrites args.loss_fn in place, so any later cell that reads
# args.loss_fn sees the new value; re-run the configuration cell to restore it.
args.loss_fn = ['softmax+mse']
model.loss_fn = fp.get_loss_fn(args)
valid_loss = fp.validate(model, valid_data, verbose=1, batch_size=64) # try a larger batch_size, should make no difference though

2021-05-13 12:57:33,231 - INFO - Loading model states from: earlystop_0.5264
2021-05-13 12:57:33,565 - INFO - Loaded net state: earlystop_0.5264/net.state
2021-05-13 12:57:33,571 - INFO - Getting loss function: ['softmax+mse']
2021-05-13 12:57:33,573 - INFO - Validating, data size: 500
2021-05-13 12:57:33,573 - INFO -            batch size: 64
2021-05-13 12:57:33,574 - INFO -               shuffle: False
2021-05-13 12:57:33,574 - INFO -          # of batches: 8
2021-05-13 12:57:33,575 - INFO -        recap interval: 1
2021-05-13 12:57:33,575 - INFO -          loss padding: False
2021-05-13 12:57:36,627 - INFO - ibatch:    0, loss: 0.2021, std: 0.2891
2021-05-13 12:57:37,426 - INFO - ibatch:    1, loss: 0.2071, std: 0.3006
2021-05-13 12:57:38,265 - INFO - ibatch:    2, loss: 0.1919, std: 0.2813
2021-05-13 12:57:39,052 - INFO - ibatch:    3, loss: 0.2279, std: 0.3225
2021-05-13 12:57:39,839 - INFO - ibatch:    4, loss: 0.2210, std: 0.3141
2021-05-13 12:57:40,624 - INFO - ibatch:    5, lo

In [None]:
# Read in the prediction data and save the predicted results.
# The submitted results are the average of the best checkpoints/earlystops from
# three independent trainings. Individually they reached sqrt(mse) losses of
# 0.24, 0.24 and 0.242; the averaged prediction reached 0.238.
# Unfortunately the checkpoints of the first two trainings were not kept; only
# their predicted results were saved.
# Although the network architecture is the same, each training randomly splits
# the train and validation data (train_test_split), so this model averaging
# approximates the effect/benefit of cross-validation.
# NOTE(review): seq_length=-1 presumably disables the sequence-length limit used
# for training so that the full-length sequences are loaded - confirm in fly_paddle.
predict_data = fp.get_midata(args, data_name='predict', seq_length=-1)
# Predictions are written to the 'predict.files' directory.
# NOTE(review): batch_size=1 presumably avoids padding sequences of different
# lengths within a batch - confirm in fly_paddle.
y_model, std_model = fp.predict(model, predict_data, save_dir='predict.files', batch_size=1)

2021-05-13 12:57:54,373 - INFO - Loading data: work/data/predict.pkl
2021-05-13 12:57:55,946 - INFO -    # of data: 112,  max seqlen: 861, user seq_length: -1
2021-05-13 12:57:55,947 - INFO -  residue fmt: vector, nn: 0, dbn: True, attr: False, genre: upp
2021-05-13 12:57:58,986 - INFO - Predicting, data size: 112
2021-05-13 12:57:58,989 - INFO -            batch size: 1
2021-05-13 12:57:58,990 - INFO -               shuffle: False
2021-05-13 12:57:58,990 - INFO -          # of batches: 112
2021-05-13 12:57:58,991 - INFO -        recap interval: 4
2021-05-13 12:57:58,992 - INFO - Predicted files will be saved in: predict.files
  0%|          | 0/112 [00:00<?, ?it/s]