-
Notifications
You must be signed in to change notification settings - Fork 154
/
Copy pathfinetune_squad1.1_base_mx1.6.0rc1.log
309 lines (309 loc) · 35.2 KB
/
finetune_squad1.1_base_mx1.6.0rc1.log
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
INFO:gluonnlp:16:37:28 Namespace(accumulate=None, batch_size=12, bert_dataset='book_corpus_wiki_en_uncased', bert_model='bert_12_768_12', debug=False, doc_stride=128, epochs=2, gpu=0, load_feature_from_pickle=False, log_interval=50, lr=3e-05, max_answer_length=30, max_query_length=64, max_seq_length=384, model_parameters=None, n_best_size=20, null_score_diff_threshold=0.0, only_predict=False, optimizer='adam', output_dir='./output_dir', pretrained_bert_parameters=None, sentencepiece=None, test_batch_size=24, training_steps=None, uncased=True, version_2=False, warmup_ratio=0.1)
INFO:gluonnlp:16:37:35 Loading train data...
INFO:gluonnlp:16:37:36 Number of records in Train data:87599
INFO:gluonnlp:16:38:17 The number of examples after preprocessing:88641
INFO:gluonnlp:16:38:18 Start Training
INFO:gluonnlp:16:38:39 Epoch: 0, Batch: 49/7387, Loss=5.6445, lr=0.0000010 Time cost=21.1 Thoughput=28.48 samples/s
INFO:gluonnlp:16:38:58 Epoch: 0, Batch: 99/7387, Loss=5.4744, lr=0.0000020 Time cost=19.1 Thoughput=31.46 samples/s
INFO:gluonnlp:16:39:17 Epoch: 0, Batch: 149/7387, Loss=5.2426, lr=0.0000030 Time cost=19.4 Thoughput=30.92 samples/s
INFO:gluonnlp:16:39:38 Epoch: 0, Batch: 199/7387, Loss=4.9389, lr=0.0000041 Time cost=20.9 Thoughput=28.78 samples/s
INFO:gluonnlp:16:39:58 Epoch: 0, Batch: 249/7387, Loss=4.5655, lr=0.0000051 Time cost=19.6 Thoughput=30.67 samples/s
INFO:gluonnlp:16:40:18 Epoch: 0, Batch: 299/7387, Loss=4.2618, lr=0.0000061 Time cost=20.4 Thoughput=29.47 samples/s
INFO:gluonnlp:16:40:37 Epoch: 0, Batch: 349/7387, Loss=3.8745, lr=0.0000071 Time cost=19.4 Thoughput=30.98 samples/s
INFO:gluonnlp:16:40:58 Epoch: 0, Batch: 399/7387, Loss=3.3431, lr=0.0000081 Time cost=20.4 Thoughput=29.46 samples/s
INFO:gluonnlp:16:41:18 Epoch: 0, Batch: 449/7387, Loss=3.1488, lr=0.0000091 Time cost=20.1 Thoughput=29.81 samples/s
INFO:gluonnlp:16:41:37 Epoch: 0, Batch: 499/7387, Loss=2.8444, lr=0.0000102 Time cost=19.6 Thoughput=30.66 samples/s
INFO:gluonnlp:16:41:59 Epoch: 0, Batch: 549/7387, Loss=2.7493, lr=0.0000112 Time cost=21.2 Thoughput=28.34 samples/s
INFO:gluonnlp:16:42:18 Epoch: 0, Batch: 599/7387, Loss=2.5771, lr=0.0000122 Time cost=19.7 Thoughput=30.53 samples/s
INFO:gluonnlp:16:42:38 Epoch: 0, Batch: 649/7387, Loss=2.3768, lr=0.0000132 Time cost=19.9 Thoughput=30.11 samples/s
INFO:gluonnlp:16:42:57 Epoch: 0, Batch: 699/7387, Loss=2.1559, lr=0.0000142 Time cost=18.8 Thoughput=31.95 samples/s
INFO:gluonnlp:16:43:17 Epoch: 0, Batch: 749/7387, Loss=2.0994, lr=0.0000152 Time cost=20.0 Thoughput=30.04 samples/s
INFO:gluonnlp:16:43:36 Epoch: 0, Batch: 799/7387, Loss=1.9900, lr=0.0000162 Time cost=19.3 Thoughput=31.03 samples/s
INFO:gluonnlp:16:43:55 Epoch: 0, Batch: 849/7387, Loss=2.0263, lr=0.0000173 Time cost=18.4 Thoughput=32.68 samples/s
INFO:gluonnlp:16:44:15 Epoch: 0, Batch: 899/7387, Loss=1.9718, lr=0.0000183 Time cost=20.4 Thoughput=29.37 samples/s
INFO:gluonnlp:16:44:35 Epoch: 0, Batch: 949/7387, Loss=1.9293, lr=0.0000193 Time cost=19.8 Thoughput=30.28 samples/s
INFO:gluonnlp:16:44:55 Epoch: 0, Batch: 999/7387, Loss=1.7776, lr=0.0000203 Time cost=20.2 Thoughput=29.67 samples/s
INFO:gluonnlp:16:45:14 Epoch: 0, Batch: 1049/7387, Loss=1.8060, lr=0.0000213 Time cost=19.1 Thoughput=31.49 samples/s
INFO:gluonnlp:16:45:34 Epoch: 0, Batch: 1099/7387, Loss=1.7341, lr=0.0000223 Time cost=20.2 Thoughput=29.73 samples/s
INFO:gluonnlp:16:45:54 Epoch: 0, Batch: 1149/7387, Loss=1.6819, lr=0.0000234 Time cost=19.5 Thoughput=30.82 samples/s
INFO:gluonnlp:16:46:14 Epoch: 0, Batch: 1199/7387, Loss=1.7083, lr=0.0000244 Time cost=20.1 Thoughput=29.90 samples/s
INFO:gluonnlp:16:46:34 Epoch: 0, Batch: 1249/7387, Loss=1.7227, lr=0.0000254 Time cost=19.6 Thoughput=30.57 samples/s
INFO:gluonnlp:16:46:52 Epoch: 0, Batch: 1299/7387, Loss=1.5141, lr=0.0000264 Time cost=18.8 Thoughput=32.00 samples/s
INFO:gluonnlp:16:47:12 Epoch: 0, Batch: 1349/7387, Loss=1.6368, lr=0.0000274 Time cost=19.6 Thoughput=30.54 samples/s
INFO:gluonnlp:16:47:30 Epoch: 0, Batch: 1399/7387, Loss=1.5311, lr=0.0000284 Time cost=18.4 Thoughput=32.66 samples/s
INFO:gluonnlp:16:47:51 Epoch: 0, Batch: 1449/7387, Loss=1.5743, lr=0.0000295 Time cost=20.4 Thoughput=29.46 samples/s
INFO:gluonnlp:16:48:10 Epoch: 0, Batch: 1499/7387, Loss=1.5064, lr=0.0000299 Time cost=19.4 Thoughput=30.88 samples/s
INFO:gluonnlp:16:48:30 Epoch: 0, Batch: 1549/7387, Loss=1.5756, lr=0.0000298 Time cost=20.1 Thoughput=29.81 samples/s
INFO:gluonnlp:16:48:50 Epoch: 0, Batch: 1599/7387, Loss=1.4438, lr=0.0000297 Time cost=20.0 Thoughput=30.06 samples/s
INFO:gluonnlp:16:49:10 Epoch: 0, Batch: 1649/7387, Loss=1.4909, lr=0.0000296 Time cost=19.5 Thoughput=30.84 samples/s
INFO:gluonnlp:16:49:29 Epoch: 0, Batch: 1699/7387, Loss=1.4661, lr=0.0000295 Time cost=19.5 Thoughput=30.76 samples/s
INFO:gluonnlp:16:49:49 Epoch: 0, Batch: 1749/7387, Loss=1.3533, lr=0.0000294 Time cost=20.2 Thoughput=29.70 samples/s
INFO:gluonnlp:16:50:10 Epoch: 0, Batch: 1799/7387, Loss=1.3865, lr=0.0000293 Time cost=20.2 Thoughput=29.68 samples/s
INFO:gluonnlp:16:50:29 Epoch: 0, Batch: 1849/7387, Loss=1.3464, lr=0.0000292 Time cost=19.4 Thoughput=30.95 samples/s
INFO:gluonnlp:16:50:48 Epoch: 0, Batch: 1899/7387, Loss=1.3781, lr=0.0000290 Time cost=19.2 Thoughput=31.30 samples/s
INFO:gluonnlp:16:51:08 Epoch: 0, Batch: 1949/7387, Loss=1.4296, lr=0.0000289 Time cost=19.8 Thoughput=30.32 samples/s
INFO:gluonnlp:16:51:27 Epoch: 0, Batch: 1999/7387, Loss=1.3216, lr=0.0000288 Time cost=19.6 Thoughput=30.65 samples/s
INFO:gluonnlp:16:51:47 Epoch: 0, Batch: 2049/7387, Loss=1.3834, lr=0.0000287 Time cost=19.7 Thoughput=30.52 samples/s
INFO:gluonnlp:16:52:06 Epoch: 0, Batch: 2099/7387, Loss=1.4313, lr=0.0000286 Time cost=18.9 Thoughput=31.70 samples/s
INFO:gluonnlp:16:52:26 Epoch: 0, Batch: 2149/7387, Loss=1.4777, lr=0.0000285 Time cost=20.4 Thoughput=29.43 samples/s
INFO:gluonnlp:16:52:46 Epoch: 0, Batch: 2199/7387, Loss=1.3184, lr=0.0000284 Time cost=19.4 Thoughput=30.87 samples/s
INFO:gluonnlp:16:53:06 Epoch: 0, Batch: 2249/7387, Loss=1.2972, lr=0.0000283 Time cost=20.0 Thoughput=29.95 samples/s
INFO:gluonnlp:16:53:25 Epoch: 0, Batch: 2299/7387, Loss=1.4108, lr=0.0000281 Time cost=19.5 Thoughput=30.71 samples/s
INFO:gluonnlp:16:53:45 Epoch: 0, Batch: 2349/7387, Loss=1.3219, lr=0.0000280 Time cost=19.2 Thoughput=31.30 samples/s
INFO:gluonnlp:16:54:05 Epoch: 0, Batch: 2399/7387, Loss=1.4035, lr=0.0000279 Time cost=20.1 Thoughput=29.87 samples/s
INFO:gluonnlp:16:54:24 Epoch: 0, Batch: 2449/7387, Loss=1.3281, lr=0.0000278 Time cost=19.4 Thoughput=30.88 samples/s
INFO:gluonnlp:16:54:46 Epoch: 0, Batch: 2499/7387, Loss=1.2082, lr=0.0000277 Time cost=22.2 Thoughput=27.06 samples/s
INFO:gluonnlp:16:55:07 Epoch: 0, Batch: 2549/7387, Loss=1.3549, lr=0.0000276 Time cost=20.5 Thoughput=29.25 samples/s
INFO:gluonnlp:16:55:26 Epoch: 0, Batch: 2599/7387, Loss=1.2542, lr=0.0000275 Time cost=19.4 Thoughput=30.97 samples/s
INFO:gluonnlp:16:55:46 Epoch: 0, Batch: 2649/7387, Loss=1.2245, lr=0.0000274 Time cost=20.1 Thoughput=29.80 samples/s
INFO:gluonnlp:16:56:07 Epoch: 0, Batch: 2699/7387, Loss=1.3127, lr=0.0000272 Time cost=20.7 Thoughput=28.93 samples/s
INFO:gluonnlp:16:56:26 Epoch: 0, Batch: 2749/7387, Loss=1.2607, lr=0.0000271 Time cost=18.9 Thoughput=31.70 samples/s
INFO:gluonnlp:16:56:45 Epoch: 0, Batch: 2799/7387, Loss=1.2425, lr=0.0000270 Time cost=18.5 Thoughput=32.35 samples/s
INFO:gluonnlp:16:57:05 Epoch: 0, Batch: 2849/7387, Loss=1.1845, lr=0.0000269 Time cost=20.4 Thoughput=29.41 samples/s
INFO:gluonnlp:16:57:24 Epoch: 0, Batch: 2899/7387, Loss=1.3811, lr=0.0000268 Time cost=19.5 Thoughput=30.73 samples/s
INFO:gluonnlp:16:57:45 Epoch: 0, Batch: 2949/7387, Loss=1.2267, lr=0.0000267 Time cost=20.9 Thoughput=28.65 samples/s
INFO:gluonnlp:16:58:06 Epoch: 0, Batch: 2999/7387, Loss=1.3173, lr=0.0000266 Time cost=20.8 Thoughput=28.87 samples/s
INFO:gluonnlp:16:58:26 Epoch: 0, Batch: 3049/7387, Loss=1.2148, lr=0.0000265 Time cost=20.1 Thoughput=29.84 samples/s
INFO:gluonnlp:16:58:46 Epoch: 0, Batch: 3099/7387, Loss=1.2363, lr=0.0000263 Time cost=19.5 Thoughput=30.79 samples/s
INFO:gluonnlp:16:59:06 Epoch: 0, Batch: 3149/7387, Loss=1.2641, lr=0.0000262 Time cost=19.8 Thoughput=30.38 samples/s
INFO:gluonnlp:16:59:26 Epoch: 0, Batch: 3199/7387, Loss=1.2118, lr=0.0000261 Time cost=20.2 Thoughput=29.77 samples/s
INFO:gluonnlp:16:59:45 Epoch: 0, Batch: 3249/7387, Loss=1.2306, lr=0.0000260 Time cost=18.9 Thoughput=31.80 samples/s
INFO:gluonnlp:17:00:04 Epoch: 0, Batch: 3299/7387, Loss=1.2147, lr=0.0000259 Time cost=19.6 Thoughput=30.62 samples/s
INFO:gluonnlp:17:00:23 Epoch: 0, Batch: 3349/7387, Loss=1.1165, lr=0.0000258 Time cost=19.0 Thoughput=31.60 samples/s
INFO:gluonnlp:17:00:43 Epoch: 0, Batch: 3399/7387, Loss=1.2025, lr=0.0000257 Time cost=19.7 Thoughput=30.50 samples/s
INFO:gluonnlp:17:01:03 Epoch: 0, Batch: 3449/7387, Loss=1.2294, lr=0.0000255 Time cost=19.8 Thoughput=30.23 samples/s
INFO:gluonnlp:17:01:22 Epoch: 0, Batch: 3499/7387, Loss=1.2234, lr=0.0000254 Time cost=19.4 Thoughput=30.86 samples/s
INFO:gluonnlp:17:01:41 Epoch: 0, Batch: 3549/7387, Loss=1.1593, lr=0.0000253 Time cost=18.6 Thoughput=32.31 samples/s
INFO:gluonnlp:17:01:59 Epoch: 0, Batch: 3599/7387, Loss=1.2073, lr=0.0000252 Time cost=18.2 Thoughput=32.91 samples/s
INFO:gluonnlp:17:02:18 Epoch: 0, Batch: 3649/7387, Loss=1.2921, lr=0.0000251 Time cost=19.0 Thoughput=31.63 samples/s
INFO:gluonnlp:17:02:39 Epoch: 0, Batch: 3699/7387, Loss=1.2404, lr=0.0000250 Time cost=21.2 Thoughput=28.33 samples/s
INFO:gluonnlp:17:02:58 Epoch: 0, Batch: 3749/7387, Loss=1.1849, lr=0.0000249 Time cost=19.2 Thoughput=31.26 samples/s
INFO:gluonnlp:17:03:18 Epoch: 0, Batch: 3799/7387, Loss=1.1510, lr=0.0000248 Time cost=19.9 Thoughput=30.14 samples/s
INFO:gluonnlp:17:03:38 Epoch: 0, Batch: 3849/7387, Loss=1.2296, lr=0.0000246 Time cost=19.4 Thoughput=30.89 samples/s
INFO:gluonnlp:17:03:57 Epoch: 0, Batch: 3899/7387, Loss=1.2310, lr=0.0000245 Time cost=19.3 Thoughput=31.13 samples/s
INFO:gluonnlp:17:04:16 Epoch: 0, Batch: 3949/7387, Loss=1.2833, lr=0.0000244 Time cost=19.4 Thoughput=30.96 samples/s
INFO:gluonnlp:17:04:36 Epoch: 0, Batch: 3999/7387, Loss=1.1703, lr=0.0000243 Time cost=19.3 Thoughput=31.07 samples/s
INFO:gluonnlp:17:04:55 Epoch: 0, Batch: 4049/7387, Loss=1.1476, lr=0.0000242 Time cost=19.6 Thoughput=30.54 samples/s
INFO:gluonnlp:17:05:15 Epoch: 0, Batch: 4099/7387, Loss=1.1410, lr=0.0000241 Time cost=19.5 Thoughput=30.84 samples/s
INFO:gluonnlp:17:05:34 Epoch: 0, Batch: 4149/7387, Loss=1.1468, lr=0.0000240 Time cost=19.3 Thoughput=31.07 samples/s
INFO:gluonnlp:17:05:53 Epoch: 0, Batch: 4199/7387, Loss=1.1367, lr=0.0000239 Time cost=18.8 Thoughput=31.96 samples/s
INFO:gluonnlp:17:06:12 Epoch: 0, Batch: 4249/7387, Loss=1.2364, lr=0.0000237 Time cost=19.4 Thoughput=30.98 samples/s
INFO:gluonnlp:17:06:32 Epoch: 0, Batch: 4299/7387, Loss=1.1918, lr=0.0000236 Time cost=19.9 Thoughput=30.19 samples/s
INFO:gluonnlp:17:06:52 Epoch: 0, Batch: 4349/7387, Loss=1.3233, lr=0.0000235 Time cost=19.7 Thoughput=30.48 samples/s
INFO:gluonnlp:17:07:11 Epoch: 0, Batch: 4399/7387, Loss=1.1882, lr=0.0000234 Time cost=19.7 Thoughput=30.52 samples/s
INFO:gluonnlp:17:07:31 Epoch: 0, Batch: 4449/7387, Loss=1.1582, lr=0.0000233 Time cost=20.0 Thoughput=30.07 samples/s
INFO:gluonnlp:17:07:51 Epoch: 0, Batch: 4499/7387, Loss=1.0041, lr=0.0000232 Time cost=20.1 Thoughput=29.90 samples/s
INFO:gluonnlp:17:08:12 Epoch: 0, Batch: 4549/7387, Loss=1.1503, lr=0.0000231 Time cost=20.8 Thoughput=28.86 samples/s
INFO:gluonnlp:17:08:34 Epoch: 0, Batch: 4599/7387, Loss=1.1471, lr=0.0000230 Time cost=21.5 Thoughput=27.88 samples/s
INFO:gluonnlp:17:08:54 Epoch: 0, Batch: 4649/7387, Loss=1.1902, lr=0.0000228 Time cost=20.5 Thoughput=29.27 samples/s
INFO:gluonnlp:17:09:15 Epoch: 0, Batch: 4699/7387, Loss=1.0659, lr=0.0000227 Time cost=20.6 Thoughput=29.09 samples/s
INFO:gluonnlp:17:09:35 Epoch: 0, Batch: 4749/7387, Loss=1.1564, lr=0.0000226 Time cost=20.3 Thoughput=29.62 samples/s
INFO:gluonnlp:17:09:55 Epoch: 0, Batch: 4799/7387, Loss=1.0901, lr=0.0000225 Time cost=20.4 Thoughput=29.48 samples/s
INFO:gluonnlp:17:10:16 Epoch: 0, Batch: 4849/7387, Loss=1.2246, lr=0.0000224 Time cost=20.8 Thoughput=28.80 samples/s
INFO:gluonnlp:17:10:37 Epoch: 0, Batch: 4899/7387, Loss=1.0877, lr=0.0000223 Time cost=20.7 Thoughput=29.02 samples/s
INFO:gluonnlp:17:10:57 Epoch: 0, Batch: 4949/7387, Loss=1.1942, lr=0.0000222 Time cost=19.6 Thoughput=30.61 samples/s
INFO:gluonnlp:17:11:17 Epoch: 0, Batch: 4999/7387, Loss=1.1684, lr=0.0000221 Time cost=20.7 Thoughput=28.93 samples/s
INFO:gluonnlp:17:11:37 Epoch: 0, Batch: 5049/7387, Loss=1.1071, lr=0.0000219 Time cost=19.4 Thoughput=30.94 samples/s
INFO:gluonnlp:17:11:56 Epoch: 0, Batch: 5099/7387, Loss=1.0725, lr=0.0000218 Time cost=19.2 Thoughput=31.24 samples/s
INFO:gluonnlp:17:12:15 Epoch: 0, Batch: 5149/7387, Loss=1.1496, lr=0.0000217 Time cost=18.7 Thoughput=32.06 samples/s
INFO:gluonnlp:17:12:34 Epoch: 0, Batch: 5199/7387, Loss=1.1098, lr=0.0000216 Time cost=19.8 Thoughput=30.33 samples/s
INFO:gluonnlp:17:12:54 Epoch: 0, Batch: 5249/7387, Loss=1.0491, lr=0.0000215 Time cost=19.6 Thoughput=30.60 samples/s
INFO:gluonnlp:17:13:12 Epoch: 0, Batch: 5299/7387, Loss=1.1495, lr=0.0000214 Time cost=18.4 Thoughput=32.60 samples/s
INFO:gluonnlp:17:13:32 Epoch: 0, Batch: 5349/7387, Loss=1.1494, lr=0.0000213 Time cost=19.4 Thoughput=30.93 samples/s
INFO:gluonnlp:17:13:52 Epoch: 0, Batch: 5399/7387, Loss=1.0467, lr=0.0000211 Time cost=20.3 Thoughput=29.61 samples/s
INFO:gluonnlp:17:14:12 Epoch: 0, Batch: 5449/7387, Loss=1.1077, lr=0.0000210 Time cost=19.5 Thoughput=30.78 samples/s
INFO:gluonnlp:17:14:31 Epoch: 0, Batch: 5499/7387, Loss=1.1237, lr=0.0000209 Time cost=19.0 Thoughput=31.57 samples/s
INFO:gluonnlp:17:14:50 Epoch: 0, Batch: 5549/7387, Loss=1.1175, lr=0.0000208 Time cost=19.6 Thoughput=30.60 samples/s
INFO:gluonnlp:17:15:09 Epoch: 0, Batch: 5599/7387, Loss=1.2176, lr=0.0000207 Time cost=18.9 Thoughput=31.69 samples/s
INFO:gluonnlp:17:15:28 Epoch: 0, Batch: 5649/7387, Loss=1.0332, lr=0.0000206 Time cost=18.6 Thoughput=32.33 samples/s
INFO:gluonnlp:17:15:48 Epoch: 0, Batch: 5699/7387, Loss=1.1217, lr=0.0000205 Time cost=20.3 Thoughput=29.50 samples/s
INFO:gluonnlp:17:16:09 Epoch: 0, Batch: 5749/7387, Loss=0.9873, lr=0.0000204 Time cost=21.1 Thoughput=28.42 samples/s
INFO:gluonnlp:17:16:29 Epoch: 0, Batch: 5799/7387, Loss=0.9650, lr=0.0000202 Time cost=20.1 Thoughput=29.92 samples/s
INFO:gluonnlp:17:16:49 Epoch: 0, Batch: 5849/7387, Loss=1.1189, lr=0.0000201 Time cost=19.7 Thoughput=30.39 samples/s
INFO:gluonnlp:17:17:08 Epoch: 0, Batch: 5899/7387, Loss=1.1479, lr=0.0000200 Time cost=19.6 Thoughput=30.62 samples/s
INFO:gluonnlp:17:17:29 Epoch: 0, Batch: 5949/7387, Loss=1.0597, lr=0.0000199 Time cost=20.2 Thoughput=29.63 samples/s
INFO:gluonnlp:17:17:48 Epoch: 0, Batch: 5999/7387, Loss=1.0234, lr=0.0000198 Time cost=19.5 Thoughput=30.78 samples/s
INFO:gluonnlp:17:18:07 Epoch: 0, Batch: 6049/7387, Loss=1.0678, lr=0.0000197 Time cost=19.1 Thoughput=31.43 samples/s
INFO:gluonnlp:17:18:28 Epoch: 0, Batch: 6099/7387, Loss=1.0517, lr=0.0000196 Time cost=20.7 Thoughput=28.95 samples/s
INFO:gluonnlp:17:18:48 Epoch: 0, Batch: 6149/7387, Loss=1.0137, lr=0.0000195 Time cost=20.0 Thoughput=29.97 samples/s
INFO:gluonnlp:17:19:08 Epoch: 0, Batch: 6199/7387, Loss=1.0925, lr=0.0000193 Time cost=19.9 Thoughput=30.15 samples/s
INFO:gluonnlp:17:19:28 Epoch: 0, Batch: 6249/7387, Loss=1.0118, lr=0.0000192 Time cost=20.0 Thoughput=29.95 samples/s
INFO:gluonnlp:17:19:48 Epoch: 0, Batch: 6299/7387, Loss=1.0734, lr=0.0000191 Time cost=19.8 Thoughput=30.34 samples/s
INFO:gluonnlp:17:20:08 Epoch: 0, Batch: 6349/7387, Loss=1.0607, lr=0.0000190 Time cost=20.4 Thoughput=29.35 samples/s
INFO:gluonnlp:17:20:28 Epoch: 0, Batch: 6399/7387, Loss=1.0828, lr=0.0000189 Time cost=20.0 Thoughput=30.04 samples/s
INFO:gluonnlp:17:20:47 Epoch: 0, Batch: 6449/7387, Loss=1.0847, lr=0.0000188 Time cost=18.8 Thoughput=31.87 samples/s
INFO:gluonnlp:17:21:07 Epoch: 0, Batch: 6499/7387, Loss=1.0289, lr=0.0000187 Time cost=19.9 Thoughput=30.21 samples/s
INFO:gluonnlp:17:21:27 Epoch: 0, Batch: 6549/7387, Loss=1.0208, lr=0.0000186 Time cost=19.7 Thoughput=30.39 samples/s
INFO:gluonnlp:17:21:46 Epoch: 0, Batch: 6599/7387, Loss=1.0990, lr=0.0000184 Time cost=19.1 Thoughput=31.49 samples/s
INFO:gluonnlp:17:22:06 Epoch: 0, Batch: 6649/7387, Loss=1.1225, lr=0.0000183 Time cost=20.3 Thoughput=29.55 samples/s
INFO:gluonnlp:17:22:26 Epoch: 0, Batch: 6699/7387, Loss=1.0890, lr=0.0000182 Time cost=20.1 Thoughput=29.91 samples/s
INFO:gluonnlp:17:22:45 Epoch: 0, Batch: 6749/7387, Loss=1.0238, lr=0.0000181 Time cost=19.4 Thoughput=30.86 samples/s
INFO:gluonnlp:17:23:06 Epoch: 0, Batch: 6799/7387, Loss=1.2041, lr=0.0000180 Time cost=20.1 Thoughput=29.83 samples/s
INFO:gluonnlp:17:23:26 Epoch: 0, Batch: 6849/7387, Loss=1.0780, lr=0.0000179 Time cost=20.1 Thoughput=29.85 samples/s
INFO:gluonnlp:17:23:46 Epoch: 0, Batch: 6899/7387, Loss=1.0308, lr=0.0000178 Time cost=20.7 Thoughput=29.02 samples/s
INFO:gluonnlp:17:24:06 Epoch: 0, Batch: 6949/7387, Loss=1.0612, lr=0.0000177 Time cost=20.0 Thoughput=29.93 samples/s
INFO:gluonnlp:17:24:25 Epoch: 0, Batch: 6999/7387, Loss=1.0918, lr=0.0000175 Time cost=19.0 Thoughput=31.54 samples/s
INFO:gluonnlp:17:24:45 Epoch: 0, Batch: 7049/7387, Loss=1.0125, lr=0.0000174 Time cost=19.2 Thoughput=31.27 samples/s
INFO:gluonnlp:17:25:04 Epoch: 0, Batch: 7099/7387, Loss=0.9771, lr=0.0000173 Time cost=19.7 Thoughput=30.38 samples/s
INFO:gluonnlp:17:25:25 Epoch: 0, Batch: 7149/7387, Loss=0.9219, lr=0.0000172 Time cost=20.6 Thoughput=29.12 samples/s
INFO:gluonnlp:17:25:45 Epoch: 0, Batch: 7199/7387, Loss=0.9338, lr=0.0000171 Time cost=19.9 Thoughput=30.20 samples/s
INFO:gluonnlp:17:26:07 Epoch: 0, Batch: 7249/7387, Loss=1.0247, lr=0.0000170 Time cost=22.0 Thoughput=27.28 samples/s
INFO:gluonnlp:17:26:26 Epoch: 0, Batch: 7299/7387, Loss=1.0440, lr=0.0000169 Time cost=19.4 Thoughput=31.00 samples/s
INFO:gluonnlp:17:26:45 Epoch: 0, Batch: 7349/7387, Loss=1.0705, lr=0.0000167 Time cost=18.5 Thoughput=32.41 samples/s
INFO:gluonnlp:17:26:59 Time cost=2921.47 s, Thoughput=30.34 samples/s
INFO:gluonnlp:17:27:19 Epoch: 1, Batch: 49/7387, Loss=0.7383, lr=0.0000166 Time cost=20.1 Thoughput=51.86 samples/s
INFO:gluonnlp:17:27:38 Epoch: 1, Batch: 99/7387, Loss=0.8432, lr=0.0000164 Time cost=18.4 Thoughput=32.67 samples/s
INFO:gluonnlp:17:27:57 Epoch: 1, Batch: 149/7387, Loss=0.7800, lr=0.0000163 Time cost=19.7 Thoughput=30.41 samples/s
INFO:gluonnlp:17:28:17 Epoch: 1, Batch: 199/7387, Loss=0.7916, lr=0.0000162 Time cost=20.1 Thoughput=29.92 samples/s
INFO:gluonnlp:17:28:37 Epoch: 1, Batch: 249/7387, Loss=0.7056, lr=0.0000161 Time cost=19.7 Thoughput=30.53 samples/s
INFO:gluonnlp:17:28:56 Epoch: 1, Batch: 299/7387, Loss=0.7403, lr=0.0000160 Time cost=19.2 Thoughput=31.21 samples/s
INFO:gluonnlp:17:29:17 Epoch: 1, Batch: 349/7387, Loss=0.7434, lr=0.0000159 Time cost=20.3 Thoughput=29.50 samples/s
INFO:gluonnlp:17:29:37 Epoch: 1, Batch: 399/7387, Loss=0.8222, lr=0.0000158 Time cost=20.5 Thoughput=29.23 samples/s
INFO:gluonnlp:17:29:58 Epoch: 1, Batch: 449/7387, Loss=0.7154, lr=0.0000156 Time cost=20.8 Thoughput=28.83 samples/s
INFO:gluonnlp:17:30:17 Epoch: 1, Batch: 499/7387, Loss=0.7769, lr=0.0000155 Time cost=19.0 Thoughput=31.58 samples/s
INFO:gluonnlp:17:30:38 Epoch: 1, Batch: 549/7387, Loss=0.8512, lr=0.0000154 Time cost=20.8 Thoughput=28.88 samples/s
INFO:gluonnlp:17:30:57 Epoch: 1, Batch: 599/7387, Loss=0.7676, lr=0.0000153 Time cost=19.6 Thoughput=30.67 samples/s
INFO:gluonnlp:17:31:17 Epoch: 1, Batch: 649/7387, Loss=0.7384, lr=0.0000152 Time cost=20.0 Thoughput=30.01 samples/s
INFO:gluonnlp:17:31:38 Epoch: 1, Batch: 699/7387, Loss=0.7531, lr=0.0000151 Time cost=20.7 Thoughput=29.02 samples/s
INFO:gluonnlp:17:31:58 Epoch: 1, Batch: 749/7387, Loss=0.7504, lr=0.0000150 Time cost=20.5 Thoughput=29.28 samples/s
INFO:gluonnlp:17:32:18 Epoch: 1, Batch: 799/7387, Loss=0.8049, lr=0.0000149 Time cost=19.4 Thoughput=30.98 samples/s
INFO:gluonnlp:17:32:39 Epoch: 1, Batch: 849/7387, Loss=0.7850, lr=0.0000147 Time cost=21.0 Thoughput=28.55 samples/s
INFO:gluonnlp:17:32:59 Epoch: 1, Batch: 899/7387, Loss=0.7811, lr=0.0000146 Time cost=20.6 Thoughput=29.17 samples/s
INFO:gluonnlp:17:33:18 Epoch: 1, Batch: 949/7387, Loss=0.7248, lr=0.0000145 Time cost=18.9 Thoughput=31.77 samples/s
INFO:gluonnlp:17:33:38 Epoch: 1, Batch: 999/7387, Loss=0.8588, lr=0.0000144 Time cost=19.7 Thoughput=30.49 samples/s
INFO:gluonnlp:17:33:58 Epoch: 1, Batch: 1049/7387, Loss=0.7291, lr=0.0000143 Time cost=19.7 Thoughput=30.44 samples/s
INFO:gluonnlp:17:34:18 Epoch: 1, Batch: 1099/7387, Loss=0.7501, lr=0.0000142 Time cost=20.0 Thoughput=29.96 samples/s
INFO:gluonnlp:17:34:38 Epoch: 1, Batch: 1149/7387, Loss=0.7375, lr=0.0000141 Time cost=20.2 Thoughput=29.73 samples/s
INFO:gluonnlp:17:34:57 Epoch: 1, Batch: 1199/7387, Loss=0.7529, lr=0.0000140 Time cost=19.6 Thoughput=30.69 samples/s
INFO:gluonnlp:17:35:17 Epoch: 1, Batch: 1249/7387, Loss=0.7425, lr=0.0000138 Time cost=19.6 Thoughput=30.55 samples/s
INFO:gluonnlp:17:35:36 Epoch: 1, Batch: 1299/7387, Loss=0.7106, lr=0.0000137 Time cost=19.2 Thoughput=31.21 samples/s
INFO:gluonnlp:17:35:54 Epoch: 1, Batch: 1349/7387, Loss=0.7754, lr=0.0000136 Time cost=18.1 Thoughput=33.18 samples/s
INFO:gluonnlp:17:36:14 Epoch: 1, Batch: 1399/7387, Loss=0.7816, lr=0.0000135 Time cost=19.4 Thoughput=30.93 samples/s
INFO:gluonnlp:17:36:34 Epoch: 1, Batch: 1449/7387, Loss=0.8308, lr=0.0000134 Time cost=20.2 Thoughput=29.76 samples/s
INFO:gluonnlp:17:36:55 Epoch: 1, Batch: 1499/7387, Loss=0.8485, lr=0.0000133 Time cost=20.9 Thoughput=28.65 samples/s
INFO:gluonnlp:17:37:13 Epoch: 1, Batch: 1549/7387, Loss=0.8011, lr=0.0000132 Time cost=18.4 Thoughput=32.53 samples/s
INFO:gluonnlp:17:37:33 Epoch: 1, Batch: 1599/7387, Loss=0.7331, lr=0.0000131 Time cost=19.4 Thoughput=30.93 samples/s
INFO:gluonnlp:17:37:51 Epoch: 1, Batch: 1649/7387, Loss=0.7152, lr=0.0000129 Time cost=18.4 Thoughput=32.54 samples/s
INFO:gluonnlp:17:38:11 Epoch: 1, Batch: 1699/7387, Loss=0.6854, lr=0.0000128 Time cost=19.9 Thoughput=30.11 samples/s
INFO:gluonnlp:17:38:31 Epoch: 1, Batch: 1749/7387, Loss=0.9012, lr=0.0000127 Time cost=19.5 Thoughput=30.77 samples/s
INFO:gluonnlp:17:38:50 Epoch: 1, Batch: 1799/7387, Loss=0.7334, lr=0.0000126 Time cost=19.8 Thoughput=30.24 samples/s
INFO:gluonnlp:17:39:10 Epoch: 1, Batch: 1849/7387, Loss=0.8560, lr=0.0000125 Time cost=19.8 Thoughput=30.24 samples/s
INFO:gluonnlp:17:39:30 Epoch: 1, Batch: 1899/7387, Loss=0.8229, lr=0.0000124 Time cost=19.7 Thoughput=30.53 samples/s
INFO:gluonnlp:17:39:50 Epoch: 1, Batch: 1949/7387, Loss=0.7971, lr=0.0000123 Time cost=19.6 Thoughput=30.63 samples/s
INFO:gluonnlp:17:40:10 Epoch: 1, Batch: 1999/7387, Loss=0.7004, lr=0.0000122 Time cost=20.3 Thoughput=29.58 samples/s
INFO:gluonnlp:17:40:30 Epoch: 1, Batch: 2049/7387, Loss=0.7305, lr=0.0000120 Time cost=19.7 Thoughput=30.42 samples/s
INFO:gluonnlp:17:40:49 Epoch: 1, Batch: 2099/7387, Loss=0.7722, lr=0.0000119 Time cost=19.6 Thoughput=30.67 samples/s
INFO:gluonnlp:17:41:09 Epoch: 1, Batch: 2149/7387, Loss=0.7867, lr=0.0000118 Time cost=20.3 Thoughput=29.63 samples/s
INFO:gluonnlp:17:41:31 Epoch: 1, Batch: 2199/7387, Loss=0.7575, lr=0.0000117 Time cost=21.5 Thoughput=27.86 samples/s
INFO:gluonnlp:17:41:51 Epoch: 1, Batch: 2249/7387, Loss=0.7163, lr=0.0000116 Time cost=19.8 Thoughput=30.29 samples/s
INFO:gluonnlp:17:42:10 Epoch: 1, Batch: 2299/7387, Loss=0.7256, lr=0.0000115 Time cost=19.5 Thoughput=30.73 samples/s
INFO:gluonnlp:17:42:29 Epoch: 1, Batch: 2349/7387, Loss=0.7392, lr=0.0000114 Time cost=18.9 Thoughput=31.74 samples/s
INFO:gluonnlp:17:42:49 Epoch: 1, Batch: 2399/7387, Loss=0.7306, lr=0.0000113 Time cost=19.9 Thoughput=30.15 samples/s
INFO:gluonnlp:17:43:09 Epoch: 1, Batch: 2449/7387, Loss=0.8440, lr=0.0000111 Time cost=19.5 Thoughput=30.72 samples/s
INFO:gluonnlp:17:43:28 Epoch: 1, Batch: 2499/7387, Loss=0.8254, lr=0.0000110 Time cost=19.8 Thoughput=30.31 samples/s
INFO:gluonnlp:17:43:49 Epoch: 1, Batch: 2549/7387, Loss=0.8118, lr=0.0000109 Time cost=20.8 Thoughput=28.86 samples/s
INFO:gluonnlp:17:44:09 Epoch: 1, Batch: 2599/7387, Loss=0.8206, lr=0.0000108 Time cost=19.8 Thoughput=30.29 samples/s
INFO:gluonnlp:17:44:29 Epoch: 1, Batch: 2649/7387, Loss=0.8325, lr=0.0000107 Time cost=20.5 Thoughput=29.24 samples/s
INFO:gluonnlp:17:44:48 Epoch: 1, Batch: 2699/7387, Loss=0.7658, lr=0.0000106 Time cost=18.9 Thoughput=31.77 samples/s
INFO:gluonnlp:17:45:08 Epoch: 1, Batch: 2749/7387, Loss=0.7950, lr=0.0000105 Time cost=20.0 Thoughput=29.95 samples/s
INFO:gluonnlp:17:45:28 Epoch: 1, Batch: 2799/7387, Loss=0.7553, lr=0.0000103 Time cost=19.3 Thoughput=31.06 samples/s
INFO:gluonnlp:17:45:47 Epoch: 1, Batch: 2849/7387, Loss=0.7081, lr=0.0000102 Time cost=19.1 Thoughput=31.49 samples/s
INFO:gluonnlp:17:46:07 Epoch: 1, Batch: 2899/7387, Loss=0.7565, lr=0.0000101 Time cost=19.9 Thoughput=30.17 samples/s
INFO:gluonnlp:17:46:26 Epoch: 1, Batch: 2949/7387, Loss=0.8297, lr=0.0000100 Time cost=19.1 Thoughput=31.45 samples/s
INFO:gluonnlp:17:46:46 Epoch: 1, Batch: 2999/7387, Loss=0.7651, lr=0.0000099 Time cost=20.6 Thoughput=29.11 samples/s
INFO:gluonnlp:17:47:06 Epoch: 1, Batch: 3049/7387, Loss=0.7424, lr=0.0000098 Time cost=19.4 Thoughput=30.89 samples/s
INFO:gluonnlp:17:47:27 Epoch: 1, Batch: 3099/7387, Loss=0.8469, lr=0.0000097 Time cost=21.6 Thoughput=27.73 samples/s
INFO:gluonnlp:17:47:46 Epoch: 1, Batch: 3149/7387, Loss=0.7725, lr=0.0000096 Time cost=18.9 Thoughput=31.76 samples/s
INFO:gluonnlp:17:48:06 Epoch: 1, Batch: 3199/7387, Loss=0.7464, lr=0.0000094 Time cost=19.2 Thoughput=31.22 samples/s
INFO:gluonnlp:17:48:25 Epoch: 1, Batch: 3249/7387, Loss=0.7031, lr=0.0000093 Time cost=19.4 Thoughput=30.88 samples/s
INFO:gluonnlp:17:48:45 Epoch: 1, Batch: 3299/7387, Loss=0.7473, lr=0.0000092 Time cost=19.7 Thoughput=30.52 samples/s
INFO:gluonnlp:17:49:04 Epoch: 1, Batch: 3349/7387, Loss=0.8240, lr=0.0000091 Time cost=19.0 Thoughput=31.53 samples/s
INFO:gluonnlp:17:49:24 Epoch: 1, Batch: 3399/7387, Loss=0.7652, lr=0.0000090 Time cost=20.3 Thoughput=29.52 samples/s
INFO:gluonnlp:17:49:43 Epoch: 1, Batch: 3449/7387, Loss=0.8415, lr=0.0000089 Time cost=19.3 Thoughput=31.15 samples/s
INFO:gluonnlp:17:50:02 Epoch: 1, Batch: 3499/7387, Loss=0.7680, lr=0.0000088 Time cost=19.2 Thoughput=31.18 samples/s
INFO:gluonnlp:17:50:23 Epoch: 1, Batch: 3549/7387, Loss=0.8113, lr=0.0000087 Time cost=20.1 Thoughput=29.83 samples/s
INFO:gluonnlp:17:50:43 Epoch: 1, Batch: 3599/7387, Loss=0.7503, lr=0.0000085 Time cost=20.5 Thoughput=29.26 samples/s
INFO:gluonnlp:17:51:02 Epoch: 1, Batch: 3649/7387, Loss=0.7887, lr=0.0000084 Time cost=19.1 Thoughput=31.45 samples/s
INFO:gluonnlp:17:51:21 Epoch: 1, Batch: 3699/7387, Loss=0.7683, lr=0.0000083 Time cost=19.2 Thoughput=31.22 samples/s
INFO:gluonnlp:17:51:41 Epoch: 1, Batch: 3749/7387, Loss=0.7416, lr=0.0000082 Time cost=20.1 Thoughput=29.89 samples/s
INFO:gluonnlp:17:52:02 Epoch: 1, Batch: 3799/7387, Loss=0.8123, lr=0.0000081 Time cost=20.5 Thoughput=29.33 samples/s
INFO:gluonnlp:17:52:22 Epoch: 1, Batch: 3849/7387, Loss=0.7494, lr=0.0000080 Time cost=19.8 Thoughput=30.30 samples/s
INFO:gluonnlp:17:52:40 Epoch: 1, Batch: 3899/7387, Loss=0.7171, lr=0.0000079 Time cost=18.6 Thoughput=32.18 samples/s
INFO:gluonnlp:17:52:59 Epoch: 1, Batch: 3949/7387, Loss=0.7487, lr=0.0000078 Time cost=18.9 Thoughput=31.74 samples/s
INFO:gluonnlp:17:53:19 Epoch: 1, Batch: 3999/7387, Loss=0.7090, lr=0.0000076 Time cost=19.8 Thoughput=30.30 samples/s
INFO:gluonnlp:17:53:40 Epoch: 1, Batch: 4049/7387, Loss=0.6508, lr=0.0000075 Time cost=21.0 Thoughput=28.60 samples/s
INFO:gluonnlp:17:53:59 Epoch: 1, Batch: 4099/7387, Loss=0.7414, lr=0.0000074 Time cost=19.2 Thoughput=31.21 samples/s
INFO:gluonnlp:17:54:18 Epoch: 1, Batch: 4149/7387, Loss=0.7010, lr=0.0000073 Time cost=19.0 Thoughput=31.61 samples/s
INFO:gluonnlp:17:54:39 Epoch: 1, Batch: 4199/7387, Loss=0.8015, lr=0.0000072 Time cost=21.0 Thoughput=28.54 samples/s
INFO:gluonnlp:17:54:59 Epoch: 1, Batch: 4249/7387, Loss=0.7402, lr=0.0000071 Time cost=19.5 Thoughput=30.74 samples/s
INFO:gluonnlp:17:55:19 Epoch: 1, Batch: 4299/7387, Loss=0.7673, lr=0.0000070 Time cost=20.0 Thoughput=29.99 samples/s
INFO:gluonnlp:17:55:40 Epoch: 1, Batch: 4349/7387, Loss=0.7761, lr=0.0000069 Time cost=21.3 Thoughput=28.16 samples/s
INFO:gluonnlp:17:56:00 Epoch: 1, Batch: 4399/7387, Loss=0.7706, lr=0.0000067 Time cost=19.8 Thoughput=30.35 samples/s
INFO:gluonnlp:17:56:19 Epoch: 1, Batch: 4449/7387, Loss=0.6970, lr=0.0000066 Time cost=19.3 Thoughput=31.15 samples/s
INFO:gluonnlp:17:56:40 Epoch: 1, Batch: 4499/7387, Loss=0.7685, lr=0.0000065 Time cost=20.4 Thoughput=29.38 samples/s
INFO:gluonnlp:17:57:00 Epoch: 1, Batch: 4549/7387, Loss=0.6991, lr=0.0000064 Time cost=20.6 Thoughput=29.07 samples/s
INFO:gluonnlp:17:57:20 Epoch: 1, Batch: 4599/7387, Loss=0.7287, lr=0.0000063 Time cost=19.8 Thoughput=30.37 samples/s
INFO:gluonnlp:17:57:40 Epoch: 1, Batch: 4649/7387, Loss=0.7847, lr=0.0000062 Time cost=19.7 Thoughput=30.51 samples/s
INFO:gluonnlp:17:57:59 Epoch: 1, Batch: 4699/7387, Loss=0.7575, lr=0.0000061 Time cost=19.3 Thoughput=31.15 samples/s
INFO:gluonnlp:17:58:18 Epoch: 1, Batch: 4749/7387, Loss=0.7102, lr=0.0000059 Time cost=19.0 Thoughput=31.51 samples/s
INFO:gluonnlp:17:58:37 Epoch: 1, Batch: 4799/7387, Loss=0.7590, lr=0.0000058 Time cost=18.9 Thoughput=31.74 samples/s
INFO:gluonnlp:17:58:56 Epoch: 1, Batch: 4849/7387, Loss=0.7977, lr=0.0000057 Time cost=18.7 Thoughput=32.05 samples/s
INFO:gluonnlp:17:59:14 Epoch: 1, Batch: 4899/7387, Loss=0.7095, lr=0.0000056 Time cost=18.9 Thoughput=31.78 samples/s
INFO:gluonnlp:17:59:35 Epoch: 1, Batch: 4949/7387, Loss=0.7566, lr=0.0000055 Time cost=20.4 Thoughput=29.46 samples/s
INFO:gluonnlp:17:59:55 Epoch: 1, Batch: 4999/7387, Loss=0.7105, lr=0.0000054 Time cost=19.9 Thoughput=30.18 samples/s
INFO:gluonnlp:18:00:15 Epoch: 1, Batch: 5049/7387, Loss=0.8611, lr=0.0000053 Time cost=20.3 Thoughput=29.57 samples/s
INFO:gluonnlp:18:00:35 Epoch: 1, Batch: 5099/7387, Loss=0.7304, lr=0.0000052 Time cost=19.7 Thoughput=30.51 samples/s
INFO:gluonnlp:18:00:54 Epoch: 1, Batch: 5149/7387, Loss=0.6513, lr=0.0000050 Time cost=19.7 Thoughput=30.52 samples/s
INFO:gluonnlp:18:01:15 Epoch: 1, Batch: 5199/7387, Loss=0.6123, lr=0.0000049 Time cost=20.9 Thoughput=28.72 samples/s
INFO:gluonnlp:18:01:35 Epoch: 1, Batch: 5249/7387, Loss=0.7779, lr=0.0000048 Time cost=20.2 Thoughput=29.71 samples/s
INFO:gluonnlp:18:01:54 Epoch: 1, Batch: 5299/7387, Loss=0.7096, lr=0.0000047 Time cost=18.5 Thoughput=32.35 samples/s
INFO:gluonnlp:18:02:14 Epoch: 1, Batch: 5349/7387, Loss=0.6884, lr=0.0000046 Time cost=20.0 Thoughput=29.96 samples/s
INFO:gluonnlp:18:02:33 Epoch: 1, Batch: 5399/7387, Loss=0.6612, lr=0.0000045 Time cost=19.4 Thoughput=30.96 samples/s
INFO:gluonnlp:18:02:53 Epoch: 1, Batch: 5449/7387, Loss=0.8553, lr=0.0000044 Time cost=19.7 Thoughput=30.43 samples/s
INFO:gluonnlp:18:03:15 Epoch: 1, Batch: 5499/7387, Loss=0.7904, lr=0.0000043 Time cost=21.5 Thoughput=27.85 samples/s
INFO:gluonnlp:18:03:34 Epoch: 1, Batch: 5549/7387, Loss=0.6533, lr=0.0000041 Time cost=19.9 Thoughput=30.19 samples/s
INFO:gluonnlp:18:03:55 Epoch: 1, Batch: 5599/7387, Loss=0.7421, lr=0.0000040 Time cost=20.7 Thoughput=29.01 samples/s
INFO:gluonnlp:18:04:14 Epoch: 1, Batch: 5649/7387, Loss=0.6381, lr=0.0000039 Time cost=19.3 Thoughput=31.12 samples/s
INFO:gluonnlp:18:04:36 Epoch: 1, Batch: 5699/7387, Loss=0.7950, lr=0.0000038 Time cost=21.3 Thoughput=28.14 samples/s
INFO:gluonnlp:18:04:55 Epoch: 1, Batch: 5749/7387, Loss=0.6746, lr=0.0000037 Time cost=19.2 Thoughput=31.27 samples/s
INFO:gluonnlp:18:05:15 Epoch: 1, Batch: 5799/7387, Loss=0.7075, lr=0.0000036 Time cost=19.9 Thoughput=30.10 samples/s
INFO:gluonnlp:18:05:34 Epoch: 1, Batch: 5849/7387, Loss=0.6931, lr=0.0000035 Time cost=18.9 Thoughput=31.76 samples/s
INFO:gluonnlp:18:05:54 Epoch: 1, Batch: 5899/7387, Loss=0.7216, lr=0.0000034 Time cost=20.4 Thoughput=29.35 samples/s
INFO:gluonnlp:18:06:14 Epoch: 1, Batch: 5949/7387, Loss=0.6590, lr=0.0000032 Time cost=20.2 Thoughput=29.69 samples/s
INFO:gluonnlp:18:06:34 Epoch: 1, Batch: 5999/7387, Loss=0.7205, lr=0.0000031 Time cost=19.3 Thoughput=31.03 samples/s
INFO:gluonnlp:18:06:56 Epoch: 1, Batch: 6049/7387, Loss=0.7693, lr=0.0000030 Time cost=21.8 Thoughput=27.52 samples/s
INFO:gluonnlp:18:07:16 Epoch: 1, Batch: 6099/7387, Loss=0.7801, lr=0.0000029 Time cost=20.1 Thoughput=29.88 samples/s
INFO:gluonnlp:18:07:36 Epoch: 1, Batch: 6149/7387, Loss=0.7402, lr=0.0000028 Time cost=20.2 Thoughput=29.68 samples/s
INFO:gluonnlp:18:07:54 Epoch: 1, Batch: 6199/7387, Loss=0.7524, lr=0.0000027 Time cost=18.1 Thoughput=33.10 samples/s
INFO:gluonnlp:18:08:12 Epoch: 1, Batch: 6249/7387, Loss=0.7354, lr=0.0000026 Time cost=18.4 Thoughput=32.53 samples/s
INFO:gluonnlp:18:08:32 Epoch: 1, Batch: 6299/7387, Loss=0.7584, lr=0.0000025 Time cost=20.0 Thoughput=30.04 samples/s
INFO:gluonnlp:18:08:52 Epoch: 1, Batch: 6349/7387, Loss=0.7124, lr=0.0000023 Time cost=19.1 Thoughput=31.42 samples/s
INFO:gluonnlp:18:09:11 Epoch: 1, Batch: 6399/7387, Loss=0.7461, lr=0.0000022 Time cost=20.0 Thoughput=30.01 samples/s
INFO:gluonnlp:18:09:31 Epoch: 1, Batch: 6449/7387, Loss=0.7452, lr=0.0000021 Time cost=19.4 Thoughput=30.89 samples/s
INFO:gluonnlp:18:09:50 Epoch: 1, Batch: 6499/7387, Loss=0.7357, lr=0.0000020 Time cost=19.1 Thoughput=31.35 samples/s
INFO:gluonnlp:18:10:10 Epoch: 1, Batch: 6549/7387, Loss=0.7401, lr=0.0000019 Time cost=19.5 Thoughput=30.73 samples/s
INFO:gluonnlp:18:10:29 Epoch: 1, Batch: 6599/7387, Loss=0.6658, lr=0.0000018 Time cost=19.2 Thoughput=31.25 samples/s
INFO:gluonnlp:18:10:49 Epoch: 1, Batch: 6649/7387, Loss=0.7373, lr=0.0000017 Time cost=20.3 Thoughput=29.51 samples/s
INFO:gluonnlp:18:11:09 Epoch: 1, Batch: 6699/7387, Loss=0.8346, lr=0.0000015 Time cost=20.1 Thoughput=29.88 samples/s
INFO:gluonnlp:18:11:30 Epoch: 1, Batch: 6749/7387, Loss=0.7577, lr=0.0000014 Time cost=20.4 Thoughput=29.40 samples/s
INFO:gluonnlp:18:11:49 Epoch: 1, Batch: 6799/7387, Loss=0.6842, lr=0.0000013 Time cost=19.3 Thoughput=31.03 samples/s
INFO:gluonnlp:18:12:10 Epoch: 1, Batch: 6849/7387, Loss=0.7970, lr=0.0000012 Time cost=20.6 Thoughput=29.15 samples/s
INFO:gluonnlp:18:12:28 Epoch: 1, Batch: 6899/7387, Loss=0.7036, lr=0.0000011 Time cost=18.6 Thoughput=32.25 samples/s
INFO:gluonnlp:18:12:47 Epoch: 1, Batch: 6949/7387, Loss=0.7592, lr=0.0000010 Time cost=19.2 Thoughput=31.32 samples/s
INFO:gluonnlp:18:13:07 Epoch: 1, Batch: 6999/7387, Loss=0.7050, lr=0.0000009 Time cost=19.6 Thoughput=30.60 samples/s
INFO:gluonnlp:18:13:27 Epoch: 1, Batch: 7049/7387, Loss=0.7941, lr=0.0000008 Time cost=19.6 Thoughput=30.60 samples/s
INFO:gluonnlp:18:13:47 Epoch: 1, Batch: 7099/7387, Loss=0.7580, lr=0.0000006 Time cost=20.3 Thoughput=29.51 samples/s
INFO:gluonnlp:18:14:07 Epoch: 1, Batch: 7149/7387, Loss=0.7016, lr=0.0000005 Time cost=19.8 Thoughput=30.25 samples/s
INFO:gluonnlp:18:14:27 Epoch: 1, Batch: 7199/7387, Loss=0.7169, lr=0.0000004 Time cost=20.0 Thoughput=29.98 samples/s
INFO:gluonnlp:18:14:46 Epoch: 1, Batch: 7249/7387, Loss=0.6893, lr=0.0000003 Time cost=19.5 Thoughput=30.76 samples/s
INFO:gluonnlp:18:15:06 Epoch: 1, Batch: 7299/7387, Loss=0.7869, lr=0.0000002 Time cost=19.6 Thoughput=30.61 samples/s
INFO:gluonnlp:18:15:26 Epoch: 1, Batch: 7349/7387, Loss=0.6837, lr=0.0000001 Time cost=19.9 Thoughput=30.23 samples/s
INFO:gluonnlp:18:15:40 Finish training step: 14773
INFO:gluonnlp:18:15:40 Time cost=5842.42 s, Thoughput=30.34 samples/s
INFO:gluonnlp:18:15:44 Loading dev data...
INFO:gluonnlp:18:15:44 Number of records in dev data:10570
INFO:gluonnlp:18:15:55 The number of examples after preprocessing:10833
INFO:gluonnlp:18:15:56 start prediction
INFO:gluonnlp:18:16:55 Time cost=59.30 s, Thoughput=182.69 samples/s
INFO:gluonnlp:18:16:55 Get prediction results...
INFO:gluonnlp:18:17:39 {'exact_match': 81.26773888363293, 'f1': 88.5857634479705}