In [16]:
import tensorflow as tf
import numpy as np

class MNISTLoader():
    """In-memory MNIST dataset with random mini-batch sampling.

    Attributes set by the constructor:
        train_data, test_data: float32 image arrays divided by 255 with a
            trailing channel axis appended.
        train_label, test_label: int32 label arrays.
        num_train_data, num_test_data: number of examples in each split.
    """

    def __init__(self):
        """Load MNIST via ``tf.keras.datasets.mnist`` and normalise it."""
        (raw_train_x, raw_train_y), (raw_test_x, raw_test_y) = tf.keras.datasets.mnist.load_data()

        # MNIST images are uint8 (values 0-255) by default. Convert them to
        # floats in the 0-1 range and append a final axis as the colour channel.
        self.train_data = (raw_train_x.astype(np.float32) / 255.0)[..., np.newaxis]    # [60000, 28, 28, 1]
        self.test_data = (raw_test_x.astype(np.float32) / 255.0)[..., np.newaxis]      # [10000, 28, 28, 1]

        self.train_label = raw_train_y.astype(np.int32)    # [60000]
        self.test_label = raw_test_y.astype(np.int32)      # [10000]

        self.num_train_data = self.train_data.shape[0]
        self.num_test_data = self.test_data.shape[0]

    def get_batch(self, batch_size):
        """Return ``batch_size`` randomly chosen training examples.

        Indices are drawn independently with ``np.random.randint``, so the
        same example can appear more than once in a batch.

        Args:
            batch_size: number of examples to draw.

        Returns:
            A ``(data, labels)`` tuple holding the selected rows of
            ``train_data`` and the matching entries of ``train_label``.
        """
        picks = np.random.randint(low=0, high=self.num_train_data, size=batch_size)
        return self.train_data[picks], self.train_label[picks]
    



In [17]:
class MLP(tf.keras.Model):
    """Multilayer perceptron: Flatten -> Dense(100, ReLU) -> Dense(10) -> softmax."""

    def __init__(self):
        """Create the three layers used by ``call``."""
        super().__init__()
        # Flatten collapses every dimension except the first (batch_size).
        self.flatten = tf.keras.layers.Flatten()
        self.dense1 = tf.keras.layers.Dense(units=100, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(units=10)

    def call(self, inputs):
        """Run the forward pass.

        Args:
            inputs: batch of images, [batch_size, 28, 28, 1].

        Returns:
            Softmax of the final dense layer's output, [batch_size, 10].
        """
        flat = self.flatten(inputs)     # [batch_size, 784]
        hidden = self.dense1(flat)      # [batch_size, 100]
        logits = self.dense2(hidden)    # [batch_size, 10]
        return tf.nn.softmax(logits)

In [18]:
# Hyperparameters shared by the training and evaluation cells.
num_epochs = 5          # multiplier applied to the per-pass batch count when sizing the training loop
batch_size = 50         # examples per mini-batch, for both training and evaluation
learning_rate = 1e-3    # learning rate handed to the Adam optimizer

In [19]:
# Build the model, the data source and the optimizer.
model = MLP()
data_loader = MNISTLoader()
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

# Number of optimisation steps: full batches in one pass over the training
# set, times num_epochs. Batches are drawn at random by get_batch, so an
# "epoch" here is a step count rather than an exact pass over the data.
num_batches = data_loader.num_train_data // batch_size * num_epochs
for batch_index in range(num_batches):
    X, y = data_loader.get_batch(batch_size)
    # Only the forward pass and the loss need to be recorded on the tape.
    with tf.GradientTape() as tape:
        y_pred = model(X)
        loss = tf.keras.losses.sparse_categorical_crossentropy(y_true=y, y_pred=y_pred)
        loss = tf.reduce_mean(loss)
    # Logging is not part of the differentiated computation, so it lives
    # outside the tape context.
    print("batch %d: loss %f" % (batch_index, loss.numpy()))
    # Differentiate with respect to trainable variables only: a non-trainable
    # variable would produce a None gradient that cannot be applied.
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(grads_and_vars=zip(grads, model.trainable_variables))
    
# Sparse-label form of the loss, as used inside the training loop above:
# loss = tf.keras.losses.sparse_categorical_crossentropy(y_true=y, y_pred=y_pred)

# Alternative formulation of the same loss: one-hot encode the integer labels
# (depth taken from the last dimension of y_pred) and call
# categorical_crossentropy instead of the sparse variant.
# NOTE(review): this runs once, after the loop, on the `y` / `y_pred` left over
# from the final batch, and rebinds `loss` without the tf.reduce_mean used in
# the loop. No later cell visible here reads `loss` -- confirm this is only a
# demonstration and not meant to be part of training.
loss = tf.keras.losses.categorical_crossentropy(
    y_true=tf.one_hot(y, depth=tf.shape(y_pred)[-1]),
    y_pred=y_pred
)

batch 0: loss 2.463164
batch 1: loss 2.329427
batch 2: loss 2.226733
batch 3: loss 2.184752
batch 4: loss 2.079787
batch 5: loss 2.057952
batch 6: loss 2.023297
batch 7: loss 1.854117
batch 8: loss 1.827185
batch 9: loss 1.948629
batch 10: loss 1.804448
batch 11: loss 1.828422
batch 12: loss 1.670462
batch 13: loss 1.688965
batch 14: loss 1.619276
batch 15: loss 1.448691
batch 16: loss 1.453458
batch 17: loss 1.345556
batch 18: loss 1.283378
batch 19: loss 1.350428
batch 20: loss 1.233858
batch 21: loss 1.324239
batch 22: loss 1.088438
batch 23: loss 1.183948
batch 24: loss 1.160830
batch 25: loss 1.017268
batch 26: loss 1.039001
batch 27: loss 1.076135
batch 28: loss 1.139312
batch 29: loss 0.765663
batch 30: loss 0.910619
batch 31: loss 0.972463
batch 32: loss 0.998353
batch 33: loss 0.928287
batch 34: loss 0.812141
batch 35: loss 0.689508
batch 36: loss 0.693365
batch 37: loss 0.901580
batch 38: loss 0.752819
batch 39: loss 0.652170
batch 40: loss 0.924243
batch 41: loss 0.610802
ba

batch 333: loss 0.253211
batch 334: loss 0.257843
batch 335: loss 0.362800
batch 336: loss 0.232516
batch 337: loss 0.260476
batch 338: loss 0.301259
batch 339: loss 0.207969
batch 340: loss 0.301451
batch 341: loss 0.309310
batch 342: loss 0.371709
batch 343: loss 0.217413
batch 344: loss 0.210255
batch 345: loss 0.218872
batch 346: loss 0.306511
batch 347: loss 0.177002
batch 348: loss 0.386756
batch 349: loss 0.179457
batch 350: loss 0.374544
batch 351: loss 0.470752
batch 352: loss 0.351747
batch 353: loss 0.364021
batch 354: loss 0.172743
batch 355: loss 0.242191
batch 356: loss 0.255442
batch 357: loss 0.183986
batch 358: loss 0.274145
batch 359: loss 0.514135
batch 360: loss 0.138253
batch 361: loss 0.285020
batch 362: loss 0.301796
batch 363: loss 0.126180
batch 364: loss 0.307437
batch 365: loss 0.186660
batch 366: loss 0.136364
batch 367: loss 0.205007
batch 368: loss 0.389847
batch 369: loss 0.232950
batch 370: loss 0.144104
batch 371: loss 0.217567
batch 372: loss 0.244963


batch 668: loss 0.209678
batch 669: loss 0.171292
batch 670: loss 0.380335
batch 671: loss 0.123146
batch 672: loss 0.077417
batch 673: loss 0.282443
batch 674: loss 0.150041
batch 675: loss 0.120664
batch 676: loss 0.205467
batch 677: loss 0.346384
batch 678: loss 0.459174
batch 679: loss 0.467361
batch 680: loss 0.183555
batch 681: loss 0.190550
batch 682: loss 0.342124
batch 683: loss 0.618057
batch 684: loss 0.141719
batch 685: loss 0.276949
batch 686: loss 0.296018
batch 687: loss 0.213707
batch 688: loss 0.270262
batch 689: loss 0.176399
batch 690: loss 0.089959
batch 691: loss 0.219825
batch 692: loss 0.204274
batch 693: loss 0.188214
batch 694: loss 0.136253
batch 695: loss 0.261920
batch 696: loss 0.322237
batch 697: loss 0.286735
batch 698: loss 0.160725
batch 699: loss 0.120363
batch 700: loss 0.234784
batch 701: loss 0.318542
batch 702: loss 0.420437
batch 703: loss 0.496758
batch 704: loss 0.195181
batch 705: loss 0.204118
batch 706: loss 0.119673
batch 707: loss 0.294663


batch 999: loss 0.102081
batch 1000: loss 0.091579
batch 1001: loss 0.097571
batch 1002: loss 0.112934
batch 1003: loss 0.299119
batch 1004: loss 0.147794
batch 1005: loss 0.115244
batch 1006: loss 0.190162
batch 1007: loss 0.232350
batch 1008: loss 0.103608
batch 1009: loss 0.094037
batch 1010: loss 0.211796
batch 1011: loss 0.118106
batch 1012: loss 0.141576
batch 1013: loss 0.250718
batch 1014: loss 0.098538
batch 1015: loss 0.220300
batch 1016: loss 0.194602
batch 1017: loss 0.161767
batch 1018: loss 0.144177
batch 1019: loss 0.083516
batch 1020: loss 0.145494
batch 1021: loss 0.087950
batch 1022: loss 0.066205
batch 1023: loss 0.049624
batch 1024: loss 0.164483
batch 1025: loss 0.199892
batch 1026: loss 0.339678
batch 1027: loss 0.122032
batch 1028: loss 0.172090
batch 1029: loss 0.173596
batch 1030: loss 0.039287
batch 1031: loss 0.268121
batch 1032: loss 0.192974
batch 1033: loss 0.109798
batch 1034: loss 0.102580
batch 1035: loss 0.255389
batch 1036: loss 0.092591
batch 1037: l

batch 1317: loss 0.173558
batch 1318: loss 0.041273
batch 1319: loss 0.135531
batch 1320: loss 0.154461
batch 1321: loss 0.182042
batch 1322: loss 0.148510
batch 1323: loss 0.072756
batch 1324: loss 0.301741
batch 1325: loss 0.068157
batch 1326: loss 0.137719
batch 1327: loss 0.254315
batch 1328: loss 0.072923
batch 1329: loss 0.052599
batch 1330: loss 0.107099
batch 1331: loss 0.309207
batch 1332: loss 0.065255
batch 1333: loss 0.091063
batch 1334: loss 0.121969
batch 1335: loss 0.232770
batch 1336: loss 0.376215
batch 1337: loss 0.112030
batch 1338: loss 0.173735
batch 1339: loss 0.088498
batch 1340: loss 0.099941
batch 1341: loss 0.167961
batch 1342: loss 0.153770
batch 1343: loss 0.146531
batch 1344: loss 0.088794
batch 1345: loss 0.133106
batch 1346: loss 0.116146
batch 1347: loss 0.177872
batch 1348: loss 0.146045
batch 1349: loss 0.161863
batch 1350: loss 0.074565
batch 1351: loss 0.041981
batch 1352: loss 0.292417
batch 1353: loss 0.374321
batch 1354: loss 0.058137
batch 1355: 

batch 1639: loss 0.200820
batch 1640: loss 0.080706
batch 1641: loss 0.070386
batch 1642: loss 0.138247
batch 1643: loss 0.193655
batch 1644: loss 0.195100
batch 1645: loss 0.118393
batch 1646: loss 0.204198
batch 1647: loss 0.117410
batch 1648: loss 0.093719
batch 1649: loss 0.113364
batch 1650: loss 0.155296
batch 1651: loss 0.083294
batch 1652: loss 0.055465
batch 1653: loss 0.039653
batch 1654: loss 0.104368
batch 1655: loss 0.086658
batch 1656: loss 0.240974
batch 1657: loss 0.045492
batch 1658: loss 0.232786
batch 1659: loss 0.199386
batch 1660: loss 0.091900
batch 1661: loss 0.084532
batch 1662: loss 0.104135
batch 1663: loss 0.208935
batch 1664: loss 0.197926
batch 1665: loss 0.089308
batch 1666: loss 0.121189
batch 1667: loss 0.092472
batch 1668: loss 0.121638
batch 1669: loss 0.153014
batch 1670: loss 0.241416
batch 1671: loss 0.122262
batch 1672: loss 0.107922
batch 1673: loss 0.055922
batch 1674: loss 0.173229
batch 1675: loss 0.042498
batch 1676: loss 0.263112
batch 1677: 

batch 1966: loss 0.096863
batch 1967: loss 0.061587
batch 1968: loss 0.021349
batch 1969: loss 0.070352
batch 1970: loss 0.110496
batch 1971: loss 0.077160
batch 1972: loss 0.063531
batch 1973: loss 0.082940
batch 1974: loss 0.061600
batch 1975: loss 0.102823
batch 1976: loss 0.043219
batch 1977: loss 0.438117
batch 1978: loss 0.140456
batch 1979: loss 0.076490
batch 1980: loss 0.040213
batch 1981: loss 0.077090
batch 1982: loss 0.101928
batch 1983: loss 0.296434
batch 1984: loss 0.050068
batch 1985: loss 0.196301
batch 1986: loss 0.096901
batch 1987: loss 0.078396
batch 1988: loss 0.094027
batch 1989: loss 0.195082
batch 1990: loss 0.060630
batch 1991: loss 0.207694
batch 1992: loss 0.172970
batch 1993: loss 0.019858
batch 1994: loss 0.100056
batch 1995: loss 0.113526
batch 1996: loss 0.069641
batch 1997: loss 0.226695
batch 1998: loss 0.051167
batch 1999: loss 0.135230
batch 2000: loss 0.079860
batch 2001: loss 0.174885
batch 2002: loss 0.141104
batch 2003: loss 0.135336
batch 2004: 

batch 2288: loss 0.148409
batch 2289: loss 0.187326
batch 2290: loss 0.188490
batch 2291: loss 0.046549
batch 2292: loss 0.240186
batch 2293: loss 0.046358
batch 2294: loss 0.098782
batch 2295: loss 0.061775
batch 2296: loss 0.096758
batch 2297: loss 0.085278
batch 2298: loss 0.038579
batch 2299: loss 0.222758
batch 2300: loss 0.047613
batch 2301: loss 0.191127
batch 2302: loss 0.207254
batch 2303: loss 0.123159
batch 2304: loss 0.063021
batch 2305: loss 0.247319
batch 2306: loss 0.106223
batch 2307: loss 0.159232
batch 2308: loss 0.073827
batch 2309: loss 0.039136
batch 2310: loss 0.054485
batch 2311: loss 0.092931
batch 2312: loss 0.075619
batch 2313: loss 0.079380
batch 2314: loss 0.165706
batch 2315: loss 0.086405
batch 2316: loss 0.043257
batch 2317: loss 0.056784
batch 2318: loss 0.091517
batch 2319: loss 0.045654
batch 2320: loss 0.112024
batch 2321: loss 0.108507
batch 2322: loss 0.141648
batch 2323: loss 0.106895
batch 2324: loss 0.113192
batch 2325: loss 0.037258
batch 2326: 

batch 2611: loss 0.332350
batch 2612: loss 0.104227
batch 2613: loss 0.087023
batch 2614: loss 0.154729
batch 2615: loss 0.032653
batch 2616: loss 0.070871
batch 2617: loss 0.085731
batch 2618: loss 0.032871
batch 2619: loss 0.080400
batch 2620: loss 0.056846
batch 2621: loss 0.133118
batch 2622: loss 0.081904
batch 2623: loss 0.041260
batch 2624: loss 0.189843
batch 2625: loss 0.167361
batch 2626: loss 0.141207
batch 2627: loss 0.129583
batch 2628: loss 0.030266
batch 2629: loss 0.176298
batch 2630: loss 0.071406
batch 2631: loss 0.048772
batch 2632: loss 0.127514
batch 2633: loss 0.190510
batch 2634: loss 0.225054
batch 2635: loss 0.013923
batch 2636: loss 0.138590
batch 2637: loss 0.050618
batch 2638: loss 0.068502
batch 2639: loss 0.128952
batch 2640: loss 0.081538
batch 2641: loss 0.235280
batch 2642: loss 0.295651
batch 2643: loss 0.172265
batch 2644: loss 0.084211
batch 2645: loss 0.106911
batch 2646: loss 0.158391
batch 2647: loss 0.216732
batch 2648: loss 0.172366
batch 2649: 

batch 2941: loss 0.077261
batch 2942: loss 0.130320
batch 2943: loss 0.172237
batch 2944: loss 0.045586
batch 2945: loss 0.075779
batch 2946: loss 0.048096
batch 2947: loss 0.023837
batch 2948: loss 0.115366
batch 2949: loss 0.085364
batch 2950: loss 0.118604
batch 2951: loss 0.043253
batch 2952: loss 0.120836
batch 2953: loss 0.085314
batch 2954: loss 0.025334
batch 2955: loss 0.181788
batch 2956: loss 0.052115
batch 2957: loss 0.135269
batch 2958: loss 0.050544
batch 2959: loss 0.040453
batch 2960: loss 0.048717
batch 2961: loss 0.102489
batch 2962: loss 0.132976
batch 2963: loss 0.035416
batch 2964: loss 0.053562
batch 2965: loss 0.045584
batch 2966: loss 0.062154
batch 2967: loss 0.092669
batch 2968: loss 0.168681
batch 2969: loss 0.013856
batch 2970: loss 0.055091
batch 2971: loss 0.099618
batch 2972: loss 0.059550
batch 2973: loss 0.069171
batch 2974: loss 0.039188
batch 2975: loss 0.059740
batch 2976: loss 0.032228
batch 2977: loss 0.083419
batch 2978: loss 0.026133
batch 2979: 

batch 3264: loss 0.080243
batch 3265: loss 0.096286
batch 3266: loss 0.071540
batch 3267: loss 0.016923
batch 3268: loss 0.050663
batch 3269: loss 0.076283
batch 3270: loss 0.019987
batch 3271: loss 0.057451
batch 3272: loss 0.068977
batch 3273: loss 0.023411
batch 3274: loss 0.100639
batch 3275: loss 0.015417
batch 3276: loss 0.061661
batch 3277: loss 0.121884
batch 3278: loss 0.084322
batch 3279: loss 0.069975
batch 3280: loss 0.049173
batch 3281: loss 0.017161
batch 3282: loss 0.086478
batch 3283: loss 0.070419
batch 3284: loss 0.077277
batch 3285: loss 0.060616
batch 3286: loss 0.118189
batch 3287: loss 0.072592
batch 3288: loss 0.059482
batch 3289: loss 0.047448
batch 3290: loss 0.020054
batch 3291: loss 0.041038
batch 3292: loss 0.169448
batch 3293: loss 0.039608
batch 3294: loss 0.153761
batch 3295: loss 0.087788
batch 3296: loss 0.026657
batch 3297: loss 0.036507
batch 3298: loss 0.055869
batch 3299: loss 0.040842
batch 3300: loss 0.022381
batch 3301: loss 0.022298
batch 3302: 

batch 3581: loss 0.098398
batch 3582: loss 0.023749
batch 3583: loss 0.022484
batch 3584: loss 0.065780
batch 3585: loss 0.145894
batch 3586: loss 0.011073
batch 3587: loss 0.176812
batch 3588: loss 0.112988
batch 3589: loss 0.302671
batch 3590: loss 0.014855
batch 3591: loss 0.023762
batch 3592: loss 0.152798
batch 3593: loss 0.118960
batch 3594: loss 0.011438
batch 3595: loss 0.032094
batch 3596: loss 0.026576
batch 3597: loss 0.079864
batch 3598: loss 0.011381
batch 3599: loss 0.061910
batch 3600: loss 0.045277
batch 3601: loss 0.043461
batch 3602: loss 0.155580
batch 3603: loss 0.048741
batch 3604: loss 0.033932
batch 3605: loss 0.050537
batch 3606: loss 0.008973
batch 3607: loss 0.024557
batch 3608: loss 0.022373
batch 3609: loss 0.064181
batch 3610: loss 0.031001
batch 3611: loss 0.058472
batch 3612: loss 0.134237
batch 3613: loss 0.049176
batch 3614: loss 0.074810
batch 3615: loss 0.020001
batch 3616: loss 0.035679
batch 3617: loss 0.089636
batch 3618: loss 0.137989
batch 3619: 

batch 3909: loss 0.124568
batch 3910: loss 0.099024
batch 3911: loss 0.029847
batch 3912: loss 0.200007
batch 3913: loss 0.016170
batch 3914: loss 0.077895
batch 3915: loss 0.173383
batch 3916: loss 0.021025
batch 3917: loss 0.142641
batch 3918: loss 0.030841
batch 3919: loss 0.157681
batch 3920: loss 0.025519
batch 3921: loss 0.011515
batch 3922: loss 0.172558
batch 3923: loss 0.034702
batch 3924: loss 0.041066
batch 3925: loss 0.060775
batch 3926: loss 0.046616
batch 3927: loss 0.036604
batch 3928: loss 0.073993
batch 3929: loss 0.019993
batch 3930: loss 0.008959
batch 3931: loss 0.239281
batch 3932: loss 0.247881
batch 3933: loss 0.037957
batch 3934: loss 0.025797
batch 3935: loss 0.060787
batch 3936: loss 0.051932
batch 3937: loss 0.026588
batch 3938: loss 0.083134
batch 3939: loss 0.083633
batch 3940: loss 0.036106
batch 3941: loss 0.126618
batch 3942: loss 0.079345
batch 3943: loss 0.036760
batch 3944: loss 0.048769
batch 3945: loss 0.163558
batch 3946: loss 0.067234
batch 3947: 

batch 4228: loss 0.038184
batch 4229: loss 0.024419
batch 4230: loss 0.179587
batch 4231: loss 0.030090
batch 4232: loss 0.034058
batch 4233: loss 0.047810
batch 4234: loss 0.051329
batch 4235: loss 0.078990
batch 4236: loss 0.177652
batch 4237: loss 0.067167
batch 4238: loss 0.043344
batch 4239: loss 0.074382
batch 4240: loss 0.155972
batch 4241: loss 0.007031
batch 4242: loss 0.089540
batch 4243: loss 0.038389
batch 4244: loss 0.047887
batch 4245: loss 0.090773
batch 4246: loss 0.067867
batch 4247: loss 0.130937
batch 4248: loss 0.042774
batch 4249: loss 0.153772
batch 4250: loss 0.086203
batch 4251: loss 0.034453
batch 4252: loss 0.080918
batch 4253: loss 0.024636
batch 4254: loss 0.006059
batch 4255: loss 0.032066
batch 4256: loss 0.200530
batch 4257: loss 0.040331
batch 4258: loss 0.030782
batch 4259: loss 0.064558
batch 4260: loss 0.099313
batch 4261: loss 0.041276
batch 4262: loss 0.008951
batch 4263: loss 0.071511
batch 4264: loss 0.025616
batch 4265: loss 0.048821
batch 4266: 

batch 4550: loss 0.055364
batch 4551: loss 0.049549
batch 4552: loss 0.082389
batch 4553: loss 0.114143
batch 4554: loss 0.065933
batch 4555: loss 0.013417
batch 4556: loss 0.059882
batch 4557: loss 0.010049
batch 4558: loss 0.059514
batch 4559: loss 0.050906
batch 4560: loss 0.061210
batch 4561: loss 0.023642
batch 4562: loss 0.045054
batch 4563: loss 0.025843
batch 4564: loss 0.042730
batch 4565: loss 0.038208
batch 4566: loss 0.038039
batch 4567: loss 0.115478
batch 4568: loss 0.028226
batch 4569: loss 0.083936
batch 4570: loss 0.071759
batch 4571: loss 0.049036
batch 4572: loss 0.168300
batch 4573: loss 0.017239
batch 4574: loss 0.007860
batch 4575: loss 0.031793
batch 4576: loss 0.057208
batch 4577: loss 0.032656
batch 4578: loss 0.004469
batch 4579: loss 0.007291
batch 4580: loss 0.037493
batch 4581: loss 0.044278
batch 4582: loss 0.109469
batch 4583: loss 0.049799
batch 4584: loss 0.025134
batch 4585: loss 0.167594
batch 4586: loss 0.131798
batch 4587: loss 0.022080
batch 4588: 

batch 4869: loss 0.037331
batch 4870: loss 0.029328
batch 4871: loss 0.087959
batch 4872: loss 0.013642
batch 4873: loss 0.042845
batch 4874: loss 0.042531
batch 4875: loss 0.037840
batch 4876: loss 0.045819
batch 4877: loss 0.009591
batch 4878: loss 0.133054
batch 4879: loss 0.027927
batch 4880: loss 0.011997
batch 4881: loss 0.102106
batch 4882: loss 0.111798
batch 4883: loss 0.066808
batch 4884: loss 0.009873
batch 4885: loss 0.027590
batch 4886: loss 0.050220
batch 4887: loss 0.101981
batch 4888: loss 0.046474
batch 4889: loss 0.040230
batch 4890: loss 0.039701
batch 4891: loss 0.047262
batch 4892: loss 0.016511
batch 4893: loss 0.092825
batch 4894: loss 0.231085
batch 4895: loss 0.091766
batch 4896: loss 0.027980
batch 4897: loss 0.022247
batch 4898: loss 0.104148
batch 4899: loss 0.187079
batch 4900: loss 0.036077
batch 4901: loss 0.011405
batch 4902: loss 0.074317
batch 4903: loss 0.040317
batch 4904: loss 0.061426
batch 4905: loss 0.039905
batch 4906: loss 0.025084
batch 4907: 

batch 5194: loss 0.048873
batch 5195: loss 0.010623
batch 5196: loss 0.010853
batch 5197: loss 0.006764
batch 5198: loss 0.031038
batch 5199: loss 0.015207
batch 5200: loss 0.016665
batch 5201: loss 0.035983
batch 5202: loss 0.041047
batch 5203: loss 0.032903
batch 5204: loss 0.043214
batch 5205: loss 0.024522
batch 5206: loss 0.010779
batch 5207: loss 0.013390
batch 5208: loss 0.045146
batch 5209: loss 0.028033
batch 5210: loss 0.114529
batch 5211: loss 0.017711
batch 5212: loss 0.031411
batch 5213: loss 0.042245
batch 5214: loss 0.185466
batch 5215: loss 0.066877
batch 5216: loss 0.086145
batch 5217: loss 0.169298
batch 5218: loss 0.142586
batch 5219: loss 0.020049
batch 5220: loss 0.070115
batch 5221: loss 0.082418
batch 5222: loss 0.045124
batch 5223: loss 0.035692
batch 5224: loss 0.118886
batch 5225: loss 0.184881
batch 5226: loss 0.042784
batch 5227: loss 0.039528
batch 5228: loss 0.041845
batch 5229: loss 0.059084
batch 5230: loss 0.025225
batch 5231: loss 0.079711
batch 5232: 

batch 5521: loss 0.058042
batch 5522: loss 0.137813
batch 5523: loss 0.040254
batch 5524: loss 0.009870
batch 5525: loss 0.043979
batch 5526: loss 0.034344
batch 5527: loss 0.007785
batch 5528: loss 0.015855
batch 5529: loss 0.028852
batch 5530: loss 0.013879
batch 5531: loss 0.057791
batch 5532: loss 0.039287
batch 5533: loss 0.037894
batch 5534: loss 0.052096
batch 5535: loss 0.075576
batch 5536: loss 0.038825
batch 5537: loss 0.070583
batch 5538: loss 0.016787
batch 5539: loss 0.078063
batch 5540: loss 0.033981
batch 5541: loss 0.019553
batch 5542: loss 0.031474
batch 5543: loss 0.029300
batch 5544: loss 0.021164
batch 5545: loss 0.011492
batch 5546: loss 0.026142
batch 5547: loss 0.083465
batch 5548: loss 0.044796
batch 5549: loss 0.027070
batch 5550: loss 0.096888
batch 5551: loss 0.029336
batch 5552: loss 0.029817
batch 5553: loss 0.082841
batch 5554: loss 0.013491
batch 5555: loss 0.076437
batch 5556: loss 0.019327
batch 5557: loss 0.176731
batch 5558: loss 0.031427
batch 5559: 

batch 5848: loss 0.020795
batch 5849: loss 0.034001
batch 5850: loss 0.058644
batch 5851: loss 0.010545
batch 5852: loss 0.104767
batch 5853: loss 0.009613
batch 5854: loss 0.024480
batch 5855: loss 0.081853
batch 5856: loss 0.053790
batch 5857: loss 0.040652
batch 5858: loss 0.082049
batch 5859: loss 0.027128
batch 5860: loss 0.129504
batch 5861: loss 0.007892
batch 5862: loss 0.042430
batch 5863: loss 0.102017
batch 5864: loss 0.073715
batch 5865: loss 0.058165
batch 5866: loss 0.088181
batch 5867: loss 0.049257
batch 5868: loss 0.026796
batch 5869: loss 0.025832
batch 5870: loss 0.026421
batch 5871: loss 0.071924
batch 5872: loss 0.011536
batch 5873: loss 0.006929
batch 5874: loss 0.052736
batch 5875: loss 0.062920
batch 5876: loss 0.026880
batch 5877: loss 0.108939
batch 5878: loss 0.020108
batch 5879: loss 0.194893
batch 5880: loss 0.165083
batch 5881: loss 0.031524
batch 5882: loss 0.068711
batch 5883: loss 0.128021
batch 5884: loss 0.025645
batch 5885: loss 0.025448
batch 5886: 

In [20]:
# Evaluate on the test split, accumulating accuracy batch by batch.
sparse_categorical_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
# Ceiling division: when num_test_data is not a multiple of batch_size the
# trailing partial batch is still evaluated instead of being silently dropped.
num_batches = -(-data_loader.num_test_data // batch_size)
for batch_index in range(num_batches):
    # end_index may run past the end of the arrays on the last batch; slicing
    # clips it, so that batch is simply shorter.
    start_index, end_index = batch_index * batch_size, (batch_index + 1) * batch_size
    # Call the model directly: for small batches this avoids the per-call
    # overhead and progress output of model.predict.
    y_pred = model(data_loader.test_data[start_index: end_index])
    sparse_categorical_accuracy.update_state(y_true=data_loader.test_label[start_index: end_index], y_pred=y_pred)
print("test accuracy: %f" % sparse_categorical_accuracy.result())

test accuracy: 0.974600
