# 平均損失函數

In [2]:
import numpy as np
import tensorflow as tf
tf.compat.v1.reset_default_graph()
class MNISTLoader():
    def __init__(self):
        mnist = tf.keras.datasets.mnist
        (self.train_data, self.train_label), (self.test_data, self.test_label) = mnist.load_data()

        self.train_data = np.expand_dims(self.train_data.astype(np.float32) / 255.0, axis=-1)      # [60000, 28, 28, 1]
        self.test_data = np.expand_dims(self.test_data.astype(np.float32) / 255.0, axis=-1)        # [10000, 28, 28, 1]
        self.train_label = self.train_label.astype(np.int32)    # [60000]
        self.test_label = self.test_label.astype(np.int32)      # [10000]
        self.num_train_data, self.num_test_data = self.train_data.shape[0], self.test_data.shape[0]

    def get_batch(self, batch_size):

        index = np.random.randint(0, self.num_train_data, batch_size)
        return self.train_data[index, :], self.train_label[index]
    
class MLP(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.flatten = tf.keras.layers.Flatten()    
        self.dense1 = tf.keras.layers.Dense(units=100, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(units=10)

    def call(self, inputs):         # [batch_size, 28, 28, 1]
        x = self.flatten(inputs)    # [batch_size, 784]
        x = self.dense1(x)          # [batch_size, 100]
        x = self.dense2(x)          # [batch_size, 10]
        output = tf.nn.softmax(x)
        return output

num_epochs = 5
batch_size = 5
learning_rate = 0.001

model = MLP()
data_loader = MNISTLoader()

num_batches = int(data_loader.num_train_data // batch_size * num_epochs)

X, y = data_loader.get_batch(batch_size)
with tf.GradientTape() as tape:
    y_pred = model(X)
    loss = tf.keras.losses.sparse_categorical_crossentropy(y_true=y, y_pred=y_pred)
    loss = tf.reduce_mean(loss)
    print(loss)

tf.Tensor(2.621033, shape=(), dtype=float32)


# 優化神經網路

In [6]:
import numpy as np
class MNISTLoader():
    def __init__(self):
        mnist = tf.keras.datasets.mnist
        (self.train_data, self.train_label), (self.test_data, self.test_label) = mnist.load_data()

        self.train_data = np.expand_dims(self.train_data.astype(np.float32) / 255.0, axis=-1)      # [60000, 28, 28, 1]
        self.test_data = np.expand_dims(self.test_data.astype(np.float32) / 255.0, axis=-1)        # [10000, 28, 28, 1]
        self.train_label = self.train_label.astype(np.int32)    # [60000]
        self.test_label = self.test_label.astype(np.int32)      # [10000]
        self.num_train_data, self.num_test_data = self.train_data.shape[0], self.test_data.shape[0]

    def get_batch(self, batch_size):

        index = np.random.randint(0, self.num_train_data, batch_size)
        return self.train_data[index, :], self.train_label[index]
    
class MLP(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.flatten = tf.keras.layers.Flatten()    
        self.dense1 = tf.keras.layers.Dense(units=100, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(units=10)

    def call(self, inputs):         # [batch_size, 28, 28, 1]
        x = self.flatten(inputs)    # [batch_size, 784]
        x = self.dense1(x)          # [batch_size, 100]
        x = self.dense2(x)          # [batch_size, 10]
        output = tf.nn.softmax(x)
        return output

num_epochs = 5
batch_size = 50
learning_rate = 0.001

model = MLP()
data_loader = MNISTLoader()
#optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
#optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
optimizer = tf.keras.optimizers.RMSprop(learning_rate=learning_rate)

num_batches = int(data_loader.num_train_data // batch_size * num_epochs)
for batch_index in range(num_batches):
    X, y = data_loader.get_batch(batch_size)
    with tf.GradientTape() as tape:
        y_pred = model(X)
        loss = tf.keras.losses.sparse_categorical_crossentropy(y_true=y, y_pred=y_pred)
        loss = tf.reduce_mean(loss)
        print("batch %d: loss %f" % (batch_index, loss.numpy()))
    grads = tape.gradient(loss, model.variables)
    optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))


batch 0: loss 2.270030
batch 1: loss 2.224965
batch 2: loss 2.026703
batch 3: loss 1.971361
batch 4: loss 1.675312
batch 5: loss 1.727646
batch 6: loss 1.700375
batch 7: loss 1.589053
batch 8: loss 1.656189
batch 9: loss 1.352090
batch 10: loss 1.368398
batch 11: loss 1.185152
batch 12: loss 1.270373
batch 13: loss 1.259618
batch 14: loss 1.310029
batch 15: loss 1.102000
batch 16: loss 1.156472
batch 17: loss 0.945554
batch 18: loss 0.876053
batch 19: loss 1.122862
batch 20: loss 0.895682
batch 21: loss 1.022535
batch 22: loss 0.975804
batch 23: loss 0.775467
batch 24: loss 0.906837
batch 25: loss 1.011373
batch 26: loss 0.877039
batch 27: loss 0.672401
batch 28: loss 0.974840
batch 29: loss 0.705565
batch 30: loss 0.778809
batch 31: loss 0.796692
batch 32: loss 0.686874
batch 33: loss 0.799991
batch 34: loss 0.787389
batch 35: loss 0.689136
batch 36: loss 0.603588
batch 37: loss 0.938194
batch 38: loss 0.661362
batch 39: loss 0.699422
batch 40: loss 0.718282
batch 41: loss 0.464456
ba

batch 338: loss 0.293027
batch 339: loss 0.337338
batch 340: loss 0.320226
batch 341: loss 0.255111
batch 342: loss 0.306542
batch 343: loss 0.136120
batch 344: loss 0.164811
batch 345: loss 0.241897
batch 346: loss 0.283643
batch 347: loss 0.300994
batch 348: loss 0.444417
batch 349: loss 0.405856
batch 350: loss 0.171216
batch 351: loss 0.324004
batch 352: loss 0.237332
batch 353: loss 0.163725
batch 354: loss 0.218331
batch 355: loss 0.229987
batch 356: loss 0.265262
batch 357: loss 0.248960
batch 358: loss 0.388207
batch 359: loss 0.243178
batch 360: loss 0.338059
batch 361: loss 0.318350
batch 362: loss 0.302398
batch 363: loss 0.294826
batch 364: loss 0.535214
batch 365: loss 0.115851
batch 366: loss 0.231156
batch 367: loss 0.513939
batch 368: loss 0.281440
batch 369: loss 0.186867
batch 370: loss 0.212767
batch 371: loss 0.165725
batch 372: loss 0.407267
batch 373: loss 0.378571
batch 374: loss 0.315684
batch 375: loss 0.197395
batch 376: loss 0.406554
batch 377: loss 0.371423


batch 667: loss 0.095009
batch 668: loss 0.239383
batch 669: loss 0.110938
batch 670: loss 0.163227
batch 671: loss 0.288147
batch 672: loss 0.355464
batch 673: loss 0.725920
batch 674: loss 0.519828
batch 675: loss 0.142860
batch 676: loss 0.213494
batch 677: loss 0.113062
batch 678: loss 0.284629
batch 679: loss 0.190958
batch 680: loss 0.170837
batch 681: loss 0.289865
batch 682: loss 0.067399
batch 683: loss 0.045795
batch 684: loss 0.167481
batch 685: loss 0.418679
batch 686: loss 0.145204
batch 687: loss 0.155196
batch 688: loss 0.093097
batch 689: loss 0.321463
batch 690: loss 0.213799
batch 691: loss 0.323712
batch 692: loss 0.281795
batch 693: loss 0.295149
batch 694: loss 0.147488
batch 695: loss 0.165607
batch 696: loss 0.211938
batch 697: loss 0.100596
batch 698: loss 0.200391
batch 699: loss 0.139096
batch 700: loss 0.161488
batch 701: loss 0.109602
batch 702: loss 0.388603
batch 703: loss 0.455597
batch 704: loss 0.273524
batch 705: loss 0.329263
batch 706: loss 0.074830


batch 1013: loss 0.227252
batch 1014: loss 0.212458
batch 1015: loss 0.175069
batch 1016: loss 0.303141
batch 1017: loss 0.101159
batch 1018: loss 0.027033
batch 1019: loss 0.127897
batch 1020: loss 0.162528
batch 1021: loss 0.227250
batch 1022: loss 0.299529
batch 1023: loss 0.080822
batch 1024: loss 0.078067
batch 1025: loss 0.141415
batch 1026: loss 0.072861
batch 1027: loss 0.042950
batch 1028: loss 0.097188
batch 1029: loss 0.134337
batch 1030: loss 0.148465
batch 1031: loss 0.217348
batch 1032: loss 0.071166
batch 1033: loss 0.242039
batch 1034: loss 0.166021
batch 1035: loss 0.305340
batch 1036: loss 0.313694
batch 1037: loss 0.182614
batch 1038: loss 0.141579
batch 1039: loss 0.416232
batch 1040: loss 0.242914
batch 1041: loss 0.279742
batch 1042: loss 0.131496
batch 1043: loss 0.066605
batch 1044: loss 0.117856
batch 1045: loss 0.434851
batch 1046: loss 0.137880
batch 1047: loss 0.142020
batch 1048: loss 0.071676
batch 1049: loss 0.181042
batch 1050: loss 0.133184
batch 1051: 

batch 1332: loss 0.108420
batch 1333: loss 0.161353
batch 1334: loss 0.193646
batch 1335: loss 0.098706
batch 1336: loss 0.098412
batch 1337: loss 0.184345
batch 1338: loss 0.292084
batch 1339: loss 0.142562
batch 1340: loss 0.178267
batch 1341: loss 0.146524
batch 1342: loss 0.103421
batch 1343: loss 0.233666
batch 1344: loss 0.143089
batch 1345: loss 0.307664
batch 1346: loss 0.289419
batch 1347: loss 0.195126
batch 1348: loss 0.101401
batch 1349: loss 0.249201
batch 1350: loss 0.125330
batch 1351: loss 0.373374
batch 1352: loss 0.287937
batch 1353: loss 0.469142
batch 1354: loss 0.063360
batch 1355: loss 0.094057
batch 1356: loss 0.109865
batch 1357: loss 0.078330
batch 1358: loss 0.174047
batch 1359: loss 0.178290
batch 1360: loss 0.125335
batch 1361: loss 0.092282
batch 1362: loss 0.182436
batch 1363: loss 0.205936
batch 1364: loss 0.120597
batch 1365: loss 0.104306
batch 1366: loss 0.175929
batch 1367: loss 0.275998
batch 1368: loss 0.126327
batch 1369: loss 0.116055
batch 1370: 

batch 1653: loss 0.137665
batch 1654: loss 0.122972
batch 1655: loss 0.218709
batch 1656: loss 0.231130
batch 1657: loss 0.141205
batch 1658: loss 0.021103
batch 1659: loss 0.165515
batch 1660: loss 0.076575
batch 1661: loss 0.119971
batch 1662: loss 0.062528
batch 1663: loss 0.046269
batch 1664: loss 0.312767
batch 1665: loss 0.093986
batch 1666: loss 0.076674
batch 1667: loss 0.100662
batch 1668: loss 0.133134
batch 1669: loss 0.060880
batch 1670: loss 0.044527
batch 1671: loss 0.392951
batch 1672: loss 0.047841
batch 1673: loss 0.147441
batch 1674: loss 0.150332
batch 1675: loss 0.081217
batch 1676: loss 0.086010
batch 1677: loss 0.156796
batch 1678: loss 0.255053
batch 1679: loss 0.030327
batch 1680: loss 0.120486
batch 1681: loss 0.071654
batch 1682: loss 0.455598
batch 1683: loss 0.137173
batch 1684: loss 0.147296
batch 1685: loss 0.096371
batch 1686: loss 0.115493
batch 1687: loss 0.080156
batch 1688: loss 0.152720
batch 1689: loss 0.115194
batch 1690: loss 0.040630
batch 1691: 

batch 1999: loss 0.081239
batch 2000: loss 0.182242
batch 2001: loss 0.152795
batch 2002: loss 0.044224
batch 2003: loss 0.063918
batch 2004: loss 0.092937
batch 2005: loss 0.125149
batch 2006: loss 0.146928
batch 2007: loss 0.195771
batch 2008: loss 0.120309
batch 2009: loss 0.074203
batch 2010: loss 0.027275
batch 2011: loss 0.081362
batch 2012: loss 0.098603
batch 2013: loss 0.146827
batch 2014: loss 0.273916
batch 2015: loss 0.033157
batch 2016: loss 0.069536
batch 2017: loss 0.004274
batch 2018: loss 0.042132
batch 2019: loss 0.102464
batch 2020: loss 0.062296
batch 2021: loss 0.025280
batch 2022: loss 0.152736
batch 2023: loss 0.089130
batch 2024: loss 0.202581
batch 2025: loss 0.280044
batch 2026: loss 0.032381
batch 2027: loss 0.219582
batch 2028: loss 0.196285
batch 2029: loss 0.058156
batch 2030: loss 0.063743
batch 2031: loss 0.047296
batch 2032: loss 0.040720
batch 2033: loss 0.021988
batch 2034: loss 0.059420
batch 2035: loss 0.293812
batch 2036: loss 0.442371
batch 2037: 

batch 2342: loss 0.049703
batch 2343: loss 0.093230
batch 2344: loss 0.100412
batch 2345: loss 0.042733
batch 2346: loss 0.079692
batch 2347: loss 0.211299
batch 2348: loss 0.229251
batch 2349: loss 0.280431
batch 2350: loss 0.114532
batch 2351: loss 0.163572
batch 2352: loss 0.095436
batch 2353: loss 0.159246
batch 2354: loss 0.130655
batch 2355: loss 0.109675
batch 2356: loss 0.018581
batch 2357: loss 0.165064
batch 2358: loss 0.018480
batch 2359: loss 0.102053
batch 2360: loss 0.113865
batch 2361: loss 0.084242
batch 2362: loss 0.060566
batch 2363: loss 0.042116
batch 2364: loss 0.112032
batch 2365: loss 0.186591
batch 2366: loss 0.053553
batch 2367: loss 0.139149
batch 2368: loss 0.023960
batch 2369: loss 0.250174
batch 2370: loss 0.031659
batch 2371: loss 0.319478
batch 2372: loss 0.030369
batch 2373: loss 0.025964
batch 2374: loss 0.087462
batch 2375: loss 0.078477
batch 2376: loss 0.208879
batch 2377: loss 0.212456
batch 2378: loss 0.059304
batch 2379: loss 0.098019
batch 2380: 

batch 2687: loss 0.028768
batch 2688: loss 0.109817
batch 2689: loss 0.080873
batch 2690: loss 0.021700
batch 2691: loss 0.072432
batch 2692: loss 0.064206
batch 2693: loss 0.025222
batch 2694: loss 0.086307
batch 2695: loss 0.076004
batch 2696: loss 0.012480
batch 2697: loss 0.052197
batch 2698: loss 0.049579
batch 2699: loss 0.242408
batch 2700: loss 0.047452
batch 2701: loss 0.027878
batch 2702: loss 0.116016
batch 2703: loss 0.132081
batch 2704: loss 0.139559
batch 2705: loss 0.021431
batch 2706: loss 0.088694
batch 2707: loss 0.041103
batch 2708: loss 0.056534
batch 2709: loss 0.116160
batch 2710: loss 0.073084
batch 2711: loss 0.085825
batch 2712: loss 0.216167
batch 2713: loss 0.072352
batch 2714: loss 0.031419
batch 2715: loss 0.066066
batch 2716: loss 0.019459
batch 2717: loss 0.184787
batch 2718: loss 0.076269
batch 2719: loss 0.051924
batch 2720: loss 0.125162
batch 2721: loss 0.030660
batch 2722: loss 0.048597
batch 2723: loss 0.049259
batch 2724: loss 0.025952
batch 2725: 

batch 3006: loss 0.096236
batch 3007: loss 0.129393
batch 3008: loss 0.136235
batch 3009: loss 0.023810
batch 3010: loss 0.073841
batch 3011: loss 0.075305
batch 3012: loss 0.149539
batch 3013: loss 0.239087
batch 3014: loss 0.033588
batch 3015: loss 0.049844
batch 3016: loss 0.170665
batch 3017: loss 0.167267
batch 3018: loss 0.084633
batch 3019: loss 0.128367
batch 3020: loss 0.308781
batch 3021: loss 0.112713
batch 3022: loss 0.121767
batch 3023: loss 0.239170
batch 3024: loss 0.055598
batch 3025: loss 0.095566
batch 3026: loss 0.056912
batch 3027: loss 0.047629
batch 3028: loss 0.240995
batch 3029: loss 0.096922
batch 3030: loss 0.118793
batch 3031: loss 0.167329
batch 3032: loss 0.023292
batch 3033: loss 0.206107
batch 3034: loss 0.115127
batch 3035: loss 0.130066
batch 3036: loss 0.023799
batch 3037: loss 0.142039
batch 3038: loss 0.073479
batch 3039: loss 0.029860
batch 3040: loss 0.048749
batch 3041: loss 0.249323
batch 3042: loss 0.032497
batch 3043: loss 0.150842
batch 3044: 

batch 3345: loss 0.186221
batch 3346: loss 0.041424
batch 3347: loss 0.011100
batch 3348: loss 0.094803
batch 3349: loss 0.087771
batch 3350: loss 0.084402
batch 3351: loss 0.088635
batch 3352: loss 0.046262
batch 3353: loss 0.057244
batch 3354: loss 0.145244
batch 3355: loss 0.028458
batch 3356: loss 0.079486
batch 3357: loss 0.039799
batch 3358: loss 0.010903
batch 3359: loss 0.037818
batch 3360: loss 0.024161
batch 3361: loss 0.020602
batch 3362: loss 0.014053
batch 3363: loss 0.018965
batch 3364: loss 0.009781
batch 3365: loss 0.025024
batch 3366: loss 0.169363
batch 3367: loss 0.084732
batch 3368: loss 0.135589
batch 3369: loss 0.086258
batch 3370: loss 0.034436
batch 3371: loss 0.078822
batch 3372: loss 0.017730
batch 3373: loss 0.192898
batch 3374: loss 0.024876
batch 3375: loss 0.005026
batch 3376: loss 0.014920
batch 3377: loss 0.122432
batch 3378: loss 0.148401
batch 3379: loss 0.079168
batch 3380: loss 0.110325
batch 3381: loss 0.023531
batch 3382: loss 0.055226
batch 3383: 

batch 3678: loss 0.041510
batch 3679: loss 0.038816
batch 3680: loss 0.035756
batch 3681: loss 0.092629
batch 3682: loss 0.030600
batch 3683: loss 0.066297
batch 3684: loss 0.396790
batch 3685: loss 0.045994
batch 3686: loss 0.051021
batch 3687: loss 0.101682
batch 3688: loss 0.022218
batch 3689: loss 0.012454
batch 3690: loss 0.087925
batch 3691: loss 0.080853
batch 3692: loss 0.051387
batch 3693: loss 0.137348
batch 3694: loss 0.022106
batch 3695: loss 0.044595
batch 3696: loss 0.078319
batch 3697: loss 0.043317
batch 3698: loss 0.039110
batch 3699: loss 0.338066
batch 3700: loss 0.244529
batch 3701: loss 0.105149
batch 3702: loss 0.038963
batch 3703: loss 0.029376
batch 3704: loss 0.080898
batch 3705: loss 0.186988
batch 3706: loss 0.012138
batch 3707: loss 0.031487
batch 3708: loss 0.027042
batch 3709: loss 0.096129
batch 3710: loss 0.062994
batch 3711: loss 0.018211
batch 3712: loss 0.019060
batch 3713: loss 0.012787
batch 3714: loss 0.181703
batch 3715: loss 0.012506
batch 3716: 

batch 4023: loss 0.014879
batch 4024: loss 0.025031
batch 4025: loss 0.050275
batch 4026: loss 0.058741
batch 4027: loss 0.194563
batch 4028: loss 0.013297
batch 4029: loss 0.010212
batch 4030: loss 0.021281
batch 4031: loss 0.359482
batch 4032: loss 0.145610
batch 4033: loss 0.039723
batch 4034: loss 0.018279
batch 4035: loss 0.155978
batch 4036: loss 0.083382
batch 4037: loss 0.084898
batch 4038: loss 0.213180
batch 4039: loss 0.093754
batch 4040: loss 0.010950
batch 4041: loss 0.034485
batch 4042: loss 0.024718
batch 4043: loss 0.201422
batch 4044: loss 0.068749
batch 4045: loss 0.016897
batch 4046: loss 0.052528
batch 4047: loss 0.030146
batch 4048: loss 0.050564
batch 4049: loss 0.096731
batch 4050: loss 0.045758
batch 4051: loss 0.058647
batch 4052: loss 0.168968
batch 4053: loss 0.235885
batch 4054: loss 0.138793
batch 4055: loss 0.012547
batch 4056: loss 0.021557
batch 4057: loss 0.196891
batch 4058: loss 0.126009
batch 4059: loss 0.026120
batch 4060: loss 0.087818
batch 4061: 

batch 4343: loss 0.061123
batch 4344: loss 0.127976
batch 4345: loss 0.050857
batch 4346: loss 0.084945
batch 4347: loss 0.060179
batch 4348: loss 0.060749
batch 4349: loss 0.076776
batch 4350: loss 0.116200
batch 4351: loss 0.020874
batch 4352: loss 0.063085
batch 4353: loss 0.015031
batch 4354: loss 0.074213
batch 4355: loss 0.037756
batch 4356: loss 0.012176
batch 4357: loss 0.301693
batch 4358: loss 0.003130
batch 4359: loss 0.027510
batch 4360: loss 0.025946
batch 4361: loss 0.023059
batch 4362: loss 0.017593
batch 4363: loss 0.159298
batch 4364: loss 0.137292
batch 4365: loss 0.097159
batch 4366: loss 0.011669
batch 4367: loss 0.026802
batch 4368: loss 0.091387
batch 4369: loss 0.045886
batch 4370: loss 0.356094
batch 4371: loss 0.074508
batch 4372: loss 0.089135
batch 4373: loss 0.089733
batch 4374: loss 0.048039
batch 4375: loss 0.121099
batch 4376: loss 0.101482
batch 4377: loss 0.025267
batch 4378: loss 0.023516
batch 4379: loss 0.022883
batch 4380: loss 0.046158
batch 4381: 

batch 4678: loss 0.064380
batch 4679: loss 0.065826
batch 4680: loss 0.179925
batch 4681: loss 0.228675
batch 4682: loss 0.027302
batch 4683: loss 0.017821
batch 4684: loss 0.015114
batch 4685: loss 0.010101
batch 4686: loss 0.035687
batch 4687: loss 0.046773
batch 4688: loss 0.081744
batch 4689: loss 0.142664
batch 4690: loss 0.077093
batch 4691: loss 0.013771
batch 4692: loss 0.019290
batch 4693: loss 0.121882
batch 4694: loss 0.007091
batch 4695: loss 0.079090
batch 4696: loss 0.016581
batch 4697: loss 0.040817
batch 4698: loss 0.076563
batch 4699: loss 0.036403
batch 4700: loss 0.017149
batch 4701: loss 0.095641
batch 4702: loss 0.033032
batch 4703: loss 0.022807
batch 4704: loss 0.123039
batch 4705: loss 0.107880
batch 4706: loss 0.023904
batch 4707: loss 0.052050
batch 4708: loss 0.028069
batch 4709: loss 0.055239
batch 4710: loss 0.145685
batch 4711: loss 0.084552
batch 4712: loss 0.076980
batch 4713: loss 0.153365
batch 4714: loss 0.075195
batch 4715: loss 0.007195
batch 4716: 

batch 5013: loss 0.078273
batch 5014: loss 0.118155
batch 5015: loss 0.069166
batch 5016: loss 0.154311
batch 5017: loss 0.171946
batch 5018: loss 0.015201
batch 5019: loss 0.013858
batch 5020: loss 0.008954
batch 5021: loss 0.039841
batch 5022: loss 0.005172
batch 5023: loss 0.010417
batch 5024: loss 0.119260
batch 5025: loss 0.044323
batch 5026: loss 0.082670
batch 5027: loss 0.105096
batch 5028: loss 0.036712
batch 5029: loss 0.152027
batch 5030: loss 0.008172
batch 5031: loss 0.099625
batch 5032: loss 0.203980
batch 5033: loss 0.404458
batch 5034: loss 0.271370
batch 5035: loss 0.166817
batch 5036: loss 0.257174
batch 5037: loss 0.106532
batch 5038: loss 0.013944
batch 5039: loss 0.010701
batch 5040: loss 0.041897
batch 5041: loss 0.178280
batch 5042: loss 0.100898
batch 5043: loss 0.060769
batch 5044: loss 0.199808
batch 5045: loss 0.091028
batch 5046: loss 0.013726
batch 5047: loss 0.015888
batch 5048: loss 0.083116
batch 5049: loss 0.039095
batch 5050: loss 0.032741
batch 5051: 

batch 5330: loss 0.038773
batch 5331: loss 0.016521
batch 5332: loss 0.016125
batch 5333: loss 0.154762
batch 5334: loss 0.029513
batch 5335: loss 0.009149
batch 5336: loss 0.029511
batch 5337: loss 0.086189
batch 5338: loss 0.011376
batch 5339: loss 0.016898
batch 5340: loss 0.110335
batch 5341: loss 0.038202
batch 5342: loss 0.028929
batch 5343: loss 0.132477
batch 5344: loss 0.088856
batch 5345: loss 0.032835
batch 5346: loss 0.033788
batch 5347: loss 0.021124
batch 5348: loss 0.125027
batch 5349: loss 0.090744
batch 5350: loss 0.029536
batch 5351: loss 0.016665
batch 5352: loss 0.027385
batch 5353: loss 0.161963
batch 5354: loss 0.085698
batch 5355: loss 0.148134
batch 5356: loss 0.037220
batch 5357: loss 0.029374
batch 5358: loss 0.118824
batch 5359: loss 0.093451
batch 5360: loss 0.079056
batch 5361: loss 0.011083
batch 5362: loss 0.034041
batch 5363: loss 0.013273
batch 5364: loss 0.023614
batch 5365: loss 0.034743
batch 5366: loss 0.039496
batch 5367: loss 0.156580
batch 5368: 

batch 5672: loss 0.109053
batch 5673: loss 0.039779
batch 5674: loss 0.012006
batch 5675: loss 0.192895
batch 5676: loss 0.034544
batch 5677: loss 0.029613
batch 5678: loss 0.048136
batch 5679: loss 0.003675
batch 5680: loss 0.041943
batch 5681: loss 0.033423
batch 5682: loss 0.004458
batch 5683: loss 0.008699
batch 5684: loss 0.002546
batch 5685: loss 0.041430
batch 5686: loss 0.051093
batch 5687: loss 0.053575
batch 5688: loss 0.034419
batch 5689: loss 0.024202
batch 5690: loss 0.038040
batch 5691: loss 0.004939
batch 5692: loss 0.019188
batch 5693: loss 0.080989
batch 5694: loss 0.023442
batch 5695: loss 0.032882
batch 5696: loss 0.088626
batch 5697: loss 0.015065
batch 5698: loss 0.037924
batch 5699: loss 0.078166
batch 5700: loss 0.053143
batch 5701: loss 0.090256
batch 5702: loss 0.139775
batch 5703: loss 0.041214
batch 5704: loss 0.048543
batch 5705: loss 0.126939
batch 5706: loss 0.038698
batch 5707: loss 0.064499
batch 5708: loss 0.021839
batch 5709: loss 0.006054
batch 5710: 

batch 5988: loss 0.034664
batch 5989: loss 0.008595
batch 5990: loss 0.040199
batch 5991: loss 0.042098
batch 5992: loss 0.284126
batch 5993: loss 0.040309
batch 5994: loss 0.153446
batch 5995: loss 0.062820
batch 5996: loss 0.107820
batch 5997: loss 0.018578
batch 5998: loss 0.055278
batch 5999: loss 0.042166


# 優化神經網路(keras版本)

In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
tf.compat.v1.reset_default_graph()

(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
x_train = x_train.reshape([x_train.shape[0], -1])
x_test = x_test.reshape([x_test.shape[0], -1])
print(x_train.shape, ' ', y_train.shape)
print(x_test.shape, ' ', y_test.shape)

(60000, 784)   (60000,)
(10000, 784)   (10000,)


In [3]:
model = keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=(784,)),
    layers.Dense(64, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax')
])


#keras.optimizers.Adagrad(learning_rate=0.01)
#keras.optimizers.Adam(learning_rate=0.01)
#keras.optimizers.RMSprop(learning_rate=0.01)

# provide labels as one_hot representation => tf.keras.losses.CategoricalCrossentropy
# provide labels as integers => tf.keras.losses.SparseCategoricalCrossentropy 
model.compile(optimizer=keras.optimizers.Adam(),
             loss=keras.losses.SparseCategoricalCrossentropy(),
             metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 64)                50240     
_________________________________________________________________
dense_1 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_2 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_3 (Dense)              (None, 10)                650       
Total params: 59,210
Trainable params: 59,210
Non-trainable params: 0
_________________________________________________________________


In [4]:
#history = model.fit(x_train, y_train, batch_size=256, epochs=100, validation_split=0.3, verbose=2)
history = model.fit(x_train, y_train, batch_size=256, epochs=100, validation_data=(x_test, y_test), verbose=2)


Epoch 1/100
235/235 - 1s - loss: 2.7168 - accuracy: 0.7500 - val_loss: 0.6696 - val_accuracy: 0.8551
Epoch 2/100
235/235 - 1s - loss: 0.4772 - accuracy: 0.8876 - val_loss: 0.3863 - val_accuracy: 0.9069
Epoch 3/100
235/235 - 0s - loss: 0.3047 - accuracy: 0.9182 - val_loss: 0.3004 - val_accuracy: 0.9230
Epoch 4/100
235/235 - 0s - loss: 0.2286 - accuracy: 0.9366 - val_loss: 0.2639 - val_accuracy: 0.9341
Epoch 5/100
235/235 - 0s - loss: 0.1856 - accuracy: 0.9464 - val_loss: 0.2417 - val_accuracy: 0.9381
Epoch 6/100
235/235 - 1s - loss: 0.1571 - accuracy: 0.9539 - val_loss: 0.2174 - val_accuracy: 0.9422
Epoch 7/100
235/235 - 0s - loss: 0.1402 - accuracy: 0.9585 - val_loss: 0.2101 - val_accuracy: 0.9445
Epoch 8/100
235/235 - 1s - loss: 0.1220 - accuracy: 0.9634 - val_loss: 0.2079 - val_accuracy: 0.9488
Epoch 9/100
235/235 - 0s - loss: 0.1111 - accuracy: 0.9665 - val_loss: 0.2068 - val_accuracy: 0.9503
Epoch 10/100
235/235 - 1s - loss: 0.1009 - accuracy: 0.9688 - val_loss: 0.2097 - val_accura

Epoch 82/100
235/235 - 0s - loss: 0.0157 - accuracy: 0.9959 - val_loss: 0.3222 - val_accuracy: 0.9641
Epoch 83/100
235/235 - 0s - loss: 0.0134 - accuracy: 0.9959 - val_loss: 0.3162 - val_accuracy: 0.9668
Epoch 84/100
235/235 - 0s - loss: 0.0259 - accuracy: 0.9937 - val_loss: 0.2860 - val_accuracy: 0.9683
Epoch 85/100
235/235 - 0s - loss: 0.0154 - accuracy: 0.9956 - val_loss: 0.3030 - val_accuracy: 0.9663
Epoch 86/100
235/235 - 0s - loss: 0.0111 - accuracy: 0.9962 - val_loss: 0.2927 - val_accuracy: 0.9715
Epoch 87/100
235/235 - 1s - loss: 0.0150 - accuracy: 0.9962 - val_loss: 0.2868 - val_accuracy: 0.9681
Epoch 88/100
235/235 - 0s - loss: 0.0125 - accuracy: 0.9963 - val_loss: 0.3190 - val_accuracy: 0.9684
Epoch 89/100
235/235 - 0s - loss: 0.0149 - accuracy: 0.9957 - val_loss: 0.2905 - val_accuracy: 0.9679
Epoch 90/100
235/235 - 0s - loss: 0.0263 - accuracy: 0.9928 - val_loss: 0.3017 - val_accuracy: 0.9695
Epoch 91/100
235/235 - 1s - loss: 0.0175 - accuracy: 0.9951 - val_loss: 0.2958 - v