In [1]:
import tensorflow as tf
import matplotlib.pylab as plt
import numpy as np
tf.__version__

'2.0.0-rc1'

### 加载数据

In [2]:
def load_mnist(path, kind='train'):
    import os
    import gzip
    import numpy as np

    """Load MNIST data from `path`"""
    labels_path = os.path.join(path,
                               '%s-labels-idx1-ubyte.gz'
                               % kind)
    images_path = os.path.join(path,
                               '%s-images-idx3-ubyte.gz'
                               % kind)

    with gzip.open(labels_path, 'rb') as lbpath:
        labels = np.frombuffer(lbpath.read(), dtype=np.uint8,
                               offset=8)

    with gzip.open(images_path, 'rb') as imgpath:
        images = np.frombuffer(imgpath.read(), dtype=np.uint8,
                               offset=16).reshape(len(labels), 784)

    return images, labels

In [3]:
path = "../../data/fashion_mnist"
text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
x_train,y_train = load_mnist(path)
x_test,y_test = load_mnist(path,"t10k")
x_train = x_train/255.0
x_test = x_test/255.0

In [4]:
batch_size = 256

### 定义模型参数

In [18]:
num_inputs,num_outputs,num_hiddens = 784,10,256
w1 = tf.random.normal([num_inputs,num_hiddens])
b1 = tf.ones(shape=[num_hiddens])
w2 = tf.random.normal(shape=[num_hiddens,num_outputs])
b2 = tf.ones(shape=[num_outputs])

### 定义激活函数

In [6]:
def relu(x):
    return tf.math.maximum(x,0)

### 定义模型

In [7]:
def model(x,w1,b1,w2,b2):
    out1 = relu( tf.matmul(x,w1) + b1 )
    out2 = tf.math.softmax( tf.matmul(out1,w2) + b2 )
    return out2

In [11]:
x = tf.constant(x_train[0],dtype=tf.float32)
x = tf.reshape(x,shape=[-1,num_inputs])
model(x,w1,b1,w2,b2)

<tf.Tensor: id=32, shape=(1, 10), dtype=float32, numpy=
array([[0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 4.7809088e-26,
        0.0000000e+00, 1.0000000e+00]], dtype=float32)>

In [12]:
import random
def data_inter(batch_size,features,lanbles):
    num = len(features)
    indices = list(range(num))
    random.shuffle(indices)
    
    for i in range(0,num,batch_size):
        j = min(i+batch_size,num)
        yield features[i:j],lanbles[i:j]

In [13]:
def loss(y_hat,y_true):
    return tf.losses.sparse_categorical_crossentropy(y_true,y_hat)

In [14]:
import numpy as np
def acc(y_hat,y):
    return np.mean((tf.argmax(y_hat,axis=1) == y))

### 训练模型

In [26]:
num_epchos = 30
lr = 0.001
for i in range(num_epchos):
    for x,y in data_inter(batch_size,x_train,y_train):
        x = tf.constant(x,dtype=tf.float32) 
        with tf.GradientTape() as tape:
            tape.watch(w1)
            tape.watch(b1)
            tape.watch(w2)
            tape.watch(b2)
            y_hat = model(x,w1,b1,w2,b2)
            l = loss(y_hat,y)
        dw1,db1,dw2,db2 = tape.gradient(l,[w1,b1,w2,b2])
        w1 -= lr * dw1/batch_size
        w2 -= lr * dw2/batch_size
        b1 -= lr * db1/batch_size
        b2 -= lr * db2/batch_size
    epcho_loss = loss(model(x,w1,b1,w2,b2),y)
    print("epcho:",i+1,"loss:",tf.reduce_mean(epcho_loss).numpy())

epcho: 1 loss: 3.272382
epcho: 2 loss: 3.2749054
epcho: 3 loss: 3.2748795
epcho: 4 loss: 3.2749062
epcho: 5 loss: 3.2750485
epcho: 6 loss: 3.2753103
epcho: 7 loss: 3.275365
epcho: 8 loss: 3.2758248
epcho: 9 loss: 3.2762804
epcho: 10 loss: 3.2768219
epcho: 11 loss: 3.277627
epcho: 12 loss: 3.2781026
epcho: 13 loss: 3.2785378
epcho: 14 loss: 3.2791078
epcho: 15 loss: 3.2794883
epcho: 16 loss: 3.2798445
epcho: 17 loss: 3.2800605
epcho: 18 loss: 3.280244
epcho: 19 loss: 3.28042
epcho: 20 loss: 3.2806156
epcho: 21 loss: 3.2808533
epcho: 22 loss: 3.2814357
epcho: 23 loss: 3.2817829
epcho: 24 loss: 3.2820766
epcho: 25 loss: 3.2823322
epcho: 26 loss: 3.2846267
epcho: 27 loss: 3.2849486
epcho: 28 loss: 3.285148
epcho: 29 loss: 3.2861626
epcho: 30 loss: 3.2862995


In [27]:
acc_=[]
for x,y in data_inter(batch_size,x_test,y_test):
    x = tf.constant(x,dtype=tf.float32) 
    y_hat = model(x,w1,b1,w2,b2)
    acc_.append(acc(y_hat,y))

np.mean(acc_)

0.7482421875