In [1]:
import os

# Set before importing TensorFlow so the native library sees the value when it
# loads; messages logged during the import itself are otherwise not filtered.
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'

import tensorflow as tf
from tensorflow.keras import layers,optimizers,datasets,Sequential

In [4]:
tf.random.set_seed(2345)

# Five conv units built in order. Each unit is two 3x3 'same'-padded ReLU
# convolutions of equal width followed by a 2x2, stride-2 max pooling layer.
# Widths per unit: 64, 128, 256, 512, 512.
conv_layers=[
    unit_layer
    for width in (64,128,256,512,512)
    for unit_layer in (
        layers.Conv2D(width,kernel_size=[3,3],padding='same',activation=tf.nn.relu),
        layers.Conv2D(width,kernel_size=[3,3],padding='same',activation=tf.nn.relu),
        layers.MaxPool2D(pool_size=[2,2],strides=2,padding='same'),
    )
]

In [8]:
# datasets
def preprocess(x,y):
    """Return `x` cast to float32 and divided by 255, and `y` cast to int32."""
    return tf.cast(x,dtype=tf.float32)/255., tf.cast(y,dtype=tf.int32)

# Load the CIFAR-100 train/test splits.
(x,y),(x_test,y_test)=datasets.cifar100.load_data()
# Drop the size-1 second axis of both label arrays so labels become rank 1.
y,y_test=tf.squeeze(y,axis=1),tf.squeeze(y_test,axis=1)
print(x.shape,y.shape,x_test.shape,y_test.shape)

(50000, 32, 32, 3) (50000,) (10000, 32, 32, 3) (10000,)


In [9]:
# Training pipeline: shuffle with a 1000-element buffer, preprocess, batch by 64.
train_db=(
    tf.data.Dataset.from_tensor_slices((x,y))
    .shuffle(1000)
    .map(preprocess)
    .batch(64)
)

# Test pipeline: same preprocessing and batch size, no shuffling.
test_db=(
    tf.data.Dataset.from_tensor_slices((x_test,y_test))
    .map(preprocess)
    .batch(64)
)

# Pull one training batch to check shapes and the pixel value range.
sample=next(iter(train_db))
print('sample:',sample[0].shape,sample[1].shape,tf.reduce_min(sample[0]),tf.reduce_max(sample[0]))

sample: (64, 32, 32, 3) (64,) tf.Tensor(0.0, shape=(), dtype=float32) tf.Tensor(1.0, shape=(), dtype=float32)


In [15]:
def main(epochs=50,learning_rate=1e-4):
    """Train the conv + fully connected classifier and report test accuracy per epoch.

    Args:
        epochs: number of passes over `train_db` (default 50).
        learning_rate: step size handed to the Adam optimizer (default 1e-4).

    Reads the module-level `conv_layers`, `train_db` and `test_db`.

    NOTE: `conv_layers` holds layer instances, so calling `main()` again in
    the same kernel reuses the convolution weights left by the previous call.
    Re-run the cell that defines `conv_layers` to start from fresh weights.
    """
    # convolution layers, [b,32,32,3]->[b,1,1,512]
    conv_net=Sequential(conv_layers)

    # fully connected head, [b,512]->[b,100]
    fc_net=Sequential([
        layers.Dense(256,activation=tf.nn.relu),
        layers.Dense(128,activation=tf.nn.relu),
        layers.Dense(100,activation=None),
    ])

    conv_net.build(input_shape=[None,32,32,3])
    fc_net.build(input_shape=[None,512])

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias
    # that recent Keras releases reject.
    optimizer=optimizers.Adam(learning_rate=learning_rate)

    variables=conv_net.trainable_variables + fc_net.trainable_variables

    def forward(images):
        """Run both networks: [b,32,32,3] images -> [b,100] logits."""
        features=conv_net(images)
        # flatten [b,1,1,512] -> [b,512]
        features=tf.reshape(features,[-1,512])
        return fc_net(features)

    # training
    for epoch in range(epochs):
        for step,(x,y) in enumerate(train_db):
            with tf.GradientTape() as tape:
                logits=forward(x)
                # [b]->[b,100]
                y_onehot=tf.one_hot(y,depth=100)
                # compute loss
                loss=tf.losses.categorical_crossentropy(y_onehot,logits,from_logits=True)
                loss=tf.reduce_mean(loss)

            grads=tape.gradient(loss,variables)
            optimizer.apply_gradients(zip(grads,variables))

            if step%100 == 0:
                print(epoch,step,'loss:',float(loss))

        # evaluate on the test set after every epoch
        total_num=0
        total_correct=0
        for x,y in test_db:
            logits=forward(x)
            # softmax is monotonic, so argmax over the logits gives the predicted class
            pred=tf.cast(tf.argmax(logits,axis=1),dtype=tf.int32)
            correct=tf.reduce_sum(tf.cast(tf.equal(pred,y),dtype=tf.int32))

            total_num+=x.shape[0]
            # accumulate as a Python int so the accuracy prints as a plain number
            total_correct+=int(correct)

        acc=total_correct/total_num
        print('epoch:',epoch,'acc:',acc)

main()

0 0 loss: 4.664167881011963
0 100 loss: 4.338935852050781
0 200 loss: 4.494081497192383
0 300 loss: 4.188960552215576
0 400 loss: 4.1856207847595215
0 500 loss: 3.9554741382598877
0 600 loss: 4.160648345947266
0 700 loss: 3.8547306060791016
epoch: 0 acc: tf.Tensor(0.0832, shape=(), dtype=float64)
1 0 loss: 3.9903905391693115
1 100 loss: 3.9369590282440186
1 200 loss: 3.736947536468506
1 300 loss: 3.936816692352295
1 400 loss: 3.6469597816467285
1 500 loss: 3.6335337162017822
1 600 loss: 3.7240471839904785
1 700 loss: 3.7123191356658936
epoch: 1 acc: tf.Tensor(0.1415, shape=(), dtype=float64)
2 0 loss: 3.7488901615142822
2 100 loss: 3.8149466514587402
2 200 loss: 4.138293743133545
2 300 loss: 3.3684353828430176
2 400 loss: 3.3791017532348633
2 500 loss: 3.2359561920166016
2 600 loss: 3.4087491035461426
2 700 loss: 3.0918660163879395
epoch: 2 acc: tf.Tensor(0.1734, shape=(), dtype=float64)
3 0 loss: 3.194413185119629
3 100 loss: 3.1994082927703857


KeyboardInterrupt: 