In [1]:
import tensorflow as tf
import tensorflow.keras.datasets as tfds
from utils.layer_units import *
from utils.ResAttNet import ResAttNet
import pydotplus
import numpy as np


# Plot configurations
%matplotlib inline

# Notebook auto reloads code. (Ref: http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython)
%load_ext autoreload
%autoreload 2

In [2]:
(X_train,y_train),(X_test,y_test) = tfds.cifar100.load_data()

In [3]:
X_valid = X_train[49000:]
y_valid = y_train[49000:]
X_train = X_train[:49000]
y_train = y_train[:49000]

In [4]:
# center but not scale data
# we found scale not very stable
def preprocess(X):
    # pixel wise center
    ret = X.astype(np.float32) - np.mean(X,axis=0)
    return ret

In [5]:
X_train = preprocess(X_train)
X_valid = preprocess(X_valid)
X_test = preprocess(X_test)

In [6]:
my_RANet = ResAttNet(100,256)

In [7]:
ipt = tf.keras.Input(shape=(32,32,3))
model_out = my_RANet.build(ipt, pre_conv = True, pre_pooling = False, 
             attention_num = 2,network_param = [1,1,1],
             resid_params = [[64,3,1,0] for _ in range(3)],
             skip_param = [True, 1])
model = tf.keras.Model(ipt,model_out)
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 32, 32, 3)]  0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 32, 32, 64)   1792        input_2[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 32, 32, 64)   256         conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 32, 32, 64)   0           batch_normalization[0][0]        
______________________________________________________________________________________________

In [8]:
opt = tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0.9,nesterov=True)

model.compile(optimizer = 'Adam',
    loss = 'sparse_categorical_crossentropy',
    metrics = ['sparse_categorical_accuracy'],
)

In [9]:
# ImageDataGenerator code referred from hw2 Task4
# and referred from https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/ImageDataGenerator
datagen = tf.keras.preprocessing.image.ImageDataGenerator(shear_range=0.1,zoom_range=0.2,horizontal_flip=True,rotation_range=30,width_shift_range=0.1,
                             height_shift_range=0.1)
datagen.fit(X_train)

In [None]:
#history = model.fit(X_train,y_train,epochs=10,batch_size=128,validation_split=0.2,callbacks=[tensorboard_callback])
bs = 128
history = model.fit(datagen.flow(X_train,y_train,batch_size=bs),epochs=30,steps_per_epoch=len(X_train)//bs,
                    validation_data = (X_valid,y_valid))

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30

In [116]:
model.save('cifar_100_two_attention_64_3_resid.h5')

In [117]:
model.evaluate(x=X_test,y=y_test)



[2.103074312210083, 0.4934000074863434]

In [118]:
import pandas as pd
hist_normalized = pd.DataFrame(history.history)

In [119]:
hist_normalized.to_csv('hist_two_attention_triple_64_3_3_one_dense.csv',sep=',')