In [8]:
import tensorflow as tf
import tensorflow.keras.datasets as tfds
from utils.layer_units import *
from utils.ResAttNet import ResAttNet
import pydotplus
import numpy as np


# Plot configurations
%matplotlib inline

# Notebook auto reloads code. (Ref: http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython)
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Trainning CIFAR 10 data with ResAttNet
In this file we first load in the training and testing data from CIFAR 10 dataset. We chose the last 1000 out as validation set. We did a preprocess where we pixelwise subtracted the RGB mean across data input data. We didn't scale by 255 because we found it makes the validation accuracy unstable and take longer to train with less accurate result. We build model with ResAttNet class and use experimenting with the various parmeters. Current parameter is the one we used after we finished exploring CIFAR 100 data for detail see the analysis in CIFAR_100_with_ResAttn_class file. We used default Adam optimizer. We do experiment with SGD optimizer as described by in the paper. The accuracy didn't improve one percent above chance for several epochs so we decided to using Adam instead. We also added ImageDataGenerator to augment our data as we learned in early days of exploring. We chose some common image augmentation parameter, including shear, zoom, horizontal flip, slight rotation, horizontal and vertical shift. When we used the augmentation, it raised our accuracy from around 60% to 75%. After the run we saved good model and saved the training history as a csv for plotting. 


In [9]:
(X_train,y_train),(X_test,y_test) = tfds.cifar10.load_data()

In [10]:
X_valid = X_train[49000:]
y_valid = y_train[49000:]
X_train = X_train[:49000]
y_train = y_train[:49000]

In [11]:
# center but not scale data
# we found scale not very stable
def preprocess(X):
    # pixel wise center
    ret = X.astype(np.float32) - np.mean(X,axis=0)
    return ret

In [12]:
X_train = preprocess(X_train)
X_valid = preprocess(X_valid)
X_test = preprocess(X_test)

In [16]:
my_RANet = ResAttNet(10,256)

In [17]:
ipt = tf.keras.Input(shape=(32,32,3))
model_out = my_RANet.build(ipt, pre_conv = True, pre_pooling = True, 
             attention_num = 3,network_param = [1,1,1],
             resid_params = [[64,3,1,0],[64,3,1,0],[64,1,1,0]],
             skip_param = [True, 1])
model = tf.keras.Model(ipt,model_out)
model.summary()

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 32, 32, 3)]  0                                            
__________________________________________________________________________________________________
conv2d_262 (Conv2D)             (None, 32, 32, 64)   1792        input_3[0][0]                    
__________________________________________________________________________________________________
average_pooling2d (AveragePooli (None, 16, 16, 64)   0           conv2d_262[0][0]                 
__________________________________________________________________________________________________
batch_normalization_186 (BatchN (None, 16, 16, 64)   256         average_pooling2d[0][0]          
____________________________________________________________________________________________

In [18]:
opt = tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0.9,nesterov=True)

model.compile(optimizer = 'Adam',
    loss = 'sparse_categorical_crossentropy',
    metrics = ['sparse_categorical_accuracy'],
)

In [19]:
# ImageDataGenerator code referred from hw2 Task4
# and referred from https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/ImageDataGenerator
datagen = tf.keras.preprocessing.image.ImageDataGenerator(shear_range=0.1,zoom_range=0.2,horizontal_flip=True,rotation_range=30,width_shift_range=0.1,
                             height_shift_range=0.1)
datagen.fit(X_train)

In [20]:
#history = model.fit(X_train,y_train,epochs=10,batch_size=128,validation_split=0.2,callbacks=[tensorboard_callback])
bs = 128
history = model.fit(datagen.flow(X_train,y_train,batch_size=bs),epochs=30,steps_per_epoch=len(X_train)//bs,
                    validation_data = (X_valid,y_valid))

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [22]:
model.save('cifar_10_three_attention_64_3_64_1_resid.h5')

In [21]:
model.evaluate(x=X_test,y=y_test)



[0.5159221291542053, 0.8230000138282776]

In [23]:
import pandas as pd
hist_normalized = pd.DataFrame(history.history)

In [24]:
hist_normalized.to_csv('hist_cifar10_two_attention_64_3_3_64_3_1_one_dense_with_prepooling.csv',sep=',')