<a href="https://colab.research.google.com/github/aljeshishe/notebooks/blob/master/vgg_gpu_v2_with_wb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import tensorflow as tf 
from tensorflow import keras
import numpy as np 
import matplotlib.pyplot as plt 
import sys 

from tensorflow.keras.datasets import cifar10
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Dense, Dropout, Flatten 
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import SGD
from tensorflow.keras import regularizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import BatchNormalization
from time import time 

In [0]:
def load_dataset():
    """Load CIFAR-10 and return it with one-hot encoded labels.

    Returns:
        A 4-tuple ``(trainX, trainY, testX, testY)``: the image arrays as
        returned by ``cifar10.load_data()`` and the label arrays after
        ``keras.utils.to_categorical``.
    """
    train_split, test_split = cifar10.load_data()
    train_images, train_labels = train_split
    test_images, test_labels = test_split
    # Convert the integer class ids into one-hot rows.
    train_onehot = keras.utils.to_categorical(train_labels)
    test_onehot = keras.utils.to_categorical(test_labels)
    return train_images, train_onehot, test_images, test_onehot

def validation_split(testX, testY, valid_X, valid_Y, v_split):
    """Move the tail of the test set into a validation set.

    The last ``int(v_split * len(testX))`` samples (and their labels) are
    removed from the test data and appended to ``valid_X`` / ``valid_Y``.

    Args:
        testX: Sliceable sequence of test samples (e.g. a NumPy array).
        testY: Labels aligned one-to-one with ``testX``.
        valid_X: List that collects validation samples; extended in place.
        valid_Y: List that collects validation labels; extended in place.
        v_split: Fraction of the test set to move, between 0 and 1 inclusive.

    Returns:
        ``(testX, testY, validX, validY)``: the shortened test data followed by
        the accumulated validation lists converted with ``np.asarray``.

    Raises:
        ValueError: If ``v_split`` is outside ``[0, 1]``.
    """
    if not 0 <= v_split <= 1:
        raise ValueError("v_split must be between 0 and 1, got %r" % (v_split,))

    n_valid = int(v_split * len(testX))
    # Slice on a positive index: with a negative one, n_valid == 0 would turn
    # into [-0:] / [:-0], i.e. "everything" / "nothing", inverting the split.
    split_at = len(testX) - n_valid

    valid_X.extend(testX[split_at:])
    valid_Y.extend(testY[split_at:])
    return testX[:split_at], testY[:split_at], np.asarray(valid_X), np.asarray(valid_Y)

def normalize(train,test,valid):
    """Cast three pixel arrays to ``float32`` and divide each by 255.0.

    For 0-255 pixel data this maps the values onto the 0-1 range. New arrays
    are returned; the arguments are left as they were.

    Returns:
        ``(train_norm, test_norm, valid_norm)`` in the same order as the arguments.
    """
    def _to_unit_range(pixels):
        # Cast first so the division is carried out in float32.
        return pixels.astype('float32') / 255.0

    return _to_unit_range(train), _to_unit_range(test), _to_unit_range(valid)

# plot diagnostic learning curves
def summarize_diagnostics(history):
    """Plot loss and accuracy curves from a training history, save and show them.

    Args:
        history: Object whose ``history`` attribute is a dict containing the
            keys ``'loss'``, ``'val_loss'``, ``'accuracy'`` and
            ``'val_accuracy'``, each a per-epoch sequence of values.

    Side effects:
        Writes ``<last path component of sys.argv[0]>_plot.png`` to the current
        working directory, displays the figure, then closes it.
    """
    # One figure with two stacked axes; using the axes objects directly avoids
    # leaving an extra full-size axes behind the two panels.
    fig, (loss_ax, acc_ax) = plt.subplots(2, 1, figsize = (7,7))

    # plot loss
    loss_ax.set_title('Cross Entropy Loss')
    loss_ax.plot(history.history['loss'], color='blue', label='train')
    loss_ax.plot(history.history['val_loss'], color='orange', label='test')
    loss_ax.legend()  # without this the labels above are never drawn

    # plot accuracy
    acc_ax.set_title('Classification Accuracy')
    acc_ax.plot(history.history['accuracy'], color='blue', label='train')
    acc_ax.plot(history.history['val_accuracy'], color='orange', label='test')
    acc_ax.legend()

    # save plot to file BEFORE showing it: show() may release the figure, and a
    # savefig() issued afterwards would then write an empty canvas.
    filename = sys.argv[0].split('/')[-1]
    fig.savefig(filename + '_plot.png')
    plt.show()
    plt.close(fig)



In [0]:
# Load CIFAR-10 with one-hot encoded labels.
trainX, trainY, testX, testY = load_dataset()

# Hold out the last half of the test set for validation. The two lists are the
# accumulators that validation_split extends in place.
valid_X, valid_Y = [], []
testX, testY, validX, validY = validation_split(
    testX, testY, valid_X, valid_Y, v_split=0.5
)

# Cast all three image sets to float32 and divide by 255.
trainX, testX, validX = normalize(trainX, testX, validX)


In [0]:
import os
# Pick a tf.distribute strategy for the available hardware: TPU, several GPUs,
# one GPU, or CPU. Later cells build the model inside `strategy.scope()`.
# see details in https://colab.research.google.com/drive/1cpuwjKTJbMjlvZ7opyrWzMXF_NYnjkiE#scrollTo=y3gk7nSvTUFZ
gpus = tf.config.experimental.list_physical_devices('GPU')
COLAB_TPU_ADDR = os.environ.get('COLAB_TPU_ADDR')
if COLAB_TPU_ADDR:
  resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='grpc://' + COLAB_TPU_ADDR)
  tf.config.experimental_connect_to_cluster(resolver)
  # This is the TPU initialization code that has to be at the beginning.
  tf.tpu.experimental.initialize_tpu_system(resolver)
  strategy = tf.distribute.experimental.TPUStrategy(resolver)
  print('Running on TPU ')  
elif len(gpus) > 1:
  # `gpus` holds *physical* device names ('/physical_device:GPU:0'), which are
  # not valid device specs for MirroredStrategy. Called without arguments it
  # mirrors across every GPU visible to TensorFlow, which is what we want here.
  strategy = tf.distribute.MirroredStrategy()
  print('Running on multiple GPUs ', [gpu.name for gpu in gpus])
elif len(gpus) == 1:
  strategy = tf.distribute.get_strategy() # default strategy that works on CPU and single GPU
  print('Running on single GPU ', gpus[0].name)
else:
  strategy = tf.distribute.get_strategy() # default strategy that works on CPU and single GPU
  print('Running on CPU')
print("Number of accelerators: ", strategy.num_replicas_in_sync)


In [0]:
pip install wandb -q

In [0]:
import wandb
from wandb.keras import WandbCallback
# Hyperparameters for this run. They are handed to W&B and afterwards read back
# through `wandb.config`, so the values W&B holds are the ones actually used.
config = dict(
  batch_size = 64,          # mini-batch size of the training data generator
  epochs = 1000,            # number of training epochs
  lr = 0.001,               # SGD learning rate
  momentum = 0.9,           # SGD momentum
  seed = 42,                # random seed, applied to NumPy and TensorFlow below
  log_interval = 10,        # how many batches to wait before logging training status
  weight_decay = 0.0005     # L2 factor for the kernel regularizers in vgg()
)
wandb.init(config=config)
config = wandb.config

# Apply the configured seed; previously it was declared but never used.
# NumPy's global RNG drives ImageDataGenerator's shuffling/augmentation, and
# TensorFlow's global seed drives weight initialisation and dropout.
np.random.seed(config.seed)
tf.random.set_seed(config.seed)

In [0]:
def _add_conv_bn(model, filters, weight_decay, **conv_kwargs):
  """Append a 3x3 'same'-padded ReLU Conv2D (L2-regularised kernel) followed by BatchNormalization."""
  model.add(Conv2D(filters = filters, kernel_size = (3, 3), padding = 'same',
                   activation = 'relu',
                   kernel_regularizer = regularizers.l2(weight_decay), **conv_kwargs))
  model.add(BatchNormalization())


def vgg():
  """Build and compile a VGG-style CNN for 10-class image classification.

  The network has five stages of Conv-BN-Dropout-Conv-BN-MaxPool, then a
  512-unit dense layer with batch normalisation and dropout, then a softmax
  output. Reads the globals ``trainX`` (for the input height/width) and
  ``config`` (``weight_decay``, ``lr``, ``momentum``).

  Returns:
    A compiled ``Sequential`` model using SGD, categorical cross-entropy loss
    and the accuracy metric.
  """
  input_shape = (trainX.shape[1], trainX.shape[2], 3)
  num_classes = 10

  # (filters of 1st conv, filters of 2nd conv, dropout rate between them)
  stages = [
    (32, 64, 0.3),
    (128, 128, 0.4),
    (256, 256, 0.4),
    (512, 512, 0.4),
    (1024, 1024, 0.4),
  ]

  model = Sequential()
  for index, (first_filters, second_filters, drop_rate) in enumerate(stages):
    # Only the very first layer of a Sequential model needs the input shape.
    first_kwargs = {'input_shape': input_shape} if index == 0 else {}
    _add_conv_bn(model, first_filters, config.weight_decay, **first_kwargs)
    model.add(Dropout(drop_rate))
    _add_conv_bn(model, second_filters, config.weight_decay)
    model.add(MaxPooling2D(pool_size=(2, 2)))

  # Classifier head
  model.add(Flatten())
  model.add(Dense(512, activation ='relu', kernel_regularizer=regularizers.l2(config.weight_decay)))
  model.add(BatchNormalization())
  model.add(Dropout(0.5))
  model.add(Dense(num_classes, activation = "softmax"))

  # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
  opt = SGD(learning_rate = config.lr, momentum=config.momentum)
  model.compile(optimizer=opt, loss = 'categorical_crossentropy', metrics = ['accuracy'])
  return model

In [0]:
# define model
def baseline():
    """Build and compile a smaller three-stage CNN for 32x32x3 inputs.

    Each stage is Conv-BN-Conv-BN-MaxPool-Dropout with 32, 64 and 128 filters
    and dropout rates 0.2, 0.3 and 0.4. The head is a 128-unit dense layer with
    batch normalisation and dropout, followed by a 10-way softmax. Reads ``lr``
    and ``momentum`` from the global ``config``.

    Returns:
        A compiled ``Sequential`` model using SGD, categorical cross-entropy
        loss and the accuracy metric.
    """
    # (filters for both convolutions of the stage, dropout rate closing the stage)
    stages = [(32, 0.2), (64, 0.3), (128, 0.4)]

    model = Sequential()
    for index, (filters, drop_rate) in enumerate(stages):
        # Only the very first layer of a Sequential model needs the input shape.
        first_kwargs = {'input_shape': (32,32,3)} if index == 0 else {}
        model.add(Conv2D(filters,(3,3), activation = 'relu', kernel_initializer = 'he_uniform', padding = 'same', **first_kwargs))
        model.add(BatchNormalization())
        model.add(Conv2D(filters,(3,3), activation = 'relu', kernel_initializer = 'he_uniform', padding = 'same'))
        model.add(BatchNormalization())
        model.add(MaxPooling2D((2,2)))
        model.add(Dropout(drop_rate))

    model.add(Flatten())
    model.add(Dense(128, activation = 'relu', kernel_initializer = 'he_uniform'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Dense(10, activation = 'softmax'))

    #compile model
    # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
    opt = SGD(learning_rate = config.lr, momentum=config.momentum)
    model.compile(optimizer=opt, loss = 'categorical_crossentropy', metrics = ['accuracy'])
    return model


In [0]:
# Build the model inside the distribution strategy's scope so its variables
# are created on the selected device(s).
with strategy.scope():
  model = vgg()
#create data generator 
datagen = ImageDataGenerator(width_shift_range = 0.1, height_shift_range = 0.1, horizontal_flip = True)
#iterator 
train = datagen.flow(trainX, trainY, batch_size=config.batch_size)
# fit model
# Derive steps from the configured batch size (it used to be a hard-coded 64,
# which silently went wrong as soon as config.batch_size changed).
steps = trainX.shape[0] // config.batch_size
# Model.fit accepts generators directly; fit_generator is deprecated.
history = model.fit(train, steps_per_epoch=steps, epochs=config.epochs, validation_data=(validX, validY), verbose=1,
                    callbacks=[WandbCallback(validation_data=(validX, validY),
                                             save_model=True, verbose=1)])

# Write the model file first, then register that exact path with W&B; asking
# W&B to save "mymodel.h5" before it exists (and relative to the working
# directory rather than the run directory) would match nothing.
model_path = os.path.join(wandb.run.dir, "mymodel.h5")
model.save(model_path)
wandb.save(model_path)

In [0]:
# Score the trained model on the remaining test split; evaluate() returns the
# loss followed by the compiled metric, and only the accuracy is reported.
_, acc = model.evaluate(testX, testY, verbose=0)
accuracy_percent = acc * 100.0
print('> %.3f' % accuracy_percent)


In [0]:

# Plot the loss/accuracy curves recorded in `history` by the training cell.
summarize_diagnostics(history)


In [0]:
# Write the model into the W&B run directory first, then register that exact
# file with W&B; calling wandb.save("mymodel.h5") beforehand pointed at a file
# in the working directory that was never created.
model_path = os.path.join(wandb.run.dir, "mymodel.h5")
model.save(model_path)
wandb.save(model_path)