<a href="https://colab.research.google.com/github/electricdarb/597TL-697TL-Project/blob/main/SparseResNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import *
from tensorflow.keras import regularizers
import tensorflow_datasets as tfds
from time import time
import matplotlib.pyplot as plt 
from tensorflow.keras.datasets import cifar10
import datetime
import os

In [2]:
# Load the TensorBoard notebook extension so the logs written by the
# TensorBoard callback further down can be viewed from inside the notebook.
%load_ext tensorboard

In [3]:
# Dataset constants shared by the model builder and the label encoding below.
INPUT_SHAPE = [32,32,3]
NUM_CLASSES = 10

# Load CIFAR-10 and convert the images to float32 (normalisation is applied
# later by the ImageDataGenerator instances).
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

# One-hot encode the integer labels; use NUM_CLASSES instead of a repeated
# literal so the encoding width always matches the model's output layer.
y_train = tf.keras.utils.to_categorical(y_train, NUM_CLASSES)
y_test = tf.keras.utils.to_categorical(y_test, NUM_CLASSES)

In [4]:
# Mini-batch size passed to both the training and validation data flows.
batch_size = 128

In [5]:
# Training and validation generators share the same feature-wise
# normalisation settings (centre on the mean, scale by the std).
norm_kwargs = dict(featurewise_center=True, featurewise_std_normalization=True)
datagen, datagen_val = (
    tf.keras.preprocessing.image.ImageDataGenerator(**norm_kwargs)
    for _ in range(2)
)

# Both generators take their statistics from the training images, so the
# validation data is normalised with the same values as the training data.
for generator in (datagen_val, datagen):
    generator.fit(x_train)

In [6]:
import random
def rand_mask(num_filters, num_channels, weights_per_kernel=4):
  """
  Build a random binary sparsity mask for a 3x3 convolution kernel.

  Every 3x3 slice of the mask keeps the centre tap plus
  `weights_per_kernel - 1` of the eight surrounding taps. The surrounding
  taps are drawn once per filter (with `random.shuffle`) and reused for every
  input channel of that filter.

  args:
    num_filters: number of output filters (last axis of the mask)
    num_channels: number of input channels (third axis of the mask)
    weights_per_kernel: number of active taps in each 3x3 slice, centre
      included; must be between 1 and 9 (default 4)
  return:
    mask: a randomly generated mask for sparse conv, a float32 tensor of
      shape (3, 3, num_channels, num_filters) holding 1. at kept taps and
      0. everywhere else
  raises:
    ValueError: if weights_per_kernel is outside [1, 9]
  """

  # The eight off-centre positions of a 3x3 window; the centre is always kept.
  choices = [(0, 0), (0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1), (2, 2)]
  if not 1 <= weights_per_kernel <= len(choices) + 1:
    raise ValueError(
        "weights_per_kernel must be between 1 and %d, got %r"
        % (len(choices) + 1, weights_per_kernel))

  mask = np.zeros((3, 3, num_channels, num_filters))
  mask[1, 1, :, :] = 1.
  for f in range(num_filters):
    # Reshuffle per filter so each filter gets its own pattern of taps.
    random.shuffle(choices)
    for i, j in choices[:weights_per_kernel - 1]:
      mask[i, j, :, f] = 1.
  return tf.cast(mask, tf.float32)

In [7]:
class IregConv2D(tf.keras.layers.Layer): # to do: make input the same as Conv2D
    """2-D convolution whose 3x3 kernel is multiplied by a fixed random binary mask.

    The mask is produced by `rand_mask` when the layer is built and is stored
    as a non-trainable weight, so it is saved and restored together with the
    kernel. The kernel variable itself is dense; the mask is applied on every
    forward pass, so masked-out taps never contribute to the output.

    args:
        filters: number of output filters.
        kernel_size: kept for signature compatibility with Conv2D and stored
            in the config. NOTE: the mask (and therefore the kernel) is always
            3x3 because `rand_mask` only generates 3x3 masks.
        strides: int, 2 values (height, width) or a full 4-value stride list.
        padding: 'valid' or 'same' (case-insensitive).
        data_format, dilation_rate, groups: accepted for signature
            compatibility with Conv2D but not used by this layer.
        activation: optional activation applied to the output.
        use_bias: whether a per-filter bias is added to the output.
        kernel_initializer, bias_initializer: initializers for the weights.
        kernel_regularizer, bias_regularizer: regularizers for the weights.
        activity_regularizer: forwarded to the base Layer.
        kernel_constraint, bias_constraint: constraints for the weights.
        **kwargs: forwarded to the base Layer (e.g. `name`, `input_shape`).
    """
    def __init__(self, filters, kernel_size, strides=(1, 1), padding='valid',
            data_format=None, dilation_rate=(1, 1), groups=1, activation=None,
            use_bias=True, kernel_initializer='glorot_uniform',
            bias_initializer='zeros', kernel_regularizer=None,
            bias_regularizer=None, activity_regularizer=None, kernel_constraint=None,
            bias_constraint=None, **kwargs):

        # Forward kwargs so `name`, `input_shape`, `dtype`, ... are honoured
        # instead of being silently dropped.
        super(IregConv2D, self).__init__(
            activity_regularizer=activity_regularizer, **kwargs)

        # Normalise strides to the 4-value form used by tf.nn.conv2d; accept
        # ints, 2-element tuples/lists, and already-expanded 4-element values
        # (the latter is what get_config() emits).
        if isinstance(strides, int):
            strides = [1, strides, strides, 1]
        else:
            strides = list(strides)
            if len(strides) == 2:
                strides = [1, *strides, 1]

        if isinstance(kernel_size, int):
            kernel_size = (kernel_size, kernel_size)

        self.filters = filters
        self.kernel_size = tuple(kernel_size)
        self.strides = strides
        self.padding = padding.upper()
        self.conv = tf.nn.conv2d
        self.use_bias = use_bias
        self.activation = tf.keras.activations.get(activation)

        # Resolve identifiers (strings/dicts/objects) once so that build()
        # and get_config() both work with real Keras objects.
        self.bias_initializer = tf.keras.initializers.get(bias_initializer)
        self.bias_regularizer = tf.keras.regularizers.get(bias_regularizer)
        self.bias_constraint = tf.keras.constraints.get(bias_constraint)
        self.kernel_initializer = tf.keras.initializers.get(kernel_initializer)
        self.kernel_regularizer = tf.keras.regularizers.get(kernel_regularizer)
        self.kernel_constraint = tf.keras.constraints.get(kernel_constraint)

    def build(self, input_shape):
        """Create the bias, kernel and mask weights for the given input shape."""
        in_channels = input_shape[-1]
        if in_channels is None:
            raise ValueError("The channel dimension of the inputs must be defined.")
        in_channels = int(in_channels)
        filters = self.filters
        kernel_shape = (3, 3, in_channels, filters)  # rand_mask is always 3x3

        def mask_initializer(shape, dtype=None):
            # Drawn inside the initializer so the mask lives in a variable
            # and is saved/restored with the rest of the layer's weights.
            return tf.cast(rand_mask(filters, in_channels), dtype or tf.float32)

        # Bias is created before the kernel to keep the original weight order.
        if self.use_bias:
            self.b = self.add_weight(name='bias', shape=(filters,),
                                     initializer=self.bias_initializer,
                                     regularizer=self.bias_regularizer,
                                     constraint=self.bias_constraint,
                                     trainable=True)
        else:
            self.b = None
        # Keep self.w bound to the trainable variable. Rebinding it to
        # `w * mask` here would make call() read a fixed tensor instead.
        self.w = self.add_weight(name='kernel', shape=kernel_shape,
                                 initializer=self.kernel_initializer,
                                 regularizer=self.kernel_regularizer,
                                 constraint=self.kernel_constraint,
                                 trainable=True)
        self.mask = self.add_weight(name='mask', shape=kernel_shape,
                                    initializer=mask_initializer,
                                    trainable=False)

    def call(self, x):
        """Convolve `x` with the masked kernel, then add bias and activation."""
        masked_kernel = tf.multiply(self.w, self.mask)
        out = self.conv(x, masked_kernel, self.strides, self.padding)
        if self.use_bias:
            # The bias belongs on the output only; adding it to the kernel
            # would write non-zero values into masked-out taps.
            out = tf.nn.bias_add(out, self.b)
        if self.activation is not None:
            out = self.activation(out)
        return out

    def get_config(self):
        """Return a serializable config from which the layer can be rebuilt."""
        config = super().get_config().copy()
        config.update({
            "filters": self.filters,
            "kernel_size": self.kernel_size,
            "strides": self.strides,
            "padding": self.padding,
            "activation": tf.keras.activations.serialize(self.activation),
            "use_bias": self.use_bias,
            "bias_initializer": tf.keras.initializers.serialize(self.bias_initializer),
            "bias_regularizer": tf.keras.regularizers.serialize(self.bias_regularizer),
            "bias_constraint": tf.keras.constraints.serialize(self.bias_constraint),
            "kernel_initializer": tf.keras.initializers.serialize(self.kernel_initializer),
            "kernel_regularizer": tf.keras.regularizers.serialize(self.kernel_regularizer),
            "kernel_constraint": tf.keras.constraints.serialize(self.kernel_constraint),
        })
        return config


In [8]:
def build_model(weight_decay, Conv2D = Conv2D):
  """
  Assemble a VGG-style Sequential classifier with NUM_CLASSES softmax outputs.

  args:
    weight_decay: L2 factor applied to every conv kernel and to the hidden
      dense layer
    Conv2D: layer class used for all 3x3 convolutions (defaults to the Keras
      Conv2D; pass IregConv2D for the sparse variant)
  return:
    model: the uncompiled tf.keras.Sequential network
  """
  def l2_penalty():
    # A fresh regularizer object per layer, as in a hand-written stack.
    return regularizers.l2(weight_decay)

  # Each tuple is (filters, dropout rate after the block or None);
  # 'pool' inserts a 2x2 max-pool.
  layer_plan = [
      (64, 0.3), (64, None), 'pool',
      (128, 0.4), (128, None), 'pool',
      (256, 0.4), (256, 0.4), (256, None), 'pool',
      (512, 0.4), (512, 0.4), (512, None), 'pool',
      (512, 0.4), (512, 0.4), (512, None), 'pool',
  ]

  model = tf.keras.Sequential()
  is_first_conv = True
  for entry in layer_plan:
    if entry == 'pool':
      model.add(MaxPooling2D(pool_size=(2, 2)))
      continue

    num_filters, drop_rate = entry
    # Only the very first convolution declares the network's input shape.
    shape_kwargs = {'input_shape': INPUT_SHAPE} if is_first_conv else {}
    is_first_conv = False

    model.add(Conv2D(num_filters, (3, 3), padding='same',
                     kernel_regularizer=l2_penalty(), **shape_kwargs))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    if drop_rate is not None:
      model.add(Dropout(drop_rate))

  # Classifier head.
  model.add(Dropout(0.5))
  model.add(Flatten())
  model.add(Dense(512, kernel_regularizer=l2_penalty()))
  model.add(Activation('relu'))
  model.add(BatchNormalization())

  model.add(Dropout(0.5))
  model.add(Dense(NUM_CLASSES))
  model.add(Activation('softmax'))
  return model


In [9]:

# Same architecture twice: a dense baseline and the masked-convolution variant.
weight_decay = 0.0005
# NOTE(review): the (9/4)**2 factor presumably compensates for keeping 4 of
# the 9 taps in each 3x3 kernel — confirm.
sparse_weight_decay = weight_decay * (9/4)**2
base_model = build_model(weight_decay)
test_model = build_model(sparse_weight_decay, Conv2D=IregConv2D)

In [10]:
# Timestamped TensorBoard log directory under ./logs.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
logdir = os.path.join("logs", run_stamp)

# Shrink the learning rate by sqrt(0.1) after 5 epochs without improvement.
lr_reducer = tf.keras.callbacks.ReduceLROnPlateau(
    factor=np.sqrt(0.1),
    cooldown=0,
    patience=5,
    min_lr=0.5e-6,
)
# Stop once the monitored value has not improved by 0.001 for 10 epochs.
early_stopper = tf.keras.callbacks.EarlyStopping(min_delta=0.001, patience=10)
tbcb = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

cbs = [lr_reducer, early_stopper, tbcb]

In [11]:
# NOTE(review): 0.1 is far above the Keras Adam default of 0.001 — confirm
# this learning rate is intended.
lr = 0.1
epochs = 3

# Each model gets its own optimizer instance. A shared instance would carry
# Adam's state and any learning-rate reduction made by ReduceLROnPlateau from
# one model's training into the other's.
opt = Adam(learning_rate=lr)
base_opt = Adam(learning_rate=lr)
test_model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
base_model.compile(loss='categorical_crossentropy', optimizer=base_opt, metrics=['accuracy'])

In [12]:
# Train the masked-convolution model on normalised CIFAR-10 batches and
# validate on the normalised test split.
train_batches = datagen.flow(x_train, y_train, batch_size=batch_size)
val_batches = datagen_val.flow(x_test, y_test, batch_size=batch_size)
test_model.fit(
    train_batches,
    epochs=epochs,
    validation_data=val_batches,
    callbacks=cbs,
    verbose=1,
)

Epoch 1/3
  1/391 [..............................] - ETA: 1:29:34 - loss: 15.0318 - accuracy: 0.0625

KeyboardInterrupt: ignored

In [None]:
# Train the dense baseline with the same data flows, epochs and callbacks.
train_batches = datagen.flow(x_train, y_train, batch_size=batch_size)
val_batches = datagen_val.flow(x_test, y_test, batch_size=batch_size)
base_model.fit(
    train_batches,
    epochs=epochs,
    validation_data=val_batches,
    callbacks=cbs,
    verbose=1,
)

In [13]:
# Sanity check: show the shape of the test image array.
x_test.shape


(10000, 32, 32, 3)