# Import Modules

In [None]:
# Fetch the gravity.optimizer repository; a later cell adds its directory to
# sys.path and imports the `gravity` helper module from it.
!git clone https://github.com/dariush-bahrami/gravity.optimizer.git

In [None]:
import sys
import tensorflow as tf
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
# Draw every figure in this notebook with matplotlib's dark theme.
plt.style.use('dark_background')

# Report whether TensorFlow sees the first GPU. Both branches only print;
# execution continues either way.
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
    # Was `Print(...)`, which raised NameError exactly when no GPU was found.
    print('GPU device not found')
else:
    print('Found GPU at: {}'.format(device_name))

# Make the cloned repository importable, then load its helper module.
# NOTE(review): the path assumes the clone landed under /content — confirm
# when running outside that environment.
sys.path.append('/content/gravity.optimizer')
import gravity

# Optimizer Definition

In [None]:
class Gravity(tf.keras.optimizers.Optimizer):
    """Gravity optimizer implemented as a Keras ``Optimizer`` subclass.

    One ``velocity`` slot is kept per variable. With ``g`` the gradient,
    ``t`` the current value of ``self.iterations`` and
    ``m = 1 / max(|g|)``, a dense step computes::

        beta_hat = (beta * t + 1) / (t + 2)
        zeta     = g / (1 + (g / m) ** 2)
        velocity = beta_hat * velocity + (1 - beta_hat) * zeta
        var      = var - lr * velocity

    Args:
        learning_rate: Step size applied to the velocity. If ``lr`` is
            present in ``kwargs`` it is used instead.
        alpha: Together with the learning rate sets the standard deviation
            (``alpha / learning_rate``) of the normal distribution used to
            initialise each velocity slot.
        beta: Averaging coefficient from which ``beta_hat`` is derived.
        name: Name passed to the base optimizer.
        **kwargs: Forwarded to the base optimizer constructor.
    """

    def __init__(self,
                 learning_rate=0.1,
                 alpha=0.01,
                 beta=0.9,
                 name="Gravity",
                 **kwargs):
        super().__init__(name, **kwargs)
        self._set_hyper('learning_rate', kwargs.get('lr', learning_rate))
        # `_initial_decay` is not defined in this class; it is expected to be
        # set by the base-class constructor called above.
        self._set_hyper('decay', self._initial_decay)
        self._set_hyper('alpha', alpha)
        self._set_hyper('beta', beta)
        # Stored and reported by get_config(); the update rule in this class
        # does not read it.
        self.epsilon = 1e-7

    def _create_slots(self, var_list):
        """Create a randomly initialised ``velocity`` slot per variable.

        Slots are drawn from a zero-mean normal distribution whose standard
        deviation is ``alpha / learning_rate``.
        """
        alpha = self._get_hyper("alpha")
        # NOTE(review): the division assumes `learning_rate` is numeric (not
        # a schedule object) — confirm before using schedules.
        stddev = alpha / self.learning_rate
        initializer = tf.keras.initializers.RandomNormal(mean=0.0,
                                                         stddev=stddev,
                                                         seed=None)
        for var in var_list:
            self.add_slot(var, "velocity", initializer=initializer)

    @tf.function
    def _resource_apply_dense(self, grad, var):
        """Apply one Gravity step to ``var`` using its dense gradient.

        Args:
            grad: Gradient tensor for ``var``.
            var: Variable updated in place (together with its velocity slot).
        """
        # Hyperparameters and state, all in the variable's dtype.
        var_dtype = var.dtype.base_dtype
        lr_t = self._decayed_lr(var_dtype)
        beta = self._get_hyper("beta", var_dtype)
        # Cast the step counter to the variable's dtype rather than a fixed
        # float32, so `beta * t` does not mix dtypes for float16/float64
        # variables. For float32 variables the result is unchanged.
        t = tf.cast(self.iterations, var_dtype)
        beta_hat = (beta * t + 1) / (t + 2)
        velocity = self.get_slot(var, "velocity")

        # Damp the gradient element-wise relative to its largest magnitude.
        max_step_grad = 1 / tf.math.reduce_max(tf.math.abs(grad))
        gradient_term = grad / (1 + (grad / max_step_grad)**2)

        # Update the velocity first, then step the variable along it.
        updated_velocity = velocity.assign(beta_hat * velocity +
                                           (1 - beta_hat) * gradient_term)
        var.assign(var - lr_t * updated_velocity)
        # NOTE(review): no update op is returned; the two assigns above are
        # the only effect. Confirm this is acceptable for the Keras version
        # in use before adding a grouped return.

    def _resource_apply_sparse(self, grad, var):
        """Sparse gradients are not supported by this optimizer."""
        raise NotImplementedError(
            "Gravity does not support sparse gradient updates.")

    def get_config(self):
        """Return the base config extended with this optimizer's settings."""
        config = super().get_config()
        config.update({
            'learning_rate':
            self._serialize_hyperparameter('learning_rate'),
            'decay':
            self._serialize_hyperparameter('decay'),
            'alpha':
            self._serialize_hyperparameter('alpha'),
            'beta':
            self._serialize_hyperparameter('beta'),
            'epsilon':
            self.epsilon,
        })
        return config

# Benchmarks

## MNIST

In [None]:
# Load MNIST through the gravity helper and unpack the entries used below.
mnist_dict = gravity.get_dataset_mnist(
    verbose=True,
    show_images=True,
)
(x_train_mnist, y_train_mnist) = mnist_dict['train_data']
(x_test_mnist, y_test_mnist) = mnist_dict['test_data']
mnist_classes = mnist_dict['classes']
mnist_input_shape = mnist_dict['input_shape']

### VGG16

In [None]:
# Training configuration for this run.
batch_size, epochs = 128, 10

# Candidate optimizers; only the one bound to `optimizer` below is used.
gravity_opt = Gravity(learning_rate=0.1, alpha=0.01, beta=0.9)
adam_opt = tf.keras.optimizers.Adam(learning_rate=0.00025)
rmsprop_opt = tf.keras.optimizers.RMSprop(learning_rate=0.0001)

cost_func = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# VGG16 is built for 32x32x1 input and then passed through
# gravity.resize_model_input_size with the 28x28x1 data shape —
# presumably to adapt the images to the network input; confirm in gravity.
base_model = gravity.get_vgg16(
    classes=mnist_classes,
    input_shape=(32, 32, 1),
    drop_out_rate=0,
    print_summary=False,
)
model = gravity.resize_model_input_size(
    base_model,
    target_size=(32, 32),
    data_shape=(28, 28, 1),
    print_summary=False,
)

# Try different optimizers by replacing gravity_opt
optimizer = gravity_opt
model.compile(optimizer=optimizer, loss=cost_func, metrics=['accuracy'])

# Fit on the training split, validating on the test split each epoch.
history = model.fit(
    x=x_train_mnist,
    y=y_train_mnist,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(x_test_mnist, y_test_mnist),
)
gravity.plot_history(history)

In [None]:
# Free-form note passed along with the run; left empty here.
comment = ''
# Hand the model, its training history and the optimizer to the gravity
# helper, tagged with the dataset name.
gravity.save_run_info(
    model, history, optimizer, 'MNIST', comment=comment
)

### VGG19

In [None]:
# Training configuration for this run.
batch_size, epochs = 128, 10

# Candidate optimizers; only the one bound to `optimizer` below is used.
gravity_opt = Gravity(learning_rate=0.1, alpha=0.01, beta=0.9)
adam_opt = tf.keras.optimizers.Adam(learning_rate=0.00025)
rmsprop_opt = tf.keras.optimizers.RMSprop(learning_rate=0.001)

cost_func = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# VGG19 is built for 32x32x1 input and then passed through
# gravity.resize_model_input_size with the 28x28x1 data shape —
# presumably to adapt the images to the network input; confirm in gravity.
base_model = gravity.get_vgg19(
    classes=mnist_classes,
    input_shape=(32, 32, 1),
    drop_out_rate=0,
    print_summary=False,
)
model = gravity.resize_model_input_size(
    base_model,
    target_size=(32, 32),
    data_shape=(28, 28, 1),
    print_summary=False,
)

# Swap in adam_opt or rmsprop_opt here to compare optimizers.
optimizer = gravity_opt
model.compile(optimizer=optimizer, loss=cost_func, metrics=['accuracy'])

# Fit on the training split, validating on the test split each epoch.
history = model.fit(
    x=x_train_mnist,
    y=y_train_mnist,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(x_test_mnist, y_test_mnist),
)
gravity.plot_history(history)

In [None]:
# Free-form note passed along with the run; left empty here.
comment = ''
# Hand the model, its training history and the optimizer to the gravity
# helper, tagged with the dataset name.
gravity.save_run_info(
    model, history, optimizer, 'MNIST', comment=comment
)

## Fashion MNIST

In [None]:
# Load Fashion MNIST through the gravity helper and unpack the entries used
# below.
fashion_mnist_dict = gravity.get_dataset_fashion_mnist(
    verbose=True,
    show_images=True,
)
(x_train_fashion_mnist, y_train_fashion_mnist) = fashion_mnist_dict['train_data']
(x_test_fashion_mnist, y_test_fashion_mnist) = fashion_mnist_dict['test_data']
fashion_mnist_classes = fashion_mnist_dict['classes']
fashion_mnist_input_shape = fashion_mnist_dict['input_shape']

### VGG16

In [None]:
# Training configuration for this run.
batch_size, epochs = 128, 10

# Candidate optimizers; only the one bound to `optimizer` below is used.
gravity_opt = Gravity(learning_rate=0.1, alpha=0.01, beta=0.9)
adam_opt = tf.keras.optimizers.Adam(learning_rate=0.001)
rmsprop_opt = tf.keras.optimizers.RMSprop(learning_rate=0.001)

cost_func = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# VGG16 is built for 32x32x1 input and then passed through
# gravity.resize_model_input_size with the dataset's own input shape —
# presumably to adapt the images to the network input; confirm in gravity.
base_model = gravity.get_vgg16(
    classes=fashion_mnist_classes,
    input_shape=(32, 32, 1),
    drop_out_rate=0,
    print_summary=False,
)
model = gravity.resize_model_input_size(
    base_model,
    target_size=(32, 32),
    data_shape=fashion_mnist_input_shape,
    print_summary=False,
)

# Swap in adam_opt or rmsprop_opt here to compare optimizers.
optimizer = gravity_opt
model.compile(optimizer=optimizer, loss=cost_func, metrics=['accuracy'])

# Fit on the training split, validating on the test split each epoch.
history = model.fit(
    x=x_train_fashion_mnist,
    y=y_train_fashion_mnist,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(x_test_fashion_mnist, y_test_fashion_mnist),
)
gravity.plot_history(history)

In [None]:
# Free-form note passed along with the run; left empty here.
comment = ''
# Hand the model, its training history and the optimizer to the gravity
# helper, tagged with the dataset name.
gravity.save_run_info(
    model, history, optimizer, 'Fashion MNIST', comment=comment
)

### VGG19

In [None]:
# Training configuration for this run.
batch_size, epochs = 128, 10

# Candidate optimizers; only the one bound to `optimizer` below is used.
gravity_opt = Gravity(learning_rate=0.1, alpha=0.01, beta=0.9)
adam_opt = tf.keras.optimizers.Adam(learning_rate=0.001)
rmsprop_opt = tf.keras.optimizers.RMSprop(learning_rate=0.001)

cost_func = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# VGG19 is built for 32x32x1 input and then passed through
# gravity.resize_model_input_size with the dataset's own input shape —
# presumably to adapt the images to the network input; confirm in gravity.
base_model = gravity.get_vgg19(
    classes=fashion_mnist_classes,
    input_shape=(32, 32, 1),
    drop_out_rate=0,
    print_summary=False,
)
model = gravity.resize_model_input_size(
    base_model,
    target_size=(32, 32),
    data_shape=fashion_mnist_input_shape,
    print_summary=False,
)

# Swap in adam_opt or rmsprop_opt here to compare optimizers.
optimizer = gravity_opt
model.compile(optimizer=optimizer, loss=cost_func, metrics=['accuracy'])

# Fit on the training split, validating on the test split each epoch.
history = model.fit(
    x=x_train_fashion_mnist,
    y=y_train_fashion_mnist,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(x_test_fashion_mnist, y_test_fashion_mnist),
)
gravity.plot_history(history)

In [None]:
# Free-form note passed along with the run; left empty here.
comment = ''
# Hand the model, its training history and the optimizer to the gravity
# helper, tagged with the dataset name.
gravity.save_run_info(
    model, history, optimizer, 'Fashion MNIST', comment=comment
)

## CIFAR10

In [None]:
# Load CIFAR10 through the gravity helper and unpack the entries used below.
cifar10_dict = gravity.get_dataset_cifar10(
    verbose=True,
    show_images=True,
)
(x_train_cifar10, y_train_cifar10) = cifar10_dict['train_data']
(x_test_cifar10, y_test_cifar10) = cifar10_dict['test_data']
cifar10_classes = cifar10_dict['classes']
cifar10_input_shape = cifar10_dict['input_shape']

### VGG16

In [None]:
# Training configuration for this run.
batch_size, epochs = 128, 10

# Candidate optimizers; only the one bound to `optimizer` below is used.
gravity_opt = Gravity(learning_rate=0.1, alpha=0.01, beta=0.9)
adam_opt = tf.keras.optimizers.Adam(learning_rate=0.001)
rmsprop_opt = tf.keras.optimizers.RMSprop(learning_rate=0.001)

cost_func = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# VGG16 built directly for 32x32x3 input; no input-resizing wrapper is used.
model = gravity.get_vgg16(
    classes=cifar10_classes,
    input_shape=(32, 32, 3),
    drop_out_rate=0,
    print_summary=False,
)

# Swap in adam_opt or rmsprop_opt here to compare optimizers.
optimizer = gravity_opt
model.compile(optimizer=optimizer, loss=cost_func, metrics=['accuracy'])

# Fit on the training split, validating on the test split each epoch.
history = model.fit(
    x=x_train_cifar10,
    y=y_train_cifar10,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(x_test_cifar10, y_test_cifar10),
)
gravity.plot_history(history)

In [None]:
# Free-form note passed along with the run; left empty here.
comment = ''
# Hand the model, its training history and the optimizer to the gravity
# helper, tagged with the dataset name.
gravity.save_run_info(
    model, history, optimizer, 'CIFAR10', comment=comment
)

### VGG19


In [None]:
# Training configuration for this run.
batch_size, epochs = 128, 10

# Candidate optimizers; only the one bound to `optimizer` below is used.
gravity_opt = Gravity(learning_rate=0.1, alpha=0.01, beta=0.9)
adam_opt = tf.keras.optimizers.Adam(learning_rate=0.001)
rmsprop_opt = tf.keras.optimizers.RMSprop(learning_rate=0.001)

cost_func = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# VGG19 built directly for 32x32x3 input; no input-resizing wrapper is used.
model = gravity.get_vgg19(
    classes=cifar10_classes,
    input_shape=(32, 32, 3),
    drop_out_rate=0,
    print_summary=False,
)

# Swap in adam_opt or rmsprop_opt here to compare optimizers.
optimizer = gravity_opt
model.compile(optimizer=optimizer, loss=cost_func, metrics=['accuracy'])

# Fit on the training split, validating on the test split each epoch.
history = model.fit(
    x=x_train_cifar10,
    y=y_train_cifar10,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(x_test_cifar10, y_test_cifar10),
)
gravity.plot_history(history)

In [None]:
# Free-form note passed along with the run; left empty here.
comment = ''
# Hand the model, its training history and the optimizer to the gravity
# helper, tagged with the dataset name.
gravity.save_run_info(
    model, history, optimizer, 'CIFAR10', comment=comment
)

## CIFAR100 (Coarse)

In [None]:
# Load CIFAR100 with coarse labels through the gravity helper and unpack the
# entries used below.
cifar100_dict = gravity.get_dataset_cifar100(
    label_mode='coarse',
    verbose=True,
    show_images=True,
)
(x_train_cifar100, y_train_cifar100) = cifar100_dict['train_data']
(x_test_cifar100, y_test_cifar100) = cifar100_dict['test_data']
cifar100_classes = cifar100_dict['classes']
cifar100_input_shape = cifar100_dict['input_shape']

### VGG16

In [None]:
# Training configuration for this run.
batch_size, epochs = 128, 10

# Candidate optimizers; only the one bound to `optimizer` below is used.
gravity_opt = Gravity(learning_rate=0.1, alpha=0.01, beta=0.9)
adam_opt = tf.keras.optimizers.Adam(learning_rate=0.001)
rmsprop_opt = tf.keras.optimizers.RMSprop(learning_rate=0.001)

cost_func = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# VGG16 built directly for 32x32x3 input; no input-resizing wrapper is used.
model = gravity.get_vgg16(
    classes=cifar100_classes,
    input_shape=(32, 32, 3),
    drop_out_rate=0,
    print_summary=False,
)

# Swap in adam_opt or rmsprop_opt here to compare optimizers.
optimizer = gravity_opt
model.compile(optimizer=optimizer, loss=cost_func, metrics=['accuracy'])

# Fit on the training split, validating on the test split each epoch.
history = model.fit(
    x=x_train_cifar100,
    y=y_train_cifar100,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(x_test_cifar100, y_test_cifar100),
)
gravity.plot_history(history)

In [None]:
# Free-form note passed along with the run; left empty here.
comment = ''
# Hand the model, its training history and the optimizer to the gravity
# helper, tagged with the dataset name.
gravity.save_run_info(
    model, history, optimizer, 'CIFAR100 (Coarse)', comment=comment
)

### VGG19


In [None]:
# Training configuration for this run.
batch_size, epochs = 128, 10

# Candidate optimizers; only the one bound to `optimizer` below is used.
gravity_opt = Gravity(learning_rate=0.1, alpha=0.01, beta=0.9)
adam_opt = tf.keras.optimizers.Adam(learning_rate=0.001)
rmsprop_opt = tf.keras.optimizers.RMSprop(learning_rate=0.001)

cost_func = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# VGG19 built directly for 32x32x3 input; no input-resizing wrapper is used.
model = gravity.get_vgg19(
    classes=cifar100_classes,
    input_shape=(32, 32, 3),
    drop_out_rate=0,
    print_summary=False,
)

# Swap in adam_opt or rmsprop_opt here to compare optimizers.
optimizer = gravity_opt
model.compile(optimizer=optimizer, loss=cost_func, metrics=['accuracy'])

# Fit on the training split, validating on the test split each epoch.
history = model.fit(
    x=x_train_cifar100,
    y=y_train_cifar100,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(x_test_cifar100, y_test_cifar100),
)
gravity.plot_history(history)

In [None]:
# Free-form note passed along with the run; left empty here.
comment = ''
# Hand the model, its training history and the optimizer to the gravity
# helper, tagged with the dataset name.
gravity.save_run_info(
    model, history, optimizer, 'CIFAR100 (Coarse)', comment=comment
)

## CIFAR100 (Fine)

In [None]:
# Load CIFAR100 with fine labels through the gravity helper. This rebinds
# the same cifar100_* names that the coarse-label section assigns.
cifar100_dict = gravity.get_dataset_cifar100(
    label_mode='fine',
    verbose=True,
    show_images=True,
)
(x_train_cifar100, y_train_cifar100) = cifar100_dict['train_data']
(x_test_cifar100, y_test_cifar100) = cifar100_dict['test_data']
cifar100_classes = cifar100_dict['classes']
cifar100_input_shape = cifar100_dict['input_shape']

### VGG16

In [None]:
# Training configuration for this run.
batch_size, epochs = 128, 10

# Candidate optimizers; only the one bound to `optimizer` below is used.
gravity_opt = Gravity(learning_rate=0.1, alpha=0.01, beta=0.9)
adam_opt = tf.keras.optimizers.Adam(learning_rate=0.001)
rmsprop_opt = tf.keras.optimizers.RMSprop(learning_rate=0.001)

cost_func = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# VGG16 built directly for 32x32x3 input; no input-resizing wrapper is used.
model = gravity.get_vgg16(
    classes=cifar100_classes,
    input_shape=(32, 32, 3),
    drop_out_rate=0,
    print_summary=False,
)

# Swap in adam_opt or rmsprop_opt here to compare optimizers.
optimizer = gravity_opt
model.compile(optimizer=optimizer, loss=cost_func, metrics=['accuracy'])

# Fit on the training split, validating on the test split each epoch.
history = model.fit(
    x=x_train_cifar100,
    y=y_train_cifar100,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(x_test_cifar100, y_test_cifar100),
)
gravity.plot_history(history)

In [None]:
# Free-form note passed along with the run; left empty here.
comment = ''
# Hand the model, its training history and the optimizer to the gravity
# helper, tagged with the dataset name.
gravity.save_run_info(
    model, history, optimizer, 'CIFAR100 (Fine)', comment=comment
)

### VGG19


In [None]:
# Training configuration for this run.
batch_size, epochs = 128, 10

# Candidate optimizers; only the one bound to `optimizer` below is used.
gravity_opt = Gravity(learning_rate=0.1, alpha=0.01, beta=0.9)
adam_opt = tf.keras.optimizers.Adam(learning_rate=0.001)
rmsprop_opt = tf.keras.optimizers.RMSprop(learning_rate=0.001)

cost_func = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# VGG19 built directly for 32x32x3 input; no input-resizing wrapper is used.
model = gravity.get_vgg19(
    classes=cifar100_classes,
    input_shape=(32, 32, 3),
    drop_out_rate=0,
    print_summary=False,
)

# Swap in adam_opt or rmsprop_opt here to compare optimizers.
optimizer = gravity_opt
model.compile(optimizer=optimizer, loss=cost_func, metrics=['accuracy'])

# Fit on the training split, validating on the test split each epoch.
history = model.fit(
    x=x_train_cifar100,
    y=y_train_cifar100,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(x_test_cifar100, y_test_cifar100),
)
gravity.plot_history(history)

In [None]:
# Free-form note passed along with the run; left empty here.
comment = ''
# Hand the model, its training history and the optimizer to the gravity
# helper, tagged with the dataset name.
gravity.save_run_info(
    model, history, optimizer, 'CIFAR100 (Fine)', comment=comment
)