# How to use AddSign/PowSign Optimizers in classifying soybean dataset in Python

The soybean dataset has 14 types of leafs. Each one class have minimum 10 types of images and some have 50 images. the total images are 375 images. in the test folder we have 41 images. with this we have to train 375 images with class labels and test on 41 images, predict each image class label.

### https://towardsdatascience.com/custom-optimizer-in-tensorflow-d5b41f75644a

In [2]:
!pip install pmlb



In [3]:
# imports
from pmlb import fetch_data
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

import tensorflow as tf
import numpy as np

from keras.models import Sequential
import tensorflow.keras.layers as Layers
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Input, Flatten, SeparableConv2D, Activation, BatchNormalization
from tensorflow.keras.utils import to_categorical

### *Loading Dataset*

In [4]:
# loading dataset
dataset_name = 'soybean'
dataset = fetch_data(dataset_name)
print(dataset.head())

print("\n", dataset.columns)

cols = ['date', 'plant-stand', 'precip', 'temp', 'hail', 'crop-hist',
       'area-damaged', 'severity', 'seed-tmt', 'germination', 'plant-growth',
       'leaves', 'leafspots-halo', 'leafspots-marg', 'leafspot-size',
       'leaf-shread', 'leaf-malf', 'leaf-mild', 'stem', 'lodging',
       'stem-cankers', 'canker-lesion', 'fruiting-bodies', 'external-decay',
       'mycelium', 'int-discolor', 'sclerotia', 'fruit-pods', 'fruit-spots',
       'seed', 'mold-growth', 'seed-discolor', 'seed-size', 'shriveling',
       'roots', 'target']

print(dataset[cols].info())
print(dataset[cols].describe())
print(dataset[cols].corr())

   date  plant-stand  precip  temp  ...  seed-size  shriveling  roots  target
0     6            2       1     3  ...          2           1      2      10
1     2            2       1     3  ...          2           1      2      10
2     3            2       1     3  ...          2           1      2      10
3     3            2       1     3  ...          2           1      2      10
4     6            2       1     3  ...          2           1      2      10

[5 rows x 36 columns]

 Index(['date', 'plant-stand', 'precip', 'temp', 'hail', 'crop-hist',
       'area-damaged', 'severity', 'seed-tmt', 'germination', 'plant-growth',
       'leaves', 'leafspots-halo', 'leafspots-marg', 'leafspot-size',
       'leaf-shread', 'leaf-malf', 'leaf-mild', 'stem', 'lodging',
       'stem-cankers', 'canker-lesion', 'fruiting-bodies', 'external-decay',
       'mycelium', 'int-discolor', 'sclerotia', 'fruit-pods', 'fruit-spots',
       'seed', 'mold-growth', 'seed-discolor', 'seed-size', 'shriveli

### *Handling and Spliting Data*

In [5]:
# loading features and targets from dataset
X, y = fetch_data(dataset_name, return_X_y=True)

# split Data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

# Normalizing dataset
scaler = MinMaxScaler(feature_range=(0, 1))
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

# One-hot encoding
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

### *Creating Model*

In [6]:
# create Model
model = Sequential()
model.add(Dense(units = 56, input_dim=35, activation="relu"))
model.add(Dense(units = 28, activation="relu"))
model.add(Dense(units = 19, activation="softmax"))

# **<u>Creating Custom Optimizer**

In [7]:
from tensorflow.keras.optimizers import Adam

# This class defines the API to add Ops to train a model. 
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import state_ops
from tensorflow.python.framework import ops
from tensorflow.python.training import optimizer
import tensorflow as tf

In [36]:
class PowerSign(optimizer.Optimizer):
  """Implementation of PowerSign.
  See [Bello et. al., 2017](https://arxiv.org/abs/1709.07417)
  @@__init__
  """
  def __init__(self, learning_rate=0.001,alpha=0.01,beta=0.5, use_locking=False, name="PowerSign"):
    super(PowerSign, self).__init__(use_locking, name)
    self._lr = learning_rate
    self._alpha = alpha
    self._beta = beta
    
    # Tensor versions of the constructor arguments, created in _prepare().
    self._lr_t = None
    self._alpha_t = None
    self._beta_t = None

  def _prepare(self):
    self._lr_t = ops.convert_to_tensor(self._lr, name="learning_rate")
    self._alpha_t = ops.convert_to_tensor(self._beta, name="alpha_t")
    self._beta_t = ops.convert_to_tensor(self._beta, name="beta_t")

  def _create_slots(self, var_list):
    # Create slots for the first and second moments.
    for v in var_list:
        self._zeros_slot(v, "m", self._name)

  def _resource_apply_dense(self, grad, var):
    lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
    alpha_t = math_ops.cast(self._alpha_t, var.dtype.base_dtype)
    beta_t = math_ops.cast(self._beta_t, var.dtype.base_dtype)

    eps = 1e-7 #cap for moving average
    
    m = self.get_slot(var, "m")
    m_t = m.assign(tf.maximum(beta_t * m + eps, tf.abs(grad)))

    var_update = state_ops.assign_sub(var, lr_t*grad*tf.exp( tf.math.log(alpha_t)*tf.sign(grad)*tf.sign(m_t))) #Update 'ref' by subtracting 'value
    #Create an op that groups multiple operations.
    #When this op finishes, all ops in input have finished
    return control_flow_ops.group(*[var_update, m_t])

  def _apply_sparse(self, grad, var):
    raise NotImplementedError("Sparse gradient updates are not supported.")

In [37]:
class AddSign(optimizer.Optimizer):
    """Implementation of AddSign.
    See [Bello et. al., 2017](https://arxiv.org/abs/1709.07417)
    @@__init__
    """

    def __init__(self, learning_rate=1.001,alpha=0.01,beta=0.5, use_locking=False, name="AddSign"):
        super(AddSign, self).__init__(use_locking, name)
        self._lr = learning_rate
        self._alpha = alpha
        self._beta = beta
        
        # Tensor versions of the constructor arguments, created in _prepare().
        self._lr_t = None
        self._alpha_t = None
        self._beta_t = None
      
    def _prepare(self):
        self._lr_t = ops.convert_to_tensor(self._lr, name="learning_rate")
        self._alpha_t = ops.convert_to_tensor(self._beta, name="beta_t")
        self._beta_t = ops.convert_to_tensor(self._beta, name="beta_t")

    def _create_slots(self, var_list):
        # Create slots for the first and second moments.
        for v in var_list:
            self._zeros_slot(v, "m", self._name)

    def _apply_dense(self, grad, var):
        raise NotImplementedError("Dense gradient updates are not supported.")

    def _apply_sparse(self, grad, var):
        raise NotImplementedError("Sparse gradient updates are not supported.")

    def _resource_apply_dense(self, grad, var):
        lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
        beta_t = math_ops.cast(self._beta_t, var.dtype.base_dtype)
        alpha_t = math_ops.cast(self._alpha_t, var.dtype.base_dtype)
    
        eps = 1e-7 #cap for moving average
        
        m = self.get_slot(var, "m")
        m_t = m.assign(tf.maximum(beta_t * m + eps, tf.abs(grad)))
        
        var_update = state_ops.assign_sub(var, lr_t*grad*(1.0+alpha_t*tf.sign(grad)*tf.sign(m_t) ) )
        #Create an op that groups multiple operations
        #When this op finishes, all ops in input have finished
        return control_flow_ops.group(*[var_update, m_t])

In [42]:
# New Custom Function
# opti = AddSign(learning_rate=0.001)
opti = PowerSign(learning_rate=0.001)

New TensorFlow Session

In [46]:
tf.keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

### *Compiling Model*

In [47]:
# compiling model
model.compile(loss='categorical_crossentropy', optimizer=opti, metrics=['acc'])

### *Training Model*

In [48]:
# training model
model.fit(X_train, y_train, epochs=120, batch_size=10, verbose=1)

Epoch 1/120
Epoch 2/120
Epoch 3/120
Epoch 4/120
Epoch 5/120
Epoch 6/120
Epoch 7/120
Epoch 8/120
Epoch 9/120
Epoch 10/120
Epoch 11/120
Epoch 12/120
Epoch 13/120
Epoch 14/120
Epoch 15/120
Epoch 16/120
Epoch 17/120
Epoch 18/120
Epoch 19/120
Epoch 20/120
Epoch 21/120
Epoch 22/120
Epoch 23/120
Epoch 24/120
Epoch 25/120
Epoch 26/120
Epoch 27/120
Epoch 28/120
Epoch 29/120
Epoch 30/120
Epoch 31/120
Epoch 32/120
Epoch 33/120
Epoch 34/120
Epoch 35/120
Epoch 36/120
Epoch 37/120
Epoch 38/120
Epoch 39/120
Epoch 40/120
Epoch 41/120
Epoch 42/120
Epoch 43/120
Epoch 44/120
Epoch 45/120
Epoch 46/120
Epoch 47/120
Epoch 48/120
Epoch 49/120
Epoch 50/120
Epoch 51/120
Epoch 52/120
Epoch 53/120
Epoch 54/120
Epoch 55/120
Epoch 56/120
Epoch 57/120
Epoch 58/120
Epoch 59/120
Epoch 60/120
Epoch 61/120
Epoch 62/120
Epoch 63/120
Epoch 64/120
Epoch 65/120
Epoch 66/120
Epoch 67/120
Epoch 68/120
Epoch 69/120
Epoch 70/120
Epoch 71/120
Epoch 72/120
Epoch 73/120
Epoch 74/120
Epoch 75/120
Epoch 76/120
Epoch 77/120
Epoch 78

<keras.callbacks.History at 0x7fe2765388d0>

### *Evaluating Model*

In [49]:
# evaluating model
scores = model.evaluate(X_test, y_test)
print() ; print(model.metrics_names) ; print(scores)
print("\n%s : %.2f%%" % (model.metrics_names[1], 100*scores[1]))


['loss', 'acc']
[0.49272477626800537, 0.847533643245697]

acc : 84.75%


### *Model Description*

In [None]:
# more on model
print(model.summary())

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 56)                2016      
                                                                 
 dense_1 (Dense)             (None, 28)                1596      
                                                                 
 dense_2 (Dense)             (None, 19)                551       
                                                                 
Total params: 4,163
Trainable params: 4,163
Non-trainable params: 0
_________________________________________________________________
None
