In [None]:
'''
How to choose learning rate with Neptune
'''
###### Create Neptune project 
## update: pip install neptune-client==0.10.7
import neptune
import os

# Open the Neptune project this script logs to. The API token is taken from
# the NEPTUNE_API_TOKEN environment variable (None is passed when it is unset).
project = neptune.init(
    project_qualified_name='YourUserName/YourProjectName',
    api_token=os.environ.get('NEPTUNE_API_TOKEN'),
)


### from sklearn.datasets import load_iris 
import pandas as pd
import numpy as np
import tensorflow.keras
from tensorflow.keras import backend as K
import tensorflow.keras as keras
import tensorflow.keras.layers as layers
import tensorflow.keras.callbacks as callbacks
import tensorflow.keras
from tensorflow.keras.layers import Dense, Input, Flatten, concatenate, MaxPooling2D, Conv2D
from tensorflow.keras.models import Model,Sequential
import tensorflow.keras.backend as K
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import Callback, LearningRateScheduler 

from sklearn.metrics import f1_score
from tensorflow import random_normal_initializer
from numpy import argmax
from collections import Counter

import os
import tensorflow as tf
import random
import matplotlib.pyplot as plt
from keras.callbacks import TensorBoard
pd.options.display.max_columns = 100

#### Reproducibility helper
def reset_random_seeds(seed=42):
    """Seed every random number generator this script relies on.

    The script calls this before sampling the training subset and before
    each ``model.fit`` so that repeated runs are comparable.

    Args:
        seed: Integer seed applied to Python's hash seed environment
            variable, the ``random`` module, NumPy and TensorFlow. The
            default value is arbitrary.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)


#### Load data for the image classifier model
fashion_mnist = keras.datasets.fashion_mnist
(X_train_full, y_train_full), (X_test_full, y_test_full) = fashion_mnist.load_data()

# Seed first so the same 20000-image training subset is drawn on every run.
reset_random_seeds()
trainIdx = random.sample(range(len(X_train_full)), 20000)

# Dividing by 255.0 rescales the images to floats (presumably from 0-255
# pixel intensities into [0, 1] -- confirm against the dataset docs).
x_train, y_train = X_train_full[trainIdx] / 255.0, y_train_full[trainIdx]
x_test, y_test = X_test_full / 255.0, y_test_full
    
#### Save learning rate during the training
def get_lr_metric(optimizer):
    """Build a metric-style function that reports ``optimizer``'s learning rate.

    Args:
        optimizer: Optimizer exposing ``_decayed_lr(dtype)``.

    Returns:
        A function ``lr(y_true, y_pred)`` that ignores both arguments and
        returns ``optimizer._decayed_lr(tf.float32)``. The inner function is
        named ``lr`` on purpose: ``plotLR`` reads ``history.history['lr']``,
        which presumably gets its key from this function name.
    """
    def lr(y_true, y_pred):
        # use ._decayed_lr method instead of .lr
        return optimizer._decayed_lr(tf.float32)

    return lr

### Function to plot the learning rate
def plotLR(history):
    """Plot the per-epoch learning rate stored in a training history.

    Args:
        history: Object whose ``history`` mapping contains an ``'lr'``
            sequence with one value per epoch.

    Returns:
        The matplotlib figure holding the learning-rate curve.
    """
    lr_values = history.history['lr']
    figure = plt.figure(figsize=(12, 7))
    # Epochs are numbered from 1 on the x axis.
    plt.plot(range(1, len(lr_values) + 1), lr_values)
    plt.xlabel('Epochs')
    plt.ylabel('Learning rate')
    plt.title('Learning rate')
    return figure

#### Define the Neural Network model
def runModel():
    """Create a fresh, uncompiled dense classifier.

    The network flattens 28x28 inputs, applies two ReLU dense layers
    (512 and 200 units) and ends in a 10-unit softmax layer.

    Returns:
        A new ``Sequential`` model.
    """
    return Sequential([
        Flatten(input_shape=[28, 28]),
        Dense(512, activation='relu'),
        Dense(200, activation='relu'),
        Dense(10, activation='softmax'),
    ])

# Build the initial network; the 'constant' scheduler branch below trains this
# instance directly, while the other branches create their own via runModel().
model = runModel()
model.summary()


### Learning Rate Schedulers ###
import math
# Default hyperparameters. Each scheduler branch below re-assigns
# initial_learning_rate and epochs with its own values before training.
# The branches that use LearningRateScheduler pass verbose=1, so the learning
# rate differences can be seen in the console printout.
initial_learning_rate = 0.01
epochs = 100
decay = initial_learning_rate / epochs

In [None]:
# Select which learning-rate schedule the script runs by keeping exactly one
# of the following lines active.
CURRENT_LR_SCHEDULER = 'constant'
# CURRENT_LR_SCHEDULER = 'time-based'
# The polynomial option also defines POLY_POWER, which is only read by the
# 'polynomial' branch further down.
# CURRENT_LR_SCHEDULER, POLY_POWER = 'polynomial', 'linear'


### Functions to plot the train history
def plotPerformance(history, CURRENT_LR_SCHEDULER=CURRENT_LR_SCHEDULER):
    """Draw loss and accuracy curves side by side for one training run.

    Args:
        history: Object whose ``history`` mapping holds the ``'loss'``,
            ``'val_loss'``, ``'accuracy'`` and ``'val_accuracy'`` series.
        CURRENT_LR_SCHEDULER: Scheduler label shown in both subplot titles.
            The default is the module-level value at definition time.

    Returns:
        The Axes of the second (accuracy) subplot, not the Figure; callers
        obtain the figure with ``.get_figure()``.
    """
    plt.figure(figsize=(10, 4))

    # (subplot index, train key, validation key, display label)
    panels = (
        (1, 'loss', 'val_loss', 'Loss'),
        (2, 'accuracy', 'val_accuracy', 'Accuracy'),
    )

    axes = None
    for position, train_key, val_key, label in panels:
        # One row, two columns; `position` selects the left or right panel.
        axes = plt.subplot(1, 2, position)
        plt.plot(history.history[train_key])
        plt.plot(history.history[val_key])
        plt.legend([f'Train {label}', f'Test {label}'])
        plt.title(f'{label} Curves ({CURRENT_LR_SCHEDULER})')
        plt.xlabel('Epoch')
        plt.ylabel(f'{label} on the Validation Set')

    # After the loop `axes` is the accuracy panel.
    return axes
    
    
    
    
# Run the experiment selected by CURRENT_LR_SCHEDULER. Each branch creates a
# Neptune experiment, trains a model with SGD, and logs the learning-rate curve
# and the training-performance curves as images.
if CURRENT_LR_SCHEDULER == 'constant':
    # Create an experiment and log the model
    npt_exp = project.create_experiment(name='ConstantLR',
                                        description='constant-lr',
                                        tags=['LearningRate', 'constant', 'baseline', 'neptune'])

    ### Baseline model: constant learning rate
    initial_learning_rate = 0.01
    epochs = 100
    sgd = keras.optimizers.SGD(learning_rate=initial_learning_rate)
    # No LearningRateScheduler callback is used in this branch, so the rate is
    # tracked through an extra metric instead; plotLR() reads it back as 'lr'.
    lr_metric = get_lr_metric(sgd)

    model.compile(optimizer=sgd,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy', lr_metric])

    reset_random_seeds()

    trainHistory_constantLR = model.fit(
        x_train, y_train,
        epochs=epochs,
        validation_data=(x_test, y_test),
        batch_size=64
    )

    ### Plot learning rate over time
    npt_exp.log_image('Learning Rate Change (Constant)', plotLR(trainHistory_constantLR))

    ### Plot the training history
    npt_exp.log_image('Training Performance Curves (Constant)', plotPerformance(trainHistory_constantLR).get_figure())

elif CURRENT_LR_SCHEDULER == 'time-based':
    ## initial learning rate set to a larger number
    initial_learning_rate = 0.5
    epochs = 100
    decay = initial_learning_rate / epochs

    # Create an experiment and log the model
    npt_exp = project.create_experiment(name='TimeBasedLRDecay',
                                        description='time-based-lr-decay',
                                        tags=['LearningRate', 'timebased', 'decay', 'neptune'])

    def lr_time_based_decay(epoch, lr):
        """Return ``lr`` scaled by ``1 / (1 + decay * epoch)``.

        NOTE(review): Keras passes the *current* learning rate as ``lr``, so
        this decay compounds from epoch to epoch rather than being computed
        from ``initial_learning_rate`` -- confirm this is intended.
        """
        return lr * 1 / (1 + decay * epoch)

    model = runModel()
    model.summary()

    sgd = keras.optimizers.SGD(learning_rate=initial_learning_rate)
    model.compile(
                  optimizer=sgd,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    reset_random_seeds()

    trainHistory_timeBasedDecay = model.fit(
        x_train, y_train,
        epochs=epochs,
        batch_size=64,
        validation_split=0.2,
        callbacks=[LearningRateScheduler(lr_time_based_decay, verbose=1)])

    ### Plot learning rate over time
    npt_exp.log_image('Learning Rate Change (Time-Based Decay)', plotLR(trainHistory_timeBasedDecay))
    ### Plot the training history
    npt_exp.log_image('Training Performance Curves (Time-Based Decay)', plotPerformance(trainHistory_timeBasedDecay).get_figure())

elif CURRENT_LR_SCHEDULER == 'polynomial':
    initial_learning_rate = 0.5
    epochs = 100
    decay = initial_learning_rate / epochs

    # Map each supported POLY_POWER label to its polynomial exponent. Validate
    # up front so an unsupported label fails with a clear message before a
    # Neptune experiment is created.
    polyPowers = {'linear': 1.0}
    if POLY_POWER not in polyPowers:
        raise ValueError(
            f'Unsupported POLY_POWER {POLY_POWER!r}; expected one of {sorted(polyPowers)}')
    curPower = polyPowers[POLY_POWER]

    ## Defined as a class to save parameters as attributes
    class lr_polynomial_decay:
        """Callable schedule: ``initial_learning_rate * (1 - epoch / epochs) ** power``."""

        def __init__(self, epochs=100, initial_learning_rate=0.01, power=1.0):
            # store the maximum number of epochs, base learning rate, and power of the polynomial
            self.epochs = epochs
            self.initial_learning_rate = initial_learning_rate
            self.power = power

        def __call__(self, epoch):
            # compute the new learning rate based on polynomial decay
            decay = (1 - (epoch / float(self.epochs))) ** self.power
            updated_eta = self.initial_learning_rate * decay
            # return the new learning rate
            return float(updated_eta)

    def plot_Neptune(history, decayTitle):
        """Log the learning-rate and performance plots of ``history`` to ``npt_exp``."""
        ### Plot learning rate over time
        npt_exp.log_image(f'Learning Rate Change ({decayTitle})', plotLR(history))
        ### Plot the training history
        npt_exp.log_image(f'Training Performance Curves ({decayTitle})', plotPerformance(history).get_figure())

    # Create an experiment and log the model
    npt_exp = project.create_experiment(name=f'{POLY_POWER}LRDecay',
                                        description=f'{POLY_POWER}-lr-decay',
                                        tags=['LearningRate', POLY_POWER, 'decay', 'neptune'])

    curScheduler = lr_polynomial_decay(epochs=epochs, initial_learning_rate=initial_learning_rate, power=curPower)

    model = runModel()
    model.summary()

    sgd = keras.optimizers.SGD(learning_rate=initial_learning_rate)
    model.compile(
                  optimizer=sgd,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    reset_random_seeds()

    trainHistory_polyDecay = model.fit(
        x_train, y_train,
        epochs=epochs,
        batch_size=64,
        validation_split=0.2,
        callbacks=[LearningRateScheduler(curScheduler, verbose=1)])

    if POLY_POWER == 'linear':
        trainHistory_linearDecay = trainHistory_polyDecay
        plot_Neptune(history=trainHistory_linearDecay, decayTitle='Linear Decay')
