# Noisy Quadratic Function Gradient Descent

In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

## Define function

$$f(x) = (x-3)^2 + \epsilon$$

where $\epsilon$ is Gaussian noise (i.e., noise drawn from a normal distribution).

In [None]:
def f(x, epsilon):
    """Evaluate the noisy quadratic ``(x - 3)**2 + epsilon``.

    Args:
        x: Point at which to evaluate the function.
        epsilon: Additive term added to the quadratic (the noise term in
            the formula above).

    Returns:
        The squared distance of ``x`` from 3, shifted by ``epsilon``.
    """
    offset = x - 3
    return offset**2 + epsilon

In [None]:
def adam_tape(f_tf, max_iters=5000, learning_rate=0.001, initial_guess=None,
              tol=1e-13):
    """Minimize a function of a single variable with a hand-written Adam loop.

    The gradient is obtained with ``tf.GradientTape`` and the parameter is
    updated in place with the bias-corrected Adam rule. Values are read back
    with ``.numpy()``, so this is intended to run eagerly.

    Args:
        f_tf: Callable that takes the ``tf.Variable`` being optimized and
            returns the objective value as a tensor.
        max_iters: Maximum number of Adam updates to perform.
        learning_rate: Step size applied to each update.
        initial_guess: Starting value of the variable; 1.5 is used when
            ``None``.
        tol: The loop stops early once the absolute change of the objective
            between two consecutive iterations drops below this value.

    Returns:
        A tuple ``(x, y)`` of NumPy values: the final variable value and the
        objective evaluated at it.
    """
    start = 1.5 if initial_guess is None else initial_guess
    weight = tf.Variable(start, dtype=tf.float32)

    beta1, beta2 = 0.9, 0.999  # decay rates of the first/second moment estimates
    epsilon = 1e-8  # small constant to avoid division by zero
    m, s = 0.0, 0.0  # running first and second moment estimates

    pred_1 = f_tf(weight).numpy()  # objective value before the first update

    # t starts at 1 so the bias-correction denominators are never zero.
    for t in range(1, max_iters + 1):
        with tf.GradientTape() as tape:
            y = f_tf(weight)

        grad = tape.gradient(y, weight)

        m = beta1 * m + (1 - beta1) * grad
        s = beta2 * s + (1 - beta2) * tf.square(grad)
        m_hat = m / (1 - beta1 ** t)
        s_hat = s / (1 - beta2 ** t)

        # Standard Adam step: epsilon is added to the square root, not
        # placed inside it.
        weight.assign_sub(learning_rate * m_hat / (tf.sqrt(s_hat) + epsilon))

        pred_2 = f_tf(weight).numpy()  # current prediction

        if abs(pred_2 - pred_1) < tol:  # stopping criterion
            # t is already the 1-based count of updates performed.
            print(f"Converged on {t}th iteration")
            break
        pred_1 = pred_2

    # Evaluate the objective once so the printed and returned values agree
    # even when f_tf is noisy.
    x_final = weight.numpy()
    y_final = f_tf(weight).numpy()

    print("x (weight):", float(x_final))
    print("At x (weight), y =", float(y_final))

    return x_final, y_final