# Math related

In [3]:
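# Gradient descent
# Minimize the function f(x) = x^2, whose minimum is at x = 0.
# Each step computes x <- x - lr * f'(x) = (1 - 2*lr) * x, so x converges to 0
# for any learning rate 0 < lr < 1.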

def minimize(iter, learning_rate, start_point):
    """Run fixed-step gradient descent on f(x) = x^2 and return the final x.

    Args:
        iter: Number of update steps to perform (note: shadows the builtin
            ``iter`` inside this function).
        learning_rate: Step size that multiplies the gradient in each update.
        start_point: Initial value of x.

    Returns:
        The value of x after ``iter`` updates; ``start_point`` itself when no
        update is performed.

    One progress line is printed per step, showing the new x and f(x).
    """
    def descend(point):
        # f'(x) = 2x, so a single update is x - learning_rate * 2x.
        slope = 2 * point
        return point - learning_rate * slope

    x = start_point
    for i in range(iter):
        x = descend(x)
        print(f"Iteration {i+1}: x = {x}, f(x) = {x**2}")
    return x

# Example usage: 10 steps starting from x = 10 with a learning rate of 0.1
minimize(iter=10, learning_rate=0.1, start_point=10)

Iteration 1: x = 8.0, f(x) = 64.0
Iteration 2: x = 6.4, f(x) = 40.96000000000001
Iteration 3: x = 5.12, f(x) = 26.2144
Iteration 4: x = 4.096, f(x) = 16.777216
Iteration 5: x = 3.2768, f(x) = 10.73741824
Iteration 6: x = 2.62144, f(x) = 6.871947673600001
Iteration 7: x = 2.0971520000000003, f(x) = 4.398046511104002
Iteration 8: x = 1.6777216000000004, f(x) = 2.8147497671065613
Iteration 9: x = 1.3421772800000003, f(x) = 1.801439850948199
Iteration 10: x = 1.0737418240000003, f(x) = 1.1529215046068475


In [9]:
# Numerical overflow
import numpy as np

# Sigmoid function
# can be written in two ways:
# 1. 1/(1+exp(-x))
# 2. exp(x)/(1+exp(x))
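# Each form is safe on only one side:
#   form 1 is fine for large positive x (exp(-x) -> 0) but overflows for large negative x;
#   form 2 is fine for large negative x (exp(x) -> 0) but overflows for large positive x.
# Combine them by choosing the form based on the sign of x to avoid overflow.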

def sigmod_unstable(x):
    """Naive logistic sigmoid, 1 / (1 + exp(-x)).

    Kept unguarded on purpose as the "unstable" reference: for a very
    negative ``x`` the term ``exp(-x)`` overflows to inf (NumPy reports a
    RuntimeWarning) and the quotient then evaluates to 0.0.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def sigmod_stable(x):
    """Numerically stable logistic sigmoid for scalars and NumPy arrays.

    Uses the two algebraically equal forms of the sigmoid, picking per element
    the one whose exponential cannot overflow:

        x <  0:  exp(x) / (1 + exp(x))
        x >= 0:  1 / (1 + exp(-x))

    Both forms only need ``exp(-|x|)``, which always lies in [0, 1].

    Args:
        x: A real scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an ndarray with the same
        shape as ``x``.
    """
    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.floating):
        # Integer/bool input: switch to float so negation and exp are well defined.
        x = x.astype(float)

    # exp(-|x|) equals exp(x) for x < 0 and exp(-x) for x >= 0; it never overflows.
    z = np.exp(-np.abs(x))

    # np.where evaluates both candidates, which is safe because z is bounded.
    result = np.where(x < 0, z / (1 + z), 1 / (1 + z))

    # Unwrap 0-d results so scalar callers still receive a scalar.
    return result[()] if result.ndim == 0 else result
# Example usage: an extremely negative input
n = -100_000_000_000
print(sigmod_unstable(n))  # exp(-n) overflows to inf here (RuntimeWarning); prints 0.0
print(sigmod_stable(n))  # evaluates exp(n) instead, so nothing overflows

0.0
0.0


  return 1/(1+np.exp(-x))
