# **Pattern Recognition - Machine Learning** | Assignment 1




# 1) **Operations with Vectors and Matrices**





i) Creating matrices of integers for processing.

In [None]:
import numpy as np

# Reproducible random integer matrices. The bounds [-500, 500) are an arbitrary
# choice that does not affect the general idea.
# Seed: last 4 digits of the student ID: 0021->21
np.random.seed(21)

# Draw X (3x4) first and Y (4x3) second so the random stream is consumed in
# that order, then report both.
X = np.random.randint(-500, 500, (3, 4))
Y = np.random.randint(-500, 500, (4, 3))

print("Matrix X (3x4):\n", X, sep="\n")
print("----------------------------------------------------")
print("Matrix Y (4x3):\n", Y, sep="\n")

ii) Creating vectors of integers for processing

In [None]:
# Reproducible random integer vectors. The bounds [-500, 500) are an arbitrary
# choice that does not affect the general idea.
# Seed: last 4 digits of the student ID: 0021->21
np.random.seed(21)

# Two 4-dimensional integer vectors; a is drawn before b.
a = np.random.randint(-500, 500, 4)
b = np.random.randint(-500, 500, 4)

print("Vector a:\n", a, sep="\n")
print("----------------------------------------------------")
print("Vector b:\n", b, sep="\n")


# 1.1) Inner product of vectors a and b ($a^T b$)

In [None]:
# Inner product a^T b of the two 1-D vectors (np.dot on 1-D inputs is the
# inner product).
c = np.dot(a, b)
print("c = \n", c, sep="\n")



# 1.2) Product of matrix X and vector a ($Xa$)

In [None]:
# Matrix-vector product Xa: X is 3x4 and a has 4 entries, so the result has
# 3 entries. The `@` operator performs the matrix product; `*` would instead
# broadcast a across the rows of X and multiply element-wise (a 3x4 result).
# a is 1-D, so no transpose is needed.
res = X @ a
print("res = \n")
print(res)


# 1.3) Product of two matrices X and Y ($XY$)

In [None]:
# Matrix product XY: (3x4) times (4x3) gives a 3x3 matrix
mult_res = X @ Y
print("mult_res = XY = \n", mult_res, sep="\n")

# 1.4) Euclidean norm of vector a

In [None]:
# Euclidean (L2) norm of vector a; np.linalg.norm defaults to the 2-norm for vectors
norm = np.linalg.norm(a)
print("norm of a = \n", norm, sep="\n")

# 2) **Calculation of Derivatives**

# 2.1) Derivative of $f(x)$
We use rules 89 and 61 (pages 10-11) from the Matrix Cookbook.

# 2.2) Global minimum problem

We use rules 4, 5, 15, 101, and 116 from Matrix Cookbook.

# 3) **Gradient Descent Algorithm**

The first part of the code implements the Gradient Descent algorithm with a learning rate of 0.5 and 10 iterations. It also defines a precision of 0.0001. This first part of the code represents the initial simple approach without any changes to the learning rate or the iterations of the algorithm.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from decimal import *

# definitions of the functions and their partial derivatives
def f1_x1_x2(x1, x2):
    """Return f1(x1, x2) = (x1 - 2)^2 + (x2 - 3)^2."""
    dx1, dx2 = x1 - 2, x2 - 3
    return dx1 ** 2 + dx2 ** 2

def f2_x1_x2(x1, x2):
    """Return f2(x1, x2) = (1 - (x2-3))^2 + 20*((x1+3) - (x2-3)^2)^2."""
    u = x2 - 3
    v = (x1 + 3) - u ** 2
    return (1 - u) ** 2 + 20 * v ** 2

def df1_x1(x1, x2):
    """Return the partial derivative of f1 with respect to x1: 2*(x1 - 2).

    x2 is accepted for a uniform gradient signature but is not used.
    """
    offset = x1 - 2
    return 2 * offset

def df1_x2(x1, x2):
    """Return the partial derivative of f1 with respect to x2: 2*(x2 - 3).

    f1 = (x1-2)^2 + (x2-3)^2, so only the second term depends on x2.
    x1 is accepted for a uniform gradient signature but is not used.
    """
    return 2 * (x2 - 3)

def df2_x1(x1, x2):
    """Return the partial derivative of f2 with respect to x1.

    f2 = (1 - (x2-3))^2 + 20*((x1+3) - (x2-3)^2)^2, so by the chain rule
    d f2 / d x1 = 40 * ((x1+3) - (x2-3)^2).
    """
    return 40 * ((x1 + 3) - (x2 - 3) ** 2)

def df2_x2(x1, x2):
    """Return the partial derivative of f2 with respect to x2.

    Computes 2*(x2-4) - 80*(x1+3)*(x2-3) + 80*(x2-3)^3.
    """
    u = x2 - 3
    return 2 * (x2 - 4) - 80 * (x1 + 3) * u + 80 * u ** 3

# Settings of the first, naive experiment
rate = 0.5                      # Learning rate
precision = 0.0001              # early-stop threshold on the per-coordinate movement
maxIters = 10                   # maximum number of iterations (modifiable)


def _descend(label, formula, func, grad_x1, grad_x2, rate, precision, max_iters):
    """Minimise a two-variable function with fixed-step gradient descent.

    The search starts at (0, 0). Both partial derivatives are evaluated at the
    current point before either coordinate is moved, so each step follows the
    gradient of a single point. The loop ends early once neither coordinate
    moved by more than ``precision`` in the last step.

    Args:
        label: Short name used in the printed log, e.g. "f1".
        formula: Formula text shown in the final message.
        func: Objective, called as ``func(x1, x2)``.
        grad_x1: Partial derivative w.r.t. x1, called as ``grad_x1(x1, x2)``.
        grad_x2: Partial derivative w.r.t. x2, called as ``grad_x2(x1, x2)``.
        rate: Learning rate (step size).
        precision: Per-coordinate movement threshold for early stopping.
        max_iters: Maximum number of iterations.

    Returns:
        Tuple ``(x1, x2, iterations, values)``: the last point, the 1-based
        iteration numbers that ran, and the objective value after each one.

    Nothing is caught here: an exception raised by ``func`` or the derivatives
    (e.g. on numeric overflow when the iterates diverge) propagates.
    """
    x1, x2 = 0, 0
    iterations, values = [], []
    for step in range(1, max_iters + 1):
        prev_x1, prev_x2 = x1, x2
        # Evaluate the whole gradient at the previous point, then move.
        g1 = grad_x1(prev_x1, prev_x2)
        g2 = grad_x2(prev_x1, prev_x2)
        x1 = prev_x1 - rate * g1
        x2 = prev_x2 - rate * g2
        value = func(x1, x2)
        print("Iteration ", step, "\n(x1,x2)=(", x1, ",", x2, ") => " + label + "(", x1, ",", x2, ")=", value)  # Print iterations

        iterations.append(step)
        values.append(float(value))

        # Converged only when BOTH coordinates have stopped moving.
        if abs(x1 - prev_x1) <= precision and abs(x2 - prev_x2) <= precision:
            print("The local minimum for " + label + "(x) = " + formula + " occurs at (x1,x2)=(", x1, ",", x2,
                  ") after underflowing precision threshold and is", value)
            break
    return x1, x2, iterations, values


def _plot_history(iterations, values, ylabel, rate, precision, marker_size):
    """Plot the objective value reached after each iteration."""
    plt.figure(figsize=(8, 6))
    plt.title("rate=" + str(rate) + "  precision=" + str(precision))
    plt.scatter(iterations, values, marker='.', color='red', s=marker_size)
    # Dashed line through the recorded values, i.e. the actual trajectory.
    plt.plot(iterations, values, color='blue', linestyle='dashed')
    plt.xlabel("Iterations")
    plt.ylabel(ylabel)
    plt.show()


print("\n-------------------------------------f1(x)-----------------------------------------------\n")

# Iterations for f1(x)
curX1, curX2, iter_list1, f1_values_list = _descend(
    "f1", "(x1-2)^2+(x2-3)^2", f1_x1_x2, df1_x1, df1_x2, rate, precision, maxIters)

print("\n-------------------------------------f2(x)-----------------------------------------------\n")

# Iterations for f2(x); with this large rate the iterates can diverge and the
# run may stop with an overflow error before any plot is drawn.
cur_X1, cur_X2, iter_list2, f2_values_list = _descend(
    "f2", "(1-(x2-3))^2+20*((x1+3)-(x2-3)^2)^2", f2_x1_x2, df2_x1, df2_x2, rate, precision, maxIters)

# Plotting for f1
_plot_history(iter_list1, f1_values_list, "f1(x1,x2)", rate, precision, 60)

# Plotting for f2
_plot_history(iter_list2, f2_values_list, "f2(x1,x2)", rate, precision, 60)



We observe that there is an overflow in the float representation during the 4th iteration of the algorithm for f2. This is expected because the learning rate is quite large, resulting in the curX (new x) values increasing rapidly and consequently causing a significant increase in the values of the derivatives df2_x1 and df2_x2 with respect to x1 and x2. Therefore, a lower learning rate should be set to ensure that the transitions between x values are much smaller, leading to a slower convergence rate in each iteration (with a smaller "step"). This code segment, although designed to include plotting, cannot proceed due to the overflow error.

The second part of question (3) asks us to vary the learning rate and the number of iterations performed by the algorithm. The following code segment runs the algorithm for every combination of learning rate and precision using two nested loops. The outer loop sets the learning rate, which starts at 0.0001 (instead of the 0.5 used above, to avoid the floating-point overflow observed there) and is divided by 10 after each pass, down to 0.00000001. For each learning rate, the inner loop tests precisions from 0.0001 down to 0.00000001, again dividing by 10 each time. Each resulting plot shows the learning rate and precision being used in its title, while the remaining results and the intermediate stages of the algorithm are displayed in the terminal. In total, there are 25 combinations of rate and precision for 2 functions (a total of 50 plots), while the maximum number of iterations remains constant at 200 (which can be changed manually).

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# definitions of the functions and their partial derivatives
def f1_x1_x2(x1, x2):
    """Return f1(x1, x2) = (x1 - 2)^2 + (x2 - 3)^2."""
    dx1, dx2 = x1 - 2, x2 - 3
    return dx1 ** 2 + dx2 ** 2

def f2_x1_x2(x1, x2):
    """Return f2(x1, x2) = (1 - (x2-3))^2 + 20*((x1+3) - (x2-3)^2)^2."""
    u = x2 - 3
    v = (x1 + 3) - u ** 2
    return (1 - u) ** 2 + 20 * v ** 2

def df1_x1(x1, x2):
    """Return the partial derivative of f1 with respect to x1: 2*(x1 - 2).

    x2 is accepted for a uniform gradient signature but is not used.
    """
    offset = x1 - 2
    return 2 * offset

def df1_x2(x1, x2):
    """Return the partial derivative of f1 with respect to x2: 2*(x2 - 3).

    f1 = (x1-2)^2 + (x2-3)^2, so only the second term depends on x2.
    x1 is accepted for a uniform gradient signature but is not used.
    """
    return 2 * (x2 - 3)

def df2_x1(x1, x2):
    """Return the partial derivative of f2 with respect to x1.

    f2 = (1 - (x2-3))^2 + 20*((x1+3) - (x2-3)^2)^2, so by the chain rule
    d f2 / d x1 = 40 * ((x1+3) - (x2-3)^2).
    """
    return 40 * ((x1 + 3) - (x2 - 3) ** 2)

def df2_x2(x1, x2):
    """Return the partial derivative of f2 with respect to x2.

    Computes 2*(x2-4) - 80*(x1+3)*(x2-3) + 80*(x2-3)^3.
    """
    u = x2 - 3
    return 2 * (x2 - 4) - 80 * (x1 + 3) * u + 80 * u ** 3

# Parameter grid. The values are listed explicitly instead of being produced by
# repeated division by 10: accumulated floating-point rounding makes the
# generated values (and the comparison against the lower bound) unreliable.
_RATES = (1e-4, 1e-5, 1e-6, 1e-7, 1e-8)        # learning rates
_PRECISIONS = (1e-4, 1e-5, 1e-6, 1e-7, 1e-8)   # early-stop thresholds
maxIters = 200                                 # maximum number of iterations (modifiable)


def _descend(label, formula, func, grad_x1, grad_x2, rate, precision, max_iters):
    """Minimise a two-variable function with fixed-step gradient descent.

    The search starts at (0, 0). Both partial derivatives are evaluated at the
    current point before either coordinate is moved, so each step follows the
    gradient of a single point. The loop ends early once neither coordinate
    moved by more than ``precision`` in the last step.

    Args:
        label: Short name used in the printed log, e.g. "f1".
        formula: Formula text shown in the final message.
        func: Objective, called as ``func(x1, x2)``.
        grad_x1: Partial derivative w.r.t. x1, called as ``grad_x1(x1, x2)``.
        grad_x2: Partial derivative w.r.t. x2, called as ``grad_x2(x1, x2)``.
        rate: Learning rate (step size).
        precision: Per-coordinate movement threshold for early stopping.
        max_iters: Maximum number of iterations.

    Returns:
        Tuple ``(x1, x2, iterations, values)``: the last point, the 1-based
        iteration numbers that ran, and the objective value after each one.

    Nothing is caught here: an exception raised by ``func`` or the derivatives
    (e.g. on numeric overflow when the iterates diverge) propagates.
    """
    x1, x2 = 0, 0
    iterations, values = [], []
    for step in range(1, max_iters + 1):
        prev_x1, prev_x2 = x1, x2
        # Evaluate the whole gradient at the previous point, then move.
        g1 = grad_x1(prev_x1, prev_x2)
        g2 = grad_x2(prev_x1, prev_x2)
        x1 = prev_x1 - rate * g1
        x2 = prev_x2 - rate * g2
        value = func(x1, x2)
        print("Iteration ", step, "\n(x1,x2)=(", x1, ",", x2, ") => " + label + "(", x1, ",", x2, ")=", value)  # Print iterations

        iterations.append(step)
        values.append(float(value))

        # Converged only when BOTH coordinates have stopped moving.
        if abs(x1 - prev_x1) <= precision and abs(x2 - prev_x2) <= precision:
            print("The local minimum for " + label + "(x) = " + formula + " occurs at (x1,x2)=(", x1, ",", x2,
                  ") after underflowing precision threshold and is", value)
            break
    return x1, x2, iterations, values


def _plot_history(iterations, values, ylabel, rate, precision, marker_size):
    """Plot the objective value reached after each iteration."""
    plt.figure(figsize=(8, 6))
    plt.title("rate=" + str(rate) + "  precision=" + str(precision))
    plt.scatter(iterations, values, marker='.', color='red', s=marker_size)
    # Dashed line through the recorded values, i.e. the actual trajectory.
    plt.plot(iterations, values, color='blue', linestyle='dashed')
    plt.xlabel("Iterations")
    plt.ylabel(ylabel)
    plt.show()


# One run of both functions (and two plots) per (rate, precision) combination
for rate in _RATES:
    for precision in _PRECISIONS:

        print("\n------------------------f1(x)-------rate =", rate,", precision =", precision,"----------------------------------------\n")

        # Iterations for f1(x)
        curX1, curX2, iter_list1, f1_values_list = _descend(
            "f1", "(x1-2)^2+(x2-3)^2", f1_x1_x2, df1_x1, df1_x2, rate, precision, maxIters)

        print("\n------------------------f2(x)-------rate =", rate,", precision =", precision,"----------------------------------------\n")

        # Iterations for f2(x)
        cur_X1, cur_X2, iter_list2, f2_values_list = _descend(
            "f2", "(1-(x2-3))^2+20*((x1+3)-(x2-3)^2)^2", f2_x1_x2, df2_x1, df2_x2, rate, precision, maxIters)

        # Plotting for f1
        _plot_history(iter_list1, f1_values_list, "f1(x1,x2)", rate, precision, 0.6)

        # Plotting for f2
        _plot_history(iter_list2, f2_values_list, "f2(x1,x2)", rate, precision, 0.6)

In the next code segment, the 25 combinations of learning rate and precision are repeated, but each combination is applied with a different number of iterations (starting from T=50 and increasing by 100). With each change in the learning rate, T is also modified accordingly. This allows for a better comparison between the previous plots and the new ones.

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# definitions of the functions and their partial derivatives
def f1_x1_x2(x1, x2):
    """Return f1(x1, x2) = (x1 - 2)^2 + (x2 - 3)^2."""
    dx1, dx2 = x1 - 2, x2 - 3
    return dx1 ** 2 + dx2 ** 2

def f2_x1_x2(x1, x2):
    """Return f2(x1, x2) = (1 - (x2-3))^2 + 20*((x1+3) - (x2-3)^2)^2."""
    u = x2 - 3
    v = (x1 + 3) - u ** 2
    return (1 - u) ** 2 + 20 * v ** 2

def df1_x1(x1, x2):
    """Return the partial derivative of f1 with respect to x1: 2*(x1 - 2).

    x2 is accepted for a uniform gradient signature but is not used.
    """
    offset = x1 - 2
    return 2 * offset

def df1_x2(x1, x2):
    """Return the partial derivative of f1 with respect to x2: 2*(x2 - 3).

    f1 = (x1-2)^2 + (x2-3)^2, so only the second term depends on x2.
    x1 is accepted for a uniform gradient signature but is not used.
    """
    return 2 * (x2 - 3)

def df2_x1(x1, x2):
    """Return the partial derivative of f2 with respect to x1.

    f2 = (1 - (x2-3))^2 + 20*((x1+3) - (x2-3)^2)^2, so by the chain rule
    d f2 / d x1 = 40 * ((x1+3) - (x2-3)^2).
    """
    return 40 * ((x1 + 3) - (x2 - 3) ** 2)

def df2_x2(x1, x2):
    """Return the partial derivative of f2 with respect to x2.

    Computes 2*(x2-4) - 80*(x1+3)*(x2-3) + 80*(x2-3)^3.
    """
    u = x2 - 3
    return 2 * (x2 - 4) - 80 * (x1 + 3) * u + 80 * u ** 3

# Parameter grid. The values are listed explicitly instead of being produced by
# repeated division by 10: accumulated floating-point rounding makes the
# generated values (and the comparison against the lower bound) unreliable.
_RATES = (1e-4, 1e-5, 1e-6, 1e-7, 1e-8)        # learning rates
_PRECISIONS = (1e-4, 1e-5, 1e-6, 1e-7, 1e-8)   # early-stop thresholds
maxIters = 50                                  # maximum number of iterations (grows by 100 per learning rate)


def _descend(label, formula, func, grad_x1, grad_x2, rate, precision, max_iters):
    """Minimise a two-variable function with fixed-step gradient descent.

    The search starts at (0, 0). Both partial derivatives are evaluated at the
    current point before either coordinate is moved, so each step follows the
    gradient of a single point. The loop ends early once neither coordinate
    moved by more than ``precision`` in the last step.

    Args:
        label: Short name used in the printed log, e.g. "f1".
        formula: Formula text shown in the final message.
        func: Objective, called as ``func(x1, x2)``.
        grad_x1: Partial derivative w.r.t. x1, called as ``grad_x1(x1, x2)``.
        grad_x2: Partial derivative w.r.t. x2, called as ``grad_x2(x1, x2)``.
        rate: Learning rate (step size).
        precision: Per-coordinate movement threshold for early stopping.
        max_iters: Maximum number of iterations.

    Returns:
        Tuple ``(x1, x2, iterations, values)``: the last point, the 1-based
        iteration numbers that ran, and the objective value after each one.

    Nothing is caught here: an exception raised by ``func`` or the derivatives
    (e.g. on numeric overflow when the iterates diverge) propagates.
    """
    x1, x2 = 0, 0
    iterations, values = [], []
    for step in range(1, max_iters + 1):
        prev_x1, prev_x2 = x1, x2
        # Evaluate the whole gradient at the previous point, then move.
        g1 = grad_x1(prev_x1, prev_x2)
        g2 = grad_x2(prev_x1, prev_x2)
        x1 = prev_x1 - rate * g1
        x2 = prev_x2 - rate * g2
        value = func(x1, x2)
        print("Iteration ", step, "\n(x1,x2)=(", x1, ",", x2, ") => " + label + "(", x1, ",", x2, ")=", value)  # Print iterations

        iterations.append(step)
        values.append(float(value))

        # Converged only when BOTH coordinates have stopped moving.
        if abs(x1 - prev_x1) <= precision and abs(x2 - prev_x2) <= precision:
            print("The local minimum for " + label + "(x) = " + formula + " occurs at (x1,x2)=(", x1, ",", x2,
                  ") after underflowing precision threshold and is", value)
            break
    return x1, x2, iterations, values


def _plot_history(iterations, values, ylabel, rate, precision, marker_size):
    """Plot the objective value reached after each iteration."""
    plt.figure(figsize=(8, 6))
    plt.title("rate=" + str(rate) + "  precision=" + str(precision))
    plt.scatter(iterations, values, marker='.', color='red', s=marker_size)
    # Dashed line through the recorded values, i.e. the actual trajectory.
    plt.plot(iterations, values, color='blue', linestyle='dashed')
    plt.xlabel("Iterations")
    plt.ylabel(ylabel)
    plt.show()


# One run of both functions (and two plots) per (rate, precision) combination
for rate in _RATES:
    for precision in _PRECISIONS:

        print("\n------------------------f1(x)-------rate =", rate,", precision =", precision,"----------------------------------------\n")

        # Iterations for f1(x)
        curX1, curX2, iter_list1, f1_values_list = _descend(
            "f1", "(x1-2)^2+(x2-3)^2", f1_x1_x2, df1_x1, df1_x2, rate, precision, maxIters)

        print("\n------------------------f2(x)-------rate =", rate,", precision =", precision,"----------------------------------------\n")

        # Iterations for f2(x)
        cur_X1, cur_X2, iter_list2, f2_values_list = _descend(
            "f2", "(1-(x2-3))^2+20*((x1+3)-(x2-3)^2)^2", f2_x1_x2, df2_x1, df2_x2, rate, precision, maxIters)

        # Plotting for f1
        _plot_history(iter_list1, f1_values_list, "f1(x1,x2)", rate, precision, 0.6)

        # Plotting for f2
        _plot_history(iter_list2, f2_values_list, "f2(x1,x2)", rate, precision, 0.6)

    # A smaller learning rate gets a larger iteration budget
    maxIters = maxIters + 100

* The question accepts a qualitative rather than a quantitative answer: in general, each problem is distinct with its own parameters and constraints. Therefore, it is important to properly combine the learning rate, precision, and iterations.

1. Generally, when we have a small learning rate, meaning small "steps" towards the optimal point, we need many iterations for the algorithm to converge to it, especially if the initial values are far from the optimal point.

2. On the other hand, if the learning rate is large and the initial values are far from the optimal point, fewer iterations of the algorithm will be required.

3. However, if the learning rate is large and the initial values are close to the optimal point, the algorithm may either diverge from the optimal point or get trapped around it without ever approaching it satisfactorily.

* For large values of the learning rate, the algorithm traverses the different X values with large "steps." This can cause it to overshoot the optimal point (minimum in this case) and continue beyond it, resulting in unstable results and unpredictability.
* Conversely, if the learning rate is very small, the algorithm moves through the different X values with small "steps" and takes a long time to approach the optimal point. This implies an increased computational and time complexity in solving the problem.
* If we start to modify the initial values of the variables for the algorithm's initialization, we will observe that if these values are very far from the optimal point, the algorithm will converge very slowly to it or may not converge satisfactorily unless there are enough iterations. On the other hand, if the initial values of the variables for the initialization of the algorithm are close to the optimal point, the algorithm may converge very quickly to it.

In the following code segments, we observe what happens when we only modify the number of iterations for the two functions while keeping the other parameters constant.


**400 iterations**

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from decimal import *

# definitions of the functions and their partial derivatives
def f1_x1_x2(x1, x2):
    """Return f1(x1, x2) = (x1 - 2)^2 + (x2 - 3)^2."""
    dx1, dx2 = x1 - 2, x2 - 3
    return dx1 ** 2 + dx2 ** 2

def f2_x1_x2(x1, x2):
    """Return f2(x1, x2) = (1 - (x2-3))^2 + 20*((x1+3) - (x2-3)^2)^2."""
    u = x2 - 3
    v = (x1 + 3) - u ** 2
    return (1 - u) ** 2 + 20 * v ** 2

def df1_x1(x1, x2):
    """Return the partial derivative of f1 with respect to x1: 2*(x1 - 2).

    x2 is accepted for a uniform gradient signature but is not used.
    """
    offset = x1 - 2
    return 2 * offset

def df1_x2(x1, x2):
    """Return the partial derivative of f1 with respect to x2: 2*(x2 - 3).

    f1 = (x1-2)^2 + (x2-3)^2, so only the second term depends on x2.
    x1 is accepted for a uniform gradient signature but is not used.
    """
    return 2 * (x2 - 3)

def df2_x1(x1, x2):
    """Return the partial derivative of f2 with respect to x1.

    f2 = (1 - (x2-3))^2 + 20*((x1+3) - (x2-3)^2)^2, so by the chain rule
    d f2 / d x1 = 40 * ((x1+3) - (x2-3)^2).
    """
    return 40 * ((x1 + 3) - (x2 - 3) ** 2)

def df2_x2(x1, x2):
    """Return the partial derivative of f2 with respect to x2.

    Computes 2*(x2-4) - 80*(x1+3)*(x2-3) + 80*(x2-3)^3.
    """
    u = x2 - 3
    return 2 * (x2 - 4) - 80 * (x1 + 3) * u + 80 * u ** 3

# Settings of this experiment (only the iteration budget differs between the
# 400 / 200 / 800 runs)
rate = 0.0005                   # Learning rate
precision = 0.000001            # early-stop threshold on the per-coordinate movement
maxIters = 400                  # maximum number of iterations (modifiable)


def _descend(label, formula, func, grad_x1, grad_x2, rate, precision, max_iters):
    """Minimise a two-variable function with fixed-step gradient descent.

    The search starts at (0, 0). Both partial derivatives are evaluated at the
    current point before either coordinate is moved, so each step follows the
    gradient of a single point. The loop ends early once neither coordinate
    moved by more than ``precision`` in the last step.

    Args:
        label: Short name used in the printed log, e.g. "f1".
        formula: Formula text shown in the final message.
        func: Objective, called as ``func(x1, x2)``.
        grad_x1: Partial derivative w.r.t. x1, called as ``grad_x1(x1, x2)``.
        grad_x2: Partial derivative w.r.t. x2, called as ``grad_x2(x1, x2)``.
        rate: Learning rate (step size).
        precision: Per-coordinate movement threshold for early stopping.
        max_iters: Maximum number of iterations.

    Returns:
        Tuple ``(x1, x2, iterations, values)``: the last point, the 1-based
        iteration numbers that ran, and the objective value after each one.

    Nothing is caught here: an exception raised by ``func`` or the derivatives
    (e.g. on numeric overflow when the iterates diverge) propagates.
    """
    x1, x2 = 0, 0
    iterations, values = [], []
    for step in range(1, max_iters + 1):
        prev_x1, prev_x2 = x1, x2
        # Evaluate the whole gradient at the previous point, then move.
        g1 = grad_x1(prev_x1, prev_x2)
        g2 = grad_x2(prev_x1, prev_x2)
        x1 = prev_x1 - rate * g1
        x2 = prev_x2 - rate * g2
        value = func(x1, x2)
        print("Iteration ", step, "\n(x1,x2)=(", x1, ",", x2, ") => " + label + "(", x1, ",", x2, ")=", value)  # Print iterations

        iterations.append(step)
        values.append(float(value))

        # Converged only when BOTH coordinates have stopped moving.
        if abs(x1 - prev_x1) <= precision and abs(x2 - prev_x2) <= precision:
            print("The local minimum for " + label + "(x) = " + formula + " occurs at (x1,x2)=(", x1, ",", x2,
                  ") after underflowing precision threshold and is", value)
            break
    return x1, x2, iterations, values


def _plot_history(iterations, values, ylabel, rate, precision, marker_size):
    """Plot the objective value reached after each iteration."""
    plt.figure(figsize=(8, 6))
    plt.title("rate=" + str(rate) + "  precision=" + str(precision))
    plt.scatter(iterations, values, marker='.', color='red', s=marker_size)
    # Dashed line through the recorded values, i.e. the actual trajectory.
    plt.plot(iterations, values, color='blue', linestyle='dashed')
    plt.xlabel("Iterations")
    plt.ylabel(ylabel)
    plt.show()


print("\n-------------------------------------f1(x)-----------------------------------------------\n")

# Iterations for f1(x)
curX1, curX2, iter_list1, f1_values_list = _descend(
    "f1", "(x1-2)^2+(x2-3)^2", f1_x1_x2, df1_x1, df1_x2, rate, precision, maxIters)

print("\n-------------------------------------f2(x)-----------------------------------------------\n")

# Iterations for f2(x)
cur_X1, cur_X2, iter_list2, f2_values_list = _descend(
    "f2", "(1-(x2-3))^2+20*((x1+3)-(x2-3)^2)^2", f2_x1_x2, df2_x1, df2_x2, rate, precision, maxIters)

# Plotting for f1
_plot_history(iter_list1, f1_values_list, "f1(x1,x2)", rate, precision, 0.5)

# Plotting for f2
_plot_history(iter_list2, f2_values_list, "f2(x1,x2)", rate, precision, 0.5)


**200 iterations**

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from decimal import *

# definitions of the functions and their partial derivatives
def f1_x1_x2(x1, x2):
    """Return f1(x1, x2) = (x1 - 2)^2 + (x2 - 3)^2."""
    dx1, dx2 = x1 - 2, x2 - 3
    return dx1 ** 2 + dx2 ** 2

def f2_x1_x2(x1, x2):
    """Return f2(x1, x2) = (1 - (x2-3))^2 + 20*((x1+3) - (x2-3)^2)^2."""
    u = x2 - 3
    v = (x1 + 3) - u ** 2
    return (1 - u) ** 2 + 20 * v ** 2

def df1_x1(x1, x2):
    """Return the partial derivative of f1 with respect to x1: 2*(x1 - 2).

    x2 is accepted for a uniform gradient signature but is not used.
    """
    offset = x1 - 2
    return 2 * offset

def df1_x2(x1, x2):
    """Return the partial derivative of f1 with respect to x2: 2*(x2 - 3).

    f1 = (x1-2)^2 + (x2-3)^2, so only the second term depends on x2.
    x1 is accepted for a uniform gradient signature but is not used.
    """
    return 2 * (x2 - 3)

def df2_x1(x1, x2):
    """Return the partial derivative of f2 with respect to x1.

    f2 = (1 - (x2-3))^2 + 20*((x1+3) - (x2-3)^2)^2, so by the chain rule
    d f2 / d x1 = 40 * ((x1+3) - (x2-3)^2).
    """
    return 40 * ((x1 + 3) - (x2 - 3) ** 2)

def df2_x2(x1, x2):
    """Return the partial derivative of f2 with respect to x2.

    Computes 2*(x2-4) - 80*(x1+3)*(x2-3) + 80*(x2-3)^3.
    """
    u = x2 - 3
    return 2 * (x2 - 4) - 80 * (x1 + 3) * u + 80 * u ** 3

# Settings of this experiment (only the iteration budget differs between the
# 400 / 200 / 800 runs)
rate = 0.0005                   # Learning rate
precision = 0.000001            # early-stop threshold on the per-coordinate movement
maxIters = 200                  # maximum number of iterations (modifiable)


def _descend(label, formula, func, grad_x1, grad_x2, rate, precision, max_iters):
    """Minimise a two-variable function with fixed-step gradient descent.

    The search starts at (0, 0). Both partial derivatives are evaluated at the
    current point before either coordinate is moved, so each step follows the
    gradient of a single point. The loop ends early once neither coordinate
    moved by more than ``precision`` in the last step.

    Args:
        label: Short name used in the printed log, e.g. "f1".
        formula: Formula text shown in the final message.
        func: Objective, called as ``func(x1, x2)``.
        grad_x1: Partial derivative w.r.t. x1, called as ``grad_x1(x1, x2)``.
        grad_x2: Partial derivative w.r.t. x2, called as ``grad_x2(x1, x2)``.
        rate: Learning rate (step size).
        precision: Per-coordinate movement threshold for early stopping.
        max_iters: Maximum number of iterations.

    Returns:
        Tuple ``(x1, x2, iterations, values)``: the last point, the 1-based
        iteration numbers that ran, and the objective value after each one.

    Nothing is caught here: an exception raised by ``func`` or the derivatives
    (e.g. on numeric overflow when the iterates diverge) propagates.
    """
    x1, x2 = 0, 0
    iterations, values = [], []
    for step in range(1, max_iters + 1):
        prev_x1, prev_x2 = x1, x2
        # Evaluate the whole gradient at the previous point, then move.
        g1 = grad_x1(prev_x1, prev_x2)
        g2 = grad_x2(prev_x1, prev_x2)
        x1 = prev_x1 - rate * g1
        x2 = prev_x2 - rate * g2
        value = func(x1, x2)
        print("Iteration ", step, "\n(x1,x2)=(", x1, ",", x2, ") => " + label + "(", x1, ",", x2, ")=", value)  # Print iterations

        iterations.append(step)
        values.append(float(value))

        # Converged only when BOTH coordinates have stopped moving.
        if abs(x1 - prev_x1) <= precision and abs(x2 - prev_x2) <= precision:
            print("The local minimum for " + label + "(x) = " + formula + " occurs at (x1,x2)=(", x1, ",", x2,
                  ") after underflowing precision threshold and is", value)
            break
    return x1, x2, iterations, values


def _plot_history(iterations, values, ylabel, rate, precision, marker_size):
    """Plot the objective value reached after each iteration."""
    plt.figure(figsize=(8, 6))
    plt.title("rate=" + str(rate) + "  precision=" + str(precision))
    plt.scatter(iterations, values, marker='.', color='red', s=marker_size)
    # Dashed line through the recorded values, i.e. the actual trajectory.
    plt.plot(iterations, values, color='blue', linestyle='dashed')
    plt.xlabel("Iterations")
    plt.ylabel(ylabel)
    plt.show()


print("\n-------------------------------------f1(x)-----------------------------------------------\n")

# Iterations for f1(x)
curX1, curX2, iter_list1, f1_values_list = _descend(
    "f1", "(x1-2)^2+(x2-3)^2", f1_x1_x2, df1_x1, df1_x2, rate, precision, maxIters)

print("\n-------------------------------------f2(x)-----------------------------------------------\n")

# Iterations for f2(x)
cur_X1, cur_X2, iter_list2, f2_values_list = _descend(
    "f2", "(1-(x2-3))^2+20*((x1+3)-(x2-3)^2)^2", f2_x1_x2, df2_x1, df2_x2, rate, precision, maxIters)

# Plotting for f1
_plot_history(iter_list1, f1_values_list, "f1(x1,x2)", rate, precision, 0.5)

# Plotting for f2
_plot_history(iter_list2, f2_values_list, "f2(x1,x2)", rate, precision, 0.5)


**800 iterations**

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from decimal import *

# definitions of the functions and their partial derivatives
def f1_x1_x2(x1, x2):
    """Return f1(x1, x2) = (x1 - 2)^2 + (x2 - 3)^2."""
    dx1, dx2 = x1 - 2, x2 - 3
    return dx1 ** 2 + dx2 ** 2

def f2_x1_x2(x1, x2):
    """Return f2(x1, x2) = (1 - (x2-3))^2 + 20*((x1+3) - (x2-3)^2)^2."""
    u = x2 - 3
    v = (x1 + 3) - u ** 2
    return (1 - u) ** 2 + 20 * v ** 2

def df1_x1(x1, x2):
    """Return the partial derivative of f1 with respect to x1: 2*(x1 - 2).

    x2 is accepted for a uniform gradient signature but is not used.
    """
    offset = x1 - 2
    return 2 * offset

def df1_x2(x1, x2):
    """Return the partial derivative of f1 with respect to x2: 2*(x2 - 3).

    f1 = (x1-2)^2 + (x2-3)^2, so only the second term depends on x2.
    x1 is accepted for a uniform gradient signature but is not used.
    """
    return 2 * (x2 - 3)

def df2_x1(x1, x2):
    """Return the partial derivative of f2 with respect to x1.

    f2 = (1 - (x2-3))^2 + 20*((x1+3) - (x2-3)^2)^2, so by the chain rule
    d f2 / d x1 = 40 * ((x1+3) - (x2-3)^2).
    """
    return 40 * ((x1 + 3) - (x2 - 3) ** 2)

def df2_x2(x1, x2):
    """Return the partial derivative of f2 with respect to x2.

    Computes 2*(x2-4) - 80*(x1+3)*(x2-3) + 80*(x2-3)^3.
    """
    u = x2 - 3
    return 2 * (x2 - 4) - 80 * (x1 + 3) * u + 80 * u ** 3

# Settings of this experiment (only the iteration budget differs between the
# 400 / 200 / 800 runs)
rate = 0.0005                   # Learning rate
precision = 0.000001            # early-stop threshold on the per-coordinate movement
maxIters = 800                  # maximum number of iterations (modifiable)


def _descend(label, formula, func, grad_x1, grad_x2, rate, precision, max_iters):
    """Minimise a two-variable function with fixed-step gradient descent.

    The search starts at (0, 0). Both partial derivatives are evaluated at the
    current point before either coordinate is moved, so each step follows the
    gradient of a single point. The loop ends early once neither coordinate
    moved by more than ``precision`` in the last step.

    Args:
        label: Short name used in the printed log, e.g. "f1".
        formula: Formula text shown in the final message.
        func: Objective, called as ``func(x1, x2)``.
        grad_x1: Partial derivative w.r.t. x1, called as ``grad_x1(x1, x2)``.
        grad_x2: Partial derivative w.r.t. x2, called as ``grad_x2(x1, x2)``.
        rate: Learning rate (step size).
        precision: Per-coordinate movement threshold for early stopping.
        max_iters: Maximum number of iterations.

    Returns:
        Tuple ``(x1, x2, iterations, values)``: the last point, the 1-based
        iteration numbers that ran, and the objective value after each one.

    Nothing is caught here: an exception raised by ``func`` or the derivatives
    (e.g. on numeric overflow when the iterates diverge) propagates.
    """
    x1, x2 = 0, 0
    iterations, values = [], []
    for step in range(1, max_iters + 1):
        prev_x1, prev_x2 = x1, x2
        # Evaluate the whole gradient at the previous point, then move.
        g1 = grad_x1(prev_x1, prev_x2)
        g2 = grad_x2(prev_x1, prev_x2)
        x1 = prev_x1 - rate * g1
        x2 = prev_x2 - rate * g2
        value = func(x1, x2)
        print("Iteration ", step, "\n(x1,x2)=(", x1, ",", x2, ") => " + label + "(", x1, ",", x2, ")=", value)  # Print iterations

        iterations.append(step)
        values.append(float(value))

        # Converged only when BOTH coordinates have stopped moving.
        if abs(x1 - prev_x1) <= precision and abs(x2 - prev_x2) <= precision:
            print("The local minimum for " + label + "(x) = " + formula + " occurs at (x1,x2)=(", x1, ",", x2,
                  ") after underflowing precision threshold and is", value)
            break
    return x1, x2, iterations, values


def _plot_history(iterations, values, ylabel, rate, precision, marker_size):
    """Plot the objective value reached after each iteration."""
    plt.figure(figsize=(8, 6))
    plt.title("rate=" + str(rate) + "  precision=" + str(precision))
    plt.scatter(iterations, values, marker='.', color='red', s=marker_size)
    # Dashed line through the recorded values, i.e. the actual trajectory.
    plt.plot(iterations, values, color='blue', linestyle='dashed')
    plt.xlabel("Iterations")
    plt.ylabel(ylabel)
    plt.show()


print("\n-------------------------------------f1(x)-----------------------------------------------\n")

# Iterations for f1(x)
curX1, curX2, iter_list1, f1_values_list = _descend(
    "f1", "(x1-2)^2+(x2-3)^2", f1_x1_x2, df1_x1, df1_x2, rate, precision, maxIters)

print("\n-------------------------------------f2(x)-----------------------------------------------\n")

# Iterations for f2(x)
cur_X1, cur_X2, iter_list2, f2_values_list = _descend(
    "f2", "(1-(x2-3))^2+20*((x1+3)-(x2-3)^2)^2", f2_x1_x2, df2_x1, df2_x2, rate, precision, maxIters)

# Plotting for f1
_plot_history(iter_list1, f1_values_list, "f1(x1,x2)", rate, precision, 0.5)

# Plotting for f2
_plot_history(iter_list2, f2_values_list, "f2(x1,x2)", rate, precision, 0.5)


We observe that in some cases, the algorithm approximates the optimal point at the beginning, in others it approaches it at an intermediate iteration rather than the last one, while in some cases, it gets "trapped" and fails to converge.

