In [1]:
import numpy as np
import matplotlib.pyplot as plt
import scipy
import math

## Training Set

In [2]:
p = np.array([5, 10, 15, 20]) # Input
t = np.array([2, 8, 8, 20]) # Target

In [3]:
w_layer1 = np.array([-0.1, 0.2])
b_layer1 = np.array([0.4, -1.3])
w_layer2 = np.array([0.5, 0.7])
b_layer2 = np.array([0.8])
bias_layer2 = np.array([-1])

u = 0.5

a_layer0 = 0

f1_purelin = -1

## Function to Compute

### logsig

In [4]:
def log_sigmod(a):
    return 1/(1+math.exp(1)**-a)

### Cal Error

In [5]:
def cal_error(t, a):
    return t - a

### desired matrix

In [6]:
def convert_to_desired_matrix(original_array):
    n = len(original_array)
    diagonal_values = [(1 - original_array[i]) * original_array[i] for i in range(n)]
    desired_matrix = np.diag(diagonal_values)
    return desired_matrix

### Jacobian

In [7]:
def create_jacobian_matrix(t, s_layer2, n_layer1, bias, p):
    # Create a 4x7 matrix filled with zeros
    matrix = np.zeros((4, 7))

    # Fill in the values based on the pattern described
    for i in range(len(t)):
        matrix[i, 0] = s_layer2[i * 2] * p[i]
        matrix[i, 1] = s_layer2[i * 2]
        matrix[i, 2] = s_layer2[i * 2 + 1] * p[i]
        matrix[i, 3] = s_layer2[i * 2 + 1]
        matrix[i, 4] = bias * n_layer1[i * 2]
        matrix[i, 5] = bias * n_layer1[i * 2 + 1]
        matrix[i, 6] = bias

    return matrix

### I Matrix

In [8]:
def create_I_matrix(size, diagonal_value):
    # Create a size x size identity matrix
    matrix = np.eye(size)
    
    # Set the diagonal elements to the specified value
    matrix *= diagonal_value
    
    return matrix

## Static Cal

In [9]:
a_layer1 = w_layer1.transpose()*p[0]+b_layer1.transpose()
n_layer1 = np.round(log_sigmod(a_layer1), 4)

a_layer2 = np.round(np.dot(w_layer2, n_layer1)+b_layer2, 4)

error = cal_error(t[0], a_layer2)

value1 = (1 - n_layer1[0]) * n_layer1[0]
value2 = (1 - n_layer1[1]) * n_layer1[1]

desired_matrix = np.array([[value1, 0], [0, value2]])

s_layer2 = np.round(np.dot(desired_matrix,w_layer2)*f1_purelin, 4)

print(f"a_layer1 = {w_layer1.transpose()} x {p[0]} + {b_layer1.transpose()} = {a_layer1}")
print(f"n_layer1  : {n_layer1}")
print(f"a_layer2 = {w_layer2} x {n_layer1} + {b_layer2} = {a_layer2}")
print(f"error = {error}")
print(f"s_layer2 = {desired_matrix} x {w_layer2} x {b_layer2}")
print(f"s_layer2 = {s_layer2}")

a_layer1 = [-0.1  0.2] x 5 + [ 0.4 -1.3] = [-0.1 -0.3]
n_layer1  : [0.475  0.4256]
a_layer2 = [0.5 0.7] x [0.475  0.4256] + [0.8] = [1.3354]
error = [0.6646]
s_layer2 = [[0.249375   0.        ]
 [0.         0.24446464]] x [0.5 0.7] x [0.8]
s_layer2 = [-0.1247 -0.1711]


## Parameter Recoard

In [10]:
history_error = np.array([])
history_s_layer2 = np.array([])
history_n_layey1 = np.array([])

## Interation Process

In [11]:
for i in range(len(p)):
    a_layer1 = w_layer1.transpose() * p[i] + b_layer1.transpose()
    n_layer1 = np.round(log_sigmod(a_layer1), 4)
    
    history_n_layey1 = np.append(history_n_layey1, n_layer1)

    a_layer2 = np.round(np.dot(w_layer2, n_layer1) + b_layer2, 4)

    error = cal_error(t[i], a_layer2)
    
    history_error = np.append(history_error, error)

    # value1 = (1 - n_layer1[0]) * n_layer1[0]
    # value2 = (1 - n_layer1[1]) * n_layer1[1]

    # desired_matrix = np.array([[value1, 0], [0, value2]])
    
    desired_matrix = convert_to_desired_matrix(n_layer1)

    s_layer2 = np.round(np.dot(desired_matrix,w_layer2)*f1_purelin, 4)
    
    history_s_layer2 = np.append(history_s_layer2, s_layer2)
    
    print(f"--------------------- Input P : {i+1} --------------------------")
    print(f"a_layer1 = {w_layer1.transpose()} x {p[i]} + {b_layer1.transpose()} = {a_layer1}")
    print(f"n_layer1  : {n_layer1}")
    print(f"a_layer2 = {w_layer2} x {n_layer1} + {b_layer2} = {a_layer2}")
    print(f"error = {error}")
    # print(f"s_layer2 = {desired_matrix} x {w_layer2} x {b_layer2}")
    print(f"s_layer2 = {s_layer2}")

--------------------- Input P : 1 --------------------------
a_layer1 = [-0.1  0.2] x 5 + [ 0.4 -1.3] = [-0.1 -0.3]
n_layer1  : [0.475  0.4256]
a_layer2 = [0.5 0.7] x [0.475  0.4256] + [0.8] = [1.3354]
error = [0.6646]
s_layer2 = [-0.1247 -0.1711]
--------------------- Input P : 2 --------------------------
a_layer1 = [-0.1  0.2] x 10 + [ 0.4 -1.3] = [-0.6  0.7]
n_layer1  : [0.3543 0.6682]
a_layer2 = [0.5 0.7] x [0.3543 0.6682] + [0.8] = [1.4449]
error = [6.5551]
s_layer2 = [-0.1144 -0.1552]
--------------------- Input P : 3 --------------------------
a_layer1 = [-0.1  0.2] x 15 + [ 0.4 -1.3] = [-1.1  1.7]
n_layer1  : [0.2497 0.8455]
a_layer2 = [0.5 0.7] x [0.2497 0.8455] + [0.8] = [1.5167]
error = [6.4833]
s_layer2 = [-0.0937 -0.0914]
--------------------- Input P : 4 --------------------------
a_layer1 = [-0.1  0.2] x 20 + [ 0.4 -1.3] = [-1.6  2.7]
n_layer1  : [0.168 0.937]
a_layer2 = [0.5 0.7] x [0.168 0.937] + [0.8] = [1.5399]
error = [18.4601]
s_layer2 = [-0.0699 -0.0413]


## Sum of Error Square

In [12]:
sum_error_power2 = np.round(sum(history_error**2), 4)
print("sum of error square = {}".format(sum_error_power2))

sum of error square = 426.2195


In [13]:
print(history_s_layer2)
print(history_n_layey1)

[-0.1247 -0.1711 -0.1144 -0.1552 -0.0937 -0.0914 -0.0699 -0.0413]
[0.475  0.4256 0.3543 0.6682 0.2497 0.8455 0.168  0.937 ]


## Jacobian

In [14]:
jacobian_matrix = create_jacobian_matrix(t, history_s_layer2, history_n_layey1, bias_layer2, p)
print(jacobian_matrix)

[[-0.6235 -0.1247 -0.8555 -0.1711 -0.475  -0.4256 -1.    ]
 [-1.144  -0.1144 -1.552  -0.1552 -0.3543 -0.6682 -1.    ]
 [-1.4055 -0.0937 -1.371  -0.0914 -0.2497 -0.8455 -1.    ]
 [-1.398  -0.0699 -0.826  -0.0413 -0.168  -0.937  -1.    ]]


  matrix[i, 4] = bias * n_layer1[i * 2]
  matrix[i, 5] = bias * n_layer1[i * 2 + 1]
  matrix[i, 6] = bias


In [15]:
J_T_J = np.round(np.dot(jacobian_matrix.transpose(), jacobian_matrix), 4)
print(J_T_J)

[[5.6273 0.438  5.3906 0.4704 1.2873 3.5281 4.571 ]
 [0.438  0.0423 0.4704 0.0505 0.1349 0.2742 0.4027]
 [5.3906 0.4704 5.7025 0.5467 1.4373 3.3343 4.6045]
 [0.4704 0.0505 0.5467 0.0634 0.166  0.2925 0.459 ]
 [1.2873 0.1349 1.4373 0.166  0.4417 0.8074 1.247 ]
 [3.5281 0.2742 3.3343 0.2925 0.8074 2.2205 2.8763]
 [4.571  0.4027 4.6045 0.459  1.247  2.8763 4.    ]]


In [16]:
J_T_v = np.round(np.dot(jacobian_matrix.transpose(), history_error), 4)
print(J_T_v)

[-42.8329  -2.7306 -34.8787  -2.486   -7.3583 -27.4417 -32.1631]


In [17]:
print(J_T_v.shape)

(7,)


In [18]:
matrix_uI = create_I_matrix(len(J_T_J), u)
print(matrix_uI)

[[0.5 0.  0.  0.  0.  0.  0. ]
 [0.  0.5 0.  0.  0.  0.  0. ]
 [0.  0.  0.5 0.  0.  0.  0. ]
 [0.  0.  0.  0.5 0.  0.  0. ]
 [0.  0.  0.  0.  0.5 0.  0. ]
 [0.  0.  0.  0.  0.  0.5 0. ]
 [0.  0.  0.  0.  0.  0.  0.5]]


In [19]:
J_T_J_plus_matrix_uI = J_T_J+matrix_uI

print(J_T_J_plus_matrix_uI)

[[6.1273 0.438  5.3906 0.4704 1.2873 3.5281 4.571 ]
 [0.438  0.5423 0.4704 0.0505 0.1349 0.2742 0.4027]
 [5.3906 0.4704 6.2025 0.5467 1.4373 3.3343 4.6045]
 [0.4704 0.0505 0.5467 0.5634 0.166  0.2925 0.459 ]
 [1.2873 0.1349 1.4373 0.166  0.9417 0.8074 1.247 ]
 [3.5281 0.2742 3.3343 0.2925 0.8074 2.7205 2.8763]
 [4.571  0.4027 4.6045 0.459  1.247  2.8763 4.5   ]]


In [20]:
delta_x = np.round(np.dot(-(np.linalg.inv(J_T_J_plus_matrix_uI)), J_T_v), 4)
print(delta_x)

[ 6.203  -0.3878 -2.5879 -1.0277 -2.1216  4.7188  1.2058]


In [21]:
delta_weight_bias = np.concatenate((w_layer1, b_layer1, w_layer2, b_layer2))
print(delta_weight_bias)

[-0.1  0.2  0.4 -1.3  0.5  0.7  0.8]


In [22]:
result = delta_x + delta_weight_bias

print(result)

[ 6.103  -0.1878 -2.1879 -2.3277 -1.6216  5.4188  2.0058]
