In [1]:
# import matplotlib
# import numpy
# import scipy
# import sys

# print("Python version:", sys.version)
# print("Matplotlib version:", matplotlib.__version__)
# print("Numpy version:", numpy.__version__)
# print("Scipy version:", scipy.__version__)


In [2]:
%matplotlib notebook

In [3]:
import random
import numpy as np
import matplotlib.pyplot as plt
import math

np.random.seed(1)

def objectfunc(x,y):
  out = 2*x**2+5*y**2+10
  return out

def splitData(X, rate):
    X_train = X[int(X.shape[0] * rate):]
    X_val = X[:int(X.shape[0] * rate)]
    return X_train, X_val

def shuffle(X):
    np.random.seed(10)
    randomList = np.arange(X.shape[0])
    np.random.shuffle(randomList)
    return X[randomList]

def normalize(x_train):
    x_train_norm = (x_train - np.mean(x_train, axis=0)) / (np.max(x_train, axis=0) - np.min(x_train, axis=0))
    return x_train_norm

def denormalize(norm_value, mean, range_):
    denorm_value = norm_value * range_ + mean
    return denorm_value


# 生成訓練數據
def generate_data(num_samples):
    dataset = []
    for _ in range(num_samples):
        # 生成兩個隨機1-100
        x = np.random.randint(1,100)
        y = np.random.randint(1,100)

        out = objectfunc(x,y)

        # 將兩個數作為輸入特徵，和作為輸出標籤，存為一個樣本
        sample = [x, y, out]
        dataset.append(sample)
    return np.array(dataset)

# 生成一萬個訓練數據
dataset = generate_data(10000)

print("訓練數據:")
for i in range(5):
    print("樣本:", dataset[i])

dataset = shuffle(dataset)
dataset_train, dataset_val = splitData(dataset, 0.2)

train_label = dataset_train[:, -1]
train_label_norm = normalize(train_label)

x_train = dataset_train[:, 0:2]
x_train_norm = normalize(x_train)

test_label = dataset_val[:, -1]
test_label_norm = normalize(test_label)

x_val = dataset_val[:, 0:2]
x_val_norm = normalize(x_val)

print(x_train_norm)
print(train_label_norm)
print(x_val_norm)
print(test_label_norm)
x_train=x_train.T
x_train_norm=x_train_norm.T
x_train.shape[0]
x_train_norm.shape[0]
train_label_norm = train_label_norm.reshape(-1, 1)


訓練數據:
樣本: [  38   13 3743]
樣本: [   73    10 11168]
樣本: [   76     6 11742]
樣本: [   80    65 33935]
樣本: [ 17   2 608]
[[-0.18205995  0.34716199]
 [-0.19226403  0.04103954]
 [-0.44736607 -0.38753189]
 ...
 [ 0.11385842  0.10226403]
 [-0.05961097  0.1736926 ]
 [ 0.31794005 -0.35691964]]
[ 0.20844525 -0.09692131 -0.32727322 ...  0.03416373  0.04699364
 -0.12998583]
[[ 0.33063776  0.45009694]
 [-0.0265051   0.07254592]
 [-0.4142602  -0.1417398 ]
 ...
 [-0.22038265 -0.21316837]
 [ 0.10614796  0.26642347]
 [-0.49589286 -0.11112755]]
[ 0.52587591 -0.03424712 -0.24774948 ... -0.26183694  0.19989886
 -0.23282187]


In [4]:
# x_train.shape
train_label.shape

(8000,)

In [5]:
def calculate_average(lst):
    if not lst:
        return None

    total = sum(lst)
    average = total / len(lst)
    return average

In [6]:
def initialize_parameters(layer_dims):
    np.random.seed(1) #lock random parameters
    parameters = {}
    layers = len(layer_dims)
    for layer in range(1,layers):
        parameters["W"+str(layer)] = np.random.randn(layer_dims[layer], layer_dims[layer-1])
        parameters["b"+str(layer)] = np.zeros((layer_dims[layer], 1))

        assert(parameters['W' + str(layer)].shape == (layer_dims[layer], layer_dims[layer-1]))
        assert(parameters['b' + str(layer)].shape == (layer_dims[layer], 1))

    return parameters



In [7]:
def linear_forward(A, W, b):
    Z = np.dot(W,A)+b
    cache = (A,W,b)
    return Z, cache

def tanh(Z):
    A = np.tanh(Z)
    return A

def relu(Z):
    A = np.maximum(0, Z)
    return A

def sigmoid(Z):
    A = 1.0 / (1+np.exp(-Z))
    return A

In [8]:
def linear_activation_forward(A_pre, W, b, activation):
    Z, linear_cache = linear_forward(A_pre, W, b)
    if activation == "tanh":
        A = tanh(Z)
    elif activation == "linear":
        A = Z
    elif activation == "relu":
        A = relu(Z)
    elif activation == "sigmoid":
        A = sigmoid(Z)
    cache = (linear_cache, A)
    return A, cache

In [9]:
def model_forward(X, parameters, activation):
    caches = []
    A = X

    # Number of layers
    layers = len(parameters) // 2

    for layer in range(1, layers):
        A_pre = A
        A, cache = linear_activation_forward(A_pre, parameters["W" + str(layer)], parameters["b" + str(layer)], activation)
        caches.append(cache)

    AL, cache = linear_activation_forward(A, parameters["W" + str(layers)], parameters["b" + str(layers)], "linear")
    caches.append(cache)

    return AL, caches


In [10]:
def compute_loss(AL, Y):
    loss = (AL - Y) ** 2 / 2
    loss = np.squeeze(loss)
    return loss


In [11]:
def linear_backward(dZ, cache):
    A_pre, W, b = cache
    dW = np.dot(dZ, A_pre.T)
    db = dZ
    dA_pre = np.dot(W.T, dZ)
    return dA_pre, dW, db

def tanh_backward(dA, Z):
    dZ = dA * (1 - np.tanh(Z)**2)
    assert dZ.shape == Z.shape
    return dZ

def relu_backward(dA, Z):
    dZ = np.where(Z > 0, 1, 0)
    return dZ

def sigmoid_backward(dA, Z):
    dZ = dA * 1/(1+np.exp(-Z)) * (1-1/(1+np.exp(-Z)))
    assert dZ.shape == Z.shape
    return dZ


def linear_activation_backward(dA, cache, activation):
    dA_pre = None
    dW = None
    db = None
    linear_cache, activation_cache = cache
    if activation == "tanh":
        dZ = tanh_backward(dA, activation_cache)
        dA_pre, dW, db = linear_backward(dZ, linear_cache)
    elif activation == "linear":
        dA_pre, dW, db = linear_backward(dA, linear_cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
        dA_pre, dW, db = linear_backward(dZ, linear_cache)
    return dA_pre, dW, db


In [12]:
def model_backward(AL, Y, caches, parameters, activation):
    grads = {}
    layers = len(caches)

    # Compute dAL (gradient of the loss function with respect to AL)
    dAL = AL - Y

    # Get the cache of the last layer
    current_cache = caches[layers-1]

    # Backward pass for the last layer
    grads["dA"+str(layers-1)], grads["dW"+str(layers)], grads["db"+str(layers)] = \
               linear_activation_backward(dAL, current_cache, "linear")

    # Backward pass for the hidden layers
    for layer in reversed(range(layers-1)):
        current_cache = caches[layer]
        activation = activation if layer > 0 else "linear"  # Activation function for hidden layers
        grads["dA"+str(layer)], grads["dW"+str(layer+1)], grads["db"+str(layer+1)] = \
             linear_activation_backward(grads["dA"+str(layer+1)], current_cache, activation)

    return grads


In [13]:
def update_parameters(parameters, grads, learning_rate):
    layers = len(parameters) // 2
    for layer in range(layers):
        dW = grads["dW" + str(layer + 1)]
        db = grads["db" + str(layer + 1)]
        if dW is not None:  # 梯度不為 None 時才更新參數
            # print(parameters["W" + str(layer + 1)])
            parameters["W" + str(layer + 1)] = parameters["W" + str(layer + 1)] - learning_rate * dW
            # print(parameters["W" + str(layer + 1)])
        if db is not None:
            parameters["b" + str(layer + 1)] = parameters["b" + str(layer + 1)] - learning_rate * db
    return parameters


In [14]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from IPython.display import HTML
from mpl_toolkits import mplot3d
from scipy.interpolate import griddata



def deep_learning_model(X, Y, layer_dims, learning_rate, num_iterations, print_cost=False):
    losses = []
    outputs = []
    mean_loss = []
    parameters = initialize_parameters(layer_dims)
    for i in range(num_iterations):
        for j in range(X.shape[1]):
            x_single = X[:,j:j+1]
            y_single = Y[j]
            AL, caches = model_forward(x_single, parameters, "sigmoid")
            loss = compute_loss(AL, y_single)
            grads = model_backward(AL, y_single, caches, parameters, "sigmoid")
            parameters = update_parameters(parameters, grads, learning_rate)
            losses.append(loss)
            if(j<1000):
                outputs.append(AL)
        mean_loss.append(calculate_average(losses))
    return parameters, outputs, mean_loss



In [15]:
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.animation import FuncAnimation
from IPython.display import HTML

def draw(x_train_norm, outputs, mean_loss, num_iterations, train_label, num_input, print_cost=False):
    outputs = np.array(outputs)
    # train_label = train_label.astype(float)
    # 定義錯誤區間
    bins = np.linspace(-0.05, 0.08, num=10)
    hist, bins = np.histogram(mean_loss, bins)

    fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(16, 4))
    fig2 = plt.figure()

    ax1.set_xlim(0, num_iterations)
    ax1.set_ylim(0, 1)  # 設置初始y軸範圍
    line1, = ax1.plot([], [], lw=2)
    text1 = ax1.text(0.02, 0.95, '', transform=ax1.transAxes)

    ax2.set_xlim(0, num_input)
    ax2.set_ylim(0, np.max(outputs) * 1.1)  # 設置初始y軸範圍
    line2, = ax2.plot([], [], lw=2)
    text2 = ax2.text(0.02, 0.95, '', transform=ax2.transAxes)

    line3 = ax2.plot(np.arange(num_input), train_label[0:num_input], lw=2)
    text3 = ax2.text(0.02, 0.95, '', transform=ax2.transAxes)
    ax4 = fig2.add_subplot(111, projection='3d')
    # line4, = ax4.plot([], [], [], lw=2)
    # line5, = ax4.plot([], [], [], lw=2)


    def init():
        line1.set_data([], [])
        text1.set_text('')
        line2.set_data([], [])
        text2.set_text('')

        return line1, text1, line2, text2

    # def init2():
    #     line4.set_data([], [])
    #     line4.set_3d_properties([])
    #     line5.set_data(x_train_norm[0, :num_input], x_train_norm[1, :num_input])
    #     line5.set_3d_properties(train_label[:num_input])
    #     return line4, line5,

    def update(i):
        # 更新訓練過程中的損失函數
        cost_text = "Cost after iteration %i: %f" % (i, mean_loss[i])
        text1.set_text(cost_text)
        xdata = np.arange(i + 1)  # 限制 x 范围為 0 到 i+1
        ydata = mean_loss[:i + 1]
        line1.set_data(xdata, ydata)
        ax1.set_xlim(0, num_iterations)
        ax1.set_ylim(0, np.max(mean_loss) * 1.1)

        # 更新訓練過程中的損失函數
        cost_text = "Output after iteration %i " % (i)
        text2.set_text(cost_text)
        xdata = np.arange(num_input)  # 限制 x 范围
        ydata = outputs[num_input * i:num_input * (i + 1)]
        line2.set_data(xdata, ydata)
        ax2.set_xlim(0, num_input)
        ax2.set_ylim(0, np.max(outputs) * 1.1)

        return line1, text1, line2, text2


    # def update2(i):
    #     # if i>0:
    #     #     ax4.clear()
    #     # X1 = x_train_norm[0, :num_input].astype(float)
    #     # X2 = x_train_norm[1, :num_input].astype(float)
    #     # Y = outputs[num_input * i:num_input * (i + 1)]
    #     # print(X1.shape,X2.shape,Y.shape)

    #     line4.set_data(x_train_norm[0, :num_input], x_train_norm[1, :num_input])
    #     line4.set_3d_properties(outputs[num_input*i:num_input + num_input*i])

    #     ax4.set_xlim(np.min(x_train_norm[0, :num_input]), np.max(x_train_norm[0, :num_input]))
    #     ax4.set_ylim(np.min(x_train_norm[1, :num_input]), np.max(x_train_norm[1, :num_input]))
    #     ax4.set_zlim(np.min(outputs[num_input*i:num_input +num_input*i]), np.max(outputs[num_input*i:num_input +num_input*i]))

    #     return line4,
    def update2(i):
      ax4.clear()
      x_data = x_train_norm[0, :num_input]
      y_data = x_train_norm[1, :num_input]
      z_data = outputs[num_input * i:num_input * (i + 1)]
      # ax4.scatter(x_data, y_data, z_data, c=train_label[:num_input], cmap='viridis')
      ax4.scatter(x_data, y_data, z_data, c=z_data, cmap='viridis', marker='o')
      ax4.set_xlim(np.min(x_train_norm[0, :num_input]), np.max(x_train_norm[0, :num_input]))
      ax4.set_ylim(np.min(x_train_norm[1, :num_input]), np.max(x_train_norm[1, :num_input]))
      ax4.set_zlim(np.min(outputs[num_input * i:num_input * (i + 1)]), np.max(outputs[num_input * i:num_input * (i + 1)]))

      ax4.set_xlabel('Feature 1')
      ax4.set_ylabel('Feature 2')
      ax4.set_zlabel('Output')
      return ax4,

    # def update2(i):
    #     ax4.clear()  # 清除之前的绘图，以便更新新的图形

    #     X1 = x_train_norm[0, :num_input].astype(float)
    #     X2 = x_train_norm[1, :num_input].astype(float)
    #     Y = outputs[num_input * i:num_input * (i + 1)].squeeze()
    #     z = np.mat(Y)
    #     z = np.array(z)
    #     z.shape = (1000,1000)

    #     X, Y = np.meshgrid(X1, X2)


    #     contour = ax4.contourf(X, Y, Z, cmap='viridis', levels=20)

    #     ax4.set_xlabel('Feature 1')
    #     ax4.set_ylabel('Feature 2')
    #     ax4.set_title(f'Iteration {i}')

    #     # 添加颜色栏
    #     cbar = plt.colorbar(contour, ax=ax4)
    #     cbar.set_label('Output Value')

    #     return ax4,



    ani = FuncAnimation(fig, update, frames=num_iterations, init_func=init, blit=True)
    # ani2 = FuncAnimation(fig2, update2, frames=num_iterations, init_func=init2, blit=True)
    ani2 = FuncAnimation(fig2, update2, frames=num_iterations, blit=True)

    ax1.set_xlabel('Iteration')
    ax1.set_ylabel('Loss')
    ax1.set_title('Training Loss')

    ax2.set_xlabel('Output Index')
    ax2.set_ylabel('Output Value')
    ax2.set_title('Output Variation')

    # 顯示錯誤直方圖
    ax3.bar(bins[:-1], hist, width=0.1)
    ax3.set_xlabel('Error Interval')
    ax3.set_ylabel('Amount')
    ax3.set_title('Error Histogram')

    # ax4.set_xlabel('Feature 1')
    # ax4.set_ylabel('Feature 2')
    # ax4.set_zlabel('Output')

    # 將動畫轉換為HTML格式並顯示
    html_anim = ani.to_jshtml()
    html_anim2 = ani2.to_jshtml()
    display(HTML(html_anim))
    display(HTML(html_anim2))
    # Show the plot
    plt.show()


In [16]:
print(x_train_norm.shape[0])
print(x_train_norm[0,:1000].shape)
print(train_label_norm.shape)

2
(1000,)
(8000, 1)


In [17]:
layer_dims = [x_train.shape[0], 3, 1]
learning_rate = 0.001
num_iterations = 250
parameters, outputs, mean_loss= deep_learning_model(x_train_norm, train_label_norm, layer_dims, learning_rate, num_iterations, print_cost=True)#訓練


In [18]:
plt.rcParams['animation.embed_limit'] = 100  # 將值設置為更大的數字（以MB為單位）
draw(x_train_norm, outputs, mean_loss, num_iterations, train_label_norm, 1000)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [19]:
#將數組的完整內容顯示出來
np.set_printoptions(threshold=np.inf)
AL, cache=model_forward(x_val_norm.T, parameters, "relu")# 測試集向前傳播
print(AL)
AL1 = denormalize(AL, np.mean(test_label), np.max(test_label) - np.min(test_label))
print(x_val)
print(abs(AL1))
