In [1]:
%matplotlib notebook

In [2]:
import random
import numpy as np
import matplotlib.pyplot as plt
import math

np.random.seed(1)

def splitData(X, rate):
    X_train = X[int(X.shape[0] * rate):]
    X_val = X[:int(X.shape[0] * rate)]
    return X_train, X_val

def shuffle(X):
    np.random.seed(10)
    randomList = np.arange(X.shape[0])
    np.random.shuffle(randomList)
    return X[randomList]

def normalize(x_train):
    x_train_norm = (x_train - np.mean(x_train, axis=0)) / (np.max(x_train, axis=0) - np.min(x_train, axis=0))
    return x_train_norm

def denormalize(norm_value, mean, range_):
    denorm_value = norm_value * range_ + mean
    return denorm_value


# 生成訓練數據
def generate_data(num_samples):
    dataset = []
    for _ in range(num_samples):
        # 生成兩個隨機的四位數
        num1 = np.random.randint(1000, 9999)
        num2 = np.random.randint(1000, 9999)

        # 計算兩個數的和
        total = num1 + num2

        # 將兩個數作為輸入特徵，和作為輸出標籤，存為一個樣本
        sample = [num1, num2, total]
        dataset.append(sample)

    return np.array(dataset)

# 生成一萬個訓練數據
dataset = generate_data(10000)

print("訓練數據:")
for i in range(5):
    print("樣本:", dataset[i])

dataset = shuffle(dataset)
dataset_train, dataset_val = splitData(dataset, 0.2)

train_label = dataset_train[:, -1]
train_label_norm = normalize(train_label)

x_train = dataset_train[:, 0:2]
x_train_norm = normalize(x_train)

test_label = dataset_val[:, -1]
test_label_norm = normalize(test_label)

x_val = dataset_val[:, 0:2]
x_val_norm = normalize(x_val)

print(x_train_norm)
print(train_label_norm)
print(x_val_norm)
print(test_label_norm)
x_train=x_train.T
x_train_norm=x_train_norm.T
x_train.shape[0]
x_train_norm.shape[0]
train_label_norm = train_label_norm.reshape(-1, 1)


訓練數據:
樣本: [1235 6192 7427]
樣本: [ 1905  8813 10718]
樣本: [3895 6056 9951]
樣本: [1144 5225 6369]
樣本: [ 8751  4462 13213]
[[-0.25481087  0.35503655]
 [-0.26559105  0.17010657]
 [-0.4417413   0.15265824]
 ...
 [ 0.26774969  0.43949976]
 [-0.15634455 -0.33511682]
 [ 0.2277408  -0.46414549]]
[ 0.05057089 -0.04817862 -0.14586269 ...  0.35685699 -0.24797675
 -0.11928276]
[[-0.02258497 -0.35802851]
 [-0.13951284 -0.29422947]
 [ 0.20571335  0.42867817]
 ...
 [ 0.45913115 -0.35658358]
 [ 0.14791631  0.05099672]
 [-0.32412993 -0.3739227 ]]
[-0.1967808  -0.22424891  0.32798647 ...  0.05301807  0.10283993
 -0.36089987]


In [3]:
# x_train.shape
train_label.shape

(8000,)

In [4]:
def calculate_average(lst):
    if not lst:
        return None

    total = sum(lst)
    average = total / len(lst)
    return average

In [5]:
def initialize_parameters(layer_dims):
    np.random.seed(1) #lock random parameters
    parameters = {}
    layers = len(layer_dims)
    for layer in range(1,layers):
        parameters["W"+str(layer)] = np.random.randn(layer_dims[layer], layer_dims[layer-1])
        parameters["b"+str(layer)] = np.zeros((layer_dims[layer], 1))

        assert(parameters['W' + str(layer)].shape == (layer_dims[layer], layer_dims[layer-1]))
        assert(parameters['b' + str(layer)].shape == (layer_dims[layer], 1))

    return parameters



In [6]:
def linear_forward(A, W, b):
    Z = np.dot(W,A)+b
    cache = (A,W,b)
    return Z, cache

def tanh(Z):
    A = np.tanh(Z)
    cache = Z
    return A, cache

def linear(Z):
    cache = Z
    return Z, cache


In [7]:
def linear_activation_forward(A_pre, W, b, activation):
    Z, linear_cache = linear_forward(A_pre, W, b)
    if activation == "tanh":
        A = np.tanh(Z)
    elif activation == "linear":
        A = Z
    cache = (linear_cache, A)
    return A, cache

In [8]:
def model_forward(X, parameters):
    caches = []
    A = X

    # Number of layers
    layers = len(parameters) // 2

    for layer in range(1, layers):
        A_pre = A
        A, cache = linear_activation_forward(A_pre, parameters["W" + str(layer)], parameters["b" + str(layer)], "tanh")
        caches.append(cache)

    AL, cache = linear_activation_forward(A, parameters["W" + str(layers)], parameters["b" + str(layers)], "linear")
    caches.append(cache)

    return AL, caches


In [9]:
def compute_loss(AL, Y):
    loss = (AL - Y) ** 2 / 2
    loss = np.squeeze(loss)
    return loss


In [10]:
def linear_backward(dZ, cache):
    A_pre, W, b = cache
    dW = np.dot(dZ, A_pre.T)
    db = dZ
    dA_pre = np.dot(W.T, dZ)
    return dA_pre, dW, db

def tanh_backward(dA, Z):
    dZ = dA * (1 - np.tanh(Z)**2)
    assert dZ.shape == Z.shape
    return dZ

def linear_activation_backward(dA, cache, activation):
    dA_pre = None
    dW = None
    db = None

    linear_cache, activation_cache = cache
    if activation == "tanh":
        dZ = tanh_backward(dA, activation_cache)
        dA_pre, dW, db = linear_backward(dZ, linear_cache)
    elif activation == "linear":
        dA_pre, dW, db = linear_backward(dA, linear_cache)

    return dA_pre, dW, db


In [11]:
def model_backward(AL, Y, caches, parameters):
    grads = {}
    layers = len(caches)

    # Compute dAL (gradient of the loss function with respect to AL)
    dAL = AL - Y

    # Get the cache of the last layer
    current_cache = caches[layers-1]

    # Backward pass for the last layer
    grads["dA"+str(layers-1)], grads["dW"+str(layers)], grads["db"+str(layers)] = \
               linear_activation_backward(dAL, current_cache, "linear")

    # Backward pass for the hidden layers
    for layer in reversed(range(layers-1)):
        current_cache = caches[layer]
        activation = "tanh" if layer > 0 else "linear"  # Activation function for hidden layers
        grads["dA"+str(layer)], grads["dW"+str(layer+1)], grads["db"+str(layer+1)] = \
             linear_activation_backward(grads["dA"+str(layer+1)], current_cache, activation)

    return grads


In [12]:
def update_parameters(parameters, grads, learning_rate):
    layers = len(parameters) // 2
    for layer in range(layers):
        dW = grads["dW" + str(layer + 1)]
        db = grads["db" + str(layer + 1)]
        if dW is not None:  # 梯度不為 None 時才更新參數
            # print(parameters["W" + str(layer + 1)])
            parameters["W" + str(layer + 1)] = parameters["W" + str(layer + 1)] - learning_rate * dW
            # print(parameters["W" + str(layer + 1)])
        if db is not None:
            parameters["b" + str(layer + 1)] = parameters["b" + str(layer + 1)] - learning_rate * db
    return parameters


In [13]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from IPython.display import HTML

plt.rcParams['animation.embed_limit'] = 50  # 設置更大的值（以MB為單位）

def deep_learning_model(X, Y, layer_dims, learning_rate, num_iterations, print_cost=False):
    losses = []
    outputs = []
    mean_loss = []
    parameters = initialize_parameters(layer_dims)
    for i in range(num_iterations):
        for j in range(X.shape[1]):
            x_single = X[:,j:j+1]
            y_single = Y[j]
            AL, caches = model_forward(x_single, parameters)
            loss = compute_loss(AL, y_single)
            grads = model_backward(AL, y_single, caches, parameters)
            parameters = update_parameters(parameters, grads, learning_rate)
            losses.append(loss)
            if(j<1000):
                outputs.append(AL)
        mean_loss.append(calculate_average(losses))
    return parameters, outputs, mean_loss



In [14]:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.animation import FuncAnimation
from IPython.display import HTML

def draw(outputs, mean_loss, num_iterations, train_label, num_input, print_cost=False):

    # 定義錯誤區間
    bins = np.linspace(-0.05, 0.08, num=10)
    hist, bins = np.histogram(mean_loss, bins)

    fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 4))

    ax1.set_xlim(0, num_iterations)
    ax1.set_ylim(0, 1)  # 設置初始y軸範圍
    line1, = ax1.plot([], [], lw=2)
    text1 = ax1.text(0.02, 0.95, '', transform=ax1.transAxes)

    ax2.set_xlim(0, num_input)
    ax2.set_ylim(0, np.max(outputs) * 1.1)  # 設置初始y軸範圍
    line2, = ax2.plot([], [], lw=2)
    text2 = ax2.text(0.02, 0.95, '', transform=ax2.transAxes)

    line3 = ax2.plot(np.arange(num_input), train_label[0:num_input], lw=2)
    text3 = ax2.text(0.02, 0.95, '', transform=ax2.transAxes)

    def init():
        line1.set_data([], [])
        text1.set_text('')
        line2.set_data([], [])
        text2.set_text('')
        return line1, text1, line2, text2

    def update(i):
        # 更新訓練過程中的損失函數
        cost_text = "Cost after iteration %i: %f" % (i, mean_loss[i])
        text1.set_text(cost_text)
        xdata = np.arange(i+1)  # 限制 x 范围為 0 到 i+1
        ydata = mean_loss[:i + 1]
        line1.set_data(xdata, ydata)
        ax1.set_xlim(0, num_iterations)
        ax1.set_ylim(0, np.max(mean_loss) * 1.1)

        # 更新訓練過程中的損失函數
        cost_text = "Output after iteration %i " % (i)
        text2.set_text(cost_text)
        xdata = np.arange(num_input)  # 限制 x 范围為 0 到 i+1
        ydata = outputs[num_input*i:num_input*(i+1)]
        line2.set_data(xdata, ydata)
        ax2.set_xlim(0, num_input)
        ax2.set_ylim(0, np.max(outputs) * 1.1)
        return line1, text1, line2, text2

    ani = FuncAnimation(fig, update, frames=num_iterations, init_func=init, blit=True)

    ax1.set_xlabel('Iteration')
    ax1.set_ylabel('Loss')
    ax1.set_title('Training Loss')

    ax2.set_xlabel('Output Index')
    ax2.set_ylabel('Output Value')
    ax2.set_title('Output Variation')

    # 顯示錯誤直方圖
    ax3.bar(bins[:-1], hist, width=0.1)
    ax3.set_xlabel('Error Interval')
    ax3.set_ylabel('Amount')
    ax3.set_title('Error Histogram')

    # 將動畫轉換為HTML格式並顯示
    html_anim = ani.to_jshtml()
    display(HTML(html_anim))

    plt.show()



In [15]:
print(x_train_norm.shape[0])
print(x_train_norm[0:1,:].shape)
print(train_label_norm.shape)

2
(1, 8000)
(8000, 1)


In [16]:
layer_dims = [x_train.shape[0], 3, 1]
learning_rate = 0.001
num_iterations = 250
parameters, outputs, mean_loss= deep_learning_model(x_train_norm, train_label_norm, layer_dims, learning_rate, num_iterations, print_cost=True)#訓練


In [None]:
draw(outputs, mean_loss, num_iterations, train_label_norm, 1000)

In [None]:
#將數組的完整內容顯示出來
np.set_printoptions(threshold=np.inf)
AL, cache=model_forward(x_val_norm.T, parameters)# 測試集向前傳播
print(AL)
AL1 = denormalize(AL, np.mean(test_label), np.max(test_label) - np.min(test_label))
print(AL1)


[[-1.90537436e-01 -2.23275289e-01  3.12551767e-01  3.53823810e-02
   2.82572739e-01 -1.03182441e-01  2.22116495e-01 -8.20820788e-02
  -4.16461756e-02 -3.18519046e-02 -2.23387088e-01  1.41044810e-01
  -3.05874224e-01  8.63288134e-02 -1.50251889e-01 -9.54501234e-02
   1.25983129e-01 -1.25968444e-01 -3.08888655e-01  6.55601246e-04
   1.16299891e-01  5.46170828e-02 -2.74681639e-01  2.54088189e-01
   1.10952371e-01  1.62421582e-01 -2.57970855e-01 -3.86071232e-02
  -2.54286510e-01 -5.06174920e-02 -2.60765232e-01 -3.30703097e-01
  -4.29132450e-01 -3.30329936e-01 -3.52288950e-01 -3.55057742e-01
   1.06316782e-01 -3.24341087e-01  3.85234888e-02 -1.10853039e-01
  -6.65294484e-02  1.78318163e-01 -2.47923042e-01  6.80553246e-03
  -2.18527189e-01  2.89646090e-01 -1.79906543e-01  2.58627241e-01
   3.46431023e-01 -9.86405641e-02  3.32250618e-01  8.50947883e-02
  -3.39778464e-02 -1.88457061e-01 -4.51405221e-01 -6.13568597e-02
  -4.17724013e-01  5.94125061e-03  2.87870831e-01 -8.16570976e-02
   3.41514