In [1]:
%matplotlib notebook

In [2]:
import csv
import random
import numpy as np
import matplotlib.pyplot as plt
import math

np.random.seed(1)

def splitData(X, rate):
    X_train = X[int(X.shape[0] * rate):]
    X_val = X[:int(X.shape[0] * rate)]
    return X_train, X_val

def shuffle(X):
    np.random.seed(10)
    randomList = np.arange(X.shape[0])
    np.random.shuffle(randomList)
    return X[randomList]

def normalize(x_train):
    x_train_norm = (x_train - np.mean(x_train, axis=0)) / (np.max(x_train, axis=0) - np.min(x_train, axis=0))
    return x_train_norm

def denormalize(original_value, norm_value, mean, range_):
    denorm_value = norm_value * range_ + mean
    return denorm_value

# 生成訓練數據
def generate_data(num_samples):
    dataset = []
    for _ in range(num_samples):
        # 生成兩個隨機的四位數
        num1 = np.random.randint(1000, 9999)
        num2 = np.random.randint(1000, 9999)

        # 計算兩個數的和
        total = num1 + num2

        # 將兩個數作為輸入特徵，和作為輸出標籤，存為一個樣本
        sample = [num1, num2, total]
        dataset.append(sample)

    return np.array(dataset)

# 生成一萬個訓練數據
dataset = generate_data(10000)

print("訓練數據:")
for i in range(5):
    print("樣本:", dataset[i])

dataset = shuffle(dataset)
dataset_train, dataset_val = splitData(dataset, 0.2)

train_label = dataset_train[:, -1]
train_label_norm = normalize(train_label)

x_train = dataset_train[:, 0:2]
x_train_norm = normalize(x_train)

test_label = dataset_val[:, -1]
test_label_norm = normalize(test_label)

x_val = dataset_val[:, 0:2]
x_val_norm = normalize(x_val)

print(x_train_norm)
print(train_label_norm)
print(x_val_norm)
print(test_label_norm)



訓練數據:
樣本: [1235 6192 7427]
樣本: [ 1905  8813 10718]
樣本: [3895 6056 9951]
樣本: [1144 5225 6369]
樣本: [ 8751  4462 13213]
[[-0.25481087  0.35503655]
 [-0.26559105  0.17010657]
 [-0.4417413   0.15265824]
 ...
 [ 0.26774969  0.43949976]
 [-0.15634455 -0.33511682]
 [ 0.2277408  -0.46414549]]
[ 0.05057089 -0.04817862 -0.14586269 ...  0.35685699 -0.24797675
 -0.11928276]
[[-0.02258497 -0.35802851]
 [-0.13951284 -0.29422947]
 [ 0.20571335  0.42867817]
 ...
 [ 0.45913115 -0.35658358]
 [ 0.14791631  0.05099672]
 [-0.32412993 -0.3739227 ]]
[-0.1967808  -0.22424891  0.32798647 ...  0.05301807  0.10283993
 -0.36089987]


In [3]:
x_train=x_train.T
x_train.shape[0]

2

In [4]:
# x_train.shape
train_label.shape

(8000,)

In [5]:
def initialize_parameters(layer_dims):
    np.random.seed(1) #lock random parameters
    parameters = {}
    layers = len(layer_dims) 
    for layer in range(1,layers):
        parameters["W"+str(layer)] = np.random.randn(layer_dims[layer], layer_dims[layer-1])
        parameters["b"+str(layer)] = np.zeros((layer_dims[layer], 1))
        
        assert(parameters['W' + str(layer)].shape == (layer_dims[layer], layer_dims[layer-1]))
        assert(parameters['b' + str(layer)].shape == (layer_dims[layer], 1))
                                              
    return parameters



In [6]:
def linear_forward(A, W, b):
    Z = np.dot(W,A)+b
    cache = (A,W,b)
    assert(Z.shape == (W.shape[0], A.shape[1]))
    return Z, cache

def tanh(Z):
    A = np.tanh(Z)
    cache = Z
    return A, cache

def linear(Z):
    cache = Z
    return Z, cache


In [7]:
def linear_activation_forward(A_pre, W, b, activation):
    Z, linear_cache = linear_forward(A_pre, W, b)
    if activation == "tanh":
        A, activation_cache = tanh(Z)
    else:
        A, activation_cache = linear(Z)
    cache = (linear_cache, activation_cache)
    assert (A.shape == (W.shape[0], A_pre.shape[1]))   
    return A, cache

In [8]:
def model_forward(X, parameters):
    A=X
    caches = []
    layers = len(parameters) // 2
    for layer in range(1, layers):
        A_pre = A
        A, cache= linear_activation_forward(A_pre.T, parameters["W"+str(layer)], parameters["b"+str(layer)], "tanh")
        caches.append(cache)
    AL, cache = linear_activation_forward(A, parameters["W"+str(layers)], parameters["b"+str(layers)], "linear")
    caches.append(cache)
    assert(AL.shape == (1,X.shape[0]))
    return AL, caches


In [9]:
def compute_loss(AL, Y):
    m = Y.shape[0]  
    loss = np.sum((AL - Y) ** 2) / (2 * m)
    loss = np.squeeze(loss)
    return loss


In [10]:
def linear_backward(dZ, cache):
    A_pre, W, b = cache
    m = dZ.shape[1]
    dW = np.dot(dZ, A_pre.T) / m
    db = np.sum(dZ, axis=1, keepdims=True) / m
    dA_pre = np.dot(W.T, dZ)
    return dA_pre, dW, db

def tanh_backward(dA, Z):
    dZ = dA * (1 - np.tanh(Z)**2)
    assert dZ.shape == Z.shape
    return dZ

def linear_activation_backward(dA, cache, activation):
    dA_pre = None
    dW = None
    db = None

    linear_cache, activation_cache = cache
    if activation == "tanh":
        dZ = tanh_backward(dA, activation_cache)
        dA_pre, dW, db = linear_backward(dZ, linear_cache)
    elif activation == "linear":
        dA_pre, dW, db = linear_backward(dA, linear_cache)

    return dA_pre, dW, db


In [11]:
def model_backward(AL, Y, caches, parameters):
    grads = {}
    m = AL.shape[1]
    layers = len(caches)
    
    # Compute dAL (gradient of the loss function with respect to AL)
    dAL = AL - Y
    
    # Get the cache of the last layer
    current_cache = caches[layers-1]
    
    # Backward pass for the last layer
    grads["dA"+str(layers-1)], grads["dW"+str(layers)], grads["db"+str(layers)] = \
               linear_activation_backward(dAL, current_cache, "linear")
    
    # Backward pass for the hidden layers
    for layer in reversed(range(layers-1)):
        current_cache = caches[layer]
        activation = "tanh" if layer > 0 else "linear"  # Activation function for hidden layers
        grads["dA"+str(layer)], grads["dW"+str(layer+1)], grads["db"+str(layer+1)] = \
             linear_activation_backward(grads["dA"+str(layer+1)], current_cache, activation)
    
    return grads


In [12]:
def update_parameters(parameters, grads, learning_rate):
    layers = len(parameters) // 2
    for layer in range(layers):
        dW = grads["dW" + str(layer + 1)]
        db = grads["db" + str(layer + 1)]
        if dW is not None:  # 梯度不為 None 時才更新參數
            parameters["W" + str(layer + 1)] = parameters["W" + str(layer + 1)] - learning_rate * dW
        if db is not None:
            parameters["b" + str(layer + 1)] = parameters["b" + str(layer + 1)] - learning_rate * db
    return parameters


In [13]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from IPython.display import HTML
plt.rcParams['animation.embed_limit'] = 50  # 設置更大的值（以MB為單位）

def deep_learning_model(X, Y, layer_dims, learning_rate, num_iterations, print_cost=False):
    losses = []
    parameters = initialize_parameters(layer_dims)

    fig, ax = plt.subplots()
    ax.set_xlim(0, num_iterations)
    ax.set_ylim(0, 1)  # Set the initial y-axis range

    line, = ax.plot([], [], lw=2)
    text = ax.text(0.02, 0.95, '', transform=ax.transAxes)

    def init():
        line.set_data([], [])
        text.set_text('')
        return line, text

    def update(i):
        
        cost_text = "Cost after iteration %i: %f" % (i, losses[i])
        text.set_text(cost_text)

        losses_subset = losses[:i+1] 

        xdata = np.arange(len(losses_subset))  
        ydata = losses_subset  
        line.set_data(xdata, ydata)
        ax.set_xlim(0, num_iterations)
        ax.set_ylim(0, np.max(losses) * 1.1)
        return line, text

    for i in range(num_iterations):
        AL, caches = model_forward(X, parameters)
        loss = compute_loss(AL, Y)
        grads = model_backward(AL, Y, caches, parameters)
        parameters = update_parameters(parameters, grads, learning_rate)
        losses.append(loss)
        print(loss)

    ani = FuncAnimation(fig, update, frames=num_iterations, init_func=init, blit=True)

    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.title('Training Loss')

    # 將動畫轉換為HTML格式並顯示
    # html_anim = ani.to_jshtml()
    # HTML(html_anim)
    # display(HTML(html_anim))
    # return parameters



In [14]:
layer_dims = [x_train.shape[0], 3, 1]
learning_rate = 0.0045
num_iterations = 1000
parameters = deep_learning_model(x_train_norm, train_label_norm, layer_dims, learning_rate, num_iterations, print_cost=True)#訓練



<IPython.core.display.Javascript object>

0.323297487058824
0.3217214937803617
0.3201551626442256
0.31859842257648396
0.31705120311753354
0.31551343441595386
0.31398504722243226
0.31246597288375766
0.31095614333688254
0.30945549110305226
0.30796394928200116
0.30648145154621387
0.30500793213525174
0.30354332585014304
0.3020875680478367
0.3006405946357182
0.2992023420661868
0.2977727473312942
0.29635174795744273
0.2949392820001433
0.293535288038831
0.29213970517174015
0.2907524730108348
0.28937353167679625
0.2880028217940669
0.2866402844859481
0.2852858613697524
0.28393949455201056
0.28260112662373005
0.2812707006557068
0.2799481601938884
0.27863344925478783
0.2773265123209488
0.27602729433645923
0.27473574070251566
0.2734517972730345
0.27217541035031256
0.2709065266807343
0.26964509345052595
0.2683910582815562
0.2671443692271821
0.2659049747681403
0.2646728238084826
0.2634478656715563
0.2622300500960267
0.26101932723194365
0.2598156476368504
0.2586189622719336
0.2574292224982158
0.256246380072788
0.2550703871450829
0.2539011962



In [15]:
AL, cache=model_forward(x_val_norm, parameters)# 測試集向前傳播
print(AL)
AL1 = denormalize(AL, x_val, np.mean(x_val_norm, axis=0), np.max(x_val_norm, axis=0) - np.min(x_val_norm, axis=0))#反正規化
print(AL1)


TypeError: object of type 'NoneType' has no len()