## Import data

In [1]:
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import boston_housing
import math
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

2025-03-17 10:03:49.693787: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-03-17 10:03:49.707620: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-03-17 10:03:49.711899: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-17 10:03:49.722401: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Original data(already divide into training set & test set) 
    'CRIM',     # 城鎮人均犯罪率
    'ZN',       # 佔地面積超過25,000平方呎的住宅用地比例
    'INDUS',    # 城鎮非零售商業用地比例
    'CHAS',     # 查爾斯河虛擬變量（1表示靠近河邊，0表示不靠近）
    'NOX',      # 一氧化氮濃度（百萬分之一）
    'RM',       # 每棟住宅的平均房間數
    'AGE',      # 1940年以前建造的自住單位比例
    'DIS',      # 與波士頓五個就業中心的加權距離
    'RAD',      # 放射性公路的可達性指數
    'TAX',      # 每10,000美元的全額財產稅率
    'PTRATIO',  # 城鎮學生與教師比例
    'B',        # 1000(Bk - 0.63)^2，其中Bk是城鎮黑人比例
    'LSTAT',    # 人口中社會地位較低的百分比

    Target：
    'MEDV'      # 自有住宅的房價中位數，以千美元為單位。

In [2]:
(train_data, train_targets), (test_data, test_targets) = boston_housing.load_data()
print(f"Training data:{train_data.shape}")
print(f"Test data:{test_data.shape}")

feature_names = [
    'CRIM',     # 城鎮人均犯罪率
    'ZN',       # 佔地面積超過25,000平方呎的住宅用地比例
    'INDUS',    # 城鎮非零售商業用地比例
    'CHAS',     # 查爾斯河虛擬變量（1表示靠近河邊，0表示不靠近）
    'NOX',      # 一氧化氮濃度（百萬分之一）
    'RM',       # 每棟住宅的平均房間數
    'AGE',      # 1940年以前建造的自住單位比例
    'DIS',      # 與波士頓五個就業中心的加權距離
    'RAD',      # 放射性公路的可達性指數
    'TAX',      # 每10,000美元的全額財產稅率
    'PTRATIO',  # 城鎮學生與教師比例
    'B',        # 1000(Bk - 0.63)^2，其中Bk是城鎮黑人比例
    'LSTAT',    # 人口中社會地位較低的百分比
]

train_df = pd.DataFrame(train_data, columns=feature_names)
print("Training sample:")
for i, name in enumerate(feature_names):
    print(f"{name}: {train_data[0][i]}")

print(f"Targets sample:{train_targets[0]}")

Training data:(404, 13)
Test data:(102, 13)
Training sample:
CRIM: 1.23247
ZN: 0.0
INDUS: 8.14
CHAS: 0.0
NOX: 0.538
RM: 6.142
AGE: 91.7
DIS: 3.9769
RAD: 4.0
TAX: 307.0
PTRATIO: 21.0
B: 396.9
LSTAT: 18.72
Targets sample:15.2


## Feature Normalization
- 對測試資料正規化時，正規化的數值要使用從訓練資料得出來的
- 不能對測試資料做修改

In [3]:
mean = train_data.mean(axis=0)
train_data -= mean
std = train_data.std(axis=0)
train_data /= std

test_data -= mean
test_data /= std

## Q2. 測試不同激活函數所帶來的影響
激活函數Activation function: Sigmoid、Softplus、ReLU

固定參數: 
1. 隱藏層數Dense layer: 5
2. 節點數Node: 128
3. 世代epoch: 100
4. 批次大小batch size: 16
5. 優化器optimizer: adam

In [4]:
activation_functions = ['sigmoid', 'softplus', 'relu']

results = []

### 建構模型method

In [5]:
def build_model(activation_function):
    model = keras.Sequential()
    
    model.add(layers.Dense(128, activation=activation_function))
    for _ in range(5 - 1):
        model.add(layers.Dense(128, activation=activation_function))
    model.add(layers.Dense(1))
    model.compile(optimizer="adam", loss="mse", metrics=["mae", "mse", "mape"])
    model.fit(train_data, train_targets,epochs=100,batch_size=16,validation_split=0.2,verbose=0)
    train_metrics = model.evaluate(train_data, train_targets, verbose=0)
    train_mae = train_metrics[1]
    train_mse = train_metrics[2]
    train_mape = train_metrics[3]
    train_rmse = math.sqrt(train_mse)
    test_metrics = model.evaluate(test_data, test_targets, verbose=0)
    test_mae = test_metrics[1]
    test_mse = test_metrics[2]
    test_mape = test_metrics[3]
    test_rmse = math.sqrt(test_mse)
    return {
        'activation_function': activation_function, 
        'train_mae': train_mae, 
        'train_rmse': train_rmse, 
        'train_mape': train_mape,
        'test_mae': test_mae, 
        'test_rmse': test_rmse, 
        'test_mape': test_mape
    }

### 訓練模型(所有排列組合)

In [6]:
for activation in activation_functions:
    print(f"測試激活函數：{activation}")
    result = build_model(activation)
    results.append(result)
    print(result)

測試激活函數：sigmoid


I0000 00:00:1742205833.685380   66395 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1742205833.702400   66395 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1742205833.702440   66395 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1742205833.704151   66395 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1742205833.704227   66395 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:0

{'activation_function': 'sigmoid', 'train_mae': 2.7878613471984863, 'train_rmse': 4.204345979574802, 'train_mape': 12.910015106201172, 'test_mae': 3.466043710708618, 'test_rmse': 5.083408271779159, 'test_mape': 16.99478530883789}
測試激活函數：softplus
{'activation_function': 'softplus', 'train_mae': 2.1512503623962402, 'train_rmse': 2.893968782331688, 'train_mape': 11.276071548461914, 'test_mae': 2.823206663131714, 'test_rmse': 4.45367877930026, 'test_mape': 15.23805046081543}
測試激活函數：relu
{'activation_function': 'relu', 'train_mae': 1.2612940073013306, 'train_rmse': 2.1563255186263164, 'train_mape': 6.78611421585083, 'test_mae': 2.8207151889801025, 'test_rmse': 4.448706861191958, 'test_mape': 14.818090438842773}


### Data frame

In [8]:
train_results_df = pd.DataFrame([
    {
        '激活函數': r['activation_function'], 
        'MAE': r['train_mae'], 'RMSE': r['train_rmse'], 'MAPE': r['train_mape']
    } for r in results
])

test_results_df = pd.DataFrame([
    {
        '激活函數': r['activation_function'],  
        'MAE': r['test_mae'], 'RMSE': r['test_rmse'], 'MAPE': r['test_mape']
    } for r in results
])

print("\n訓練績效表現:")
print(train_results_df)

print("\n測試績效表現:")
print(test_results_df)


訓練績效表現:
       激活函數       MAE      RMSE       MAPE
0   sigmoid  2.787861  4.204346  12.910015
1  softplus  2.151250  2.893969  11.276072
2      relu  1.261294  2.156326   6.786114

測試績效表現:
       激活函數       MAE      RMSE       MAPE
0   sigmoid  3.466044  5.083408  16.994785
1  softplus  2.823207  4.453679  15.238050
2      relu  2.820715  4.448707  14.818090


In [9]:
train_results_df.to_excel('Q2-1.xlsx', sheet_name='Q1', index=True, startrow=1, startcol=1)
test_results_df.to_excel('Q2-2.xlsx', sheet_name='Q2', index=True, startrow=1, startcol=8)

### reference
1. boston dataset: https://lib.stat.cmu.edu/datasets/boston
2. Boston Housing Price dataset with Keras(kaggle): https://www.kaggle.com/code/shanekonaung/boston-housing-price-dataset-with-keras