In [13]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np
import pandas as pd
# import ace_tools as tools

# Load the data
data_path = './Data/Nantou/cn01.csv'
data = pd.read_csv(data_path, delimiter=';')
print(data.describe())

                   E             N              H          Angle  \
count  188336.000000  1.883360e+05  188336.000000  188336.000000   
mean   268653.714291  2.657544e+06    1158.038842     182.117588   
std         0.019853  4.928526e-02       0.029194     105.203661   
min    268652.831200  2.657543e+06    1153.690900       0.000000   
25%    268653.708800  2.657544e+06    1158.025800      95.000000   
50%    268653.712900  2.657544e+06    1158.040400     182.000000   
75%    268653.719100  2.657544e+06    1158.052600     274.000000   
max    268658.842500  2.657545e+06    1159.978700     359.000000   

                Axis          Plate          EMove          NMove  \
count  188336.000000  188336.000000  188336.000000  188336.000000   
mean       16.618260       7.551837       0.518627     -45.497017   
std       280.051689     231.812827      19.852823      49.285211   
min         0.000000       0.000000    -882.600000   -1537.500000   
25%         6.500000       3.000000      -

In [14]:
columns_list = list(data.columns)
columns_list

['date_time',
 'E',
 'N',
 'H',
 'Angle',
 'Axis',
 'Plate',
 'EMove',
 'NMove',
 'HMove',
 'TotalMove',
 'EDay',
 'NDay',
 'HDay']

In [15]:
# 檢查 'date_time' 欄位是否存在
if 'date_time' in data.columns:
    # 轉換日期時間欄位
    data['date_time'] = pd.to_datetime(data['date_time'])
else:
    raise KeyError("'date_time' column is missing from the DataFrame")

# 創建時間特徵
data['hour'] = data['date_time'].dt.hour
data["minute"] = data["date_time"].dt.minute

# 創建滯後特徵（Lag Features）
lag_features = ["TotalMove", "E", "N", "H"]
for feature in lag_features:
    data[f"{feature}_lag1"] = data[feature].shift(1)  # 前一個時間點的值
    data[f"{feature}_lag2"] = data[feature].shift(2)  # 前兩個時間點的值

# 移除 NaN 值（因為有滯後變數會導致前幾筆數據無法使用）
data.dropna(inplace=True)

# 設定輸入變數 (X) 和 目標變數 (y)
features = ["hour", "minute"] + [f"{feature}_lag1" for feature in lag_features] + [f"{feature}_lag2" for feature in lag_features]
X = data[features]
y_totalmove = data["TotalMove"]
y_E = data["E"]
y_N = data["N"]
y_H = data["H"]

# 分割數據集（80% 訓練，20% 測試）
X_train, X_test, y_train_totalmove, y_test_totalmove = train_test_split(X, y_totalmove, test_size=0.2, random_state=42)
X_train, X_test, y_train_E, y_test_E = train_test_split(X, y_E, test_size=0.2, random_state=42)
X_train, X_test, y_train_N, y_test_N = train_test_split(X, y_N, test_size=0.2, random_state=42)
X_train, X_test, y_train_H, y_test_H = train_test_split(X, y_H, test_size=0.2, random_state=42)

In [16]:
# 訓練回歸模型（使用隨機森林）
rf_totalmove = RandomForestRegressor(n_estimators=100, random_state=42)
rf_totalmove.fit(X_train, y_train_totalmove)

rf_E = RandomForestRegressor(n_estimators=100, random_state=42)
rf_E.fit(X_train, y_train_E)

rf_N = RandomForestRegressor(n_estimators=100, random_state=42)
rf_N.fit(X_train, y_train_N)

rf_H = RandomForestRegressor(n_estimators=100, random_state=42)
rf_H.fit(X_train, y_train_H)

In [17]:
# 預測
y_pred_totalmove = rf_totalmove.predict(X_test)
y_pred_E = rf_E.predict(X_test)
y_pred_N = rf_N.predict(X_test)
y_pred_H = rf_H.predict(X_test)

In [18]:
# 評估模型表現
mae_totalmove = mean_absolute_error(y_test_totalmove, y_pred_totalmove)
rmse_totalmove = np.sqrt(mean_squared_error(y_test_totalmove, y_pred_totalmove))

mae_E = mean_absolute_error(y_test_E, y_pred_E)
rmse_E = np.sqrt(mean_squared_error(y_test_E, y_pred_E))

mae_N = mean_absolute_error(y_test_N, y_pred_N)
rmse_N = np.sqrt(mean_squared_error(y_test_N, y_pred_N))

mae_H = mean_absolute_error(y_test_H, y_pred_H)
rmse_H = np.sqrt(mean_squared_error(y_test_H, y_pred_H))

In [19]:
# 顯示結果
results = pd.DataFrame({
    "Variable": ["TotalMove", "E", "N", "H"],
    "MAE": [mae_totalmove, mae_E, mae_N, mae_H],
    "RMSE": [rmse_totalmove, rmse_E, rmse_N, rmse_H]
})

# tools.display_dataframe_to_user(name="Model Evaluation Results", dataframe=results)

In [20]:
print(results)

    Variable       MAE       RMSE
0  TotalMove  5.819217  30.378140
1          E  0.005288   0.012615
2          N  0.043522   0.047515
3          H  0.009407   0.028694
