In [1]:
import os
import joblib
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import calendar
from modules.metrics import rmse
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.layers import (
    Input, LSTM, ConvLSTM2D, Dense,
    Dropout, SpatialDropout2D, MaxPooling2D, BatchNormalization,
    TimeDistributed, LeakyReLU, Flatten
)
from tensorflow.keras.utils import Sequence
from sklearn.model_selection import train_test_split

In [2]:
# Ask TensorFlow to allocate GPU memory on demand instead of reserving it all up front.
gpus = tf.config.list_physical_devices('GPU')
try:
    # Memory growth has to be configured identically on every visible GPU.
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    if gpus:
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
except RuntimeError as e:
    # Raised when memory growth is changed after the GPUs were already initialised.
    print(e)
print(gpus)

1 Physical GPUs, 1 Logical GPUs
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


# 資料分割

In [None]:
# split data by month
def split_data_by_month(data_path):
    """
    Split the files in `data_path` into train/val/test lists by day of month.

    File names are expected to start with a `YYYYMMDD` date followed by `-`
    (e.g. `20210101-160505_1_160505.csv`). Within every month the first 70% of
    the days go to training, the days up to 85% go to validation and the
    remaining days go to testing. Dates listed as held-out cases always go to
    the test list.

    Returns:
        Tuple `(train_files, val_files, test_files)` of full file paths, each
        in sorted file-name order.
    """
    # Case-study days that must never be trained on; they are forced into the test set.
    held_out_days = {'20210116', '20210530', '20210825', '20210722', '20220904'}
    buckets = {'train': [], 'val': [], 'test': []}

    for file_name in sorted(os.listdir(data_path)):
        date_str = file_name.split('-')[0]
        year, month, day = int(date_str[0:4]), int(date_str[4:6]), int(date_str[6:8])

        # Day-of-month thresholds depend on how many days the month has.
        days_in_month = calendar.monthrange(year, month)[1]
        train_last_day = int(days_in_month * 0.7)
        val_last_day = int(days_in_month * 0.85)

        if date_str in held_out_days:
            target = 'test'
        elif day <= train_last_day:
            target = 'train'
        elif day <= val_last_day:
            target = 'val'
        else:
            target = 'test'

        buckets[target].append(os.path.join(data_path, file_name))

    return (buckets['train'], buckets['val'], buckets['test'])


# split data by sequence
def split_data_by_sequence(data_path):
    """
    Split the files in `data_path` chronologically by their date prefix.

    File names are expected to start with a `YYYYMMDD` date followed by `-`
    (e.g. `20210101-160505_1_160505.csv`). Files from 2021 through June 2022
    are used for training, July-September 2022 for validation and
    October-December 2022 for testing. Files matching none of these prefixes
    are left out of every list.

    Returns:
        Tuple `(train_files, val_files, test_files)` of full file paths, each
        in sorted file-name order.
    """
    train_files, val_files, test_files = [], [], []
    # (date prefixes, destination list) pairs, checked independently for every file.
    routing = (
        (('2021', '202201', '202202', '202203', '202204', '202205', '202206'), train_files),
        (('202207', '202208', '202209'), val_files),
        (('202210', '202211', '202212'), test_files),
    )

    for file_name in sorted(os.listdir(data_path)):
        date_str = file_name.split('-')[0]
        full_path = os.path.join(data_path, file_name)
        for prefixes, destination in routing:
            if date_str.startswith(prefixes):
                destination.append(full_path)

    return (train_files, val_files, test_files)

# intersection of the month-based and sequence-based test sets
def intersect_test_files(data_path):
    """
    Return the files that belong to the test set under both split strategies.

    A file is kept when its `YYYYMMDD` file-name prefix falls in
    October-December 2022 and its day of month lies beyond 85% of that
    month's length.

    Returns:
        List of full file paths in sorted file-name order.
    """
    selected = []

    for file_name in sorted(os.listdir(data_path)):  # e.g. 20210101-160505_1_160505.csv
        date_str = file_name.split('-')[0]

        # Only the last quarter of 2022 is considered.
        if not date_str.startswith(('202210', '202211', '202212')):
            continue

        year, month, day = int(date_str[0:4]), int(date_str[4:6]), int(date_str[6:8])
        # Days after this threshold form the tail of the month.
        val_last_day = int(calendar.monthrange(year, month)[1] * 0.85)

        if day > val_last_day:
            selected.append(os.path.join(data_path, file_name))

    return selected

# 資料縮放器

In [4]:
def fit_scaler_on_training_data(column_name, csv_files):
    """
    Fit a MinMaxScaler on the first differences of two coordinate columns.

    Args:
        column_name: sequence whose first two items name the latitude and
            longitude columns of the CSV files.
        csv_files: iterable of CSV file paths to read.

    Returns:
        The fitted MinMaxScaler, with the latitude difference as feature 0 and
        the longitude difference as feature 1.
    """
    lat_deltas, lng_deltas = [], []

    for csv_file_path in csv_files:
        frame = pd.read_csv(
            csv_file_path,
            encoding='utf-8',
            dtype={'fileName': str, 'day': str, 'time': str},
        )

        # Row-to-row differences; dropna removes the undefined first entry.
        lat_deltas.extend(frame[column_name[0]].diff().dropna().to_numpy())
        lng_deltas.extend(frame[column_name[1]].diff().dropna().to_numpy())

    # One row per difference sample, one column per coordinate.
    stacked = np.vstack((lat_deltas, lng_deltas)).T

    fitted_scaler = MinMaxScaler()
    fitted_scaler.fit(stacked)
    return fitted_scaler

# 資料生成器

In [5]:
def csv_sliding_window_generator(csv_files, column_name, scaler, window_size, step_size, mode):
    """
    Yield sliding-window samples built from the coordinate differences of each CSV.

    For every file the row-to-row differences of the two coordinate columns are
    computed. Each sample pairs `window_size` consecutive scaled differences
    (model input) with the following `step_size` unscaled differences (labels).

    Args:
        csv_files: iterable of CSV file paths.
        column_name: sequence whose first two items name the latitude and
            longitude columns.
        scaler: object whose `transform` accepts the (n, 2) difference array.
        window_size: number of consecutive differences in one input window.
        step_size: number of future differences used as labels.
        mode: unused; kept so existing callers keep working.

    Yields:
        Tuple `(x, labels)` where `x` is a float32 array of shape
        `(window_size, 2)` and `labels` is a dict with float32 arrays of shape
        `(step_size,)` under the keys `'lstm_lat_output'` and `'lstm_lng_output'`.
    """
    for csv_file_path in csv_files:
        data = pd.read_csv(csv_file_path, encoding='utf-8',
                           dtype={'fileName': str, 'day': str, 'time': str})
        if len(data) < (window_size + step_size) + 1:
            # Too short: at least window_size + step_size + 1 rows are needed for one sample.
            continue

        # Difference both columns together so that a missing value in either one
        # drops the same rows from both, keeping latitude and longitude aligned.
        diffs = data[[column_name[0], column_name[1]]].diff().dropna().to_numpy()

        # Inputs are scaled; labels stay in the original (unscaled) units.
        scaled_diffs = scaler.transform(diffs)

        # Every window start that still leaves `step_size` labels after the window.
        n_samples = len(diffs) - window_size - step_size + 1
        for start in range(n_samples):
            label_start = start + window_size
            label_stop = label_start + step_size
            yield np.array(scaled_diffs[start:label_start], dtype=np.float32), {
                'lstm_lat_output': np.array(diffs[label_start:label_stop, 0], dtype=np.float32),
                'lstm_lng_output': np.array(diffs[label_start:label_stop, 1], dtype=np.float32),
            }

def create_csv_sliding_window_dataset(csv_files, column_name, scaler, window_size, step_size, mode):
    """
    Wrap `csv_sliding_window_generator` in a `tf.data.Dataset`.

    All arguments are forwarded unchanged to the generator. The dataset
    elements are declared as a float32 `(window_size, 2)` input tensor and a
    dict of two float32 `(step_size,)` label tensors.

    Returns:
        An unbatched `tf.data.Dataset` of `(x, labels)` elements.
    """
    def _sample_stream():
        # from_generator needs a zero-argument callable that returns a fresh generator.
        return csv_sliding_window_generator(
            csv_files, column_name, scaler, window_size, step_size, mode)

    feature_spec = tf.TensorSpec(shape=(window_size, 2), dtype=tf.float32)
    label_spec = {
        output_name: tf.TensorSpec(shape=(step_size,), dtype=tf.float32)
        for output_name in ('lstm_lat_output', 'lstm_lng_output')
    }

    return tf.data.Dataset.from_generator(
        _sample_stream,
        output_signature=(feature_spec, label_spec),
    )


In [6]:
# Split strategy: 'month' uses split_data_by_month; any other value uses split_data_by_sequence.
split_data_mode = 'month'
cells_csv_path = r'H:\cell_data_processed\cells'
# cells_csv_path = r'E:\YuCheng\cell_data_processed\cells'

split_fn = split_data_by_month if split_data_mode == 'month' else split_data_by_sequence
train_files, val_files, test_files = split_fn(cells_csv_path)

# Optional: restrict testing to the period shared by both split strategies.
# test_files = intersect_test_files(cells_csv_path)
for split_label, split_files in (
    ('train_files', train_files),
    ('val_files', val_files),
    ('test_files', test_files),
):
    print(f'{split_label}:', len(split_files))

train_files: 55628
val_files: 14313
test_files: 14110


In [7]:
# CSV columns holding the two track coordinates
column_name = ['Latitude', 'Longitude']
# Sliding-window parameters
window_size = 3  # number of past differences in each input window
step_size = 2  # number of future differences used as labels

# The scaler is loaded from disk. To refit it on the training files instead, run:
# scaler = fit_scaler_on_training_data(column_name, train_files)
# joblib.dump(scaler, f'config/{split_data_mode}/lstm_multitask_scaler.gz')
scaler_path = f'config/{split_data_mode}/lstm_multitask_scaler.gz'
scaler = joblib.load(scaler_path)

# Arguments shared by the training and validation datasets
window_kwargs = dict(
    column_name=column_name,
    scaler=scaler,
    window_size=window_size,
    step_size=step_size,
)
train_dataset = create_csv_sliding_window_dataset(train_files, mode='train', **window_kwargs)
val_dataset = create_csv_sliding_window_dataset(val_files, mode='val', **window_kwargs)

In [8]:
# Show the declared element structure of the training and validation datasets.
for split_dataset in (train_dataset, val_dataset):
    print(split_dataset.element_spec)

(TensorSpec(shape=(3, 2), dtype=tf.float32, name=None), {'lstm_lat_output': TensorSpec(shape=(2,), dtype=tf.float32, name=None), 'lstm_lng_output': TensorSpec(shape=(2,), dtype=tf.float32, name=None)})
(TensorSpec(shape=(3, 2), dtype=tf.float32, name=None), {'lstm_lat_output': TensorSpec(shape=(2,), dtype=tf.float32, name=None), 'lstm_lng_output': TensorSpec(shape=(2,), dtype=tf.float32, name=None)})


In [15]:
# for x, y in train_dataset.take(1):
#     print(f'X shape: {x.shape}')
#     print(f'Lat y shape: {y["lstm_lat_output"].shape}')
#     print(f'Lng y shape: {y["lstm_lng_output"].shape}')

# for x, y in val_dataset.take(1):
#     print(f'X shape: {x.shape}')
#     print(f'Lat y shape: {y["lstm_lat_output"].shape}')
#     print(f'Lng y shape: {y["lstm_lng_output"].shape}')

# 建立模型

In [10]:
# Stop when val_loss has not improved for 5 epochs and roll back to the best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=5,
                               verbose=1, mode='auto', restore_best_weights=True)

# Save the model every time val_loss improves; epoch and val_loss are embedded in the name.
checkpoint = ModelCheckpoint(os.path.join(
    os.getcwd(), 'weights', split_data_mode, 'non_cumulative_diff', 'lstm_multitask', 'lstm_mt_diff3-2_e{epoch:02d}v{val_loss:.4f}'),
    monitor='val_loss', save_best_only=True)

# Multi-task LSTM model: a shared LSTM trunk with one output head per coordinate.
n_in = window_size
n_features = 2
inputs = Input(shape=(n_in, n_features), name='lstm_input')

# Three stacked LSTM layers, each followed by batch normalisation and dropout.
lstm_1 = LSTM(32, activation=LeakyReLU(),
              return_sequences=True, name='lstm_1')(inputs)
lstm_1 = BatchNormalization(name='lstm_1-bn_1')(lstm_1)
dropout_1 = Dropout(0.2, name='lstm_1-dropout_1')(lstm_1)

lstm_2 = LSTM(64, activation=LeakyReLU(),
              return_sequences=True, name='lstm_2')(dropout_1)
lstm_2 = BatchNormalization(name='lstm_2-bn_2')(lstm_2)
dropout_2 = Dropout(0.2, name='lstm_2-dropout_2')(lstm_2)

# The last LSTM returns only its final state, collapsing the time dimension.
lstm_3 = LSTM(128, activation=LeakyReLU(),
              return_sequences=False, name='lstm_3')(dropout_2)
lstm_3 = BatchNormalization(name='lstm_3-bn_3')(lstm_3)
dropout_3 = Dropout(0.2, name='lstm_3-dropout_3')(lstm_3)

# Shared dense layer with L2 weight regularisation.
dense_1 = Dense(64, kernel_regularizer=l2(
    0.01), activation='linear', name='lstm_dense')(dropout_3)
dropout_4 = Dropout(0.2, name='lstm_dense-dropout')(dense_1)

# One two-unit head per coordinate; the layer names must match the label dict keys
# produced by the data generator.
lat_output = Dense(2, activation='linear', name='lstm_lat_output')(dropout_4)
lng_output = Dense(2, activation='linear', name='lstm_lng_output')(dropout_4)

lstm_model = Model(inputs=inputs, outputs=[
                   lat_output, lng_output], name='lstm_multi_task')

# Both heads use MSE with equal weight.
lstm_model.compile(optimizer=Adam(learning_rate=0.0001),
                   loss={'lstm_lat_output': 'mse',
                         'lstm_lng_output': 'mse'},
                   loss_weights={'lstm_lat_output': 1.0,
                                 'lstm_lng_output': 1.0},
                   metrics={'lstm_lat_output': ['mse', rmse, 'mae'],
                            'lstm_lng_output': ['mse', rmse, 'mae']})

# summary() prints the table itself and returns None, so it is not wrapped in print().
lstm_model.summary()

Model: "lstm_multi_task"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 lstm_input (InputLayer)        [(None, 3, 2)]       0           []                               
                                                                                                  
 lstm_1 (LSTM)                  (None, 3, 32)        4480        ['lstm_input[0][0]']             
                                                                                                  
 lstm_1-bn_1 (BatchNormalizatio  (None, 3, 32)       128         ['lstm_1[0][0]']                 
 n)                                                                                               
                                                                                                  
 lstm_1-dropout_1 (Dropout)     (None, 3, 32)        0           ['lstm_1-bn_1[0][0]

In [11]:
# Training hyper-parameters
batch_size = 32
epochs = 50

# Batch the samples and prefetch so that data loading overlaps with training.
# Note: both names are rebound to their batched versions, so running this cell
# a second time would batch the already-batched datasets again.
train_dataset = (
    train_dataset
    .batch(batch_size)
    .prefetch(tf.data.experimental.AUTOTUNE)
)

val_dataset = (
    val_dataset
    .batch(batch_size)
    .prefetch(tf.data.experimental.AUTOTUNE)
)

In [12]:
# Train the model, validating after every epoch.
training_callbacks = [early_stopping, checkpoint]
history = lstm_model.fit(
    x=train_dataset,
    epochs=epochs,
    validation_data=val_dataset,
    callbacks=training_callbacks,
    verbose=1,  # show a progress bar for each epoch
)

Epoch 1/50
   3252/Unknown - 727s 222ms/step - loss: 1.2608 - lstm_lat_output_loss: 0.2927 - lstm_lng_output_loss: 0.2832 - lstm_lat_output_mse: 0.2927 - lstm_lat_output_rmse: 0.4255 - lstm_lat_output_mae: 0.3284 - lstm_lng_output_mse: 0.2832 - lstm_lng_output_rmse: 0.4212 - lstm_lng_output_mae: 0.3248



INFO:tensorflow:Assets written to: e:\YuCheng\master_thesis\multi-step\weights\month\non_cumulative_diff\lstm_multitask\lstm_mt_diff3-2_e01v0.5169\assets


INFO:tensorflow:Assets written to: e:\YuCheng\master_thesis\multi-step\weights\month\non_cumulative_diff\lstm_multitask\lstm_mt_diff3-2_e01v0.5169\assets


Epoch 2/50



INFO:tensorflow:Assets written to: e:\YuCheng\master_thesis\multi-step\weights\month\non_cumulative_diff\lstm_multitask\lstm_mt_diff3-2_e02v0.0896\assets


INFO:tensorflow:Assets written to: e:\YuCheng\master_thesis\multi-step\weights\month\non_cumulative_diff\lstm_multitask\lstm_mt_diff3-2_e02v0.0896\assets


Epoch 3/50



INFO:tensorflow:Assets written to: e:\YuCheng\master_thesis\multi-step\weights\month\non_cumulative_diff\lstm_multitask\lstm_mt_diff3-2_e03v0.0012\assets


INFO:tensorflow:Assets written to: e:\YuCheng\master_thesis\multi-step\weights\month\non_cumulative_diff\lstm_multitask\lstm_mt_diff3-2_e03v0.0012\assets


Epoch 4/50



INFO:tensorflow:Assets written to: e:\YuCheng\master_thesis\multi-step\weights\month\non_cumulative_diff\lstm_multitask\lstm_mt_diff3-2_e04v0.0007\assets


INFO:tensorflow:Assets written to: e:\YuCheng\master_thesis\multi-step\weights\month\non_cumulative_diff\lstm_multitask\lstm_mt_diff3-2_e04v0.0007\assets


Epoch 5/50
Epoch 6/50



INFO:tensorflow:Assets written to: e:\YuCheng\master_thesis\multi-step\weights\month\non_cumulative_diff\lstm_multitask\lstm_mt_diff3-2_e06v0.0007\assets


INFO:tensorflow:Assets written to: e:\YuCheng\master_thesis\multi-step\weights\month\non_cumulative_diff\lstm_multitask\lstm_mt_diff3-2_e06v0.0007\assets


Epoch 7/50
Epoch 8/50
Epoch 9/50



INFO:tensorflow:Assets written to: e:\YuCheng\master_thesis\multi-step\weights\month\non_cumulative_diff\lstm_multitask\lstm_mt_diff3-2_e09v0.0007\assets


INFO:tensorflow:Assets written to: e:\YuCheng\master_thesis\multi-step\weights\month\non_cumulative_diff\lstm_multitask\lstm_mt_diff3-2_e09v0.0007\assets


Epoch 10/50
Epoch 11/50



INFO:tensorflow:Assets written to: e:\YuCheng\master_thesis\multi-step\weights\month\non_cumulative_diff\lstm_multitask\lstm_mt_diff3-2_e11v0.0007\assets


INFO:tensorflow:Assets written to: e:\YuCheng\master_thesis\multi-step\weights\month\non_cumulative_diff\lstm_multitask\lstm_mt_diff3-2_e11v0.0007\assets


Epoch 12/50
Epoch 13/50



INFO:tensorflow:Assets written to: e:\YuCheng\master_thesis\multi-step\weights\month\non_cumulative_diff\lstm_multitask\lstm_mt_diff3-2_e13v0.0006\assets


INFO:tensorflow:Assets written to: e:\YuCheng\master_thesis\multi-step\weights\month\non_cumulative_diff\lstm_multitask\lstm_mt_diff3-2_e13v0.0006\assets


Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50



INFO:tensorflow:Assets written to: e:\YuCheng\master_thesis\multi-step\weights\month\non_cumulative_diff\lstm_multitask\lstm_mt_diff3-2_e18v0.0006\assets


INFO:tensorflow:Assets written to: e:\YuCheng\master_thesis\multi-step\weights\month\non_cumulative_diff\lstm_multitask\lstm_mt_diff3-2_e18v0.0006\assets


Epoch 19/50



INFO:tensorflow:Assets written to: e:\YuCheng\master_thesis\multi-step\weights\month\non_cumulative_diff\lstm_multitask\lstm_mt_diff3-2_e19v0.0006\assets


INFO:tensorflow:Assets written to: e:\YuCheng\master_thesis\multi-step\weights\month\non_cumulative_diff\lstm_multitask\lstm_mt_diff3-2_e19v0.0006\assets


Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 24: early stopping


# 評估模型

In [8]:
# CSV columns holding the two track coordinates
column_name = ['Latitude', 'Longitude']
# Sliding-window parameters
window_size = 3  # number of past differences in each input window
step_size = 2  # number of future differences used as labels

# Build the sliding-window dataset for the test files.
test_dataset = create_csv_sliding_window_dataset(
    csv_files=test_files,
    column_name=column_name,
    scaler=scaler,
    window_size=window_size,
    step_size=step_size,
    mode='test',
)

In [9]:
# Display the declared element structure (input tensor, label dict) of the test dataset.
test_dataset.element_spec

(TensorSpec(shape=(3, 2), dtype=tf.float32, name=None),
 {'lstm_lat_output': TensorSpec(shape=(2,), dtype=tf.float32, name=None),
  'lstm_lng_output': TensorSpec(shape=(2,), dtype=tf.float32, name=None)})

In [10]:
# Same hyper-parameter values as the training cell (only batch_size is used below)
batch_size = 32
epochs = 50

# Batch the samples and prefetch so that data loading overlaps with evaluation.
test_dataset = (
    test_dataset
    .batch(batch_size)
    .prefetch(tf.data.experimental.AUTOTUNE)
)

In [11]:
# Saved checkpoint to evaluate for each split mode, relative to the working directory.
# Paths are assembled from components so they do not depend on the Windows separator.
checkpoint_dirs = {
    'month': os.path.join('weights', 'month', 'non_cumulative_diff',
                          'lstm_multitask', 'lstm_mt_diff3-2_e19v0.0006'),
    # 'sequence': os.path.join('weights', 'sequence', 'lstm_multitask_0419',
    #                          'lstm_mt_diff2-1_e08v0.0010'),
}

# model_path is always defined, even for a mode without a registered checkpoint.
relative_dir = checkpoint_dirs.get(split_data_mode)
model_path = os.path.join(os.getcwd(), relative_dir) if relative_dir else None

if model_path and os.path.exists(model_path):
    # The custom rmse metric has to be supplied for the saved model to deserialize.
    lstm_model = load_model(model_path, custom_objects={'rmse': rmse})
    print('Load model successfully!')
    # summary() prints the table itself and returns None.
    lstm_model.summary()
else:
    # Make it visible that no checkpoint was loaded instead of skipping silently.
    print(f'No saved model loaded for split_data_mode={split_data_mode!r} (path: {model_path})')

Load model successfully!
Model: "lstm_multi_task"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 lstm_input (InputLayer)        [(None, 3, 2)]       0           []                               
                                                                                                  
 lstm_1 (LSTM)                  (None, 3, 32)        4480        ['lstm_input[0][0]']             
                                                                                                  
 lstm_1-bn_1 (BatchNormalizatio  (None, 3, 32)       128         ['lstm_1[0][0]']                 
 n)                                                                                               
                                                                                                  
 lstm_1-dropout_1 (Dropout)     (None, 3, 32)        0     

In [None]:
# evaluation = lstm_model.evaluate(test_dataset)

# print("=== 經緯度差（difference）評估 ===")

# print(f'Total Loss: {evaluation[0]:.5f}')         # 總損失

# print(f'Latitude Loss: {evaluation[1]:.5f}')       # 緯度 MSE
# print(f'Longitude Loss: {evaluation[2]:.5f}')      # 經度 MSE

# print(f'Latitude MSE: {evaluation[3]:.5f}')       # 緯度 MSE
# print(f'Longitude MSE: {evaluation[6]:.5f}')      # 經度 MSE

# print(f'Latitude RMSE: {evaluation[4]:.5f}')       # 緯度 RMSE
# print(f'Longitude RMSE: {evaluation[7]:.5f}')      # 經度 RMSE

# print(f'Latitude MAE: {evaluation[5]:.5f}')       # 緯度 MAE
# print(f'Longitude MAE: {evaluation[8]:.5f}')      # 經度 MAE

# import math

# # ============= 計算誤差(公里) =============
# def calculate_mae_distance(lat_mae: float, lon_mae: float, latitude: float = 25.071182):
#     lat_km = lat_mae * 111
#     lon_km = lon_mae * 111 * math.cos(math.radians(latitude))
#     return math.sqrt(lat_km**2 + lon_km**2)

# # 範例數據：經度差 MAE = 0.02，緯度差 MAE = 0.01，五分山雷達站的緯度 = 23.5
# lat_mae = evaluation[5]
# lon_mae = evaluation[8]
# # 固定緯度
# # 五分山雷達站的經緯度
# center_lat = 25.071182
# center_lon = 121.781205

# mae_distance = calculate_mae_distance(lat_mae, lon_mae, center_lat)
# print(f"Average distance error (via MAE): {mae_distance:.2f} km")

In [12]:
from haversine import haversine


def _summarize_position_errors(title, lat_err, lng_err, dist_km, trailing_blank_line=True):
    """
    Print one evaluation section and return its metrics.

    Args:
        title: header line printed before the metrics.
        lat_err: latitude errors (real - predicted), in degrees.
        lng_err: longitude errors (real - predicted), in degrees.
        dist_km: haversine distances between real and predicted positions, in km.
        trailing_blank_line: whether the last printed line ends with an extra newline.

    Returns:
        Tuple `(lat_mse, lng_mse, lat_mae, lng_mae, lat_rmse, lng_rmse, mean_dist)`.
    """
    lat_mae = np.mean(np.abs(lat_err))
    lng_mae = np.mean(np.abs(lng_err))
    lat_mse = np.mean(np.square(lat_err))
    lng_mse = np.mean(np.square(lng_err))
    lat_rmse = np.sqrt(lat_mse)
    lng_rmse = np.sqrt(lng_mse)
    mean_dist = np.mean(dist_km)

    # NOTE(review): the MSE lines are labelled 度 (degrees) although a squared error
    # is in degrees squared; the printed text is kept unchanged here.
    print(title)
    print(f'Latitude MSE: {lat_mse:.6f} 度')
    print(f'Longitude MSE: {lng_mse:.6f} 度')
    print(f'Latitude MAE: {lat_mae:.6f} 度')
    print(f'Longitude MAE: {lng_mae:.6f} 度')
    print(f'Latitude RMSE: {lat_rmse:.6f}')
    print(f'Longitude RMSE: {lng_rmse:.6f}')
    suffix = '\n' if trailing_blank_line else ''
    print(f'Average Haversine distance error: {mean_dist:.3f} km' + suffix)

    return lat_mse, lng_mse, lat_mae, lng_mae, lat_rmse, lng_rmse, mean_dist


# Errors over both forecast steps together
distances = []
lat_errors = []
lng_errors = []

# Errors kept separately for the t+1 and t+2 forecasts
lat_errors_1, lng_errors_1, dist_1 = [], [], []
lat_errors_2, lng_errors_2, dist_2 = [], [], []

# NOTE: the reconstruction below is written for exactly two forecast steps (t+1, t+2).
for csv_file in test_files:
    df = pd.read_csv(csv_file)

    if len(df) < (window_size + step_size) + 1:
        # Too short: at least window_size + step_size + 1 rows are needed for one window.
        print(f"資料長度不足，跳過 {csv_file}")
        continue

    # True coordinate sequences
    lats = df['Latitude'].values
    lngs = df['Longitude'].values

    # Row-to-row differences, scaled the same way as the training inputs
    delta_lat = np.diff(lats)
    delta_lng = np.diff(lngs)
    combined_data = np.vstack([delta_lat, delta_lng]).T
    scaled_data_diff = scaler.transform(combined_data)

    # One input window per start index. A separate name is used so that the Keras
    # `inputs` tensor from the model-building cell is not overwritten.
    window_batch = np.array([
        scaled_data_diff[i: i + window_size]
        for i in range(0, len(scaled_data_diff) - window_size - 1, 1)
    ])

    # Predicted differences for every window; verbose=0 avoids one progress bar per file.
    pred_lats_diff, pred_lngs_diff = lstm_model.predict(window_batch, verbose=0)

    for i, (dlat, dlng) in enumerate(zip(pred_lats_diff, pred_lngs_diff)):
        # Index of the last observed position covered by window i.
        last_known = i + window_size

        # Turn the predicted differences back into positions by accumulating them.
        pred_lat1 = lats[last_known] + dlat[0]
        pred_lat2 = pred_lat1 + dlat[1]
        pred_lng1 = lngs[last_known] + dlng[0]
        pred_lng2 = pred_lng1 + dlng[1]

        real_lat1, real_lng1 = lats[last_known + 1], lngs[last_known + 1]
        real_lat2, real_lng2 = lats[last_known + 2], lngs[last_known + 2]

        lat_err1, lng_err1 = real_lat1 - pred_lat1, real_lng1 - pred_lng1
        lat_err2, lng_err2 = real_lat2 - pred_lat2, real_lng2 - pred_lng2
        # Each distance is computed once and reused for the overall and per-step lists.
        d1 = haversine((real_lat1, real_lng1), (pred_lat1, pred_lng1))
        d2 = haversine((real_lat2, real_lng2), (pred_lat2, pred_lng2))

        # Overall
        lat_errors.extend([lat_err1, lat_err2])
        lng_errors.extend([lng_err1, lng_err2])
        distances.extend([d1, d2])

        # t+1
        lat_errors_1.append(lat_err1)
        lng_errors_1.append(lng_err1)
        dist_1.append(d1)

        # t+2
        lat_errors_2.append(lat_err2)
        lng_errors_2.append(lng_err2)
        dist_2.append(d2)

# ========= All forecasts =========
(lat_mse_pos, lng_mse_pos, lat_mae_pos, lng_mae_pos,
 lat_rmse_pos, lng_rmse_pos, avg_distance) = _summarize_position_errors(
    "=== 經緯度位置（還原後）總體評估 ===", lat_errors, lng_errors, distances)

# ========= t+1 forecasts =========
(lat1_mse, lng1_mse, lat1_mae, lng1_mae,
 lat1_rmse, lng1_rmse, dist1_avg) = _summarize_position_errors(
    "=== 第一步預測 (t+1) 評估 ===", lat_errors_1, lng_errors_1, dist_1)

# ========= t+2 forecasts =========
(lat2_mse, lng2_mse, lat2_mae, lng2_mae,
 lat2_rmse, lng2_rmse, dist2_avg) = _summarize_position_errors(
    "=== 第二步預測 (t+2) 評估 ===", lat_errors_2, lng_errors_2, dist_2,
    trailing_blank_line=False)


資料長度不足，跳過 H:\cell_data_processed\cells\20210127-014708_2_014708.csv
資料長度不足，跳過 H:\cell_data_processed\cells\20210127-210009_1_210009.csv
資料長度不足，跳過 H:\cell_data_processed\cells\20210127-213547_3_213547.csv
資料長度不足，跳過 H:\cell_data_processed\cells\20210127-231049_2_231049.csv
資料長度不足，跳過 H:\cell_data_processed\cells\20210127-235823_8_235823.csv
資料長度不足，跳過 H:\cell_data_processed\cells\20210128-004241_11_004241.csv
資料長度不足，跳過 H:\cell_data_processed\cells\20210128-012415_16_012415.csv
資料長度不足，跳過 H:\cell_data_processed\cells\20210224-000019_68_000019.csv
資料長度不足，跳過 H:\cell_data_processed\cells\20210224-000615_80_000615.csv
資料長度不足，跳過 H:\cell_data_processed\cells\20210224-001212_78_001212.csv
資料長度不足，跳過 H:\cell_data_processed\cells\20210224-001212_82_001212.csv
資料長度不足，跳過 H:\cell_data_processed\cells\20210224-005942_83_005942.csv
資料長度不足，跳過 H:\cell_data_processed\cells\20210224-012922_93_012922.csv
資料長度不足，跳過 H:\cell_data_processed\cells\20210224-015309_93_015309.csv
資料長度不足，跳過 H:\cell_data_processed\cells\