In [1]:
import os
import cv2
import joblib
import numpy as np
import pandas as pd
import tensorflow as tf
import calendar
from modules.metrics import rmse
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.layers import (
    Input, LSTM, ConvLSTM2D, Dense,
    Dropout, SpatialDropout2D, MaxPooling2D, BatchNormalization,
    TimeDistributed, LeakyReLU, Flatten, Average, Concatenate
)

In [2]:
# Ask TensorFlow which physical GPUs it can see; an empty list skips the setup.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Memory growth has to be configured identically on every GPU.
        for device in gpus:
            tf.config.experimental.set_memory_growth(device, True)
        logical_gpus = tf.config.list_logical_devices('GPU')
        n_physical, n_logical = len(gpus), len(logical_gpus)
        print(n_physical, "Physical GPUs,", n_logical, "Logical GPUs")
    except RuntimeError as e:
        # Raised when memory growth is changed after the GPUs were initialized.
        print(e)
print(gpus)

1 Physical GPUs, 1 Logical GPUs
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


# 資料分割 (Data splitting)

In [3]:
# split data by month
def split_data_by_month(data_path):
    """
    Split the entries of ``data_path`` into train/val/test by day of month.

    Every entry name is expected to start with ``YYYYMMDD`` followed by ``-``
    (e.g. ``20210101-160505_1_160505.csv``). Within each calendar month the
    first 70% of the days go to training, days up to 85% go to validation and
    the remaining days go to testing. Entries whose date is one of the
    hand-picked excluded cases always go to the test split.

    Args:
        data_path: Directory whose entries are split.

    Returns:
        Tuple ``(train_files, val_files, test_files)`` of path lists, each in
        sorted entry-name order.

    Raises:
        ValueError: If an entry name does not start with a parsable date.
    """
    excluded_cases = {'20210116', '20210530', '20210825', '20210722', '20220904'}
    splits = {'train': [], 'val': [], 'test': []}

    for entry in sorted(os.listdir(data_path)):  # 20210101-160505_1_160505.csv
        date_str = entry.split('-')[0]
        year, month, day = int(date_str[0:4]), int(date_str[4:6]), int(date_str[6:8])

        # Day-of-month cut-offs derived from the length of this month.
        month_length = calendar.monthrange(year, month)[1]
        train_last_day = int(month_length * 0.7)
        val_last_day = int(month_length * 0.85)

        if date_str in excluded_cases:
            # Held-out cases are evaluated only, never trained or validated on.
            target = 'test'
        elif day <= train_last_day:
            target = 'train'
        elif day <= val_last_day:
            target = 'val'
        else:
            target = 'test'

        splits[target].append(os.path.join(data_path, entry))

    return (splits['train'], splits['val'], splits['test'])

# split data by sequence
def split_data_by_sequence(data_path):
    """
    Split the entries of ``data_path`` chronologically by date prefix.

    Entries dated 2021 through 2022-06 go to training, 2022-07..09 to
    validation and 2022-10..12 to testing. Entries whose date starts with one
    of the hand-picked excluded cases always go to the test split. Entries
    matching none of the prefixes are dropped.

    Args:
        data_path: Directory whose entries are split.

    Returns:
        Tuple ``(train_files, val_files, test_files)`` of path lists, each in
        sorted entry-name order.
    """
    # str.startswith accepts a tuple of prefixes, so each group is one tuple.
    excluded_prefixes = tuple({'20210116', '20210530', '20210825', '20210722', '20220904'})
    train_prefixes = ('2021', '202201', '202202', '202203', '202204', '202205', '202206')
    val_prefixes = ('202207', '202208', '202209')
    test_prefixes = ('202210', '202211', '202212')

    train_files, val_files, test_files = [], [], []

    for entry in sorted(os.listdir(data_path)):  # 20210101-160505_1_160505.csv
        date_str = entry.split('-')[0]
        entry_path = os.path.join(data_path, entry)

        if date_str.startswith(excluded_prefixes):
            # Held-out case: test only, never train or validation.
            test_files.append(entry_path)
        elif date_str.startswith(train_prefixes):
            train_files.append(entry_path)
        elif date_str.startswith(val_prefixes):
            val_files.append(entry_path)
        elif date_str.startswith(test_prefixes):
            test_files.append(entry_path)

    return (train_files, val_files, test_files)


# intersection of the month-based and sequence-based test splits
def intersect_test_files(cells_radar_path, cells_csv_path):
    """
    Collect radar entries dated 2022-10..12 whose day is past the 85% cut-off.

    An entry is kept when its date prefix is ``202210``, ``202211`` or
    ``202212`` and its day of month is greater than
    ``int(days_in_month * 0.85)``. The matching CSV path is
    ``<cells_csv_path>/<entry name>.csv``.

    NOTE(review): the hand-picked excluded cases that the two split functions
    send to their test sets are not included here — confirm this is intended.

    Args:
        cells_radar_path: Directory holding one entry per radar sequence,
            named ``YYYYMMDD-...``.
        cells_csv_path: Directory holding the per-sequence CSV files.

    Returns:
        Tuple ``(test_radar_files, test_csv_files)`` of equally long path
        lists in sorted entry-name order.

    Raises:
        ValueError: If a selected entry name does not start with a parsable
            date.
    """
    test_radar_files = []

    for file_name in sorted(os.listdir(cells_radar_path)):  # 20210101-160505_1_160505.csv
        date_str = file_name.split('-')[0]

        # Only the last quarter of 2022 is considered.
        if not date_str.startswith(('202210', '202211', '202212')):
            continue

        year = int(date_str[0:4])
        month = int(date_str[4:6])
        day = int(date_str[6:8])

        # Same 85% day-of-month cut-off used by the month-based split.
        days_in_month = calendar.monthrange(year, month)[1]
        part2_end = int(days_in_month * 0.85)

        if day > part2_end:
            test_radar_files.append(os.path.join(cells_radar_path, file_name))

    test_csv_files = [
        os.path.join(cells_csv_path, os.path.basename(radar_file) + '.csv')
        for radar_file in test_radar_files
    ]

    return test_radar_files, test_csv_files


def get_cells_csv_files(cells_csv_path, train_radar_files, val_radar_files, test_radar_files):
    """
    Map each radar path to its CSV path, split by split.

    The CSV path of a radar entry is ``<cells_csv_path>/<basename>.csv``.

    Args:
        cells_csv_path: Directory holding the per-sequence CSV files.
        train_radar_files: Radar paths of the training split.
        val_radar_files: Radar paths of the validation split.
        test_radar_files: Radar paths of the test split.

    Returns:
        Tuple ``(train_cells_files, val_cells_files, test_cells_files)`` of
        lists, each in the order of the corresponding radar list.
    """
    csv_splits = []
    for radar_split in (train_radar_files, val_radar_files, test_radar_files):
        csv_splits.append([
            os.path.join(cells_csv_path, os.path.basename(radar_file) + '.csv')
            for radar_file in radar_split
        ])

    train_cells_files, val_cells_files, test_cells_files = csv_splits
    return train_cells_files, val_cells_files, test_cells_files

# 資料生成器 (Data generator)

In [4]:
def radar_grid_diff_processing(radar_grids):
    """
    Compute the absolute difference between each pair of consecutive grids.

    Args:
        radar_grids: Sequence of radar grids indexed along its first axis.

    Returns:
        Array stacking ``cv2.absdiff(radar_grids[i + 1], radar_grids[i])`` for
        every consecutive pair, with a trailing axis of size 1 appended. It
        holds one entry fewer than ``radar_grids``.
    """
    frame_diffs = [
        cv2.absdiff(later, earlier)
        for earlier, later in zip(radar_grids[:-1], radar_grids[1:])
    ]
    stacked = np.array(frame_diffs)
    return np.expand_dims(stacked, axis=-1)

def multimodal_sliding_window_generator(radar_files, csv_files, column_name, window_size, step_size, height, width, channels, scaler, mode):
    """
    Yield sliding-window samples that pair radar-grid differences with CSV differences.

    For every (radar folder, CSV file) pair the radar grids are loaded in
    sorted name order, resized, and turned into frame-to-frame absolute
    differences. The two CSV columns are first-differenced. Each window of
    ``window_size`` consecutive difference steps becomes one sample; the label
    is the unscaled difference of the step right after the window.

    Pairs are skipped when the number of radar differences and CSV differences
    disagree, or when there are too few steps for one window plus its label.

    Args:
        radar_files: Folders, each holding the ``.npy`` grids of one sequence.
        csv_files: CSV files, aligned one-to-one with ``radar_files``.
        column_name: Two column names; the first feeds the ``lat`` output and
            the second the ``lng`` output.
        window_size: Number of difference steps per sample.
        step_size: Stride between consecutive window starts.
        height: Target number of rows of each resized grid.
        width: Target number of columns of each resized grid.
        channels: Unused here; kept for interface compatibility.
        scaler: Object whose ``transform`` scales the stacked two-column
            differences used as LSTM input.
        mode: Unused here; kept for interface compatibility.

    Yields:
        ``(inputs, labels)`` where ``inputs`` has keys ``'lstm_input'`` and
        ``'convlstm_input'`` and ``labels`` has keys
        ``'multimodal_lat_output'`` and ``'multimodal_lng_output'`` (each a
        float32 array of shape ``(1,)``).
    """
    for radar_folder_path, csv_file_path in zip(radar_files, csv_files):
        # Load the grids in name order and compute frame-to-frame differences.
        radar_grids = []
        for radar_grid_name in sorted(os.listdir(radar_folder_path)):
            radar_grid = np.load(os.path.join(radar_folder_path, radar_grid_name))
            # cv2.resize expects dsize as (width, height); the result then has
            # `height` rows and `width` columns.
            radar_grids.append(cv2.resize(radar_grid, (width, height)))

        diff_radar_grids = radar_grid_diff_processing(np.array(radar_grids))

        # Read the CSV and first-difference both columns together so the two
        # series stay row-aligned after dropping NaN rows.
        data = pd.read_csv(csv_file_path, encoding='utf-8',
                           dtype={'fileName': str, 'day': str, 'time': str})
        data_diff = data[column_name].diff().dropna().reset_index(drop=True)

        # Radar and CSV differences must describe the same time steps.
        if len(diff_radar_grids) != len(data_diff):
            continue

        # Need at least one full window plus the following step as its label;
        # this also keeps empty arrays away from the scaler.
        if len(data_diff) <= window_size:
            continue

        lat_diff = data_diff[column_name[0]].values
        lng_diff = data_diff[column_name[1]].values

        # LSTM input: the two difference series as columns, scaled.
        combined_data = np.vstack([lat_diff, lng_diff]).T
        scaled_data_diff = scaler.transform(combined_data)

        # Yield windows one at a time instead of buffering the whole sequence.
        for start in range(0, len(diff_radar_grids) - window_size, step_size):
            stop = start + window_size
            yield {
                'lstm_input': np.array(scaled_data_diff[start:stop], dtype=np.float32),
                'convlstm_input': np.array(diff_radar_grids[start:stop], dtype=np.float32)
            }, {
                # Labels are the raw (unscaled) differences of the next step.
                'multimodal_lat_output': np.array([lat_diff[stop]], dtype=np.float32),
                'multimodal_lng_output': np.array([lng_diff[stop]], dtype=np.float32)
            }


def create_multimodal_sliding_window_dataset(radar_files, csv_files, column_name, window_size, step_size, height, width, channels, scaler, mode):
    """
    Wrap ``multimodal_sliding_window_generator`` in a ``tf.data.Dataset``.

    All arguments are forwarded unchanged to the generator. The declared
    element signature is a pair of dicts: inputs ``'lstm_input'`` of shape
    ``(window_size, 2)`` and ``'convlstm_input'`` of shape
    ``(window_size, height, width, channels)``, and labels
    ``'multimodal_lat_output'`` / ``'multimodal_lng_output'`` of shape
    ``(1,)``, all float32.

    Returns:
        An unbatched ``tf.data.Dataset`` built with ``from_generator``.
    """
    feature_spec = {
        'lstm_input': tf.TensorSpec(shape=(window_size, 2), dtype=tf.float32),
        'convlstm_input': tf.TensorSpec(shape=(window_size, height, width, channels), dtype=tf.float32),
    }
    label_spec = {
        'multimodal_lat_output': tf.TensorSpec(shape=(1,), dtype=tf.float32),
        'multimodal_lng_output': tf.TensorSpec(shape=(1,), dtype=tf.float32),
    }

    def sample_stream():
        # from_generator needs a zero-argument callable returning a fresh generator.
        return multimodal_sliding_window_generator(
            radar_files, csv_files, column_name, window_size, step_size,
            height, width, channels, scaler, mode)

    return tf.data.Dataset.from_generator(
        sample_stream,
        output_signature=(feature_spec, label_spec),
    )

In [5]:
split_data_mode = 'month'  # 'month' or 'sequence' or 'old_dataset'

cells_csv_path = r'H:\cell_data_processed\cells'
cells_radar_path = r'H:\cell_data_processed\radar\grids\global'

# 'month' uses the day-of-month split; every other mode uses the sequence split.
split_radar_files = split_data_by_month if split_data_mode == 'month' else split_data_by_sequence

train_radar_files, val_radar_files, test_radar_files = split_radar_files(cells_radar_path)

# Derive the CSV path of every radar entry, split by split.
train_csv_files, val_csv_files, test_csv_files = get_cells_csv_files(
    cells_csv_path,
    train_radar_files=train_radar_files,
    val_radar_files=val_radar_files,
    test_radar_files=test_radar_files)

# test_radar_files, test_csv_files = intersect_test_files(cells_radar_path, cells_csv_path)

# Report the size of every split (radar first, then CSV).
for label, files in (('train_radar_files', train_radar_files),
                     ('val_radar_files', val_radar_files),
                     ('test_radar_files', test_radar_files)):
    print(f'{label}:', len(files))
print('---' * 10)
for label, files in (('train_csv_files', train_csv_files),
                     ('val_csv_files', val_csv_files),
                     ('test_csv_files', test_csv_files)):
    print(f'{label}:', len(files))

train_radar_files: 53023
val_radar_files: 13103
test_radar_files: 14647
------------------------------
train_csv_files: 53023
val_csv_files: 13103
test_csv_files: 14647


In [6]:
column_name = ['Latitude', 'Longitude']
# Sliding-window parameters
window_size = 2  # number of difference steps per window
step_size = 1  # stride between consecutive window starts

height, width, channels = 224, 224, 1  # grids are resized to 224x224 with a single channel

# Load the scaler stored for the selected split mode
scaler = joblib.load(f'config/{split_data_mode}/lstm_multitask_scaler.gz')

# Arguments shared by every dataset built from the radar grids and CSV tracks
window_dataset_args = (column_name, window_size, step_size, height, width, channels, scaler)

train_dataset = create_multimodal_sliding_window_dataset(
    train_radar_files, train_csv_files, *window_dataset_args, mode='train')

val_dataset = create_multimodal_sliding_window_dataset(
    val_radar_files, val_csv_files, *window_dataset_args, mode='val')

In [7]:
# Print the element signature of the training dataset, then of the validation dataset.
for dataset in (train_dataset, val_dataset):
    print(dataset.element_spec)

({'lstm_input': TensorSpec(shape=(2, 2), dtype=tf.float32, name=None), 'convlstm_input': TensorSpec(shape=(2, 224, 224, 1), dtype=tf.float32, name=None)}, {'multimodal_lat_output': TensorSpec(shape=(1,), dtype=tf.float32, name=None), 'multimodal_lng_output': TensorSpec(shape=(1,), dtype=tf.float32, name=None)})
({'lstm_input': TensorSpec(shape=(2, 2), dtype=tf.float32, name=None), 'convlstm_input': TensorSpec(shape=(2, 224, 224, 1), dtype=tf.float32, name=None)}, {'multimodal_lat_output': TensorSpec(shape=(1,), dtype=tf.float32, name=None), 'multimodal_lng_output': TensorSpec(shape=(1,), dtype=tf.float32, name=None)})


# 建立模型 (Build the model)

In [9]:
# Checkpoint name of the pre-trained LSTM sub-model per supported split mode.
lstm_model_names = {
    'month': 'lstm_mt_diff2-1_e08v0.0005',
    # 'sequence': 'lstm_mt_diff2-1_e06v0.0011',
}

# Fail with a clear message instead of a NameError further down.
if split_data_mode not in lstm_model_names:
    raise ValueError(
        f"No LSTM checkpoint configured for split_data_mode={split_data_mode!r}")
lstm_model_name = lstm_model_names[split_data_mode]

# lstm model; os.path.join keeps the path valid on every platform
lstm_model = load_model(
    os.path.join('weights', split_data_mode, 'lstm_multitask', lstm_model_name),
    custom_objects={'rmse': rmse}
)

lstm_model.summary()

Model: "lstm_multi_task"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 lstm_input (InputLayer)        [(None, 2, 2)]       0           []                               
                                                                                                  
 lstm_1 (LSTM)                  (None, 2, 32)        4480        ['lstm_input[0][0]']             
                                                                                                  
 lstm_1-bn_1 (BatchNormalizatio  (None, 2, 32)       128         ['lstm_1[0][0]']                 
 n)                                                                                               
                                                                                                  
 lstm_1-dropout_1 (Dropout)     (None, 2, 32)        0           ['lstm_1-bn_1[0][0]

In [10]:
# Checkpoint name of the pre-trained ConvLSTM sub-model per supported split mode.
convlstm_model_names = {
    'month': 'convlstm_mt_diff2-1_e01v0.0014',
    # 'sequence': 'convlstm_mt_diff2-1_e01v0.0018',
}

# Fail with a clear message instead of a NameError further down.
if split_data_mode not in convlstm_model_names:
    raise ValueError(
        f"No ConvLSTM checkpoint configured for split_data_mode={split_data_mode!r}")
convlstm_model_name = convlstm_model_names[split_data_mode]

# os.path.join keeps the path valid on every platform
convlstm_model = load_model(
    os.path.join('weights', split_data_mode, 'convlstm_multitask', convlstm_model_name),
    custom_objects={'rmse': rmse}
)
convlstm_model.summary()

Model: "convlstm_multi_task"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 convlstm_input (InputLayer)    [(None, 2, 224, 224  0           []                               
                                , 1)]                                                             
                                                                                                  
 convlstm_1 (ConvLSTM2D)        (None, 2, 224, 224,  38144       ['convlstm_input[0][0]']         
                                 32)                                                              
                                                                                                  
 time-distributed_1-1 (TimeDist  (None, 2, 224, 224,  128        ['convlstm_1[0][0]']             
 ributed)                        32)                                            

In [11]:
# Freeze every layer of the ConvLSTM and LSTM sub-models.
for sub_model in (convlstm_model, lstm_model):
    for layer in sub_model.layers:
        layer.trainable = False

In [12]:
# Output-level fusion
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Concatenate, Softmax, Multiply, Lambda, Add
from tensorflow.keras.models import Model


def attention_fuse(conv_output, lstm_output, prefix, fused_name):
    """
    Blend one ConvLSTM output and one LSTM output with learned softmax weights.

    The two outputs are concatenated, a Dense(2, softmax) layer produces one
    weight per branch, and the result is the weighted sum of both outputs.

    Args:
        conv_output: Output tensor of the ConvLSTM sub-model for this target.
        lstm_output: Output tensor of the LSTM sub-model for this target.
        prefix: Name prefix for the concat/attention layers ('lat' or 'lng').
        fused_name: Name of the final Add layer.

    Returns:
        The fused output tensor.
    """
    concat = Concatenate(name=f'{prefix}_concat')([conv_output, lstm_output])

    # Softmax makes the two branch weights sum to 1.
    attention = Dense(2, activation='softmax', name=f'{prefix}_attention')(concat)

    # Column 0 weights the ConvLSTM branch, column 1 the LSTM branch.
    weight_conv = Lambda(lambda x: x[:, 0:1])(attention)
    weight_lstm = Lambda(lambda x: x[:, 1:2])(attention)

    weighted_conv = Multiply()([weight_conv, conv_output])
    weighted_lstm = Multiply()([weight_lstm, lstm_output])

    return Add(name=fused_name)([weighted_conv, weighted_lstm])


early_stopping = EarlyStopping(monitor='val_loss', patience=5,
                               verbose=1, mode='auto', restore_best_weights=True)

checkpoint = ModelCheckpoint(os.path.join(
    os.getcwd(), 'weights', split_data_mode, 'multimodal_multitask_output-level_fusion', 'attention', 'multimodal_mt_diff2-1_e{epoch:02d}v{val_loss:.4f}'),
    monitor='val_loss', save_best_only=True)

# Outputs of the already-trained sub-models: index 0 is used as latitude,
# index 1 as longitude (assumed shape (None, 1) each — TODO confirm).
conv_lat_output = convlstm_model.output[0]
lstm_lat_output = lstm_model.output[0]
conv_lng_output = convlstm_model.output[1]
lstm_lng_output = lstm_model.output[1]

# Latitude is built before longitude so auto-generated layer names keep their order.
lat_fused = attention_fuse(conv_lat_output, lstm_lat_output, 'lat', 'multimodal_lat_output')
lng_fused = attention_fuse(conv_lng_output, lstm_lng_output, 'lng', 'multimodal_lng_output')

# --- Define the fusion model ---
output_level_fusion_model = Model(
    inputs=[convlstm_model.input, lstm_model.input],
    outputs=[lat_fused, lng_fused],
    name='multimodal_output-level_fusion_model'
)

# Compile the model
output_level_fusion_model.compile(optimizer=Adam(learning_rate=0.0001),
                                  loss={'multimodal_lat_output': 'mse',
                                        'multimodal_lng_output': 'mse'},
                                  loss_weights={'multimodal_lat_output': 1.0,
                                                'multimodal_lng_output': 1.0},
                                  metrics={'multimodal_lat_output': ['mse', rmse, 'mae'],
                                           'multimodal_lng_output': ['mse', rmse, 'mae']})

# summary() prints the table itself and returns None, so it is not wrapped in print().
output_level_fusion_model.summary()

Model: "multimodal_output-level_fusion_model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 convlstm_input (InputLayer)    [(None, 2, 224, 224  0           []                               
                                , 1)]                                                             
                                                                                                  
 convlstm_1 (ConvLSTM2D)        (None, 2, 224, 224,  38144       ['convlstm_input[0][0]']         
                                 32)                                                              
                                                                                                  
 lstm_input (InputLayer)        [(None, 2, 2)]       0           []                               
                                                               

In [13]:
# Output-level fusion with an additional Dense head
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Concatenate, Softmax, Multiply, Lambda, Add
from tensorflow.keras.models import Model


def build_fused_dense_branch(conv_output, lstm_output, prefix):
    """
    Build one output branch: attention fusion followed by a small Dense head.

    The two sub-model outputs are concatenated, a Dense(2, softmax) layer
    produces one weight per branch, and their weighted sum is passed through
    two Dense/BatchNormalization/Dropout stages (64 then 32 units) and a
    final linear Dense(1).

    Args:
        conv_output: Output tensor of the ConvLSTM sub-model for this target.
        lstm_output: Output tensor of the LSTM sub-model for this target.
        prefix: 'lat' or 'lng'; used in every layer name of the branch.

    Returns:
        Output tensor of the layer named ``multimodal_<prefix>_output``.
    """
    concat = Concatenate(name=f'{prefix}_concat')([conv_output, lstm_output])

    # Softmax makes the two branch weights sum to 1.
    attention = Dense(2, activation='softmax', name=f'{prefix}_attention')(concat)

    # Column 0 weights the ConvLSTM branch, column 1 the LSTM branch.
    weight_conv = Lambda(lambda x: x[:, 0:1])(attention)
    weight_lstm = Lambda(lambda x: x[:, 1:2])(attention)

    weighted_conv = Multiply()([weight_conv, conv_output])
    weighted_lstm = Multiply()([weight_lstm, lstm_output])

    hidden = Add(name=f'multimodal_{prefix}_fused')([weighted_conv, weighted_lstm])

    # Dense block added on top of the fused value.
    for index, units in enumerate((64, 32), start=1):
        hidden = Dense(units, activation=LeakyReLU(), kernel_regularizer=l2(0.01),
                       name=f'dense_{prefix}_{index}')(hidden)
        hidden = BatchNormalization(name=f'bn_{prefix}_{index}')(hidden)
        hidden = Dropout(0.2, name=f'dropout_{prefix}_{index}')(hidden)

    return Dense(1, activation='linear', name=f'multimodal_{prefix}_output')(hidden)


early_stopping = EarlyStopping(monitor='val_loss', patience=5,
                               verbose=1, mode='auto', restore_best_weights=True)

# NOTE(review): 'add_desne' looks like a typo for 'add_dense'; left unchanged
# so that already-saved checkpoints keep resolving to the same directory.
checkpoint = ModelCheckpoint(os.path.join(
    os.getcwd(), 'weights', split_data_mode, 'multimodal_multitask_output-level_fusion-add_desne', 'attention', 'multimodal_mt_diff2-1_e{epoch:02d}v{val_loss:.4f}'),
    monitor='val_loss', save_best_only=True)

# Outputs of the already-trained sub-models: index 0 is used as latitude,
# index 1 as longitude (assumed shape (None, 1) each — TODO confirm).
conv_lat_output = convlstm_model.output[0]
lstm_lat_output = lstm_model.output[0]
conv_lng_output = convlstm_model.output[1]
lstm_lng_output = lstm_model.output[1]

# Latitude is built before longitude so auto-generated layer names keep their order.
lat_final_dense = build_fused_dense_branch(conv_lat_output, lstm_lat_output, 'lat')
lng_final_dense = build_fused_dense_branch(conv_lng_output, lstm_lng_output, 'lng')

# --- Define the fusion model ---
output_level_fusion_model = Model(
    inputs=[convlstm_model.input, lstm_model.input],
    outputs=[lat_final_dense, lng_final_dense],
    name='multimodal_output-level_fusion_model'
)

# Compile the model
output_level_fusion_model.compile(optimizer=Adam(learning_rate=0.0001),
                                  loss={'multimodal_lat_output': 'mse',
                                        'multimodal_lng_output': 'mse'},
                                  loss_weights={'multimodal_lat_output': 1.0,
                                                'multimodal_lng_output': 1.0},
                                  metrics={'multimodal_lat_output': ['mse', rmse, 'mae'],
                                           'multimodal_lng_output': ['mse', rmse, 'mae']})

# summary() prints the table itself and returns None, so it is not wrapped in print().
output_level_fusion_model.summary()

Model: "multimodal_output-level_fusion_model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 convlstm_input (InputLayer)    [(None, 2, 224, 224  0           []                               
                                , 1)]                                                             
                                                                                                  
 convlstm_1 (ConvLSTM2D)        (None, 2, 224, 224,  38144       ['convlstm_input[0][0]']         
                                 32)                                                              
                                                                                                  
 lstm_input (InputLayer)        [(None, 2, 2)]       0           []                               
                                                               

In [14]:
# Training hyper-parameters
batch_size = 4
epochs = 50

# Batch both datasets and let tf.data choose the prefetch buffer size.
# NOTE: the names are rebound in place, so re-running this cell batches the
# already-batched datasets again.
autotune = tf.data.experimental.AUTOTUNE
train_dataset = train_dataset.batch(batch_size).prefetch(autotune)
val_dataset = val_dataset.batch(batch_size).prefetch(autotune)

In [15]:
# Train the fusion model; validation runs on val_dataset after every epoch.
training_callbacks = [early_stopping, checkpoint]
history = output_level_fusion_model.fit(
    train_dataset,
    epochs=epochs,
    validation_data=val_dataset,
    callbacks=training_callbacks,
    verbose=1,  # show progress while training
)

Epoch 1/50


KeyboardInterrupt: 

# 評估模型 (Evaluate the model)

In [8]:
column_name = ['Latitude', 'Longitude']
# Sliding-window parameters: window length and stride between window starts
window_size, step_size = 2, 1

height, width, channels = 224, 224, 1  # grids are resized to 224x224 with a single channel

# Load the scaler stored for the selected split mode
scaler = joblib.load(f'config/{split_data_mode}/lstm_multitask_scaler.gz')

# Build the test dataset from the radar grids and CSV tracks
test_dataset = create_multimodal_sliding_window_dataset(
    radar_files=test_radar_files,
    csv_files=test_csv_files,
    column_name=column_name,
    window_size=window_size,
    step_size=step_size,
    height=height,
    width=width,
    channels=channels,
    scaler=scaler,
    mode='test')

In [9]:
# Display the element signature of the test dataset.
test_dataset.element_spec

({'lstm_input': TensorSpec(shape=(2, 2), dtype=tf.float32, name=None),
  'convlstm_input': TensorSpec(shape=(2, 224, 224, 1), dtype=tf.float32, name=None)},
 {'multimodal_lat_output': TensorSpec(shape=(1,), dtype=tf.float32, name=None),
  'multimodal_lng_output': TensorSpec(shape=(1,), dtype=tf.float32, name=None)})

In [10]:
# Batch size used for evaluation
batch_size = 4

# Batch the test dataset and let tf.data choose the prefetch buffer size.
# NOTE: the name is rebound in place, so re-running this cell batches the
# already-batched dataset again.
autotune = tf.data.experimental.AUTOTUNE
test_dataset = test_dataset.batch(batch_size).prefetch(autotune)

In [11]:
# Saved fusion-model checkpoint per supported split mode.
fusion_model_paths = {
    'month': os.path.join(
        os.getcwd(), 'weights', 'month', 'multimodal_multitask_output-level_fusion',
        'attention', 'multimodal_mt_diff2-1_e15v0.0006'),
    # 'sequence': os.path.join(
    #     os.getcwd(), 'weights', 'sequence', 'multimodal_multitask_output-level_fusion',
    #     'multimodal_mt_diff2-1_e06v0.0012'),
    # 'old_dataset': os.path.join(
    #     os.getcwd(), 'weights', 'old_dataset', 'multimodal-multitask',
    #     'multimodal_mt_diff2-1_e25v0.0008'),
}

# Fail with a clear message instead of a NameError further down.
if split_data_mode not in fusion_model_paths:
    raise ValueError(
        f"No fusion-model checkpoint configured for split_data_mode={split_data_mode!r}")
model_path = fusion_model_paths[split_data_mode]

if os.path.exists(model_path):
    output_level_fusion_model = load_model(model_path, custom_objects={'rmse': rmse})
    print('Load model successfully!')
    # summary() prints the table itself and returns None, so it is not wrapped in print().
    output_level_fusion_model.summary()
else:
    # Make the skipped load visible: later cells would otherwise silently use
    # whatever output_level_fusion_model is already in the kernel.
    print(f'WARNING: checkpoint not found, model was not loaded: {model_path}')

Load model successfully!
Model: "multimodal_output-level_fusion_model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 convlstm_input (InputLayer)    [(None, 2, 224, 224  0           []                               
                                , 1)]                                                             
                                                                                                  
 convlstm_1 (ConvLSTM2D)        (None, 2, 224, 224,  38144       ['convlstm_input[0][0]']         
                                 32)                                                              
                                                                                                  
 lstm_input (InputLayer)        [(None, 2, 2)]       0           []                               
                                      

In [12]:
# evaluation = output_level_fusion_model.evaluate(test_dataset)

# print("=== Á∂ìÁ∑ØÂ∫¶Â∑ÆÔºàdifferenceÔºâË©ï‰º∞ ===")

# print(f'Total Loss: {evaluation[0]:.5f}')         # Á∏ΩÊêçÂ§±

# print(f'Latitude Loss: {evaluation[1]:.5f}')       # Á∑ØÂ∫¶ MSE
# print(f'Longitude Loss: {evaluation[2]:.5f}')      # Á∂ìÂ∫¶ MSE

# print(f'Latitude MSE: {evaluation[3]:.5f}')       # Á∑ØÂ∫¶ MSE
# print(f'Longitude MSE: {evaluation[6]:.5f}')      # Á∂ìÂ∫¶ MSE

# print(f'Latitude RMSE: {evaluation[4]:.5f}')       # Á∑ØÂ∫¶ RMSE
# print(f'Longitude RMSE: {evaluation[7]:.5f}')      # Á∂ìÂ∫¶ RMSE

# print(f'Latitude MAE: {evaluation[5]:.5f}')       # Á∑ØÂ∫¶ MAE
# print(f'Longitude MAE: {evaluation[8]:.5f}')      # Á∂ìÂ∫¶ MAE

# import math

# # ============= Ë®àÁÆóË™§Â∑Æ(ÂÖ¨Èáå) =============
# def calculate_mae_distance(lat_mae: float, lon_mae: float, latitude: float = 25.071182):
#     lat_km = lat_mae * 111
#     lon_km = lon_mae * 111 * math.cos(math.radians(latitude))
#     return math.sqrt(lat_km**2 + lon_km**2)

# # ÁØÑ‰æãÊï∏ÊìöÔºöÁ∂ìÂ∫¶Â∑Æ MAE = 0.02ÔºåÁ∑ØÂ∫¶Â∑Æ MAE = 0.01Ôºå‰∫îÂàÜÂ±±Èõ∑ÈÅîÁ´ôÁöÑÁ∑ØÂ∫¶ = 23.5
# lat_mae = evaluation[5]
# lon_mae = evaluation[8]
# # Âõ∫ÂÆöÁ∑ØÂ∫¶
# # ‰∫îÂàÜÂ±±Èõ∑ÈÅîÁ´ôÁöÑÁ∂ìÁ∑ØÂ∫¶
# center_lat = 25.071182
# center_lon = 121.781205

# mae_distance = calculate_mae_distance(lat_mae, lon_mae, center_lat)
# print(f"Average distance error (via MAE): {mae_distance:.2f} km")

In [13]:
from haversine import haversine

# Per-sample errors accumulated over every test sequence.
distances = []
lat_errors = []
lon_errors = []

for radar_folder_path, csv_file_path in zip(test_radar_files, test_csv_files):
    df = pd.read_csv(csv_file_path)

    # Ground-truth positions of the track
    lats = df['Latitude'].values
    lons = df['Longitude'].values

    # First differences: entry j is position[j + 1] - position[j]
    delta_lat = np.diff(lats)
    delta_lon = np.diff(lons)
    combined_data = np.vstack([delta_lat, delta_lon]).T  # columns: lat diff, lon diff

    # Load the grids in name order and compute frame-to-frame differences.
    radar_grids = []
    for radar_grid_name in sorted(os.listdir(radar_folder_path)):
        radar_grid = np.load(os.path.join(radar_folder_path, radar_grid_name))
        # cv2.resize expects dsize as (width, height).
        radar_grids.append(cv2.resize(radar_grid, (width, height)))

    diff_radar_grids = radar_grid_diff_processing(np.array(radar_grids))

    # Radar and CSV differences must describe the same time steps; log and skip otherwise.
    if len(diff_radar_grids) != len(combined_data):
        with open(f'attention-error_log-{split_data_mode}.txt', 'a') as f:
            f.write(f"Length mismatch: {len(diff_radar_grids)} vs {len(combined_data)}\n")
            f.write(f"Radar folder: {radar_folder_path}\n")
            f.write(f"CSV file: {csv_file_path}\n")
            f.write("======================\n")
        continue

    # Window starts, matching the sliding-window dataset generator. A sequence
    # needs at least one full window plus the following step to be evaluated.
    window_starts = list(range(0, len(diff_radar_grids) - window_size, step_size))
    if not window_starts:
        continue

    # Feature scaling (only the LSTM input is scaled)
    scaled_data_diff = scaler.transform(combined_data)

    # shape: (samples, window_size, 2)
    lstm_inputs = np.array([scaled_data_diff[s:s + window_size] for s in window_starts])
    # shape: (samples, window_size, height, width, 1)
    convlstm_inputs = np.array([diff_radar_grids[s:s + window_size] for s in window_starts])

    # Model prediction: one lat difference and one lon difference per window
    pred_lats_diff, pred_lons_diff = output_level_fusion_model.predict({
        'lstm_input': lstm_inputs,
        'convlstm_input': convlstm_inputs
    })

    for start, dlat, dlon in zip(window_starts, pred_lats_diff, pred_lons_diff):
        # The window covers differences start..start+window_size-1, i.e. it
        # ends at position start+window_size; the prediction is the step from
        # there to position start+window_size+1.
        base_index = start + window_size
        pred_lat = lats[base_index] + dlat.item()
        pred_lon = lons[base_index] + dlon.item()
        real_lat = lats[base_index + 1]
        real_lon = lons[base_index + 1]

        lat_errors.append(real_lat - pred_lat)
        lon_errors.append(real_lon - pred_lon)
        distances.append(haversine((real_lat, real_lon), (pred_lat, pred_lon)))

# === Evaluation metrics ===
lat_errors = np.array(lat_errors)
lon_errors = np.array(lon_errors)
distances = np.array(distances)

# MAE
lat_mae = np.mean(np.abs(lat_errors))
lon_mae = np.mean(np.abs(lon_errors))

# MSE
lat_mse = np.mean(lat_errors ** 2)
lon_mse = np.mean(lon_errors ** 2)

# RMSE
lat_rmse = np.sqrt(lat_mse)
lon_rmse = np.sqrt(lon_mse)

# Report the results
print("=== Á∂ìÁ∑ØÂ∫¶‰ΩçÁΩÆË©ï‰º∞ ===")
print(f"Latitude MSE: {lat_mse:.6f}")
print(f"Longitude MSE: {lon_mse:.6f}")
print(f"Latitude MAE: {lat_mae:.6f}")
print(f"Longitude MAE: {lon_mae:.6f}")
print(f"Latitude RMSE: {lat_rmse:.6f}")
print(f"Longitude RMSE: {lon_rmse:.6f}")
print(f"Average Haversine distance: {np.mean(distances):.6f} km")

=== Á∂ìÁ∑ØÂ∫¶‰ΩçÁΩÆË©ï‰º∞ ===
Latitude MSE: 0.000220
Longitude MSE: 0.000312
Latitude MAE: 0.010794
Longitude MAE: 0.013887
Latitude RMSE: 0.014842
Longitude RMSE: 0.017655
Average Haversine distance: 2.048096 km


In [14]:
# Print the same metrics again with seven decimal places.
print("=== Á∂ìÁ∑ØÂ∫¶‰ΩçÁΩÆË©ï‰º∞ ===")
metric_rows = (
    ("Latitude MSE", lat_mse),
    ("Longitude MSE", lon_mse),
    ("Latitude MAE", lat_mae),
    ("Longitude MAE", lon_mae),
    ("Latitude RMSE", lat_rmse),
    ("Longitude RMSE", lon_rmse),
)
for metric_label, metric_value in metric_rows:
    print(f"{metric_label}: {metric_value:.7f}")
print(f"Average Haversine distance: {np.mean(distances):.7f} km")

=== Á∂ìÁ∑ØÂ∫¶‰ΩçÁΩÆË©ï‰º∞ ===
Latitude MSE: 0.0002203
Longitude MSE: 0.0003117
Latitude MAE: 0.0107945
Longitude MAE: 0.0138870
Latitude RMSE: 0.0148421
Longitude RMSE: 0.0176553
Average Haversine distance: 2.0480959 km
