In [None]:
# 라이브러리 임포트
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset

In [None]:
# LOSS FUNCTION
class DestinationLoss(nn.Module):
    """Weighted MSE loss over four predicted columns.

    After squeezing dim 1, columns 0..3 of both tensors are read as
    latitude, longitude, SOG and COG respectively. The loss is

        weight_main * (mse_lat + mse_lon)
        + (1 - weight_main) * ((1 - cog_weight) * mse_sog + cog_weight * mse_cog)

    where each ``mse_*`` is the batch-mean squared error of one column.

    Args:
        weight_main: Weight of the position term; the motion term receives
            ``1 - weight_main``.
        cog_weight: Weight of the COG error inside the motion term; the SOG
            error receives ``1 - cog_weight``.
    """

    def __init__(self, weight_main=0.99, cog_weight=0.9):
        super().__init__()
        self.weight_main = weight_main
        self.cog_weight = cog_weight

    def forward(self, y_pred, y_true):
        """Return the scalar weighted loss between ``y_pred`` and ``y_true``.

        Both inputs have a size-1 dim 1 removed if present, and are then
        indexed as ``[:, 0..3]``.
        """
        # Drop a singleton dim 1 so both tensors are indexed as [row, column].
        pred = y_pred.squeeze(1)
        target = y_true.squeeze(1)

        def column_mse(col):
            # Batch-mean squared error of a single output column.
            return F.mse_loss(pred[:, col], target[:, col])

        lat_err, lon_err, sog_err, cog_err = (column_mse(col) for col in range(4))

        # Position term: latitude error plus longitude error.
        position_term = lat_err + lon_err

        # Motion term: SOG/COG errors blended by cog_weight.
        motion_term = (1 - self.cog_weight) * sog_err + self.cog_weight * cog_err

        # Final loss: position term blended with the motion term.
        return self.weight_main * position_term + (1 - self.weight_main) * motion_term

In [None]:
# Train & Validation Pipeline
def train_transformer_model(model, train_data, val_data=None, num_epochs=20, batch_size=128, learning_rate=1e-5, device='cpu'):
    """Train ``model`` on noise-augmented inputs and checkpoint the best epoch.

    ``train_data`` is unpacked into ``(x, y)`` and split 90/10 (fixed seed 42)
    into a training and a validation subset. Every epoch runs one pass over the
    training subset with per-feature Gaussian noise added to the inputs, then
    evaluates on the validation subset without noise. Whenever the mean
    validation loss improves on the best value seen so far, the whole model
    object is written to ``model_history/best_model.pth``.

    Args:
        model: ``nn.Module`` to optimise; moved to ``device`` in place.
        train_data: ``(x, y)`` pair accepted by ``train_test_split`` and
            ``TensorDataset``. The last dimension of ``x`` must broadcast
            against the 5-element noise-scale vector (presumably 5 input
            features -- confirm against the data pipeline).
        val_data: Currently unused; validation data is always carved out of
            ``train_data``. Kept so existing call sites remain valid.
        num_epochs: Number of epochs; also the cosine-annealing period.
        batch_size: Batch size for both data loaders.
        learning_rate: Initial AdamW learning rate.
        device: Device the model and every batch are moved to.

    Returns:
        None. Progress is printed and the best model is saved to disk.
    """
    model.to(device)

    # Checkpoint location for the model with the lowest validation loss.
    save_path = 'model_history/best_model.pth'
    # torch.save does not create missing parent directories, so make sure the
    # target folder exists before the first checkpoint is written.
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    # Start from +inf so the first epoch always yields a checkpoint, even when
    # the validation loss never drops below 1.0.
    best_val_loss = float('inf')

    # Split train_data into train / validation subsets ------------------------
    x_train, y_train = train_data
    x_train_f, x_val, y_train_f, y_val = train_test_split(
        x_train, y_train, test_size=0.1, random_state=42
    )

    train_dataset = TensorDataset(x_train_f, y_train_f)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    val_dataset = TensorDataset(x_val, y_val)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    # --------------------------------------------------------------------------

    # Loss & optimizer ---------------------------------------------------------
    criterion = DestinationLoss(weight_main=0.999)
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=1e-4)
    # --------------------------------------------------------------------------

    # Learning-rate annealing
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)

    # Per-feature noise scale for input augmentation. It is constant, so it is
    # built once here instead of once per batch.
    feature_noise_std = torch.tensor([1e-5, 1e-5, 1e-3, 1e-2, 1e-2], device=device)

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0

        # Train
        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)

            # Noise injection: add feature-wise Gaussian noise to the inputs.
            noise = torch.randn_like(batch_x) * feature_noise_std
            batch_x_noisy = batch_x + noise

            optimizer.zero_grad()
            outputs = model(batch_x_noisy)
            loss = criterion(outputs, batch_y)
            loss.backward()
            # Clip the gradient norm to guard against exploding gradients.
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=0.1)
            optimizer.step()

            total_loss += loss.item()

        avg_loss = total_loss / len(train_loader)
        print(f"[Epoch {epoch+1}/{num_epochs}] Train Loss: {avg_loss:.6f}")

        # Validation (clean inputs, no gradient tracking)
        model.eval()
        total_val_loss = 0
        with torch.no_grad():
            for val_x, val_y in val_loader:
                val_x, val_y = val_x.to(device), val_y.to(device)
                val_outputs = model(val_x)
                val_loss = criterion(val_outputs, val_y)
                total_val_loss += val_loss.item()

        avg_val_loss = total_val_loss / len(val_loader)
        print(f"           ↳ Val Loss: {avg_val_loss:.6f}")

        # Save best model (the full module object, not just its state_dict).
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            torch.save(model, save_path)
            print(f"           ↳ ✅ Best model saved (Val Loss: {best_val_loss:.6f})")

        scheduler.step()