In [2]:
import json
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sentence_transformers import SentenceTransformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
import random # For randomized search
import sys # For min_delta

# -------------------- Utility Functions --------------------
def mean_absolute_percentage_error(y_true, y_pred):
    """
    Calculates MAPE, preventing division by zero.
    """
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    # Add a small epsilon to prevent division by zero for y_true values that are zero
    epsilon = 1e-10
    return np.mean(np.abs((y_true - y_pred) / (y_true + epsilon))) * 100

# -------------------- Device Setup --------------------
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# -------------------- Step 1: 데이터 로드 및 전처리 (JSON 파일용) --------------------
# 'review.json' 파일을 올바르게 로드합니다.
try:
    # JSON 파일이 한 줄에 하나의 JSON 객체로 되어 있는 경우 (JSONL 형식)
    df = pd.read_json('review.json', lines=True)
    print("✅ JSON 파일 로드 성공.")
except FileNotFoundError:
    print("❌ Error: 'review.json' 파일이 존재하지 않습니다.")
    sys.exit()
except ValueError as e:
    # JSON 파일이 단일 JSON 배열인 경우
    print(f"JSONL 형식이 아닌 것 같습니다. 일반 JSON 파일로 다시 시도합니다. (오류: {e})")
    try:
        df = pd.read_json('review.json')
        print("✅ 일반 JSON 파일 로드 성공.")
    except Exception as e:
        print(f"❌ Error: 'review.json' 파일을 읽는 데 실패했습니다. 파일 형식을 확인해주세요. (오류: {e})")
        sys.exit()

# 필요한 컬럼만 선택하고, 결측치를 제거합니다.
df = df[['user_id', 'business_id', 'stars', 'text']].dropna()

# -------------------- SBERT 모델 로딩 (한 번만) --------------------
print("SBERT 모델 로딩 중...")
sbert_model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
print("SBERT 모델 로딩 완료.")

# -------------------- Dataset 클래스 정의 --------------------
class UCAMDataset(Dataset):
    def __init__(self, users, items, ratings, contexts):
        self.users = users
        self.items = items
        self.ratings = ratings
        self.contexts = contexts

    def __len__(self):
        return len(self.ratings)

    def __getitem__(self, idx):
        return (
            torch.tensor(self.users[idx], dtype=torch.long),
            torch.tensor(self.items[idx], dtype=torch.long),
            torch.tensor(self.contexts[idx], dtype=torch.float32),
            torch.tensor(self.ratings[idx], dtype=torch.float32)
        )

# -------------------- 모델 정의 --------------------
class UCAM(nn.Module):
    def __init__(self, num_users, num_items, context_dim=384, embed_dim=64, hidden_dims=[128, 64]):
        super().__init__()
        # '알 수 없음' 인덱스를 위해 +1
        self.user_embed = nn.Embedding(num_users + 1, embed_dim)
        self.item_embed = nn.Embedding(num_items + 1, embed_dim)

        # MLP 레이어 동적으로 생성
        layers = []
        input_dim = embed_dim * 2 + context_dim
        for h_dim in hidden_dims:
            layers.append(nn.Linear(input_dim, h_dim))
            layers.append(nn.ReLU())
            input_dim = h_dim
        
        layers.append(nn.Linear(input_dim, 1)) # 최종 출력 레이어
        self.fc_layers = nn.Sequential(*layers)

    def forward(self, user_ids, item_ids, context_vecs):
        u = self.user_embed(user_ids)
        i = self.item_embed(item_ids)
        x = torch.cat([u, i, context_vecs], dim=-1)
        return self.fc_layers(x).squeeze()

# -------------------- 평가 지표 함수 --------------------
def evaluate_model(model, data_loader, device):
    model.eval()
    preds, targets = [], []

    with torch.no_grad():
        for users, items, contexts, ratings in data_loader:
            users = users.to(device)
            items = items.to(device)
            contexts = contexts.to(device)
            ratings = ratings.to(device)

            output = model(users, items, contexts)
            preds.extend(output.cpu().numpy())
            targets.extend(ratings.cpu().numpy())

    preds = np.array(preds)
    targets = np.array(targets)

    mae = mean_absolute_error(targets, preds)
    mse = mean_squared_error(targets, preds)
    rmse = np.sqrt(mse)
    mape = mean_absolute_percentage_error(targets, preds)

    return mae, mse, rmse, mape

# -------------------- Hyperparameter Search Setup --------------------
param_grid = {
    'embed_dim': [32, 64, 128], # Embedding dimension for users and items
    'hidden_dims': [[64, 32], [128, 64], [256, 128]], # Hidden layers for MLP
    'learning_rate': [0.0005, 0.001, 0.002],
    'batch_size': [128, 256, 512],
    'patience': [5, 7, 10] # Early stopping patience
}

num_trials = 10 # Number of random combinations to try
best_params = None
best_rmse = float('inf')
results_log = [] # To store results of each trial
min_delta = 1e-4 # Minimum change to be considered an improvement

# Fixed random state for data splitting. This ensures that
# all hyperparameter trials use the exact same train/val/test splits,
# making the comparison of hyperparameters fair.
DATA_SPLIT_RANDOM_STATE = 42

print(f"\n--- Starting Hyperparameter Search with {num_trials} trials ---")
print(f"Data Split Random State: {DATA_SPLIT_RANDOM_STATE}")

# Perform data splitting once
train_val_df, test_df = train_test_split(df, test_size=0.2, random_state=DATA_SPLIT_RANDOM_STATE)

# 훈련 세트의 고유한 사용자/아이템 ID를 기반으로 인덱스를 생성합니다.
user2idx = {uid: i for i, uid in enumerate(train_val_df['user_id'].unique())}
item2idx = {iid: i for i, iid in enumerate(train_val_df['business_id'].unique())}

# 테스트 세트에만 존재하는 사용자/아이템을 위한 '알 수 없음' 인덱스를 추가합니다.
# 이 인덱스는 train_val_df에 없는 ID를 처리합니다.
unknown_user_idx = len(user2idx)
unknown_item_idx = len(item2idx)

# SBERT 문맥 벡터 생성 (전체 데이터에서 한 번만)
print("SBERT 문맥 벡터 생성 중 (전체 데이터)...")
# Note: For efficiency, we encode the full text column and then split
# This avoids re-encoding the same texts if they appear in different splits.
all_texts = df['text'].tolist()
all_context_vectors = sbert_model.encode(all_texts, show_progress_bar=True)
df['context_vectors'] = list(all_context_vectors) # Add as a new column

# Split dataframes with context vectors
train_val_df_with_contexts = df.loc[train_val_df.index]
test_df_with_contexts = df.loc[test_df.index]

# Map user/item IDs to indices for all splits
train_val_df_with_contexts['user_idx'] = train_val_df_with_contexts['user_id'].map(user2idx)
train_val_df_with_contexts['item_idx'] = train_val_df_with_contexts['business_id'].map(item2idx)
test_df_with_contexts['user_idx'] = test_df_with_contexts['user_id'].map(user2idx).fillna(unknown_user_idx).astype(int)
test_df_with_contexts['item_idx'] = test_df_with_contexts['business_id'].map(item2idx).fillna(unknown_item_idx).astype(int)

# Further split train_val into train and validation for early stopping
train_df, val_df = train_test_split(train_val_df_with_contexts, test_size=0.125, random_state=DATA_SPLIT_RANDOM_STATE) # 0.125 * 0.8 = 0.1 (10%)

print(f"Fixed Data Split: Train={len(train_df)}, Val={len(val_df)}, Test={len(test_df_with_contexts)}")

epochs = 50 # Maximum epochs for each trial
context_dim = sbert_model.get_sentence_embedding_dimension() # Get SBERT embedding dimension

# --- Hyperparameter Search Loop ---
for trial_num in range(num_trials):
    print(f"\n==================== Trial {trial_num + 1}/{num_trials} ====================")

    # Randomly select hyperparameters for the current trial
    current_params = {k: random.choice(v) for k, v in param_grid.items()}
    
    # Ensure embed_dim is not too small relative to context_dim or hidden_dims to avoid issues
    # This is a soft check; adjust ranges in param_grid for robust solutions
    if current_params['embed_dim'] * 2 + context_dim < current_params['hidden_dims'][0]:
        print(f"Warning: Initial MLP input dim ({current_params['embed_dim'] * 2 + context_dim}) is smaller than first hidden layer ({current_params['hidden_dims'][0]}). This might not be optimal. Skipping this configuration.")
        # Optionally skip this trial or adjust hidden_dims to be smaller
        continue
    
    print(f"Current Parameters: {current_params}")

    # Create DataLoaders for the current trial
    train_dataset = UCAMDataset(
        train_df['user_idx'].values,
        train_df['item_idx'].values,
        train_df['stars'].values.astype(np.float32),
        np.stack(train_df['context_vectors'].values) # Convert list of arrays to 2D array
    )
    val_dataset = UCAMDataset(
        val_df['user_idx'].values,
        val_df['item_idx'].values,
        val_df['stars'].values.astype(np.float32),
        np.stack(val_df['context_vectors'].values)
    )
    # 🚨 수정된 부분: test_df 대신 test_df_with_contexts 사용
    test_dataset = UCAMDataset(
        test_df_with_contexts['user_idx'].values,
        test_df_with_contexts['item_idx'].values,
        test_df_with_contexts['stars'].values.astype(np.float32),
        np.stack(test_df_with_contexts['context_vectors'].values)
    )

    train_loader = DataLoader(train_dataset, batch_size=current_params['batch_size'], shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=current_params['batch_size'], shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=current_params['batch_size'], shuffle=False)

    # Model, Loss Function, and Optimizer are re-initialized for each trial
    model = UCAM(num_users=len(user2idx), num_items=len(item2idx),
                 context_dim=context_dim,
                 embed_dim=current_params['embed_dim'],
                 hidden_dims=current_params['hidden_dims']).to(device)
    
    optimizer = torch.optim.Adam(model.parameters(), lr=current_params['learning_rate'])
    criterion = nn.MSELoss()
    
    # Path to save the best model for this *specific trial*
    trial_model_save_path = f'temp_ucam_model_trial_{trial_num+1}.pt'

    trial_best_val_rmse = float('inf')
    epochs_no_improve = 0
    patience = current_params['patience'] # Use patience from current_params

    # --- Training Loop for current trial ---
    print("모델 학습 시작...")
    for epoch in range(epochs):
        model.train()
        total_train_loss = 0
        train_bar = tqdm(train_loader, desc=f"[Trial {trial_num+1}, Epoch {epoch+1}] Training", leave=False)

        for user_ids, item_ids, context_vectors, stars in train_bar:
            user_ids = user_ids.to(device)
            item_ids = item_ids.to(device)
            context_vectors = context_vectors.to(device)
            stars = stars.to(device)

            optimizer.zero_grad()
            predictions = model(user_ids, item_ids, context_vectors)
            loss = criterion(predictions, stars)
            loss.backward()
            optimizer.step()
            total_train_loss += loss.item()
            train_bar.set_postfix(loss=loss.item())
            
        # --- Validation after each epoch ---
        model.eval()
        val_mae, val_mse, val_rmse, val_mape = evaluate_model(model, val_loader, device)

        print(f"Epoch {epoch+1} | Train Loss: {total_train_loss / len(train_loader):.4f} | "
              f"Val RMSE: {val_rmse:.4f}, MAE: {val_mae:.4f}, MAPE: {val_mape:.2f}%")

        # Early stopping logic for this trial
        if val_rmse < trial_best_val_rmse - min_delta:
            trial_best_val_rmse = val_rmse
            epochs_no_improve = 0
            torch.save(model.state_dict(), trial_model_save_path)
            # print(f"    --> Improved. Model saved (RMSE: {trial_best_val_rmse:.4f})") # Optional: detailed logging
        else:
            epochs_no_improve += 1
            # print(f"    --> No improvement. ({epochs_no_improve}/{patience})")
            if epochs_no_improve == patience:
                print("Early stopping triggered for this trial.")
                break
    
    # --- Evaluate the best model from this trial on the Test Set ---
    if os.path.exists(trial_model_save_path):
        model.load_state_dict(torch.load(trial_model_save_path))
        print(f"\nLoaded best model for trial {trial_num+1} from {trial_model_save_path}")
        test_mae, test_mse, test_rmse, test_mape = evaluate_model(model, test_loader, device)
    else:
        print(f"\nWarning: Best model for trial {trial_num+1} not found. Testing with last model state.")
        test_mae, test_mse, test_rmse, test_mape = evaluate_model(model, test_loader, device)

    print(f"\n✅ Trial {trial_num+1} Test Results:")
    print(f" - MSE  : {test_mse:.4f}")
    print(f" - RMSE : {test_rmse:.4f}")
    print(f" - MAE  : {test_mae:.4f}")
    print(f" - MAPE : {test_mape:.2f}%")

    # Store results for this trial
    trial_results = {
        'trial_num': trial_num + 1,
        'parameters': current_params,
        'test_mse': test_mse,
        'test_rmse': test_rmse,
        'test_mae': test_mae,
        'test_mape': test_mape
    }
    results_log.append(trial_results)

    # Check if this trial yielded the overall best RMSE
    if test_rmse < best_rmse:
        best_rmse = test_rmse
        best_params = current_params
        # Save the overall best model
        torch.save(model.state_dict(), 'best_overall_ucam_model.pt')
        print(f"  --> New overall best RMSE found: {best_rmse:.4f} with params: {best_params}")

    # Clean up the temporary model file for this trial
    if os.path.exists(trial_model_save_path):
        os.remove(trial_model_save_path)

# -------------------- Final Results Output --------------------
print(f"\n--- Hyperparameter Search Completed ---")
print(f"Overall Best RMSE found: {best_rmse:.4f}")
print(f"Optimal Parameters: {best_params}")


Using device: cuda
✅ JSON 파일 로드 성공.
SBERT 모델 로딩 중...
SBERT 모델 로딩 완료.

--- Starting Hyperparameter Search with 10 trials ---
Data Split Random State: 42
SBERT 문맥 벡터 생성 중 (전체 데이터)...


Batches: 100%|██████████| 13994/13994 [06:23<00:00, 36.44it/s] 


Fixed Data Split: Train=313456, Val=44780, Test=89560

Current Parameters: {'embed_dim': 128, 'hidden_dims': [256, 128], 'learning_rate': 0.001, 'batch_size': 128, 'patience': 10}
모델 학습 시작...


                                                                                             

Epoch 1 | Train Loss: 0.8242 | Val RMSE: 0.8432, MAE: 0.6711, MAPE: 24.08%


                                                                                             

Epoch 2 | Train Loss: 0.6458 | Val RMSE: 0.8237, MAE: 0.6498, MAPE: 23.92%


                                                                                             

Epoch 3 | Train Loss: 0.5841 | Val RMSE: 0.8218, MAE: 0.6433, MAPE: 23.97%


                                                                                             

Epoch 4 | Train Loss: 0.5274 | Val RMSE: 0.8129, MAE: 0.6331, MAPE: 22.95%


                                                                                             

Epoch 5 | Train Loss: 0.4593 | Val RMSE: 0.8160, MAE: 0.6302, MAPE: 22.95%


                                                                                             

Epoch 6 | Train Loss: 0.4035 | Val RMSE: 0.8126, MAE: 0.6252, MAPE: 22.86%


                                                                                             

Epoch 7 | Train Loss: 0.3513 | Val RMSE: 0.8198, MAE: 0.6332, MAPE: 22.53%


                                                                                             

Epoch 8 | Train Loss: 0.3037 | Val RMSE: 0.8383, MAE: 0.6416, MAPE: 23.69%


                                                                                             

Epoch 9 | Train Loss: 0.2602 | Val RMSE: 0.8433, MAE: 0.6494, MAPE: 23.01%


                                                                                              

Epoch 10 | Train Loss: 0.2229 | Val RMSE: 0.8543, MAE: 0.6540, MAPE: 23.42%


                                                                                              

Epoch 11 | Train Loss: 0.1914 | Val RMSE: 0.8685, MAE: 0.6638, MAPE: 23.76%


                                                                                              

Epoch 12 | Train Loss: 0.1648 | Val RMSE: 0.8800, MAE: 0.6713, MAPE: 23.92%


                                                                                               

Epoch 13 | Train Loss: 0.1436 | Val RMSE: 0.8769, MAE: 0.6691, MAPE: 24.28%


                                                                                               

Epoch 14 | Train Loss: 0.1256 | Val RMSE: 0.8846, MAE: 0.6753, MAPE: 24.21%


                                                                                               

Epoch 15 | Train Loss: 0.1110 | Val RMSE: 0.8884, MAE: 0.6779, MAPE: 24.20%


                                                                                               

Epoch 16 | Train Loss: 0.0986 | Val RMSE: 0.8921, MAE: 0.6815, MAPE: 24.51%
Early stopping triggered for this trial.

Loaded best model for trial 1 from temp_ucam_model_trial_1.pt

✅ Trial 1 Test Results:
 - MSE  : 0.6371
 - RMSE : 0.7982
 - MAE  : 0.6161
 - MAPE : 22.44%
  --> New overall best RMSE found: 0.7982 with params: {'embed_dim': 128, 'hidden_dims': [256, 128], 'learning_rate': 0.001, 'batch_size': 128, 'patience': 10}

Current Parameters: {'embed_dim': 32, 'hidden_dims': [256, 128], 'learning_rate': 0.002, 'batch_size': 512, 'patience': 5}
모델 학습 시작...


                                                                                          

Epoch 1 | Train Loss: 0.9355 | Val RMSE: 0.8392, MAE: 0.6643, MAPE: 24.39%


                                                                                          

Epoch 2 | Train Loss: 0.6551 | Val RMSE: 0.8198, MAE: 0.6429, MAPE: 24.18%


                                                                                          

Epoch 3 | Train Loss: 0.6050 | Val RMSE: 0.8089, MAE: 0.6381, MAPE: 22.94%


                                                                                          

Epoch 4 | Train Loss: 0.5648 | Val RMSE: 0.7996, MAE: 0.6269, MAPE: 22.99%


                                                                                          

Epoch 5 | Train Loss: 0.5326 | Val RMSE: 0.8042, MAE: 0.6295, MAPE: 22.80%


                                                                                          

Epoch 6 | Train Loss: 0.5062 | Val RMSE: 0.8094, MAE: 0.6304, MAPE: 23.09%


                                                                                          

Epoch 7 | Train Loss: 0.4663 | Val RMSE: 0.8057, MAE: 0.6207, MAPE: 22.90%


                                                                                          

Epoch 8 | Train Loss: 0.4285 | Val RMSE: 0.7909, MAE: 0.6086, MAPE: 22.17%


                                                                                          

Epoch 9 | Train Loss: 0.3973 | Val RMSE: 0.7986, MAE: 0.6131, MAPE: 22.30%


                                                                                           

Epoch 10 | Train Loss: 0.3686 | Val RMSE: 0.8036, MAE: 0.6120, MAPE: 22.28%


                                                                                           

Epoch 11 | Train Loss: 0.3420 | Val RMSE: 0.8124, MAE: 0.6172, MAPE: 22.68%


                                                                                           

Epoch 12 | Train Loss: 0.3160 | Val RMSE: 0.8326, MAE: 0.6406, MAPE: 21.86%


                                                                                            

Epoch 13 | Train Loss: 0.2913 | Val RMSE: 0.8348, MAE: 0.6298, MAPE: 23.38%
Early stopping triggered for this trial.

Loaded best model for trial 2 from temp_ucam_model_trial_2.pt

✅ Trial 2 Test Results:
 - MSE  : 0.6080
 - RMSE : 0.7798
 - MAE  : 0.6020
 - MAPE : 21.82%
  --> New overall best RMSE found: 0.7798 with params: {'embed_dim': 32, 'hidden_dims': [256, 128], 'learning_rate': 0.002, 'batch_size': 512, 'patience': 5}

Current Parameters: {'embed_dim': 128, 'hidden_dims': [128, 64], 'learning_rate': 0.002, 'batch_size': 256, 'patience': 5}
모델 학습 시작...


                                                                                             

Epoch 1 | Train Loss: 0.8791 | Val RMSE: 0.8420, MAE: 0.6682, MAPE: 24.50%


                                                                                             

Epoch 2 | Train Loss: 0.6380 | Val RMSE: 0.8292, MAE: 0.6601, MAPE: 23.38%


                                                                                             

Epoch 3 | Train Loss: 0.5649 | Val RMSE: 0.8276, MAE: 0.6480, MAPE: 24.25%


                                                                                             

Epoch 4 | Train Loss: 0.5058 | Val RMSE: 0.8286, MAE: 0.6529, MAPE: 23.48%


                                                                                             

Epoch 5 | Train Loss: 0.4478 | Val RMSE: 0.8377, MAE: 0.6560, MAPE: 23.91%


                                                                                             

Epoch 6 | Train Loss: 0.3857 | Val RMSE: 0.8513, MAE: 0.6645, MAPE: 24.08%


                                                                                             

Epoch 7 | Train Loss: 0.3236 | Val RMSE: 0.8539, MAE: 0.6665, MAPE: 24.01%


                                                                                             

Epoch 8 | Train Loss: 0.2651 | Val RMSE: 0.8794, MAE: 0.6800, MAPE: 25.25%
Early stopping triggered for this trial.

Loaded best model for trial 3 from temp_ucam_model_trial_3.pt

✅ Trial 3 Test Results:
 - MSE  : 0.6682
 - RMSE : 0.8174
 - MAE  : 0.6421
 - MAPE : 23.87%

Current Parameters: {'embed_dim': 64, 'hidden_dims': [128, 64], 'learning_rate': 0.002, 'batch_size': 256, 'patience': 10}
모델 학습 시작...


                                                                                             

Epoch 1 | Train Loss: 0.8744 | Val RMSE: 0.8411, MAE: 0.6692, MAPE: 23.90%


                                                                                             

Epoch 2 | Train Loss: 0.6413 | Val RMSE: 0.8167, MAE: 0.6430, MAPE: 23.64%


                                                                                             

Epoch 3 | Train Loss: 0.5815 | Val RMSE: 0.8174, MAE: 0.6395, MAPE: 23.91%


                                                                                             

Epoch 4 | Train Loss: 0.5324 | Val RMSE: 0.8197, MAE: 0.6478, MAPE: 22.90%


                                                                                             

Epoch 5 | Train Loss: 0.4861 | Val RMSE: 0.8171, MAE: 0.6346, MAPE: 23.15%


                                                                                             

Epoch 6 | Train Loss: 0.4245 | Val RMSE: 0.8133, MAE: 0.6283, MAPE: 23.01%


                                                                                             

Epoch 7 | Train Loss: 0.3711 | Val RMSE: 0.8190, MAE: 0.6342, MAPE: 22.49%


                                                                                             

Epoch 8 | Train Loss: 0.3232 | Val RMSE: 0.8285, MAE: 0.6369, MAPE: 22.73%


                                                                                             

Epoch 9 | Train Loss: 0.2814 | Val RMSE: 0.8466, MAE: 0.6472, MAPE: 23.78%


                                                                                              

Epoch 10 | Train Loss: 0.2456 | Val RMSE: 0.8522, MAE: 0.6535, MAPE: 23.40%


                                                                                              

Epoch 11 | Train Loss: 0.2136 | Val RMSE: 0.8622, MAE: 0.6558, MAPE: 24.31%


                                                                                              

Epoch 12 | Train Loss: 0.1877 | Val RMSE: 0.8698, MAE: 0.6647, MAPE: 23.95%


                                                                                              

Epoch 13 | Train Loss: 0.1666 | Val RMSE: 0.8792, MAE: 0.6688, MAPE: 24.83%


                                                                                              

Epoch 14 | Train Loss: 0.1477 | Val RMSE: 0.8856, MAE: 0.6755, MAPE: 24.18%


                                                                                               

Epoch 15 | Train Loss: 0.1325 | Val RMSE: 0.8895, MAE: 0.6793, MAPE: 24.57%


                                                                                               

Epoch 16 | Train Loss: 0.1205 | Val RMSE: 0.8983, MAE: 0.6855, MAPE: 24.53%
Early stopping triggered for this trial.

Loaded best model for trial 4 from temp_ucam_model_trial_4.pt

✅ Trial 4 Test Results:
 - MSE  : 0.6458
 - RMSE : 0.8036
 - MAE  : 0.6224
 - MAPE : 22.68%

Current Parameters: {'embed_dim': 128, 'hidden_dims': [64, 32], 'learning_rate': 0.0005, 'batch_size': 256, 'patience': 10}
모델 학습 시작...


                                                                                             

Epoch 1 | Train Loss: 1.3791 | Val RMSE: 0.8581, MAE: 0.6811, MAPE: 25.50%


                                                                                             

Epoch 2 | Train Loss: 0.6750 | Val RMSE: 0.8209, MAE: 0.6443, MAPE: 23.37%


                                                                                             

Epoch 3 | Train Loss: 0.6028 | Val RMSE: 0.8053, MAE: 0.6301, MAPE: 22.54%


                                                                                             

Epoch 4 | Train Loss: 0.5590 | Val RMSE: 0.7958, MAE: 0.6211, MAPE: 22.21%


                                                                                             

Epoch 5 | Train Loss: 0.5249 | Val RMSE: 0.7941, MAE: 0.6155, MAPE: 22.45%


                                                                                             

Epoch 6 | Train Loss: 0.4952 | Val RMSE: 0.7918, MAE: 0.6139, MAPE: 21.99%


                                                                                             

Epoch 7 | Train Loss: 0.4675 | Val RMSE: 0.7944, MAE: 0.6142, MAPE: 22.11%


                                                                                             

Epoch 8 | Train Loss: 0.4410 | Val RMSE: 0.7944, MAE: 0.6159, MAPE: 21.88%


                                                                                             

Epoch 9 | Train Loss: 0.4145 | Val RMSE: 0.7974, MAE: 0.6169, MAPE: 21.78%


                                                                                              

Epoch 10 | Train Loss: 0.3893 | Val RMSE: 0.8016, MAE: 0.6199, MAPE: 22.07%


                                                                                              

Epoch 11 | Train Loss: 0.3653 | Val RMSE: 0.8077, MAE: 0.6266, MAPE: 21.80%


                                                                                              

Epoch 12 | Train Loss: 0.3423 | Val RMSE: 0.8129, MAE: 0.6250, MAPE: 22.55%


                                                                                              

Epoch 13 | Train Loss: 0.3198 | Val RMSE: 0.8194, MAE: 0.6311, MAPE: 22.51%


                                                                                              

Epoch 14 | Train Loss: 0.2991 | Val RMSE: 0.8247, MAE: 0.6336, MAPE: 22.73%


                                                                                              

Epoch 15 | Train Loss: 0.2789 | Val RMSE: 0.8305, MAE: 0.6390, MAPE: 22.66%


                                                                                              

Epoch 16 | Train Loss: 0.2603 | Val RMSE: 0.8401, MAE: 0.6450, MAPE: 22.90%
Early stopping triggered for this trial.

Loaded best model for trial 5 from temp_ucam_model_trial_5.pt

✅ Trial 5 Test Results:
 - MSE  : 0.6111
 - RMSE : 0.7818
 - MAE  : 0.6073
 - MAPE : 21.73%

Current Parameters: {'embed_dim': 128, 'hidden_dims': [256, 128], 'learning_rate': 0.002, 'batch_size': 512, 'patience': 7}
모델 학습 시작...


                                                                                           

Epoch 1 | Train Loss: 0.9172 | Val RMSE: 0.8448, MAE: 0.6699, MAPE: 24.93%


                                                                                          

Epoch 2 | Train Loss: 0.6433 | Val RMSE: 0.8283, MAE: 0.6540, MAPE: 24.12%


                                                                                          

Epoch 3 | Train Loss: 0.5803 | Val RMSE: 0.8220, MAE: 0.6491, MAPE: 23.58%


                                                                                          

Epoch 4 | Train Loss: 0.5279 | Val RMSE: 0.8247, MAE: 0.6486, MAPE: 23.82%


                                                                                          

Epoch 5 | Train Loss: 0.4809 | Val RMSE: 0.8343, MAE: 0.6571, MAPE: 23.63%


                                                                                          

Epoch 6 | Train Loss: 0.4359 | Val RMSE: 0.8406, MAE: 0.6628, MAPE: 23.63%


                                                                                          

Epoch 7 | Train Loss: 0.3856 | Val RMSE: 0.8731, MAE: 0.6787, MAPE: 25.50%


                                                                                          

Epoch 8 | Train Loss: 0.3278 | Val RMSE: 0.8540, MAE: 0.6660, MAPE: 23.43%


                                                                                          

Epoch 9 | Train Loss: 0.2709 | Val RMSE: 0.8647, MAE: 0.6734, MAPE: 23.47%


                                                                                           

Epoch 10 | Train Loss: 0.2272 | Val RMSE: 0.8698, MAE: 0.6713, MAPE: 24.36%
Early stopping triggered for this trial.

Loaded best model for trial 6 from temp_ucam_model_trial_6.pt

✅ Trial 6 Test Results:
 - MSE  : 0.6603
 - RMSE : 0.8126
 - MAE  : 0.6434
 - MAPE : 23.22%

Current Parameters: {'embed_dim': 32, 'hidden_dims': [256, 128], 'learning_rate': 0.001, 'batch_size': 512, 'patience': 7}
모델 학습 시작...


                                                                                          

Epoch 1 | Train Loss: 1.0367 | Val RMSE: 0.8488, MAE: 0.6716, MAPE: 25.00%


                                                                                           

Epoch 2 | Train Loss: 0.6789 | Val RMSE: 0.8269, MAE: 0.6518, MAPE: 24.20%


                                                                                           

Epoch 3 | Train Loss: 0.6401 | Val RMSE: 0.8175, MAE: 0.6476, MAPE: 23.31%


                                                                                           

Epoch 4 | Train Loss: 0.6062 | Val RMSE: 0.8078, MAE: 0.6378, MAPE: 22.95%


                                                                                           

Epoch 5 | Train Loss: 0.5684 | Val RMSE: 0.7966, MAE: 0.6183, MAPE: 22.80%


                                                                                          

Epoch 6 | Train Loss: 0.5223 | Val RMSE: 0.7810, MAE: 0.6042, MAPE: 21.48%


                                                                                          

Epoch 7 | Train Loss: 0.4877 | Val RMSE: 0.7782, MAE: 0.6010, MAPE: 21.36%


                                                                                           

Epoch 8 | Train Loss: 0.4633 | Val RMSE: 0.7784, MAE: 0.5967, MAPE: 21.77%


                                                                                           

Epoch 9 | Train Loss: 0.4401 | Val RMSE: 0.7763, MAE: 0.5960, MAPE: 21.46%


                                                                                           

Epoch 10 | Train Loss: 0.4217 | Val RMSE: 0.7755, MAE: 0.5947, MAPE: 21.37%


                                                                                            

Epoch 11 | Train Loss: 0.4032 | Val RMSE: 0.7828, MAE: 0.5960, MAPE: 21.78%


                                                                                            

Epoch 12 | Train Loss: 0.3859 | Val RMSE: 0.7887, MAE: 0.6060, MAPE: 20.82%


                                                                                            

Epoch 13 | Train Loss: 0.3678 | Val RMSE: 0.7861, MAE: 0.5969, MAPE: 21.07%


                                                                                            

Epoch 14 | Train Loss: 0.3509 | Val RMSE: 0.7897, MAE: 0.5975, MAPE: 21.21%


                                                                                           

Epoch 15 | Train Loss: 0.3350 | Val RMSE: 0.7922, MAE: 0.5976, MAPE: 21.68%


                                                                                           

Epoch 16 | Train Loss: 0.3192 | Val RMSE: 0.7971, MAE: 0.6043, MAPE: 21.07%


                                                                                           

Epoch 17 | Train Loss: 0.3038 | Val RMSE: 0.7988, MAE: 0.6027, MAPE: 21.45%
Early stopping triggered for this trial.

Loaded best model for trial 7 from temp_ucam_model_trial_7.pt

✅ Trial 7 Test Results:
 - MSE  : 0.5824
 - RMSE : 0.7631
 - MAE  : 0.5880
 - MAPE : 21.03%
  --> New overall best RMSE found: 0.7631 with params: {'embed_dim': 32, 'hidden_dims': [256, 128], 'learning_rate': 0.001, 'batch_size': 512, 'patience': 7}

Current Parameters: {'embed_dim': 32, 'hidden_dims': [64, 32], 'learning_rate': 0.0005, 'batch_size': 128, 'patience': 7}
모델 학습 시작...


                                                                                             

Epoch 1 | Train Loss: 1.1320 | Val RMSE: 0.8400, MAE: 0.6612, MAPE: 24.45%


                                                                                             

Epoch 2 | Train Loss: 0.6518 | Val RMSE: 0.8033, MAE: 0.6297, MAPE: 22.47%


                                                                                             

Epoch 3 | Train Loss: 0.6050 | Val RMSE: 0.7899, MAE: 0.6164, MAPE: 22.05%


                                                                                             

Epoch 4 | Train Loss: 0.5718 | Val RMSE: 0.7795, MAE: 0.6052, MAPE: 21.89%


                                                                                             

Epoch 5 | Train Loss: 0.5422 | Val RMSE: 0.7713, MAE: 0.5961, MAPE: 21.45%


                                                                                             

Epoch 6 | Train Loss: 0.5173 | Val RMSE: 0.7663, MAE: 0.5887, MAPE: 21.54%


                                                                                             

Epoch 7 | Train Loss: 0.4957 | Val RMSE: 0.7663, MAE: 0.5898, MAPE: 21.08%


                                                                                             

Epoch 8 | Train Loss: 0.4755 | Val RMSE: 0.7676, MAE: 0.5934, MAPE: 21.21%


                                                                                             

Epoch 9 | Train Loss: 0.4578 | Val RMSE: 0.7663, MAE: 0.5928, MAPE: 20.93%


                                                                                              

Epoch 10 | Train Loss: 0.4405 | Val RMSE: 0.7657, MAE: 0.5882, MAPE: 21.11%


                                                                                              

Epoch 11 | Train Loss: 0.4241 | Val RMSE: 0.7705, MAE: 0.5928, MAPE: 20.95%


                                                                                              

Epoch 12 | Train Loss: 0.4092 | Val RMSE: 0.7740, MAE: 0.5946, MAPE: 21.14%


                                                                                              

Epoch 13 | Train Loss: 0.3944 | Val RMSE: 0.7776, MAE: 0.5926, MAPE: 21.40%


                                                                                              

Epoch 14 | Train Loss: 0.3805 | Val RMSE: 0.7826, MAE: 0.5940, MAPE: 21.26%


                                                                                              

Epoch 15 | Train Loss: 0.3673 | Val RMSE: 0.7861, MAE: 0.5964, MAPE: 21.47%


                                                                                              

Epoch 16 | Train Loss: 0.3543 | Val RMSE: 0.7912, MAE: 0.5994, MAPE: 21.70%


                                                                                              

Epoch 17 | Train Loss: 0.3417 | Val RMSE: 0.7959, MAE: 0.6011, MAPE: 21.95%
Early stopping triggered for this trial.

Loaded best model for trial 8 from temp_ucam_model_trial_8.pt

✅ Trial 8 Test Results:
 - MSE  : 0.5692
 - RMSE : 0.7545
 - MAE  : 0.5827
 - MAPE : 20.83%
  --> New overall best RMSE found: 0.7545 with params: {'embed_dim': 32, 'hidden_dims': [64, 32], 'learning_rate': 0.0005, 'batch_size': 128, 'patience': 7}

Current Parameters: {'embed_dim': 32, 'hidden_dims': [64, 32], 'learning_rate': 0.001, 'batch_size': 512, 'patience': 10}
모델 학습 시작...


                                                                                          

Epoch 1 | Train Loss: 1.4086 | Val RMSE: 0.8621, MAE: 0.6810, MAPE: 25.56%


                                                                                          

Epoch 2 | Train Loss: 0.6975 | Val RMSE: 0.8325, MAE: 0.6578, MAPE: 24.20%


                                                                                          

Epoch 3 | Train Loss: 0.6526 | Val RMSE: 0.8127, MAE: 0.6396, MAPE: 22.59%


                                                                                           

Epoch 4 | Train Loss: 0.5932 | Val RMSE: 0.7906, MAE: 0.6134, MAPE: 22.20%


                                                                                           

Epoch 5 | Train Loss: 0.5544 | Val RMSE: 0.7809, MAE: 0.6053, MAPE: 21.99%


                                                                                           

Epoch 6 | Train Loss: 0.5258 | Val RMSE: 0.7780, MAE: 0.6026, MAPE: 21.51%


                                                                                           

Epoch 7 | Train Loss: 0.5005 | Val RMSE: 0.7720, MAE: 0.5966, MAPE: 21.41%


                                                                                          

Epoch 8 | Train Loss: 0.4795 | Val RMSE: 0.7731, MAE: 0.5983, MAPE: 21.17%


                                                                                          

Epoch 9 | Train Loss: 0.4591 | Val RMSE: 0.7742, MAE: 0.5960, MAPE: 21.48%


                                                                                           

Epoch 10 | Train Loss: 0.4421 | Val RMSE: 0.7769, MAE: 0.5959, MAPE: 21.64%


                                                                                            

Epoch 11 | Train Loss: 0.4257 | Val RMSE: 0.7788, MAE: 0.5993, MAPE: 21.39%


                                                                                            

Epoch 12 | Train Loss: 0.4087 | Val RMSE: 0.7815, MAE: 0.6001, MAPE: 21.57%


                                                                                           

Epoch 13 | Train Loss: 0.3935 | Val RMSE: 0.7832, MAE: 0.6020, MAPE: 21.60%


                                                                                           

Epoch 14 | Train Loss: 0.3787 | Val RMSE: 0.7868, MAE: 0.6061, MAPE: 21.53%


                                                                                           

Epoch 15 | Train Loss: 0.3641 | Val RMSE: 0.7934, MAE: 0.6091, MAPE: 21.79%


                                                                                           

Epoch 16 | Train Loss: 0.3491 | Val RMSE: 0.7966, MAE: 0.6113, MAPE: 22.03%


                                                                                           

Epoch 17 | Train Loss: 0.3357 | Val RMSE: 0.8015, MAE: 0.6132, MAPE: 22.27%
Early stopping triggered for this trial.

Loaded best model for trial 9 from temp_ucam_model_trial_9.pt

✅ Trial 9 Test Results:
 - MSE  : 0.5794
 - RMSE : 0.7612
 - MAE  : 0.5913
 - MAPE : 21.11%

Current Parameters: {'embed_dim': 32, 'hidden_dims': [64, 32], 'learning_rate': 0.0005, 'batch_size': 512, 'patience': 7}
모델 학습 시작...


                                                                                           

Epoch 1 | Train Loss: 2.1069 | Val RMSE: 0.8924, MAE: 0.7113, MAPE: 26.06%


                                                                                           

Epoch 2 | Train Loss: 0.7418 | Val RMSE: 0.8511, MAE: 0.6756, MAPE: 24.59%


                                                                                           

Epoch 3 | Train Loss: 0.6905 | Val RMSE: 0.8360, MAE: 0.6623, MAPE: 24.04%


                                                                                           

Epoch 4 | Train Loss: 0.6625 | Val RMSE: 0.8247, MAE: 0.6497, MAPE: 23.77%


                                                                                           

Epoch 5 | Train Loss: 0.6265 | Val RMSE: 0.8042, MAE: 0.6272, MAPE: 22.77%


                                                                                           

Epoch 6 | Train Loss: 0.5917 | Val RMSE: 0.7951, MAE: 0.6179, MAPE: 22.48%


                                                                                            

Epoch 7 | Train Loss: 0.5671 | Val RMSE: 0.7864, MAE: 0.6091, MAPE: 22.28%


                                                                                            

Epoch 8 | Train Loss: 0.5469 | Val RMSE: 0.7802, MAE: 0.6062, MAPE: 21.69%


                                                                                           

Epoch 9 | Train Loss: 0.5296 | Val RMSE: 0.7772, MAE: 0.6002, MAPE: 21.83%


                                                                                             

Epoch 10 | Train Loss: 0.5139 | Val RMSE: 0.7784, MAE: 0.5993, MAPE: 22.20%


                                                                                            

Epoch 11 | Train Loss: 0.4990 | Val RMSE: 0.7729, MAE: 0.5963, MAPE: 21.58%


                                                                                            

Epoch 12 | Train Loss: 0.4856 | Val RMSE: 0.7713, MAE: 0.5943, MAPE: 21.31%


                                                                                            

Epoch 13 | Train Loss: 0.4728 | Val RMSE: 0.7693, MAE: 0.5918, MAPE: 21.27%


                                                                                            

Epoch 14 | Train Loss: 0.4604 | Val RMSE: 0.7701, MAE: 0.5915, MAPE: 21.20%


                                                                                            

Epoch 15 | Train Loss: 0.4487 | Val RMSE: 0.7732, MAE: 0.5910, MAPE: 21.59%


                                                                                            

Epoch 16 | Train Loss: 0.4373 | Val RMSE: 0.7804, MAE: 0.5955, MAPE: 22.18%


                                                                                            

Epoch 17 | Train Loss: 0.4277 | Val RMSE: 0.7736, MAE: 0.5926, MAPE: 21.36%


                                                                                            

Epoch 18 | Train Loss: 0.4174 | Val RMSE: 0.7747, MAE: 0.5937, MAPE: 21.12%


                                                                                             

Epoch 19 | Train Loss: 0.4073 | Val RMSE: 0.7792, MAE: 0.5955, MAPE: 21.56%


                                                                                            

Epoch 20 | Train Loss: 0.3979 | Val RMSE: 0.7789, MAE: 0.5966, MAPE: 21.33%
Early stopping triggered for this trial.

Loaded best model for trial 10 from temp_ucam_model_trial_10.pt

✅ Trial 10 Test Results:
 - MSE  : 0.5748
 - RMSE : 0.7582
 - MAE  : 0.5862
 - MAPE : 20.95%

--- Hyperparameter Search Completed ---
Overall Best RMSE found: 0.7545
Optimal Parameters: {'embed_dim': 32, 'hidden_dims': [64, 32], 'learning_rate': 0.0005, 'batch_size': 128, 'patience': 7}


TypeError: Object of type float32 is not JSON serializable

In [3]:
import json
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sentence_transformers import SentenceTransformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
import sys

# -------------------- Utility Functions --------------------
def mean_absolute_percentage_error(y_true, y_pred):
    """
    Calculates MAPE, preventing division by zero.
    """
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    epsilon = 1e-10
    return np.mean(np.abs((y_true - y_pred) / (y_true + epsilon))) * 100

# -------------------- Step 1: 데이터 로드 및 전처리 (JSON 파일용) --------------------
# 'review.json' 파일을 올바르게 로드합니다.
try:
    df = pd.read_json('review.json', lines=True)
    print("✅ JSON 파일 로드 성공.")
except FileNotFoundError:
    print("❌ Error: 'review.json' 파일이 존재하지 않습니다.")
    sys.exit()
except ValueError as e:
    print(f"JSONL 형식이 아닌 것 같습니다. 일반 JSON 파일로 다시 시도합니다. (오류: {e})")
    try:
        df = pd.read_json('review.json')
        print("✅ 일반 JSON 파일 로드 성공.")
    except Exception as e:
        print(f"❌ Error: 'review.json' 파일을 읽는 데 실패했습니다. 파일 형식을 확인해주세요. (오류: {e})")
        sys.exit()

df = df[['user_id', 'business_id', 'stars', 'text']].dropna()

# -------------------- Dataset 클래스 정의 --------------------
class UCAMDataset(Dataset):
    def __init__(self, users, items, ratings, contexts):
        self.users = users
        self.items = items
        self.ratings = ratings
        self.contexts = contexts

    def __len__(self):
        return len(self.ratings)

    def __getitem__(self, idx):
        return (
            torch.tensor(self.users[idx], dtype=torch.long),
            torch.tensor(self.items[idx], dtype=torch.long),
            torch.tensor(self.contexts[idx], dtype=torch.float32),
            torch.tensor(self.ratings[idx], dtype=torch.float32)
        )

# -------------------- 모델 정의 (하이퍼파라미터 적용) --------------------
class UCAM(nn.Module):
    def __init__(self, num_users, num_items, context_dim=384, embed_dim=32):
        super().__init__()
        self.user_embed = nn.Embedding(num_users + 1, embed_dim)
        self.item_embed = nn.Embedding(num_items + 1, embed_dim)
        
        # 최적의 hidden_dims: [64, 32] 적용
        self.fc_layers = nn.Sequential(
            nn.Linear(embed_dim * 2 + context_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, user_ids, item_ids, context_vecs):
        u = self.user_embed(user_ids)
        i = self.item_embed(item_ids)
        x = torch.cat([u, i, context_vecs], dim=-1)
        return self.fc_layers(x).squeeze()

# -------------------- 실험을 위한 함수 정의 (하이퍼파라미터 적용) --------------------
def run_experiment(df, random_seed):
    print(f"\n==================== 실험 시작: random_state={random_seed} ====================")
    
    # 데이터 분할
    train_df, test_df = train_test_split(df, test_size=0.2, random_state=random_seed)

    # 훈련 세트의 고유한 사용자/아이템 ID를 기반으로 인덱스를 생성합니다.
    user2idx = {uid: i for i, uid in enumerate(train_df['user_id'].unique())}
    item2idx = {iid: i for i, iid in enumerate(train_df['business_id'].unique())}

    # 테스트 세트에만 존재하는 사용자/아이템을 위한 '알 수 없음' 인덱스를 추가합니다.
    unknown_user_idx = len(user2idx)
    unknown_item_idx = len(item2idx)

    # SBERT 모델 로딩 및 문맥 벡터 생성
    print("SBERT 모델 로딩 및 문맥 벡터 생성 중...")
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    sbert = SentenceTransformer('all-MiniLM-L6-v2', device=device)
    train_context_vectors = sbert.encode(train_df['text'].tolist(), show_progress_bar=True)
    test_context_vectors = sbert.encode(test_df['text'].tolist(), show_progress_bar=True)

    # 사용자/아이템 ID를 인덱스로 매핑하고, '알 수 없음'은 새로운 인덱스로 채웁니다.
    train_df['user'] = train_df['user_id'].map(user2idx)
    train_df['item'] = train_df['business_id'].map(item2idx)
    test_df['user'] = test_df['user_id'].map(user2idx).fillna(unknown_user_idx).astype(int)
    test_df['item'] = test_df['business_id'].map(item2idx).fillna(unknown_item_idx).astype(int)

    # DataLoader 생성 (최적의 batch_size: 128 적용)
    train_dataset = UCAMDataset(
        train_df['user'].values,
        train_df['item'].values,
        train_df['stars'].values.astype(np.float32),
        train_context_vectors
    )
    test_dataset = UCAMDataset(
        test_df['user'].values,
        test_df['item'].values,
        test_df['stars'].values.astype(np.float32),
        test_context_vectors
    )

    train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

    # 모델, 옵티마이저, 손실 함수 정의 (최적의 embed_dim: 32, learning_rate: 0.0005 적용)
    context_dim = sbert.get_sentence_embedding_dimension()
    model = UCAM(num_users=len(user2idx), num_items=len(item2idx), embed_dim=32, context_dim=context_dim).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)
    criterion = nn.MSELoss()

    best_val_rmse = float('inf')
    epochs_no_improve = 0
    patience = 7  # 최적의 patience: 7 적용
    min_delta = 0.001
    epochs = 50
    model_path = f'best_ucam_model_{random_seed}.pt'

    print("모델 학습 시작...")
    for epoch in range(epochs):
        model.train()
        total_train_loss = 0
        progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs} (Train)", leave=False)

        for user_ids, item_ids, context_vectors, stars in progress_bar:
            user_ids = user_ids.to(device)
            item_ids = item_ids.to(device)
            context_vectors = context_vectors.to(device)
            stars = stars.to(device)

            optimizer.zero_grad()
            predictions = model(user_ids, item_ids, context_vectors)
            loss = criterion(predictions, stars)
            loss.backward()
            optimizer.step()
            total_train_loss += loss.item()
            
        # --- 검증 ---
        model.eval()
        val_preds, val_true = [], []
        with torch.no_grad():
            for user_ids, item_ids, context_vectors, stars in test_loader:
                user_ids = user_ids.to(device)
                item_ids = item_ids.to(device)
                context_vectors = context_vectors.to(device)
                stars = stars.to(device)

                preds = model(user_ids, item_ids, context_vectors)
                val_preds.extend(preds.cpu().numpy())
                val_true.extend(stars.cpu().numpy())

        val_preds = np.array(val_preds)
        val_true = np.array(val_true)
        val_mse = mean_squared_error(val_true, val_preds)
        val_rmse = np.sqrt(val_mse)
        val_mae = mean_absolute_error(val_true, val_preds)
        val_mape = mean_absolute_percentage_error(val_true, val_preds)

        print(f"\nEpoch {epoch+1} | Train Loss: {total_train_loss / len(train_loader):.4f} | "
              f"Val RMSE: {val_rmse:.4f}, MAE: {val_mae:.4f}, MAPE: {val_mape:.2f}%")

        if val_rmse < best_val_rmse - min_delta:
            best_val_rmse = val_rmse
            epochs_no_improve = 0
            torch.save(model.state_dict(), model_path)
            print(f"  --> 개선됨. 모델 저장됨 (RMSE: {best_val_rmse:.4f})")
        else:
            epochs_no_improve += 1
            print(f"  --> 개선 없음. ({epochs_no_improve}/{patience})")
            if epochs_no_improve == patience:
                print("조기 종료 발생.")
                break
    
    # -------------------- 테스트셋 평가 --------------------
    def evaluate_model(model, data_loader, device):
        model.eval()
        preds, targets = [], []

        with torch.no_grad():
            for users, items, contexts, ratings in data_loader:
                users = users.to(device)
                items = items.to(device)
                contexts = contexts.to(device)
                ratings = ratings.to(device)

                output = model(users, items, contexts)
                preds.extend(output.cpu().numpy())
                targets.extend(ratings.cpu().numpy())

        preds = np.array(preds)
        targets = np.array(targets)

        mae = mean_absolute_error(targets, preds)
        mse = mean_squared_error(targets, preds)
        rmse = np.sqrt(mse)
        mape = mean_absolute_percentage_error(targets, preds)

        return mae, mse, rmse, mape

    if os.path.exists(model_path):
        model.load_state_dict(torch.load(model_path))
        print(f"\n최적 모델 로드 완료: {model_path}")
        mae, mse, rmse, mape = evaluate_model(model, test_loader, device)

        print(f"\n✅ [UCAM] 최종 테스트 평가 지표 (random_state={random_seed}):")
        print(f"    - MSE  : {mse:.4f}")
        print(f"    - RMSE : {rmse:.4f}")
        print(f"    - MAE  : {mae:.4f}")
        print(f"    - MAPE : {mape:.2f}%")
        
        return {'mse': mse, 'rmse': rmse, 'mae': mae, 'mape': mape}
    
    return None

# -------------------- Step 9: 5회 실험 및 평균 계산 --------------------
all_results = []
num_runs = 5
start_seed = 42

for i in range(num_runs):
    seed = start_seed + i
    results = run_experiment(df, seed)
    if results:
        all_results.append(results)

if all_results:
    avg_mse = np.mean([r['mse'] for r in all_results])
    avg_rmse = np.mean([r['rmse'] for r in all_results])
    avg_mae = np.mean([r['mae'] for r in all_results])
    avg_mape = np.mean([r['mape'] for r in all_results])

    print("\n\n==================== 5회 실험 평균 결과 ====================")
    print(f"✔️ 평균 MSE  : {avg_mse:.4f}")
    print(f"✔️ 평균 RMSE : {avg_rmse:.4f}")
    print(f"✔️ 평균 MAE  : {avg_mae:.4f}")
    print(f"✔️ 평균 MAPE : {avg_mape:.2f}%")
else:
    print("❌ 실험 결과를 얻지 못했습니다. 오류를 확인해주세요.")

✅ JSON 파일 로드 성공.

SBERT 모델 로딩 및 문맥 벡터 생성 중...


Batches: 100%|██████████| 11195/11195 [03:44<00:00, 49.80it/s] 
Batches: 100%|██████████| 2799/2799 [00:57<00:00, 49.01it/s] 


모델 학습 시작...


                                                                        


Epoch 1 | Train Loss: 1.0736 | Val RMSE: 0.8306, MAE: 0.6574, MAPE: 24.22%
  --> 개선됨. 모델 저장됨 (RMSE: 0.8306)


                                                                        


Epoch 2 | Train Loss: 0.6514 | Val RMSE: 0.7920, MAE: 0.6199, MAPE: 22.38%
  --> 개선됨. 모델 저장됨 (RMSE: 0.7920)


                                                                        


Epoch 3 | Train Loss: 0.6015 | Val RMSE: 0.7763, MAE: 0.6059, MAPE: 22.07%
  --> 개선됨. 모델 저장됨 (RMSE: 0.7763)


                                                                        


Epoch 4 | Train Loss: 0.5692 | Val RMSE: 0.7697, MAE: 0.5977, MAPE: 21.78%
  --> 개선됨. 모델 저장됨 (RMSE: 0.7697)


                                                                        


Epoch 5 | Train Loss: 0.5436 | Val RMSE: 0.7595, MAE: 0.5876, MAPE: 21.30%
  --> 개선됨. 모델 저장됨 (RMSE: 0.7595)


                                                                        


Epoch 6 | Train Loss: 0.5209 | Val RMSE: 0.7561, MAE: 0.5877, MAPE: 20.99%
  --> 개선됨. 모델 저장됨 (RMSE: 0.7561)


                                                                        


Epoch 7 | Train Loss: 0.5014 | Val RMSE: 0.7524, MAE: 0.5819, MAPE: 20.82%
  --> 개선됨. 모델 저장됨 (RMSE: 0.7524)


                                                                        


Epoch 8 | Train Loss: 0.4832 | Val RMSE: 0.7509, MAE: 0.5829, MAPE: 20.71%
  --> 개선됨. 모델 저장됨 (RMSE: 0.7509)


                                                                        


Epoch 9 | Train Loss: 0.4668 | Val RMSE: 0.7502, MAE: 0.5815, MAPE: 20.56%
  --> 개선 없음. (1/7)


                                                                         


Epoch 10 | Train Loss: 0.4512 | Val RMSE: 0.7524, MAE: 0.5795, MAPE: 20.80%
  --> 개선 없음. (2/7)


                                                                         


Epoch 11 | Train Loss: 0.4359 | Val RMSE: 0.7553, MAE: 0.5793, MAPE: 20.50%
  --> 개선 없음. (3/7)


                                                                         


Epoch 12 | Train Loss: 0.4222 | Val RMSE: 0.7578, MAE: 0.5801, MAPE: 20.98%
  --> 개선 없음. (4/7)


                                                                         


Epoch 13 | Train Loss: 0.4088 | Val RMSE: 0.7584, MAE: 0.5815, MAPE: 20.81%
  --> 개선 없음. (5/7)


                                                                         


Epoch 14 | Train Loss: 0.3955 | Val RMSE: 0.7599, MAE: 0.5838, MAPE: 20.94%
  --> 개선 없음. (6/7)


                                                                         


Epoch 15 | Train Loss: 0.3824 | Val RMSE: 0.7657, MAE: 0.5855, MAPE: 20.93%
  --> 개선 없음. (7/7)
조기 종료 발생.

최적 모델 로드 완료: best_ucam_model_42.pt

✅ [UCAM] 최종 테스트 평가 지표 (random_state=42):
    - MSE  : 0.5639
    - RMSE : 0.7509
    - MAE  : 0.5829
    - MAPE : 20.71%

SBERT 모델 로딩 및 문맥 벡터 생성 중...


Batches: 100%|██████████| 11195/11195 [15:13<00:00, 12.26it/s] 
Batches: 100%|██████████| 2799/2799 [06:17<00:00,  7.41it/s] 


모델 학습 시작...


                                                                        


Epoch 1 | Train Loss: 1.0119 | Val RMSE: 0.8198, MAE: 0.6488, MAPE: 22.91%
  --> 개선됨. 모델 저장됨 (RMSE: 0.8198)


                                                                        


Epoch 2 | Train Loss: 0.6378 | Val RMSE: 0.7956, MAE: 0.6276, MAPE: 21.86%
  --> 개선됨. 모델 저장됨 (RMSE: 0.7956)


                                                                        


Epoch 3 | Train Loss: 0.5954 | Val RMSE: 0.7765, MAE: 0.6061, MAPE: 21.65%
  --> 개선됨. 모델 저장됨 (RMSE: 0.7765)


                                                                        


Epoch 4 | Train Loss: 0.5646 | Val RMSE: 0.7652, MAE: 0.5956, MAPE: 21.20%
  --> 개선됨. 모델 저장됨 (RMSE: 0.7652)


                                                                        


Epoch 5 | Train Loss: 0.5401 | Val RMSE: 0.7601, MAE: 0.5928, MAPE: 20.78%
  --> 개선됨. 모델 저장됨 (RMSE: 0.7601)


                                                                        


Epoch 6 | Train Loss: 0.5181 | Val RMSE: 0.7570, MAE: 0.5849, MAPE: 20.45%
  --> 개선됨. 모델 저장됨 (RMSE: 0.7570)


                                                                        


Epoch 7 | Train Loss: 0.4985 | Val RMSE: 0.7518, MAE: 0.5827, MAPE: 20.57%
  --> 개선됨. 모델 저장됨 (RMSE: 0.7518)


                                                                        


Epoch 8 | Train Loss: 0.4801 | Val RMSE: 0.7563, MAE: 0.5880, MAPE: 20.28%
  --> 개선 없음. (1/7)


                                                                        


Epoch 9 | Train Loss: 0.4626 | Val RMSE: 0.7507, MAE: 0.5795, MAPE: 20.48%
  --> 개선됨. 모델 저장됨 (RMSE: 0.7507)


                                                                        


Epoch 10 | Train Loss: 0.4466 | Val RMSE: 0.7521, MAE: 0.5797, MAPE: 20.55%
  --> 개선 없음. (1/7)


                                                                        


Epoch 11 | Train Loss: 0.4316 | Val RMSE: 0.7537, MAE: 0.5815, MAPE: 20.47%
  --> 개선 없음. (2/7)


                                                                         


Epoch 12 | Train Loss: 0.4172 | Val RMSE: 0.7556, MAE: 0.5787, MAPE: 20.60%
  --> 개선 없음. (3/7)


                                                                         


Epoch 13 | Train Loss: 0.4028 | Val RMSE: 0.7618, MAE: 0.5810, MAPE: 21.03%
  --> 개선 없음. (4/7)


                                                                         


Epoch 14 | Train Loss: 0.3896 | Val RMSE: 0.7611, MAE: 0.5821, MAPE: 20.61%
  --> 개선 없음. (5/7)


                                                                         


Epoch 15 | Train Loss: 0.3763 | Val RMSE: 0.7625, MAE: 0.5820, MAPE: 20.70%
  --> 개선 없음. (6/7)


                                                                         


Epoch 16 | Train Loss: 0.3639 | Val RMSE: 0.7647, MAE: 0.5866, MAPE: 20.69%
  --> 개선 없음. (7/7)
조기 종료 발생.

최적 모델 로드 완료: best_ucam_model_43.pt

✅ [UCAM] 최종 테스트 평가 지표 (random_state=43):
    - MSE  : 0.5635
    - RMSE : 0.7507
    - MAE  : 0.5795
    - MAPE : 20.48%

SBERT 모델 로딩 및 문맥 벡터 생성 중...


Batches: 100%|██████████| 11195/11195 [05:31<00:00, 33.74it/s] 
Batches: 100%|██████████| 2799/2799 [01:09<00:00, 40.04it/s] 


모델 학습 시작...


                                                                        


Epoch 1 | Train Loss: 1.0780 | Val RMSE: 0.8311, MAE: 0.6559, MAPE: 24.03%
  --> 개선됨. 모델 저장됨 (RMSE: 0.8311)


                                                                        


Epoch 2 | Train Loss: 0.6449 | Val RMSE: 0.7923, MAE: 0.6193, MAPE: 22.47%
  --> 개선됨. 모델 저장됨 (RMSE: 0.7923)


                                                                        


Epoch 3 | Train Loss: 0.5947 | Val RMSE: 0.7780, MAE: 0.6059, MAPE: 21.62%
  --> 개선됨. 모델 저장됨 (RMSE: 0.7780)


                                                                        


Epoch 4 | Train Loss: 0.5618 | Val RMSE: 0.7688, MAE: 0.5973, MAPE: 21.37%
  --> 개선됨. 모델 저장됨 (RMSE: 0.7688)


                                                                        


Epoch 5 | Train Loss: 0.5352 | Val RMSE: 0.7639, MAE: 0.5903, MAPE: 21.08%
  --> 개선됨. 모델 저장됨 (RMSE: 0.7639)


                                                                       


Epoch 6 | Train Loss: 0.5129 | Val RMSE: 0.7580, MAE: 0.5861, MAPE: 21.07%
  --> 개선됨. 모델 저장됨 (RMSE: 0.7580)


                                                                        


Epoch 7 | Train Loss: 0.4934 | Val RMSE: 0.7633, MAE: 0.5855, MAPE: 21.28%
  --> 개선 없음. (1/7)


                                                                        


Epoch 8 | Train Loss: 0.4760 | Val RMSE: 0.7539, MAE: 0.5812, MAPE: 20.53%
  --> 개선됨. 모델 저장됨 (RMSE: 0.7539)


                                                                        


Epoch 9 | Train Loss: 0.4587 | Val RMSE: 0.7588, MAE: 0.5881, MAPE: 20.31%
  --> 개선 없음. (1/7)


                                                                         


Epoch 10 | Train Loss: 0.4428 | Val RMSE: 0.7553, MAE: 0.5819, MAPE: 20.37%
  --> 개선 없음. (2/7)


                                                                         


Epoch 11 | Train Loss: 0.4283 | Val RMSE: 0.7620, MAE: 0.5889, MAPE: 20.51%
  --> 개선 없음. (3/7)


                                                                         


Epoch 12 | Train Loss: 0.4134 | Val RMSE: 0.7617, MAE: 0.5855, MAPE: 20.73%
  --> 개선 없음. (4/7)


                                                                         


Epoch 13 | Train Loss: 0.3999 | Val RMSE: 0.7694, MAE: 0.5931, MAPE: 20.32%
  --> 개선 없음. (5/7)


                                                                        


Epoch 14 | Train Loss: 0.3870 | Val RMSE: 0.7680, MAE: 0.5877, MAPE: 20.79%
  --> 개선 없음. (6/7)


                                                                        


Epoch 15 | Train Loss: 0.3740 | Val RMSE: 0.7712, MAE: 0.5891, MAPE: 21.19%
  --> 개선 없음. (7/7)
조기 종료 발생.

최적 모델 로드 완료: best_ucam_model_44.pt

✅ [UCAM] 최종 테스트 평가 지표 (random_state=44):
    - MSE  : 0.5684
    - RMSE : 0.7539
    - MAE  : 0.5812
    - MAPE : 20.53%

SBERT 모델 로딩 및 문맥 벡터 생성 중...


Batches: 100%|██████████| 11195/11195 [16:30<00:00, 11.31it/s] 
Batches: 100%|██████████| 2799/2799 [02:58<00:00, 15.68it/s] 


모델 학습 시작...


                                                                        


Epoch 1 | Train Loss: 1.0316 | Val RMSE: 0.8359, MAE: 0.6609, MAPE: 24.46%
  --> 개선됨. 모델 저장됨 (RMSE: 0.8359)


                                                                        


Epoch 2 | Train Loss: 0.6584 | Val RMSE: 0.8001, MAE: 0.6258, MAPE: 22.55%
  --> 개선됨. 모델 저장됨 (RMSE: 0.8001)


                                                                        


Epoch 3 | Train Loss: 0.5978 | Val RMSE: 0.7809, MAE: 0.6086, MAPE: 21.90%
  --> 개선됨. 모델 저장됨 (RMSE: 0.7809)


                                                                        


Epoch 4 | Train Loss: 0.5608 | Val RMSE: 0.7685, MAE: 0.5950, MAPE: 21.66%
  --> 개선됨. 모델 저장됨 (RMSE: 0.7685)


                                                                        


Epoch 5 | Train Loss: 0.5322 | Val RMSE: 0.7624, MAE: 0.5895, MAPE: 21.41%
  --> 개선됨. 모델 저장됨 (RMSE: 0.7624)


                                                                        


Epoch 6 | Train Loss: 0.5081 | Val RMSE: 0.7633, MAE: 0.5924, MAPE: 20.80%
  --> 개선 없음. (1/7)


                                                                        


Epoch 7 | Train Loss: 0.4873 | Val RMSE: 0.7696, MAE: 0.5872, MAPE: 22.17%
  --> 개선 없음. (2/7)


                                                                        


Epoch 8 | Train Loss: 0.4680 | Val RMSE: 0.7566, MAE: 0.5805, MAPE: 21.24%
  --> 개선됨. 모델 저장됨 (RMSE: 0.7566)


                                                                        


Epoch 9 | Train Loss: 0.4507 | Val RMSE: 0.7543, MAE: 0.5787, MAPE: 20.78%
  --> 개선됨. 모델 저장됨 (RMSE: 0.7543)


                                                                         


Epoch 10 | Train Loss: 0.4347 | Val RMSE: 0.7581, MAE: 0.5831, MAPE: 20.75%
  --> 개선 없음. (1/7)


                                                                         


Epoch 11 | Train Loss: 0.4190 | Val RMSE: 0.7594, MAE: 0.5790, MAPE: 20.77%
  --> 개선 없음. (2/7)


                                                                         


Epoch 12 | Train Loss: 0.4041 | Val RMSE: 0.7663, MAE: 0.5869, MAPE: 20.36%
  --> 개선 없음. (3/7)


                                                                         


Epoch 13 | Train Loss: 0.3906 | Val RMSE: 0.7630, MAE: 0.5855, MAPE: 20.93%
  --> 개선 없음. (4/7)


                                                                         


Epoch 14 | Train Loss: 0.3776 | Val RMSE: 0.7707, MAE: 0.5841, MAPE: 21.50%
  --> 개선 없음. (5/7)


                                                                         


Epoch 15 | Train Loss: 0.3651 | Val RMSE: 0.7805, MAE: 0.5904, MAPE: 21.59%
  --> 개선 없음. (6/7)


                                                                         


Epoch 16 | Train Loss: 0.3539 | Val RMSE: 0.7725, MAE: 0.5866, MAPE: 21.12%
  --> 개선 없음. (7/7)
조기 종료 발생.

최적 모델 로드 완료: best_ucam_model_45.pt

✅ [UCAM] 최종 테스트 평가 지표 (random_state=45):
    - MSE  : 0.5690
    - RMSE : 0.7543
    - MAE  : 0.5787
    - MAPE : 20.78%

SBERT 모델 로딩 및 문맥 벡터 생성 중...


Batches: 100%|██████████| 11195/11195 [09:01<00:00, 20.69it/s] 
Batches: 100%|██████████| 2799/2799 [06:10<00:00,  7.56it/s]


모델 학습 시작...


                                                                       


Epoch 1 | Train Loss: 1.0543 | Val RMSE: 0.8261, MAE: 0.6515, MAPE: 23.64%
  --> 개선됨. 모델 저장됨 (RMSE: 0.8261)


                                                                        


Epoch 2 | Train Loss: 0.6422 | Val RMSE: 0.7907, MAE: 0.6179, MAPE: 22.33%
  --> 개선됨. 모델 저장됨 (RMSE: 0.7907)


                                                                        


Epoch 3 | Train Loss: 0.5948 | Val RMSE: 0.7743, MAE: 0.6028, MAPE: 21.52%
  --> 개선됨. 모델 저장됨 (RMSE: 0.7743)


                                                                        


Epoch 4 | Train Loss: 0.5604 | Val RMSE: 0.7640, MAE: 0.5953, MAPE: 21.11%
  --> 개선됨. 모델 저장됨 (RMSE: 0.7640)


                                                                        


Epoch 5 | Train Loss: 0.5332 | Val RMSE: 0.7545, MAE: 0.5840, MAPE: 20.86%
  --> 개선됨. 모델 저장됨 (RMSE: 0.7545)


                                                                        


Epoch 6 | Train Loss: 0.5104 | Val RMSE: 0.7522, MAE: 0.5782, MAPE: 20.82%
  --> 개선됨. 모델 저장됨 (RMSE: 0.7522)


                                                                        


Epoch 7 | Train Loss: 0.4902 | Val RMSE: 0.7510, MAE: 0.5800, MAPE: 20.45%
  --> 개선됨. 모델 저장됨 (RMSE: 0.7510)


                                                                        


Epoch 8 | Train Loss: 0.4727 | Val RMSE: 0.7500, MAE: 0.5796, MAPE: 20.30%
  --> 개선 없음. (1/7)


                                                                        


Epoch 9 | Train Loss: 0.4560 | Val RMSE: 0.7502, MAE: 0.5755, MAPE: 20.66%
  --> 개선 없음. (2/7)


                                                                         


Epoch 10 | Train Loss: 0.4405 | Val RMSE: 0.7479, MAE: 0.5750, MAPE: 20.55%
  --> 개선됨. 모델 저장됨 (RMSE: 0.7479)


                                                                         


Epoch 11 | Train Loss: 0.4264 | Val RMSE: 0.7520, MAE: 0.5761, MAPE: 20.77%
  --> 개선 없음. (1/7)


                                                                         


Epoch 12 | Train Loss: 0.4126 | Val RMSE: 0.7539, MAE: 0.5800, MAPE: 20.46%
  --> 개선 없음. (2/7)


                                                                         


Epoch 13 | Train Loss: 0.3996 | Val RMSE: 0.7617, MAE: 0.5888, MAPE: 20.62%
  --> 개선 없음. (3/7)


                                                                         


Epoch 14 | Train Loss: 0.3871 | Val RMSE: 0.7624, MAE: 0.5836, MAPE: 21.02%
  --> 개선 없음. (4/7)


                                                                         


Epoch 15 | Train Loss: 0.3749 | Val RMSE: 0.7694, MAE: 0.5933, MAPE: 20.59%
  --> 개선 없음. (5/7)


                                                                         


Epoch 16 | Train Loss: 0.3627 | Val RMSE: 0.7695, MAE: 0.5896, MAPE: 20.91%
  --> 개선 없음. (6/7)


                                                                         


Epoch 17 | Train Loss: 0.3510 | Val RMSE: 0.7740, MAE: 0.5931, MAPE: 20.97%
  --> 개선 없음. (7/7)
조기 종료 발생.

최적 모델 로드 완료: best_ucam_model_46.pt

✅ [UCAM] 최종 테스트 평가 지표 (random_state=46):
    - MSE  : 0.5594
    - RMSE : 0.7479
    - MAE  : 0.5750
    - MAPE : 20.55%


✔️ 평균 MSE  : 0.5648
✔️ 평균 RMSE : 0.7515
✔️ 평균 MAE  : 0.5795
✔️ 평균 MAPE : 20.61%
