dataset_fl

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import os
import re
from collections import Counter
import random
import json

# --- 장치 설정 (GPU 사용 가능 시) ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# --- 데이터 전처리 및 어휘 구축 ---
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'[^a-z0-9\s]', '', text)
    return text.split()

class Vocabulary:
    def __init__(self, min_freq):
        self.stoi = {"<PAD>": 0, "<UNK>": 1}
        self.itos = {0: "<PAD>", 1: "<UNK>"}
        self.freq = Counter()
        self.min_freq = min_freq
    
    def build_vocabulary(self, text_list):
        for text in text_list:
            self.freq.update(text)
        
        idx = 2
        for word, count in self.freq.items():
            if count >= self.min_freq:
                self.stoi[word] = idx
                self.itos[idx] = word
                idx += 1
    
    def numericalize(self, text):
        return [self.stoi.get(token, self.stoi["<UNK>"]) for token in text]

# --- PyTorch Dataset 및 DataLoader 정의 ---
class AFRAMDataset(Dataset):
    def __init__(self, df):
        self.user_ids = torch.tensor(df['user_encoded'].values, dtype=torch.long)
        self.business_ids = torch.tensor(df['business_encoded'].values, dtype=torch.long)
        self.reviews = torch.tensor(np.array(df['numericalized_text'].tolist()), dtype=torch.long)
        self.stars = torch.tensor(df['review_stars'].values, dtype=torch.float)

    def __len__(self):
        return len(self.stars)

    def __getitem__(self, idx):
        return self.user_ids[idx], self.business_ids[idx], self.reviews[idx], self.stars[idx]

# --- AFRAM 모델 아키텍처 정의 ---
class TextEncoderWithAttention(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, dropout_rate):
        super(TextEncoderWithAttention, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.conv = nn.Conv1d(in_channels=embedding_dim, out_channels=hidden_dim, kernel_size=3, padding=1)
        self.lstm = nn.LSTM(hidden_dim, hidden_dim, batch_first=True, bidirectional=True)
        self.attn_proj = nn.Linear(hidden_dim * 2, hidden_dim * 2)
        self.v = nn.Parameter(torch.rand(hidden_dim * 2, 1))
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, text_seq):
        embedded = self.embedding(text_seq)
        embedded = embedded.permute(0, 2, 1)
        conv_out = torch.relu(self.conv(embedded))
        conv_out = conv_out.permute(0, 2, 1)
        lstm_out, _ = self.lstm(self.dropout(conv_out))
        attn_weights = torch.tanh(self.attn_proj(lstm_out))
        v_expanded = self.v.unsqueeze(0).expand(attn_weights.shape[0], -1, -1)
        scores = torch.bmm(attn_weights, v_expanded)
        attention_weights = torch.softmax(scores, dim=1)
        context_vector = torch.sum(lstm_out * attention_weights, dim=1)
        return context_vector

class AFRAMModel(nn.Module):
    def __init__(self, num_users, num_businesses, vocab_size, embedding_dim,
                 text_encoder_hidden_dim, user_item_mlp_dims, final_mlp_dims, dropout_rate):
        super(AFRAMModel, self).__init__()
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.business_embedding = nn.Embedding(num_businesses, embedding_dim)
        self.review_encoder = TextEncoderWithAttention(vocab_size, embedding_dim, text_encoder_hidden_dim, dropout_rate)
        user_item_mlp_input_dim = embedding_dim * 2
        user_item_layers = []
        for dim in user_item_mlp_dims:
            user_item_layers.append(nn.Linear(user_item_mlp_input_dim, dim))
            user_item_layers.append(nn.ReLU())
            user_item_mlp_input_dim = dim
        self.user_item_mlp = nn.Sequential(*user_item_layers)
        self.user_item_mlp_output_dim = user_item_mlp_dims[-1] if user_item_mlp_dims else embedding_dim * 2
        final_mlp_input_dim = self.user_item_mlp_output_dim + text_encoder_hidden_dim * 2
        final_layers = []
        for dim in final_mlp_dims:
            final_layers.append(nn.Linear(final_mlp_input_dim, dim))
            final_layers.append(nn.ReLU())
            final_mlp_input_dim = dim
        final_layers.append(nn.Linear(final_mlp_input_dim, 1))
        self.prediction_mlp = nn.Sequential(*final_layers)

    def forward(self, user_ids, business_ids, reviews):
        user_vec = self.user_embedding(user_ids)
        business_vec = self.business_embedding(business_ids)
        user_item_combined = torch.cat((user_vec, business_vec), dim=1)
        user_item_features = self.user_item_mlp(user_item_combined)
        review_features = self.review_encoder(reviews)
        combined_features = torch.cat((user_item_features, review_features), dim=1)
        predicted_rating = self.prediction_mlp(combined_features)
        return predicted_rating.reshape(-1) # <-- 이 부분을 수정했습니다.

def train_and_evaluate(params, train_df, val_df, test_df, num_users, num_businesses, vocab_size, run_num):
    train_loader = DataLoader(AFRAMDataset(train_df), batch_size=params['batch_size'], shuffle=True)
    val_loader = DataLoader(AFRAMDataset(val_df), batch_size=params['batch_size'], shuffle=False)
    test_loader = DataLoader(AFRAMDataset(test_df), batch_size=params['batch_size'], shuffle=False)
    model = AFRAMModel(num_users, num_businesses, vocab_size, params['embedding_dim'],
                       params['text_encoder_hidden_dim'], params['user_item_mlp_dims'],
                       params['final_mlp_dims'], params['dropout_rate']).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=params['learning_rate'])
    epochs = 50
    patience = 7
    min_delta = 0.0005
    best_val_rmse = float('inf')
    epochs_no_improve = 0
    model_save_path = f'temp_best_model_run_{run_num}.pt'

    for epoch in range(epochs):
        model.train()
        for user_ids, business_ids, reviews, stars in train_loader:
            user_ids, business_ids, reviews, stars = user_ids.to(device), business_ids.to(device), reviews.to(device), stars.to(device)
            optimizer.zero_grad()
            predictions = model(user_ids, business_ids, reviews)
            loss = criterion(predictions, stars)
            loss.backward()
            optimizer.step()
        
        model.eval()
        val_predictions = []
        val_true_ratings = []
        with torch.no_grad():
            for user_ids, business_ids, reviews, stars in val_loader:
                user_ids, business_ids, reviews, stars = user_ids.to(device), business_ids.to(device), reviews.to(device), stars.to(device)
                predictions = model(user_ids, business_ids, reviews)
                val_predictions.extend(predictions.tolist())
                val_true_ratings.extend(stars.tolist())
        
        current_val_rmse = np.sqrt(mean_squared_error(val_true_ratings, val_predictions))
        
        if current_val_rmse < best_val_rmse - min_delta:
            best_val_rmse = current_val_rmse
            epochs_no_improve = 0
            torch.save(model.state_dict(), model_save_path)
        else:
            epochs_no_improve += 1
            if epochs_no_improve == patience:
                break
    
    if os.path.exists(model_save_path):
        model.load_state_dict(torch.load(model_save_path))
    
    model.eval()
    test_predictions = []
    true_ratings = []
    with torch.no_grad():
        for user_ids, business_ids, reviews, stars in test_loader:
            user_ids, business_ids, reviews, stars = user_ids.to(device), business_ids.to(device), reviews.to(device), stars.to(device)
            predictions = model(user_ids, business_ids, reviews)
            test_predictions.extend(predictions.tolist())
            true_ratings.extend(stars.tolist())

    mse = mean_squared_error(true_ratings, test_predictions)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(true_ratings, test_predictions)

    if os.path.exists(model_save_path):
        os.remove(model_save_path)

    return mse, rmse, mae

def main():
    # --- 1. 데이터 로드 및 전처리 ---
    try:
        df = pd.read_json('dataset_fl.json', lines=True)
    except ValueError:
        df = pd.read_json('dataset_fl.json')
    df_processed = df[['user_id', 'business_id', 'review_stars', 'text']].copy()
    user_encoder = LabelEncoder()
    business_encoder = LabelEncoder()
    df_processed.loc[:, 'user_encoded'] = user_encoder.fit_transform(df_processed['user_id'])
    df_processed.loc[:, 'business_encoded'] = business_encoder.fit_transform(df_processed['business_id'])
    num_users = len(user_encoder.classes_)
    num_businesses = len(business_encoder.classes_)
    all_texts = df_processed['text'].apply(preprocess_text).tolist()
    min_word_freq = 5
    vocab = Vocabulary(min_word_freq)
    vocab.build_vocabulary(all_texts)
    vocab_size = len(vocab.stoi)
    print(f"Vocabulary size: {vocab_size}")
    MAX_REVIEW_LEN = 100
    df_processed.loc[:, 'numericalized_text'] = df_processed['text'].apply(vocab.numericalize)
    df_processed['numericalized_text'] = df_processed['numericalized_text'].apply(
        lambda x: x[:MAX_REVIEW_LEN] if len(x) > MAX_REVIEW_LEN else x + [vocab.stoi["<PAD>"]] * (MAX_REVIEW_LEN - len(x))
    )

    # --- 2. 하이퍼파라미터 탐색 (Random Search) ---
    param_grid = {
        'embedding_dim': [32, 64, 128],
        'text_encoder_hidden_dim': [64, 128, 256],
        'learning_rate': [0.0005, 0.001, 0.002],
        'batch_size': [128, 256, 512],
        'user_item_mlp_dims': [[64, 32], [128, 64], [256, 128]],
        'final_mlp_dims': [[32, 16], [64, 32], [128, 64]],
        'dropout_rate': [0.1, 0.2, 0.3]
    }
    num_trials = 10
    best_params = None
    best_val_rmse = float('inf')
    
    print(f"\n--- Starting Hyperparameter Search with {num_trials} trials ---")
    train_val_df, test_df_hp = train_test_split(df_processed, test_size=0.2, random_state=42)
    train_df_hp, val_df_hp = train_test_split(train_val_df, test_size=1/8, random_state=42)

    for trial_num in range(num_trials):
        current_params = {k: random.choice(v) for k, v in param_grid.items()}
        print(f"\n--- Trial {trial_num + 1}/{num_trials} ---")
        print(f"Parameters: {current_params}")
        
        mse, rmse, mae = train_and_evaluate(current_params, train_df_hp, val_df_hp, test_df_hp,
                                            num_users, num_businesses, vocab_size, trial_num)
        
        print(f"  Trial {trial_num+1} Test RMSE: {rmse:.4f}")
        
        if rmse < best_val_rmse:
            best_val_rmse = rmse
            best_params = current_params
            print(f"  --> New best RMSE found: {best_val_rmse:.4f} with params: {best_params}")

    print(f"\n--- Hyperparameter Search Completed ---")
    print(f"Best Parameters found: {best_params}")

    # --- 3. 최적 파라미터로 모델 5회 반복 학습 및 평균 성능 계산 ---
    if best_params:
        all_rmse = []
        all_mae = []
        num_runs = 5
        
        print(f"\n--- Starting {num_runs} runs with Best Parameters ---")
        print(f"Best Parameters: {best_params}")
        
        for i in range(num_runs):
            current_random_state = 42 + i
            print(f"\n--- Run {i+1}/{num_runs} (Random State: {current_random_state}) ---")
            train_val_df, test_df = train_test_split(df_processed, test_size=0.2, random_state=current_random_state)
            train_df, val_df = train_test_split(train_val_df, test_size=1/8, random_state=current_random_state)
            mse, rmse, mae = train_and_evaluate(best_params, train_df, val_df, test_df,
                                                num_users, num_businesses, vocab_size, i)
            
            print(f"Run {i+1} Performance on Test Set:")
            print(f"  RMSE: {rmse:.4f}")
            print(f"  MAE: {mae:.4f}")

            all_rmse.append(rmse)
            all_mae.append(mae)

        print(f"\n--- Average Performance over {num_runs} Runs ---")
        print(f"Average RMSE: {np.mean(all_rmse):.4f} +/- {np.std(all_rmse):.4f}")
        print(f"Average MAE: {np.mean(all_mae):.4f} +/- {np.std(all_mae):.4f}")
        
if __name__ == "__main__":
    main()

Using device: cuda
Vocabulary size: 45967

--- Starting Hyperparameter Search with 10 trials ---

--- Trial 1/10 ---
Parameters: {'embedding_dim': 32, 'text_encoder_hidden_dim': 128, 'learning_rate': 0.0005, 'batch_size': 512, 'user_item_mlp_dims': [128, 64], 'final_mlp_dims': [128, 64], 'dropout_rate': 0.2}
  Trial 1 Test RMSE: 0.9486
  --> New best RMSE found: 0.9486 with params: {'embedding_dim': 32, 'text_encoder_hidden_dim': 128, 'learning_rate': 0.0005, 'batch_size': 512, 'user_item_mlp_dims': [128, 64], 'final_mlp_dims': [128, 64], 'dropout_rate': 0.2}

--- Trial 2/10 ---
Parameters: {'embedding_dim': 128, 'text_encoder_hidden_dim': 64, 'learning_rate': 0.002, 'batch_size': 128, 'user_item_mlp_dims': [64, 32], 'final_mlp_dims': [64, 32], 'dropout_rate': 0.3}
  Trial 2 Test RMSE: 0.9620

--- Trial 3/10 ---
Parameters: {'embedding_dim': 64, 'text_encoder_hidden_dim': 64, 'learning_rate': 0.001, 'batch_size': 256, 'user_item_mlp_dims': [128, 64], 'final_mlp_dims': [128, 64], 'dropo

dataset_la

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import os
import re
from collections import Counter
import random
import json

# --- 장치 설정 (GPU 사용 가능 시) ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# --- 데이터 전처리 및 어휘 구축 ---
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'[^a-z0-9\s]', '', text)
    return text.split()

class Vocabulary:
    def __init__(self, min_freq):
        self.stoi = {"<PAD>": 0, "<UNK>": 1}
        self.itos = {0: "<PAD>", 1: "<UNK>"}
        self.freq = Counter()
        self.min_freq = min_freq
    
    def build_vocabulary(self, text_list):
        for text in text_list:
            self.freq.update(text)
        
        idx = 2
        for word, count in self.freq.items():
            if count >= self.min_freq:
                self.stoi[word] = idx
                self.itos[idx] = word
                idx += 1
    
    def numericalize(self, text):
        return [self.stoi.get(token, self.stoi["<UNK>"]) for token in text]

# --- PyTorch Dataset 및 DataLoader 정의 ---
class AFRAMDataset(Dataset):
    def __init__(self, df):
        self.user_ids = torch.tensor(df['user_encoded'].values, dtype=torch.long)
        self.business_ids = torch.tensor(df['business_encoded'].values, dtype=torch.long)
        self.reviews = torch.tensor(np.array(df['numericalized_text'].tolist()), dtype=torch.long)
        self.stars = torch.tensor(df['review_stars'].values, dtype=torch.float)

    def __len__(self):
        return len(self.stars)

    def __getitem__(self, idx):
        return self.user_ids[idx], self.business_ids[idx], self.reviews[idx], self.stars[idx]

# --- AFRAM 모델 아키텍처 정의 ---
class TextEncoderWithAttention(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, dropout_rate):
        super(TextEncoderWithAttention, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.conv = nn.Conv1d(in_channels=embedding_dim, out_channels=hidden_dim, kernel_size=3, padding=1)
        self.lstm = nn.LSTM(hidden_dim, hidden_dim, batch_first=True, bidirectional=True)
        self.attn_proj = nn.Linear(hidden_dim * 2, hidden_dim * 2)
        self.v = nn.Parameter(torch.rand(hidden_dim * 2, 1))
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, text_seq):
        embedded = self.embedding(text_seq)
        embedded = embedded.permute(0, 2, 1)
        conv_out = torch.relu(self.conv(embedded))
        conv_out = conv_out.permute(0, 2, 1)
        lstm_out, _ = self.lstm(self.dropout(conv_out))
        attn_weights = torch.tanh(self.attn_proj(lstm_out))
        v_expanded = self.v.unsqueeze(0).expand(attn_weights.shape[0], -1, -1)
        scores = torch.bmm(attn_weights, v_expanded)
        attention_weights = torch.softmax(scores, dim=1)
        context_vector = torch.sum(lstm_out * attention_weights, dim=1)
        return context_vector

class AFRAMModel(nn.Module):
    def __init__(self, num_users, num_businesses, vocab_size, embedding_dim,
                 text_encoder_hidden_dim, user_item_mlp_dims, final_mlp_dims, dropout_rate):
        super(AFRAMModel, self).__init__()
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.business_embedding = nn.Embedding(num_businesses, embedding_dim)
        self.review_encoder = TextEncoderWithAttention(vocab_size, embedding_dim, text_encoder_hidden_dim, dropout_rate)
        user_item_mlp_input_dim = embedding_dim * 2
        user_item_layers = []
        for dim in user_item_mlp_dims:
            user_item_layers.append(nn.Linear(user_item_mlp_input_dim, dim))
            user_item_layers.append(nn.ReLU())
            user_item_mlp_input_dim = dim
        self.user_item_mlp = nn.Sequential(*user_item_layers)
        self.user_item_mlp_output_dim = user_item_mlp_dims[-1] if user_item_mlp_dims else embedding_dim * 2
        final_mlp_input_dim = self.user_item_mlp_output_dim + text_encoder_hidden_dim * 2
        final_layers = []
        for dim in final_mlp_dims:
            final_layers.append(nn.Linear(final_mlp_input_dim, dim))
            final_layers.append(nn.ReLU())
            final_mlp_input_dim = dim
        final_layers.append(nn.Linear(final_mlp_input_dim, 1))
        self.prediction_mlp = nn.Sequential(*final_layers)

    def forward(self, user_ids, business_ids, reviews):
        user_vec = self.user_embedding(user_ids)
        business_vec = self.business_embedding(business_ids)
        user_item_combined = torch.cat((user_vec, business_vec), dim=1)
        user_item_features = self.user_item_mlp(user_item_combined)
        review_features = self.review_encoder(reviews)
        combined_features = torch.cat((user_item_features, review_features), dim=1)
        predicted_rating = self.prediction_mlp(combined_features)
        return predicted_rating.reshape(-1) # <-- 이 부분을 수정했습니다.

def train_and_evaluate(params, train_df, val_df, test_df, num_users, num_businesses, vocab_size, run_num):
    train_loader = DataLoader(AFRAMDataset(train_df), batch_size=params['batch_size'], shuffle=True)
    val_loader = DataLoader(AFRAMDataset(val_df), batch_size=params['batch_size'], shuffle=False)
    test_loader = DataLoader(AFRAMDataset(test_df), batch_size=params['batch_size'], shuffle=False)
    model = AFRAMModel(num_users, num_businesses, vocab_size, params['embedding_dim'],
                       params['text_encoder_hidden_dim'], params['user_item_mlp_dims'],
                       params['final_mlp_dims'], params['dropout_rate']).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=params['learning_rate'])
    epochs = 50
    patience = 7
    min_delta = 0.0005
    best_val_rmse = float('inf')
    epochs_no_improve = 0
    model_save_path = f'temp_best_model_run_{run_num}.pt'

    for epoch in range(epochs):
        model.train()
        for user_ids, business_ids, reviews, stars in train_loader:
            user_ids, business_ids, reviews, stars = user_ids.to(device), business_ids.to(device), reviews.to(device), stars.to(device)
            optimizer.zero_grad()
            predictions = model(user_ids, business_ids, reviews)
            loss = criterion(predictions, stars)
            loss.backward()
            optimizer.step()
        
        model.eval()
        val_predictions = []
        val_true_ratings = []
        with torch.no_grad():
            for user_ids, business_ids, reviews, stars in val_loader:
                user_ids, business_ids, reviews, stars = user_ids.to(device), business_ids.to(device), reviews.to(device), stars.to(device)
                predictions = model(user_ids, business_ids, reviews)
                val_predictions.extend(predictions.tolist())
                val_true_ratings.extend(stars.tolist())
        
        current_val_rmse = np.sqrt(mean_squared_error(val_true_ratings, val_predictions))
        
        if current_val_rmse < best_val_rmse - min_delta:
            best_val_rmse = current_val_rmse
            epochs_no_improve = 0
            torch.save(model.state_dict(), model_save_path)
        else:
            epochs_no_improve += 1
            if epochs_no_improve == patience:
                break
    
    if os.path.exists(model_save_path):
        model.load_state_dict(torch.load(model_save_path))
    
    model.eval()
    test_predictions = []
    true_ratings = []
    with torch.no_grad():
        for user_ids, business_ids, reviews, stars in test_loader:
            user_ids, business_ids, reviews, stars = user_ids.to(device), business_ids.to(device), reviews.to(device), stars.to(device)
            predictions = model(user_ids, business_ids, reviews)
            test_predictions.extend(predictions.tolist())
            true_ratings.extend(stars.tolist())

    mse = mean_squared_error(true_ratings, test_predictions)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(true_ratings, test_predictions)

    if os.path.exists(model_save_path):
        os.remove(model_save_path)

    return mse, rmse, mae

def main():
    # --- 1. 데이터 로드 및 전처리 ---
    try:
        df = pd.read_json('dataset_la.json', lines=True)
    except ValueError:
        df = pd.read_json('dataset_la.json')
    df_processed = df[['user_id', 'business_id', 'review_stars', 'text']].copy()
    user_encoder = LabelEncoder()
    business_encoder = LabelEncoder()
    df_processed.loc[:, 'user_encoded'] = user_encoder.fit_transform(df_processed['user_id'])
    df_processed.loc[:, 'business_encoded'] = business_encoder.fit_transform(df_processed['business_id'])
    num_users = len(user_encoder.classes_)
    num_businesses = len(business_encoder.classes_)
    all_texts = df_processed['text'].apply(preprocess_text).tolist()
    min_word_freq = 5
    vocab = Vocabulary(min_word_freq)
    vocab.build_vocabulary(all_texts)
    vocab_size = len(vocab.stoi)
    print(f"Vocabulary size: {vocab_size}")
    MAX_REVIEW_LEN = 100
    df_processed.loc[:, 'numericalized_text'] = df_processed['text'].apply(vocab.numericalize)
    df_processed['numericalized_text'] = df_processed['numericalized_text'].apply(
        lambda x: x[:MAX_REVIEW_LEN] if len(x) > MAX_REVIEW_LEN else x + [vocab.stoi["<PAD>"]] * (MAX_REVIEW_LEN - len(x))
    )

    # --- 2. 하이퍼파라미터 탐색 (Random Search) ---
    param_grid = {
        'embedding_dim': [32, 64, 128],
        'text_encoder_hidden_dim': [64, 128, 256],
        'learning_rate': [0.0005, 0.001, 0.002],
        'batch_size': [128, 256, 512],
        'user_item_mlp_dims': [[64, 32], [128, 64], [256, 128]],
        'final_mlp_dims': [[32, 16], [64, 32], [128, 64]],
        'dropout_rate': [0.1, 0.2, 0.3]
    }
    num_trials = 10
    best_params = None
    best_val_rmse = float('inf')
    
    print(f"\n--- Starting Hyperparameter Search with {num_trials} trials ---")
    train_val_df, test_df_hp = train_test_split(df_processed, test_size=0.2, random_state=42)
    train_df_hp, val_df_hp = train_test_split(train_val_df, test_size=1/8, random_state=42)

    for trial_num in range(num_trials):
        current_params = {k: random.choice(v) for k, v in param_grid.items()}
        print(f"\n--- Trial {trial_num + 1}/{num_trials} ---")
        print(f"Parameters: {current_params}")
        
        mse, rmse, mae = train_and_evaluate(current_params, train_df_hp, val_df_hp, test_df_hp,
                                            num_users, num_businesses, vocab_size, trial_num)
        
        print(f"  Trial {trial_num+1} Test RMSE: {rmse:.4f}")
        
        if rmse < best_val_rmse:
            best_val_rmse = rmse
            best_params = current_params
            print(f"  --> New best RMSE found: {best_val_rmse:.4f} with params: {best_params}")

    print(f"\n--- Hyperparameter Search Completed ---")
    print(f"Best Parameters found: {best_params}")

    # --- 3. 최적 파라미터로 모델 5회 반복 학습 및 평균 성능 계산 ---
    if best_params:
        all_rmse = []
        all_mae = []
        num_runs = 5
        
        print(f"\n--- Starting {num_runs} runs with Best Parameters ---")
        print(f"Best Parameters: {best_params}")
        
        for i in range(num_runs):
            current_random_state = 42 + i
            print(f"\n--- Run {i+1}/{num_runs} (Random State: {current_random_state}) ---")
            train_val_df, test_df = train_test_split(df_processed, test_size=0.2, random_state=current_random_state)
            train_df, val_df = train_test_split(train_val_df, test_size=1/8, random_state=current_random_state)
            mse, rmse, mae = train_and_evaluate(best_params, train_df, val_df, test_df,
                                                num_users, num_businesses, vocab_size, i)
            
            print(f"Run {i+1} Performance on Test Set:")
            print(f"  RMSE: {rmse:.4f}")
            print(f"  MAE: {mae:.4f}")

            all_rmse.append(rmse)
            all_mae.append(mae)

        print(f"\n--- Average Performance over {num_runs} Runs ---")
        print(f"Average RMSE: {np.mean(all_rmse):.4f} +/- {np.std(all_rmse):.4f}")
        print(f"Average MAE: {np.mean(all_mae):.4f} +/- {np.std(all_mae):.4f}")
        
if __name__ == "__main__":
    main()

Using device: cuda
Vocabulary size: 37302

--- Starting Hyperparameter Search with 10 trials ---

--- Trial 1/10 ---
Parameters: {'embedding_dim': 64, 'text_encoder_hidden_dim': 64, 'learning_rate': 0.001, 'batch_size': 256, 'user_item_mlp_dims': [256, 128], 'final_mlp_dims': [32, 16], 'dropout_rate': 0.3}
  Trial 1 Test RMSE: 0.9842
  --> New best RMSE found: 0.9842 with params: {'embedding_dim': 64, 'text_encoder_hidden_dim': 64, 'learning_rate': 0.001, 'batch_size': 256, 'user_item_mlp_dims': [256, 128], 'final_mlp_dims': [32, 16], 'dropout_rate': 0.3}

--- Trial 2/10 ---
Parameters: {'embedding_dim': 64, 'text_encoder_hidden_dim': 128, 'learning_rate': 0.001, 'batch_size': 512, 'user_item_mlp_dims': [128, 64], 'final_mlp_dims': [32, 16], 'dropout_rate': 0.2}
  Trial 2 Test RMSE: 0.9947

--- Trial 3/10 ---
Parameters: {'embedding_dim': 32, 'text_encoder_hidden_dim': 64, 'learning_rate': 0.001, 'batch_size': 128, 'user_item_mlp_dims': [256, 128], 'final_mlp_dims': [32, 16], 'dropout_