# Transformer 기반 Rating 예측 모델

유저의 시청 이력 (영화 임베딩 + 평점)을 바탕으로 새로운 영화의 평점을 예측

## 모델 구조
```
Input:
  - History: [(emb_1, rating_1), (emb_2, rating_2), ..., (emb_t, rating_t)]
  - Query: emb_query (평점을 예측할 영화)
       ↓
  [emb + rating_embedding] for each history item
       ↓
  Transformer Encoder (시청 이력 인코딩)
       ↓
  Cross-Attention with Query embedding
       ↓
  MLP Head → Predicted Rating (0.5 ~ 5.0)
```

## 학습 방식
- Input: 유저의 과거 시청 이력 + 타겟 영화 임베딩
- Target: 타겟 영화에 대한 실제 평점
- Loss: MSE (Mean Squared Error)

In [1]:
import sys
import os

# MPS fallback 설정
os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'

sys.path.insert(0, "/Users/jisoo/projects/thesis/carte_test")

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import platform

from config import PROCESSED

# Matplotlib text setup: on macOS switch to AppleGothic so Korean labels render,
# and keep the minus sign drawable with that font.
running_on_macos = platform.system() == 'Darwin'
if running_on_macos:
    plt.rcParams['font.family'] = 'AppleGothic'
plt.rcParams['axes.unicode_minus'] = False

# Compute device: prefer Apple MPS, then CUDA, and fall back to the CPU.
if torch.backends.mps.is_available():
    device_name = 'mps'
elif torch.cuda.is_available():
    device_name = 'cuda'
else:
    device_name = 'cpu'
device = torch.device(device_name)
print(f"Device: {device}")

Device: mps


  from .autonotebook import tqdm as notebook_tqdm


## 1. 데이터 로드

In [2]:
# Load the ratings table and print a few summary statistics.
ratings = pd.read_parquet(PROCESSED.RATINGS_PARQUET)

n_ratings = len(ratings)
n_users = ratings['userId'].nunique()
n_movies = ratings['movieId'].nunique()
print(f"평점 수: {n_ratings:,}")
print(f"유저 수: {n_users:,}")
print(f"영화 수: {n_movies:,}")

# Number of ratings per rating value, in ascending rating order.
rating_counts = ratings['rating'].value_counts().sort_index()
print(f"평점 분포:")
print(rating_counts)

평점 수: 13,717,662
유저 수: 200,948
영화 수: 54,520
평점 분포:
rating
0.5     196137
1.0     388549
1.5     168463
2.0     763558
2.5     526958
3.0    2552185
3.5    1550226
4.0    3769455
4.5    1371038
5.0    2431093
Name: count, dtype: int64


In [3]:
# Load the movie embeddings (KG+GNN+BERT variant, falling back to KG+GNN only).
ablation_dir = PROCESSED.DIR / "ablation_embeddings"
EMB_PATH = ablation_dir / "emb_kg_gnn_bert.parquet"

if not EMB_PATH.exists():
    print(f"Warning: {EMB_PATH} not found, trying kg_gnn...")
    EMB_PATH = ablation_dir / "emb_kg_gnn.parquet"

emb_df = pd.read_parquet(EMB_PATH)
print(f"임베딩 영화 수: {len(emb_df):,}")

# Stack the per-movie embedding lists into one float32 matrix (one row per movie).
movie_ids = emb_df['movieId'].to_numpy()
embedding_rows = emb_df['embedding'].tolist()
embeddings = np.array(embedding_rows, dtype=np.float32)
emb_dim = embeddings.shape[1]
print(f"임베딩 차원: {emb_dim}")

# movieId <-> row-index lookup tables.
movie_to_idx = {}
for row, mid in enumerate(movie_ids):
    movie_to_idx[mid] = row
idx_to_movie = {row: mid for mid, row in movie_to_idx.items()}

# L2-normalise every row; the small epsilon avoids dividing by zero.
norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
embeddings_norm = embeddings / (norms + 1e-12)

임베딩 영화 수: 53,630
임베딩 차원: 300


## 2. 시퀀스 데이터 생성

In [4]:
# Keep only ratings whose movie has an embedding.
valid_movies = set(movie_ids)
ratings_filtered = ratings[ratings['movieId'].isin(valid_movies)].copy()
print(f"필터링 후 평점 수: {len(ratings_filtered):,}")

# ========================================
# Optional user sub-sampling for quick experiments
# ========================================
SAMPLE_USERS = 2000  # set to None to use every user

if SAMPLE_USERS is not None:
    unique_users = ratings_filtered['userId'].drop_duplicates()
    # Never request more users than exist; Series.sample raises otherwise.
    n_sampled = min(SAMPLE_USERS, len(unique_users))
    sampled_user_ids = unique_users.sample(n=n_sampled, random_state=42)
    ratings_filtered = ratings_filtered[ratings_filtered['userId'].isin(sampled_user_ids)]
    print(f"샘플링 후 평점 수: {len(ratings_filtered):,} ({n_sampled:,} 유저)")

# Build each user's chronological sequence of (movieId, rating) tuples.
# A single pass over the sorted rows replaces groupby(...).apply(...), which
# emits a pandas deprecation warning when it operates on the grouping column.
print("유저별 시퀀스 생성 중...")
ordered = ratings_filtered.sort_values(['userId', 'timestamp'])
user_sequences = {}
for uid, mid, rating in zip(
    ordered['userId'].tolist(),
    ordered['movieId'].tolist(),
    ordered['rating'].tolist(),
):
    user_sequences.setdefault(uid, []).append((mid, rating))

# Sequence-length statistics.
seq_lengths = [len(seq) for seq in user_sequences.values()]
print(f"\n유저 수: {len(user_sequences):,}")
print(f"시퀀스 길이 - min: {min(seq_lengths)}, max: {max(seq_lengths)}, mean: {np.mean(seq_lengths):.1f}")

필터링 후 평점 수: 13,680,490
샘플링 후 평점 수: 137,588 (2,000 유저)
유저별 시퀀스 생성 중...

유저 수: 2,000
시퀀스 길이 - min: 19, max: 100, mean: 68.8


  .apply(lambda x: list(zip(x['movieId'].tolist(), x['rating'].tolist())))


In [5]:
# Train/Val/Test split.
# Per user: the last rating is the test target, the second-to-last is the
# validation target, and earlier positions provide training targets.

MIN_SEQ_LEN = 10  # minimum number of history items in front of a target
MAX_SEQ_LEN = 30  # history is truncated to its most recent MAX_SEQ_LEN items
MAX_SAMPLES_PER_USER = 5  # cap on training samples per user (quick experiments)


def _sample_at(user_id, seq, pos):
    """Return (user_id, history, target_movie, target_rating) with seq[pos] as target.

    The history is everything before ``pos``, limited to the last MAX_SEQ_LEN items.
    """
    movie, rating = seq[pos]
    return (user_id, seq[:pos][-MAX_SEQ_LEN:], movie, rating)


train_data, val_data, test_data = [], [], []

for user_id, seq in user_sequences.items():
    n_items = len(seq)
    if n_items <= MIN_SEQ_LEN:
        continue

    # Test: predict the rating of the last movie.
    test_data.append(_sample_at(user_id, seq, n_items - 1))

    # Val: predict the rating of the second-to-last movie.
    if n_items >= MIN_SEQ_LEN + 2:
        val_data.append(_sample_at(user_id, seq, n_items - 2))

    # Train: evenly strided positions among the remaining targets.
    candidates = list(range(MIN_SEQ_LEN, n_items - 2))
    if len(candidates) > MAX_SAMPLES_PER_USER:
        stride = len(candidates) // MAX_SAMPLES_PER_USER
        candidates = candidates[::stride][:MAX_SAMPLES_PER_USER]
    train_data.extend(_sample_at(user_id, seq, pos) for pos in candidates)

print(f"Train samples: {len(train_data):,}")
print(f"Val samples: {len(val_data):,}")
print(f"Test samples: {len(test_data):,}")

# Quick look at the training-target rating distribution.
train_ratings = [sample[3] for sample in train_data]
print(f"\nTrain rating 분포: mean={np.mean(train_ratings):.2f}, std={np.std(train_ratings):.2f}")

Train samples: 10,000
Val samples: 2,000
Test samples: 2,000

Train rating 분포: mean=3.70, std=1.05


## 3. Dataset & DataLoader

In [6]:
class RatingDataset(Dataset):
    """Dataset of (rated watch history, target movie) -> target rating samples.

    Each item is left-padded to ``max_len`` history steps and comes with a mask
    that is 1 for real history items and 0 for padding.
    """

    def __init__(self, data, movie_to_idx, embeddings, max_len=30,
                 rating_min=0.5, rating_max=5.0):
        """
        Args:
            data: list of (user_id, history, target_movie_id, target_rating),
                where history is a list of (movieId, rating) tuples.
            movie_to_idx: mapping movieId -> row index into ``embeddings``.
            embeddings: 2-D numpy array, one row per movie.
            max_len: number of history steps every sample is padded/truncated to.
            rating_min: lowest raw rating, mapped to 0 by ``normalize_rating``.
            rating_max: highest raw rating, mapped to 1 by ``normalize_rating``.
        """
        self.data = data
        self.movie_to_idx = movie_to_idx
        self.embeddings = torch.from_numpy(embeddings)
        self.max_len = max_len
        self.emb_dim = embeddings.shape[1]

        # Rating normalisation bounds ([rating_min, rating_max] -> [0, 1]).
        self.rating_min = rating_min
        self.rating_max = rating_max

    def normalize_rating(self, rating):
        """Scale a raw rating into the [0, 1] range."""
        return (rating - self.rating_min) / (self.rating_max - self.rating_min)

    def denormalize_rating(self, rating):
        """Map a normalised rating back to the raw rating scale."""
        return rating * (self.rating_max - self.rating_min) + self.rating_min

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return one sample as a dict of tensors.

        Raises:
            KeyError: if a history or target movieId is missing from ``movie_to_idx``.
        """
        user_id, history, target_movie, target_rating = self.data[idx]

        # Keep only the most recent max_len items *before* gathering embeddings,
        # so rows that would be dropped anyway are never looked up.
        history = history[-self.max_len:]
        seq_len = len(history)

        hist_idx = torch.tensor(
            [self.movie_to_idx[mid] for mid, _ in history], dtype=torch.long
        )
        hist_emb = self.embeddings[hist_idx]  # (seq_len, emb_dim)
        hist_ratings = torch.tensor(
            [self.normalize_rating(r) for _, r in history], dtype=torch.float32
        )  # (seq_len,)

        # Target movie embedding and rating (normalised and raw).
        target_emb = self.embeddings[self.movie_to_idx[target_movie]]  # (emb_dim,)
        target_rating_tensor = torch.tensor(
            self.normalize_rating(target_rating), dtype=torch.float32
        )

        # Left-pad with zeros so the most recent item always sits at the end.
        pad_len = self.max_len - seq_len
        if pad_len > 0:
            hist_emb = torch.cat(
                [torch.zeros(pad_len, self.emb_dim, dtype=hist_emb.dtype), hist_emb], dim=0
            )
            hist_ratings = torch.cat([torch.zeros(pad_len), hist_ratings], dim=0)
            mask = torch.cat([torch.zeros(pad_len), torch.ones(seq_len)])
        else:
            mask = torch.ones(self.max_len)

        return {
            'hist_emb': hist_emb,           # (max_len, emb_dim)
            'hist_ratings': hist_ratings,   # (max_len,) - normalised
            'mask': mask,                   # (max_len,) - 1 = real item, 0 = padding
            'target_emb': target_emb,       # (emb_dim,)
            'target_rating': target_rating_tensor,  # scalar - normalised
            'target_rating_orig': torch.tensor(target_rating, dtype=torch.float32),  # raw rating
        }

In [7]:
# Build one dataset and one DataLoader per split.
BATCH_SIZE = 128

train_dataset, val_dataset, test_dataset = (
    RatingDataset(split, movie_to_idx, embeddings_norm, MAX_SEQ_LEN)
    for split in (train_data, val_data, test_data)
)

# Only the training loader shuffles; val/test keep the sample order fixed.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
val_loader, test_loader = (
    DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
    for dataset in (val_dataset, test_dataset)
)

for split_name, loader in (("Train", train_loader), ("Val", val_loader), ("Test", test_loader)):
    print(f"{split_name} batches: {len(loader)}")

# Inspect the tensor shapes of a single sample.
sample = train_dataset[0]
print(f"\nSample shapes:")
for key, value in sample.items():
    shape_or_value = value.shape if hasattr(value, 'shape') else value
    print(f"  {key}: {shape_or_value}")

Train batches: 79
Val batches: 16
Test batches: 16

Sample shapes:
  hist_emb: torch.Size([30, 300])
  hist_ratings: torch.Size([30])
  mask: torch.Size([30])
  target_emb: torch.Size([300])
  target_rating: torch.Size([])
  target_rating_orig: torch.Size([])


## 4. Rating Predictor 모델

In [8]:
class PositionalEncoding(nn.Module):
    """Fixed sinusoidal positional encoding added to a (batch, seq, d_model) input.

    Even feature indices hold sines and odd indices hold cosines. Both even and
    odd ``d_model`` are supported.
    """

    def __init__(self, d_model, max_len=100):
        """
        Args:
            d_model: feature dimension of the inputs.
            max_len: longest sequence length the table is precomputed for.
        """
        super().__init__()
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        angles = position * div_term  # (max_len, ceil(d_model / 2))

        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(angles)
        # For odd d_model there is one fewer odd column than even columns, so
        # the cosine part only uses the first d_model // 2 frequencies.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])
        # Buffer: moves with the module across devices but is not trained.
        self.register_buffer('pe', pe.unsqueeze(0))

    def forward(self, x):
        """Return ``x`` plus the encoding for its first ``x.size(1)`` positions."""
        return x + self.pe[:, :x.size(1), :]

In [9]:
class RatingPredictor(nn.Module):
    """Attention-pooling rating predictor.

    How it works:
    1. The target movie (query) is scored against every history movie (key)
       with a scaled dot product, and padding positions are masked out.
    2. The attention weights pool a value vector built from each history
       movie's embedding and its encoded rating.
    3. An MLP head on [pooled context, query] gives one estimate; a second,
       rating-only estimate ("user bias") is blended in with fixed
       0.7 / 0.3 weights.

    The output lies in [0, 1], i.e. on the normalised rating scale.
    """

    def __init__(self, emb_dim=300, hidden_dim=128, dropout=0.1):
        """
        Args:
            emb_dim: dimension of the movie embeddings.
            hidden_dim: width of the projected query/key/value space.
            dropout: dropout probability (attention weights and MLP head).
        """
        super().__init__()

        self.emb_dim = emb_dim

        # Projections used for the similarity (attention) scores.
        self.query_proj = nn.Linear(emb_dim, hidden_dim)
        self.key_proj = nn.Linear(emb_dim, hidden_dim)

        # Encodes a scalar rating into a hidden_dim vector.
        self.rating_encoder = nn.Sequential(
            nn.Linear(1, hidden_dim // 2),
            nn.ReLU(),
            nn.Linear(hidden_dim // 2, hidden_dim),
        )

        # Value projection over [movie embedding, encoded rating].
        self.value_proj = nn.Linear(emb_dim + hidden_dim, hidden_dim)

        # Final rating head; the sigmoid keeps the output in [0, 1].
        self.output_head = nn.Sequential(
            nn.Linear(hidden_dim + hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.ReLU(),
            nn.Linear(hidden_dim // 2, 1),
            nn.Sigmoid(),
        )

        # Rating-only estimate used as a bias term.
        self.user_bias = nn.Sequential(
            nn.Linear(hidden_dim, 1),
            nn.Sigmoid(),
        )

        self.scale = hidden_dim ** 0.5
        self.dropout = nn.Dropout(dropout)

    def forward(self, hist_emb, hist_ratings, mask, target_emb):
        """
        Args:
            hist_emb: (batch, seq_len, emb_dim) history movie embeddings.
            hist_ratings: (batch, seq_len) normalised ratings in [0, 1].
            mask: (batch, seq_len); 0 marks padding, any other value a real item.
            target_emb: (batch, emb_dim) embedding of the movie to score.

        Returns:
            (batch,) predicted normalised ratings in [0, 1].
        """
        # Query: the target movie.
        query = self.query_proj(target_emb)  # (batch, hidden_dim)

        # Key: the history movies.
        key = self.key_proj(hist_emb)  # (batch, seq_len, hidden_dim)

        # Scaled dot-product similarity between the target and each history item.
        attn_scores = torch.bmm(key, query.unsqueeze(-1)).squeeze(-1)  # (batch, seq_len)
        attn_scores = attn_scores / self.scale

        # Mask padding. The most negative finite value is used instead of -inf
        # so a row with no real history still gives a finite softmax (and finite
        # gradients) rather than NaN; masked weights are then zeroed explicitly.
        is_pad = mask == 0
        attn_scores = attn_scores.masked_fill(is_pad, torch.finfo(attn_scores.dtype).min)
        attn_weights = F.softmax(attn_scores, dim=-1)  # (batch, seq_len)
        attn_weights = attn_weights.masked_fill(is_pad, 0.0)
        attn_weights = self.dropout(attn_weights)

        # Encode each history rating.
        rating_encoded = self.rating_encoder(hist_ratings.unsqueeze(-1))  # (batch, seq_len, hidden_dim)

        # Value: movie embedding combined with its encoded rating.
        value_input = torch.cat([hist_emb, rating_encoded], dim=-1)  # (batch, seq_len, emb_dim + hidden_dim)
        value = self.value_proj(value_input)  # (batch, seq_len, hidden_dim)

        # Attention-weighted sum of the values.
        context = torch.bmm(attn_weights.unsqueeze(1), value).squeeze(1)  # (batch, hidden_dim)

        # Attention-weighted average of the encoded ratings -> bias estimate.
        user_avg = torch.bmm(attn_weights.unsqueeze(1), rating_encoded).squeeze(1)  # (batch, hidden_dim)
        user_bias = self.user_bias(user_avg).squeeze(-1)  # (batch,)

        # Main estimate from the pooled context and the target query.
        combined = torch.cat([context, query], dim=-1)  # (batch, hidden_dim * 2)
        pred = self.output_head(combined).squeeze(-1)  # (batch,)

        # Fixed convex blend of the two estimates; stays inside [0, 1].
        final_pred = 0.7 * pred + 0.3 * user_bias

        return final_pred

In [10]:
# Instantiate the predictor and move it to the selected device.
model = RatingPredictor(emb_dim=emb_dim, hidden_dim=128, dropout=0.1)
model = model.to(device)

# Total parameter count, followed by the module structure.
n_params = sum(param.numel() for param in model.parameters())
print(f"Model parameters: {n_params:,}")
print(model)

Model parameters: 181,762
RatingPredictor(
  (query_proj): Linear(in_features=300, out_features=128, bias=True)
  (key_proj): Linear(in_features=300, out_features=128, bias=True)
  (rating_encoder): Sequential(
    (0): Linear(in_features=1, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=128, bias=True)
  )
  (value_proj): Linear(in_features=428, out_features=128, bias=True)
  (output_head): Sequential(
    (0): Linear(in_features=256, out_features=128, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.1, inplace=False)
    (3): Linear(in_features=128, out_features=64, bias=True)
    (4): ReLU()
    (5): Linear(in_features=64, out_features=1, bias=True)
    (6): Sigmoid()
  )
  (user_bias): Sequential(
    (0): Linear(in_features=128, out_features=1, bias=True)
    (1): Sigmoid()
  )
  (dropout): Dropout(p=0.1, inplace=False)
)


## 5. 학습

In [11]:
# Loss & optimizer.
# MSE is computed on the normalised (0-1) rating scale the model outputs.
criterion = nn.MSELoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=0.01)  # raised learning rate
# Cosine decay from lr down to eta_min over T_max scheduler steps. T_max=30
# matches the N_EPOCHS value used by the training loop; keep the two in sync.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=30, eta_min=1e-5)

# Rating 역정규화 함수
def denormalize_rating(rating, rating_min=0.5, rating_max=5.0):
    """Map a normalised rating (0-1) back onto [rating_min, rating_max].

    Works on plain numbers and on arrays alike, since it only uses arithmetic.
    """
    span = rating_max - rating_min
    return rating_min + span * rating

In [12]:
def evaluate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader``.

    The model is switched to eval mode and run without gradients. The loss is
    computed on normalised ratings; MSE/RMSE/MAE are computed after mapping the
    predictions back to the raw rating scale.

    Returns:
        dict with 'MSE', 'RMSE', 'MAE' (raw scale), 'loss' (sample-weighted mean
        of ``criterion`` on the normalised scale) and the per-sample lists
        'preds' and 'targets' (raw scale).
    """
    model.eval()

    loss_sum = 0
    n_seen = 0
    all_preds, all_targets = [], []

    tensor_keys = (
        'hist_emb', 'hist_ratings', 'mask',
        'target_emb', 'target_rating', 'target_rating_orig',
    )

    with torch.no_grad():
        for batch in loader:
            on_device = {key: batch[key].to(device) for key in tensor_keys}
            normalised_target = on_device['target_rating']

            pred = model(
                on_device['hist_emb'],
                on_device['hist_ratings'],
                on_device['mask'],
                on_device['target_emb'],
            )  # normalised predictions

            # Weight each batch loss by its size so the mean is per sample.
            batch_n = len(normalised_target)
            loss_sum += criterion(pred, normalised_target).item() * batch_n
            n_seen += batch_n

            # Collect predictions and targets on the raw rating scale.
            all_preds.extend(denormalize_rating(pred.cpu().numpy()))
            all_targets.extend(on_device['target_rating_orig'].cpu().numpy())

    errors = np.array(all_preds) - np.array(all_targets)
    mse = np.mean(errors ** 2)

    return {
        'MSE': mse,
        'RMSE': np.sqrt(mse),
        'MAE': np.mean(np.abs(errors)),
        'loss': loss_sum / n_seen,  # loss on the normalised scale
        'preds': all_preds,
        'targets': all_targets,
    }

In [None]:
# Training loop.
N_EPOCHS = 30
best_rmse = float('inf')
# Per-epoch learning curves, used by the plotting cell and stored in the checkpoint.
history = {'train_loss': [], 'val_loss': [], 'train_rmse': [], 'val_rmse': [], 'val_mae': []}

for epoch in range(N_EPOCHS):
    # ---- Train ----
    model.train()
    train_loss_sum = 0.0
    train_samples = 0
    train_preds = []
    train_targets = []

    pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{N_EPOCHS}')
    for batch in pbar:
        hist_emb = batch['hist_emb'].to(device)
        hist_ratings = batch['hist_ratings'].to(device)
        mask = batch['mask'].to(device)
        target_emb = batch['target_emb'].to(device)
        target_rating = batch['target_rating'].to(device)  # normalised
        target_rating_orig = batch['target_rating_orig']  # raw scale, stays on the CPU

        optimizer.zero_grad()

        pred = model(hist_emb, hist_ratings, mask, target_emb)
        loss = criterion(pred, target_rating)

        loss.backward()
        # Clip the global gradient norm to 1.0 before the optimizer step.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        # Weight the batch loss by its size: the last batch is usually smaller,
        # and the validation loss from evaluate() is a per-sample mean as well,
        # so the two curves are directly comparable.
        batch_n = len(target_rating)
        train_loss_sum += loss.item() * batch_n
        train_samples += batch_n

        # Keep predictions/targets on the raw rating scale for the train RMSE.
        pred_orig = denormalize_rating(pred.detach().cpu().numpy())
        train_preds.extend(pred_orig)
        train_targets.extend(target_rating_orig.numpy())

        pbar.set_postfix(loss=f'{loss.item():.4f}')

    train_loss = train_loss_sum / train_samples
    # NOTE: this RMSE is measured in train mode (dropout active), unlike the validation RMSE.
    train_rmse = np.sqrt(np.mean((np.array(train_preds) - np.array(train_targets)) ** 2))
    scheduler.step()

    # ---- Validation ----
    val_results = evaluate(model, val_loader, criterion, device)

    history['train_loss'].append(train_loss)
    history['val_loss'].append(val_results['loss'])
    history['train_rmse'].append(train_rmse)
    history['val_rmse'].append(val_results['RMSE'])
    history['val_mae'].append(val_results['MAE'])

    print(f"Epoch {epoch+1}: Train RMSE={train_rmse:.4f}, Val RMSE={val_results['RMSE']:.4f}, Val MAE={val_results['MAE']:.4f}")

    # Checkpoint whenever the validation RMSE improves.
    if val_results['RMSE'] < best_rmse:
        best_rmse = val_results['RMSE']
        torch.save(model.state_dict(), PROCESSED.DIR / 'rating_predictor_best.pt')
        print(f"  -> Best model saved! (RMSE={best_rmse:.4f})")

Epoch 1/30:  32%|███▏      | 25/79 [00:01<00:02, 26.24it/s, loss=0.0625]

In [None]:
# Learning curves: loss, RMSE and validation MAE per epoch.
fig, axes = plt.subplots(1, 3, figsize=(15, 4))


def _epoch_axis(values):
    """1-based epoch numbers matching ``values``."""
    return range(1, len(values) + 1)


# Panels 1 and 2 share the same train-vs-val layout.
paired_panels = [
    (axes[0], 'train_loss', 'val_loss', 'MSE Loss', 'Training & Validation Loss (MSE)'),
    (axes[1], 'train_rmse', 'val_rmse', 'RMSE', 'Training & Validation RMSE'),
]
for panel, train_key, val_key, y_label, panel_title in paired_panels:
    train_curve = history[train_key]
    val_curve = history[val_key]
    panel.plot(_epoch_axis(train_curve), train_curve, marker='o', label='Train', color='blue')
    panel.plot(_epoch_axis(val_curve), val_curve, marker='s', label='Val', color='red')
    panel.set_xlabel('Epoch')
    panel.set_ylabel(y_label)
    panel.set_title(panel_title)
    panel.legend()
    panel.grid(True, alpha=0.3)

# Panel 3: validation MAE only.
mae_curve = history['val_mae']
axes[2].plot(_epoch_axis(mae_curve), mae_curve, marker='s', color='green')
axes[2].set_xlabel('Epoch')
axes[2].set_ylabel('MAE')
axes[2].set_title('Validation MAE')
axes[2].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Summary of the final and best metrics.
final_train_rmse = history['train_rmse'][-1]
final_val_rmse = history['val_rmse'][-1]
final_val_mae = history['val_mae'][-1]
print("\n[학습 결과 요약]")
print(f"  최종 Train RMSE: {final_train_rmse:.4f}")
print(f"  최종 Val RMSE:   {final_val_rmse:.4f}")
print(f"  최종 Val MAE:    {final_val_mae:.4f}")
print(f"  Best Val RMSE:   {best_rmse:.4f}")

## 6. 테스트 평가

In [None]:
# Load the best checkpoint (lowest validation RMSE).
# map_location makes the load independent of the device the checkpoint was
# saved from; weights_only=True restricts unpickling to tensor data.
best_state = torch.load(
    PROCESSED.DIR / 'rating_predictor_best.pt',
    map_location=device,
    weights_only=True,
)
model.load_state_dict(best_state)

# Evaluate on the held-out test split.
test_results = evaluate(model, test_loader, criterion, device)

print("\n" + "="*50)
print("Test Results")
print("="*50)
print(f"  RMSE: {test_results['RMSE']:.4f}")
print(f"  MAE:  {test_results['MAE']:.4f}")
print(f"  MSE:  {test_results['MSE']:.4f}")

In [None]:
# Predicted vs. actual ratings on the test split, plus the error histogram.
preds = np.array(test_results['preds'])
targets = np.array(test_results['targets'])
errors = preds - targets

fig, axes = plt.subplots(1, 2, figsize=(12, 5))
scatter_ax, error_ax = axes[0], axes[1]

# 1. Scatter plot with the identity line as the "perfect prediction" reference.
axis_lo, axis_hi = 0.5, 5.5
scatter_ax.scatter(targets, preds, alpha=0.3, s=10)
scatter_ax.plot([axis_lo, axis_hi], [axis_lo, axis_hi], 'r--', label='Perfect prediction')
scatter_ax.set_xlabel('Actual Rating')
scatter_ax.set_ylabel('Predicted Rating')
scatter_ax.set_title('Predicted vs Actual Ratings')
scatter_ax.legend()
scatter_ax.grid(True, alpha=0.3)
scatter_ax.set_xlim(axis_lo, axis_hi)
scatter_ax.set_ylim(axis_lo, axis_hi)

# 2. Distribution of signed errors (prediction minus actual).
error_mean = np.mean(errors)
error_std = np.std(errors)
error_ax.hist(errors, bins=50, edgecolor='black', alpha=0.7)
error_ax.axvline(x=0, color='r', linestyle='--', label='Zero error')
error_ax.set_xlabel('Prediction Error (Pred - Actual)')
error_ax.set_ylabel('Count')
error_ax.set_title(f'Error Distribution (mean={error_mean:.3f}, std={error_std:.3f})')
error_ax.legend()
error_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 7. Baseline 비교

In [None]:
# Baseline 1: per-user mean rating
def evaluate_baseline_user_mean(test_data, train_data):
    """Predict each test target with the user's mean rating from the train histories.

    Training samples of one user have overlapping history windows, so every
    (user, movie) rating is counted once instead of once per window that
    contains it. Users without any training rating fall back to the global mean.

    Args:
        test_data: list of (user_id, history, target_movie, target_rating).
        train_data: list of the same tuples; only user_id and history are read.

    Returns:
        dict with 'RMSE' and 'MAE' on the raw rating scale.
    """
    # user_id -> {movieId: rating}; keying by movie removes duplicated ratings.
    seen_by_user = {}
    for user_id, history, _, _ in train_data:
        seen_by_user.setdefault(user_id, {}).update(history)

    user_mean = {
        uid: np.mean(list(seen.values()))
        for uid, seen in seen_by_user.items()
        if seen
    }
    global_mean = np.mean([r for seen in seen_by_user.values() for r in seen.values()])

    preds = np.array([user_mean.get(uid, global_mean) for uid, _, _, _ in test_data])
    targets = np.array([target for _, _, _, target in test_data])

    rmse = np.sqrt(np.mean((preds - targets) ** 2))
    mae = np.mean(np.abs(preds - targets))

    return {'RMSE': rmse, 'MAE': mae}

# Baseline 2: global mean rating
def evaluate_baseline_global_mean(test_data, train_data):
    """Predict every test target with the global mean rating of the train histories.

    Training samples of one user have overlapping history windows, so every
    (user, movie) rating is counted once instead of once per window that
    contains it.

    Args:
        test_data: list of (user_id, history, target_movie, target_rating).
        train_data: list of the same tuples; only user_id and history are read.

    Returns:
        dict with 'RMSE' and 'MAE' on the raw rating scale.
    """
    # (user_id, movieId) -> rating; the key removes duplicated ratings.
    unique_ratings = {}
    for user_id, history, _, _ in train_data:
        for movie_id, rating in history:
            unique_ratings[(user_id, movie_id)] = rating

    global_mean = np.mean(list(unique_ratings.values()))

    targets = np.array([target for _, _, _, target in test_data])
    preds = np.full(len(targets), global_mean, dtype=float)

    rmse = np.sqrt(np.mean((preds - targets) ** 2))
    mae = np.mean(np.abs(preds - targets))

    return {'RMSE': rmse, 'MAE': mae}

# Baseline 3: mean of the most recent ratings
def evaluate_baseline_recent_mean(test_data, k=5):
    """Predict each test target with the mean of the last ``k`` history ratings.

    Samples with an empty history are predicted as 3.0.

    Returns:
        dict with 'RMSE' and 'MAE' on the raw rating scale.
    """
    fallback = 3.0

    # One list of recent ratings per test sample.
    recent_windows = [
        [rating for _, rating in history[-k:]]
        for _, history, _, _ in test_data
    ]
    preds = np.array([np.mean(window) if window else fallback for window in recent_windows])
    targets = np.array([target for _, _, _, target in test_data])

    diff = preds - targets
    return {
        'RMSE': np.sqrt(np.mean(diff ** 2)),
        'MAE': np.mean(np.abs(diff)),
    }

In [None]:
# Evaluate the three baselines on the test split.
baseline_global = evaluate_baseline_global_mean(test_data, train_data)
baseline_user = evaluate_baseline_user_mean(test_data, train_data)
baseline_recent = evaluate_baseline_recent_mean(test_data, k=5)

# Comparison table: one row per method, RMSE and MAE columns.
heavy_rule = "=" * 60
table_rows = [
    ('Global Mean', baseline_global),
    ('User Mean', baseline_user),
    ('Recent-5 Mean', baseline_recent),
    ('Transformer (Ours)', test_results),
]

print("\n" + heavy_rule)
print("Baseline vs Transformer 비교")
print(heavy_rule)
print(f"{'Method':<25} {'RMSE':>12} {'MAE':>12}")
print("-" * 60)
for method_name, metrics in table_rows:
    print(f"{method_name:<25} {metrics['RMSE']:>12.4f} {metrics['MAE']:>12.4f}")
print(heavy_rule)

# Relative RMSE improvement over the per-user mean baseline, in percent.
user_rmse = baseline_user['RMSE']
improvement = (user_rmse - test_results['RMSE']) / user_rmse * 100
print(f"\nUser Mean 대비 RMSE 개선율: {improvement:.1f}%")

In [None]:
# Grouped bar chart comparing RMSE and MAE across methods.
methods = ['Global Mean', 'User Mean', 'Recent-5', 'Transformer']
results_in_order = [baseline_global, baseline_user, baseline_recent, test_results]
rmse_values = [res['RMSE'] for res in results_in_order]
mae_values = [res['MAE'] for res in results_in_order]

fig, ax = plt.subplots(figsize=(10, 6))
x = np.arange(len(methods))
width = 0.35

# RMSE bars sit left of each tick, MAE bars right of it.
bars1 = ax.bar(x - width/2, rmse_values, width, label='RMSE', color='steelblue')
bars2 = ax.bar(x + width/2, mae_values, width, label='MAE', color='coral')

# Write each bar's value just above it.
for bar in list(bars1) + list(bars2):
    bar_height = bar.get_height()
    ax.text(bar.get_x() + bar.get_width()/2, bar_height + 0.01,
            f'{bar_height:.3f}', ha='center', va='bottom', fontsize=10)

ax.set_ylabel('Error')
ax.set_title('Rating 예측 성능 비교')
ax.set_xticks(x)
ax.set_xticklabels(methods)
ax.legend()
ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.show()

## 8. 예측 예시

In [None]:
# Load the movie catalog and build a movieId -> original title lookup.
catalog = pd.read_parquet(PROCESSED.MOVIE_CATALOG_PARQUET)
titles_by_id = catalog.set_index('movieId')['original_title']
movie_titles = titles_by_id.to_dict()


def get_movie_title(movie_id):
    """Return the catalog title for ``movie_id``, or an 'Unknown (<id>)' placeholder."""
    if movie_id in movie_titles:
        return movie_titles[movie_id]
    return f'Unknown ({movie_id})'

In [None]:
# Prediction examples for a few test samples.
model.eval()

for i in [0, 100, 500]:
    if i >= len(test_data):
        continue

    # Use a dedicated name for the watch history: the global `history` holds the
    # training curves that the checkpoint cell saves and must not be overwritten.
    user_id, watch_history, target_movie, target_rating = test_data[i]

    # Model prediction for this single sample (batch of size 1).
    sample = test_dataset[i]
    with torch.no_grad():
        pred_norm = model(
            sample['hist_emb'].unsqueeze(0).to(device),
            sample['hist_ratings'].unsqueeze(0).to(device),
            sample['mask'].unsqueeze(0).to(device),
            sample['target_emb'].unsqueeze(0).to(device),
        ).item()

    # Map the normalised prediction back to the raw rating scale.
    pred = denormalize_rating(pred_norm)

    print("="*80)
    print(f"[유저 {user_id}]")
    print("\n최근 시청 이력 (최근 5개):")
    for mid, rating in watch_history[-5:]:
        title = get_movie_title(mid)[:40]  # truncate long titles for display
        print(f"  - {title}: ⭐{rating}")

    print(f"\n타겟 영화: {get_movie_title(target_movie)}")
    print(f"  실제 평점: ⭐{target_rating}")
    print(f"  예측 평점: ⭐{pred:.2f}")
    print(f"  오차: {abs(pred - target_rating):.2f}")

## 9. 모델 저장

In [None]:
# Save the final checkpoint: weights, model config, test metrics and learning curves.
save_path = PROCESSED.DIR / 'rating_predictor_final.pt'

# Stored as plain Python floats so the checkpoint holds no NumPy scalar objects.
final_rmse = float(test_results['RMSE'])
final_mae = float(test_results['MAE'])

torch.save({
    'model_state_dict': model.state_dict(),
    'config': {
        'emb_dim': emb_dim,
        # Read from the model itself so the config cannot drift from the
        # value the model was actually built with.
        'hidden_dim': model.query_proj.out_features,
        'max_len': MAX_SEQ_LEN,
    },
    'test_results': {
        'RMSE': final_rmse,
        'MAE': final_mae,
    },
    # NOTE(review): `history` must still be the training-curve dict here; make
    # sure no later cell has rebound the name before this cell runs.
    'history': history,
}, save_path)

print(f"모델 저장: {save_path}")
print(f"\n최종 Test 성능:")
print(f"  RMSE: {final_rmse:.4f}")
print(f"  MAE:  {final_mae:.4f}")