# Movie & Game Embedding 결합 및 Latent Transformation
1. 영화 & 게임 임베딩(.npy) 불러오기
2. TruncatedSVD로 차원 축소
3. MLP를 이용한 latent embedding 생성 (128차원, 현재 MLP는 학습되지 않은 무작위 초기화 가중치를 그대로 사용)
4. 사용자 latent embedding placeholder 생성
5. 영화 + 게임 + 사용자 embedding 결합
6. 추천 시스템에서 바로 사용할 수 있는 구조 생성

* 현재 user latent는 랜덤 placeholder → 실제 NCF 학습 후 교체 필요

* embedding 수는 min_len 기준으로 맞춤 → 전체 영화/게임 수와 다를 수 있음

In [95]:
# ======================================================
# 0. 환경 준비
# ======================================================
import numpy as np
import pandas as pd
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity
import torch
import torch.nn as nn

# ======================================================
# 1. Load the movie & game embeddings and their titles
# ======================================================
# Locations of the precomputed embedding matrices (.npy) and of the CSV
# tables that supply the item titles.
movie_path = '/content/drive/MyDrive/2025Bigdata/movie_dataset/hybrid_movie_embeddings.npy'
game_path  = '/content/drive/MyDrive/2025Bigdata/game_dataset/hybrid_game_embeddings_BERT_final.npy'
movies_csv = '/content/drive/MyDrive/2025Bigdata/movie_dataset/movies.csv'
games_csv  = '/content/drive/MyDrive/2025Bigdata/game_dataset/cleaned_games.csv'

# Embedding matrices first, then the metadata tables.
movie_embeddings, game_embeddings = np.load(movie_path), np.load(game_path)
movie_df, game_df = pd.read_csv(movies_csv), pd.read_csv(games_csv)

# Plain Python lists of titles, in CSV row order.
movie_titles = movie_df['title'].tolist()
game_titles  = game_df['title'].tolist()

# Equalise the row counts: both matrices keep only their first `min_len`
# rows, where `min_len` is the size of the smaller of the two.
min_len = min(map(len, (movie_embeddings, game_embeddings)))
movie_embeddings, game_embeddings = (
    matrix[:min_len] for matrix in (movie_embeddings, game_embeddings)
)

# ======================================================
# 2. Dimensionality reduction (TruncatedSVD)
# ======================================================
# Both embedding matrices are projected down to `target_dim` columns.
# Each domain gets its own SVD model, fitted only on that domain's rows;
# the fixed random_state keeps the decomposition repeatable.
target_dim = 512
svd_movie, svd_game = (
    TruncatedSVD(n_components=target_dim, random_state=42) for _ in range(2)
)

movie_reduced = svd_movie.fit_transform(movie_embeddings)
game_reduced = svd_game.fit_transform(game_embeddings)

# ======================================================
# 3. Weighted-sum hybrid embedding (optional)
# ======================================================
# Element-wise blend of the two reduced matrices: row i of the movies is
# mixed with row i of the games, weighted alpha and (1 - alpha).
# NOTE(review): `hybrid_embeddings` is not referenced again in this cell.
alpha = 0.5
game_weight = 1-alpha
hybrid_embeddings = alpha * movie_reduced + game_weight * game_reduced

# ======================================================
# 4. MLP that produces the latent embeddings
# ======================================================
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")


class SimpleMLP(nn.Module):
    """Two-layer perceptron mapping `input_dim` features to `latent_dim`.

    Layer stack: Linear(input_dim -> 256) -> ReLU -> Linear(256 -> latent_dim).
    """

    def __init__(self, input_dim, latent_dim=128):
        """Build the layer stack.

        Args:
            input_dim: Number of features in each input row.
            latent_dim: Number of features in each output row.
        """
        super().__init__()
        hidden_dim = 256
        # Layers are created in forward order and stored under `model`.
        layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, latent_dim),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Pass `x` through the layer stack and return the result."""
        return self.model(x)

# NOTE: the MLP is never trained in this cell; it acts as a fixed random
# projection from `target_dim` to 128 dimensions. Seed PyTorch before the
# weights are created so the projection (and every similarity score derived
# from it) is the same on every run, in line with the fixed random_state
# used for the SVD step.
torch.manual_seed(42)
mlp = SimpleMLP(input_dim=target_dim, latent_dim=128).to(DEVICE)
mlp.eval()  # inference only

# No gradients are needed: the network is only used for a forward pass.
with torch.no_grad():
    # Convert the SVD outputs to float32 tensors directly on DEVICE.
    movie_tensor = torch.as_tensor(movie_reduced, dtype=torch.float32, device=DEVICE)
    game_tensor  = torch.as_tensor(game_reduced, dtype=torch.float32, device=DEVICE)
    # The same network is applied to both domains; results go back to NumPy.
    movie_latent = mlp(movie_tensor).cpu().numpy()
    game_latent  = mlp(game_tensor).cpu().numpy()

# ======================================================
# 5. User latent embeddings (placeholder)
# ======================================================
# Random stand-in vectors, uniform in [0, 1); to be replaced by embeddings
# learned by a real NCF model. They are drawn from a seeded generator so
# repeated runs produce the same placeholder matrix.
num_users = 610
user_latent_dim = 128  # must equal the item latent size so rows can be stacked
user_latent = np.random.default_rng(42).random((num_users, user_latent_dim))

# ======================================================
# 6. Stack all embeddings into one matrix
# ======================================================
# Row layout of `final_embeddings`: movies first, then games, then users.
n_movies = movie_latent.shape[0]
n_games = game_latent.shape[0]
n_users = user_latent.shape[0]

# Fail loudly when a title table is shorter than its embedding matrix.
# Slicing a too-short list would silently yield fewer titles, shifting every
# later title against `item_types` and the embedding rows.
if len(movie_titles) < n_movies or len(game_titles) < n_games:
    raise ValueError(
        "Not enough titles for the embeddings: "
        f"{len(movie_titles)} movie titles for {n_movies} movie rows, "
        f"{len(game_titles)} game titles for {n_games} game rows"
    )

latent_embeddings = np.vstack([movie_latent, game_latent])
final_embeddings = np.vstack([latent_embeddings, user_latent])

# Parallel label lists: entry i describes row i of `final_embeddings`.
item_types = ['movie'] * n_movies + ['game'] * n_games + ['user'] * n_users
item_titles = (
    movie_titles[:n_movies]
    + game_titles[:n_games]
    + [f"user_{i}" for i in range(n_users)]
)

print("Final embeddings shape:", final_embeddings.shape)
print("Item types length:", len(item_types))
print("Item titles length:", len(item_titles))

# ======================================================
# 7. Similarity example: top-5 items for one user
# ======================================================
# Items (movies, then games) occupy the first `n_items` rows of
# `final_embeddings`; the remaining rows are users.
n_items = movie_latent.shape[0] + game_latent.shape[0]
n_users = final_embeddings.shape[0] - n_items

# `user_index` counts users only (0 = first user). Negative values count
# from the end, so -1 selects the last user.
user_index = -1
if not -n_users <= user_index < n_users:
    raise IndexError(
        f"user_index {user_index} is out of range for {n_users} users"
    )
# Translate the user index into a row of the stacked matrix, so that a
# non-negative index can never land on an item row by accident.
user_row = n_items + (user_index % n_users)

user_vec = final_embeddings[user_row].reshape(1, -1)
item_vecs = final_embeddings[:n_items]

# Cosine similarity between the user and every item, highest first.
sim_scores = cosine_similarity(user_vec, item_vecs).flatten()
top_indices = sim_scores.argsort()[::-1][:5]

for idx in top_indices:
    print(item_titles[idx], item_types[idx], sim_scores[idx])

Final embeddings shape: (10610, 128)
Item types length: 10610
Item titles length: 10610
Bounce (2000) movie 0.06409394824613292
Dead Man (1995) movie 0.06374894218476801
Girl 6 (1996) movie 0.05706766935992481
Crash (1996) movie 0.05286406158202327
2010: The Year We Make Contact (1984) movie 0.05226071645795847
