In [1]:
# Mount Google Drive at /content/drive so the project files stored there can
# be read and written from this Colab session.
from google.colab import drive

drive.mount("/content/drive")
# ===============================================
# 1. 라이브러리 불러오기
# ===============================================
import numpy as np
import pandas as pd

# ===============================================
# 2. Base path (storage location inside the mounted Drive)
# ===============================================
# The trailing slash matters: paths below are built by plain concatenation.
BASE_DIR = "/content/drive/MyDrive/2025Bigdata/"

# ===============================================
# 3. Load the movie and game embedding matrices
# ===============================================
movie_embeddings_path = BASE_DIR + "movie_dataset/hybrid_movie_embeddings.npy"
game_embeddings_path = BASE_DIR + "game_dataset/game_plot_embeddings.npy"

movie_embeddings = np.load(movie_embeddings_path)
game_embeddings = np.load(game_embeddings_path)

# Report the shapes of both matrices as a quick sanity check.
print("Movie embeddings shape:", movie_embeddings.shape)
print("Game embeddings shape:", game_embeddings.shape)

# ===============================================
# 4. Load the metadata tables (GitHub raw links)
# ===============================================
movie_meta_url = "https://raw.githubusercontent.com/cup0927/2025Bigdata/main/movie_dataset/movies.csv"
# TODO: this pipeline should also be tried with the RAWG game dataset.
game_meta_url = "https://raw.githubusercontent.com/cup0927/2025Bigdata/main/game_dataset/cleaned_games.csv"

movie_meta = pd.read_csv(movie_meta_url)
game_meta = pd.read_csv(game_meta_url)

# Report the table sizes so they can be compared against the embedding shapes.
print("Movie meta shape:", movie_meta.shape)
print("Game meta shape:", game_meta.shape)

# ===============================================
# 5. Combine the game and movie embeddings
#    (simple concatenation example)
# ===============================================
# The combined matrix has movie_dim + game_dim columns. Movie rows keep their
# values in the leading columns and game rows in the trailing columns; the
# remaining columns of each row are zero-filled.
movie_dim = movie_embeddings.shape[1]
game_dim = game_embeddings.shape[1]

# Movie rows: append game_dim zero columns on the right.
padded_movie = np.pad(
    movie_embeddings,
    pad_width=((0, 0), (0, game_dim)),
    mode="constant",
)

# Game rows: prepend movie_dim zero columns on the left.
padded_game = np.pad(
    game_embeddings,
    pad_width=((0, 0), (movie_dim, 0)),
    mode="constant",
)

# Stack row-wise (vertically): all movie rows first, then all game rows.
hybrid_embeddings = np.concatenate((padded_movie, padded_game), axis=0)

print("Hybrid embeddings shape:", hybrid_embeddings.shape)

# ===============================================
# 6. Save the combined embedding matrix
# ===============================================
hybrid_embeddings_path = BASE_DIR + "hybrid_movie_game_embeddings.npy"
np.save(hybrid_embeddings_path, hybrid_embeddings)
print("✅ Hybrid movie-game embeddings saved at:")
print(hybrid_embeddings_path)

# ===============================================
# 7. Save the metadata tables as well
# ===============================================
# Each table is written next to its dataset, without the DataFrame index.
for meta_frame, relative_csv_path in (
    (movie_meta, "movie_dataset/movie_meta.csv"),
    (game_meta, "game_dataset/game_meta.csv"),
):
    meta_frame.to_csv(BASE_DIR + relative_csv_path, index=False)
print("✅ Movie & Game meta saved.")

from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_similarity

TARGET_DIM = 512  # requested common dimensionality after PCA
TOP_K = 5  # number of most similar games to report

# PCA cannot extract more components than min(n_samples, n_features) of the
# data it is fitted on. Clamp the requested size to what BOTH matrices
# support so the two projections always end up with the same width instead of
# raising when a smaller dataset is plugged in.
n_components = min(TARGET_DIM, *movie_embeddings.shape, *game_embeddings.shape)
if n_components < TARGET_DIM:
    print(f"PCA components reduced from {TARGET_DIM} to {n_components} to fit the data.")

# 1. Project the movie vectors with their own PCA model.
pca_movie = PCA(n_components=n_components, random_state=42)
movie_vecs_pca = pca_movie.fit_transform(movie_embeddings)

# 2. Project the game vectors with a separate PCA model.
pca_game = PCA(n_components=n_components, random_state=42)
game_vecs_pca = pca_game.fit_transform(game_embeddings)

# Both projections now have n_components columns.
# NOTE(review): the two PCA models are fitted independently on different
# data, so the projections share a width but not necessarily a basis --
# component k of a movie vector and component k of a game vector come from
# unrelated axes. Treat the cross-domain cosine scores below as an
# exploratory heuristic until the two embedding spaces are aligned.

# Use the first movie as the query.
first_movie_vec = movie_vecs_pca[0]
first_movie_title = movie_meta.iloc[0]['title']
print("🎬 첫 번째 영화:", first_movie_title)

# Cosine similarity between the query movie and every game, then the indices
# of the TOP_K highest scores in descending order.
similarities = cosine_similarity([first_movie_vec], game_vecs_pca)[0]
top_idx = similarities.argsort()[::-1][:TOP_K]

print(f"\n🎮 첫 번째 영화와 가장 유사한 게임 Top {TOP_K}:")
for i in top_idx:
    print(game_meta.iloc[i]['title'], f"(유사도: {similarities[i]:.4f})")


Mounted at /content/drive
Movie embeddings shape: (9742, 1723)
Game embeddings shape: (5000, 768)
Movie meta shape: (9742, 3)
Game meta shape: (5000, 13)
Hybrid embeddings shape: (14742, 2491)
✅ Hybrid movie-game embeddings saved at:
/content/drive/MyDrive/2025Bigdata/hybrid_movie_game_embeddings.npy
✅ Movie & Game meta saved.
🎬 첫 번째 영화: Toy Story (1995)

🎮 첫 번째 영화와 가장 유사한 게임 Top 5:
Ring Runner: Flight of the Sages (유사도: 0.6913)
Hover: Revolt Of Gamers (유사도: 0.6805)
Empyrion - Galactic Survival (유사도: 0.6777)
Heckabomb (유사도: 0.6712)
Demigod (유사도: 0.6679)
