#### import

In [2]:
import json
import numpy as np
from collections import defaultdict
from sklearn.preprocessing import normalize
from sklearn.metrics import precision_score
from tqdm import tqdm

#### JSONL 파일에서 리뷰 로드

In [3]:
# Paths of the review files read by load_reviews (parsed as one JSON object per line).
TRAIN_FILE = "train_80.json"
TEST_FILE = "test_20.json"
# Number of businesses recommended per user; also the denominator of Precision@K.
TOP_N = 5


def load_reviews(path):
    """Load reviews from a JSON Lines file (one JSON object per line).

    Each parsed object gets an extra ``"vector"`` key holding its
    ``"sentiment_vector"`` value converted to a NumPy array.

    Args:
        path: Path of the UTF-8 encoded file to read.

    Returns:
        A list of the parsed review dicts, in file order.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record has no ``"sentiment_vector"`` field.
    """
    data = []
    with open(path, encoding="utf-8") as f:
        for line in f:
            # Blank lines (e.g. an extra newline between or after records)
            # are not records; json.loads would raise on them.
            if not line.strip():
                continue
            obj = json.loads(line)
            obj["vector"] = np.array(obj["sentiment_vector"])
            data.append(obj)
    return data


# Parse both review files up front; each call returns a list of review dicts.
train_reviews = load_reviews(path=TRAIN_FILE)
test_reviews = load_reviews(path=TEST_FILE)

FileNotFoundError: [Errno 2] No such file or directory: 'train_80.json'

#### 훈련 데이터 → 벡터 평균 계산

In [3]:
# Load the training reviews (this repeats the load done in the loading cell).
train_reviews = load_reviews(TRAIN_FILE)

# Collect every review vector under both its author and its business.
user_vecs = defaultdict(list)
biz_vecs = defaultdict(list)

for review in train_reviews:
    vec = review["vector"]
    user_vecs[review["user_id"]].append(vec)
    biz_vecs[review["business_id"]].append(vec)

# A user's embedding is the element-wise mean of the vectors of their reviews.
user_embed = {}
for user_key, vecs in user_vecs.items():
    user_embed[user_key] = np.mean(vecs, axis=0)

# A business's embedding is the element-wise mean of the vectors of its reviews.
biz_embed = {}
for biz_key, vecs in biz_vecs.items():
    biz_embed[biz_key] = np.mean(vecs, axis=0)

#### 정규화 후 유사도 계산

In [4]:
# Freeze one ordering of users and businesses: row i of each matrix below
# belongs to the id at index i of the matching list.
user_ids = list(user_embed)
biz_ids = list(biz_embed)

user_rows = [user_embed[uid] for uid in user_ids]
biz_rows = [biz_embed[bid] for bid in biz_ids]

# normalize() rescales each row (unit L2 norm with sklearn's defaults), so the
# product below yields one similarity score per (user, business) pair.
user_matrix = normalize(np.stack(user_rows))
biz_matrix = normalize(np.stack(biz_rows))

# scores[i, j] is the similarity between user_ids[i] and biz_ids[j].
scores = user_matrix @ biz_matrix.T

#### 추천 리스트 생성

In [5]:
# Businesses each user already reviewed in the training data.
user2seen = defaultdict(set)
for review in train_reviews:
    user2seen[review["user_id"]].add(review["business_id"])

recommendations = {}

for uid, user_score in zip(user_ids, scores):
    already_seen = user2seen[uid]
    # argsort is ascending, so reverse it to walk from the highest score down.
    descending = np.argsort(user_score)[::-1]

    picked = []
    for col in descending:
        candidate = biz_ids[col]
        if candidate not in already_seen:  # skip restaurants the user has already seen
            picked.append(candidate)
        if len(picked) == TOP_N:
            break
    recommendations[uid] = picked

#### 평가: Ground Truth 생성

In [6]:
# Evaluation ground truth: for each user, the set of businesses they reviewed
# in the test reviews.
ground_truth = defaultdict(set)
for review in test_reviews:
    reviewed = ground_truth[review["user_id"]]
    reviewed.add(review["business_id"])

NameError: name 'test_reviews' is not defined

#### Precision@K 계산

In [None]:
# Only users that have both a recommendation list and ground-truth entries
# can be scored.
common_users = set(recommendations).intersection(set(ground_truth))

# Per-user Precision@K: recommended businesses that appear in the user's
# ground truth, divided by TOP_N.
precision_list = [
    len(set(recommendations[user]) & ground_truth[user]) / TOP_N
    for user in common_users
]

#### 결과 출력

In [None]:
# Report how many users were evaluated (those in both recommendations and ground truth).
print(f"📌 평가 대상 유저 수: {len(common_users)}")
# Report the mean of the per-user precision values, formatted to four decimals.
print(f"🎯 Precision@{TOP_N}: {np.mean(precision_list):.4f}")