<a href="https://colab.research.google.com/github/faid011/machine-learning/blob/main/movie_ver1_1_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 0. 기본 설정 & 라이브러리
import os
import json
import random
import re
from collections import Counter

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

# Prefer the GPU when one is visible; everything below is moved to `device`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("device:", device)

# Seed every RNG used in this notebook (Python, NumPy, PyTorch CPU and CUDA).
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

# 1. Data loading & basic cleanup
DATA_PATH = "/content/drive/MyDrive/colab/kangnam/기계학습/movie_review/dataset/merged_reviews.json"  # path to the merged JSON file

with open(DATA_PATH, "r", encoding="utf-8") as f:
    data = json.load(f)

print("원본 리뷰 개수:", len(data))
df = pd.DataFrame(data)
print(df.head())

# Fail fast if the dump does not carry the columns the pipeline relies on.
needed_cols = ["movie_id", "author", "review", "rating"]
missing = [c for c in needed_cols if c not in df.columns]
if missing:
    raise ValueError(f"필수 컬럼 없음: {missing}")

df = df[needed_cols].copy()
# Non-numeric ratings become NaN here and are removed by the dropna below.
df["rating"] = pd.to_numeric(df["rating"], errors="coerce")
df = df.dropna(subset=["review", "author", "rating"])
print("정리 후 개수:", len(df))
print(df["rating"].describe())

# 2. Keep only authors that appear between 1 and 15 times
author_counts = df["author"].value_counts()
# NOTE(review): value_counts() only lists authors that occur, so the `>= 1`
# bound looks like a no-op; the effective filter is `<= 15`.
valid_authors = author_counts[(author_counts >= 1) & (author_counts <= 15)].index
df_filtered = df[df["author"].isin(valid_authors)].copy()

print("필터링 전 리뷰 수:", len(df))
print("필터링 후 리뷰 수:", len(df_filtered))
print("필터링 후 고유 author 수:", df_filtered["author"].nunique())

# 3. 텍스트 전처리
def preprocess_text_basic(text: str) -> str:
    """Normalize whitespace in a review.

    The value is coerced with ``str()``, every run of whitespace (spaces,
    tabs, newlines, ...) is collapsed to a single space, and leading/trailing
    whitespace is removed.
    """
    # r"\s+" already matches "\n" and "\t", so one substitution does it all.
    collapsed = re.sub(r"\s+", " ", str(text))
    return collapsed.strip()

# Add the whitespace-normalized text and drop reviews that end up empty.
df_filtered["clean_review"] = df_filtered["review"].apply(preprocess_text_basic)
df_filtered = df_filtered[df_filtered["clean_review"] != ""]
print("전처리 후 리뷰 수:", len(df_filtered))
print(df_filtered[["review", "clean_review"]].head())

# 4. 평점 → 5단계 라벨 + 감정 pseudo-label
def rating_to_class(r):
    """Bucket a rating into one of five ordinal classes.

    ``r`` is converted with ``float()``. Ratings up to 2 map to class 0,
    up to 4 to class 1, up to 6 to class 2, up to 8 to class 3, and anything
    larger to class 4 (all upper bounds inclusive).
    """
    score = float(r)
    # Inclusive upper bounds of classes 0..3, checked in ascending order.
    for cls, upper in enumerate((2, 4, 6, 8)):
        if score <= upper:
            return cls
    return 4

# Attach the 5-way class label and show how many reviews fall in each class.
df_filtered["rating_class"] = df_filtered["rating"].apply(rating_to_class)
print("rating_class 분포:")
print(df_filtered["rating_class"].value_counts().sort_index())

def rating_to_sentiment(r):
    """Derive a sentiment pseudo-label from a rating.

    Returns 1 (positive) for ratings of 8 or more, 0 (negative) for ratings
    of 3 or less, and -1 for everything in between (treated as unlabeled by
    the training code, which masks labels below 0).
    """
    score = float(r)
    if score >= 8:
        return 1
    if score <= 3:
        return 0
    return -1

# Attach the sentiment pseudo-label (-1 = no label, 0 = negative, 1 = positive).
df_filtered["sentiment_label"] = df_filtered["rating"].apply(rating_to_sentiment)
print("sentiment_label 분포:")
print(df_filtered["sentiment_label"].value_counts())

# Representative rating reported for each predicted class; used to turn a
# class prediction back into a score (e.g. for the RMSE metric).
class_to_rep_rating = {
    0: 1.5,
    1: 3.5,
    2: 5.5,
    3: 7.5,
    4: 9.5,
}
NUM_CLASSES = 5

# 5. vocab 생성 & 인코딩
# Tokens seen fewer than MIN_FREQ times are left out of the vocabulary and
# therefore encoded as UNK_TOKEN.
MIN_FREQ = 3
PAD_TOKEN = "<pad>"
UNK_TOKEN = "<unk>"

# Whitespace-token frequencies over every cleaned review.
# NOTE(review): this counts over all of df_filtered, i.e. before the
# train/valid/test split below, so held-out text contributes to the vocab.
counter = Counter()
for text in df_filtered["clean_review"]:
    counter.update(text.split())

# Ids 0 and 1 are reserved for padding and unknown tokens; the remaining ids
# are assigned in the counter's iteration order.
word2id = {PAD_TOKEN: 0, UNK_TOKEN: 1}
for word, freq in counter.items():
    if freq >= MIN_FREQ:
        word2id[word] = len(word2id)

id2word = {i: w for w, i in word2id.items()}
vocab_size = len(word2id)
print("vocab_size:", vocab_size)

# Fixed sequence length every review is truncated/padded to.
MAX_LEN = 80

def encode_sentence(text: str, word2id: dict, max_len: int = MAX_LEN):
    """Turn ``text`` into a list of exactly ``max_len`` token ids.

    The text is split on whitespace; tokens missing from ``word2id`` map to
    the ``UNK_TOKEN`` id. Longer inputs are truncated, shorter ones are
    right-padded with the ``PAD_TOKEN`` id.
    """
    token_ids = [
        word2id.get(tok, word2id[UNK_TOKEN]) for tok in text.split()[:max_len]
    ]
    shortfall = max_len - len(token_ids)
    if shortfall > 0:
        token_ids.extend([word2id[PAD_TOKEN]] * shortfall)
    return token_ids

# Sanity check: show one cleaned review and the start of its encoding.
sample_text = df_filtered["clean_review"].iloc[0]
print("sample clean text:", sample_text)
print("encoded[:20]:", encode_sentence(sample_text, word2id)[:20])

# 6. train / valid / test split
# First hold out 20% as the test set, stratified on the class label ...
train_df, test_df = train_test_split(
    df_filtered,
    test_size=0.2,
    random_state=SEED,
    stratify=df_filtered["rating_class"]
)
# ... then take 10% of the remainder as the validation set, again stratified.
train_df, val_df = train_test_split(
    train_df,
    test_size=0.1,
    random_state=SEED,
    stratify=train_df["rating_class"]
)

print("train:", len(train_df), "valid:", len(val_df), "test:", len(test_df))
print("train class 분포:")
print(train_df["rating_class"].value_counts(normalize=True).sort_index())

# 7. Dataset 정의
class MultiTaskDataset(Dataset):
    """Dataset yielding ``(token_ids, class_label, rating, sentiment_label)``.

    Reads the ``clean_review``, ``rating``, ``rating_class`` and
    ``sentiment_label`` columns of ``dataframe``; reviews are encoded lazily
    with ``encode_sentence`` on access.
    """

    def __init__(self, dataframe, word2id, max_len=80):
        self.word2id = word2id
        self.max_len = max_len
        # Pull the columns out as NumPy arrays once, with the dtypes the
        # tensors built in __getitem__ expect.
        self.texts = dataframe["clean_review"].values
        self.ratings = dataframe["rating"].values.astype("float32")
        self.classes = dataframe["rating_class"].values.astype("int64")
        self.sentiments = dataframe["sentiment_label"].values.astype("int64")

    def __len__(self):
        return len(self.classes)

    def __getitem__(self, idx):
        token_ids = encode_sentence(self.texts[idx], self.word2id, self.max_len)
        return (
            torch.tensor(token_ids, dtype=torch.long),
            torch.tensor(self.classes[idx], dtype=torch.long),
            torch.tensor(self.ratings[idx], dtype=torch.float32),
            # Sentiment label is one of -1 (unlabeled), 0, 1.
            torch.tensor(self.sentiments[idx], dtype=torch.long),
        )

BATCH_SIZE = 64

# One dataset per split, all sharing the same vocabulary and sequence length.
train_dataset = MultiTaskDataset(train_df, word2id, MAX_LEN)
val_dataset   = MultiTaskDataset(val_df,   word2id, MAX_LEN)
test_dataset  = MultiTaskDataset(test_df,  word2id, MAX_LEN)

# Only the training loader shuffles; validation/test keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader   = DataLoader(val_dataset,   batch_size=BATCH_SIZE, shuffle=False)
test_loader  = DataLoader(test_dataset,  batch_size=BATCH_SIZE, shuffle=False)

print("dataset sizes:", len(train_dataset), len(val_dataset), len(test_dataset))

# 8. 멀티헤드 BiLSTM + Attention 모델
class MultiTaskRatingSentimentWithAttention(nn.Module):
    """(Bi)LSTM encoder with additive attention pooling and two output heads.

    The pooled context vector feeds a 1-logit sentiment head; the rating head
    receives the context concatenated with that sentiment logit and produces
    ``num_classes`` logits.
    """

    def __init__(
        self,
        vocab_size: int,
        embed_dim: int = 100,
        hidden_dim: int = 128,
        num_layers: int = 1,
        bidirectional: bool = True,
        dropout: float = 0.2,
        pad_idx: int = 0,
        attn_dim: int = 128,
        attn_temperature: float = 2.0,
        num_classes: int = 5,
    ):
        super().__init__()
        self.pad_idx = pad_idx
        self.attn_temperature = attn_temperature

        # Sub-modules are registered in the same order as before so that
        # state_dict keys and seeded initialization are unchanged.
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=pad_idx)

        # nn.LSTM only applies inter-layer dropout, so it is disabled for a
        # single-layer stack.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_size=embed_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=bidirectional,
            dropout=inter_layer_dropout,
        )
        directions = 2 if bidirectional else 1
        lstm_out_dim = hidden_dim * directions

        # Additive attention: score_t = v^T tanh(W h_t + b)
        self.attn_fc = nn.Linear(lstm_out_dim, attn_dim)
        self.attn_v = nn.Linear(attn_dim, 1, bias=False)

        self.dropout = nn.Dropout(dropout)

        self.fc_sentiment = nn.Linear(lstm_out_dim, 1)
        # +1 input feature: the sentiment logit is appended to the context.
        self.fc_rating = nn.Linear(lstm_out_dim + 1, num_classes)

    def forward(self, x):
        """Return ``(rating_logits, sent_logit, attn_weights)`` for id batch ``x``.

        ``x`` is a batch-first tensor of token ids; positions equal to
        ``pad_idx`` are excluded from the attention distribution.
        """
        embedded = self.dropout(self.embedding(x))
        hidden_states, _ = self.lstm(embedded)

        energy = self.attn_v(torch.tanh(self.attn_fc(hidden_states))).squeeze(-1)
        # Padding gets a huge negative score so softmax assigns it ~0 weight.
        energy = energy.masked_fill(x == self.pad_idx, -1e9)
        attn_weights = F.softmax(energy / self.attn_temperature, dim=1)

        pooled = (hidden_states * attn_weights.unsqueeze(-1)).sum(dim=1)
        context = self.dropout(pooled)

        sent_logit = self.fc_sentiment(context).squeeze(-1)
        rating_features = torch.cat([context, sent_logit.unsqueeze(1)], dim=1)
        rating_logits = self.fc_rating(rating_features)

        return rating_logits, sent_logit, attn_weights

# 9. 지표 계산 함수
def calc_metrics_for_batch(rating_logits, y_cls, y_rating):
    """Compute accuracy, macro-F1 and RMSE for one set of predictions.

    Args:
        rating_logits: class logits; the predicted class is the argmax over dim 1.
        y_cls: true class labels.
        y_rating: true ratings, compared against the representative rating
            (``class_to_rep_rating``) of each predicted class.

    Returns:
        ``(acc, f1, rmse)`` as Python/NumPy scalars.
    """
    preds_cls = rating_logits.argmax(dim=1)

    acc = preds_cls.eq(y_cls).float().mean().item()

    f1 = f1_score(
        y_cls.cpu().numpy(),
        preds_cls.cpu().numpy(),
        average="macro",
    )

    # Map each predicted class to its representative rating.
    rep_values = [class_to_rep_rating[c] for c in preds_cls.tolist()]
    rep = torch.tensor(rep_values, dtype=torch.float32, device=y_rating.device)
    rmse = (rep - y_rating).pow(2).mean().sqrt().item()

    return acc, f1, rmse

# 10. 하이퍼파라미터 & 손실 설정 (최종 버전)
EMBED_DIM = 100
HIDDEN_DIM = 128
NUM_LAYERS = 1
BIDIRECTIONAL = True
DROPOUT = 0.2
LR = 3e-4
EPOCHS = 10
# Weight of the auxiliary sentiment loss relative to the rating loss.
LAMBDA_SENT = 0.1

# Inverse-frequency class weights: total / (num_classes * count_of_class),
# computed on the training split only.
# NOTE(review): if a class were absent from train_df this tensor would have
# fewer than NUM_CLASSES entries — confirm every class is present.
class_counts = train_df["rating_class"].value_counts().sort_index()
weights = class_counts.sum() / (NUM_CLASSES * class_counts)
class_weights = torch.tensor(weights.values, dtype=torch.float32, device=device)
print("class_weights:", class_weights)

model = MultiTaskRatingSentimentWithAttention(
    vocab_size=vocab_size,
    embed_dim=EMBED_DIM,
    hidden_dim=HIDDEN_DIM,
    num_layers=NUM_LAYERS,
    bidirectional=BIDIRECTIONAL,
    dropout=DROPOUT,
    pad_idx=word2id[PAD_TOKEN],
    attn_dim=128,
    attn_temperature=2.0,
    num_classes=NUM_CLASSES,
).to(device)

# Class-weighted cross-entropy for the rating head; the sentiment loss is
# left unreduced because the training code averages it over labeled samples only.
criterion_rating = nn.CrossEntropyLoss(weight=class_weights)
criterion_sentiment = nn.BCEWithLogitsLoss(reduction="none")
optimizer = torch.optim.AdamW(model.parameters(), lr=LR)

print(model)

# 11. train / eval (멀티태스크)
def train_one_epoch(model, dataloader, optimizer, device):
    """Run one training epoch and return epoch-level metrics.

    Each batch is optimized on ``criterion_rating`` plus
    ``LAMBDA_SENT * criterion_sentiment``; the sentiment term only uses
    samples whose sentiment label is >= 0.

    Accuracy, macro-F1 and RMSE are computed once over all predictions of the
    epoch. Averaging per-batch macro-F1 / RMSE values (as done previously)
    does not yield the macro-F1 / RMSE of the epoch, because neither metric
    decomposes over batches; accuracy is unaffected by this change.

    Args:
        model: network returning ``(rating_logits, sent_logit, attn_weights)``.
        dataloader: yields ``(x, y_cls, y_rating, y_sent)`` batches.
        optimizer: optimizer stepping ``model``'s parameters.
        device: device the batches are moved to.

    Returns:
        ``(avg_loss, acc, macro_f1, rmse, avg_sent_loss, sent_acc)``. The two
        sentiment values are 0.0 when no sample carried a sentiment label.

    Raises:
        ValueError: if ``dataloader`` yields no samples.
    """
    model.train()
    total_loss = 0.0
    total_samples = 0

    total_sent_loss = 0.0
    total_sent_correct = 0
    total_sent_samples = 0

    # Per-batch predictions/targets, kept on CPU for the epoch-level metrics.
    logits_chunks, cls_chunks, rating_chunks = [], [], []

    for x, y_cls, y_rating, y_sent in dataloader:
        x = x.to(device)
        y_cls = y_cls.to(device)
        y_rating = y_rating.to(device)
        y_sent = y_sent.to(device)

        optimizer.zero_grad()
        rating_logits, sent_logit, _ = model(x)

        loss_rating = criterion_rating(rating_logits, y_cls)

        # Sentiment labels below 0 mark unlabeled samples and are skipped.
        mask = (y_sent >= 0)
        if mask.any():
            y_sent_valid = y_sent[mask].float()
            sent_logit_valid = sent_logit[mask]
            n_valid = int(mask.sum().item())

            loss_sent = criterion_sentiment(sent_logit_valid, y_sent_valid).mean()

            preds_sent = (torch.sigmoid(sent_logit_valid) >= 0.5).long()
            total_sent_correct += (preds_sent == y_sent_valid.long()).sum().item()
            total_sent_loss += loss_sent.item() * n_valid
            total_sent_samples += n_valid
        else:
            loss_sent = torch.tensor(0.0, device=device)

        loss = loss_rating + LAMBDA_SENT * loss_sent
        loss.backward()
        optimizer.step()

        batch_size = x.size(0)
        total_loss += loss.item() * batch_size
        total_samples += batch_size

        # detach() so the stored logits do not keep the autograd graph alive.
        logits_chunks.append(rating_logits.detach().cpu())
        cls_chunks.append(y_cls.cpu())
        rating_chunks.append(y_rating.cpu())

    if total_samples == 0:
        raise ValueError("dataloader yielded no samples")

    avg_loss = total_loss / total_samples
    avg_acc, avg_f1, avg_rmse = calc_metrics_for_batch(
        torch.cat(logits_chunks), torch.cat(cls_chunks), torch.cat(rating_chunks)
    )

    if total_sent_samples > 0:
        avg_sent_loss = total_sent_loss / total_sent_samples
        avg_sent_acc = total_sent_correct / total_sent_samples
    else:
        avg_sent_loss = 0.0
        avg_sent_acc = 0.0

    return avg_loss, avg_acc, avg_f1, avg_rmse, avg_sent_loss, avg_sent_acc


def evaluate(model, dataloader, device):
    """Evaluate ``model`` on ``dataloader`` without gradient tracking.

    The loss is the same combination used for training
    (``criterion_rating + LAMBDA_SENT * criterion_sentiment``, the latter
    restricted to samples with a sentiment label >= 0).

    Accuracy, macro-F1 and RMSE are computed once over all predictions of the
    split. Averaging per-batch macro-F1 / RMSE values (as done previously)
    does not yield the split-level macro-F1 / RMSE, and the validation F1 is
    what selects the saved checkpoint; accuracy is unaffected by this change.

    Args:
        model: network returning ``(rating_logits, sent_logit, attn_weights)``.
        dataloader: yields ``(x, y_cls, y_rating, y_sent)`` batches.
        device: device the batches are moved to.

    Returns:
        ``(avg_loss, acc, macro_f1, rmse, avg_sent_loss, sent_acc)``. The two
        sentiment values are 0.0 when no sample carried a sentiment label.

    Raises:
        ValueError: if ``dataloader`` yields no samples.
    """
    model.eval()
    total_loss = 0.0
    total_samples = 0

    total_sent_loss = 0.0
    total_sent_correct = 0
    total_sent_samples = 0

    # Per-batch predictions/targets, kept on CPU for the split-level metrics.
    logits_chunks, cls_chunks, rating_chunks = [], [], []

    with torch.no_grad():
        for x, y_cls, y_rating, y_sent in dataloader:
            x = x.to(device)
            y_cls = y_cls.to(device)
            y_rating = y_rating.to(device)
            y_sent = y_sent.to(device)

            rating_logits, sent_logit, _ = model(x)

            loss_rating = criterion_rating(rating_logits, y_cls)

            # Sentiment labels below 0 mark unlabeled samples and are skipped.
            mask = (y_sent >= 0)
            if mask.any():
                y_sent_valid = y_sent[mask].float()
                sent_logit_valid = sent_logit[mask]
                n_valid = int(mask.sum().item())

                loss_sent = criterion_sentiment(sent_logit_valid, y_sent_valid).mean()

                preds_sent = (torch.sigmoid(sent_logit_valid) >= 0.5).long()
                total_sent_correct += (preds_sent == y_sent_valid.long()).sum().item()
                total_sent_loss += loss_sent.item() * n_valid
                total_sent_samples += n_valid
            else:
                loss_sent = torch.tensor(0.0, device=device)

            loss = loss_rating + LAMBDA_SENT * loss_sent

            batch_size = x.size(0)
            total_loss += loss.item() * batch_size
            total_samples += batch_size

            logits_chunks.append(rating_logits.cpu())
            cls_chunks.append(y_cls.cpu())
            rating_chunks.append(y_rating.cpu())

    if total_samples == 0:
        raise ValueError("dataloader yielded no samples")

    avg_loss = total_loss / total_samples
    avg_acc, avg_f1, avg_rmse = calc_metrics_for_batch(
        torch.cat(logits_chunks), torch.cat(cls_chunks), torch.cat(rating_chunks)
    )

    if total_sent_samples > 0:
        avg_sent_loss = total_sent_loss / total_sent_samples
        avg_sent_acc = total_sent_correct / total_sent_samples
    else:
        avg_sent_loss = 0.0
        avg_sent_acc = 0.0

    return avg_loss, avg_acc, avg_f1, avg_rmse, avg_sent_loss, avg_sent_acc

# 12. 학습 실행 (최종 멀티태스크 모델)
# Best validation F1 seen so far; None until the first epoch finishes.
best_val_f1 = None

for epoch in range(1, EPOCHS + 1):
    train_loss, train_acc, train_f1, train_rmse, train_sent_loss, train_sent_acc = train_one_epoch(
        model, train_loader, optimizer, device
    )
    val_loss, val_acc, val_f1, val_rmse, val_sent_loss, val_sent_acc = evaluate(
        model, val_loader, device
    )

    print(f"\n=== Epoch {epoch}/{EPOCHS} ===")
    print(
        f"[Train] loss: {train_loss:.4f}, acc: {train_acc:.4f}, "
        f"F1: {train_f1:.4f}, RMSE: {train_rmse:.4f}, "
        f"sent_loss: {train_sent_loss:.4f}, sent_acc: {train_sent_acc:.4f}"
    )
    print(
        f"[Valid] loss: {val_loss:.4f}, acc: {val_acc:.4f}, "
        f"F1: {val_f1:.4f}, RMSE: {val_rmse:.4f}, "
        f"sent_loss: {val_sent_loss:.4f}, sent_acc: {val_sent_acc:.4f}"
    )

    # Checkpoint whenever the validation F1 strictly improves.
    if best_val_f1 is None or val_f1 > best_val_f1:
        best_val_f1 = val_f1
        torch.save(model.state_dict(), "/content/best_final_multitask.pt")
        print("✔ best 최종 모델 저장 (F1 개선)")

# 13. Test 평가 (최종 모델)
# Rebuild the architecture with the training hyperparameters and load the
# best checkpoint saved by the training loop.
best_model = MultiTaskRatingSentimentWithAttention(
    vocab_size=vocab_size,
    embed_dim=EMBED_DIM,
    hidden_dim=HIDDEN_DIM,
    num_layers=NUM_LAYERS,
    bidirectional=BIDIRECTIONAL,
    dropout=DROPOUT,
    pad_idx=word2id[PAD_TOKEN],
    attn_dim=128,
    attn_temperature=2.0,
    num_classes=NUM_CLASSES,
).to(device)

best_model.load_state_dict(torch.load("/content/best_final_multitask.pt", map_location=device))

test_loss, test_acc, test_f1, test_rmse, test_sent_loss, test_sent_acc = evaluate(
    best_model, test_loader, device
)
print("\n=== Test 결과 (최종 멀티태스크) ===")
print(
    f"loss: {test_loss:.4f}, acc: {test_acc:.4f}, "
    f"F1: {test_f1:.4f}, RMSE: {test_rmse:.4f}, "
    f"sent_loss: {test_sent_loss:.4f}, sent_acc: {test_sent_acc:.4f}"
)

# 14. Temperature Scaling (Calibration)
# Collect the rating logits and labels of the whole validation split once;
# the temperature is fitted on these fixed tensors.
best_model.eval()
val_logits_list = []
val_labels_list = []

with torch.no_grad():
    for x, y_cls, _, _ in val_loader:
        x = x.to(device)
        y_cls = y_cls.to(device)

        rating_logits, _, _ = best_model(x)
        val_logits_list.append(rating_logits)
        val_labels_list.append(y_cls)

val_logits = torch.cat(val_logits_list, dim=0)
val_labels = torch.cat(val_labels_list, dim=0)

print("val_logits shape:", val_logits.shape)
print("val_labels shape:", val_labels.shape)

# Single learnable temperature, initialized to 1 (i.e. no scaling).
# NOTE(review): T is optimized unconstrained, so nothing here prevents it
# from reaching zero or a negative value — confirm the learned value is > 0.
T = torch.nn.Parameter(torch.ones(1, device=device))
optimizer_T = torch.optim.LBFGS([T], lr=0.1, max_iter=50)

# NOTE(review): the calibration objective reuses the class-weighted
# cross-entropy rather than a plain one — confirm this is intended.
calib_criterion = nn.CrossEntropyLoss(weight=class_weights)

def eval_T():
    """Return the closure passed to ``optimizer_T.step``.

    The closure clears the gradient of ``T``, evaluates ``calib_criterion`` on
    the stored validation logits divided by ``T``, backpropagates, and
    returns the loss.
    """
    def _loss_at_current_T():
        optimizer_T.zero_grad()
        loss = calib_criterion(val_logits / T, val_labels)
        loss.backward()
        return loss

    return _loss_at_current_T

# One LBFGS step call (up to max_iter inner iterations) fits T; keep the
# result as a plain float for later use.
optimizer_T.step(eval_T())
T_value = T.detach().item()
print(f"Learned Temperature T: {T_value:.4f}")

# 15. Calibration 전/후 평가 함수
def evaluate_with_temperature(model, dataloader, device, T_value=None):
    """Evaluate the rating head, optionally with temperature-scaled logits.

    Accuracy, macro-F1 and RMSE are computed once over all predictions of the
    split. Averaging per-batch macro-F1 / RMSE values (as done previously)
    does not yield the split-level macro-F1 / RMSE; accuracy is unaffected by
    this change. The loss is still the sample-weighted mean of the per-batch
    ``calib_criterion`` values.

    Args:
        model: network returning ``(rating_logits, sent_logit, attn_weights)``.
        dataloader: yields ``(x, y_cls, y_rating, y_sent)`` batches.
        device: device the batches are moved to.
        T_value: temperature the logits are divided by; ``None`` leaves the
            logits unscaled.

    Returns:
        ``(avg_loss, acc, macro_f1, rmse)``.

    Raises:
        ValueError: if ``dataloader`` yields no samples.
    """
    model.eval()
    total_loss = 0.0
    total_samples = 0

    # Per-batch predictions/targets, kept on CPU for the split-level metrics.
    logits_chunks, cls_chunks, rating_chunks = [], [], []

    with torch.no_grad():
        for x, y_cls, y_rating, _ in dataloader:
            x = x.to(device)
            y_cls = y_cls.to(device)
            y_rating = y_rating.to(device)

            rating_logits, _, _ = model(x)
            if T_value is not None:
                rating_logits = rating_logits / T_value

            loss_rating = calib_criterion(rating_logits, y_cls)

            batch_size = x.size(0)
            total_loss += loss_rating.item() * batch_size
            total_samples += batch_size

            logits_chunks.append(rating_logits.cpu())
            cls_chunks.append(y_cls.cpu())
            rating_chunks.append(y_rating.cpu())

    if total_samples == 0:
        raise ValueError("dataloader yielded no samples")

    avg_loss = total_loss / total_samples
    avg_acc, avg_f1, avg_rmse = calc_metrics_for_batch(
        torch.cat(logits_chunks), torch.cat(cls_chunks), torch.cat(rating_chunks)
    )

    return avg_loss, avg_acc, avg_f1, avg_rmse

# Test metrics with the raw logits ...
print("\n=== Calibration 전(Test) ===")
orig_loss, orig_acc, orig_f1, orig_rmse = evaluate_with_temperature(
    best_model, test_loader, device, T_value=None
)
print(
    f"loss: {orig_loss:.4f}, acc: {orig_acc:.4f}, "
    f"F1: {orig_f1:.4f}, RMSE: {orig_rmse:.4f}"
)

# ... and with the logits divided by the learned temperature.
print("\n=== Calibration 후(Test, logits / T) ===")
calib_loss, calib_acc, calib_f1, calib_rmse = evaluate_with_temperature(
    best_model, test_loader, device, T_value=T_value
)
print(
    f"loss: {calib_loss:.4f}, acc: {calib_acc:.4f}, "
    f"F1: {calib_f1:.4f}, RMSE: {calib_rmse:.4f}"
)

# 16. 최종 예측 함수 (Calibration 적용)
def predict_review_final(text, model, word2id, max_len=80, T_value=1.0):
    """Predict the rating class, sentiment probability and attention for one review.

    Args:
        text: raw review text; it is cleaned with ``preprocess_text_basic``.
        model: network returning ``(rating_logits, sent_logit, attn_weights)``.
        word2id: token-to-id vocabulary used for encoding.
        max_len: sequence length the review is truncated/padded to.
        T_value: temperature the rating logits are divided by before softmax.

    Returns:
        ``(pred_cls, pred_rating, probs_rating, sent_prob, tokens, attn)``
        where ``pred_rating`` is ``class_to_rep_rating[pred_cls]``,
        ``probs_rating`` and ``attn`` are NumPy arrays, and ``tokens`` are the
        whitespace tokens that were actually fed to the model (at most
        ``max_len``), so ``tokens[i]`` corresponds to ``attn[i]``.
    """
    model.eval()
    clean = preprocess_text_basic(text)

    # This notebook also defines an encode_sentence variant that returns
    # (ids, length); accept either form so the function keeps working
    # whichever definition is currently in scope.
    encoded = encode_sentence(clean, word2id, max_len)
    ids = encoded[0] if isinstance(encoded, tuple) else encoded
    x = torch.tensor([ids], dtype=torch.long).to(device)

    with torch.no_grad():
        rating_logits, sent_logit, attn_weights = model(x)

    rating_logits = rating_logits / T_value

    probs_rating = F.softmax(rating_logits, dim=1)[0].cpu().numpy()
    pred_cls = int(np.argmax(probs_rating))
    pred_rating = class_to_rep_rating[pred_cls]

    sent_prob = torch.sigmoid(sent_logit)[0].item()

    # Tokens beyond max_len never reached the model; drop them so the token
    # list lines up with the leading attention weights.
    tokens = clean.split()[:max_len]
    attn = attn_weights[0].cpu().numpy()

    return pred_cls, pred_rating, probs_rating, sent_prob, tokens, attn

# 17. 예시 리뷰 테스트
# Run the calibrated predictor on a single hand-written review.
example = "송혜교가 처음으로 매우 싫어진 영화"

pred_cls, pred_rating, probs, sent_prob, tokens, attn = predict_review_final(
    example, best_model, word2id, MAX_LEN, T_value=T_value
)

print("\n=== 최종 모델 예시 예측 ===")
print("원문:", example)
print("예측 클래스:", pred_cls, "→ 대표 평점:", pred_rating)
print("클래스 확률:", np.round(probs, 3))
print(f"감정(긍정) 확률: {sent_prob:.3f}")

# Pair each token with its attention weight (zip stops at the shorter of the
# two sequences) and print the ten most attended tokens.
print("\n[Attention 상위 10 토큰]")
token_attn = list(zip(tokens, attn))
token_attn = sorted(token_attn, key=lambda x: x[1], reverse=True)
for t, a in token_attn[:10]:
    print(f"{t} ({a:.3f})", end=" | ")
print()

device: cuda
원본 리뷰 개수: 712404
  review_id movie_id    author  \
0  10275295    92575  terr****   
1  10273693    92575  qorh****   
2  10269103    92575  myun****   
3  10268563    92575  good****   
4  10257610    92575  thdr****   

                                              review rating      date  
0                                    종합 평점은 4점 드립니다.      4  15.08.26  
1  원작이 칭송받는 이유는 웹툰 계 자체의 질적 저하가 심각하기 때문.  원작이나 영화...      1  15.08.25  
2  나름의  감동도 있고 안타까운 마음에 가슴도 먹먹  배우들의 연기가 good 김수현...     10  15.08.23  
3   이런걸 돈주고 본 내자신이 후회스럽다 최악의 쓰레기 영화 김수현 밖에없는 저질 삼류영화      1  15.08.23  
4                          초반엔 코미디, 후반엔 액션, 결론은 코미디.      7  15.08.21  
정리 후 개수: 712404
count    712404.000000
mean          7.571844
std           3.013130
min           1.000000
25%           6.000000
50%           9.000000
75%          10.000000
max          10.000000
Name: rating, dtype: float64
필터링 전 리뷰 수: 712404
필터링 후 리뷰 수: 235059
필터링 후 고유 author 수: 90133
전처리 후 리뷰 수: 235049
                      

추천 모델

In [None]:
# Restore the trained model and its vocabulary from a saved checkpoint.
LOAD_PATH = "/content/drive/MyDrive/colab/kangnam/기계학습/movie_review/model/final_model.pt"

checkpoint = torch.load(LOAD_PATH, map_location=device)
print("checkpoint keys:", checkpoint.keys())

vocab_size = checkpoint["vocab_size"]
num_classes = len(checkpoint["class_to_rep_rating"])
MAX_LEN = checkpoint["max_len"]

# NOTE(review): these sizes are not read from the checkpoint; they must match
# the values the checkpoint was trained with or load_state_dict will fail.
EMBED_DIM = 100
HIDDEN_DIM = 128
NUM_LAYERS = 1

print("vocab_size:", vocab_size)
print("num_classes:", num_classes)
print("MAX_LEN:", MAX_LEN)

# Remaining constructor arguments (bidirectional, dropout, pad_idx, attention
# settings) fall back to the class defaults.
best_model = MultiTaskRatingSentimentWithAttention(
    vocab_size=vocab_size,
    embed_dim=EMBED_DIM,
    hidden_dim=HIDDEN_DIM,
    num_layers=NUM_LAYERS,
    num_classes=num_classes
).to(device)

best_model.load_state_dict(checkpoint["model_state_dict"])

best_model.eval()

# NOTE(review): the checkpoint also stores "T_value" and
# "class_to_rep_rating" (see the printed keys); neither is restored here.
word2id = checkpoint["word2id"]
id2word = checkpoint["id2word"]

print("✅ 모델 로드 완료")

checkpoint keys: dict_keys(['model_state_dict', 'word2id', 'id2word', 'vocab_size', 'max_len', 'class_to_rep_rating', 'T_value'])
vocab_size: 60741
num_classes: 5
MAX_LEN: 80
✅ 모델 로드 완료


In [None]:
import os
import json
import random
import re
from collections import Counter

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

# Prefer the GPU when one is visible; everything below is moved to `device`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("device:", device)

# Seed every RNG used in this notebook (Python, NumPy, PyTorch CPU and CUDA).
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)


# Path to the merged review dump.
DATA_PATH = "/content/drive/MyDrive/colab/kangnam/기계학습/movie_review/dataset/merged_reviews.json"

with open(DATA_PATH, "r", encoding="utf-8") as f:
    data = json.load(f)

print("원본 리뷰 개수:", len(data))
df = pd.DataFrame(data)
print(df.head())

# Fail fast if the dump does not carry the columns the pipeline relies on.
needed_cols = ["movie_id", "author", "review", "rating"]
missing = [c for c in needed_cols if c not in df.columns]
if missing:
    raise ValueError(f"필수 컬럼 없음: {missing}")

df = df[needed_cols].copy()
# Non-numeric ratings become NaN here and are removed by the dropna below.
df["rating"] = pd.to_numeric(df["rating"], errors="coerce")
df = df.dropna(subset=["review", "author", "rating"])
print("정리 후 개수:", len(df))
print(df["rating"].describe())


# Keep only authors that appear at most 15 times.
# NOTE(review): value_counts() only lists authors that occur, so the `>= 1`
# bound looks like a no-op.
author_counts = df["author"].value_counts()
valid_authors = author_counts[(author_counts >= 1) & (author_counts <= 15)].index
df_filtered = df[df["author"].isin(valid_authors)].copy()

print("필터링 전 리뷰 수:", len(df))
print("필터링 후 리뷰 수:", len(df_filtered))
print("필터링 후 고유 author 수:", df_filtered["author"].nunique())


def preprocess_text_basic(text: str) -> str:
    """Return ``str(text)`` with whitespace runs collapsed and ends trimmed."""
    # A single r"\s+" substitution also handles the newlines and tabs.
    squeezed = re.sub(r"\s+", " ", str(text))
    return squeezed.strip()

# Add the whitespace-normalized text and drop reviews that end up empty.
df_filtered["clean_review"] = df_filtered["review"].apply(preprocess_text_basic)
df_filtered = df_filtered[df_filtered["clean_review"] != ""]
print("전처리 후 리뷰 수:", len(df_filtered))
print(df_filtered[["review", "clean_review"]].head())


def rating_to_class(r):
    """Map a rating to a class in 0..4 using inclusive upper bounds 2, 4, 6, 8."""
    value = float(r)
    upper_bounds = (2, 4, 6, 8)
    for label, bound in enumerate(upper_bounds):
        if value <= bound:
            return label
    # Above the last bound (or not comparable, e.g. NaN): top class.
    return len(upper_bounds)

# Attach the 5-way class label and print the per-class counts.
df_filtered["rating_class"] = df_filtered["rating"].apply(rating_to_class)
print(df_filtered["rating_class"].value_counts().sort_index())


def rating_to_sentiment(r):
    """Return 0 for ratings <= 3, 1 for ratings >= 8, and -1 otherwise."""
    value = float(r)
    if value <= 3:
        return 0
    return 1 if value >= 8 else -1

# Attach the sentiment pseudo-label (-1 = no label, 0 = negative, 1 = positive).
df_filtered["sentiment_label"] = df_filtered["rating"].apply(rating_to_sentiment)
print(df_filtered["sentiment_label"].value_counts())


# Representative rating reported for each predicted class.
class_to_rep_rating = {0: 1.5, 1: 3.5, 2: 5.5, 3: 7.5, 4: 9.5}
NUM_CLASSES = 5


# Tokens seen fewer than MIN_FREQ times are encoded as UNK_TOKEN.
MIN_FREQ = 3
PAD_TOKEN = "<pad>"
UNK_TOKEN = "<unk>"

# Whitespace-token frequencies over every cleaned review.
# NOTE(review): counted over all of df_filtered, i.e. before the
# train/valid/test split below, so held-out text contributes to the vocab.
counter = Counter()
for text in df_filtered["clean_review"]:
    counter.update(text.split())

# Ids 0 and 1 are reserved for padding and unknown tokens.
word2id = {PAD_TOKEN: 0, UNK_TOKEN: 1}
for word, freq in counter.items():
    if freq >= MIN_FREQ:
        word2id[word] = len(word2id)

id2word = {i: w for w, i in word2id.items()}
vocab_size = len(word2id)
print("vocab_size:", vocab_size)

# Fixed sequence length every review is truncated/padded to.
MAX_LEN = 80

def encode_sentence(text: str, word2id: dict, max_len: int = MAX_LEN):
    """Encode ``text`` as a list of ``max_len`` ids (truncate, then right-pad).

    Whitespace tokens not present in ``word2id`` are replaced by the id of
    ``UNK_TOKEN``; padding uses the id of ``PAD_TOKEN``.
    """
    encoded = [
        word2id.get(tok, word2id[UNK_TOKEN]) for tok in text.split()[:max_len]
    ]
    missing = max_len - len(encoded)
    if missing > 0:
        encoded.extend([word2id[PAD_TOKEN]] * missing)
    return encoded


# Hold out 20% as the test set, then 10% of the remainder as validation;
# both splits are stratified on the class label and use the same seed.
train_df, test_df = train_test_split(
    df_filtered, test_size=0.2, random_state=SEED, stratify=df_filtered["rating_class"]
)
train_df, val_df = train_test_split(
    train_df, test_size=0.1, random_state=SEED, stratify=train_df["rating_class"]
)

print("train:", len(train_df), "valid:", len(val_df), "test:", len(test_df))


class MultiTaskDataset(Dataset):
    """Wrap a review dataframe as ``(ids, class, rating, sentiment)`` samples.

    Expects the columns ``clean_review``, ``rating``, ``rating_class`` and
    ``sentiment_label``; text is encoded with ``encode_sentence`` on access.
    """

    def __init__(self, dataframe, word2id, max_len=80):
        self.word2id = word2id
        self.max_len = max_len
        # Cache each column as a NumPy array with the dtype used for tensors.
        self.texts = dataframe["clean_review"].values
        self.ratings = dataframe["rating"].values.astype("float32")
        self.classes = dataframe["rating_class"].values.astype("int64")
        self.sentiments = dataframe["sentiment_label"].values.astype("int64")

    def __len__(self):
        return len(self.classes)

    def __getitem__(self, idx):
        encoded = encode_sentence(self.texts[idx], self.word2id, self.max_len)

        x = torch.tensor(encoded, dtype=torch.long)
        y_cls = torch.tensor(self.classes[idx], dtype=torch.long)
        y_rating = torch.tensor(self.ratings[idx], dtype=torch.float32)
        y_sent = torch.tensor(self.sentiments[idx], dtype=torch.long)
        return x, y_cls, y_rating, y_sent


BATCH_SIZE = 64

# Only the training loader shuffles; validation/test use DataLoader's
# default (no shuffling).
train_loader = DataLoader(MultiTaskDataset(train_df, word2id, MAX_LEN), batch_size=BATCH_SIZE, shuffle=True)
val_loader   = DataLoader(MultiTaskDataset(val_df, word2id, MAX_LEN), batch_size=BATCH_SIZE)
test_loader  = DataLoader(MultiTaskDataset(test_df, word2id, MAX_LEN), batch_size=BATCH_SIZE)


class MultiTaskRatingSentimentWithAttention(nn.Module):
    """(Bi)LSTM + additive attention with a sentiment head and a rating head.

    The attention-pooled context feeds the sentiment head; the rating head
    sees the context concatenated with the sentiment logit.
    """

    def __init__(
        self,
        vocab_size,
        embed_dim=100,
        hidden_dim=128,
        num_layers=1,
        bidirectional=True,
        dropout=0.2,
        pad_idx=0,
        attn_dim=128,
        attn_temperature=2.0,
        num_classes=5,
    ):
        super().__init__()
        self.pad_idx = pad_idx
        self.attn_temperature = attn_temperature

        # Sub-modules keep their original names and registration order so
        # existing checkpoints and seeded initialization are unaffected.
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=pad_idx)

        self.lstm = nn.LSTM(
            input_size=embed_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=bidirectional,
            # Inter-layer dropout only exists for stacked LSTMs.
            dropout=dropout if num_layers > 1 else 0.0,
        )

        encoder_dim = hidden_dim * 2 if bidirectional else hidden_dim

        self.attn_fc = nn.Linear(encoder_dim, attn_dim)
        self.attn_v = nn.Linear(attn_dim, 1, bias=False)

        self.dropout = nn.Dropout(dropout)
        self.fc_sentiment = nn.Linear(encoder_dim, 1)
        # One extra input feature for the appended sentiment logit.
        self.fc_rating = nn.Linear(encoder_dim + 1, num_classes)

    def forward(self, x):
        """Return ``(rating_logits, sent_logit, attn_weights)`` for id batch ``x``."""
        embedded = self.embedding(x)
        embedded = self.dropout(embedded)
        encoded, _ = self.lstm(embedded)

        projected = torch.tanh(self.attn_fc(encoded))
        scores = self.attn_v(projected).squeeze(-1)

        # Padding positions get a huge negative score, i.e. ~0 attention.
        is_pad = x == self.pad_idx
        scores = scores.masked_fill(is_pad, -1e9)
        attn_weights = F.softmax(scores / self.attn_temperature, dim=1)

        weighted = encoded * attn_weights.unsqueeze(-1)
        context = self.dropout(weighted.sum(dim=1))

        sent_logit = self.fc_sentiment(context).squeeze(-1)
        rating_logits = self.fc_rating(
            torch.cat([context, sent_logit.unsqueeze(1)], dim=1)
        )

        return rating_logits, sent_logit, attn_weights


# Inverse-frequency class weights: total / (num_classes * count_of_class),
# computed on the training split only.
class_counts = train_df["rating_class"].value_counts().sort_index()
weights = class_counts.sum() / (NUM_CLASSES * class_counts)
class_weights = torch.tensor(weights.values, dtype=torch.float32, device=device)


# num_layers, attention settings and num_classes use the class defaults.
model = MultiTaskRatingSentimentWithAttention(
    vocab_size=vocab_size,
    embed_dim=100,
    hidden_dim=128,
    bidirectional=True,
    dropout=0.2,
    pad_idx=word2id[PAD_TOKEN],
).to(device)

# Class-weighted cross-entropy for the rating head; the sentiment loss is
# left unreduced because it is averaged over labeled samples only.
criterion_rating = nn.CrossEntropyLoss(weight=class_weights)
criterion_sentiment = nn.BCEWithLogitsLoss(reduction="none")
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4)


def calc_metrics_for_batch(rating_logits, y_cls, y_rating):
    """Return ``(accuracy, macro_f1, rmse)`` for one set of predictions.

    The predicted class is the argmax of ``rating_logits`` over dim 1; RMSE
    compares ``y_rating`` with the representative rating
    (``class_to_rep_rating``) of each predicted class.
    """
    predicted = rating_logits.argmax(dim=1)

    accuracy = predicted.eq(y_cls).float().mean().item()
    macro_f1 = f1_score(y_cls.cpu(), predicted.cpu(), average="macro")

    rep_ratings = torch.tensor(
        [class_to_rep_rating[c] for c in predicted.tolist()],
        device=y_rating.device,
    )
    rmse = (rep_ratings - y_rating).pow(2).mean().sqrt().item()

    return accuracy, macro_f1, rmse


def train_one_epoch(model, loader):
    """Train ``model`` for one pass over ``loader`` and return epoch metrics.

    Uses the module-level ``optimizer``, ``device``, ``criterion_rating`` and
    ``criterion_sentiment``. The batch loss is the rating loss plus 0.1 times
    the sentiment loss, the latter restricted to samples whose sentiment
    label is >= 0.

    Accuracy, macro-F1 and RMSE are computed once over all predictions of the
    epoch. Averaging per-batch macro-F1 / RMSE values (as done previously)
    does not yield the epoch-level macro-F1 / RMSE, because neither metric
    decomposes over batches.

    Returns:
        ``(avg_loss, acc, macro_f1, rmse, avg_sent_loss, sent_acc)``; the loss
        is normalized by ``len(loader.dataset)`` and the sentiment values by
        the number of labeled samples (0 when there are none).

    Raises:
        ValueError: if ``loader`` yields no batches.
    """
    model.train()
    tot_loss = 0.0
    s_loss = s_acc = s_cnt = 0

    # Per-batch predictions/targets, kept on CPU for the epoch-level metrics.
    all_logits, all_cls, all_rating = [], [], []

    for x, y_cls, y_rating, y_sent in loader:
        x, y_cls, y_rating, y_sent = x.to(device), y_cls.to(device), y_rating.to(device), y_sent.to(device)
        optimizer.zero_grad()
        rating_logits, sent_logit, _ = model(x)

        loss_rating = criterion_rating(rating_logits, y_cls)

        # Sentiment labels below 0 mark unlabeled samples and are skipped.
        mask = y_sent >= 0
        if mask.any():
            loss_sent = criterion_sentiment(sent_logit[mask], y_sent[mask].float()).mean()
            preds = (torch.sigmoid(sent_logit[mask]) >= 0.5).long()
            s_acc += (preds == y_sent[mask]).float().sum().item()
            s_loss += loss_sent.item() * mask.sum().item()
            s_cnt += mask.sum().item()
        else:
            loss_sent = torch.tensor(0.0, device=device)

        loss = loss_rating + 0.1 * loss_sent
        loss.backward()
        optimizer.step()

        tot_loss += loss.item() * len(x)

        # detach() so the stored logits do not keep the autograd graph alive.
        all_logits.append(rating_logits.detach().cpu())
        all_cls.append(y_cls.cpu())
        all_rating.append(y_rating.cpu())

    if not all_logits:
        raise ValueError("loader yielded no batches")

    acc, f1, rmse = calc_metrics_for_batch(
        torch.cat(all_logits), torch.cat(all_cls), torch.cat(all_rating)
    )
    n_samples = len(loader.dataset)

    return tot_loss / n_samples, acc, f1, rmse, s_loss / max(s_cnt, 1), s_acc / max(s_cnt, 1)


def evaluate(model, loader):
    """Evaluate ``model`` on ``loader`` without gradient tracking.

    Uses the module-level ``device``, ``criterion_rating`` and
    ``criterion_sentiment``. The loss is the rating loss plus 0.1 times the
    sentiment loss, the latter restricted to samples whose sentiment label
    is >= 0.

    Accuracy, macro-F1 and RMSE are computed once over all predictions of the
    split. Averaging per-batch macro-F1 / RMSE values (as done previously)
    does not yield the split-level macro-F1 / RMSE, and the validation F1 is
    what selects the saved checkpoint.

    Returns:
        ``(avg_loss, acc, macro_f1, rmse, avg_sent_loss, sent_acc)``; the loss
        is normalized by ``len(loader.dataset)`` and the sentiment values by
        the number of labeled samples (0 when there are none).

    Raises:
        ValueError: if ``loader`` yields no batches.
    """
    model.eval()
    tot_loss = 0.0
    s_loss = s_acc = s_cnt = 0

    # Per-batch predictions/targets, kept on CPU for the split-level metrics.
    all_logits, all_cls, all_rating = [], [], []

    with torch.no_grad():
        for x, y_cls, y_rating, y_sent in loader:
            x, y_cls, y_rating, y_sent = x.to(device), y_cls.to(device), y_rating.to(device), y_sent.to(device)
            rating_logits, sent_logit, _ = model(x)
            loss_rating = criterion_rating(rating_logits, y_cls)

            # Sentiment labels below 0 mark unlabeled samples and are skipped.
            mask = y_sent >= 0
            if mask.any():
                loss_sent = criterion_sentiment(sent_logit[mask], y_sent[mask].float()).mean()
                preds = (torch.sigmoid(sent_logit[mask]) >= 0.5).long()
                s_acc += (preds == y_sent[mask]).float().sum().item()
                s_loss += loss_sent.item() * mask.sum().item()
                s_cnt += mask.sum().item()
            else:
                loss_sent = torch.tensor(0.0, device=device)

            loss = loss_rating + 0.1 * loss_sent
            tot_loss += loss.item() * len(x)

            all_logits.append(rating_logits.cpu())
            all_cls.append(y_cls.cpu())
            all_rating.append(y_rating.cpu())

    if not all_logits:
        raise ValueError("loader yielded no batches")

    acc, f1, rmse = calc_metrics_for_batch(
        torch.cat(all_logits), torch.cat(all_cls), torch.cat(all_rating)
    )
    n_samples = len(loader.dataset)

    return tot_loss / n_samples, acc, f1, rmse, s_loss / max(s_cnt, 1), s_acc / max(s_cnt, 1)


# Best validation F1 seen so far; None until the first epoch finishes.
best_f1 = None
for epoch in range(1, 11):
    tr = train_one_epoch(model, train_loader)
    va = evaluate(model, val_loader)
    print(f"\nEpoch {epoch}")
    print("Train:", tr)
    print("Valid:", va)

    # va[2] is the F1 entry of the metrics tuple; checkpoint on strict improvement.
    if best_f1 is None or va[2] > best_f1:
        best_f1 = va[2]
        torch.save(model.state_dict(), "/content/best_final_multitask.pt")
        print("✔ best model saved")


# Rebuild the model with the class-default hyperparameters (these must match
# the ones used for `model` above) and load the best checkpoint.
best_model = MultiTaskRatingSentimentWithAttention(vocab_size=vocab_size, pad_idx=word2id[PAD_TOKEN]).to(device)
best_model.load_state_dict(torch.load("/content/best_final_multitask.pt", map_location=device))
best_model.eval()

test_metrics = evaluate(best_model, test_loader)
print("\nTest:", test_metrics)

device: cuda
✅ encode_for_recommendation 메서드 추가 완료


In [None]:
def preprocess_text_basic(text: str) -> str:
    """Normalize the whitespace of a review.

    The input is coerced with ``str()``. Every run of whitespace (newlines
    and tabs included) is collapsed into a single space, and leading and
    trailing whitespace is removed.
    """
    # "\s+" already matches "\n" and "\t", so one substitution is enough.
    collapsed = re.sub(r"\s+", " ", str(text))
    return collapsed.strip()

# Ids of the special tokens, looked up once in the vocabulary.
# Either id falls back to 0 when its token is missing from word2id.
PAD_ID, UNK_ID = (word2id.get(special, 0) for special in ("<pad>", "<unk>"))

def encode_sentence(text, word2id, max_len=MAX_LEN):
    """Convert raw text into a fixed-length sequence of token ids.

    The text is cleaned with ``preprocess_text_basic`` and split on
    whitespace. Tokens missing from ``word2id`` map to ``UNK_ID``. The
    sequence is truncated to ``max_len`` tokens and right-padded with
    ``PAD_ID``.

    Args:
        text: Raw review text.
        word2id: Mapping from token string to integer id.
        max_len: Length of the returned id list.

    Returns:
        A tuple ``(ids, length)`` where ``length`` is the number of tokens
        before padding, capped at ``max_len``. Text with no tokens is treated
        as the single token ``"<pad>"``.
    """
    tokens = preprocess_text_basic(text).split() or ["<pad>"]
    length = min(len(tokens), max_len)

    ids = [word2id.get(token, UNK_ID) for token in tokens[:max_len]]
    # A non-positive pad count multiplies out to an empty list, so no guard
    # is needed when the sequence already fills max_len.
    ids.extend([PAD_ID] * (max_len - len(ids)))

    return ids, length

In [None]:
def get_review_embedding(text, model, word2id, max_len=MAX_LEN):
    """Embed a single review with ``model.encode_for_recommendation``.

    The model is put into eval mode, the text is encoded with
    ``encode_sentence``, and the model is run on a batch of one under
    ``torch.no_grad()``.

    Args:
        text: Raw review text.
        model: Model exposing ``encode_for_recommendation(input_ids, lengths)``.
        word2id: Mapping from token string to integer id.
        max_len: Sequence length passed to ``encode_sentence``.

    Returns:
        The model output with its leading dimension squeezed out, moved to
        the CPU and converted to a NumPy array.
    """
    model.eval()
    token_ids, n_tokens = encode_sentence(text, word2id, max_len)

    tensor_opts = {"dtype": torch.long, "device": device}
    input_ids = torch.tensor(token_ids, **tensor_opts).unsqueeze(0)  # batch of one
    lengths = torch.tensor([n_tokens], **tensor_opts)

    with torch.no_grad():
        embedding = model.encode_for_recommendation(input_ids, lengths)

    return embedding.squeeze(0).cpu().numpy()

In [None]:
# Reload the merged reviews, coerce ratings to numbers, and drop rows that
# lack a review, an author or a valid rating; then add a whitespace-normalized
# copy of each review.
df_all = pd.read_json("/content/drive/MyDrive/colab/kangnam/기계학습/movie_review/dataset/merged_reviews.json")
df_all = (
    df_all.assign(rating=pd.to_numeric(df_all["rating"], errors="coerce"))
    .dropna(subset=["review", "author", "rating"])
    .reset_index(drop=True)
)
df_all["clean_review"] = df_all["review"].map(preprocess_text_basic)

print(df_all.head())
print(df_all.columns)

   review_id  movie_id    author  \
0   10275295     92575  terr****   
1   10273693     92575  qorh****   
2   10269103     92575  myun****   
3   10268563     92575  good****   
4   10257610     92575  thdr****   

                                              review  rating       date  \
0                                    종합 평점은 4점 드립니다.       4 2026-08-15   
1  원작이 칭송받는 이유는 웹툰 계 자체의 질적 저하가 심각하기 때문.  원작이나 영화...       1 2025-08-15   
2  나름의  감동도 있고 안타까운 마음에 가슴도 먹먹  배우들의 연기가 good 김수현...      10 2023-08-15   
3   이런걸 돈주고 본 내자신이 후회스럽다 최악의 쓰레기 영화 김수현 밖에없는 저질 삼류영화       1 2023-08-15   
4                          초반엔 코미디, 후반엔 액션, 결론은 코미디.       7 2021-08-15   

                                        clean_review  
0                                    종합 평점은 4점 드립니다.  
1  원작이 칭송받는 이유는 웹툰 계 자체의 질적 저하가 심각하기 때문. 원작이나 영화나...  
2   나름의 감동도 있고 안타까운 마음에 가슴도 먹먹 배우들의 연기가 good 김수현 최고~  
3   이런걸 돈주고 본 내자신이 후회스럽다 최악의 쓰레기 영화 김수현 밖에없는 저질 삼류영화  
4                          초반엔 코미디, 후반엔 액션, 결론은 코미디.  
In

In [None]:
# Build one embedding per movie: the mean of the embeddings of all its reviews.
movie_ids = df_all["movie_id"].unique()
movie_emb_dict = {}

# Group the reviews once instead of re-filtering the whole frame with a
# boolean mask for every movie. sort=False yields the movies in
# first-appearance order, the same order as `movie_ids`.
reviews_by_movie = df_all.groupby("movie_id", sort=False)["review"]

for mid, reviews in tqdm(reviews_by_movie, total=len(movie_ids)):
    # Every group holds at least one review, so the mean is always defined.
    embs = [get_review_embedding(txt, best_model, word2id, MAX_LEN) for txt in reviews]
    movie_emb_dict[mid] = np.mean(embs, axis=0)

print("임베딩이 만들어진 영화 수:", len(movie_emb_dict))

100%|██████████| 14730/14730 [15:05<00:00, 16.26it/s]

임베딩이 만들어진 영화 수: 14730





In [None]:
def build_author_profile_embedding(author_id, df_all, model, word2id,
                                   min_rating_pos=8.0, max_len=MAX_LEN):
    """Average the embeddings of an author's highly rated reviews.

    Args:
        author_id: Value matched against the ``author`` column.
        df_all: Review frame with ``author``, ``rating`` and ``review`` columns.
        model: Model handed to ``get_review_embedding``.
        word2id: Mapping from token string to integer id.
        min_rating_pos: Minimum rating for a review to count as positive.
        max_len: Sequence length passed to ``get_review_embedding``.

    Returns:
        The element-wise mean of the review embeddings, or ``None`` when the
        author has no review rated at least ``min_rating_pos``.
    """
    by_author = df_all["author"] == author_id
    rated_high = df_all["rating"] >= min_rating_pos
    liked_reviews = df_all.loc[by_author & rated_high, "review"]
    if liked_reviews.empty:
        return None

    vectors = [
        get_review_embedding(review, model, word2id, max_len)
        for review in liked_reviews
    ]
    return np.mean(vectors, axis=0)

In [None]:
def recommend_with_model_embedding(
    author_id,
    df_all,
    model,
    word2id,
    movie_emb_dict,
    top_k=10,
    min_rating_for_profile=8.0,
    min_mean_rating_candidate=7.0,
    max_len=MAX_LEN
):
    """Recommend unseen, well-rated movies whose embedding is closest to the author's profile.

    The author profile is the mean embedding of the author's reviews rated at
    least ``min_rating_for_profile``. Candidates are the movies in
    ``movie_emb_dict`` that the author has not reviewed and whose mean rating
    in ``df_all`` is at least ``min_mean_rating_candidate``. They are ranked
    by cosine similarity to the profile; the ranking is printed and returned.

    Args:
        author_id: Value matched against the ``author`` column.
        df_all: Review frame with ``author``, ``movie_id``, ``rating`` and
            ``review`` columns.
        model: Model used to embed the author's reviews.
        word2id: Mapping from token string to integer id.
        movie_emb_dict: Mapping from movie id to movie embedding vector.
        top_k: Number of recommendations to keep.
        min_rating_for_profile: Minimum rating for a review to enter the profile.
        min_mean_rating_candidate: Minimum mean rating for a candidate movie.
        max_len: Sequence length used when embedding reviews.

    Returns:
        A DataFrame with columns ``rank``, ``movie_id``, ``similarity``,
        ``similarity_%`` and ``mean_rating``, or ``None`` when no profile can
        be built or no candidate passes the filters.
    """
    author_vec = build_author_profile_embedding(
        author_id, df_all, model, word2id,
        min_rating_pos=min_rating_for_profile,
        max_len=max_len
    )
    # The profile norm does not depend on the candidate, so compute it once.
    author_norm = norm(author_vec) if author_vec is not None else 0.0
    if author_norm == 0:
        print(f"{author_id} : 프로필 생성 불가 (고평점 리뷰 부족)")
        return None

    seen = set(df_all[df_all["author"] == author_id]["movie_id"].unique())
    movie_mean = df_all.groupby("movie_id")["rating"].mean().to_dict()

    candidates = []
    for mid, memb in movie_emb_dict.items():
        if mid in seen:
            continue

        mean_r = movie_mean.get(mid)
        if mean_r is None or mean_r < min_mean_rating_candidate:
            continue

        # The epsilon keeps the denominator strictly positive, so a zero-norm
        # movie vector scores a similarity of 0 instead of dividing by zero.
        sim = float(np.dot(author_vec, memb) / (author_norm * norm(memb) + 1e-8))
        candidates.append((mid, sim, mean_r))

    if not candidates:
        print("추천 후보가 없습니다. 필터 조건을 완화해보세요.")
        return None

    # The sort is stable: ties keep the iteration order of movie_emb_dict.
    candidates.sort(key=lambda x: x[1], reverse=True)
    top = candidates[:top_k]

    print(f"author(추천대상): {author_id}")
    print("movie_id(추천영화):")
    for rank, (mid, sim, mean_r) in enumerate(top, start=1):
        print(f"{rank}. {mid} ({sim*100:.1f}%)(유사도) / 평균 평점: {mean_r:.2f}")

    rec_df = pd.DataFrame(
        [{
            "rank": rank,
            "movie_id": mid,
            "similarity": sim,
            "similarity_%": sim * 100,
            "mean_rating": mean_r
        } for rank, (mid, sim, mean_r) in enumerate(top, start=1)]
    )

    return rec_df

In [None]:
# Produce the top-10 recommendations for one sample reviewer. The last line
# is a bare expression so the notebook renders the resulting frame.
target_author = "beth****"

rec_df_model = recommend_with_model_embedding(
    author_id=target_author,
    df_all=df_all,
    model=best_model,
    word2id=word2id,
    movie_emb_dict=movie_emb_dict,
    top_k=10,
    min_rating_for_profile=8.0,
    min_mean_rating_candidate=7.0,
    max_len=MAX_LEN,
)

rec_df_model

author(추천대상): beth****
movie_id(추천영화):
1. 87896 (92.0%)(유사도) / 평균 평점: 9.19
2. 130847 (91.9%)(유사도) / 평균 평점: 8.58
3. 94507 (91.8%)(유사도) / 평균 평점: 9.24
4. 84842 (91.7%)(유사도) / 평균 평점: 8.94
5. 127321 (91.7%)(유사도) / 평균 평점: 8.90
6. 19505 (91.6%)(유사도) / 평균 평점: 8.66
7. 124805 (91.5%)(유사도) / 평균 평점: 8.78
8. 17233 (91.5%)(유사도) / 평균 평점: 8.90
9. 31323 (91.5%)(유사도) / 평균 평점: 8.93
10. 95767 (91.4%)(유사도) / 평균 평점: 9.31


Unnamed: 0,rank,movie_id,similarity,similarity_%,mean_rating
0,1,87896,0.919566,91.956586,9.19
1,2,130847,0.919271,91.927075,8.580645
2,3,94507,0.917995,91.799462,9.24
3,4,84842,0.9169,91.689962,8.938272
4,5,127321,0.916556,91.655594,8.9
5,6,19505,0.916395,91.639531,8.66
6,7,124805,0.91544,91.544008,8.784091
7,8,17233,0.914896,91.489625,8.9
8,9,31323,0.914614,91.461414,8.93
9,10,95767,0.91429,91.428959,9.31


In [None]:
# Show up to ten of the target author's reviews rated 9 or above next to the
# recommendations, for a side-by-side visual check.
target_author = "beth****"

author_pos = df_all.loc[
    (df_all["author"] == target_author) & (df_all["rating"] >= 9.0)
]
print("beth****의 고평점 영화 예시")
display(author_pos.loc[:, ["movie_id", "rating", "review"]].head(10))

print("\n추천 결과")
display(rec_df_model)

beth****의 고평점 영화 예시


Unnamed: 0,movie_id,rating,review
16813,50216,10,명작이다.
19511,18487,9,한 여자의 서글픈 인생유전은 시대의 초상 같네요.
28411,127247,10,희망 없는 사회속 약자들의 절망
151801,98728,9,나도 사랑하고 싶어졌어요~~~ ^^
188699,98125,10,박시후의 또다른 매력을 느낀 드라마.. 나는 살인범이다 에서의 역할과 너무 대조적이...
398043,51049,9,"잔잔한 내용, 임팩트도 없지만 시간 가는 줄 모르게 푹 빠져서 봤고, 여운이 기네요..."
451782,51855,9,사랑하고 싶어지죠 ^^
549204,97466,9,억지 해피앤딩이 아닌... 과거에서 현실에 돌아와 삶을 받아들이고 새로운 출발을 ...
657036,50370,9,시네마천국의 그리스판. 이런영화 넘좋아.. 훈훈해지는 이런영화..



추천 결과


Unnamed: 0,rank,movie_id,similarity,similarity_%,mean_rating
0,1,87896,0.919566,91.956586,9.19
1,2,130847,0.919271,91.927075,8.580645
2,3,94507,0.917995,91.799462,9.24
3,4,84842,0.9169,91.689962,8.938272
4,5,127321,0.916556,91.655594,8.9
5,6,19505,0.916395,91.639531,8.66
6,7,124805,0.91544,91.544008,8.784091
7,8,17233,0.914896,91.489625,8.9
8,9,31323,0.914614,91.461414,8.93
9,10,95767,0.91429,91.428959,9.31


In [None]:
def evaluate_for_author_single_hit(
    author_id,
    df_all,
    model,
    word2id,
    movie_emb_dict,
    min_rating_pos=8.0,
    min_mean_rating_candidate=7.0,
    top_k=10,
    max_len=MAX_LEN
):
    """Leave-one-out evaluation of the embedding recommender for one author.

    One of the author's positively rated movies is held out. The profile is
    built from the remaining positive reviews, and the held-out movie is
    ranked among the candidate movies by cosine similarity.

    Args:
        author_id: Value matched against the ``author`` column.
        df_all: Review frame with ``author``, ``movie_id``, ``rating`` and
            ``review`` columns.
        model: Model used to embed the author's reviews.
        word2id: Mapping from token string to integer id.
        movie_emb_dict: Mapping from movie id to movie embedding vector.
        min_rating_pos: Minimum rating for a review to count as positive.
        min_mean_rating_candidate: Minimum mean rating for a candidate movie.
        top_k: Cut-off used for the hit indicator.
        max_len: Sequence length used when embedding reviews.

    Returns:
        A dict with keys ``author``, ``target_movie``, ``rank``, ``hit@10``
        and ``num_pos``, or ``None`` when the author has fewer than two
        positive reviews, no positive review remains after the hold-out, the
        profile has zero norm, or no candidate passes the filters. The hit
        key is always named ``hit@10`` even though it is computed with
        ``top_k``. ``rank`` is ``len(candidates) + 1`` when the held-out
        movie is not among the candidates (no embedding, or mean rating
        below the threshold).
    """
    sub_pos = df_all[(df_all["author"] == author_id) & (df_all["rating"] >= min_rating_pos)]
    if len(sub_pos) < 2:
        return None

    # Fixed seed so the same review is held out on every run.
    holdout = sub_pos.sample(1, random_state=42)
    target_movie = int(holdout["movie_id"].iloc[0])

    train_pos = sub_pos[sub_pos["movie_id"] != target_movie]
    if len(train_pos) == 0:
        return None

    embs = [get_review_embedding(txt, model, word2id, max_len)
            for txt in train_pos["review"]]
    author_vec = np.mean(embs, axis=0)
    # The profile norm does not depend on the candidate, so compute it once.
    author_norm = norm(author_vec)
    if author_norm == 0:
        return None

    movie_mean = df_all.groupby("movie_id")["rating"].mean().to_dict()

    # Movies the author already reviewed are excluded from the candidates,
    # EXCEPT the held-out target. Leaving the target in `seen` would filter
    # it out unconditionally, pinning rank to len(candidates) + 1 and the
    # hit rate to 0 for every author.
    author_movies = df_all[df_all["author"] == author_id]["movie_id"].unique()
    seen = {mid for mid in author_movies if mid != target_movie}

    # NOTE(review): if movie_emb_dict was built from all reviews, the target
    # movie's embedding includes the held-out review -- confirm whether that
    # leakage is acceptable for this evaluation.
    candidates = []
    for mid, memb in movie_emb_dict.items():
        if mid in seen:
            continue
        mean_r = movie_mean.get(mid)
        if mean_r is None or mean_r < min_mean_rating_candidate:
            continue
        # The epsilon guards against a zero-norm movie vector.
        sim = float(np.dot(author_vec, memb) / (author_norm * norm(memb) + 1e-8))
        candidates.append((mid, sim))

    if not candidates:
        return None

    candidates.sort(key=lambda x: x[1], reverse=True)
    ranked_ids = [mid for mid, _ in candidates]

    if target_movie in ranked_ids:
        rank = ranked_ids.index(target_movie) + 1
    else:
        rank = len(ranked_ids) + 1

    hit_at_k = 1.0 if rank <= top_k else 0.0

    return {
        "author": author_id,
        "target_movie": target_movie,
        "rank": rank,
        "hit@10": hit_at_k,
        "num_pos": len(sub_pos)
    }

In [None]:
# Authors with at least five reviews rated 8.0 or higher, in sorted author
# order (groupby sorts its keys by default).
candidate_authors = (
    df_all["rating"].ge(8.0)
    .groupby(df_all["author"])
    .sum()
    .loc[lambda num_pos: num_pos >= 5]
    .index
    .tolist()
)

# Evaluate the first 50 such authors; authors that cannot be evaluated
# (the function returns None) are left out of the results.
results = [
    res
    for res in (
        evaluate_for_author_single_hit(
            a,
            df_all,
            best_model,
            word2id,
            movie_emb_dict,
            min_rating_pos=8.0,
            min_mean_rating_candidate=7.0,
            top_k=10,
            max_len=MAX_LEN,
        )
        for a in candidate_authors[:50]
    )
    if res is not None
]

eval_df = pd.DataFrame(results)
eval_df.head(), eval_df["hit@10"].mean(), eval_df["rank"].mean()

(     author  target_movie   rank  hit@10  num_pos
 0  0000****        103241  10269     0.0        7
 1  0001****         77291  10266     0.0       11
 2  0007****        118361  10257     0.0       14
 3  0012****         65748  10273     0.0        7
 4  0040****         10741  10272     0.0        5,
 np.float64(0.0),
 np.float64(10256.78))