In [1]:
# Install required packages
!pip install -q sentence-transformers scikit-learn joblib telegram --upgrade


  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.9/12.9 MB[0m [31m134.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m125.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m96.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m61.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m40.9 MB/s[0m eta [36

In [4]:
import numpy as np
import joblib
from sentence_transformers import SentenceTransformer, CrossEncoder
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm
import math

# === Load Models on GPU ===
bi_encoder = SentenceTransformer("all-MiniLM-L6-v2", device="cuda")
cross_encoder = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2", device="cuda")

# === Load Data ===
metadata = joblib.load("movie_metadata_umap_clusters.pkl")
embeddings = np.load("movie_weighted_embeddings.npy")

# === Ground Truths ===
test_queries = {
    "A young man discovers he is the son of a god and must stop ancient forces":
        ["Percy Jackson & the Olympians: The Lightning Thief", "The Lightning Thief", "Hercules", "The Legend of Hercules"],

    "A scientist becomes stranded on Mars and must survive until help arrives":
        ["The Martian", "Robinson Crusoe on Mars", "The Last Days on Mars", "Stranded"],

    "A virus spreads across the globe and scientists must stop the outbreak":
        ["Contagion", "Outbreak", "Carriers", "Quarantine"],

    "A man enters others’ dreams to extract secrets using advanced technology":
        ["Inception", "Paprika", "Dreamscape"],

    "A robot develops emotions and questions what it means to be human":
        ["I, Robot", "A.I. Artificial Intelligence", "Bicentennial Man", "Finch"],

    "A poor young man becomes rich and famous after winning a game show":
        ["Slumdog Millionaire"]
}

# === Evaluation Metrics ===
def precision_at_k(pred, truth, k):
    return len(set(pred[:k]) & set(truth)) / k

def recall_at_k(pred, truth, k):
    return len(set(pred[:k]) & set(truth)) / len(truth)

def ndcg_at_k(pred, truth, k):
    dcg = 0.0
    for i, movie in enumerate(pred[:k]):
        if movie in truth:
            dcg += 1 / math.log2(i + 2)
    idcg = sum([1 / math.log2(i + 2) for i in range(min(len(truth), k))])
    return dcg / idcg if idcg != 0 else 0.0

# === Recommendation Function ===
def recommend_movies(query, top_n_cosine=30, top_k_final=10):
    query_emb = bi_encoder.encode([query])
    sims = cosine_similarity(query_emb, embeddings)[0]
    top_indices = np.argsort(sims)[::-1][:top_n_cosine]

    pairs = [(query, f"{metadata[i]['title']} {metadata[i]['overview']} {metadata[i]['keywords']} {metadata[i]['tagline']}") for i in top_indices]
    scores = cross_encoder.predict(pairs)

    reranked = sorted(zip(scores, top_indices), reverse=True)[:top_k_final]
    return [metadata[i]['title'] for _, i in reranked]

# === Run Evaluation ===
K = 5
print(f"\n🎯 Evaluating Recommendations at K={K}...\n")
total_p, total_r, total_ndcg = 0, 0, 0

for query, true_titles in test_queries.items():
    print(f"\n🧠 Query: {query}")
    predicted = recommend_movies(query)
    for i, title in enumerate(predicted, 1):
        print(f"{i}. {title}")

    p = precision_at_k(predicted, true_titles, K)
    r = recall_at_k(predicted, true_titles, K)
    ndcg = ndcg_at_k(predicted, true_titles, K)

    print(f"📊 Precision@{K}: {p:.2f}, Recall@{K}: {r:.2f}, nDCG@{K}: {ndcg:.2f}")
    total_p += p
    total_r += r
    total_ndcg += ndcg

n = len(test_queries)
print(f"\n✅ Average Precision@{K}: {total_p/n:.2f}")
print(f"✅ Average Recall@{K}: {total_r/n:.2f}")
print(f"✅ Average nDCG@{K}: {total_ndcg/n:.2f}")


🎯 Evaluating Recommendations at K=5...


🧠 Query: A young man discovers he is the son of a god and must stop ancient forces
1. Percy Jackson & the Olympians: The Lightning Thief
2. Through the Magic Pyramid
3. Through the Magic Pyramid
4. Mortal
5. Arion
6. Walter
7. Hercules
8. Ulysses Against the Son of Hercules
9. Hercules
10. The Legend of Hercules
📊 Precision@5: 0.20, Recall@5: 0.25, nDCG@5: 0.39

🧠 Query: A scientist becomes stranded on Mars and must survive until help arrives
1. The Martian
2. Stranded
3. Robinson Crusoe on Mars
4. The Sea of Perdition
5. The Sea of Perdition
6. The Space Between Us
7. Flight To Mars
8. The Last Days on Mars
9. After the World Ended
10. After the World Ended
📊 Precision@5: 0.60, Recall@5: 0.75, nDCG@5: 0.83

🧠 Query: A virus spreads across the globe and scientists must stop the outbreak
1. Contagion
2. Carriers
3. Spillover: Zika, Ebola, and Beyond
4. Race for the Vaccine
5. Race for the Vaccine
6. Quarantine
7. Runaway Virus
8. Runaway Virus
9