🎬 Content-Based Movie Recommender (Eq. 3 – Cosine Similarity)

In [18]:
import pandas as pd
import numpy as np
import ast
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from sklearn.metrics.pairwise import cosine_similarity
from gensim.models import Word2Vec

nltk.download('punkt')
nltk.download('stopwords')


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\OKTAVIAN\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\OKTAVIAN\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [19]:
# 🔄 Load Data dan Filter 7 Judul
movies = pd.read_csv('../Dataset/movies_metadata.csv', low_memory=False, on_bad_lines='skip')
credits = pd.read_csv('../Dataset/credits.csv', on_bad_lines='skip')

movies = movies[['id', 'title', 'genres', 'overview']]
credits = credits[['id', 'cast', 'crew']]
credits['id'] = credits['id'].astype(str)
movies['id'] = movies['id'].astype(str)
df = pd.merge(movies, credits, on='id')

judul_terpilih = [
    '862', '8844', '949', '9091', '863', '65759', '49013'
]
df = df[df['id'].isin(judul_terpilih)].reset_index(drop=True)

In [20]:
df

Unnamed: 0,id,title,genres,overview,cast,crew
0,862,Toy Story,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...","Led by Woody, Andy's toys live happily in his ...","[{'cast_id': 14, 'character': 'Woody (voice)',...","[{'credit_id': '52fe4284c3a36847f8024f49', 'de..."
1,8844,Jumanji,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",When siblings Judy and Peter discover an encha...,"[{'cast_id': 1, 'character': 'Alan Parrish', '...","[{'credit_id': '52fe44bfc3a36847f80a7cd1', 'de..."
2,949,Heat,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...","Obsessive master thief, Neil McCauley leads a ...","[{'cast_id': 25, 'character': 'Lt. Vincent Han...","[{'credit_id': '52fe4292c3a36847f802916d', 'de..."
3,9091,Sudden Death,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",International action superstar Jean Claude Van...,"[{'cast_id': 1, 'character': 'Darren Francis T...","[{'credit_id': '52fe44dbc3a36847f80ae0f1', 'de..."
4,863,Toy Story 2,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...","Andy heads off to Cowboy Camp, leaving his toy...","[{'cast_id': 18, 'character': 'Woody (voice)',...","[{'credit_id': '52fe4284c3a36847f8025073', 'de..."
5,49013,Cars 2,"[{'id': 16, 'name': 'Animation'}, {'id': 10751...",Star race car Lightning McQueen and his pal Ma...,"[{'cast_id': 4, 'character': 'Lightning McQuee...","[{'credit_id': '52fe477fc3a36847f8139271', 'de..."
6,65759,Happy Feet Two,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",Mumble the penguin has a problem: his son Erik...,"[{'cast_id': 1, 'character': 'Mumble (voice)',...","[{'credit_id': '52fe4718c3a368484e0b4d57', 'de..."


In [21]:
# Ekstraksi fitur
def parse_genres(x):
    try:
        return [d['name'].lower() for d in ast.literal_eval(x)]
    except:
        return []

def extract_cast(x):
    try:
        return [d['name'].lower() for d in ast.literal_eval(x)[:5]]
    except:
        return []

def extract_director(x):
    try:
        return [d['name'].lower() for d in ast.literal_eval(x) if d['job'].lower() == 'director']
    except:
        return []

df['genres'] = df['genres'].apply(parse_genres)
df['actors'] = df['cast'].apply(extract_cast)
df['director'] = df['crew'].apply(extract_director)

In [22]:
# 🔠 Preprocessing Overview dan Title
stop_words = set(stopwords.words('english'))
stemmer = PorterStemmer()

def preprocess(text):
    if pd.isna(text):
        return []
    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text)
    tokens = word_tokenize(text)
    tokens = [t for t in tokens if t not in stop_words]
    return [stemmer.stem(t) for t in tokens]

df['processed_overview'] = df['overview'].apply(preprocess)
df['processed_title'] = df['title'].apply(preprocess)
df['combined_features'] = df['processed_title'] + df['genres'] + df['actors'] + df['director']

In [23]:
df['processed_overview']

0    [led, woodi, andi, toy, live, happili, room, a...
1    [sibl, judi, peter, discov, enchant, board, ga...
2    [obsess, master, thief, neil, mccauley, lead, ...
3    [intern, action, superstar, jean, claud, van, ...
4    [andi, head, cowboy, camp, leav, toy, devic, t...
5    [star, race, car, lightn, mcqueen, pal, mater,...
6    [mumbl, penguin, problem, son, erik, reluct, d...
Name: processed_overview, dtype: object

In [24]:
df['processed_overview'].to_csv('../Dataset/processed_overview.csv', index=False)

In [25]:
w2v_model = Word2Vec(df['processed_overview'], vector_size=3, window=5, min_count=1, workers=4)

def vectorize(tokens):
    vectors = [w2v_model.wv[word] for word in tokens if word in w2v_model.wv]
    return np.mean(vectors, axis=0) if vectors else np.zeros(3)

df['overview_vector'] = df['processed_overview'].apply(vectorize)


In [26]:
# Menampilkan mean vector (Word2Vec) untuk setiap film (overview)
mean_vectors = pd.DataFrame(df['overview_vector'].to_list(), index=df['title'])
print('Mean vector (Word2Vec) untuk setiap film:')
print(mean_vectors)

Mean vector (Word2Vec) untuk setiap film:
                       0         1         2
title                                       
Toy Story       0.024388  0.026865 -0.042885
Jumanji        -0.004921  0.027390  0.037805
Heat           -0.047090  0.003584  0.050137
Sudden Death   -0.029745  0.033016 -0.008192
Toy Story 2     0.004014  0.078487 -0.047273
Cars 2          0.035396 -0.080353  0.062854
Happy Feet Two -0.019230 -0.023681  0.016018


In [27]:
# Menampilkan vektor Word2Vec untuk setiap kata unik di overview 7 film
all_words = set([word for doc in df['processed_overview'] for word in doc])
print("Jumlah kata unik di overview:", len(all_words))

Jumlah kata unik di overview: 195


In [28]:
# 🎬 Menampilkan kata unik dari overview berdasarkan urutan title
print("=== KATA UNIK DARI OVERVIEW BERDASARKAN URUTAN TITLE ===")
print()

# Mengurutkan dataframe berdasarkan title
df_sorted = df.sort_values('title').reset_index(drop=True)

# Menampilkan kata unik dari overview untuk setiap film berdasarkan urutan title
for i in range(len(df_sorted)):
    title = df_sorted.loc[i, 'title']
    overview_words = df_sorted.loc[i, 'processed_overview']
    
    print(f"Film #{i+1}: {title}")
    print(f"Kata unik dari overview: {overview_words}")
    print(f"Jumlah kata: {len(overview_words)}")
    print("-" * 70)
    print()

=== KATA UNIK DARI OVERVIEW BERDASARKAN URUTAN TITLE ===

Film #1: Cars 2
Kata unik dari overview: ['star', 'race', 'car', 'lightn', 'mcqueen', 'pal', 'mater', 'head', 'oversea', 'compet', 'world', 'grand', 'prix', 'race', 'road', 'championship', 'becom', 'rocki', 'mater', 'get', 'caught', 'intrigu', 'adventur', 'intern', 'espionag']
Jumlah kata: 25
----------------------------------------------------------------------

Film #2: Happy Feet Two
Kata unik dari overview: ['mumbl', 'penguin', 'problem', 'son', 'erik', 'reluct', 'danc', 'encount', 'mighti', 'sven', 'penguin', 'fli', 'thing', 'get', 'wors', 'mumbl', 'world', 'shaken', 'power', 'forc', 'caus', 'bring', 'togeth', 'penguin', 'nation', 'alli', 'set', 'thing', 'right']
Jumlah kata: 29
----------------------------------------------------------------------

Film #3: Heat
Kata unik dari overview: ['obsess', 'master', 'thief', 'neil', 'mccauley', 'lead', 'topnotch', 'crew', 'variou', 'insan', 'heist', 'throughout', 'lo', 'angel', 'me

In [29]:
# 📊 Analisis statistik kata unik overview (diurutkan berdasarkan title)
print("=== ANALISIS STATISTIK KATA UNIK OVERVIEW (URUT BERDASARKAN TITLE) ===")
print()

# Mengurutkan dataframe berdasarkan title
df_sorted = df.sort_values('title').reset_index(drop=True)

# Mengumpulkan statistik
total_kata_per_film = []
semua_kata_unik = set()

print("Ringkasan per film (diurutkan berdasarkan title):")
print()
for i in range(len(df_sorted)):
    title = df_sorted.loc[i, 'title']
    overview_words = df_sorted.loc[i, 'processed_overview']
    jumlah_kata = len(overview_words)
    
    total_kata_per_film.append(jumlah_kata)
    semua_kata_unik.update(overview_words)
    
    print(f"{i+1:2d}. {title:<35} - {jumlah_kata:3d} kata")

print()
print("=== STATISTIK KESELURUHAN ===")
print(f"Total film: {len(df_sorted)}")
print(f"Rata-rata kata per overview: {np.mean(total_kata_per_film):.2f}")
print(f"Overview dengan kata terbanyak: {max(total_kata_per_film)} kata")
print(f"Overview dengan kata tersedikit: {min(total_kata_per_film)} kata")
print(f"Total kata unik dari semua overview: {len(semua_kata_unik)}")

# Film dengan overview terpanjang dan terpendek
idx_max = np.argmax(total_kata_per_film)
idx_min = np.argmin(total_kata_per_film)
print()
print(f"Film dengan overview terpanjang: '{df_sorted.loc[idx_max, 'title']}' ({total_kata_per_film[idx_max]} kata)")
print(f"Film dengan overview terpendek: '{df_sorted.loc[idx_min, 'title']}' ({total_kata_per_film[idx_min]} kata)")

=== ANALISIS STATISTIK KATA UNIK OVERVIEW (URUT BERDASARKAN TITLE) ===

Ringkasan per film (diurutkan berdasarkan title):

 1. Cars 2                              -  25 kata
 2. Happy Feet Two                      -  29 kata
 3. Heat                                -  35 kata
 4. Jumanji                             -  39 kata
 5. Sudden Death                        -  47 kata
 6. Toy Story                           -  33 kata
 7. Toy Story 2                         -  38 kata

=== STATISTIK KESELURUHAN ===
Total film: 7
Rata-rata kata per overview: 35.14
Overview dengan kata terbanyak: 47 kata
Overview dengan kata tersedikit: 25 kata
Total kata unik dari semua overview: 195

Film dengan overview terpanjang: 'Sudden Death' (47 kata)
Film dengan overview terpendek: 'Cars 2' (25 kata)


In [30]:
# 📋 Daftar semua kata unik dari overview (terurut alfabetis)
print("=== DAFTAR SEMUA KATA UNIK DARI OVERVIEW ===")
print()

# Mengumpulkan semua kata unik dari overview
semua_kata_overview = set()
for overview_words in df['processed_overview']:
    semua_kata_overview.update(overview_words)

# Mengurutkan kata secara alfabetis
kata_terurut = sorted(list(semua_kata_overview))

print(f"Total kata unik dalam overview: {len(kata_terurut)}")
print()
print("Daftar kata unik (urut alfabetis):")
print()

# Menampilkan kata dalam format kolom
for i, kata in enumerate(kata_terurut, 1):
    print(f"{i:3d}. {kata:<20}", end="")
    # Baris baru setiap 3 kata untuk tampilan yang rapi
    if i % 3 == 0:
        print()
        
# Tambahkan baris baru jika tidak habis dibagi 3
if len(kata_terurut) % 3 != 0:
    print()

print()
print("=== SAMPEL KATA BERDASARKAN PANJANG ===")
print()

# Kelompokkan kata berdasarkan panjang
kata_pendek = [kata for kata in kata_terurut if len(kata) <= 3]
kata_sedang = [kata for kata in kata_terurut if 4 <= len(kata) <= 7]
kata_panjang = [kata for kata in kata_terurut if len(kata) >= 8]

print(f"Kata pendek (≤3 karakter): {len(kata_pendek)} kata")
print(f"Contoh: {kata_pendek[:10]}")
print()
print(f"Kata sedang (4-7 karakter): {len(kata_sedang)} kata")
print(f"Contoh: {kata_sedang[:10]}")
print()
print(f"Kata panjang (≥8 karakter): {len(kata_panjang)} kata")
print(f"Contoh: {kata_panjang[:10]}")

=== DAFTAR SEMUA KATA UNIK DARI OVERVIEW ===

Total kata unik dalam overview: 195

Daftar kata unik (urut alfabetis):

  1. 26                    2. abil                  3. abort               
  4. action                5. adult                 6. adventur            
  7. afraid                8. al                    9. alan                
 10. alli                 11. andi                 12. angel               
 13. asid                 14. awar                 15. backdrop            
 16. barn                 17. becom                18. belong              
 19. billion              20. birthday             21. board               
 22. booth                23. bring                24. buzz                
 25. buzzer               26. camp                 27. captor              
 28. car                  29. catandmous           30. caught              
 31. caus                 32. championship         33. circumst            
 34. claud                35. collector      

In [31]:
# Output DataFrame vektor kata Word2Vec untuk semua kata unik di overview 7 film
word_vectors = {word: w2v_model.wv[word] for word in all_words if word in w2v_model.wv}
df_vectors = pd.DataFrame(word_vectors).T  # index=kata, kolom=dimensi vektor
df_vectors

Unnamed: 0,0,1,2
bring,-0.250366,-0.028504,0.316954
unwittingli,-0.065878,0.154894,-0.136527
trap,0.189792,0.307937,-0.137094
terrifi,0.172936,0.192099,0.249201
magic,0.090558,0.232867,0.202006
...,...,...,...
set,-0.276339,-0.313748,0.244393
kidnap,-0.191248,0.073195,-0.175409
plot,-0.218224,0.132668,0.182305
onto,0.226178,0.133926,0.150488


In [32]:
def jaccard_similarity(list1, list2):
    set1, set2 = set(list1), set(list2)
    return len(set1 & set2) / len(set1 | set2) if set1 or set2 else 0.0


In [33]:
def recommend(title_input, top_n=10,
              alpha=1, beta=1, gamma=1, theta=1, delta=1):
    """
    Sistem rekomendasi menggunakan cosine similarity (Eq. 3) untuk overview.
    """
    title_input = title_input.lower()
    if title_input not in df['title'].str.lower().values:
        print(f"Film '{title_input}' tidak ditemukan.")
        return []

    idx = df[df['title'].str.lower() == title_input].index[0]

    # Fitur target
    genre_i     = df.loc[idx, 'genres']
    actors_i    = df.loc[idx, 'actors']
    director_i  = df.loc[idx, 'director']
    title_i     = df.loc[idx, 'processed_title']
    overview_i  = df.loc[idx, 'overview_vector'].reshape(1, -1)

    results = []

    print(f"\n📊 Menghitung kemiripan dengan '{df.loc[idx, 'title']}' (menggunakan Eq. 3):\n")

    for j in range(len(df)):
        if j == idx:
            continue

        genre_sim    = jaccard_similarity(genre_i, df.loc[j, 'genres'])
        actor_sim    = jaccard_similarity(actors_i, df.loc[j, 'actors'])
        director_sim = jaccard_similarity(director_i, df.loc[j, 'director'])
        title_sim    = jaccard_similarity(title_i, df.loc[j, 'processed_title'])

        overview_j = df.loc[j, 'overview_vector'].reshape(1, -1)
        overview_sim = cosine_similarity(overview_i, overview_j)[0][0]

        final_score = (
            (alpha * genre_sim +
            beta * actor_sim +
            gamma * director_sim +
            theta * title_sim +
            delta * overview_sim) / 5
        )

        print(f"- {df.loc[j, 'title']}:\n  Genre={genre_sim:.3f}, Actor={actor_sim:.3f}, Director={director_sim:.3f}, Title={title_sim:.3f}, Overview={overview_sim:.3f}")
        print(f"  → Final Score: {final_score:.3f}\n")

        results.append((j, final_score))

    top = sorted(results, key=lambda x: x[1], reverse=True)[:top_n]
    return [(df.loc[i, 'title'], round(score, 3)) for i, score in top]


In [34]:
hasil = recommend("Toy Story", top_n=10)

print("\n🎬 Top 10 Rekomendasi (menggunakan Eq. 3 – cosine):")
for title, sim in hasil:
    print(f"- {title} (Similarity: {sim})")



📊 Menghitung kemiripan dengan 'Toy Story' (menggunakan Eq. 3):

- Jumanji:
  Genre=0.200, Actor=0.000, Director=0.000, Title=0.000, Overview=-0.381
  → Final Score: -0.036

- Heat:
  Genre=0.000, Actor=0.000, Director=0.000, Title=0.000, Overview=-0.828
  → Final Score: -0.166

- Sudden Death:
  Genre=0.000, Actor=0.000, Director=0.000, Title=0.000, Overview=0.202
  → Final Score: 0.040

- Toy Story 2:
  Genre=1.000, Actor=0.429, Director=1.000, Title=0.667, Overview=0.822
  → Final Score: 0.783

- Cars 2:
  Genre=0.750, Actor=0.000, Director=0.500, Title=0.000, Overview=-0.658
  → Final Score: 0.118

- Happy Feet Two:
  Genre=1.000, Actor=0.000, Director=0.000, Title=0.000, Overview=-0.926
  → Final Score: 0.015


🎬 Top 10 Rekomendasi (menggunakan Eq. 3 – cosine):
- Toy Story 2 (Similarity: 0.783)
- Cars 2 (Similarity: 0.118)
- Sudden Death (Similarity: 0.04)
- Happy Feet Two (Similarity: 0.015)
- Jumanji (Similarity: -0.036)
- Heat (Similarity: -0.166)
- Jumanji:
  Genre=0.200, Acto

In [35]:
# 📊 Evaluasi Model Menggunakan Precision
import random
from sklearn.metrics import precision_score

def evaluate_precision(test_samples=50, top_k=5):
    """
    Evaluasi model menggunakan precision.
    Precision = (Relevant items recommended) / (Total items recommended)
    """
    print("=== EVALUASI MODEL MENGGUNAKAN PRECISION ===")
    print()
    
    # Sampling film untuk testing
    sample_indices = random.sample(range(len(df)), min(test_samples, len(df)))
    
    precision_scores = []
    total_relevant = 0
    total_recommended = 0
    
    print(f"Menguji {len(sample_indices)} film dengan top-{top_k} rekomendasi...")
    print()
    
    for i, idx in enumerate(sample_indices):
        test_title = df.loc[idx, 'title']
        test_genres = set(df.loc[idx, 'genres'])
        
        # Dapatkan rekomendasi
        recommendations = recommend(test_title, top_n=top_k, 
                                   alpha=1, beta=1, gamma=1, theta=1, delta=1)
        
        if not recommendations:
            continue
            
        # Hitung precision berdasarkan genre similarity
        relevant_count = 0
        for rec_title, score in recommendations:
            # Cari film rekomendasi
            rec_idx = df[df['title'] == rec_title].index
            if len(rec_idx) > 0:
                rec_genres = set(df.loc[rec_idx[0], 'genres'])
                # Anggap relevan jika memiliki setidaknya 1 genre yang sama
                if len(test_genres.intersection(rec_genres)) > 0:
                    relevant_count += 1
        
        precision = relevant_count / len(recommendations) if recommendations else 0
        precision_scores.append(precision)
        
        total_relevant += relevant_count
        total_recommended += len(recommendations)
        
        if i < 5:  # Tampilkan 5 contoh pertama
            print(f"Film #{i+1}: {test_title}")
            print(f"  Genre: {list(test_genres)}")
            print(f"  Rekomendasi relevan: {relevant_count}/{len(recommendations)}")
            print(f"  Precision: {precision:.3f}")
            print()
    
    # Hitung metrik keseluruhan
    avg_precision = sum(precision_scores) / len(precision_scores) if precision_scores else 0
    macro_precision = total_relevant / total_recommended if total_recommended > 0 else 0
    
    print("=== HASIL EVALUASI ===")
    print(f"Jumlah film yang diuji: {len(precision_scores)}")
    print(f"Average Precision (Micro): {avg_precision:.4f}")
    print(f"Macro Precision: {macro_precision:.4f}")
    print(f"Total item relevan: {total_relevant}")
    print(f"Total item direkomendasi: {total_recommended}")
    
    return precision_scores, avg_precision, macro_precision

In [36]:
# 🏆 Menjalankan Evaluasi Precision
# Set random seed untuk reproducibility
random.seed(42)

# Jalankan evaluasi dengan 20 film sample dan top-5 rekomendasi
precision_scores, avg_precision, macro_precision = evaluate_precision(test_samples=20, top_k=5)

print("\n=== DISTRIBUSI PRECISION SCORES ===")
print(f"Precision scores: {[f'{p:.3f}' for p in precision_scores[:10]]}...")  # Tampilkan 10 pertama
print(f"Precision tertinggi: {max(precision_scores):.3f}")
print(f"Precision terendah: {min(precision_scores):.3f}")
print(f"Standar deviasi: {np.std(precision_scores):.3f}")

=== EVALUASI MODEL MENGGUNAKAN PRECISION ===

Menguji 7 film dengan top-5 rekomendasi...


📊 Menghitung kemiripan dengan 'Cars 2' (menggunakan Eq. 3):

- Toy Story:
  Genre=0.750, Actor=0.000, Director=0.500, Title=0.000, Overview=-0.658
  → Final Score: 0.118

- Jumanji:
  Genre=0.400, Actor=0.000, Director=0.000, Title=0.000, Overview=0.000
  → Final Score: 0.080

- Heat:
  Genre=0.000, Actor=0.000, Director=0.000, Title=0.000, Overview=0.161
  → Final Score: 0.032

- Sudden Death:
  Genre=0.167, Actor=0.000, Director=0.000, Title=0.000, Overview=-0.865
  → Final Score: -0.140

- Toy Story 2:
  Genre=0.750, Actor=0.000, Director=0.500, Title=0.250, Overview=-0.923
  → Final Score: 0.115

- Happy Feet Two:
  Genre=0.750, Actor=0.000, Director=0.000, Title=0.000, Overview=0.599
  → Final Score: 0.270

Film #1: Cars 2
  Genre: ['animation', 'family', 'comedy', 'adventure']
  Rekomendasi relevan: 4/5
  Precision: 0.800


📊 Menghitung kemiripan dengan 'Toy Story' (menggunakan Eq. 3):

- J

In [37]:
# 📈 Analisis Detail Evaluasi Precision

def evaluate_precision_by_genre(top_k=5):
    """
    Evaluasi precision berdasarkan genre film
    """
    print("=== EVALUASI PRECISION BERDASARKAN GENRE ===")
    print()
    
    # Kumpulkan semua genre unik
    all_genres = set()
    for genres in df['genres']:
        all_genres.update(genres)
    
    genre_precision = {}
    
    for genre in list(all_genres)[:5]:  # Test 5 genre utama
        # Cari film dengan genre ini
        genre_films = df[df['genres'].apply(lambda x: genre in x)]
        
        if len(genre_films) < 2:
            continue
            
        # Sample beberapa film dari genre ini
        sample_size = min(3, len(genre_films))
        sample_films = genre_films.sample(n=sample_size)
        
        precision_scores = []
        
        for idx, film in sample_films.iterrows():
            test_title = film['title']
            test_genres = set(film['genres'])
            
            recommendations = recommend(test_title, top_n=top_k,
                                       alpha=1, beta=1, gamma=1, theta=1, delta=1)
            
            if not recommendations:
                continue
                
            relevant_count = 0
            for rec_title, score in recommendations:
                rec_idx = df[df['title'] == rec_title].index
                if len(rec_idx) > 0:
                    rec_genres = set(df.loc[rec_idx[0], 'genres'])
                    if len(test_genres.intersection(rec_genres)) > 0:
                        relevant_count += 1
            
            precision = relevant_count / len(recommendations)
            precision_scores.append(precision)
        
        if precision_scores:
            avg_precision = sum(precision_scores) / len(precision_scores)
            genre_precision[genre] = avg_precision
            print(f"Genre '{genre}': Precision = {avg_precision:.3f} (dari {len(precision_scores)} film)")
    
    return genre_precision

# Jalankan evaluasi per genre
genre_results = evaluate_precision_by_genre(top_k=5)

=== EVALUASI PRECISION BERDASARKAN GENRE ===


📊 Menghitung kemiripan dengan 'Cars 2' (menggunakan Eq. 3):

- Toy Story:
  Genre=0.750, Actor=0.000, Director=0.500, Title=0.000, Overview=-0.658
  → Final Score: 0.118

- Jumanji:
  Genre=0.400, Actor=0.000, Director=0.000, Title=0.000, Overview=0.000
  → Final Score: 0.080

- Heat:
  Genre=0.000, Actor=0.000, Director=0.000, Title=0.000, Overview=0.161
  → Final Score: 0.032

- Sudden Death:
  Genre=0.167, Actor=0.000, Director=0.000, Title=0.000, Overview=-0.865
  → Final Score: -0.140

- Toy Story 2:
  Genre=0.750, Actor=0.000, Director=0.500, Title=0.250, Overview=-0.923
  → Final Score: 0.115

- Happy Feet Two:
  Genre=0.750, Actor=0.000, Director=0.000, Title=0.000, Overview=0.599
  → Final Score: 0.270


📊 Menghitung kemiripan dengan 'Sudden Death' (menggunakan Eq. 3):

- Toy Story:
  Genre=0.000, Actor=0.000, Director=0.000, Title=0.000, Overview=0.202
  → Final Score: 0.040

- Jumanji:
  Genre=0.200, Actor=0.000, Director=0.000,