In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np

file_path = '/content/drive/MyDrive/film_oneri/movies_dataset.csv'
df = pd.read_csv(file_path)

# Eksik değerleri temizle
df = df.dropna(subset=["overview"])

# Kullanılacak metin sütunlarını birleştir
def combine_text(row):
    return ' '.join([
        str(row["overview"]),
    ])

df["text"] = df.apply(combine_text, axis=1)

In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import NMF

# TF-IDF vektörleştirici (isteğe göre sınırla)
tfidf = TfidfVectorizer(stop_words='english', max_features=10000)
tfidf_matrix = tfidf.fit_transform(df["text"])

# NMF ile konuları çıkar (örneğin 50 konulu bir model)
nmf = NMF(n_components=69, random_state=42)
nmf_features = nmf.fit_transform(tfidf_matrix)


In [7]:
def average_similarity_score(features, k=2):
    from sklearn.neighbors import NearestNeighbors

    nn = NearestNeighbors(n_neighbors=k, metric="cosine")
    nn.fit(features)
    distances, _ = nn.kneighbors(features)

    similarities = 1 - distances[:, 1:]  # ilk komşu kendisi → at
    avg_sim = np.mean(similarities)
    return round(avg_sim, 4)

avg_score = average_similarity_score(nmf_features, k=6)
print("🔍 Ortalama Benzerlik Skoru (Top-6):", avg_score)

KeyboardInterrupt: 

In [8]:
print("🔧 NMF Reconstruction Error (Frobenius norm):", round(nmf.reconstruction_err_, 4))

🔧 NMF Reconstruction Error (Frobenius norm): 629.3382


In [14]:
for k in [10, 20, 50, 100]:
    nmf = NMF(n_components=k).fit(tfidf_matrix)
    print(f"k={k}, Reconstruction Error: {nmf.reconstruction_err_:.2f}")

k=10, Reconstruction Error: 650.29
k=20, Reconstruction Error: 645.28


KeyboardInterrupt: 

In [4]:
from sklearn.metrics.pairwise import cosine_similarity

def recommend_from_two_nmf(title1, title2, top_n=5):
    idx1 = df[df["title"] == title1].index[0]
    idx2 = df[df["title"] == title2].index[0]

    vec1 = nmf_features[idx1]
    vec2 = nmf_features[idx2]
    combined_vec = (vec1 + vec2) / 2

    similarities = cosine_similarity([combined_vec], nmf_features)[0]

    # Kendi girdilerini çıkar
    similar_indices = similarities.argsort()[::-1]
    recs = []
    for i in similar_indices:
        if i not in [idx1, idx2]:
            recs.append((df.iloc[i]["title"], round(similarities[i], 3)))
        if len(recs) == top_n:
            break
    return recs

recommend_from_two_nmf("Inception", "The Matrix")


[('Thawed Carp', np.float64(0.822)),
 ('Nitro', np.float64(0.814)),
 ('Rage of Angels: The Story Continues', np.float64(0.808)),
 ('Good to Go', np.float64(0.807)),
 ('Letter from Eusapia', np.float64(0.788))]