In [13]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

class MovieRecommendationSystem:
    def __init__(self, movies_path, ratings_path, tags_path):
        # Veri dosyalarını yüklerken hata kontrolü
        try:
            self.movies = pd.read_csv(movies_path)
            print(f"Movies data loaded. Shape: {self.movies.shape}")
        except FileNotFoundError:
            print("Movies file not found.")
            self.movies = pd.DataFrame()

        try:
            self.ratings = pd.read_csv(ratings_path)
            print(f"Ratings data loaded. Shape: {self.ratings.shape}")
        except FileNotFoundError:
            print("Ratings file not found.")
            self.ratings = pd.DataFrame()

        try:
            self.tags = pd.read_csv(tags_path)
            print(f"Tags data loaded. Shape: {self.tags.shape}")
        except FileNotFoundError:
            print("Tags file not found.")
            self.tags = pd.DataFrame()

        self.user_movie_matrix = None
        self.model = None
        self.tag_similarity = None
        self.genre_similarity = None

    def preprocess_data(self):
        # Kullanıcı-film matrisini oluştur
        if not self.ratings.empty:
            self.user_movie_matrix = self.ratings.pivot_table(index='userId', columns='movieId', values='rating', fill_value=0)
            print(f"User-movie matrix created. Shape: {self.user_movie_matrix.shape}")
        else:
            print("Ratings data is empty. Cannot create user-movie matrix.")
        
        # Etiketleri ve filmleri birleştir
        if not self.tags.empty:
            movie_tags_df = self.tags.groupby('movieId')['tag'].apply(lambda x: ' '.join(x)).reset_index()
            self.movies = pd.merge(self.movies, movie_tags_df, on='movieId', how='left')
            self.movies['tag'] = self.movies['tag'].fillna('')

            # TF-IDF matrisi oluştur (tag bazlı öneri için)
            tfidf = TfidfVectorizer(stop_words='english')
            tfidf_matrix = tfidf.fit_transform(self.movies['tag'])

            # Cosine similarity hesapla (tag bazlı)
            self.tag_similarity = cosine_similarity(tfidf_matrix)
            print("Tag-based similarity matrix created.")
        else:
            print("Tags data is empty. Cannot calculate tag similarity.")

        # Tür bilgisi için TF-IDF benzeri bir yaklaşımla tür benzerliğini hesaplama
        if 'genres' in self.movies.columns:
            tfidf_genres = TfidfVectorizer(tokenizer=lambda x: x.split('|'), stop_words='english')
            tfidf_genres_matrix = tfidf_genres.fit_transform(self.movies['genres'])
            
            # Cosine similarity hesapla (genre bazlı)
            self.genre_similarity = cosine_similarity(tfidf_genres_matrix)
            print("Genre-based similarity matrix created.")
        else:
            print("Genres data is missing. Cannot calculate genre similarity.")

    def train_model(self):
        if self.user_movie_matrix is not None:
            self.model = NearestNeighbors(metric='cosine', algorithm='brute')
            self.model.fit(self.user_movie_matrix)
            print("Model trained successfully.")
        else:
            print("User-movie matrix is empty. Cannot train the model.")
    
    def get_user_based_recommendations(self, user_id, num_recommendations=5):
        # Kullanıcıya dayalı öneri almak için modeli kullan
        if self.model is None:
            print("Model is not trained yet.")
            return []
        
        user_vector = self.user_movie_matrix.loc[user_id].values.reshape(1, -1)
        distances, indices = self.model.kneighbors(user_vector, n_neighbors=num_recommendations + 1)
        
        # İlk öğe kendi kullanıcı olduğundan onu hariç tut
        recommended_user_ids = self.user_movie_matrix.index[indices.flatten()[1:]].tolist()
        return recommended_user_ids

    def get_weighted_recommendations(self, movie_id, genre_weight=0.7, tag_weight=0.3, num_recommendations=5):
        # Belirtilen movie_id'ye göre genre ve tag benzerliklerini ağırlıklı olarak hesapla

        # Tür ve etiket benzerliklerini bul
        if self.genre_similarity is None or self.tag_similarity is None:
            print("Genre or tag similarity not available.")
            return []

        # movie_id'nin geçerli olup olmadığını kontrol et
        if movie_id not in self.movies['movieId'].values:
            print(f"Movie with ID {movie_id} not found in movies data.")
            return []

        # movie_index'i al
        movie_index = self.movies.index[self.movies['movieId'] == movie_id].tolist()[0]

        # Genre ve Tag similarity'lerini al
        genre_sim = self.genre_similarity[movie_index]
        tag_sim = self.tag_similarity[movie_index]

        # Ağırlıklı ortalama hesapla
        weighted_sim = (genre_sim * genre_weight) + (tag_sim * tag_weight)

        # En yüksek benzerlikteki filmleri seç (seçilen film hariç)
        similar_indices = weighted_sim.argsort()[-(num_recommendations+1):-1]  # En benzer olanları seç
        similar_movie_ids = self.movies.iloc[similar_indices]['movieId'].tolist()

        return similar_movie_ids

    def display_recommendations(self, recommended_movie_ids):
        if self.movies.empty:
            print("Movies data is empty. Cannot display recommendations.")
            return pd.DataFrame()

        if not recommended_movie_ids:
            print("No recommended movie IDs provided.")
            return pd.DataFrame()

        recommended_movies = self.movies[self.movies['movieId'].isin(recommended_movie_ids)]
        
        if recommended_movies.empty:
            print("No movies found for the provided movie IDs.")
            return pd.DataFrame()

        return recommended_movies[['title', 'genres']]

    def recommend_to_first_ten_users(self):
        all_recommendations = []
        
        # İlk 10 kullanıcıyı seç
        user_ids = self.user_movie_matrix.index[:10]  
        
        for user_id in user_ids:
            user_based_recommended_movie_ids = self.get_user_based_recommendations(user_id, num_recommendations=5)
            if user_based_recommended_movie_ids:
                recommended_movies_df = self.display_recommendations(user_based_recommended_movie_ids)
                if not recommended_movies_df.empty:
                    print(f"User ID: {user_id} (User-Based Recommendations)")
                    for _, row in recommended_movies_df.iterrows():
                        print(f"  - Title: {row['title']}, Genres: {row['genres']}")
                    for _, row in recommended_movies_df.iterrows():
                        all_recommendations.append({'userId': user_id, 'title': row['title'], 'genres': row['genres']})

                # Ağırlıklı tür ve tag bazlı öneriler
                for movie_id in user_based_recommended_movie_ids:
                    weighted_recommended_movie_ids = self.get_weighted_recommendations(movie_id, genre_weight=0.7, tag_weight=0.3, num_recommendations=5)
                    if weighted_recommended_movie_ids:
                        recommended_movies_df = self.display_recommendations(weighted_recommended_movie_ids)
                        if not recommended_movies_df.empty:
                            print(f"User ID: {user_id} (Weighted Genre+Tag-Based Recommendations for Movie ID: {movie_id})")
                            for _, row in recommended_movies_df.iterrows():
                                print(f"  - Title: {row['title']}, Genres: {row['genres']}")
                            for _, row in recommended_movies_df.iterrows():
                                all_recommendations.append({'userId': user_id, 'title': row['title'], 'genres': row['genres']})

        # Sonuçları DataFrame'e dönüştür
        recommendations_df = pd.DataFrame(all_recommendations)

        if not recommendations_df.empty:
            print("\nAll Recommendations for the first ten users:")
            print(recommendations_df)
        else:
            print("No recommendations generated for any of the first ten users.")

if __name__ == "__main__":
    # MovieLens CSV inputs: movies, user ratings, and tags.
    movies_path, ratings_path, tags_path = 'movies.csv', 'ratings.csv', 'tags.csv'

    recommender = MovieRecommendationSystem(
        movies_path=movies_path,
        ratings_path=ratings_path,
        tags_path=tags_path,
    )
    recommender.preprocess_data()
    recommender.train_model()

    # Produce recommendations for the first ten users.
    recommender.recommend_to_first_ten_users()


Movies data loaded. Shape: (9125, 3)
Ratings data loaded. Shape: (100004, 4)
Tags data loaded. Shape: (1296, 4)
User-movie matrix created. Shape: (671, 9066)
Tag-based similarity matrix created.
Genre-based similarity matrix created.
Model trained successfully.
User ID: 1 (User-Based Recommendations)
  - Title: Walk in the Clouds, A (1995), Genres: Drama|Romance
  - Title: National Lampoon's Senior Trip (1995), Genres: Comedy
  - Title: Double Happiness (1994), Genres: Drama
User ID: 1 (Weighted Genre+Tag-Based Recommendations for Movie ID: 325)
  - Title: Porky's (1982), Genres: Comedy
  - Title: Porky's II: The Next Day (1983), Genres: Comedy
  - Title: Porky's Revenge (1985), Genres: Comedy
  - Title: Bringing Down the House (2003), Genres: Comedy
  - Title: Cedar Rapids (2011), Genres: Comedy
Movie with ID 634 not found in movies data.
User ID: 1 (Weighted Genre+Tag-Based Recommendations for Movie ID: 341)
  - Title: That's Life! (1986), Genres: Drama
  - Title: Woman Under the Inf