In [None]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import svds
import warnings
import seaborn as sns
import matplotlib.pyplot as plt

warnings.filterwarnings('ignore')



class BookRecommender:
    """Book recommender built on a baseline-plus-truncated-SVD rating model.

    A prediction is ``global_mean + user_bias + item_bias`` plus a low-rank
    correction obtained from a truncated SVD of the residual ratings.

    Intended call order: ``load_data`` -> ``prepare_matrix`` ->
    ``train_model`` -> ``get_recommendations``.
    """

    def __init__(self, n_latent_factors=20):
        """Create an untrained recommender.

        Args:
            n_latent_factors: Requested number of singular values/vectors for
                the truncated SVD. ``train_model`` lowers it when the rating
                matrix is too small to support that many factors.
        """
        self.n_latent_factors = n_latent_factors
        self.books_df = None
        self.ratings_df = None
        self.user_book_matrix = None
        self.user_mapper = None
        self.book_mapper = None
        self.reverse_user_mapper = None
        self.reverse_book_mapper = None
        self.global_mean = None
        self.min_rating = 1
        self.max_rating = 10
        # Populated by train_model(); initialised here so that an untrained
        # instance has a well-defined set of attributes.
        self.user_bias = None
        self.item_bias = None
        self.U = None
        self.sigma = None
        self.Vt = None

    def load_data(self, books_path, ratings_path):
        """Read the books and ratings CSV files into ``books_df``/``ratings_df``.

        Books are de-duplicated on (lower-cased title, author); ratings are
        then restricted to ISBNs that survived that de-duplication.

        Args:
            books_path: CSV with at least the columns ``ISBN``, ``Book-Title``
                and ``Book-Author`` (read as latin-1).
            ratings_path: CSV with the columns ``User-ID``, ``ISBN`` and
                ``Book-Rating``.
        """
        print("Loading datasets...")

        self.books_df = pd.read_csv(
            books_path,
            encoding='latin-1',
            usecols=['ISBN', 'Book-Title', 'Book-Author'],
            dtype={'ISBN': 'string', 'Book-Title': 'string', 'Book-Author': 'string'}
        )

        self.books_df['Book-Title'] = self.books_df['Book-Title'].str.strip()
        self.books_df['Clean-Title'] = self.books_df['Book-Title'].str.lower()
        self.books_df = self.books_df.drop_duplicates(
            subset=['Clean-Title', 'Book-Author'],
            keep='first'
        )

        self.ratings_df = pd.read_csv(
            ratings_path,
            dtype={'User-ID': np.int32, 'ISBN': 'string', 'Book-Rating': np.float32}
        )

        # Replace 0 ratings with 1 to maintain the 1-10 scale.
        # NOTE(review): if 0 encodes "no explicit rating" in this dataset,
        # this turns every such interaction into the lowest score -- confirm
        # that this is intended rather than dropping those rows.
        self.ratings_df.loc[self.ratings_df['Book-Rating'] == 0, 'Book-Rating'] = 1

        # De-duplicate the ISBN key first: a repeated ISBN on the right-hand
        # side of an inner merge would multiply the matching rating rows.
        known_isbns = self.books_df[['ISBN']].drop_duplicates()
        self.ratings_df = self.ratings_df.merge(known_isbns, on='ISBN', how='inner')

        print("Initial shapes:")
        print(f"Books (after deduplication): {self.books_df.shape}")
        print(f"Ratings: {self.ratings_df.shape}")
        print(f"Rating range: {self.min_rating} to {self.max_rating}")

    def prepare_matrix(self, min_book_ratings=50, min_user_ratings=10):
        """Build the sparse user x book rating matrix and the id<->index maps.

        Args:
            min_book_ratings: Keep only books with at least this many ratings.
            min_user_ratings: Keep only users with at least this many ratings.

        Raises:
            RuntimeError: If no ratings have been loaded yet.
            ValueError: If no rating survives the two thresholds.
        """
        if self.ratings_df is None:
            raise RuntimeError("load_data() must be called before prepare_matrix()")

        print("\nPreparing rating matrix...")

        # csr_matrix sums entries that share a (row, col) pair, so a repeated
        # (user, book) row would yield a rating above the scale and inflate
        # the counts below. Keep the last occurrence in file order.
        ratings_df = self.ratings_df.drop_duplicates(
            subset=['User-ID', 'ISBN'],
            keep='last'
        )

        book_counts = ratings_df['ISBN'].value_counts()
        user_counts = ratings_df['User-ID'].value_counts()

        valid_books = book_counts[book_counts >= min_book_ratings].index
        valid_users = user_counts[user_counts >= min_user_ratings].index

        filtered_ratings = ratings_df[
            ratings_df['ISBN'].isin(valid_books) &
            ratings_df['User-ID'].isin(valid_users)
        ]
        if filtered_ratings.empty:
            raise ValueError(
                "No ratings left after filtering; lower min_book_ratings "
                "and/or min_user_ratings"
            )

        self.user_mapper = {uid: idx for idx, uid in enumerate(filtered_ratings['User-ID'].unique())}
        self.book_mapper = {isbn: idx for idx, isbn in enumerate(filtered_ratings['ISBN'].unique())}

        self.reverse_user_mapper = {idx: uid for uid, idx in self.user_mapper.items()}
        self.reverse_book_mapper = {idx: isbn for isbn, idx in self.book_mapper.items()}

        rows = filtered_ratings['User-ID'].map(self.user_mapper).to_numpy()
        cols = filtered_ratings['ISBN'].map(self.book_mapper).to_numpy()
        ratings = filtered_ratings['Book-Rating'].to_numpy()

        self.user_book_matrix = csr_matrix(
            (ratings, (rows, cols)),
            shape=(len(self.user_mapper), len(self.book_mapper))
        )

        self.global_mean = float(np.mean(ratings))

        n_users, n_books = self.user_book_matrix.shape
        print(f"Matrix shape: {self.user_book_matrix.shape}")
        print(f"Density: {self.user_book_matrix.nnz / (n_users * n_books):.2%}")
        print(f"Global mean rating: {self.global_mean:.2f}")

    @staticmethod
    def _mean_of_stored(sums, counts):
        """Return sums / counts element-wise, with 0 where counts is 0."""
        sums = np.asarray(sums, dtype=np.float64).ravel()
        means = np.zeros_like(sums)
        observed = counts > 0
        means[observed] = sums[observed] / counts[observed]
        return means

    def train_model(self):
        """Fit user/item biases and a truncated SVD of the residual ratings.

        Sets ``user_bias``, ``item_bias``, ``U``, ``sigma`` (a diagonal
        matrix) and ``Vt``.

        Raises:
            RuntimeError: If ``prepare_matrix`` has not been called.
            ValueError: If the rating matrix has fewer than two rows or
                columns, so no truncated SVD can be computed.
        """
        if self.user_book_matrix is None:
            raise RuntimeError("prepare_matrix() must be called before train_model()")

        print("\nTraining model...")

        matrix = self.user_book_matrix.tocsr()

        # Bias = mean of the stored ratings of a user/item minus global mean.
        user_means = self._mean_of_stored(matrix.sum(axis=1), np.diff(matrix.indptr))
        item_means = self._mean_of_stored(
            matrix.sum(axis=0), np.diff(matrix.tocsc().indptr)
        )
        self.user_bias = user_means - self.global_mean
        self.item_bias = item_means - self.global_mean

        # Centre only the stored ratings, in one vectorised pass over the COO
        # triplets (element-wise assignment into a CSR matrix is very slow).
        coo = matrix.tocoo()
        residuals = (
            coo.data.astype(np.float64)
            - self.global_mean
            - self.user_bias[coo.row]
            - self.item_bias[coo.col]
        )
        centered_matrix = csr_matrix((residuals, (coo.row, coo.col)), shape=matrix.shape)

        # svds needs k strictly below the smaller matrix dimension.
        max_factors = min(matrix.shape) - 1
        if max_factors < 1:
            raise ValueError(
                f"Rating matrix of shape {matrix.shape} is too small for a truncated SVD"
            )
        n_factors = min(self.n_latent_factors, max_factors)
        if n_factors < self.n_latent_factors:
            print(
                f"Reducing latent factors from {self.n_latent_factors} to "
                f"{n_factors} to fit matrix shape {matrix.shape}"
            )

        # Perform SVD on the centered matrix
        U, sigma, Vt = svds(centered_matrix, k=n_factors)

        # Store the factorized matrices
        self.U = U
        self.sigma = np.diag(sigma)
        self.Vt = Vt

        print("Model training completed successfully")

    def predict_rating(self, user_idx, item_idx):
        """Predict one rating, clipped to ``[min_rating, max_rating]``.

        Args:
            user_idx: Row index of the user in ``user_book_matrix``.
            item_idx: Column index of the book in ``user_book_matrix``.

        Returns:
            The clipped prediction as a float. When an index is out of range
            or the model has not been trained, the error is printed and
            ``global_mean`` is returned as a fallback.
        """
        try:
            baseline = (
                self.global_mean +
                self.user_bias[user_idx] +
                self.item_bias[item_idx]
            )

            svd_estimate = np.dot(
                np.dot(self.U[user_idx, :], self.sigma),
                self.Vt[:, item_idx]
            )
        except (IndexError, TypeError) as e:
            # IndexError: index outside the matrix; TypeError: model
            # attributes are still None because training has not run.
            print(f"Error in prediction: {e}")
            return self.global_mean  # Return global mean as fallback

        predicted = float(baseline + svd_estimate)
        return float(np.clip(predicted, self.min_rating, self.max_rating))

    def _predict_all_items(self, user_idx):
        """Return clipped predictions for every book column for one user."""
        if self.U is None:
            raise RuntimeError("train_model() must be called before predicting")
        latent = np.dot(np.dot(self.U[user_idx, :], self.sigma), self.Vt)
        scores = self.global_mean + self.user_bias[user_idx] + self.item_bias + latent
        return np.clip(scores, self.min_rating, self.max_rating)

    def _unrated_mask(self, user_idx):
        """Return a boolean mask over book columns the user has not rated."""
        mask = np.ones(self.user_book_matrix.shape[1], dtype=bool)
        mask[self.user_book_matrix[user_idx].indices] = False
        return mask

    def plot_predicted_ratings(self, user_idx):
        """Show a histogram of predicted ratings for the user's unrated books.

        Args:
            user_idx: Row index of the user in ``user_book_matrix``.
        """
        scores = self._predict_all_items(user_idx)
        predictions = scores[self._unrated_mask(user_idx)].tolist()

        plt.figure(figsize=(8, 6))
        sns.histplot(predictions, bins=10, kde=True, color='green')
        plt.title("Distribuția Ratingurilor Prezise pentru Utilizator")
        plt.xlabel("Rating Prezis")
        plt.ylabel("Frecvență")
        plt.show()

    def get_recommendations(self, user_id, n_recommendations=10, show_plot=True):
        """Return the highest-predicted unrated books for a user.

        Args:
            user_id: Original ``User-ID`` value (not a matrix index).
            n_recommendations: Maximum number of rows to return.
            show_plot: When True (the default), also display the histogram
                produced by ``plot_predicted_ratings``.

        Returns:
            A DataFrame with the columns ``Title``, ``Author``, ``ISBN`` and
            ``Predicted Rating``, best first and with at most one row per
            lower-cased title. Empty when the user is unknown.
        """
        if user_id not in self.user_mapper:
            print(f"User {user_id} not found in the dataset")
            return pd.DataFrame()

        user_idx = self.user_mapper[user_id]
        scores = self._predict_all_items(user_idx)

        # Candidates: unrated books with a valid (non-NaN) prediction.
        candidates = np.flatnonzero(self._unrated_mask(user_idx) & ~np.isnan(scores))
        # Stable sort so that equal scores keep ascending column order.
        ranked = candidates[np.argsort(-scores[candidates], kind='stable')]

        # Index the catalogue once instead of scanning it per candidate.
        books_by_isbn = self.books_df.drop_duplicates(subset='ISBN').set_index('ISBN')

        recommendations = []
        seen_titles = set()

        for item_idx in ranked:
            if len(recommendations) >= n_recommendations:
                break

            isbn = self.reverse_book_mapper[int(item_idx)]
            if isbn not in books_by_isbn.index:
                continue
            book_info = books_by_isbn.loc[isbn]

            if book_info['Clean-Title'] in seen_titles:
                continue

            seen_titles.add(book_info['Clean-Title'])
            recommendations.append({
                'Title': book_info['Book-Title'],
                'Author': book_info['Book-Author'],
                'ISBN': isbn,
                'Predicted Rating': round(float(scores[item_idx]), 2)
            })

        if show_plot:
            self.plot_predicted_ratings(user_idx)

        return pd.DataFrame(recommendations)

def main():
    """Run the full pipeline on Books.csv / Ratings.csv and print the results.

    Trains a 50-factor recommender, picks the first user in the mapping and
    prints that user's recommendations. Any exception is reported on stdout
    rather than propagated.
    """
    try:
        engine = BookRecommender(n_latent_factors=50)
        engine.load_data('Books.csv', 'Ratings.csv')
        engine.prepare_matrix(min_book_ratings=10, min_user_ratings=5)
        engine.train_model()

        # Use the first user that survived filtering as the demo subject.
        known_users = list(engine.user_mapper.keys())
        valid_user = known_users[0]
        picks = engine.get_recommendations(valid_user)

        print(f"\nTop recommendations for user {valid_user}:")
        print("=" * 80)
        for position, book in picks.iterrows():
            entry_lines = (
                f"\n{position + 1}. {book['Title']}",
                f"   Author: {book['Author']}",
                f"   ISBN: {book['ISBN']}",
                f"   Predicted Rating: {book['Predicted Rating']}",
            )
            for line in entry_lines:
                print(line)

    except Exception as err:
        print(f"An error occurred: {str(err)}")


if __name__ == "__main__":
    main()

ModuleNotFoundError: No module named 'seaborn'