# In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split

# Load data
# The three cover-image URL columns are dropped immediately after loading.
books = pd.read_csv("data/Books.csv").drop(['Image-URL-S', 'Image-URL-M', 'Image-URL-L'], axis=1)
# Only the first 20,000 ratings are used; `nrows` stops parsing there instead
# of reading the whole file and slicing afterwards.
ratings = pd.read_csv("data/Ratings.csv", nrows=20000)

# Data cleaning and preprocessing
# Keep only rows whose publication year consists of numeric characters.
# Values are cast to str first so the check also works when pandas has parsed
# part of the column as integers (calling .isnumeric() on an int would raise).
has_numeric_year = books['Year-Of-Publication'].astype(str).str.isnumeric()
# .copy() makes the filtered frame independent, so the column assignment
# below does not operate on a view of the unfiltered data.
books = books[has_numeric_year].copy()
books['Year-Of-Publication'] = books['Year-Of-Publication'].astype(int)

# Visualization of top authors
# Bar chart of the ten most frequent 'Book-Author' values in `books`.
top_authors = books['Book-Author'].value_counts().iloc[:10]

plt.figure(figsize=(12, 6))
author_axes = sns.barplot(x=top_authors.index, y=top_authors.values)
author_axes.set(
    xlabel='Author Name',
    ylabel='Number of Books',
    title='Top 10 Authors by Number of Books',
)
# Author names are long; tilt the tick labels so they do not overlap.
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# User and ISBN encoding
# Left merge keeps every rating row, including ISBNs absent from `books`.
df_rating = pd.merge(ratings, books, on='ISBN', how='left')
# Map each distinct user ID / ISBN to a contiguous integer index (in order of
# first appearance) so they can be used as embedding-table row numbers.
user_to_user_encoded = {user: i for i, user in enumerate(df_rating['User-ID'].unique())}
isbn_to_isbn_encoded = {isbn: i for i, isbn in enumerate(df_rating['ISBN'].unique())}
df_rating['user'] = df_rating['User-ID'].map(user_to_user_encoded)
# NOTE: despite its name, the 'book_title' column holds the encoded ISBN index.
df_rating['book_title'] = df_rating['ISBN'].map(isbn_to_isbn_encoded)

# Model input preparation
# Each row of x is [encoded user index, encoded ISBN index].
x = df_rating[['user', 'book_title']].values
raw_ratings = df_rating['Book-Rating'].values.astype(np.float32)
# The model ends in a sigmoid, so it can only output values in (0, 1).
# Min-max scale the targets into [0, 1] so the regression target is reachable.
min_rating = raw_ratings.min()
max_rating = raw_ratings.max()
rating_span = max_rating - min_rating
if rating_span > 0:
    y = (raw_ratings - min_rating) / rating_span
else:
    # All ratings identical: avoid dividing by zero.
    y = np.zeros_like(raw_ratings)

# Model definition
class RecommenderNet(keras.Model):
    """Embedding-based score predictor for (user index, book index) pairs.

    Each user index and each book index is looked up in its own embedding
    table; the two vectors are concatenated, passed through dropout, and
    reduced to a single sigmoid-activated score.

    Args:
        num_users: Number of rows in the user embedding table.
        num_books: Number of rows in the book embedding table.
        embedding_size: Length of each embedding vector.
        dropout_rate: Dropout rate applied to the concatenated embeddings.
        **kwargs: Forwarded to ``keras.Model``.
    """

    def __init__(self, num_users, num_books, embedding_size, dropout_rate=0.2, **kwargs):
        super().__init__(**kwargs)
        # `keras.layers` is used because the file imports `keras` but never
        # imports a bare `layers` name.
        self.user_embedding = keras.layers.Embedding(num_users, embedding_size)
        self.book_embedding = keras.layers.Embedding(num_books, embedding_size)
        self.dropout = keras.layers.Dropout(dropout_rate)
        # The output layer must be created once here so that its weights are
        # owned by the model, trained by fit(), and reused by predict().
        # Building it inside call() would create a fresh layer per invocation.
        self.output_layer = keras.layers.Dense(1, activation='sigmoid')

    def call(self, inputs, training=None):
        """Score a batch of (user index, book index) pairs.

        Args:
            inputs: 2-D integer tensor; column 0 is the encoded user index and
                column 1 is the encoded book index.
            training: Forwarded to the dropout layer so that dropout is only
                active during training.

        Returns:
            Tensor with one sigmoid score in (0, 1) per input row.
        """
        user_vector = self.user_embedding(inputs[:, 0])
        book_vector = self.book_embedding(inputs[:, 1])
        concatenated = tf.concat([user_vector, book_vector], axis=-1)
        concatenated = self.dropout(concatenated, training=training)
        return self.output_layer(concatenated)

# Model instantiation and compilation
# Embedding tables are sized from the number of distinct encoded users/ISBNs.
model = RecommenderNet(
    num_users=len(user_to_user_encoded),
    num_books=len(isbn_to_isbn_encoded),
    embedding_size=50,
)
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    loss='mean_squared_error',
    metrics=['mean_squared_error'],
)

# Model training
# Hold out 10% of the rows for validation; the fixed seed keeps the split
# reproducible between runs.
x_train, x_val, y_train, y_val = train_test_split(
    x,
    y,
    test_size=0.1,
    random_state=42,
)
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=16,
    epochs=50,
    validation_data=(x_val, y_val),
)

# Book recommendations
# Pick one user at random from the rating rows and score every ISBN that
# user has no rating row for.
user_id = df_rating['User-ID'].sample(1).iloc[0]
books_read_by_user = df_rating[df_rating['User-ID'] == user_id]
books_not_read = df_rating[~df_rating['ISBN'].isin(books_read_by_user['ISBN'].values)]['ISBN'].unique()
# The user index is the same for every candidate row, so look it up once.
user_encoded = user_to_user_encoded[user_id]
books_not_read_encoded = np.array([[user_encoded, isbn_to_isbn_encoded[book]] for book in books_not_read])
if len(books_not_read) > 0:
    ratings_model = model.predict(books_not_read_encoded).flatten()
    # Indices of the ten highest predicted scores, best first.
    top_ratings_indices = ratings_model.argsort()[-10:][::-1]
else:
    # Nothing left to score: skip predict(), which cannot index an empty array.
    ratings_model = np.array([], dtype=np.float32)
    top_ratings_indices = np.array([], dtype=int)
# Keep the raw ISBN strings. Mapping them back through isbn_to_isbn_encoded
# would yield integer indices, which never match the 'ISBN' column of `books`.
recommended_books = [books_not_read[i] for i in top_ratings_indices]

# Display recommendations
# The first table is labelled "highly rated", so rank the user's rated books
# by rating before picking which ones to show. Rows without a title did not
# match any entry in `books` during the merge and could not be displayed.
rated_known_books = books_read_by_user.dropna(subset=['Book-Title']).sort_values(
    'Book-Rating', ascending=False
)
top_read_isbns = rated_known_books['ISBN'].head(10)
book_df_rows = books[books['ISBN'].isin(top_read_isbns)].head(10)
recommended_book_rows = books[books['ISBN'].isin(recommended_books)].head(10)

output_columns = ['Book Title', 'Book Author']
df_book_read_by_user = pd.DataFrame(book_df_rows[['Book-Title', 'Book-Author']].values, columns=output_columns)
df_recommended_books = pd.DataFrame(recommended_book_rows[['Book-Title', 'Book-Author']].values, columns=output_columns)

print(f"Showing recommendations for user: {user_id}")
print("===" * 9)
print("Books highly rated by the user:")
print("----" * 8)
print(df_book_read_by_user)
print("----" * 8)
print("Top 10 recommended books:")
print("----" * 8)
print(df_recommended_books)

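# Cell output: ModuleNotFoundError: No module named 'seaborn'
# (seaborn must be installed, e.g. `pip install seaborn`, before this cell can run.)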