# AI-Book-Recommender: Data Exploration
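This notebook loads the books, users, and ratings datasets and performs exploratory data analysis (EDA) to understand their structure and content: it previews each table, plots the distribution of ratings, ranks the most-rated books, and compares active with inactive users.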

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Set seaborn style
sns.set(style='whitegrid')

# Suppress warnings
import warnings
warnings.filterwarnings('ignore')

# Ensure plots appear inline in Jupyter
%matplotlib inline

In [None]:
# Load datasets
# Each CSV is read from the current working directory into its own DataFrame.
# Only the books file is read with low_memory=False.
# NOTE(review): presumably that is there to avoid mixed-dtype column inference
# on the books file — confirm against the data.
books = pd.read_csv('books.csv', low_memory=False)
users = pd.read_csv('users.csv')
ratings = pd.read_csv('ratings.csv')

# Preview datasets
# Print a heading followed by the first five rows of each table, in load order.
previews = [
    ('Books Dataset:', books),
    ('\nUsers Dataset:', users),
    ('\nRatings Dataset:', ratings),
]
for heading, frame in previews:
    print(heading)
    print(frame.head())

In [None]:
# Basic EDA
# 1. Distribution of book ratings
# The tick range below treats ratings as the integers 0..10, i.e. eleven
# distinct values (assumes 'Book-Rating' is integer-valued — TODO confirm).
# Ten equal-width bins over that range would pool ratings 9 and 10 into the
# same bar and put each tick on a bar edge, so use discrete=True instead:
# one unit-wide bin centred on every integer rating.
plt.figure(figsize=(8, 6))
sns.histplot(ratings['Book-Rating'], discrete=True, kde=False, color='skyblue')
plt.title('Distribution of Book Ratings', fontsize=14)
plt.xlabel('Rating')
plt.ylabel('Count')
plt.xticks(range(0, 11))
plt.tight_layout()
plt.show()

# 2. Top 10 most rated books
# Count ratings per ISBN and keep the ten ISBNs with the most ratings.
popular_counts = ratings['ISBN'].value_counts().head(10).reset_index()
popular_counts.columns = ['ISBN', 'Rating Count']

# Keep a single title per ISBN so the left join below cannot multiply rows
# if the books table lists an ISBN more than once.
popular_titles = books[['ISBN', 'Book-Title']].drop_duplicates(subset='ISBN')
popular_books = pd.merge(popular_counts, popular_titles, on='ISBN', how='left')

# An ISBN that is rated but absent from the books table comes back from the
# left join with a NaN title, which cannot serve as a bar label. Fall back to
# a label built from the ISBN so all ten entries stay on the chart.
fallback_titles = 'Unknown title (ISBN ' + popular_books['ISBN'].astype(str) + ')'
popular_books['Book-Title'] = popular_books['Book-Title'].fillna(fallback_titles)

# NOTE(review): two different ISBNs that share the same title are drawn as a
# single aggregated bar by barplot — confirm whether editions should be merged.
plt.figure(figsize=(12, 6))
sns.barplot(x='Rating Count', y='Book-Title', data=popular_books, palette='viridis')
plt.title('Top 10 Most Rated Books')
plt.xlabel('Number of Ratings')
plt.ylabel('Book Title')
plt.tight_layout()
plt.show()

# 3. Pie chart of active vs inactive users
# A user counts as "active" when they have strictly more ratings than this
# threshold. It is defined once so the filter and the chart labels cannot
# drift apart.
ACTIVE_USER_MIN_RATINGS = 10

# Number of rating rows per user. Only users that appear in `ratings` are
# counted here.
# NOTE(review): users present in the users table but with no ratings at all
# are in neither slice — confirm that is the intended population.
total_ratings_per_user = ratings['User-ID'].value_counts()

# One boolean mask splits the raters, so the two groups always partition them.
is_active = total_ratings_per_user > ACTIVE_USER_MIN_RATINGS
active_users = total_ratings_per_user[is_active].count()
inactive_users = total_ratings_per_user[~is_active].count()

plt.figure(figsize=(6, 6))
plt.pie(
    [active_users, inactive_users],
    labels=[
        f'Active Users (>{ACTIVE_USER_MIN_RATINGS} ratings)',
        f'Inactive Users (<={ACTIVE_USER_MIN_RATINGS} ratings)',
    ],
    autopct='%1.1f%%',
    colors=['lightgreen', 'lightcoral'],
)
plt.title('Active vs Inactive Users')
plt.tight_layout()
plt.show()