In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the books dataset from the JSON file.
df = pd.read_json("../data/books.json")

# Clean missing values: each listed column gets its own placeholder.
# NOTE(review): a missing average_rating becomes 0, so such rows count as
# zero-rated in any later mean over that column — confirm this is intended.
missing_defaults = {
    "genre": "Unknown",
    "average_rating": 0,
    "ratings_count": 0,
}
df.fillna(value=missing_defaults, inplace=True)

# Most common genres
# Draw on a dedicated figure: Series.plot() reuses the current pyplot axes
# when a figure is already open, which would mix this chart with another.
fig, ax = plt.subplots()
genre_counts = df["genre"].value_counts()
genre_counts.plot(kind="bar", title="Most Common Genres", ax=ax)
# Save through the figure object so the file always holds exactly this chart.
fig.savefig("../assets/charts/genre_counts.png")

# Highest rated genres
# Mean rating per genre, best first.
avg_ratings = df.groupby("genre")["average_rating"].mean().sort_values(ascending=False)
# Use a fresh figure: without an explicit ax, Series.plot() draws onto the
# axes of whatever figure is currently open, overlaying the previous chart.
fig, ax = plt.subplots()
avg_ratings.plot(kind="bar", title="Average Ratings by Genre", ax=ax)
# Save through the figure object so the file always holds exactly this chart.
fig.savefig("../assets/charts/avg_ratings.png")

# Weighted Bayesian rating: blend each row's own rating (R) with the overall
# mean rating (C); the more ratings a row has (v), the more its own rating
# counts relative to the mean.
# NOTE(review): rows whose average_rating was missing are zero-filled earlier
# in this file, so they pull C down — confirm this is intended.
m = 50  # constant added to the ratings count; sets how strongly C is weighted
C = df["average_rating"].mean()
v = df["ratings_count"]
R = df["average_rating"]
total_weight = v + m
own_share = v / total_weight
mean_share = m / total_weight
df["weighted_rating"] = own_share * R + mean_share * C

# Scatter plot: price vs rating
# The plot call returns the Axes it drew on; save via that Axes' figure.
price_rating_ax = df.plot(
    kind="scatter",
    x="price_usd",
    y="average_rating",
    title="Price vs Rating",
)
price_rating_ax.get_figure().savefig("../assets/charts/price_vs_rating.png")

# Books released by year
# Number of rows per publication year.
books_per_year = df.groupby("publication_year").size()
# Use a fresh figure: without an explicit ax, Series.plot() draws onto the
# axes of whatever figure is currently open (here, the previous chart).
fig, ax = plt.subplots()
books_per_year.plot(kind="line", title="Books Released by Year", ax=ax)
# Save through the figure object so the file always holds exactly this chart.
fig.savefig("../assets/charts/books_by_year.png")

# Checked in vs available
# Count of rows for each distinct value of the "available" column.
availability_counts = df["available"].value_counts()
# Use a fresh figure: without an explicit ax, Series.plot() draws onto the
# axes of whatever figure is currently open, so the pie would be painted
# over the previous chart.
fig, ax = plt.subplots()
availability_counts.plot(kind="pie", title="Checked In vs Available", ax=ax)
# Save through the figure object so the file always holds exactly this chart.
fig.savefig("../assets/charts/available_pie.png")
