In [1]:
# import library
import pandas as pd
# import matplotlib.pyplot as plt
# import seaborn as sns
import plotly.express as px
import os

In [2]:
# Load the processed CSV exports into DataFrames.
# Paths are relative, so they resolve against the kernel's working directory.

books = pd.read_csv(filepath_or_buffer="../data/processed/clean_books.csv")
interactions = pd.read_csv(filepath_or_buffer="../data/processed/user_interactions.csv")
reading = pd.read_csv(filepath_or_buffer="../data/processed/reading_progress.csv")
segments = pd.read_csv(filepath_or_buffer="../data/processed/user_segments.csv")

In [3]:
# Combine reading activity with book metadata and user segments.
# Both joins are left joins, so every row of `reading` is kept even when the
# book or the user's segment is missing from the lookup table.
# validate='many_to_one' makes pandas raise MergeError if the lookup side
# (books.id / segments.user_id) contains duplicate keys; without it such
# duplicates would silently multiply reading rows and skew later aggregates.
reading_full = (
    reading
    .merge(books, left_on='book_id', right_on='id', how='left',
           validate='many_to_one')
    .merge(segments, on='user_id', how='left', validate='many_to_one')
)

In [4]:
# Bar chart: how many books fall into each genre.
# value_counts() tallies the 'genre' column; the chain then names the index
# 'Genre' and the counts 'Book Count' so the columns match the plot axes.
genre_counts = (
    books['genre']
    .value_counts()
    .rename_axis('Genre')
    .reset_index(name='Book Count')
)

fig1 = px.bar(
    genre_counts,
    x='Genre',
    y='Book Count',
    color='Genre',
    title='Book Distribution by Genre',
)
fig1.show()

In [5]:
# Histogram of the 'completion_rate' column from the reading-progress data
# (nbins=10), with a human-readable axis label.
fig2 = px.histogram(
    reading,
    x='completion_rate',
    nbins=10,
    labels={'completion_rate': 'Completion Rate'},
    title='Distribution of Reading Completion Rates',
)
fig2.show()

In [6]:
# Table of the highest-rated books: order by rating, best first, and keep at
# most the first ten rows.
top_books = books.sort_values(by='rating', ascending=False).iloc[:10]

# Display only the columns that identify each book and its rating.
top_books.loc[:, ["title", "author", "rating"]]

Unnamed: 0,title,author,rating
6,The Lord Of The Rings,J.R.R. Tolkien,4.9
1,To Kill A Mockingbird,Harper Lee,4.8
5,Harry Potter And The Sorcerer'S Stone,J.K. Rowling,4.7
2,1984,George Orwell,4.6
3,Pride And Prejudice,Jane Austen,4.4
7,Dune,Frank Herbert,4.3
0,The Great Gatsby,F. Scott Fitzgerald,4.2
4,The Catcher In The Rye,J.D. Salinger,3.8


In [7]:
# Line chart: number of recorded actions per calendar day.
# The timestamp column is parsed in place so the .dt accessor can be used.
interactions['timestamp'] = pd.to_datetime(interactions['timestamp'])

# Group by the date part of each timestamp; count() tallies the non-null
# 'action' values that fall on each day.
activity_day = interactions['timestamp'].dt.date
daily_activity = (
    interactions.groupby(activity_day)['action']
    .count()
    .rename_axis('Date')
    .reset_index(name='Activity Count')
)

fig4 = px.line(
    daily_activity,
    x='Date',
    y='Activity Count',
    markers=True,
    title='User Activity Over Time',
)
fig4.show()

In [8]:
# Reader Segmentation Breakdown (Pie Chart)
# Counts the rows of `segments` for each 'segment' value and plots each
# segment's share; the counts are labelled 'User Count'.
segment_summary = (
    segments['segment']
    .value_counts()
    .rename_axis('Segment')
    .reset_index(name='User Count')
)

fig5 = px.pie(
    segment_summary,
    names='Segment',
    values='User Count',
    color_discrete_sequence=px.colors.qualitative.Pastel,
    title='Reader Segmentation Breakdown',
)
fig5.show()