# Import libraries

In [17]:
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import math

# Import data

In [3]:
# Read the book listing from a CSV file (path is relative to the working
# directory) and preview the first rows.
BOOKS_CSV = 'AZ_top_books.csv'
books = pd.read_csv(BOOKS_CSV)
books.head(n=5)  # n=5 is the default, spelled out for the reader

Unnamed: 0,Rank,Title,Author,Rating,Format,Copies,Availability,Wait Time,Wait Weeks
0,1,The Women,Kristin Hannah,4.4,EBOOK,25.0,WAIT LIST,6 months,27.0
1,2,Five Total Strangers,Natalie D. Richards,3.5,EBOOK,12.0,AVAILABLE,No wait,0.0
2,3,It Ends With Us,Colleen Hoover,4.3,EBOOK,15.0,WAIT LIST,6 months,27.0
3,4,Funny Story,Emily Henry,4.2,EBOOK,18.0,WAIT LIST,6 months,27.0
4,5,Iron Flame,Rebecca Yarros,4.2,EBOOK,18.0,WAIT LIST,23 weeks,23.0


# Distributions of ratings

Waitlist vs. Available

In [4]:
# Distribution of Rating for rows whose Availability is 'WAIT LIST'
is_waitlisted = books['Availability'] == 'WAIT LIST'
wl_books = books.loc[is_waitlisted]
fig_wl = px.histogram(
    data_frame=wl_books,
    x='Rating',
    title='Histogram of Ratings for Waitlist Books',
)
fig_wl.show()

In [6]:
# Distribution of Rating for rows whose Availability is 'AVAILABLE'
is_available = books['Availability'] == 'AVAILABLE'
av_books = books.loc[is_available]
fig_av = px.histogram(
    data_frame=av_books,
    x='Rating',
    title='Histogram of Ratings for Available Books',
)
fig_av.show()

Audiobook vs. Ebook

In [8]:
# Distribution of Rating for rows whose Format is 'AUDIOBOOK'
is_audiobook = books['Format'] == 'AUDIOBOOK'
audiobooks = books.loc[is_audiobook]
fig_ab = px.histogram(
    data_frame=audiobooks,
    x='Rating',
    title='Histogram of Ratings for Audiobooks',
)
fig_ab.show()

In [9]:
# Distribution of Rating for rows whose Format is 'EBOOK'
is_ebook = books['Format'] == 'EBOOK'
ebooks = books.loc[is_ebook]
fig_eb = px.histogram(
    data_frame=ebooks,
    x='Rating',
    title='Histogram of Ratings for Ebooks',
)
fig_eb.show()

Difference in ratings over rank

In [12]:
# Scatter plot of Rating (y-axis) against Rank (x-axis), coloured by Format.
# trendline='ols' overlays a least-squares regression line; because `color`
# is set, Plotly Express draws one line per Format group.
# NOTE: Plotly's OLS trendline needs the statsmodels package installed.
fig = px.scatter(
    data_frame=books,
    x='Rank',
    y='Rating',
    color='Format',
    trendline='ols',
)
fig.show()

In [27]:
# RATING DIFFERENCE by RANK
# For every rank present in BOTH formats, compute
# (ebook rating - audiobook rating), rounded to 4 decimal places.
#
# Ranks come from the data rather than a hard-coded range, and the inner
# merge skips any rank that is missing from either format instead of failing
# on an empty lookup. If a rank is listed more than once within a format,
# drop_duplicates keeps its first row.
eb_by_rank = ebooks.drop_duplicates('Rank')[['Rank', 'Rating']]
ab_by_rank = audiobooks.drop_duplicates('Rank')[['Rank', 'Rating']]
paired = (
    eb_by_rank
    .merge(ab_by_rank, on='Rank', how='inner', suffixes=('_ebook', '_audiobook'))
    .sort_values('Rank')  # ascending rank order for the line plot
)

# Plain Python lists, aligned element-by-element (ranks[i] <-> diffs[i]).
ranks = paired['Rank'].tolist()
diffs = (paired['Rating_ebook'] - paired['Rating_audiobook']).round(4).tolist()

print(diffs)

[0.5, -0.9, 0.5, 0.0, 0.8, -0.3, -0.1, 0.3, 0.1, 0.6, 0.8, -0.2, -0.3, 0.2, 0.2, 0.6, -0.2, 0.2, 0.0, -0.4, 0.2, -0.9, -0.1, 0.2, 0.1, 0.2, -1.1, 0.0, 0.7, 0.1, -0.1, 0.5, 0.0, -0.2, -0.3, 0.1, 0.1, 0.7, -0.1, 0.0, -0.2, 0.4, -1.1, 0.3, 1.0, -0.4, -0.4, -0.5, 0.9, -0.2, 0.0, -0.1, -0.2, -1.3, 0.1, 0.3, 1.0, -0.5, -1.3, 0.2, 0.1, -0.1, 0.1, 0.9, 0.3, 0.6, 0.5, -0.6, 0.5, 0.7, 0.4, 0.2, 0.1, -0.4, -0.1, 0.4, 0.3, -0.7, -0.4, 0.2, -0.3, 0.3, 0.2, 0.2, 0.0, -1.0, -1.1, 0.7, 0.9, 0.1, -0.2, 0.5, 1.1, 0.9, -0.5, 0.4, -0.7, 0.6, 0.3, -0.1, 0.6, 0.5, 0.2, 0.2, -0.7, -0.1, -0.5, -0.3, 0.1, -0.1, -0.2, 0.6, 0.2, 0.2, -1.4, 0.5, -0.2, -0.5, -0.2, -0.5, -0.6, -0.2, 0.1, 0.4, -0.2, -0.2, -0.7, 0.1, 0.7, -0.6, 0.9, -0.2, 0.4, 0.6, 0.8, 0.6, -0.4, 0.2, 0.2, -0.9, 0.1, 0.0, -0.6, 0.3, -0.5, -0.3, 0.6, 0.0, 0.7, 0.3, 0.2, -0.7, -0.6, 0.4, -0.1, -0.2, 0.3, 0.2, -1.4, -0.4, -0.3, 0.7, 0.0, -0.1, 0.6, 0.5, -0.4, 0.1, 0.5, 1.3, -0.2, 0.2, 0.6, -0.2, 0.0, -0.6, 0.7, -1.0, -0.1, 0.0, 0.5, 0.9, -0.7, 0.3, 0.4

In [29]:
# Put the per-rank rating differences in a two-column frame and draw them
# as a line chart.
rank_diff = pd.DataFrame(data={'Rank': ranks, 'Rating Difference': diffs})
fig_rd = px.line(
    data_frame=rank_diff,
    x='Rank',
    y='Rating Difference',
    title='Rating Difference by Rank',
)
fig_rd.show()

# Wait time

Wait time as rank increases

Unnamed: 0,Rank,Title,Author,Rating,Format,Copies,Availability,Wait Time,Wait Weeks
0,1,The Women,Kristin Hannah,4.4,EBOOK,25.0,WAIT LIST,6 months,27.0
240,1,Hillbilly Elegy,J. D. Vance,3.9,AUDIOBOOK,10.0,WAIT LIST,6 months,27.0


Wait time as rating increases

Wait time as number of copies increases