In [1]:
import pandas as pd
import json
from IPython.core.display import HTML

In [2]:
# JSON text is UTF-8; name the encoding explicitly so the load does not
# depend on the platform's default locale encoding.
with open('book_data.json', encoding='utf-8') as f:
    books_data = json.load(f)


In [3]:
# Converting links to html tags
def path_to_image_html(path, width=240):
    """Wrap an image location in an HTML ``<img>`` tag.

    Intended for use as a ``DataFrame.to_html`` formatter, which calls it
    with a single cell value.

    Parameters
    ----------
    path : str
        URL or file path of the image.
    width : int or str, optional
        Value written to the tag's ``width`` attribute (default 240).

    Returns
    -------
    str
        The ``<img>`` tag, with ``path`` escaped for use inside a
        double-quoted attribute.
    """
    from html import escape  # local import keeps this cell self-contained

    # The table is rendered with escape=False, so the value must be escaped
    # here; otherwise a quote in the path would terminate the src attribute.
    safe_src = escape(path, quote=True)
    return '<img src="' + safe_src + '" width="' + str(width) + '" >'


In [4]:
# Build the working table from the entries stored under the 'books' key.
df_books = pd.DataFrame(books_data['books'])

In [5]:
# Show the table as loaded; the cover column holds image URLs at this point.
df_books

Unnamed: 0,title,author,cover,pages,main_genre,sub_genre
0,Darth Plagueis,James Luceno,https://images-na.ssl-images-amazon.com/images...,498,Fiction,Science Fiction
1,Dooku: Jedi Lost,Cavan Scott,https://images-na.ssl-images-amazon.com/images...,480,Fiction,Science Fiction
2,Fahrenheit 451,Ray Bradbury,https://images-na.ssl-images-amazon.com/images...,249,Fiction,Science Fiction
3,Master and Apprentice,Claudia Gray,https://images-na.ssl-images-amazon.com/images...,480,Fiction,Science Fiction
4,The Strain,"Guillermo Del Toro, Chuck Hogan",https://images-na.ssl-images-amazon.com/images...,432,Fiction,Horror
5,The Fall,"Guillermo Del Toro, Chuck Hogan",https://images-na.ssl-images-amazon.com/images...,480,Fiction,Horror
6,The Night Eternal,"Guillermo Del Toro, Chuck Hogan",https://images-na.ssl-images-amazon.com/images...,560,Fiction,Horror
7,Zen and the Art of Motorcycle Maintenance,Robert M. Pirsig,https://images-na.ssl-images-amazon.com/images...,464,Fiction,Philosophy


In [6]:
# Reorder the columns so the cover comes first, followed by the descriptive fields.
df_books = df_books.loc[:, ['cover', 'title', 'author', 'pages', 'main_genre', 'sub_genre']]

In [7]:
# Render the table as HTML, turning each cover value into an <img> tag.
# escape=False stops pandas from HTML-escaping the generated markup.
HTML(
    df_books.to_html(
        escape=False,
        formatters={'cover': path_to_image_html},
    )
)

Unnamed: 0,cover,title,author,pages,main_genre,sub_genre
0,,Darth Plagueis,James Luceno,498,Fiction,Science Fiction
1,,Dooku: Jedi Lost,Cavan Scott,480,Fiction,Science Fiction
2,,Fahrenheit 451,Ray Bradbury,249,Fiction,Science Fiction
3,,Master and Apprentice,Claudia Gray,480,Fiction,Science Fiction
4,,The Strain,"Guillermo Del Toro, Chuck Hogan",432,Fiction,Horror
5,,The Fall,"Guillermo Del Toro, Chuck Hogan",480,Fiction,Horror
6,,The Night Eternal,"Guillermo Del Toro, Chuck Hogan",560,Fiction,Horror
7,,Zen and the Art of Motorcycle Maintenance,Robert M. Pirsig,464,Fiction,Philosophy


In [8]:
# you can also not use formatters but create the html in the field itself
# the cover image sizes differ so I prefer setting a defined image size like above
# I am choosing not to run this cell since it takes up a lot of space with the huge cover image sizes. =]
# df_books_alt = df_books.copy()
# df_books_alt['cover'] = '<img src="'+ df_books_alt['cover'] + '">'
# HTML(df_books_alt.to_html(escape=False))

## Filtering

In [8]:
# A boolean mask is all that is needed to filter the books on a given criterion:

df_books.loc[df_books['main_genre'].eq('Fiction')]

Unnamed: 0,cover,title,author,pages,main_genre,sub_genre
0,https://images-na.ssl-images-amazon.com/images...,Darth Plagueis,James Luceno,498,Fiction,Science Fiction
1,https://images-na.ssl-images-amazon.com/images...,Dooku: Jedi Lost,Cavan Scott,480,Fiction,Science Fiction
2,https://images-na.ssl-images-amazon.com/images...,Fahrenheit 451,Ray Bradbury,249,Fiction,Science Fiction
3,https://images-na.ssl-images-amazon.com/images...,Master and Apprentice,Claudia Gray,480,Fiction,Science Fiction
4,https://images-na.ssl-images-amazon.com/images...,The Strain,"Guillermo Del Toro, Chuck Hogan",432,Fiction,Horror
5,https://images-na.ssl-images-amazon.com/images...,The Fall,"Guillermo Del Toro, Chuck Hogan",480,Fiction,Horror
6,https://images-na.ssl-images-amazon.com/images...,The Night Eternal,"Guillermo Del Toro, Chuck Hogan",560,Fiction,Horror
7,https://images-na.ssl-images-amazon.com/images...,Zen and the Art of Motorcycle Maintenance,Robert M. Pirsig,464,Fiction,Philosophy


In [9]:
# however, we can make filtering more fun and interactive by using WIDGETS

import ipywidgets as widgets
from IPython.display import display

In [10]:
# Sentinel dropdown option meaning "do not filter".
ALL = 'ALL'

def unique_sorted_values_plus_ALL(array):
    """Return the distinct values of ``array`` in sorted order, preceded by ``ALL``.

    Parameters
    ----------
    array : pandas.Series or pandas.Index
        Values to offer as dropdown options.

    Returns
    -------
    list
        ``[ALL, v1, v2, ...]`` with the distinct non-missing values sorted
        ascending.
    """
    # Missing values (None/NaN) cannot be ordered against strings, so drop
    # them before sorting instead of letting the sort raise TypeError.
    distinct = array.dropna().unique().tolist()
    return [ALL, *sorted(distinct)]

In [11]:
def _build_filter_widgets(column):
    """Create the dropdown, output area and change handler for one column.

    The three filters (main genre, sub genre, author) differ only in the
    ``df_books`` column they act on, so they share this single factory.

    Parameters
    ----------
    column : str
        Name of the ``df_books`` column whose distinct values become the
        dropdown options and on which rows are matched.

    Returns
    -------
    tuple
        ``(dropdown, output, eventhandler)``: the ``widgets.Dropdown``, the
        ``widgets.Output`` the handler renders into, and the handler itself.
    """
    output = widgets.Output()

    def eventhandler(change):
        # Discard whatever the previous selection rendered.
        output.clear_output()
        with output:
            if change.new == ALL:
                display(df_books)
            else:
                display(df_books[df_books[column] == change.new])

    dropdown = widgets.Dropdown(options=unique_sorted_values_plus_ALL(df_books[column]))
    # names='value' restricts the callback to changes of the selected value.
    dropdown.observe(eventhandler, names='value')
    return dropdown, output, eventhandler


# The module-level names are unchanged so the display cells keep working.
dropdown_main_genre, output_main_genre, dropdown_main_genre_eventhandler = _build_filter_widgets('main_genre')

dropdown_sub_genre, output_sub_genre, dropdown_sub_genre_eventhandler = _build_filter_widgets('sub_genre')

dropdown_author, output_author, dropdown_author_eventhandler = _build_filter_widgets('author')



In [12]:
# Show the main-genre selector.
display(dropdown_main_genre)

Dropdown(options=('ALL', 'Fiction'), value='ALL')

In [13]:
# Area that receives the table filtered by main genre; it stays empty until the dropdown value changes.
display(output_main_genre)

Output()

In [14]:
# Show the sub-genre selector.
display(dropdown_sub_genre)

Dropdown(options=('ALL', 'Horror', 'Philosophy', 'Science Fiction'), value='ALL')

In [15]:
# Area that receives the table filtered by sub-genre; it stays empty until the dropdown value changes.
display(output_sub_genre)

Output()

In [16]:
# Show the author selector.
display(dropdown_author)

Dropdown(options=('ALL', 'Cavan Scott', 'Claudia Gray', 'Guillermo Del Toro, Chuck Hogan', 'James Luceno', 'Ra…

In [17]:
# Area that receives the table filtered by author; it stays empty until the dropdown value changes.
display(output_author)

Output()

## Stats

In [18]:
# Headline numbers: how many titles were read and how many pages in total.
total_books = df_books.title.count()
total_pages = df_books.pages.sum()
print(f'2022: \nTotal Books Read : {total_books} \nTotal Pages Read: {total_pages}')

2022: 
Total Books Read : 8 
Total Pages Read: 3643


In [19]:
# Number of books per author, highest count first.
df_authors = (
    df_books.groupby(['author'])['author']
    .count()
    .rename('total')
    .reset_index()
    .sort_values(by='total', ascending=False)
)

In [20]:
# Per-author totals, sorted so the highest count comes first.
df_authors

Unnamed: 0,author,total
2,"Guillermo Del Toro, Chuck Hogan",3
0,Cavan Scott,1
1,Claudia Gray,1
3,James Luceno,1
4,Ray Bradbury,1
5,Robert M. Pirsig,1


In [21]:
# The three rows at the top of the sorted author table.
df_authors.head(3)

Unnamed: 0,author,total
2,"Guillermo Del Toro, Chuck Hogan",3
0,Cavan Scott,1
1,Claudia Gray,1


In [22]:
# we can also look at timeline, era, storyline

# Book counts per sub-genre and per main genre, largest group first.
df_sub_genres = (
    df_books.groupby(['sub_genre'])['sub_genre']
    .count()
    .rename('total')
    .reset_index()
    .sort_values(by='total', ascending=False)
)

df_main_genres = (
    df_books.groupby(['main_genre'])['main_genre']
    .count()
    .rename('total')
    .reset_index()
    .sort_values(by='total', ascending=False)
)

In [23]:
# Book count per main genre.
df_main_genres

Unnamed: 0,main_genre,total
0,Fiction,8


In [24]:
# Book count per sub-genre, highest count first.
df_sub_genres

Unnamed: 0,sub_genre,total
2,Science Fiction,4
0,Horror,3
1,Philosophy,1


In [26]:
# To get the HTML table back as a string instead, omit the file name:
#df_books.to_html(escape=False, formatters=dict(cover=path_to_image_html))

# Write the table, with covers rendered as <img> tags, to a standalone web page.
df_books.to_html(
    'books_i_have_read_2022.html',
    escape=False,
    formatters={'cover': path_to_image_html},
)