## My older brother passed away at the end of November, 2020. As kids born in the early 70s we literally grew up with Star Wars. And so to honour him and the bond we shared with Star Wars, when 2021 rolled around I made a promise to myself that I would only read Star Wars books for the entire year. This notebook is a way for me to keep track of this promise. 

In [1]:
import html
import json

import pandas as pd
from IPython.core.display import HTML

In [2]:
# Load the reading log. JSON text is UTF-8, so decode it explicitly instead of
# relying on the platform's default encoding (which is not UTF-8 everywhere).
with open('book_data.json', encoding='utf-8') as f:
    books_data = json.load(f)


In [3]:
# Converting links to html tags
def path_to_image_html(path, width=240):
    """Return an ``<img>`` tag that displays the image at ``path``.

    Args:
        path: Image URL or file path, as a string.
        width: Value written to the tag's ``width`` attribute. Defaults to 240,
            which gives every cover the same rendered width.

    Returns:
        The HTML markup for the image tag, as a string.
    """
    # Escape the path so that quotes or ampersands inside a URL cannot end the
    # src attribute early or inject extra attributes into the tag.
    safe_path = html.escape(path, quote=True)
    return f'<img src="{safe_path}" width="{width}" >'


In [4]:
# Build the books table from the list stored under the "books" key of the JSON.
df_books = pd.DataFrame(books_data['books'])

In [5]:
# Show the table as loaded; at this point the cover column holds plain URLs.
df_books

Unnamed: 0,title,author,cover,pages,timeline,era,storyline
0,Tarkin,James Luceno,https://images-na.ssl-images-amazon.com/images...,354,14 BBY,Reign of the Empire,Canon
1,Dark Lord: The Rise of Darth Vader,James Luceno,https://d188rgcu4zozwl.cloudfront.net/content/...,369,19 BBY,Reign of the Empire,Legends
2,Revan,Drew Karpyshyn,https://images-na.ssl-images-amazon.com/images...,368,3950 BBY,Old Republic,Legends
3,Darth Bane: Path of Destruction,Drew Karpyshyn,https://images-na.ssl-images-amazon.com/images...,324,1003 BBY - 1000 BBY,Old Republic,Legends
4,Darth Bane: Rule of Two,Drew Karpyshyn,https://images-na.ssl-images-amazon.com/images...,352,1000 BBY - 990 BBY,Old Republic,Legends
5,Darth Bane: Dynasty of Evil,Drew Karpyshyn,https://images-na.ssl-images-amazon.com/images...,336,980 BBY,Old Republic,Legends
6,Heir to the Empire,Timothy Zahn,https://d188rgcu4zozwl.cloudfront.net/content/...,416,9 ABY,New Republic,Legends
7,Dark Force Rising,Timothy Zahn,https://d188rgcu4zozwl.cloudfront.net/content/...,450,9 ABY,New Republic,Legends
8,The Last Command,Timothy Zahn,https://d188rgcu4zozwl.cloudfront.net/content/...,595,9 ABY,New Republic,Legends
9,Resistance Reborn,Rebecca Roanhorse,https://images-na.ssl-images-amazon.com/images...,432,34 ABY,Rise of the First Order,Canon


In [6]:
# Rearrange the columns so the cover comes first in every row.
df_books = df_books.loc[:, ['cover', 'title', 'author', 'pages', 'timeline', 'era', 'storyline']]

In [7]:
# Render the table as HTML so the covers show up as pictures.
# escape=False keeps the generated <img> markup from being turned into text;
# the formatter is applied to the cover column only.
HTML(
    df_books.to_html(
        escape=False,
        formatters={'cover': path_to_image_html},
    )
)

Unnamed: 0,cover,title,author,pages,timeline,era,storyline
0,,Tarkin,James Luceno,354,14 BBY,Reign of the Empire,Canon
1,,Dark Lord: The Rise of Darth Vader,James Luceno,369,19 BBY,Reign of the Empire,Legends
2,,Revan,Drew Karpyshyn,368,3950 BBY,Old Republic,Legends
3,,Darth Bane: Path of Destruction,Drew Karpyshyn,324,1003 BBY - 1000 BBY,Old Republic,Legends
4,,Darth Bane: Rule of Two,Drew Karpyshyn,352,1000 BBY - 990 BBY,Old Republic,Legends
5,,Darth Bane: Dynasty of Evil,Drew Karpyshyn,336,980 BBY,Old Republic,Legends
6,,Heir to the Empire,Timothy Zahn,416,9 ABY,New Republic,Legends
7,,Dark Force Rising,Timothy Zahn,450,9 ABY,New Republic,Legends
8,,The Last Command,Timothy Zahn,595,9 ABY,New Republic,Legends
9,,Resistance Reborn,Rebecca Roanhorse,432,34 ABY,Rise of the First Order,Canon


In [8]:
# you can also not use formatters but create the html in the field itself
# the cover image sizes differ so I prefer setting a defined image size like above
# I am choosing not to run this cell since it takes up a lot of space with the huge cover image sizes. =]
# df_books_alt = df_books.copy()
# df_books_alt['cover'] = '<img src="'+ df_books_alt['cover'] + '">'
# HTML(df_books_alt.to_html(escape=False))

## Filtering

In [9]:
# A plain boolean mask is enough to filter the list on one criterion,
# here: only the books whose storyline is 'Canon'.
df_books.loc[df_books['storyline'] == 'Canon']

Unnamed: 0,cover,title,author,pages,timeline,era,storyline
0,https://images-na.ssl-images-amazon.com/images...,Tarkin,James Luceno,354,14 BBY,Reign of the Empire,Canon
9,https://images-na.ssl-images-amazon.com/images...,Resistance Reborn,Rebecca Roanhorse,432,34 ABY,Rise of the First Order,Canon
10,https://d188rgcu4zozwl.cloudfront.net/content/...,Thrawn,Timothy Zahn,488,11 BBY - 2 BBY,Age of Rebellion,Canon
11,https://d374oxlv7wyffd.cloudfront.net/B07692QX...,Thrawn: Alliances,Timothy Zahn,483,19 BBY - 2 BBY,Age of Rebellion,Canon
12,https://images-na.ssl-images-amazon.com/images...,Thrawn: Treason,Timothy Zahn,496,1 BBY,Age of Rebellion,Canon
16,https://static.wikia.nocookie.net/starwars/ima...,Lost Stars,Claudia Gray,576,11 BBY - 5 ABY,New Republic,Canon
21,https://images-na.ssl-images-amazon.com/images...,Lords of the Sith,Paul S. Kemp,368,14 BBY,Reign of the Empire,Canon
22,https://images-na.ssl-images-amazon.com/images...,Dark Disciple,Christie Golden,400,19 BBY,Fall of the Empire,Canon
24,https://images-na.ssl-images-amazon.com/images...,Aftermath,Chuck Wendig,410,4 ABY,New Republic,Canon
25,https://images-na.ssl-images-amazon.com/images...,Aftermath: Life Debt,Chuck Wendig,480,5 ABY,New Republic,Canon


In [10]:
# however, we can make filtering more fun and interactive by using WIDGETS

import ipywidgets as widgets
from IPython.display import display

In [11]:
# Sentinel dropdown entry meaning "do not filter".
ALL = 'ALL'


def unique_sorted_values_plus_ALL(array):
    """Return the distinct values of ``array`` in sorted order, with ``ALL`` first.

    Args:
        array: A pandas Series (or other pandas object offering ``dropna`` and
            ``unique``) holding the values to offer as choices.

    Returns:
        A list starting with ``ALL`` followed by the sorted distinct values.
    """
    # Drop missing values first: None/NaN cannot be ordered against strings,
    # so sorting a column with gaps would raise TypeError.
    distinct = array.dropna().unique().tolist()
    return [ALL, *sorted(distinct)]

In [12]:
def _build_column_filter(column):
    """Create the dropdown, output area and change handler for one column.

    The dropdown lists ALL plus the distinct values of ``df_books[column]``.
    Whenever its value changes, the output area is cleared and then shows either
    the whole table (ALL) or only the rows whose ``column`` equals the selection.

    Args:
        column: Name of the ``df_books`` column to filter on.

    Returns:
        A ``(dropdown, output, eventhandler)`` tuple.
    """
    dropdown = widgets.Dropdown(options=unique_sorted_values_plus_ALL(df_books[column]))
    output = widgets.Output()

    def eventhandler(change):
        # Replace whatever the previous selection displayed.
        output.clear_output()
        with output:
            if change.new == ALL:
                display(df_books)
            else:
                display(df_books[df_books[column] == change.new])

    # Only react to changes of the selected value, not to other widget traits.
    dropdown.observe(eventhandler, names='value')
    return dropdown, output, eventhandler


# One filter per column; the names match what the display cells below expect.
dropdown_storyline, output_storyline, dropdown_storyline_eventhandler = _build_column_filter('storyline')
dropdown_era, output_era, dropdown_era_eventhandler = _build_column_filter('era')
dropdown_author, output_author, dropdown_author_eventhandler = _build_column_filter('author')



In [13]:
# Show the storyline selector.
display(dropdown_storyline)

Dropdown(options=('ALL', 'Canon', 'Legends'), value='ALL')

In [14]:
# Output area that receives the table filtered by storyline.
display(output_storyline)

Output()

In [15]:
# Show the era selector.
display(dropdown_era)

Dropdown(options=('ALL', 'Age of Rebellion', 'Fall of the Empire', 'New Republic', 'Old Republic', 'Rebellion'…

In [16]:
# Output area that receives the table filtered by era.
display(output_era)

Output()

In [17]:
# Show the author selector.
display(dropdown_author)

Dropdown(options=('ALL', 'Christie Golden', 'Chuck Wendig', 'Claudia Gray', 'Drew Karpyshyn', 'James Luceno', …

In [18]:
# Output area that receives the table filtered by author.
display(output_author)

Output()

## Stats

In [19]:
# Headline numbers for the year: non-null titles are counted, page counts are summed.
total_pages = df_books['pages'].sum()
total_books = df_books['title'].count()
print(f'2021: \nTotal Books Read : {total_books} \nTotal Pages Read: {total_pages}')

2021: 
Total Books Read : 30 
Total Pages Read: 12692


In [20]:
# Number of books per author, most-read author first.
df_authors = (
    df_books.groupby(['author'])['author']
    .count()
    .to_frame('total')   # name the count column before the author index becomes a column
    .reset_index()
    .sort_values(by='total', ascending=False)
)

In [21]:
# Per-author totals, already sorted by total in descending order.
df_authors

Unnamed: 0,author,total
13,Timothy Zahn,9
3,Drew Karpyshyn,5
1,Chuck Wendig,3
4,James Luceno,2
9,Paul S. Kemp,2
0,Christie Golden,1
2,Claudia Gray,1
5,Joe Schreiber,1
6,John Jackson Miller,1
7,Kevin Anderson,1


In [22]:
# top 3 authors (df_authors is sorted by total, so these are the first three rows)
df_authors.head(3)

Unnamed: 0,author,total
13,Timothy Zahn,9
3,Drew Karpyshyn,5
1,Chuck Wendig,3


In [23]:
# we can also look at timeline, era, storyline

def _count_books_by(column):
    """Return how many books share each value of ``column``, largest count first.

    Args:
        column: Name of the ``df_books`` column to group on.

    Returns:
        A DataFrame with the grouped values in ``column`` and their row counts
        in ``total``, sorted by ``total`` in descending order.
    """
    return (
        df_books.groupby([column])[column]
        .count()
        .to_frame('total')   # name the count column before the group index becomes a column
        .reset_index()
        .sort_values(by='total', ascending=False)
    )


# One count table per grouping column.
df_eras = _count_books_by('era')
df_timelines = _count_books_by('timeline')
df_storylines = _count_books_by('storyline')

In [24]:
# Books per era, sorted by total in descending order.
df_eras

Unnamed: 0,era,total
3,Old Republic,9
2,New Republic,8
4,Rebellion,4
0,Age of Rebellion,3
5,Reign of the Empire,3
1,Fall of the Empire,1
6,Rise of the First Order,1
7,Rise of the Sith,1


In [25]:
# Books per storyline, sorted by total in descending order.
df_storylines

Unnamed: 0,storyline,total
1,Legends,19
0,Canon,11


In [26]:
# Saving the dataframe as a webpage, with the covers rendered as images.
# To get the HTML table back as a string instead of writing a file, use:
# df_books.to_html(escape=False, formatters=dict(cover=path_to_image_html))
df_books.to_html(
    buf='books_i_have_read_2021.html',
    escape=False,
    formatters={'cover': path_to_image_html},
)