In [1]:
import pandas as pd
import numpy as np
import math
from bokeh.plotting import figure, output_file, show,save
from bokeh.palettes import brewer, d3, inferno, cividis, viridis
from bokeh.models import ColumnDataSource, WheelZoomTool, HoverTool, CategoricalColorMapper, LinearColorMapper, ContinuousColorMapper, Legend, LegendItem, Title
from bokeh.core.properties import value
from bokeh.layouts import row, column,layout

## Load Data

In [2]:
books = pd.read_csv('../data/raw/books.csv')
ratings = pd.read_csv('../data/raw/ratings.csv')
book_tags = pd.read_csv('../data/raw/book_tags.csv')
tags = pd.read_csv('../data/raw/tags.csv')
to_read = pd.read_csv('../data/raw/to_read.csv')

In [3]:
books.columns

Index(['book_id', 'goodreads_book_id', 'best_book_id', 'work_id',
       'books_count', 'isbn', 'isbn13', 'authors', 'original_publication_year',
       'original_title', 'title', 'language_code', 'average_rating',
       'ratings_count', 'work_ratings_count', 'work_text_reviews_count',
       'ratings_1', 'ratings_2', 'ratings_3', 'ratings_4', 'ratings_5',
       'image_url', 'small_image_url'],
      dtype='object')

## Add Features

In [4]:
books['average_rating']=  (5*books['ratings_5']+4*books['ratings_4']+3*books['ratings_3']+2*books['ratings_2']+1*books['ratings_1'])/\
                         (books['ratings_5']+books['ratings_4']+books['ratings_3']+books['ratings_2']+books['ratings_1'])

In [5]:
books = books.loc[books['original_publication_year'].notnull()]

In [6]:
books['main_author'] = books.apply(lambda x: x['authors'].split(',')[0], axis=1)

In [7]:
books = books.sort_values(['main_author','original_publication_year'])
books['nth_release'] = books.groupby('main_author').cumcount()+1

In [8]:
books = books.sort_values(['original_publication_year','ratings_count'], ascending=False)

## Merge Data

In [9]:
tags_df = book_tags.merge(tags)

In [10]:
ratings_df = ratings.merge(books[['book_id','title','authors','original_publication_year']])

## Viz 1
Are newer books more popular (# of ratings) than older books?

In [11]:
books1_df = books
books1_df = books1_df.loc[(books1_df['original_publication_year'] != 2017) & (books1_df['original_publication_year'] >= 1800)]

In [12]:
total_by_year = books1_df.groupby('original_publication_year')['ratings_count'].sum().reset_index()
rank_1 = books1_df.groupby('original_publication_year')['title'].nth(0).reset_index().rename({'title':'rank1'},axis='columns')
rank_2 = books1_df.groupby('original_publication_year')['title'].nth(1).reset_index().rename({'title':'rank2'},axis='columns')
rank_3 = books1_df.groupby('original_publication_year')['title'].nth(2).reset_index().rename({'title':'rank3'},axis='columns')
rank_4 = books1_df.groupby('original_publication_year')['title'].nth(3).reset_index().rename({'title':'rank4'},axis='columns')
rank_5 = books1_df.groupby('original_publication_year')['title'].nth(4).reset_index().rename({'title':'rank5'},axis='columns')
rankings = total_by_year.merge(rank_1,how='left').merge(rank_2,how='left').merge(rank_3,how='left').merge(rank_4,how='left').merge(rank_5,how='left')

In [13]:
rank_1_count = rank_1.merge(books1_df[['original_publication_year','title','ratings_count']],right_on=['original_publication_year','title'],left_on=['original_publication_year','rank1'],how='inner')[['original_publication_year','title','ratings_count']].set_index('original_publication_year')
rank_2_count = rank_2.merge(books1_df[['original_publication_year','title','ratings_count']],right_on=['original_publication_year','title'],left_on=['original_publication_year','rank2'],how='inner')[['original_publication_year','title','ratings_count']].set_index('original_publication_year')
rank_3_count = rank_3.merge(books1_df[['original_publication_year','title','ratings_count']],right_on=['original_publication_year','title'],left_on=['original_publication_year','rank3'],how='inner')[['original_publication_year','title','ratings_count']].set_index('original_publication_year')
rank_4_count = rank_4.merge(books1_df[['original_publication_year','title','ratings_count']],right_on=['original_publication_year','title'],left_on=['original_publication_year','rank4'],how='inner')[['original_publication_year','title','ratings_count']].set_index('original_publication_year')
rank_5_count = rank_5.merge(books1_df[['original_publication_year','title','ratings_count']],right_on=['original_publication_year','title'],left_on=['original_publication_year','rank5'],how='inner')[['original_publication_year','title','ratings_count']].set_index('original_publication_year')

In [14]:
ratings_count_df = pd.concat([total_by_year.set_index('original_publication_year')['ratings_count'],rank_1_count[['title','ratings_count']],rank_2_count[['title','ratings_count']],rank_3_count[['title','ratings_count']],rank_4_count[['title','ratings_count']],rank_5_count[['title','ratings_count']]],axis=1,join='outer')
ratings_count_df['title'] = ratings_count_df['title'].fillna('N/A')
ratings_count_df['ratings_count'] = ratings_count_df['ratings_count'].fillna(0)

In [15]:
other_books = ratings_count_df.values.transpose()[0]
for i in range(1,6):
    other_books = other_books-ratings_count_df.values.transpose()[2*i]

In [16]:
rank = ['Other books','#1 most popular book','#2 most popular book','#3 most popular book','#4 most popular book','#5 most popular book']
years = total_by_year['original_publication_year'].unique()
data1 = ColumnDataSource(data={
    'years': years,
    'Other books': other_books,
    '#1 most popular book': ratings_count_df.values.transpose().tolist()[2],
    '#2 most popular book': ratings_count_df.values.transpose().tolist()[4],
    '#3 most popular book': ratings_count_df.values.transpose().tolist()[6],
    '#4 most popular book': ratings_count_df.values.transpose().tolist()[8],
    '#5 most popular book': ratings_count_df.values.transpose().tolist()[10],
    'total_ratings': ratings_count_df.values.transpose().tolist()[0],
    'rank1': ratings_count_df.values.transpose().tolist()[1],
    'rank2': ratings_count_df.values.transpose().tolist()[3],
    'rank3': ratings_count_df.values.transpose().tolist()[5],
    'rank4': ratings_count_df.values.transpose().tolist()[7],
    'rank5': ratings_count_df.values.transpose().tolist()[9],
})

In [17]:
colors1 = brewer['Spectral'][6]

In [18]:
p1 = figure(plot_width=1600, plot_height=600, title='Visualization 1: Total Ratings by Year')
p1.vbar_stack(rank,x='years', width=1, source=data1, color=colors1, legend=[value(x) for x in rank])

[GlyphRenderer(id='1053', ...),
 GlyphRenderer(id='1066', ...),
 GlyphRenderer(id='1080', ...),
 GlyphRenderer(id='1094', ...),
 GlyphRenderer(id='1108', ...),
 GlyphRenderer(id='1122', ...)]

In [19]:
p1.add_tools(HoverTool(
    tooltips=[
        ('Year','@years'),
        ('Number of Ratings','@total_ratings'),
        ('#1 book','@rank1'),
        ('#2 book','@rank2'),
        ('#3 book','@rank3'),
        ('#4 book','@rank4'),
        ('#5 book','@rank5')
    ],
    formatters={
        'Year' : 'datetime',
        'Number of Ratings' : 'numeral',
        '#1 book': 'printf',
        '#2 book': 'printf',
        '#3 book': 'printf',
        '#4 book': 'printf',
        '#5 book': 'printf',
    },
    mode='mouse'
))
p1.legend.location = "top_left"
p1.legend.orientation = "vertical"
p1.legend.label_text_font_size = '8pt'
p1.title.text_font_size = '20pt'
p1.xaxis.axis_label = 'Publication Year'
p1.yaxis.axis_label = 'Total Ratings'
p1.left[0].formatter.use_scientific = False

## Viz 2
What are the top 300 most read books and how do they tend to score?

In [20]:
for i in range(1,11):
    print(str(np.round(i*0.1,2))+'th Quantile: '+str(books['original_publication_year'].quantile(q=i*0.1)))

0.1th Quantile: 1957.0
0.2th Quantile: 1984.0
0.3th Quantile: 1995.0
0.4th Quantile: 2000.0
0.5th Quantile: 2004.0
0.6th Quantile: 2007.0
0.7th Quantile: 2010.0
0.8th Quantile: 2012.0
0.9th Quantile: 2013.0
1.0th Quantile: 2017.0


In [21]:
books2_df = books
books2_df['publication_date_revised'] = books2_df['original_publication_year']
books2_df.loc[books2_df['publication_date_revised'] < 1800,'publication_date_revised'] = 1800
books2_df = books2_df.loc[(books2_df['publication_date_revised'] != 2017) & (books2_df['publication_date_revised'] >= 1800)]

In [22]:
bins = [(-5000,1800,'Pre 19th Century'),(1800,1900,'19th Century'),(1900,1946,'Early 20th Century'),
        (1946,1980,'Mid 20th Century'),(1980,2000,'Late 20th Century'),(2000,2020,'21st Century')]

for s,e,r in bins:
    books2_df.loc[(books2_df['original_publication_year'] >= s) & (books2_df['original_publication_year'] < e),'date_bins'] = r

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [23]:
books2_df_300 = books2_df.nlargest(300,'ratings_count')
books2_df_300['count_size'] = books2_df_300['ratings_count'].apply(lambda x: x/100000)
books2_df_300.loc[books2_df_300['count_size'] < 3, 'count_size'] = 3

In [24]:
colors2 = brewer['Set2'][6]
color_map2 = CategoricalColorMapper(factors=books2_df['date_bins'].unique(),palette=colors2)

In [25]:
data2 = ColumnDataSource(books2_df_300)

In [26]:
books2_df_30 = books2_df.nlargest(20,'ratings_count')

In [27]:
p2 = figure(plot_width=1600, plot_height=600, x_range=(1790,2050), title='Visualization 2: Top 300 Books by Year and Rating')
p2_circles = p2.circle('publication_date_revised', 'average_rating', source=data2, size='count_size', line_width=2, color={'field': 'date_bins', 'transform': color_map2}, legend='date_bins')
p2_text = p2.text(books2_df_30['publication_date_revised'], books2_df_30['average_rating'], books2_df_30['title'], x_offset=3, y_offset= 3, text_font_size= '8pt')

In [28]:
p2.add_tools(HoverTool(
    renderers=[p2_circles],
    tooltips=[
        ('Book', '@title'),
        ('Author', '@main_author'),
        ('Year','@original_publication_year'),
        ('Number of Ratings','@ratings_count'),
        ('Average Rating','@average_rating'),
    ],
    formatters={
        'Book': 'printf',
        'Author': 'printf',
        'Year' : 'datetime',
        'Number of Ratings' : 'printf',
        'Average Rating' : 'printf'
    },
    mode='mouse'
))
p2.legend.location = "top_left"
p2.legend.orientation = "vertical"
p2.legend.label_text_font_size = '8pt'
p2.title.text_font_size = '20pt'
p2.xaxis.axis_label = 'Publication Year'
p2.yaxis.axis_label = 'Average Rating'

## Viz 3
What is the mean and variance of book ratings by period? In other words, are classics better regarded than contemporary books?

In [29]:
books3_df = books

In [30]:
bins = [(-5000,1800,'Pre 19th Century'),(1800,1900,'19th Century'),(1900,1946,'Early 20th Century'),
        (1946,1980,'Mid 20th Century'),(1980,2000,'Late 20th Century'),(2000,2020,'21st Century')]

for s,e,r in bins:
    books3_df.loc[(books3_df['original_publication_year'] >= s) & (books3_df['original_publication_year'] < e),'date_bins'] = r

In [31]:
period = ['Pre 19th Century','19th Century','Early 20th Century',
          'Mid 20th Century','Late 20th Century','21st Century']
#period = period[::-1]

In [32]:
def build_quantiles(df):
    ## build quantiles
    groups = df.groupby('date_bins')
    q1 = groups.quantile(q=0.25)
    q2 = groups.quantile(q=0.5)
    q3 = groups.quantile(q=0.75)
    iqr = q3 - q1
    upper = q3 + 1.5*iqr
    lower = q1 - 1.5*iqr
    upper['original_publication_year'] = q3['original_publication_year']
    lower['original_publication_year'] = q3['original_publication_year']
    ## build outliers
    def outliers(group):
        cat = group.name
        return group[(group.average_rating > upper.loc[cat]['average_rating']) | (group.average_rating < lower.loc[cat]['average_rating'])]['average_rating']
    out = groups.apply(outliers).dropna()
    if not out.empty:
        outx = []
        outy = []
    for keys in out.index:
        outx.append(keys[0])
        outy.append(out.loc[keys[0]].loc[keys[1]])
    ## get qmin and qmax
    qmin = groups.quantile(q=0.00)
    qmax = groups.quantile(q=1.00)
    upper.average_rating = [min([x,y]) for (x,y) in zip(list(qmax.loc[:,'average_rating']),upper.average_rating)]
    lower.average_rating = [max([x,y]) for (x,y) in zip(list(qmin.loc[:,'average_rating']),lower.average_rating)]
    ## build quantile_dict
    value_dict = {
                    'q1': q1.sort_values('original_publication_year',ascending=True),
                    'q2': q2.sort_values('original_publication_year',ascending=True),
                    'q3': q3.sort_values('original_publication_year',ascending=True),
                    'iqr': iqr.sort_values('original_publication_year',ascending=True),
                    'upper': upper.sort_values('original_publication_year',ascending=True),
                    'lower': lower.sort_values('original_publication_year',ascending=True),
                    'out': out,
                    'outx': outx,
                    'outy': outy
                    }
    return value_dict

In [33]:
def create_plot(p3, values, period):
    p3.segment(period, values['upper'].average_rating, period, values['q3'].average_rating, line_color="black")
    p3.segment(period, values['lower'].average_rating, period, values['q1'].average_rating, line_color="black")
    p3.vbar(period, 0.25, values['q2'].average_rating, values['q3'].average_rating, fill_color="#32CD32", line_color="black")
    p3.vbar(period, 0.25, values['q1'].average_rating, values['q2'].average_rating, fill_color="#FFC0CB", line_color="black")
    p3.rect(period, values['lower'].average_rating, 0.3, 0.01, line_color="black")
    p3.rect(period, values['upper'].average_rating, 0.3, 0.01, line_color="black")
    if not values['out'].empty:
        p3.circle(values['outx'], values['outy'], size=6, color="#F38630", fill_alpha=0.6)
    p3.grid.grid_line_width = 1
    p3.xaxis.major_label_text_font_size="9pt"
    p3.title.text_font_size = '16pt'
    p3.xaxis.axis_label = 'Periods'
    p3.xaxis.major_label_orientation = math.pi/4
    p3.yaxis.axis_label = 'Average Rating'
    p3.ygrid.minor_grid_line_color = 'navy'
    p3.ygrid.minor_grid_line_alpha = 0.1

In [34]:
p3_300 = figure(plot_width=400, plot_height=600, tools="", x_range=period, y_range=(2.5, 5), toolbar_location=None, title='Top 300 Books')
p3_1500 = figure(plot_width=400, plot_height=600, tools="", x_range=period, y_range=(2.5, 5), toolbar_location=None, title='Top 1500 Books')
p3_10000 = figure(plot_width=400, plot_height=600, tools="", x_range=period, y_range=(2.5, 5), toolbar_location=None, title='Top 10000 Books')

In [35]:
values_300 = build_quantiles(books3_df.nlargest(300,'ratings_count'))
values_1500 = build_quantiles(books3_df.nlargest(1500,'ratings_count'))
values_10000 = build_quantiles(books3_df.nlargest(10000,'ratings_count'))

In [36]:
create_plot(p3_300,values_300,period)
create_plot(p3_1500,values_1500,period)
create_plot(p3_10000,values_10000,period)

In [37]:
p2.add_layout(Title(text='Visualization 3: Distribution of Rating by Period', text_font_size='20pt'), 'below')

In [38]:
p3_all = row([p3_300,p3_1500,p3_10000])

## Viz 4


In [39]:
books4_df = books

In [40]:
top_authors4 = books.groupby('main_author')['ratings_count'].sum().sort_values(ascending=False)[0:300]
books4_df = books4_df.loc[books4_df['main_author'].isin(top_authors4.index)]

In [41]:
ratings_count_4 = books4_df.groupby('main_author')['ratings_count'].sum().reset_index()
releases_4 = books4_df.groupby('main_author')['nth_release'].max().reset_index()
releases_4['num_releases'] = releases_4['nth_release'].apply(lambda x: str(x)+' releases')
releases_4.loc[releases_4['nth_release'] > 14, 'num_releases'] = '15+ releases'
releases_4['num_releases'] = releases_4['num_releases'].astype(str)
avg_rating_4 = books4_df.groupby('main_author')['average_rating'].mean().reset_index()
earliest_book_4 = books4_df.groupby('main_author')['original_publication_year'].min().reset_index()

In [42]:
books4_df = ratings_count_4.merge(releases_4).merge(avg_rating_4).merge(earliest_book_4)
books4_df = books4_df.sort_values('nth_release')
books4_df['count_size'] = books4_df['ratings_count']/100000
books4_df.loc[books4_df['count_size'] < 3, 'count_size'] = 3

In [43]:
books4_df_30 = books4_df.nlargest(30,'ratings_count')

In [44]:
data4 = ColumnDataSource(books4_df)

In [45]:
color_map4 = CategoricalColorMapper(factors=[str(i)+' releases' for i in range(1,15)]+['15+ releases'], palette=viridis(22)[::-1])

In [46]:
p4 = figure(plot_width=1600, plot_height=600, x_range=(1800,2020), title='Visualization 4: Most Popular Authors')

In [47]:
p4_circle = p4.circle(x='original_publication_year',y='average_rating', source=data4, color={'field': 'num_releases', 'transform': color_map4} ,size='count_size',legend='num_releases')
p4_text = p4.text(books4_df_30['original_publication_year'], books4_df_30['average_rating'], books4_df_30['main_author'], x_offset=3, y_offset= 3, text_font_size= '8pt')

In [48]:
p4.add_tools(HoverTool(
    renderers=[p4_circle],
    tooltips=[
        ('Author', '@main_author'),
        ('Year of First Publication','@original_publication_year'),
        ('Number of Books','@nth_release'),
        ('Number of Ratings','@average_rating'),
        ('Average Rating','@average_rating'),
    ],
    formatters={
        'Author': 'printf',
        'Year of First Publication' : 'datetime',
        'Number of Ratings' : 'printf',
        'Average Rating' : 'printf'
    },
    mode='mouse'
))
p4.legend.location = "top_left"
p4.legend.orientation = "vertical"
p4.legend.label_text_font_size = '8pt'
p4.xaxis.major_label_text_font_size="9pt"
p4.title.text_font_size = '20pt'
p4.xaxis.axis_label = 'Year of First Publication'
p4.yaxis.axis_label = 'Average Rating of Books'
p4.xgrid.minor_grid_line_color = 'navy'
p4.xgrid.minor_grid_line_alpha = 0.1

## Viz 5
For authors with multiple books, how does their popularity and ratings change over time?

In [49]:
books5_df = books

In [50]:
authors_filtered = books5_df.loc[(books5_df['nth_release'] == 2) & (books5_df['main_author'] != 'Anonymous')]

In [51]:
top_authors_19th = books.loc[(books['original_publication_year'] >= 1800) & (books['original_publication_year'] < 1900) & (books['main_author'].isin(authors_filtered['main_author']))].groupby('main_author')['ratings_count'].sum().sort_values(ascending=False)[0:12]
top_authors_e20th = books.loc[(books['original_publication_year'] >= 1900) & (books['original_publication_year'] < 1946) & (books['main_author'].isin(authors_filtered['main_author']))].groupby('main_author')['ratings_count'].sum().sort_values(ascending=False)[0:6]
top_authors_m20th = books.loc[(books['original_publication_year'] >= 1946) & (books['original_publication_year'] < 1980) & (books['main_author'].isin(authors_filtered['main_author']))].groupby('main_author')['ratings_count'].sum().sort_values(ascending=False)[0:5]
top_authors_l20th = books.loc[(books['original_publication_year'] >= 1980) & (books['original_publication_year'] < 2000) & (books['main_author'].isin(authors_filtered['main_author']))].groupby('main_author')['ratings_count'].sum().sort_values(ascending=False)[0:5]
top_authors_21st = books.loc[(books['original_publication_year'] >= 2000) & (books['original_publication_year'] < 2020) & (books['main_author'].isin(authors_filtered['main_author']))].groupby('main_author')['ratings_count'].sum().sort_values(ascending=False)[0:5]

In [52]:
top_authors5 = pd.concat([top_authors_19th,top_authors_e20th,top_authors_m20th,top_authors_l20th,top_authors_21st], axis=0)

In [53]:
authors = books5_df.loc[books5_df['main_author'].isin(top_authors5.index),'main_author']

In [54]:
books5_df = books5_df.loc[books5_df['main_author'].isin(authors)][['main_author','title','original_publication_year','ratings_count','average_rating','nth_release']]
books5_df = books5_df.sort_values(['original_publication_year','nth_release','main_author'])
books5_df = books5_df.drop_duplicates(['main_author','original_publication_year'], keep='first')

In [55]:
publication_years_5 = []
ratings_count_5 = []
main_author_5 = []
title_5 = []
mostpop_5 = []
for author in books5_df['main_author'].unique():
    year_list = books5_df.loc[books5_df['main_author'] == author,'original_publication_year'].tolist()
    publication_years_5.append(year_list)
    count_list = books5_df.loc[books5_df['main_author'] == author,'ratings_count'].tolist()
    ratings_count_5.append(count_list)
    author_list = [author for x in count_list]
    main_author_5.append(author_list)
    title_list = books5_df.loc[books5_df['main_author'] == author,'title'].tolist()
    title_5.append(title_list)
    mostpop_list = books5_df.loc[books5_df['main_author'] == author].sort_values('ratings_count',ascending=False)['title'].tolist()
    mostpop_5.append(mostpop_list)
    

In [56]:
data5_1 = ColumnDataSource(data={
    'author': main_author_5,
    'title': title_5,
    'year': publication_years_5,
    'count': ratings_count_5,
    'print_author': [i[0] for i in main_author_5],
    'print_mostpop': [i[0] for i in mostpop_5],
    'print_year': [i[0] for i in publication_years_5],
    'print_books': [len(i) for i in ratings_count_5],
    'print_count': [sum(i) for i in ratings_count_5],
})

In [57]:
data5_2 = ColumnDataSource(data={
    'author': [i for sublist in main_author_5 for i in sublist],
    'title': [i for sublist in title_5 for i in sublist],
    'year': [i for sublist in publication_years_5 for i in sublist],
    'count': [i for sublist in ratings_count_5 for i in sublist]
})

In [58]:
data5_3 = ColumnDataSource(data={
    'author': [i[0] for i in main_author_5],
    'year': [i[0] for i in publication_years_5],
    'count': [i[0] for i in ratings_count_5]
})

In [59]:
colors5 = d3['Category20'][20]
color_map5 = CategoricalColorMapper(factors=books5_df['main_author'].unique(),palette=colors5*3)

In [60]:
p5 = figure(plot_width=1600, plot_height=700, x_range=(1800,1870), y_axis_type='log', title='Visualization 5: Popularity of Author Throughout Career (scrollable)')

In [61]:
p5_circle1 = p5.circle(x='year',y='count', source=data5_2, size=8, color={'field': 'author', 'transform': color_map5})
p5_line = p5.multi_line(xs='year', ys='count', source=data5_1, line_color='black', line_width=2.5, line_alpha= 0.025, hover_line_alpha=1)
p5_circle2 = p5.text(x='year',y='count', text='author', source=data5_3, text_font_size= '8pt', x_offset=5, y_offset= 4)

In [62]:
p5.add_tools(HoverTool(
    renderers=[p5_circle1],
    tooltips=[
        ('Author', '@author'),
        ('Title', '@title'),
        ('Publication Year','@year'),
        ('Number of Ratings','@count'),
    ],
    formatters={
        'Author': 'printf',
        'Title': 'printf',
        'Publication Year' : 'datetime',
        'Number of Ratings' : 'printf',
    },
    mode='mouse'
))

p5.add_tools(HoverTool(
    renderers=[p5_line],
    tooltips=[
        ('Author', '@print_author'),
        ('Most Popular Book', '@print_mostpop'),
        ('First Publication Year','@print_year'),
        ('Number of Publications', '@print_books'),
        ('Number of Ratings','@print_count')
    ],
    formatters={
        'Author': 'printf',
        'Most Popular Book': 'printf',
        'First Publication Year': 'datetime',
        'Number of Publications' : 'printf',
        'Number of Ratings' : 'printf'
    },
    mode='mouse',
    line_policy='nearest'
))
p5.title.text_font_size = '20pt'
p5.xaxis.axis_label = 'Publication Year'
p5.yaxis.axis_label = 'Total Ratings (log scale)'
p5.xgrid.grid_line_color = None
p5.ygrid.grid_line_color = None

## Create Grid

In [63]:
l = layout([
            [p1],
            [p2],
            [p3_all],
            [p4],
            [p5]
            ])

In [64]:
output_file('../final.html', mode='inline')
save(l)

'D:\\Code\\UCSD DSE\\DSE241\\final\\final.html'