In [2]:
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import seaborn as sns

# Load the Goodreads CSV that sits next to the notebook into a DataFrame.
# The file is decoded as ISO-8859-1 (Latin-1); presumably it contains bytes that
# are not valid UTF-8 — TODO confirm against the source file.
books = pd.read_csv(r'./Goodreads.csv',  encoding='ISO-8859-1')
# Show the first rows as this cell's output.
books.head()

Unnamed: 0,ID,Title,Author,Fiction,Genre,Gender,Origin,Language,Pages,Year Published,Centuries,Nobel Prize,Avg_rating,Num_rating,Num_reviews,textblob_sentiment,vader_sentiment,URL
0,1,1984,George Orwell,Fiction,Dystopian,male,UK,English,368,1949,20th,,4.19,4795158,124200,0.142063,0.8847,https://www.goodreads.com/book/show/61439040-1...
1,2,100 Headlines That Changed the World,James Maloney,Non-fiction,History,male,UK,English,309,2012,21st,,3.57,141,23,0.173077,0.6115,https://www.goodreads.com/book/show/14567468-1...
2,3,20 Principles,Hassan Al Banna,Non-fiction,Faith,male,Egypt,Arabic,200,1940,20th,,4.35,37,3,0.228333,0.9349,https://www.goodreads.com/book/show/8594271-us...
3,4,30-Second Psychology,Christian Jarrett,Non-fiction,Psychology,male,UK,English,160,2011,21st,,3.75,1032,96,0.213223,0.9682,https://www.goodreads.com/book/show/11931275-3...
4,5,30-Second Religion,Russell Re Manning,Non-fiction,Faith,male,UK,English,160,2011,21st,,3.58,296,38,-0.083333,0.0,https://www.goodreads.com/book/show/13124582-3...


In [3]:
# Display the column labels of the loaded frame.
books.columns

Index(['ID', 'Title', 'Author', 'Fiction', 'Genre', 'Gender', 'Origin',
       'Language', 'Pages', 'Year Published', 'Centuries', 'Nobel Prize',
       'Avg_rating', 'Num_rating', 'Num_reviews', 'textblob_sentiment',
       'vader_sentiment', 'URL'],
      dtype='object')

In [4]:
# # Treemap of books by genre, title and author (disabled: every line of this cell is commented out)
# fig = px.treemap(books, path=['Genre', 'Title', 'Author'], values='ID', title='Books by Genre and Author')

# #Update layout
# fig.update_layout(
#     title_x=0.5,
#     title_font_size=30,
#     height=800,
#     width=1200, 
#     margin=dict(
#         l=20, r=20, t=70, b=20
#     )
# )

# fig.show()

In [27]:
# Bubble chart: one marker per book, placed by publication year (x) and
# origin (y), coloured by genre and sized by page count.
fig = px.scatter(
    books,
    x='Year Published',
    y='Origin',
    color='Genre',
    size='Pages',
    title='Bubble Chart (origin, year published, pages)',
    hover_data={'Title': True, 'Author': True},
    size_max=30,
    template='plotly_dark',
)

# Centre the title and fix the figure height.
fig.update_layout(
    title_x=0.5,
    title_font_size=25,
    height=700,
)

# Save a standalone HTML copy of the chart. write_html does not create missing
# parent directories, so make sure ./charts exists first; otherwise a fresh
# checkout fails here with FileNotFoundError.
chart_path = Path("./charts/bubble_chart.html")
chart_path.parent.mkdir(parents=True, exist_ok=True)
fig.write_html(str(chart_path))

fig.show()

In [None]:
# The thickest books I've ever read: the 15 rows with the highest page counts,
# drawn as a horizontal bar chart.
# (pandas and plotly.express are imported once in the first cell; they are not
# re-imported here.)

# Sort by page count, largest first, and keep the first 15 rows. The bar label is
# the book title only. `assign` returns a new frame, so the column is added
# without writing into a slice of the sorted frame.
# NOTE: despite its name, `top_10_books` holds 15 rows; the name is kept unchanged
# in case other cells reference it.
top_10_books = (
    books
    .sort_values('Pages', ascending=False)
    .head(15)
    .assign(Label=lambda frame: frame['Title'])
)

# One colour per bar: 15 entries, matching the 15 rows selected above.
colors = ['#1f77b4', '#2ca02c', '#d62728', '#9467bd',
          '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf',
          '#f5a3a2', '#ffb6c1', '#ffcccb', '#ff7f0e', '#c2c2f0', '#ff6666']

# Plot the chart.
fig = px.bar(
    top_10_books,
    x='Pages',
    y='Label',
    orientation='h',
    title='Top 15 Lengthiest Reads by Title',
    labels={'Label': 'Book', 'Pages': 'Number of pages'},
    height=600,
    # Colour by the label column (referenced by name rather than passed as a
    # Series) so each bar takes its own entry from `colors`.
    color='Label',
    color_discrete_sequence=colors,
    hover_data={'Origin': True, 'Genre': True, 'Year Published': True},
    template='plotly_dark',
)

fig.update_layout(
    title_x=0.5,
    title_font_size=30,
    # Order the bars by their page total instead of by row order.
    yaxis={'categoryorder': 'total ascending'},
    # The y axis already names every book, so a per-bar legend is redundant.
    showlegend=False,
)

fig.show()


In [7]:
# The oldest books I've ever read: the 15 rows with the earliest publication
# years, drawn as a scatter plot.
# (pandas and plotly.express are imported once in the first cell; they are not
# re-imported here.)

# Sort by publication year, earliest first, and keep the first 15 rows. The label
# combines title and author as "<Title> by <Author>". `assign` returns a new
# frame, so the column is added without writing into a slice of the sorted frame.
oldest_books = (
    books
    .sort_values('Year Published', ascending=True)
    .head(15)
    .assign(Label=lambda frame: frame['Title'] + ' by ' + frame['Author'])
)

# Plot the chart.
# NOTE(review): marker size is driven by 'Year Published'; plotly requires
# non-negative marker sizes, so a negative (BCE) year would make this call raise —
# confirm the data contains none.
fig = px.scatter(
    oldest_books,
    x='Year Published',
    y='Label',
    color='Year Published',
    size='Year Published',
    title='Top 15 oldest Books by Title and Author',
    height=600,
    hover_data={'Origin': True, 'Genre': True, 'Pages': True},
    template='plotly_dark',
)

fig.update_layout(
    title_x=0.5,
    title_font_size=30,
    xaxis_title='Year Published',
    yaxis_title='Book',
    # Order the rows by their x total (the year) instead of by row order.
    yaxis={'categoryorder': 'total ascending'},
)

fig.show()

**Authors who have written more than one book that I've read**

In [8]:
# Count how many rows each author has, as a two-column frame: 'Author', 'Book Count'.
author_counts = (
    books
    .groupby('Author')
    .size()
    .reset_index(name='Book Count')
)

# Keep only authors that appear more than once; the original row index is preserved.
multiple_books_authors = author_counts.loc[author_counts['Book Count'].gt(1)]

multiple_books_authors

Unnamed: 0,Author,Book Count
28,George Orwell,2
49,Kassim Ahmad,2
61,Mohd Asri Zainul Abidin,2
76,Sigmund Freud,2
