In [1]:
from pathlib import Path

import pandas as pd

# Load the Goodreads export into a DataFrame; the file is decoded as ISO-8859-1.
book_rating = pd.read_csv(r'./Goodreads.csv', encoding='ISO-8859-1')

# Preview the first rows to sanity-check the load.
book_rating.head()

Unnamed: 0,ID,Title,Author,Fiction,Genre,Gender,Origin,Language,Pages,Year Published,Centuries,Nobel Prize,Avg_rating,Num_rating,Num_reviews,textblob_sentiment,vader_sentiment,URL
0,1,1984,George Orwell,Fiction,Dystopian,male,UK,English,368,1949,20th,,4.19,4795158,124200,0.142063,0.8847,https://www.goodreads.com/book/show/61439040-1...
1,2,100 Headlines That Changed the World,James Maloney,Non-fiction,History,male,UK,English,309,2012,21st,,3.57,141,23,0.173077,0.6115,https://www.goodreads.com/book/show/14567468-1...
2,3,20 Principles,Hassan Al Banna,Non-fiction,Faith,male,Egypt,Arabic,200,1940,20th,,4.35,37,3,0.228333,0.9349,https://www.goodreads.com/book/show/8594271-us...
3,4,30-Second Psychology,Christian Jarrett,Non-fiction,Psychology,male,UK,English,160,2011,21st,,3.75,1032,96,0.213223,0.9682,https://www.goodreads.com/book/show/11931275-3...
4,5,30-Second Religion,Russell Re Manning,Non-fiction,Faith,male,UK,English,160,2011,21st,,3.58,296,38,-0.083333,0.0,https://www.goodreads.com/book/show/13124582-3...


In [2]:
# List the column labels available for the analysis below.
book_rating.columns

Index(['ID', 'Title', 'Author', 'Fiction', 'Genre', 'Gender', 'Origin',
       'Language', 'Pages', 'Year Published', 'Centuries', 'Nobel Prize',
       'Avg_rating', 'Num_rating', 'Num_reviews', 'textblob_sentiment',
       'vader_sentiment', 'URL'],
      dtype='object')

# Descriptive Statistics

- I had to change the data type of `Num_rating` and `Num_reviews` to integers: they were classified as 'object', so their descriptive statistics weren't shown.


In [3]:
#book_rating.dtypes

# book_rating[['Avg_rating', 'Num_rating', 'Num_reviews']].describe()

In [4]:
import plotly.express as px

# distr_avg_rating = px.histogram(book_rating, x='Num_rating', nbins=20, title='Distribution of Number of Ratings')
# distr_avg_rating.show()

# distr_avg_rating = px.histogram(book_rating, x='Num_reviews', nbins=20, title='Distribution of Number of Reviews')
# distr_avg_rating.show()

In [5]:
# Top performers: the books with the most ratings in the dataset.
# The count lives in one place so the chart title can never drift from the
# number of bars actually plotted (the title used to say "Top 10" for 15 books).
TOP_N_RATED = 15
top_books = book_rating.nlargest(TOP_N_RATED, 'Num_rating')[['Title', 'Author', 'Num_rating']]

fig_top_books = px.bar(
    top_books, x='Num_rating', y='Title', orientation='h',
    color='Num_rating', color_continuous_scale='Viridis',
    title=f'Top {TOP_N_RATED} most popular books by ratings on Goodreads',
    text=top_books['Author'],  # author name is printed on each bar
    template='plotly_dark'
)

fig_top_books.update_layout(
    yaxis={'categoryorder': 'total ascending'},  # order the bars by rating count
    title_x=0.5,
    coloraxis_colorbar=dict(title=''),
    title_font=dict(size=20, color='yellow'),
    width=1100
)

# write_html does not create missing folders, so make sure ./charts exists
# before exporting; otherwise a fresh checkout fails on this cell.
popular_books_path = Path("./charts/popular_books.html")
popular_books_path.parent.mkdir(parents=True, exist_ok=True)
fig_top_books.write_html(str(popular_books_path))

fig_top_books.show()

In [6]:
# The fifteen books with the highest number of written reviews.
top_books_rev = book_rating[['Title', 'Author', 'Num_reviews']].nlargest(15, 'Num_reviews')

# Horizontal bars coloured by review count, with the author shown on each bar.
fig_top_books_rev = px.bar(
    data_frame=top_books_rev,
    x='Num_reviews',
    y='Title',
    text=top_books_rev['Author'],
    orientation='h',
    color='Num_reviews',
    color_continuous_scale='Viridis',
    template='plotly_dark',
    title='Top 15 most reviewed Books on Goodreads',
)

# Centre the title, order the bars by review count and drop the y-axis label.
fig_top_books_rev.update_layout(
    title_x=0.5,
    yaxis=dict(categoryorder='total ascending', title=''),
    coloraxis_colorbar={'title': 'Reviews (in thousands)'},
)

# Keep the author labels inside the bars and hide the legend entry.
fig_top_books_rev.update_traces(
    showlegend=False,
    textfont={'size': 12},
    textposition='inside',
)
fig_top_books_rev.show()

# Genre

* The most prominent genres in my dataset can be categorized into major groups:
    - Faith, Philosophy, Autobiography, Science, Psychology, History, Self-help

* The minor groups, which each have fewer than 5 books:
    - Economy, Dystopian, Biography, Photography, Feminism, Politics, Humor, Drama.

In [7]:
# Collect the distinct genre labels to see which categories exist in the dataset.
unique_genres = book_rating['Genre'].unique()
unique_genres

array(['Dystopian', 'History', 'Faith', 'Psychology', 'Politics',
       'Biography', 'Science', 'Autobiography', 'Self-help', 'Philosophy',
       'Economy', 'Photography', 'Humor', 'Drama', 'Feminism'],
      dtype=object)

In [8]:
# Count how many books carry each genre label.
genre_counts = book_rating['Genre'].value_counts()
genre_counts

Genre
Faith            19
Philosophy       17
Autobiography    14
Psychology       12
Science          11
History           8
Self-help         5
Economy           4
Dystopian         2
Biography         2
Photography       2
Politics          1
Humor             1
Drama             1
Feminism          1
Name: count, dtype: int64

In [9]:
import pandas as pd
import plotly.graph_objects as go
from dash import Dash, dcc, html, Input, Output

# Minor genres: these are folded into a single 'Others' bucket so the dropdown stays short.
minor_genres = ['Economy', 'Dystopian', 'Biography', 'Photography', 'Feminism', 'Politics', 'Humor', 'Drama']

# Create the Grouped_Genre column: minor genres become 'Others', every other
# value is kept unchanged. Vectorised isin/mask replaces the per-row lambda.
book_rating['Grouped_Genre'] = book_rating['Genre'].mask(
    book_rating['Genre'].isin(minor_genres), 'Others'
)

# Initialise the Dash app
app = Dash(__name__)

# Define the app layout: a dark page holding one chart container, with the
# genre dropdown overlaid on the chart's top-left corner.
app.layout = html.Div(style={'backgroundColor': '#1e1e1e'}, children=[
    html.Div(style={'position': 'relative', 'width': '100%', 'height': '600px'}, children=[

        # Dropdown positioned absolutely within the chart container.
        # Dash style dictionaries use camelCased keys, so the stacking order
        # is set with 'zIndex' (the hyphenated 'z-index' is not a valid key).
        html.Div(style={'position': 'absolute', 'top': '20px', 'left': '20px', 'zIndex': 1, 'width': '200px'}, children=[
            html.Label('Select Genre', style={'color': 'salmon', 'fontSize': 18, 'display': 'block', 'textAlign': 'center', 'marginBottom': '10px'}),
            dcc.Dropdown(
                id='genre-dropdown',
                options=[{'label': genre, 'value': genre} for genre in book_rating['Grouped_Genre'].unique()],
                value='Faith',
                style={'width': '200px', 'display': 'inline-block'},
                clearable=False  # a genre is always selected, so the callback never receives None
            )
        ]),
        dcc.Graph(id='genre-bar-chart', style={'height': '100%'})
    ])
])

# Callback: redraw the bar chart whenever the dropdown selection changes.
@app.callback(
    Output('genre-bar-chart', 'figure'),
    Input('genre-dropdown', 'value')
)
def update_chart(selected_genre):
    """Build the horizontal bar chart of rating counts for one genre group.

    Args:
        selected_genre: Value chosen in the 'genre-dropdown' component; it is
            matched against the 'Grouped_Genre' column of ``book_rating``.

    Returns:
        A ``plotly.graph_objects.Figure`` with one bar per book in the selected
        group, sorted by 'Num_rating' and drawn on a logarithmic x-axis.
    """
    genre_books = book_rating[book_rating['Grouped_Genre'] == selected_genre].sort_values(by='Num_rating')

    fig = go.Figure()

    fig.add_trace(
        go.Bar(
            x=genre_books['Num_rating'],
            y=genre_books['Title'],
            orientation='h',
            text=genre_books['Num_rating'],   # rating count printed on the bar
            textposition='auto',
            hovertext=genre_books['Author'],  # hovering shows only the author
            hoverinfo='text',
            marker=dict(
                color=genre_books['Num_rating'],
                colorscale='Viridis',
                showscale=True,
                colorbar=dict(
                    title='',
                    thickness=15
                )
            )
        )
    )

    # The semicolon belongs inside the style attribute; it used to sit after the
    # closing quote, which left the span markup malformed.
    title_text = f'Goodreads Ratings for books I\'ve read in the <span style="color: fuchsia;">{selected_genre}</span> Genre'

    fig.update_layout(
        template='plotly_dark',
        title=title_text,
        title_x=0.5,
        title_font_size=20,
        title_font_color='yellow',
        xaxis_title='Number of Ratings (Log Scale)',
        yaxis_title='Book Title'
    )

    # Log scale, matching the x-axis title set above.
    fig.update_xaxes(type="log")

    return fig

# Once running, open the app in the browser at http://127.0.0.1:8050

if __name__ == '__main__':
    # app.run is the supported entry point; app.run_server was deprecated in
    # later Dash 2.x releases and is no longer available in Dash 3.
    app.run(debug=True)