In [10]:
import pandas as pd

# Read the Goodreads export from the working directory, decoding it as ISO-8859-1.
book_rating = pd.read_csv(
    './Goodreads.csv',
    encoding='ISO-8859-1',
)

# Preview the first five rows as a quick check that the load worked.
book_rating.head(5)

Unnamed: 0,ID,Title,Author,Fiction,Genre,Gender,Origin,Language,Pages,Year Published,Centuries,Nobel Prize,Avg_rating,Num_rating,Num_reviews,URL
0,1,1984,George Orwell,Fiction,Dystopian,male,UK,English,368,1949,20th,,4.19,4795158,124200,https://www.goodreads.com/book/show/61439040-1...
1,2,100 Headlines That Changed the World,James Maloney,Non-fiction,History,male,UK,English,309,2012,21st,,3.57,141,23,https://www.goodreads.com/book/show/14567468-1...
2,43,Infidel,Ayaan Hirsi Ali,Non-fiction,Faith,female,Somalia,English,353,2006,21st,,4.19,90769,7029,https://www.goodreads.com/book/show/81227.Infidel
3,4,30-Second Psychology,Christian Jarrett,Non-fiction,Psychology,male,UK,English,160,2011,21st,,3.75,1032,96,https://www.goodreads.com/book/show/11931275-3...
4,56,No god but God,Reza Aslan,Non-fiction,Faith,male,Iran,English,310,2005,21st,,4.13,27475,1771,https://www.goodreads.com/book/show/25307.No_g...


In [11]:
# List the column labels of the loaded frame.
# NOTE: in this cell's output the last label is 'URL ' (with a trailing space),
# so that column must be referenced with the space included.
book_rating.columns

Index(['ID', 'Title', 'Author', 'Fiction', 'Genre', 'Gender', 'Origin',
       'Language', 'Pages', 'Year Published', 'Centuries', 'Nobel Prize',
       'Avg_rating', 'Num_rating', 'Num_reviews', 'URL '],
      dtype='object')

# Descriptive Statistics

- I had to change the data type of `Num_rating` and `Num_reviews` to integers because their descriptive statistics weren't shown - they were classified as 'object'.


In [12]:
#book_rating.dtypes

# book_rating[['Avg_rating', 'Num_rating', 'Num_reviews']].describe()

In [13]:
import plotly.express as px

# distr_avg_rating = px.histogram(book_rating, x='Num_rating', nbins=20, title='Distribution of Number of Ratings')
# distr_avg_rating.show()

# distr_avg_rating = px.histogram(book_rating, x='Num_reviews', nbins=20, title='Distribution of Number of Reviews')
# distr_avg_rating.show()

In [14]:
# Top performers: the 15 books with the highest number of ratings.
top_books = (
    book_rating
    .nlargest(15, 'Num_rating')[['Title', 'Author', 'Num_rating']]
    # Combined "Title by Author" label used as the y-axis category.
    .assign(Title_Author=lambda df: df['Title'] + ' by ' + df['Author'])
)

fig_top_books = px.bar(
    top_books, x='Num_rating', y='Title_Author', orientation='h',
    color='Num_rating', color_continuous_scale='Viridis',
    # The count in the title is taken from the rows actually plotted so it
    # cannot drift from the nlargest() argument (it used to say "Top 10"
    # while 15 books were shown).
    title=f'Top {len(top_books)} most popular books by ratings')

# Order the bars by their value so the most-rated book ends up at the top.
fig_top_books.update_layout(
    yaxis={'categoryorder': 'total ascending'}
)

fig_top_books.show()

In [15]:
# Top 15 most reviewed books, with a combined "Title by Author" label column.
top_books_rev = (
    book_rating
    .nlargest(15, 'Num_reviews')
    .loc[:, ['Title', 'Author', 'Num_reviews']]
    .assign(Title_Author=lambda df: df['Title'] + ' by ' + df['Author'])
)

# Horizontal bars, one per book, coloured by the review count.
fig_top_books = px.bar(
    data_frame=top_books_rev,
    x='Num_reviews',
    y='Title_Author',
    orientation='h',
    color='Num_reviews',
    color_continuous_scale='Viridis',
    title='Top 15 most reviewed Books',
)

# Order the bars by their value so the most-reviewed book ends up at the top.
fig_top_books.update_layout(yaxis=dict(categoryorder='total ascending'))

fig_top_books.show()

# Genre

* The most prominent genres in my dataset can be categorized into major groups:
    - Faith, Philosophy, Autobiography, Science, Psychology, History, Self-help

* The minor groups, each of which has fewer than 5 books:
    - Economy, Dystopian, Biography, Photography, Feminism, Politics, Humor, Drama.

In [16]:
# Distinct genre labels present in the Genre column.
unique_genres = book_rating.loc[:, 'Genre'].unique()
unique_genres

array(['Dystopian', 'History', 'Faith', 'Psychology', 'Politics',
       'Biography', 'Science', 'Autobiography', 'Philosophy', 'Self-help',
       'Economy', 'Photography', 'Humor', 'Drama', 'Feminism'],
      dtype=object)

In [17]:
# Number of books per genre, most frequent genre first.
genre_counts = book_rating.loc[:, 'Genre'].value_counts(sort=True, ascending=False)
genre_counts

Genre
Faith            19
Philosophy       17
Autobiography    14
Psychology       12
Science          11
History           8
Self-help         5
Economy           4
Dystopian         2
Biography         2
Photography       2
Politics          1
Humor             1
Drama             1
Feminism          1
Name: count, dtype: int64

In [19]:
import pandas as pd
import plotly.graph_objects as go
from dash import Dash, dcc, html, Input, Output

# Genres with fewer than five books; these are merged into a single bucket.
minor_genres = ['Economy', 'Dystopian', 'Biography', 'Photography', 'Feminism', 'Politics', 'Humor', 'Drama']

# Create the Grouped_Genre column: minor genres become 'Others',
# every other genre keeps its own label.
book_rating['Grouped_Genre'] = book_rating['Genre'].mask(
    book_rating['Genre'].isin(minor_genres), 'Others'
)

# Initialise the Dash app.
app = Dash(__name__)

# Define the app layout: a genre selector above the bar chart it drives.
app.layout = html.Div(children=[
    dcc.Dropdown(
        id='genre-dropdown',
        # One option per grouped genre; label and value are the same string.
        options=[
            dict(label=name, value=name)
            for name in book_rating['Grouped_Genre'].unique()
        ],
        value='Faith',  # genre selected when the app first loads
        clearable=False,
    ),
    dcc.Graph(id='genre-bar-chart'),
])

#define callback to update the bar chart
@app.callback(
    Output('genre-bar-chart', 'figure'),
    Input('genre-dropdown', 'value'),
)
def update_chart(selected_genre):
    """Build the bar chart for the genre chosen in the dropdown.

    Args:
        selected_genre: Value of the 'genre-dropdown' component; compared
            against the ``Grouped_Genre`` column of ``book_rating``.

    Returns:
        A ``go.Figure`` with one horizontal bar per book in the selected
        genre, sorted by ``Num_rating`` and drawn on a log-scaled x-axis.
    """
    in_genre = book_rating['Grouped_Genre'] == selected_genre
    genre_books = book_rating.loc[in_genre].sort_values(by='Num_rating')
    ratings = genre_books['Num_rating']

    bars = go.Bar(
        x=ratings,
        y=genre_books['Title'],
        orientation='h',
        # Print the rating count on each bar.
        text=ratings,
        textposition='auto',
        # Hovering shows only the author.
        hovertext=genre_books['Author'],
        hoverinfo='text',
        marker={
            'color': ratings,
            'colorscale': 'Viridis',
            'showscale': True,
            'colorbar': {'title': 'Number of Rating', 'thickness': 15},
        },
    )

    fig = go.Figure()
    fig.add_trace(bars)

    fig.update_layout(
        title=f'Books by Number of Ratings in {selected_genre} Genre',
        height=600,
        xaxis_title='Number of Ratings (Log Scale)',
        yaxis_title='Book Title',
        # Log scale on the x-axis, as stated in the axis title.
        xaxis_type='log',
    )

    return fig

# Start the Dash development server when this code is executed directly.
if __name__ == '__main__':
    # app.run is the supported entry point; app.run_server is deprecated and
    # no longer available in Dash 3.
    app.run(debug=True)