In [None]:
# Load the libraries used throughout the notebook and configure plotting defaults.
import numpy as np
import pandas as pd

import seaborn as sns
# Scale seaborn's font sizes by a factor of 1.5.
sns.set(font_scale = 1.5)

import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
# Render figures inline in the notebook, using the 'retina' figure format.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
# NOTE(review): this style sheet is applied after sns.set(...), so any rcParams
# it defines take precedence over seaborn's -- confirm the font scaling above
# still has the intended effect.
plt.style.use('fivethirtyeight')

In [None]:
# Read the Amazon bestsellers CSV into a DataFrame and preview its first rows.
amazon_books = pd.read_csv(
    filepath_or_buffer='../data-sources/amazon-bestsellers/amazon-bestsellers.csv'
)
amazon_books.head()

In [None]:
# Print pandas' concise summary of the frame (columns, non-null counts, dtypes).
amazon_books.info()

In [None]:
# Summary statistics for every column (numeric and non-numeric alike),
# transposed so that each original column becomes one row.
amazon_books.describe(include='all').transpose()

In [None]:
# Share of rows belonging to each genre (fractions rather than raw counts).
genre_labels = amazon_books['Genre']
genre_labels.value_counts(normalize=True)

In [None]:
# Count of rows per year, split by genre.
titles_grid = sns.catplot(
    x='Year',
    hue='Genre',
    data=amazon_books,
    kind='count',
    palette=['k', 'crimson'],
    alpha=0.8,
    height=6,
    aspect=15 / 6,
)
# The grid holds a single facet, so labelling through the grid targets the
# same axes that pyplot would treat as current.
titles_grid.set_xlabels('Year', fontsize=20, labelpad=20)
titles_grid.set_ylabels('Number of Titles', fontsize=20, labelpad=20)
plt.show()

In [None]:
# Mean number of reviews per year, split by genre (no error bars: ci=None).
reviews_grid = sns.catplot(
    x='Year',
    y='Reviews',
    hue='Genre',
    data=amazon_books,
    kind='bar',
    ci=None,
    palette=['k', 'crimson'],
    alpha=0.8,
    height=6,
    aspect=15 / 6,
)
# Single-facet grid: labelling via the grid hits the same axes as pyplot would.
reviews_grid.set_xlabels('Year', fontsize=20, labelpad=20)
reviews_grid.set_ylabels('Average Number of Reviews', fontsize=20, labelpad=20)
plt.show()

In [None]:
# Mean price per year, split by genre, drawn as grouped bars.
# ci=None turns off the error bars, so only the bar heights (means) are shown.
# NOTE(review): seaborn >= 0.12 deprecates `ci` in favour of `errorbar=None`;
# switch once the installed seaborn version is confirmed.
sns.catplot(data=amazon_books, x='Year', y='Price', hue='Genre',
            kind='bar', alpha=0.8, height=6, aspect=15/6, ci=None,
            palette=['k', 'crimson'])
plt.xlabel('Year', fontsize=20, labelpad=20)
# Fixed label typo: the unit was written "(S)"; every other price axis in this
# notebook is labelled "($)".
plt.ylabel('Average Price ($)', fontsize=20, labelpad=20)
plt.show()

In [None]:
# Mean user rating per year, split by genre (no error bars: ci=None).
rating_grid = sns.catplot(
    x='Year',
    y='User Rating',
    hue='Genre',
    data=amazon_books,
    kind='bar',
    ci=None,
    palette=['k', 'crimson'],
    alpha=0.8,
    height=6,
    aspect=15 / 6,
)
# Single-facet grid: labelling via the grid hits the same axes as pyplot would.
rating_grid.set_xlabels('Year', fontsize=20, labelpad=20)
rating_grid.set_ylabels('Average User Rating', fontsize=20, labelpad=20)
plt.show()

In [None]:
# Average 'User Rating' per author; keep the 15 rows with the highest averages.
highest_rated_authors = (
    amazon_books
    .groupby('Author')['User Rating']
    .mean()
    .reset_index()
    .sort_values(by='User Rating', ascending=False)
    .iloc[:15]
)

fig, ax = plt.subplots(figsize=(15, 6))
sns.barplot(x='Author', y='User Rating', data=highest_rated_authors,
            color='k', alpha=0.8, ax=ax)
ax.set_ylabel('Average User Rating', labelpad=20)
ax.set_xlabel('Author', labelpad=20)
# Turn the x tick labels vertical.
plt.setp(ax.get_xticklabels(), rotation=90)
plt.show()

In [None]:
# Average 'Reviews' per author; keep the 15 rows with the highest averages.
most_reviewed_authors = (
    amazon_books
    .groupby('Author')['Reviews']
    .mean()
    .reset_index()
    .sort_values(by='Reviews', ascending=False)
    .iloc[:15]
)

fig, ax = plt.subplots(figsize=(15, 6))
sns.barplot(x='Author', y='Reviews', data=most_reviewed_authors,
            color='k', alpha=0.8, ax=ax)
ax.set_ylabel('Average Number of Reviews', labelpad=20)
ax.set_xlabel('Author', labelpad=20)
# Turn the x tick labels vertical.
plt.setp(ax.get_xticklabels(), rotation=90)
plt.show()

In [None]:
# Average 'Price' per author; keep the 15 rows with the highest averages.
most_expensive_authors = (
    amazon_books
    .groupby('Author')['Price']
    .mean()
    .reset_index()
    .sort_values(by='Price', ascending=False)
    .iloc[:15]
)

fig, ax = plt.subplots(figsize=(15, 6))
sns.barplot(x='Author', y='Price', data=most_expensive_authors,
            color='k', alpha=0.8, ax=ax)
ax.set_ylabel('Average Price ($)', labelpad=20)
ax.set_xlabel('Author', labelpad=20)
# Turn the x tick labels vertical.
plt.setp(ax.get_xticklabels(), rotation=90)
plt.show()

In [None]:
# Average 'User Rating' per title; keep the 15 rows with the highest averages.
highest_rated_titles = (
    amazon_books
    .groupby('Name')['User Rating']
    .mean()
    .reset_index()
    .sort_values(by='User Rating', ascending=False)
    .iloc[:15]
)

# Horizontal bars on a tall canvas: titles go on the y axis.
fig, ax = plt.subplots(figsize=(15, 25))
sns.barplot(x='User Rating', y='Name', data=highest_rated_titles,
            color='k', alpha=0.8, ax=ax)
ax.set_xlabel('Average User Rating', fontsize=40, labelpad=20)
ax.set_ylabel('Name', fontsize=40, labelpad=20)
# Enlarge the tick labels on both axes.
plt.setp(ax.get_xticklabels(), fontsize=30)
plt.setp(ax.get_yticklabels(), fontsize=30)
plt.show()

In [None]:
# Average 'Reviews' per title; keep the 15 rows with the highest averages.
most_reviewed_titles = (
    amazon_books
    .groupby('Name')['Reviews']
    .mean()
    .reset_index()
    .sort_values(by='Reviews', ascending=False)
    .iloc[:15]
)

# Horizontal bars on a tall canvas: titles go on the y axis.
fig, ax = plt.subplots(figsize=(15, 25))
sns.barplot(x='Reviews', y='Name', data=most_reviewed_titles,
            color='k', alpha=0.8, ax=ax)
ax.set_xlabel('Average Number of Reviews', fontsize=40, labelpad=20)
ax.set_ylabel('Name', fontsize=40, labelpad=20)
# Enlarge the tick labels on both axes.
plt.setp(ax.get_xticklabels(), fontsize=30)
plt.setp(ax.get_yticklabels(), fontsize=30)
plt.show()

In [None]:
# Average 'Price' per title; keep the 15 rows with the highest averages.
most_expensive_titles = (
    amazon_books
    .groupby('Name')['Price']
    .mean()
    .reset_index()
    .sort_values(by='Price', ascending=False)
    .iloc[:15]
)

# Horizontal bars on a tall canvas: titles go on the y axis.
fig, ax = plt.subplots(figsize=(15, 25))
sns.barplot(x='Price', y='Name', data=most_expensive_titles,
            color='k', alpha=0.8, ax=ax)
ax.set_xlabel('Average Price ($)', fontsize=40, labelpad=20)
ax.set_ylabel('Name', fontsize=40, labelpad=20)
# Enlarge the tick labels on both axes.
plt.setp(ax.get_xticklabels(), fontsize=30)
plt.setp(ax.get_yticklabels(), fontsize=30)
plt.show()

In [None]:
# Average 'Price' for each genre, one row per genre.
genre_mean_price = amazon_books.groupby('Genre')['Price'].mean().reset_index()

# NOTE(review): bar colours here follow the row order of genre_mean_price,
# whereas the catplot cells assign colours by hue order -- confirm each genre
# ends up with the same colour in every figure.
fig, ax = plt.subplots(figsize=(8, 8))
sns.barplot(x='Genre', y='Price', data=genre_mean_price,
            palette=['k', 'crimson'], ax=ax)
ax.set_xlabel('Genre', labelpad=20)
ax.set_ylabel('Average Price ($)', labelpad=20)
plt.show()