In [1]:
# import pandas package
import pandas as pd

In [2]:
# Optional: use the pd.set_option() to display all rows in a dataframe by default
# pd.set_option('display.max_rows', 600)

# Create a DataFrame

In [3]:
# read the "bestsellers with categories" csv file (Dataset on Amazon's Top 50 bestselling books from 2009 to 2019.)
df_books = pd.read_csv('data/bestsellers with categories.csv')

In [4]:
# get access to the shape attribute
df_books.shape

(550, 7)

In [5]:
# find the dataframe info
df_books.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 550 entries, 0 to 549
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Name         550 non-null    object 
 1   Author       550 non-null    object 
 2   User Rating  550 non-null    float64
 3   Reviews      550 non-null    int64  
 4   Price        550 non-null    int64  
 5   Year         550 non-null    int64  
 6   Genre        550 non-null    object 
dtypes: float64(1), int64(3), object(3)
memory usage: 30.2+ KB


In [6]:
# find the data types of each column
df_books.dtypes

Name            object
Author          object
User Rating    float64
Reviews          int64
Price            int64
Year             int64
Genre           object
dtype: object

# Display a DataFrame

In [7]:
# show first 5 rows in a dataframe
df_books.head()

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018,Non Fiction
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,2019,Non Fiction


In [8]:
# describe basic statistics of the dataframe (mean, std, min, max)
df_books.describe()

Unnamed: 0,User Rating,Reviews,Price,Year
count,550.0,550.0,550.0,550.0
mean,4.618364,11953.281818,13.1,2014.0
std,0.22698,11731.132017,10.842262,3.165156
min,3.3,37.0,0.0,2009.0
25%,4.5,4058.0,7.0,2011.0
50%,4.7,8580.0,11.0,2014.0
75%,4.8,17253.25,16.0,2017.0
max,4.9,87841.0,105.0,2019.0


# Add a new column

In [9]:
# Your task is to create a column named 'Critic Rating' that should have random integer numbers between 1 and 4

# 1. import numpy and create 550 random integer numbers between 1 and 4
import numpy as np

int_rating = np.random.randint(1,4,550)

In [10]:
# 2. add new 'Critic Rating' column to dataframe using the random numbers created
df_books['Critic Rating'] = int_rating

In [11]:
# show dataframe first 5 rows
df_books.head()

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre,Critic Rating
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction,1
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction,3
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018,Non Fiction,3
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction,2
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,2019,Non Fiction,1


# Basic Attributes, Methods and Functions

In [12]:
# get access to the columns attribute
df_books.columns

Index(['Name', 'Author', 'User Rating', 'Reviews', 'Price', 'Year', 'Genre',
       'Critic Rating'],
      dtype='object')

# Selecting Two or More Columns from a Dataframe

In [13]:
# move the new 'Critic Rating' column between the columns "User Rating" and "Reviews" Then update the dataframe
# Tip: Copy and paste the column names obtained with the columns attribute and then rearrange elements using [[]]
df_books = df_books[['Name', 'Author', 'User Rating', 'Critic Rating', 'Reviews', 'Price', 'Year', 'Genre']]

In [14]:
# show the first 5 rows
df_books.head()

Unnamed: 0,Name,Author,User Rating,Critic Rating,Reviews,Price,Year,Genre
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,1,17350,8,2016,Non Fiction
1,11/22/63: A Novel,Stephen King,4.6,3,2052,22,2011,Fiction
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,3,18979,15,2018,Non Fiction
3,1984 (Signet Classics),George Orwell,4.7,2,21424,6,2017,Fiction
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,1,7665,12,2019,Non Fiction


# Operations on Dataframes

In [15]:
# create a column named "Average Rating" by using the following formula: Average Rating = (User Rating + Critic Rating)/2
df_books['Average Rating'] = (df_books['User Rating'] + df_books['Critic Rating'])/2

In [16]:
# use the round function to round the values of the dataframe to 1 decimal and update the dataframe
df_books= df_books.round(1) # or round(df_books, 1)

# Value Counts

In [17]:
# count elements in "Genre" column by category and return the relative frequency
df_books['Genre'].value_counts(normalize=True)

Genre
Non Fiction    0.563636
Fiction        0.436364
Name: proportion, dtype: float64

# Rename Columns

In [18]:
# rename columns "User Rating," "Critic Rating" and "Average Rating" to "UR," "CR" and "AR" then update the dataframe with the columns
df_books.rename({'User Rating':'UR', 'Critic Rating':'CR', 'Average Rating':'AR'}, axis=1, inplace=True)

In [20]:
# show first 5 rows
df_books.head()

Unnamed: 0,Name,Author,UR,CR,Reviews,Price,Year,Genre,AR
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,1,17350,8,2016,Non Fiction,2.8
1,11/22/63: A Novel,Stephen King,4.6,3,2052,22,2011,Fiction,3.8
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,3,18979,15,2018,Non Fiction,3.8
3,1984 (Signet Classics),George Orwell,4.7,2,21424,6,2017,Fiction,3.4
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,1,7665,12,2019,Non Fiction,2.9


In [22]:
# select only "Name", "Author", "UR", "CR", "AR" and "Year" columns and update dataframe
df_books = df_books[["Name", "Author", "UR", "CR", "AR", "Year"]]

# Sort a dataframe

In [23]:
# sort the dataframe descending by "UR" and "CR"
df_books.sort_values(by=['UR', 'CR'], ascending=False)

Unnamed: 0,Name,Author,UR,CR,AR,Year
84,Dog Man: Brawl of the Wild: From the Creator o...,Dav Pilkey,4.9,3,4.0,2019
87,Dog Man: Lord of the Fleas: From the Creator o...,Dav Pilkey,4.9,3,4.0,2018
174,Humans of New York : Stories,Brandon Stanton,4.9,3,4.0,2015
187,Jesus Calling: Enjoying Peace in His Presence ...,Sarah Young,4.9,3,4.0,2011
190,Jesus Calling: Enjoying Peace in His Presence ...,Sarah Young,4.9,3,4.0,2014
...,...,...,...,...,...,...
393,The Goldfinch: A Novel (Pulitzer Prize for Fic...,Donna Tartt,3.9,1,2.4,2014
107,Fifty Shades of Grey: Book One of the Fifty Sh...,E L James,3.8,3,3.4,2013
106,Fifty Shades of Grey: Book One of the Fifty Sh...,E L James,3.8,1,2.4,2012
132,Go Set a Watchman: A Novel,Harper Lee,3.6,3,3.3,2015
