# Importing and Reading Data

In [2]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns 
# Notebook-wide presentation settings: show up to 200 columns when a frame is
# displayed, and draw every matplotlib figure with the "ggplot" style sheet.
pd.options.display.max_columns = 200
plt.style.use("ggplot")

# Understanding The Data

- **.shape / .columns / .info()** – to check the size, column names, and structure of the dataset
- **.dtypes** – to understand data types for each column
- **.describe()** – to get summary statistics of numeric columns

In [3]:
# Load the books export from the working directory into a DataFrame.
df = pd.read_csv(filepath_or_buffer="books_data.csv")

In [4]:
# (rows, columns) of the freshly loaded frame.
df.shape

(616, 7)

In [6]:
# Column labels exactly as they appear in the CSV header.
df.columns

Index(['Title', 'Author', 'Rating', 'Avg Rating', 'Date Added', 'Num Pages',
       'Genre'],
      dtype='object')

In [9]:
# Dtype pandas inferred for each column; `object` columns still hold raw
# Python values and are candidates for conversion during data preparation.
df.dtypes

Title          object
Author         object
Rating         object
Avg Rating    float64
Date Added     object
Num Pages      object
Genre          object
dtype: object

# Data Preparation

In [4]:
# Preview the first five rows before any cleaning is applied.
df.head()

Unnamed: 0,Title,Author,Rating,Avg Rating,Date Added,Num Pages,Genre
0,Hunger(Unbound #3),"Gonnella, Nicoli",it was ok,4.43,"Apr 19, 2025","1,035pp",Unknown
1,Silence(Unbound #2),"Gonnella, Nicoli",liked it,4.23,"Apr 19, 2025",482pp,Artists' books
2,Dissonance(Unbound #1),"Gonnella, Nicoli",really liked it,4.38,"Apr 16, 2025",778pp,Unknown
3,The Dungeon Anarchist's Cookbook(Dungeon Crawl...,"Dinniman, Matt",it was ok,4.48,"Apr 15, 2025",534pp,Fiction
4,"Carl's Doomsday Scenario(Dungeon Crawler Carl,...","Dinniman, Matt",liked it,4.54,"Apr 14, 2025",364pp,Unknown


In [5]:
# Switch the headers to lowercase snake_case identifiers.
# Labels that are not present in the frame are ignored by `rename`.
df = df.rename(
    {
        "Title": "title",
        "Author": "author",
        "Rating": "rating",
        "Avg Rating": "avg_rating",
        "Date Added": "date_added",
        "Num Pages": "num_of_pages",
        "Genre": "genre",
    },
    axis="columns",
)

In [6]:
# Verify the column labels after renaming.
df.columns

Index(['title', 'author', 'rating', 'avg_rating', 'date_added', 'num_of_pages',
       'genre'],
      dtype='object')

In [7]:
# Verbal rating labels in ascending order; each label's position (starting
# at 1) is its numeric score, so the mapping runs from 1 through 5.
rating_map = {
    label: score
    for score, label in enumerate(
        [
            "did not like it",
            "it was ok",
            "liked it",
            "really liked it",
            "it was amazing",
        ],
        start=1,
    )
}

# Derive the numeric companion column; any value of `rating` that is not a
# key of `rating_map` becomes NaN.
df["rating_num"] = df["rating"].map(rating_map)

In [8]:
# Inspect the last five rows, including the newly added `rating_num` column.
df.tail()

Unnamed: 0,title,author,rating,avg_rating,date_added,num_of_pages,genre,rating_num
611,The Journal of Curious Letters(The 13th Realit...,"Dashner, James",really liked it,3.94,"Mar 01, 2015",434pp,Juvenile Fiction,4.0
612,"Forest Born(The Books of Bayern, #4)","Hale, Shannon",it was ok,4.04,"Mar 01, 2015",389pp,Young Adult Fiction,2.0
613,"Rise of the Evening Star(Fablehaven, #2)","Mull, Brandon",really liked it,4.18,"Mar 01, 2015",456pp,Juvenile Fiction,4.0
614,The Titan’s Curse(Percy Jackson and the Olympi...,"Riordan, Rick",really liked it,4.37,"Mar 01, 2015",352pp,Juvenile Fiction,4.0
615,Harry Potter and the Sorcerer's Stone(Harry Po...,"Rowling, J.K.",really liked it,4.47,"Mar 01, 2015",309pp,JUVENILE FICTION,4.0


In [11]:

# Convert page counts such as "1,035pp" into numbers.
#
# The column is cast to `str` first so this cell can be re-run safely: after
# the first execution the column is numeric, and calling `.str` on it directly
# would raise AttributeError. `regex=False` makes both replacements literal
# regardless of the installed pandas version's default.
df["num_of_pages"] = pd.to_numeric(
    df["num_of_pages"]
    .astype(str)
    .str.replace(",", "", regex=False)   # drop the thousands separator
    .str.replace("pp", "", regex=False)  # drop the page-count suffix
    .str.strip(),
    errors="coerce",  # anything that still is not a number becomes NaN
)


In [14]:
# Parse the date strings (e.g. "Apr 19, 2025" in the preview) into datetime
# values so they can be sorted and resampled; the format is inferred by pandas.
df["date_added"] = pd.to_datetime(arg=df["date_added"])

In [17]:
# Rewrite "Last, First" as "First Last": split on ", ", reverse the pieces,
# and glue them back together with a single space. Names without ", " pass
# through unchanged.
df["author"] = (
    df["author"]
    .str.split(pat=", ")
    .str[::-1]
    .str.join(sep=" ")
)

In [19]:
# Show the cleaned frame; pandas truncates the display to the first and last
# rows for a frame this long.
# NOTE(review): consider df.head() or df.sample(5) to keep the saved output small.
df

Unnamed: 0,title,author,rating,avg_rating,date_added,num_of_pages,genre,rating_num
0,Hunger(Unbound #3),Nicoli Gonnella,it was ok,4.43,2025-04-19,1035.0,Unknown,2.0
1,Silence(Unbound #2),Nicoli Gonnella,liked it,4.23,2025-04-19,482.0,Artists' books,3.0
2,Dissonance(Unbound #1),Nicoli Gonnella,really liked it,4.38,2025-04-16,778.0,Unknown,4.0
3,The Dungeon Anarchist's Cookbook(Dungeon Crawl...,Matt Dinniman,it was ok,4.48,2025-04-15,534.0,Fiction,2.0
4,"Carl's Doomsday Scenario(Dungeon Crawler Carl,...",Matt Dinniman,liked it,4.54,2025-04-14,364.0,Unknown,3.0
...,...,...,...,...,...,...,...,...
611,The Journal of Curious Letters(The 13th Realit...,James Dashner,really liked it,3.94,2015-03-01,434.0,Juvenile Fiction,4.0
612,"Forest Born(The Books of Bayern, #4)",Shannon Hale,it was ok,4.04,2015-03-01,389.0,Young Adult Fiction,2.0
613,"Rise of the Evening Star(Fablehaven, #2)",Brandon Mull,really liked it,4.18,2015-03-01,456.0,Juvenile Fiction,4.0
614,The Titan’s Curse(Percy Jackson and the Olympi...,Rick Riordan,really liked it,4.37,2015-03-01,352.0,Juvenile Fiction,4.0
