# Scraping Dataset of Books

### Self-guided project, courtesy of https://jovian.com/aakashns/python-web-scraping-project-guide

#### Project Outline:
- We are going to scrape https://books.toscrape.com/index.html
- We will get a list of books organized by their title
- For each book, we will grab rating and price
- For the list of books, we will create a CSV file in the following format: 
```
Title,Rating,Price
```

In [9]:
import requests
from bs4 import BeautifulSoup
import csv

In [10]:
# Request headers sent with every page fetch. The User-Agent value is a
# desktop Chrome identifier, so the requests look like ordinary browser traffic.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.124 Safari/537.36"
    ),
}

In [11]:
# Destination of the scraped data. The path is relative, so the file is
# created in (or overwritten in) the current working directory.
csv_file_path = "book_data.csv"

In [12]:
# Create functions to extract information from article tags
def name_func(tag):
    """Return the book title from a product ``<article>`` tag.

    The title is read from the ``title`` attribute of the link nested in the
    tag's first ``<h3>`` element.
    """
    heading = tag.find('h3')
    link = heading.a
    return link['title']

def rating_func(tag):
    """Return the rating word from a product ``<article>`` tag.

    The rating is encoded as a CSS class on the ``<p>`` element marked
    ``star-rating``. That paragraph is selected by its marker class instead
    of by position, and the marker itself is filtered out, so the result
    depends neither on the paragraph being the first ``<p>`` nor on the
    order of its classes.

    Assumes the paragraph carries exactly one class besides ``star-rating``
    (e.g. ``Three``) -- TODO confirm against the page markup.

    Raises:
        TypeError: if the tag has no ``p.star-rating`` element.
        IndexError: if that element has no class other than ``star-rating``.
    """
    marker = 'star-rating'
    rating_tag = tag.find('p', class_=marker)
    # Everything that is not the marker class is the rating itself.
    rating_words = [name for name in rating_tag['class'] if name != marker]
    return rating_words[0]

def price_func(tag):
    """Return the price of a book as a string without its currency prefix.

    The text of the ``p.price_color`` element is trimmed and everything in
    front of the first digit is dropped. Unlike slicing off a fixed number
    of characters, this gives the same result whether the currency sign was
    decoded as one character (``£51.77``) or mis-decoded into two
    (``Â£51.77``).

    If the text contains no digit at all, the trimmed text is returned
    unchanged so that nothing is silently lost.

    Raises:
        AttributeError: if the tag has no ``p.price_color`` element.
    """
    raw_price = tag.find('p', class_='price_color').text.strip()
    for index, char in enumerate(raw_price):
        if char.isdigit():
            return raw_price[index:]
    return raw_price


In [13]:
# Scrape every catalogue page and stream one CSV row per book.

# The crawl covers catalogue pages page-1.html through page-50.html.
FIRST_PAGE = 1
LAST_PAGE = 50

# Seconds to wait on the server before giving up; without a timeout a
# stalled connection would block the script indefinitely.
REQUEST_TIMEOUT = 10

# newline="" lets the csv module control line endings itself. utf-8-sig
# prepends a byte-order mark, presumably so that spreadsheet applications
# recognise the file as UTF-8.
with open(csv_file_path, mode="w", newline="", encoding="utf-8-sig") as csv_file:
    writer = csv.writer(csv_file)

    # Write the header
    writer.writerow(["Title", "Rating", "Price"])

    # A single session reuses the underlying connection for all pages and
    # carries the browser-like headers on every request.
    with requests.Session() as session:
        session.headers.update(headers)

        for page_number in range(FIRST_PAGE, LAST_PAGE + 1):
            # Send a request to the website for the current page
            page_url = f"https://books.toscrape.com/catalogue/page-{page_number}.html"
            response = session.get(page_url, timeout=REQUEST_TIMEOUT)
            # Fail loudly on an HTTP error instead of parsing an error page
            # and silently producing an incomplete dataset.
            response.raise_for_status()

            # NOTE(review): .text decodes with the encoding requests infers
            # from the response headers. Switching to response.content would
            # change the strings the extractor functions receive -- confirm
            # before changing.
            soup = BeautifulSoup(response.text, "html.parser")

            # Every book on the page is an <article class="product_pod">
            for tag in soup.find_all('article', class_='product_pod'):
                writer.writerow([name_func(tag), rating_func(tag), price_func(tag)])

print(f"Book data has been saved to '{csv_file_path}'.")


Book data has been saved to 'book_data.csv'.
