# Exercise: Web Scraping with Beautiful Soup
*****
# Part 1
### Gather Book Prices and Ratings

In [6]:
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Root URL of the site being scraped; page paths are appended to it.
site = 'http://books.toscrape.com/'
# Path (relative to `site`) of the page to request; empty means the root page.
page = ''

def get_page(path, timeout=10):
    """Download a URL and parse the response body as HTML.

    Args:
        path: Absolute URL to request.
        timeout: Seconds to wait for the server before ``requests`` gives up
            and raises a timeout error. Defaults to 10.

    Returns:
        A ``BeautifulSoup`` tree built with the ``'html.parser'`` backend.
    """
    # requests applies no timeout by default, so a stalled connection would
    # block the whole scrape indefinitely; always pass one explicitly.
    response = requests.get(path, timeout=timeout)
    return BeautifulSoup(response.text, 'html.parser')

def get_products(soup):
    """Return every element of ``soup`` whose class is ``product_pod``.

    Args:
        soup: Parsed page exposing a ``find_all`` method.

    Returns:
        Whatever ``soup.find_all`` yields for the ``product_pod`` class filter.
    """
    class_filter = {'class': 'product_pod'}
    return soup.find_all(attrs=class_filter)

print(" Getting Books...")
books = []

# Start at the landing page; every later URL is resolved from the "next"
# link found on the page that was just scraped.
url = site + page
while url:
    soup = get_page(url)

    for item in get_products(soup):
        # The rating word (e.g. 'Three') is the second entry of the
        # star-rating element's class list.
        rating = item.find(attrs={'class': 'star-rating'}).attrs['class'][1]
        # The second anchor of a product card is the one read for the title.
        title = item.find_all('a')[1].get('title')

        raw_price = item.find(attrs={'class': 'price_color'}).text
        # The recorded sample output still starts with the currency sign
        # after dropping one character, so the raw text carries one stray
        # leading character (presumably an encoding artifact - TODO confirm).
        # Slice from the sign itself so the result is the same whether or
        # not that stray character is present.
        pound_at = raw_price.find('£')
        price = raw_price[pound_at:] if pound_at != -1 else raw_price[1:]

        # Strip surrounding whitespace instead of indexing a fixed line
        # number, which raised IndexError on any change in page whitespace.
        status = item.find(attrs={'class': 'instock availability'}).text.strip()

        books.append({
            'title': title,
            'price': price,
            'rating': rating,
            'status': status,
        })

    # The pager link is expected to be the last anchor on the page. Only
    # follow it when it really is a "next" link: on the final page the last
    # anchor reads 'previous', and a page with no pager at all ends with an
    # unrelated link that must not be crawled.
    anchors = soup.find_all('a')
    last_anchor = anchors[-1] if anchors else None
    next_href = None
    if last_anchor is not None and last_anchor.text.strip() == 'next':
        next_href = last_anchor.get('href')

    # urljoin resolves the href against the current page URL, so relative
    # links work from any directory without hand-prefixing a path.
    url = urljoin(url, next_href) if next_href else None

print(len(books), 'books collected')
if books:
    print('\n Data Example')
    print(books[0])

 Getting Books...
1000 books collected

 Data Example
{'title': 'A Light in the Attic', 'price': '£51.77', 'rating': 'Three', 'status': 'In stock'}


### Export Data to a CSV

In [32]:
import csv

# Column order of the exported CSV; also the keys read from each book dict.
columns = ['title', 'price', 'rating', 'status']

# newline='' is required by the csv module: the writer emits its own '\r\n'
# line endings, and without it Windows would translate them into '\r\r\n',
# producing a blank row after every record.
with open('book_list.csv', 'w', encoding='utf-8', newline='') as file:
    # DictWriter looks values up by column name, so the output stays aligned
    # with the header regardless of the key order inside each book dict.
    csvwriter = csv.DictWriter(file, fieldnames=columns)
    csvwriter.writeheader()
    csvwriter.writerows(books)
# No explicit close: the with-statement has already closed the file.
