In [62]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

## Functions

In [63]:
def get_additional_information(book_url):
    url = f'https://books.toscrape.com/catalogue/{book_url}'
    response = requests.get(url)

    soup = BeautifulSoup(response.text, 'html.parser')
    
    previous_element = soup.find('div', id='product_description')
    if previous_element:
        book_description = previous_element.find_next('p').text
    else:
        book_description = None
        
    table = soup.find('table', class_='table table-striped')
    if table:
        book_availability = table.find_all('tr')[5].text
    else:
        book_availability = None

    return [book_description, book_availability]


In [64]:
def extract_information(url):
        response = requests.get(url)
        
        soup = BeautifulSoup(response.text, 'html.parser')

        book_title = soup.find_all('h3')
        book_titles_list = [title.a['title'] for title in book_title if title.a]

        book_prices = soup.find_all('p', class_ = 'price_color')
        book_prices_list = [price.text[1:] for price in book_prices]

        book_in_stock = soup.find_all('p', class_='instock availability')
        book_in_stock_list = [stock.text.strip() for stock in book_in_stock]

        # Take the urls
        book_links = soup.find_all('div', class_='image_container')
        book_urls = [link.a['href'] for link in book_links if link.a]
        relative_urls = [url.replace('../../../', '')for url in book_urls]



        return book_titles_list, book_prices_list, book_in_stock_list, relative_urls

In [65]:
def automatization_process(ganre, url, data_frame):
    book_description_all = []
    book_availability_all = []
    title, price, stock, urls = extract_information(url)

    for book in urls:

        description, availability = get_additional_information(book)
        book_description_all.append(description)
        book_availability_all.append(availability)

    for i in range(len(title)):
        data_frame.loc[len(data_frame)] = [ganre.title(), title[i], price[i], stock[i], book_description_all[i], book_availability_all[i]]


In [66]:
df = pd.DataFrame(columns=['gender', 'title', 'price', 'stock', 'description', 'availability'])

In [67]:
df

Unnamed: 0,gender,title,price,stock,description,availability


## Travel Books

In [68]:
automatization_process('Travel', 'https://books.toscrape.com/catalogue/category/books/travel_2/index.html', df)

In [69]:
df

Unnamed: 0,gender,title,price,stock,description,availability
0,Travel,It's Only the Himalayas,£45.17,In stock,"âWherever you go, whatever you do, just . . ...",\nAvailability\nIn stock (19 available)\n
1,Travel,Full Moon over Noahâs Ark: An Odyssey to Mou...,£49.43,In stock,Acclaimed travel writer Rick Antonson sets his...,\nAvailability\nIn stock (15 available)\n
2,Travel,See America: A Celebration of Our National Par...,£48.87,In stock,To coincide with the 2016 centennial anniversa...,\nAvailability\nIn stock (14 available)\n
3,Travel,Vagabonding: An Uncommon Guide to the Art of L...,£36.94,In stock,With a new foreword by Tim Ferriss â¢Thereâ...,\nAvailability\nIn stock (8 available)\n
4,Travel,Under the Tuscan Sun,£37.33,In stock,A CLASSIC FROM THE BESTSELLING AUTHOR OF UNDER...,\nAvailability\nIn stock (7 available)\n
5,Travel,A Summer In Europe,£44.34,In stock,"On her thirtieth birthday, Gwendolyn Reese rec...",\nAvailability\nIn stock (7 available)\n
6,Travel,The Great Railway Bazaar,£30.54,In stock,"First published more than thirty years ago, Pa...",\nAvailability\nIn stock (6 available)\n
7,Travel,A Year in Provence (Provence #1),£56.88,In stock,National BestsellerIn this witty and warm-hear...,\nAvailability\nIn stock (6 available)\n
8,Travel,The Road to Little Dribbling: Adventures of an...,£23.21,In stock,The hilarious and loving sequel to a hilarious...,\nAvailability\nIn stock (3 available)\n
9,Travel,Neither Here nor There: Travels in Europe,£38.95,In stock,"Bill Bryson's first travel book, The Lost Cont...",\nAvailability\nIn stock (3 available)\n


## Mystery Books

In [73]:
n=2
for i in range(1, n+1):
    url = f'https://books.toscrape.com/catalogue/category/books/mystery_3/page-{i}.html'
    automatization_process('Mystery', url, df)

## Historical Books

In [75]:
n=2
for i in range(1, n+1):
    url = f'https://books.toscrape.com/catalogue/category/books/historical-fiction_4/page-{i}.html'
    automatization_process('Historical', url, df)

## Sequential Art Books

In [77]:
n=4
for i in range(1, n+1):
    url = f'https://books.toscrape.com/catalogue/category/books/sequential-art_5/page-{i}.html'
    automatization_process('Art', url, df)

In [84]:
len(df[df['gender'] == 'Art'])

75

## Classics Books

In [81]:
automatization_process('Classics', 'https://books.toscrape.com/catalogue/category/books/classics_6/index.html', df)

In [83]:
len(df[df['gender'] == 'Classics'])

19

## Philosophy Books

In [85]:
automatization_process('Philosophy', 'https://books.toscrape.com/catalogue/category/books/philosophy_7/index.html', df)

In [86]:
len(df[df['gender'] == 'Philosophy'])

11

## Romance Books

In [88]:
n = 2 
for i in range(1, n+1):
    url = f'https://books.toscrape.com/catalogue/category/books/romance_8/page-{i}.html'
    automatization_process('Romance', url, df)

In [89]:
len(df[df['gender'] == 'Romance'])

35

## Womens Fiction Books

In [90]:
automatization_process('Womens Fiction', 'https://books.toscrape.com/catalogue/category/books/womens-fiction_9/index.html', df)

In [91]:
len(df[df['gender'] == 'Womens Fiction'])

17

## Fiction Books

In [92]:
n = 4
for i in range(1, n+1):
    url = f'https://books.toscrape.com/catalogue/category/books/fiction_10/page-{i}.html'
    automatization_process('Fiction', url, df)

In [93]:
len(df[df['gender'] == 'Fiction'])

65

## Childrens Books

In [95]:
n = 2
for i in range(1, n+1):
    url = f'https://books.toscrape.com/catalogue/category/books/childrens_11/page-{i}.html'
    automatization_process('Childrens', url, df)

In [96]:
len(df[df['gender'] == 'Childrens'])

29

## Religion Books

In [97]:
automatization_process('Religion', 'https://books.toscrape.com/catalogue/category/books/religion_12/index.html', df)

In [98]:
len(df[df['gender'] == 'Religion'])

7

## Nonfiction Books

In [99]:
n = 6
for i in range(1, n+1):
    url = f'https://books.toscrape.com/catalogue/category/books/nonfiction_13/page-{i}.html'
    automatization_process('Nonfiction', url, df)

In [100]:
len(df[df['gender'] == 'Nonfiction'])

110

## Music Books

In [101]:
automatization_process('Music', 'https://books.toscrape.com/catalogue/category/books/music_14/index.html', df)

In [102]:
len(df[df['gender'] == 'Music'])

13

## Default Books

In [103]:
n = 8
for i in range(1, n+1):
    url = f'https://books.toscrape.com/catalogue/category/books/default_15/page-{i}.html'
    automatization_process('Default', url, df)

In [104]:
len(df[df['gender'] == 'Default'])

152

## Science Fiction Books


In [105]:
automatization_process('Science Fiction', 'https://books.toscrape.com/catalogue/category/books/science-fiction_16/index.html', df)

In [106]:
len(df[df['gender'] == 'Science Fiction'])

16

## Sports and Games


In [110]:
automatization_process('Sports and Games', 'https://books.toscrape.com/catalogue/category/books/sports-and-games_17/index.html', df)

In [113]:
len(df[df['gender'] == 'Sports And Games'])

10

In [112]:
df

Unnamed: 0,gender,title,price,stock,description,availability
0,Travel,It's Only the Himalayas,£45.17,In stock,"âWherever you go, whatever you do, just . . ...",\nAvailability\nIn stock (19 available)\n
1,Travel,Full Moon over Noahâs Ark: An Odyssey to Mou...,£49.43,In stock,Acclaimed travel writer Rick Antonson sets his...,\nAvailability\nIn stock (15 available)\n
2,Travel,See America: A Celebration of Our National Par...,£48.87,In stock,To coincide with the 2016 centennial anniversa...,\nAvailability\nIn stock (14 available)\n
3,Travel,Vagabonding: An Uncommon Guide to the Art of L...,£36.94,In stock,With a new foreword by Tim Ferriss â¢Thereâ...,\nAvailability\nIn stock (8 available)\n
4,Travel,Under the Tuscan Sun,£37.33,In stock,A CLASSIC FROM THE BESTSELLING AUTHOR OF UNDER...,\nAvailability\nIn stock (7 available)\n
...,...,...,...,...,...,...
623,Sports And Games,The Book of Basketball: The NBA According to T...,£44.84,In stock,There is only one writer on the planet who pos...,\nAvailability\nIn stock (3 available)\n
624,Sports And Games,"Friday Night Lights: A Town, a Team, and a Dream",£51.22,In stock,Return once again to the enduring account of l...,\nAvailability\nIn stock (3 available)\n
625,Sports And Games,Sugar Rush (Offensive Line #2),£24.42,In stock,Colt Avery is on fire. Dubbed one of the NFL's...,\nAvailability\nIn stock (2 available)\n
626,Sports And Games,Settling the Score (The Summer Games #1),£44.91,In stock,From the USA TODAY Bestselling author of Scori...,\nAvailability\nIn stock (1 available)\n


## Unknown Books

In [114]:
n = 4
for i in range(1, n+1):
    url = f'https://books.toscrape.com/catalogue/category/books/add-a-comment_18/page-{i}.html'
    automatization_process('Unknown', url, df)

In [115]:
len(df[df['gender'] == 'Unknown'])

67

## Fantasy Books

In [116]:
n = 3
for i in range(1, n+1):
    url = f'https://books.toscrape.com/catalogue/category/books/fantasy_19/page-{i}.html'
    automatization_process('Fantasy', url, df)

In [117]:
len(df[df['gender'] == 'Fantasy'])

48

## New Adult Books

In [118]:
automatization_process('New Adult', 'https://books.toscrape.com/catalogue/category/books/new-adult_20/index.html', df)

In [119]:
len(df[df['gender'] == 'New Adult'])

6

## Young Adult Books


In [120]:
n = 3
for i in range(1, n+1):
    url = f'https://books.toscrape.com/catalogue/category/books/young-adult_21/page-{i}.html'
    automatization_process('Young Adult', url, df)

In [121]:
len(df[df['gender'] == 'Young Adult'])

54

## Science Books


In [122]:
automatization_process('Science', 'https://books.toscrape.com/catalogue/category/books/science_22/index.html', df)

In [123]:
len(df[df['gender'] == 'Science'])

14

## Poetry Books


In [124]:
automatization_process('Poetry', 'https://books.toscrape.com/catalogue/category/books/poetry_23/index.html', df)

In [125]:
len(df[df['gender'] == 'Poetry'])

19

## Paranormal Books


In [126]:
automatization_process('Paranormal', 'https://books.toscrape.com/catalogue/category/books/paranormal_24/index.html', df)

In [127]:
len(df[df['gender'] == 'Paranormal'])

1

## Art Books


In [128]:
automatization_process('Art', 'https://books.toscrape.com/catalogue/category/books/art_25/index.html', df)

## Psychology Books


In [129]:
automatization_process('Psychology', 'https://books.toscrape.com/catalogue/category/books/psychology_26/index.html', df)

In [130]:
len(df[df['gender'] == 'Psychology'])

7

## Autobiography Books

In [131]:
automatization_process('Autobiography', 'https://books.toscrape.com/catalogue/category/books/autobiography_27/index.html', df)

In [132]:
len(df[df['gender'] == 'Autobiography'])

9

## Parenting Books


In [133]:
automatization_process('Parenting', 'https://books.toscrape.com/catalogue/category/books/parenting_28/index.html', df)

In [134]:
len(df[df['gender'] == 'Parenting'])

1

## Adult Fiction Books


In [135]:
automatization_process('Adult Fiction', 'https://books.toscrape.com/catalogue/category/books/adult-fiction_29/index.html', df)

In [136]:
len(df[df['gender'] == 'Adult Fiction'])

1

## Humor Books


In [137]:
automatization_process('Humor', 'https://books.toscrape.com/catalogue/category/books/humor_30/index.html', df)

In [138]:
len(df[df['gender'] == 'Humor'])

10

## Horror Books


In [139]:
automatization_process('Horror', 'https://books.toscrape.com/catalogue/category/books/horror_31/index.html', df)

In [140]:
len(df[df['gender'] == 'Horror'])

17

## History


In [141]:
automatization_process('History', 'https://books.toscrape.com/catalogue/category/books/history_32/index.html', df)

In [142]:
len(df[df['gender'] == 'History'])

18

## Food and Drink Books


In [143]:
n = 2
for i in range(1, n+1):
    url = f'https://books.toscrape.com/catalogue/category/books/food-and-drink_33/page-{i}.html'
    automatization_process('Food And Drink', url, df)

In [144]:
len(df[df['gender'] == 'Food And Drink'])

30

## Christian Fiction


In [145]:
automatization_process('Christian', 'https://books.toscrape.com/catalogue/category/books/christian-fiction_34/index.html', df)

In [146]:
len(df[df['gender'] == 'Christian'])

6

## Business Books


In [147]:
automatization_process('Business', 'https://books.toscrape.com/catalogue/category/books/business_35/index.html', df)

In [148]:
len(df[df['gender'] == 'Business'])

12

## Biography Books


In [149]:
automatization_process('Biography', 'https://books.toscrape.com/catalogue/category/books/biography_36/index.html', df)

In [150]:
len(df[df['gender'] == 'Biography'])

5

## Thriller Books


In [151]:
automatization_process('Thriller', 'https://books.toscrape.com/catalogue/category/books/thriller_37/index.html', df)

In [152]:
len(df[df['gender'] == 'Thriller'])

11

## Contemporary Books


In [153]:
automatization_process('Contemporary', 'https://books.toscrape.com/catalogue/category/books/contemporary_38/index.html', df)

In [154]:
len(df[df['gender'] == 'Contemporary'])

3

## Spirituality Books


In [155]:
automatization_process('Spirituality', 'https://books.toscrape.com/catalogue/category/books/spirituality_39/index.html', df)

In [156]:
len(df[df['gender'] == 'Spirituality'])

6

## Academic Books


In [157]:
automatization_process('Academic', 'https://books.toscrape.com/catalogue/category/books/academic_40/index.html', df)

In [158]:
len(df[df['gender'] == 'Academic'])

1

## Self Help Books


In [159]:
automatization_process('Self Help', 'https://books.toscrape.com/catalogue/category/books/self-help_41/index.html', df)

In [160]:
len(df[df['gender'] == 'Self Help'])

5

## Historical Books


In [161]:
automatization_process('Historical', 'https://books.toscrape.com/catalogue/category/books/historical_42/index.html', df)

In [162]:
len(df[df['gender'] == 'Historical'])

28

## Christian Books


In [163]:
automatization_process('Christian', 'https://books.toscrape.com/catalogue/category/books/christian_43/index.html', df)

## Suspense Books

In [164]:
automatization_process('Suspense', 'https://books.toscrape.com/catalogue/category/books/suspense_44/index.html', df)

## Short Stories Books


In [165]:
automatization_process('Short Stories', 'https://books.toscrape.com/catalogue/category/books/short-stories_45/index.html', df)

## Novels Books


In [166]:
automatization_process('Novels', 'https://books.toscrape.com/catalogue/category/books/novels_46/index.html', df)

## Health Books


In [167]:
automatization_process('Health', 'https://books.toscrape.com/catalogue/category/books/health_47/index.html', df)

## Politics Books

In [168]:
automatization_process('Politics', 'https://books.toscrape.com/catalogue/category/books/politics_48/index.html', df)

## Cultural Books


In [169]:
automatization_process('Cultural', 'https://books.toscrape.com/catalogue/category/books/cultural_49/index.html', df)

## Erotica Books


In [170]:
automatization_process('Erotica', 'https://books.toscrape.com/catalogue/category/books/erotica_50/index.html', df)

## Crime Books


In [171]:
automatization_process('Crime', 'https://books.toscrape.com/catalogue/category/books/crime_51/index.html', df)

In [173]:
df

Unnamed: 0,gender,title,price,stock,description,availability
0,Travel,It's Only the Himalayas,£45.17,In stock,"âWherever you go, whatever you do, just . . ...",\nAvailability\nIn stock (19 available)\n
1,Travel,Full Moon over Noahâs Ark: An Odyssey to Mou...,£49.43,In stock,Acclaimed travel writer Rick Antonson sets his...,\nAvailability\nIn stock (15 available)\n
2,Travel,See America: A Celebration of Our National Par...,£48.87,In stock,To coincide with the 2016 centennial anniversa...,\nAvailability\nIn stock (14 available)\n
3,Travel,Vagabonding: An Uncommon Guide to the Art of L...,£36.94,In stock,With a new foreword by Tim Ferriss â¢Thereâ...,\nAvailability\nIn stock (8 available)\n
4,Travel,Under the Tuscan Sun,£37.33,In stock,A CLASSIC FROM THE BESTSELLING AUTHOR OF UNDER...,\nAvailability\nIn stock (7 available)\n
...,...,...,...,...,...,...
1000,Politics,Why the Right Went Wrong: Conservatism--From G...,£52.65,In stock,âDionne's expertise is evident in this finel...,\nAvailability\nIn stock (14 available)\n
1001,Politics,Equal Is Unfair: America's Misguided Fight Aga...,£56.86,In stock,Weâve all heard that the American Dream is v...,\nAvailability\nIn stock (12 available)\n
1002,Cultural,Amid the Chaos,£36.58,In stock,Some people call Eritrea the âNorth Korea of...,\nAvailability\nIn stock (15 available)\n
1003,Erotica,Dark Notes,£19.19,In stock,They call me a slut. Maybe I am.Sometimes I do...,\nAvailability\nIn stock (15 available)\n


In [174]:
df.to_csv('books.csv')