## Instalando Selenium

In [1]:
# pip install selenium

## Importando Bibliotecas

In [2]:
import pandas as pd
import re
from time import sleep
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

## Inicializando uma instância do Google Chrome

In [3]:
# Start a Chrome browser session from a default options object and a
# default driver service; `driver` is the handle used by every later cell.
options = webdriver.ChromeOptions()
service = Service()

driver = webdriver.Chrome(options=options, service=service)

In [4]:
# Address of the page to scrape.
url = 'https://books.toscrape.com/'

# Navigate the browser session to that page.
driver.get(url)

## Encontrando Elementos do HTML

- find_element(By.ID, "id")
- find_element(By.NAME, "name")
- find_element(By.XPATH, "xpath")
- find_element(By.LINK_TEXT, "link text")
- find_element(By.PARTIAL_LINK_TEXT, "partial link text")
- **find_element(By.TAG_NAME, "tag name")**
- **find_element(By.CLASS_NAME, "class name")**
- find_element(By.CSS_SELECTOR, "css selector")

### Títulos dos livros

In [5]:
# Probe: take the first <h3> on the page and show the `title` attribute of
# the <a> link nested inside it.
primeiro_h3 = driver.find_elements(By.TAG_NAME, "h3")[0]
primeiro_h3.find_element(By.TAG_NAME, "a").get_attribute('title')

'A Light in the Attic'

In [6]:
# Every <h3> element on the current page; each one wraps a book's title link.
lista_elementos_titulos = driver.find_elements(by=By.TAG_NAME, value="h3")

In [7]:
# Read each book title from the `title` attribute of the <a> link found
# inside the corresponding <h3> element.
lista_titulos = [
    elemento.find_element(By.TAG_NAME, "a").get_attribute('title')
    for elemento in lista_elementos_titulos
]

In [8]:
# Display the collected titles.
lista_titulos

['A Light in the Attic',
 'Tipping the Velvet',
 'Soumission',
 'Sharp Objects',
 'Sapiens: A Brief History of Humankind',
 'The Requiem Red',
 'The Dirty Little Secrets of Getting Your Dream Job',
 'The Coming Woman: A Novel Based on the Life of the Infamous Feminist, Victoria Woodhull',
 'The Boys in the Boat: Nine Americans and Their Epic Quest for Gold at the 1936 Berlin Olympics',
 'The Black Maria',
 'Starving Hearts (Triangular Trade Trilogy, #1)',
 "Shakespeare's Sonnets",
 'Set Me Free',
 "Scott Pilgrim's Precious Little Life (Scott Pilgrim #1)",
 'Rip it Up and Start Again',
 'Our Band Could Be Your Life: Scenes from the American Indie Underground, 1981-1991',
 'Olio',
 'Mesaerion: The Best Science Fiction Stories 1800-1849',
 'Libertarianism for Beginners',
 "It's Only the Himalayas"]

### Preço dos livros

In [9]:
# Every element on the current page carrying the `price_color` CSS class.
lista_elemento_preco = driver.find_elements(by=By.CLASS_NAME, value="price_color")

In [10]:
# Keep the visible text of each price element (still a string at this point).
lista_preco = [elemento_preco.text for elemento_preco in lista_elemento_preco]

In [11]:
# Display the collected price strings.
lista_preco

['£51.77',
 '£53.74',
 '£50.10',
 '£47.82',
 '£54.23',
 '£22.65',
 '£33.34',
 '£17.93',
 '£22.60',
 '£52.15',
 '£13.99',
 '£20.66',
 '£17.46',
 '£52.29',
 '£35.02',
 '£57.25',
 '£23.88',
 '£37.59',
 '£51.33',
 '£45.17']

### Quantidade em Estoque

In [12]:
# Probe: click the link inside the first title heading to navigate away from
# the listing.
primeiro_link = lista_elementos_titulos[0].find_element(By.TAG_NAME, 'a')
primeiro_link.click()

In [13]:
# Probe: show the text of the first element with the `instock` CSS class on
# the page currently loaded in the browser.
driver.find_element(by=By.CLASS_NAME, value='instock').text

'In stock (22 available)'

In [18]:
# Step one entry back in the browser history.
driver.back()

In [15]:
# Collect the <a> link element nested in each title heading so the links can
# be used later.
lista_clicks = [
    elemento_titulo.find_element(By.TAG_NAME, "a")
    for elemento_titulo in lista_elementos_titulos
]

In [16]:
# lista_clicks[1].click()

In [19]:
# Visit every book's detail page and record its stock-availability text.
#
# The target URLs are read from the link elements up front and each page is
# opened with driver.get(). Clicking a stored WebElement after the browser
# has navigated away and back can raise StaleElementReferenceException,
# because an element reference is tied to the document it was located in.
pagina_lista = driver.current_url
lista_urls = [link.get_attribute('href') for link in lista_clicks]

lista_qtd = []

for url_livro in lista_urls:
    driver.get(url_livro)
    lista_qtd.append(driver.find_element(By.CLASS_NAME, 'instock').text)

# Leave the browser on the page it started from (only if we navigated away).
if lista_urls:
    driver.get(pagina_lista)

In [23]:
# Display the raw availability strings.
lista_qtd

['In stock (22 available)',
 'In stock (20 available)',
 'In stock (20 available)',
 'In stock (20 available)',
 'In stock (20 available)',
 'In stock (19 available)',
 'In stock (19 available)',
 'In stock (19 available)',
 'In stock (19 available)',
 'In stock (19 available)',
 'In stock (19 available)',
 'In stock (19 available)',
 'In stock (19 available)',
 'In stock (19 available)',
 'In stock (19 available)',
 'In stock (19 available)',
 'In stock (19 available)',
 'In stock (19 available)',
 'In stock (19 available)',
 'In stock (19 available)']

In [24]:
# Pull the first run of digits out of each availability string and store it
# as an int (e.g. 'In stock (22 available)' -> 22).
#
# re.search returns None when a string contains no digits; such entries are
# recorded as 0 instead of raising IndexError on an empty findall() result.
# NOTE(review): 0 assumes "no number shown" means nothing available; confirm.
lista_qtd_regex = []

for texto_estoque in lista_qtd:
    achado = re.search('[0-9]+', texto_estoque)
    lista_qtd_regex.append(int(achado.group()) if achado else 0)

In [25]:
# Display the parsed integer quantities.
lista_qtd_regex

[22,
 20,
 20,
 20,
 20,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19,
 19]

## Tabulando os dados

In [26]:
# Assemble the three scraped lists into one table, one column per list.
data = dict(
    titulo=lista_titulos,
    preco=lista_preco,
    qtd_disponivel=lista_qtd_regex,
)

df = pd.DataFrame(data)

In [27]:
# Display the resulting DataFrame.
df

Unnamed: 0,titulo,preco,qtd_disponivel
0,A Light in the Attic,£51.77,22
1,Tipping the Velvet,£53.74,20
2,Soumission,£50.10,20
3,Sharp Objects,£47.82,20
4,Sapiens: A Brief History of Humankind,£54.23,20
5,The Requiem Red,£22.65,19
6,The Dirty Little Secrets of Getting Your Dream...,£33.34,19
7,The Coming Woman: A Novel Based on the Life of...,£17.93,19
8,The Boys in the Boat: Nine Americans and Their...,£22.60,19
9,The Black Maria,£52.15,19
