In [1]:
from bs4 import BeautifulSoup
import pandas as pd
import requests

from selenium import webdriver

First, let's write a function that extracts the offers. We'll use Selenium because Extra uses JavaScript to dynamically load content, so Beautiful Soup alone won't do. We'll use CSS selectors to find the target elements.

In [63]:
def extract_product_offers(url):
    """Scrape a product-listing page into a DataFrame.

    Opens the page in a Selenium-driven Firefox session, reads every product
    card found on it, and closes the browser again -- also when scraping
    fails part-way through.

    Parameters
    ----------
    url : str
        Address of the listing page to scrape.

    Returns
    -------
    pandas.DataFrame
        One row per product card, with the columns ``Title``, ``Url``,
        ``Sdk`` (the last path segment of the product link), ``discount``
        (the badge text without its first character, or ``None`` when the
        card shows no badge) and ``unavailable`` (``True`` when the card
        carries the "unavailable" marker). The frame is empty, but still has
        these columns, when no product card is found.

    Raises
    ------
    selenium.common.exceptions.NoSuchElementException
        If the product grid wrapper, or the image link inside a product
        card, is not present on the page.
    """
    # Local import: `By` is not among the notebook's top-level imports.
    from selenium.webdriver.common.by import By

    columns = ['Title', 'Url', 'Sdk', 'discount', 'unavailable']

    # Selectors copied from the page markup. The class names look
    # auto-generated, so presumably they change when the site is redeployed
    # -- TODO confirm whenever the scraper stops finding products.
    wrapper_selector = 'div.styles__Wrapper-crf3j2-0.eFgqLx'
    product_wrapper_class = 'styles__ProductGridItem-crf3j2-1'
    image_selector = 'a.styles__CardMediaWrapper-sc-1gzprri-4.WLhYY'
    unavailable_selector = 'span.styles__ProductIsUnavailable-sc-1idhk7x-9'
    discount_selector = 'div.styles__WrapperProductCard-sc-1gzprri-10.irDXTp'

    driver = webdriver.Firefox()
    try:
        driver.get(url)

        # The result is not needed; the lookup is kept so that a page without
        # the product grid raises instead of yielding an empty frame.
        driver.find_element(By.CSS_SELECTOR, wrapper_selector)
        products = driver.find_elements(By.CLASS_NAME, product_wrapper_class)

        # Collect plain dicts and build the frame once at the end; growing a
        # DataFrame row by row copies it on every iteration.
        records = []
        for product in products:
            image = product.find_element(By.CSS_SELECTOR, image_selector)
            unavailable = bool(
                product.find_elements(By.CSS_SELECTOR, unavailable_selector)
            )

            discounts = image.find_elements(By.CSS_SELECTOR, discount_selector)
            if discounts:
                # The badge text starts with a minus sign; drop that first
                # character so only the percentage remains.
                discount = discounts[0].get_attribute('innerText')[1:]
            else:
                discount = None

            # Kept in its own name so the `url` argument is not overwritten.
            product_url = image.get_attribute('href')
            records.append({
                'Title': image.get_attribute('title'),
                'Url': product_url,
                'Sdk': product_url.rsplit('/', 1)[-1],
                'discount': discount,
                'unavailable': unavailable,
            })
    finally:
        # Always release the browser, even when a lookup above raised.
        driver.quit()

    return pd.DataFrame(records, columns=columns)

We want only products that have a discount. Because the website accepts the discount range as a URL parameter, "des", we just pass it along with the URL.

In [75]:

# Category listing pages to scrape. The "des" query parameter carries the
# discount range used to filter the listing.
# Each URL is assembled from adjacent string literals: a backslash
# continuation inside a literal would embed the next line's indentation
# (spaces) in the URL.
refrigerators_url = (
    'https://www.extra.com.br/c/eletrodomesticos/refrigeradores/'
    '?filtro=c13_c14_c13&des=0TO25'
)

tvs_url = (
    'https://www.extra.com.br/c/tv-e-video/televisores/'
    '?filtro=c1_c2&nid=202485&des=0TO100'
)

printers_url = (
    'https://www.extra.com.br/c/informatica/impressoras/'
    '?filtro=c56_c61&des=0TO100'
)


In [65]:

# Scrape the three category pages one after another; each call launches its
# own Firefox session.
refrigerators = extract_product_offers(url=refrigerators_url)
tvs = extract_product_offers(url=tvs_url)
printers = extract_product_offers(url=printers_url)


In [70]:
import csv

In [72]:
# Save the refrigerator offers to disk, leaving the row index out of the file.
refrigerators.to_csv(path_or_buf='refrigerators.csv', index=False)

In [73]:
# Save the TV offers to disk, leaving the row index out of the file.
tvs.to_csv(path_or_buf='tvs.csv', index=False)

In [74]:
# Save the printer offers to disk, leaving the row index out of the file.
printers.to_csv(path_or_buf='printers.csv', index=False)

Finally, let's print each dataframe found:

In [76]:
# Bare last expression: the notebook renders the refrigerator offers as a table.
refrigerators

Unnamed: 0,Title,Url,Sdk,discount,unavailable
0,Refrigerador Consul Frost Free CRM43NK com 2 P...,https://www.extra.com.br/refrigerador-consul-f...,9600890,4%,False
1,Refrigerador Consul Frost Free Facilite CRB39A...,https://www.extra.com.br/refrigerador-consul-f...,10153519,7%,False
2,Refrigerador Consul Frost Free CRM39AB Duplex ...,https://www.extra.com.br/refrigerador-consul-f...,50004596,4%,False
3,Refrigerador Electrolux DF44 com Prateleira Re...,https://www.extra.com.br/refrigerador-electrol...,15243319,7%,False
4,Refrigerador Electrolux Frost Free DB53 Bottom...,https://www.extra.com.br/refrigerador-electrol...,11688788,3%,False
5,Refrigerador Electrolux DF56 com Icemax Branco...,https://www.extra.com.br/refrigerador-electrol...,15243314,6%,False
6,Refrigerador Brastemp Side Inverse BRO80AB com...,https://www.extra.com.br/refrigerador-brastemp...,4420011,4%,False
7,Refrigerador Panasonic Frost Free NR-BB53GV3W ...,https://www.extra.com.br/refrigerador-panasoni...,55007189,4%,False
8,Refrigerador Consul CRE44AB Frost Free Duplex ...,https://www.extra.com.br/refrigerador-consul-c...,50003891,3%,True
9,Refrigerador Electrolux Infinity DF80X Frost F...,https://www.extra.com.br/refrigerador-electrol...,9344,13%,True


In [77]:
# Bare last expression: the notebook renders the TV offers as a table.
tvs

Unnamed: 0,Title,Url,Sdk,discount,unavailable
0,"Smart TV LED 75"" UHD 4K LG 75UN8000PSB Wi-Fi, ...",https://www.extra.com.br/smart-tv-led-75-uhd-4...,55007677,5%,False
1,"Smart TV LED 55"" UHD 4K LG 55NANO81 NanoCell, ...",https://www.extra.com.br/smart-tv-led-55-uhd-4...,55007673,23%,False
2,"Smart TV LED 65"" UHD 4K LG 65NANO86 NanoCell, ...",https://www.extra.com.br/smart-tv-led-65-uhd-4...,55007670,5%,False
3,"Smart TV LED 70"" UHD 4K LG 70UN7310PSC Wi-Fi, ...",https://www.extra.com.br/smart-tv-led-70-uhd-4...,55007679,9%,False
4,"Smart TV LED 65"" 4K TCL 65P8M com Android TV, ...",https://www.extra.com.br/smart-tv-led-65-4k-tc...,55000479,3%,False
5,"Smart TV QLED 75"" UHD 8K Samsung 75Q800T Proce...",https://www.extra.com.br/smart-tv-qled-75-uhd-...,55006512,15%,False
6,"Smart TV LED 65"" UHD 4K LG 65NANO81 NanoCell, ...",https://www.extra.com.br/smart-tv-led-65-uhd-4...,55007672,8%,False
7,"Smart TV LED 82"" UHD 4K LG 82UN8000PSB Wi-Fi, ...",https://www.extra.com.br/smart-tv-led-82-uhd-4...,55007676,21%,False
8,"Smart TV QLED 65"" UHD 8K Samsung 65Q800T Proce...",https://www.extra.com.br/smart-tv-qled-65-uhd-...,55006511,33%,False
9,"Smart TV LED 65"" UHD 4K LG 65UN7100PSA Wi-Fi, ...",https://www.extra.com.br/smart-tv-led-65-uhd-4...,55014287,9%,False


In [78]:
# Bare last expression: the notebook renders the printer offers as a table.
printers

Unnamed: 0,Title,Url,Sdk,discount,unavailable
0,Toner Compativel MLT D203U Preto 15K Fast Prin...,https://www.extra.com.br/toner-compativel-mlt-...,13012324,,False
1,Impressora Tanque de Tinta Epson EcoTank L1800...,https://www.extra.com.br/impressora-tanque-de-...,9195161,6%,True
2,Impressora HP LaserJet Pro P1102w com ePrint -...,https://www.extra.com.br/Impressora-HP-LaserJe...,1706498,26%,True
3,Impressora HP OfficeJet Pro 8210 - Preto,https://www.extra.com.br/Impressora-HP-OfficeJ...,8182623,7%,True
4,Impressora EPSON Matricial LX-350 EDG - BRCC24021,https://www.extra.com.br/impressora-epson-matr...,3720616,9%,True
5,Impressora HP Laserjet Pro CP1025 CE913A#696,https://www.extra.com.br/impressora-hp-laserje...,406095,19%,True
6,Impressora HP LaserJet Pro M15w Wireless,https://www.extra.com.br/impressora-hp-laserje...,12895633,21%,True
7,Impressora Laser Mono Brother HL-L6202DW 46ppm...,https://www.extra.com.br/impressora-laser-mono...,9803133,5%,True
