In [1]:
from re import findall
from requests import Session
import re
from lxml.html import fromstring
from unidecode import unidecode

In [2]:
from math import ceil

In [3]:
def get_tree_from_url(session, url, cookies={}):
    response = session.get(url, cookies=cookies)
    return fromstring(response.text)


def extract_price(price):
    return int(re.sub(r"\s|R|\$|,", "", price)) / 100


def process_text(text):
    text = unidecode(text)
    text = text.replace(',', '')
    text = text.lower()
    return text

In [4]:
BASE_URL = 'https://www.comper.com.br/'
TOKEN_URL = "https://www.comper.com.br:443/api/sessions/"
DEPS_URL = "https://www.comper.com.br/api/catalog_system/pub/category/tree/3/"
PAGINATION = '//script[contains(text(), "window.location.hash = pageclickednumber")]'
PAGINATION_URL = "\$\('\#ResultItems_%s'\).load\('(?P<a>[^']+)'"
PAGE_COUNT = 'pagecount_%s = (?P<a>\d+);'

PRODUCTS = '//div[@class="shelf-item"]'
PRODUCT_NAME = 'div/h3[@class="shelf-item__title"]/a'
PRODUCT_BRAND = 'div/div[@class="shelf-item__brand"]/a'
PRODUCT_URL = 'div/h3/a/@href'
PRICE_REGULAR = 'div/div/div/span[@class="shelf-item__list-price"]'
PRICE = 'div/div/div/span/div/div/strong'

In [5]:
def set_token(session, store_id):
    """
    Set tokens for webscrapping session
    """
    session.get(url=f"https://www.comper.com.br:443/Site/Track.aspx?sc={store_id}")
    token_json={"public":{}}
    session.post(TOKEN_URL, cookies=session.cookies, json=token_json)
    tokens = session.cookies.get_dict()
    return tokens 

In [6]:
def get_json_deps(session):
    """
    Get json departments
    """
    json_response = session.get(DEPS_URL).json()
    return json_response

In [7]:
def get_departments(session):
    all_departments = {}
    for department in get_json_deps(session):
        department_name = department['name']
        if (department['hasChildren']) and (department['children'] != []):
            for sub_department in department['children']:
                sub_department_name = sub_department['name']
                category_name = process_text(f"{department_name} > {sub_department_name}")
                url = sub_department['url']
                url = url.replace('.vtexcommercestable.', '')
                all_departments[sub_department['url']] = category_name
                       
        else:
            category_name = process_text(f"{department_name}")
            all_departments[department['url']] = category_name   
    
    return all_departments

In [8]:
def get_pagination(tree):
    try:
        pager_element = tree.xpath('//div[@class="pager top"]')[0]
    except IndexError:
        return None, None
    
    pager_id = pager_element.attrib['id'].split('_')[1]
    pagination_script = tree.xpath(PAGINATION)[0]
    script = pagination_script.text
    page_count = findall(PAGE_COUNT % pager_id, script)
    page_count = int(page_count[0])
    pagination_url = findall(PAGINATION_URL % pager_id, script)[0]
    pagination_url = pagination_url.replace('PS=32', 'PS=50')
    total_products = 32 * page_count
    page_count = ceil(total_products/50)
    return page_count, pagination_url

In [9]:
def get_pages(session, department_url, cookies):
    tree = get_tree_from_url(session, department_url, cookies=cookies)
    max_pages, pagination_url = get_pagination(tree)
    for i in range(1, max_pages + 1):
        product_page =  BASE_URL + pagination_url + str(i)
        print(product_page)
        yield get_tree_from_url(session, product_page, cookies=cookies) 


In [10]:
def extract_products(page):
    products_info = []
    for item in page.xpath(PRODUCTS):
        row = {}
        product_sku = item.xpath('@data-product-id')[0]
        product_name = item.xpath(PRODUCT_NAME)[0].text
        product_brand = item.xpath(PRODUCT_BRAND)[0].text
        product_url = item.xpath(PRODUCT_URL)[0]        
        try:
            regular_price = extract_price(item.xpath(PRICE_REGULAR)[0].text.strip())
            best_price = extract_price(item.xpath(PRICE)[0].text)
        except:
            regular_price, best_price = None, None
        
        products_info.append(
            {
                'sku' : product_sku,
                'url': product_url,
                'name': product_name,
                'brand': product_brand,
                'price': best_price,
                'regular_price': regular_price
            }
        )
    return products_info


In [11]:
session = Session()
tokens = set_token(session, 1)
print(tokens)

{'ISSMB': 'ScreenMedia=0&UserAcceptMobile=False', 'SGTS': '8BA93BB040C3D9E8B0D12BD62A3579EC', 'VTEXSC': 'sc=1', 'vtex_segment': 'eyJjYW1wYWlnbnMiOm51bGwsImNoYW5uZWwiOiIyIiwicHJpY2VUYWJsZXMiOm51bGwsInJlZ2lvbklkIjpudWxsLCJ1dG1fY2FtcGFpZ24iOm51bGwsInV0bV9zb3VyY2UiOm51bGwsInV0bWlfY2FtcGFpZ24iOm51bGwsImN1cnJlbmN5Q29kZSI6IkJSTCIsImN1cnJlbmN5U3ltYm9sIjoiUiQiLCJjb3VudHJ5Q29kZSI6IkJSQSIsImN1bHR1cmVJbmZvIjoicHQtQlIiLCJjaGFubmVsUHJpdmFjeSI6InB1YmxpYyJ9', 'vtex_session': 'eyJhbGciOiJFUzI1NiIsImtpZCI6IjlEOERFRUUzMjVDMEE3RUI4MzIzQTk0QUYwMkUyRTRBMDc2RjdCMzAiLCJ0eXAiOiJqd3QifQ.eyJhY2NvdW50LmlkIjoiZTkxYzY4YjUtMjNiMy00ZGY1LThhNmQtZWE1Mzg4ZjFjYzM0IiwiaWQiOiI3ZmE5NDk3NS1lNGM1LTRhZmItOGU3Mi1hZTU1MWRmNzM4NmUiLCJ2ZXJzaW9uIjoyLCJzdWIiOiJzZXNzaW9uIiwiYWNjb3VudCI6InNlc3Npb24iLCJleHAiOjE3MDA1Mzg3MzksImlhdCI6MTY5OTg0NzUzOSwiaXNzIjoidG9rZW4tZW1pdHRlciIsImp0aSI6ImQ0MTNmYTUzLTI0ZjMtNDg2Ni1hMmRlLWVhNGU2YTM3NDU1NiJ9.sVTyDypVeJT8SQAVFFRM2sVL1T5cUq52gk6ccgRkjj22quqZowo6UY6qxdZ61VSe31BkIOmmNRlJSiTrRlNfbA'}


In [12]:
from time import sleep

In [None]:
for department_url in list(get_departments(session).keys()):
    print(department_url)
    for page in get_pages(session, department_url, tokens):
            products = extract_products(page)
            sleep(100)
            for i in products:
                print(i)
            print('Lenght - - - ', len(products))

https://www.comper.com.br/bebidas/aguas-chas-e-energeticos
https://www.comper.com.br//buscapagina?fq=C%3a%2f1281%2f1295%2f&PS=50&sl=8c91f63c-18aa-4c65-a2da-7ee53dbab287&cc=32&sm=0&PageNumber=1
{'sku': '1460609', 'url': 'https://www.comper.com.br/energetico-monster-energy-473ml/p', 'name': 'Energético Monster Energy 473ml', 'brand': 'Monster', 'price': None, 'regular_price': None}
{'sku': '86096', 'url': 'https://www.comper.com.br/energetico-red-bull-sugar-free-250ml/p', 'name': 'Energético Red Bull Sugar Free 250ml', 'brand': 'Red Bull', 'price': 8.29, 'regular_price': 9.49}
{'sku': '2348853', 'url': 'https://www.comper.com.br/energetico-monster-ultra-violet-zero-acucar-473ml/p', 'name': 'Energético Monster Ultra Violet Zero Açúcar 473ml', 'brand': 'Monster', 'price': None, 'regular_price': None}
{'sku': '2344270', 'url': 'https://www.comper.com.br/agua-mineral-purissima-com-gas-1-5-litro/p', 'name': 'Água Mineral Puríssima com Gás 1,5 Litro', 'brand': 'Purissima', 'price': None, 'regu

{'sku': '659312', 'url': 'https://www.comper.com.br/energetico-tnt-269ml/p', 'name': 'Energético TNT 269ml', 'brand': 'Tnt', 'price': None, 'regular_price': None}
{'sku': '577057', 'url': 'https://www.comper.com.br/energetico-red-bull-473ml/p', 'name': 'Energético Red Bull 473ml', 'brand': 'Red Bull', 'price': None, 'regular_price': None}
{'sku': '524492', 'url': 'https://www.comper.com.br/agua-de-coco-ducoco-1-litro/p', 'name': 'Água de Coco Ducoco 1 Litro', 'brand': 'Ducoco', 'price': None, 'regular_price': None}
{'sku': '496162', 'url': 'https://www.comper.com.br/agua-de-coco-kerococo-1-litro/p', 'name': 'Água de Coco Kerococo 1 Litro', 'brand': 'Kero Coco', 'price': None, 'regular_price': None}
{'sku': '301850', 'url': 'https://www.comper.com.br/energetico-red-bull-355ml/p', 'name': 'Energético Red Bull 355ml', 'brand': 'Red Bull', 'price': None, 'regular_price': None}
{'sku': '116688', 'url': 'https://www.comper.com.br/agua-de-coco-sococo-1-litro/p', 'name': 'Água de Coco Sococo 1

https://www.comper.com.br//buscapagina?fq=C%3a%2f1281%2f1301%2f&PS=50&sl=8c91f63c-18aa-4c65-a2da-7ee53dbab287&cc=32&sm=0&PageNumber=1
{'sku': '2301822', 'url': 'https://www.comper.com.br/refrigerante-coca-cola-2-litros/p', 'name': 'Refrigerante Coca-Cola 2 Litros', 'brand': 'Coca Cola', 'price': None, 'regular_price': None}
{'sku': '2784548', 'url': 'https://www.comper.com.br/refresco-em-po-tang-maracuja-18g/p', 'name': 'Refresco em Pó Tang Maracujá 18g', 'brand': 'Tang', 'price': None, 'regular_price': None}
{'sku': '2784459', 'url': 'https://www.comper.com.br/refresco-em-po-tang-uva-18g/p', 'name': 'Refresco em Pó Tang Uva 18g', 'brand': 'Tang', 'price': None, 'regular_price': None}
{'sku': '2608588', 'url': 'https://www.comper.com.br/suco-integral-life-nectar-de-laranja-da-fazenda-900ml/p', 'name': 'Suco Integral Life Néctar de Laranja da Fazenda 900ml', 'brand': 'Life Sucos', 'price': None, 'regular_price': None}
{'sku': '1954083', 'url': 'https://www.comper.com.br/suco-integral-pr

{'sku': '2784564', 'url': 'https://www.comper.com.br/refresco-em-po-tang-manga-18g/p', 'name': 'Refresco em Pó Tang Manga 18g', 'brand': 'Tang', 'price': None, 'regular_price': None}
{'sku': '2774224', 'url': 'https://www.comper.com.br/refresco-em-po-fresh-maracuja-15g/p', 'name': 'Refresco em Pó Fresh Maracujá 15g', 'brand': 'Fresh', 'price': None, 'regular_price': None}
{'sku': '2755262', 'url': 'https://www.comper.com.br/suco-mitto-manga-integral-1-litro/p', 'name': 'Suco Mitto Manga Integral 1 Litro', 'brand': 'Mitto', 'price': None, 'regular_price': None}
{'sku': '2755211', 'url': 'https://www.comper.com.br/suco-mitto-tangerina-integral-1-litro/p', 'name': 'Suco Mitto Tangerina Integral 1 Litro', 'brand': 'Mitto', 'price': None, 'regular_price': None}
{'sku': '2753596', 'url': 'https://www.comper.com.br/suco-mitto-uva-tinto-integral-1-litro/p', 'name': 'Suco Mitto Uva Tinto Integral 1 Litro', 'brand': 'Mitto', 'price': None, 'regular_price': None}
{'sku': '2753456', 'url': 'https:

{'sku': '741930', 'url': 'https://www.comper.com.br/suco-pronto-maguary-nectar-de-uva-200ml/p', 'name': 'Suco Pronto Maguary Néctar de Uva 200ml', 'brand': 'Maguary', 'price': None, 'regular_price': None}
{'sku': '687502', 'url': 'https://www.comper.com.br/refrigerante-fanta-laranja-1-5-litro/p', 'name': 'Refrigerante Fanta Laranja 1,5 Litro', 'brand': 'Fanta', 'price': None, 'regular_price': None}
{'sku': '679984', 'url': 'https://www.comper.com.br/suco-pronto-maguary-light-laranja-tetra-pack-1-litro/p', 'name': 'Suco Pronto Maguary Light Laranja Tetra Pack 1 Litro', 'brand': 'Maguary', 'price': None, 'regular_price': None}
{'sku': '656364', 'url': 'https://www.comper.com.br/agua-tonica-antarctica-zero-350ml/p', 'name': 'Água Tônica Antarctica Zero Açúcar Lata 350ml', 'brand': 'Antarctica', 'price': None, 'regular_price': None}
{'sku': '653845', 'url': 'https://www.comper.com.br/suco-pronto-del-valle-frut-uva-450ml/p', 'name': 'Suco Pronto Del Valle Frut Uva 450ml', 'brand': 'Del Vall

{'sku': '2619237', 'url': 'https://www.comper.com.br/suco-life-nectar-de-caju-da-fazenda-900ml/p', 'name': 'Suco Life Néctar de Caju da Fazenda 900ml', 'brand': 'Outros', 'price': None, 'regular_price': None}
{'sku': '2607468', 'url': 'https://www.comper.com.br/refrigerante-guarana-antarctica-natu-sem-acucar-lata-260ml/p', 'name': 'Refrigerante Guaraná Antarctica Natu Sem Açúcar Lata 260ml', 'brand': 'Antarctica', 'price': None, 'regular_price': None}
{'sku': '2588021', 'url': 'https://www.comper.com.br/suco-integral-prat-s-larranja--acerola-e-maca-900ml/p', 'name': "Suco Integral Prat's Larranja, Acerola e Maçã 900ml", 'brand': 'Prats', 'price': None, 'regular_price': None}
{'sku': '2586789', 'url': 'https://www.comper.com.br/suco-pronto-del-valle-kapo-maca-200ml/p', 'name': 'Suco Pronto Del Valle Kapo Maçã 200ml', 'brand': 'Del Valle', 'price': None, 'regular_price': None}
{'sku': '2583828', 'url': 'https://www.comper.com.br/refresco-em-po-mid-abacaxi-20g/p', 'name': 'Refresco em Pó 

{'sku': '1177265', 'url': 'https://www.comper.com.br/refresco-em-po-tang-morango-25g/p', 'name': 'Refresco em Pó Tang Morango 25g', 'brand': 'Tang', 'price': None, 'regular_price': None}
{'sku': '1177249', 'url': 'https://www.comper.com.br/refresco-em-po-tang-tangerina-25g/p', 'name': 'Refresco em Pó Tang Tangerina 25g', 'brand': 'Tang', 'price': None, 'regular_price': None}
{'sku': '1177230', 'url': 'https://www.comper.com.br/refresco-em-po-tang-manga-25g/p', 'name': 'Refresco em Pó Tang Manga 25g', 'brand': 'Tang', 'price': None, 'regular_price': None}
{'sku': '1177192', 'url': 'https://www.comper.com.br/refresco-em-po-tang-maracuja-25g/p', 'name': 'Refresco em Pó Tang Maracujá 25g', 'brand': 'Tang', 'price': None, 'regular_price': None}
{'sku': '1177184', 'url': 'https://www.comper.com.br/refresco-em-po-tang-limao-25g/p', 'name': 'Refresco em Pó Tang Limão 25g', 'brand': 'Tang', 'price': None, 'regular_price': None}
{'sku': '1177168', 'url': 'https://www.comper.com.br/refresco-em-po

{'sku': '64017', 'url': 'https://www.comper.com.br/suco-pronto-maguary-nectar-de-maracuja-500ml/p', 'name': 'Suco Pronto Maguary Néctar de Maracujá 500ml', 'brand': 'Maguary', 'price': None, 'regular_price': None}
{'sku': '31577', 'url': 'https://www.comper.com.br/suco-integral-aurora-uva-tinto-1-5-litro/p', 'name': 'Suco Integral Aurora Uva Tinto 1,5 Litro', 'brand': 'Aurora', 'price': None, 'regular_price': None}
Lenght - - -  2
https://www.comper.com.br/bebidas/cervejas
https://www.comper.com.br//buscapagina?fq=C%3a%2f1281%2f1305%2f&PS=50&sl=8c91f63c-18aa-4c65-a2da-7ee53dbab287&cc=32&sm=0&PageNumber=1
{'sku': '2764407', 'url': 'https://www.comper.com.br/cerveja-heineken-lata-269ml/p', 'name': 'Cerveja Heineken Lata 269ml', 'brand': 'Heineken', 'price': 3.49, 'regular_price': 3.89}
{'sku': '2528673', 'url': 'https://www.comper.com.br/cerveja-skol-pilsen-lata-269ml-pack-com-15-unidades/p', 'name': 'Cerveja Skol Pilsen Lata 269ml Pack com 15 Unidades', 'brand': 'Skol', 'price': None, '