### Carrega libs

In [None]:
# scrape data
import requests
import re

# dataframe
import pandas as pd
import os

# database
from supabase import create_client, Client

Documentação Supabase: https://supabase.com/docs/reference/python/introduction

### Kabum

In [None]:
def get_categoria(path):
    """Return the top-level category of a slash-separated menu path.

    Args:
        path: Menu path such as ``"hardware/ssd-2-5"``. ``None`` and the
            empty string are accepted.

    Returns:
        The text before the first ``/`` (the whole string when there is no
        ``/``), or ``""`` when ``path`` is empty or ``None``.
    """
    if not path:
        # A missing menu would otherwise fail with AttributeError on .split().
        return ""
    # str.split always yields at least one element, so index 0 is safe.
    return path.split('/', 1)[0]

def get_subcategoria(path):
    """Return the second segment of a slash-separated menu path.

    Args:
        path: Menu path such as ``"hardware/ssd-2-5"``. ``None`` and the
            empty string are accepted.

    Returns:
        The text between the first and second ``/``, or ``""`` when the path
        has no second segment, is empty, or is ``None``.
    """
    if not path:
        # A missing menu would otherwise fail with AttributeError on .split().
        return ""
    segments = path.split('/')
    return segments[1] if len(segments) > 1 else ""

In [None]:
def _parse_kabum_product(product):
    """Flatten one entry of the catalog payload's ``data`` list into a row dict."""
    attributes = product["attributes"]
    menu_path = attributes["menu"]

    # Prices are read from the nested "offer" object first; when a value is
    # absent there, fall back to the same key directly under "attributes".
    offer = attributes.get("offer") or {}
    price_pix = offer.get("price_with_discount")
    price = offer.get("price")
    if price_pix is None:
        price_pix = attributes.get("price_with_discount")
    if price is None:
        price = attributes.get("price")

    # `or []` also covers an explicit JSON null, which would break len().
    images = attributes.get("images") or []
    # NOTE(review): the image at index 1 is used, so a product with a single
    # image gets "" -- confirm that skipping index 0 is intentional.
    image = images[1] if len(images) > 1 else ""

    id_kabum = product["id"]

    return {
        "id_kabum": id_kabum,
        "categoria": get_categoria(menu_path),
        "subcategoria": get_subcategoria(menu_path),
        "nome": attributes["title"],
        "preco": price,
        "preco_pix": price_pix,
        "descricao": attributes["tag_description"],
        # Stored as an integer flag (1/0) rather than a boolean.
        "openbox": 1 if attributes["is_openbox"] else 0,
        "imagem": image,
        "site": "kabum",
        "url": "https://www.kabum.com.br/produto/" + str(id_kabum)
        # Timestamp column currently disabled:
        # "data": datetime.now().strftime("%d-%m-%Y %H:%M:%S")
    }


def scrape_data_kabum_json(url, timeout=30):
    """Fetch one Kabum catalog listing and return its products as flat dicts.

    Args:
        url: Catalog API endpoint, requested with GET. The JSON response is
            expected to carry the product entries under a top-level
            ``"data"`` key.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the run indefinitely.

    Returns:
        A list with one dict per product, with the keys ``id_kabum``,
        ``categoria``, ``subcategoria``, ``nome``, ``preco``, ``preco_pix``,
        ``descricao``, ``openbox``, ``imagem``, ``site`` and ``url``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
        KeyError: If the payload or a product lacks a required key.
    """
    response = requests.get(url, timeout=timeout)
    # Fail on HTTP errors here instead of later while parsing an error body.
    response.raise_for_status()

    products = response.json()['data']
    return [_parse_kabum_product(product) for product in products]

### Tratamento dos dados

In [None]:
# Kabum catalog API endpoints to scrape, one per product category.
# Every entry needs its own trailing comma: adjacent string literals without
# one are silently concatenated into a single malformed URL.
links = [
    # Hard disk drives
    'https://servicespub.prod.api.aws.grupokabum.com.br/catalog/v2/products-by-category/hardware/disco-rigido-hd?page_number=1&page_size=1000&facet_filters=eyJJbnRlcmZhY2UiOlsiU0FUQSJdfQ%3D%3D&sort=most_searched&include=gift',

    # RAM
    'https://servicespub.prod.api.aws.grupokabum.com.br/catalog/v2/products-by-category/hardware/memoria-ram?page_number=1&page_size=1000&facet_filters=eyJDb21wYXRpYmlsaWRhZGUiOlsiRGVza3RvcCJdLCJDYXBhY2lkYWRlIjpbIjE2IEdCICgxeCAxNkdCKSIsIjggR0IgKDF4IDhHQikiXX0%3D&sort=most_searched&include=gift',

    # SSD
    'https://servicespub.prod.api.aws.grupokabum.com.br/catalog/v2/products-by-category/hardware/ssd-2-5?page_number=1&page_size=1000&facet_filters=eyJjYXRlZ29yeSI6WyJIYXJkd2FyZSJdLCJDYXBhY2lkYWRlIGRlIEFybWF6ZW5hbWVudG8iOlsiMVRCIiwiMlRCIiwiNFRCIl19&sort=most_searched&include=gift',

    # Coolers / fans
    'https://servicespub.prod.api.aws.grupokabum.com.br/catalog/v2/products-by-category/hardware/coolers?page_number=1&page_size=1000&facet_filters=eyJDb21wYXRpYmlsaWRhZGUiOlsiSW50ZWwiLCJBTUQiXX0%3D&sort=most_searched&include=gift',

    # Graphics cards
    'https://servicespub.prod.api.aws.grupokabum.com.br/catalog/v2/products-by-category/hardware/placa-de-video-vga?page_number=1&page_size=1000&facet_filters=eyJjYXRlZ29yeSI6WyJIYXJkd2FyZSJdfQ%3D%3D&sort=most_searched&include=gift',

    # Power supplies
    'https://servicespub.prod.api.aws.grupokabum.com.br/catalog/v2/products-by-category/hardware/fontes?page_number=1&page_size=1000&facet_filters=eyJjYXRlZ29yeSI6WyJIYXJkd2FyZSJdLCJDYWJlYW1lbnRvIjpbIlNlbWkgTW9kdWxhciIsIkZ1bGwgTW9kdWxhciJdfQ%3D%3D&sort=most_searched&include=gift',

    # Processors
    'https://servicespub.prod.api.aws.grupokabum.com.br/catalog/v2/products-by-category/hardware/processadores?page_number=1&page_size=1000&facet_filters=eyJrYWJ1bV9wcm9kdWN0IjpbInRydWUiXX0%3D&sort=most_searched&include=gift',

    # Motherboards
    'https://servicespub.prod.api.aws.grupokabum.com.br/catalog/v2/products-by-category/hardware/placas-mae?page_number=1&page_size=1000&facet_filters=eyJrYWJ1bV9wcm9kdWN0IjpbInRydWUiXX0%3D&sort=most_searched&include=gift',

    # Smartphones
    'https://servicespub.prod.api.aws.grupokabum.com.br/catalog/v2/products-by-category/celular-smartphone/smartphones?page_number=1&page_size=1000&facet_filters=eyJrYWJ1bV9wcm9kdWN0IjpbInRydWUiXX0%3D&sort=most_searched&include=gift',

    # Notebooks
    'https://servicespub.prod.api.aws.grupokabum.com.br/catalog/v2/products-by-category/computadores/notebooks?page_number=1&page_size=1000&facet_filters=eyJTaXN0ZW1hIE9wZXJhY2lvbmFsIjpbImlPUyIsIkxpbnV4IiwiV2luZG93cyJdfQ%3D%3D&sort=most_searched&include=gift',
]

In [None]:
# Scrape every endpoint in `links`. Each element of `produtos` is the list of
# product dicts returned for one link, so the result is a list of lists.
produtos = []
for link in links:
    produtos += [scrape_data_kabum_json(link)]

### Armazena em database

In [None]:
# Connection settings come from the environment; either value is None when
# the corresponding variable is not set.
supabase_url = os.environ.get('SUPABASE_URL')
supabase_key = os.environ.get('SUPABASE_KEY')

# Client used by the cells below for inserting and reading rows.
supabase: Client = create_client(
    supabase_url,
    supabase_key,
)

In [None]:
# `produtos` holds one list of row dicts per scraped link. Send each list as
# a single bulk insert instead of issuing one request per product.
for produto in produtos:
    if not produto:
        # Nothing was scraped for this link; there is nothing to insert.
        continue
    supabase.table('produtosKabum').insert(produto).execute()

In [None]:
# Read back all columns of the rows stored in 'produtosKabum' and load the
# returned records into a DataFrame.
produtos_table = supabase.table('produtosKabum')
response = produtos_table.select("*").execute()

df = pd.DataFrame(data=response.data)

In [None]:
# Preview the first five rows (five is DataFrame.head's default).
df.head()