### Carrega libs

In [1]:
# scrape data
from bs4 import BeautifulSoup as bs
import requests
import re
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from datetime import datetime

# dataframe
import pandas as pd
import os

# database
import sqlite3

### Kabum

In [2]:
def get_categoria(path):
    """Return the top-level category of a '/'-separated menu path.

    Args:
        path: Menu path string such as "Hardware/SSD". May be None or empty.

    Returns:
        The text before the first '/', or "" when `path` is None or empty.
    """
    if not path:
        # A missing menu used to raise AttributeError on None.split.
        return ""
    # str.split always yields at least one element, so index 0 is safe.
    return path.split('/', 1)[0]

def get_subcategoria(path):
    """Return the second segment of a '/'-separated menu path.

    Args:
        path: Menu path string such as "Hardware/SSD". May be None or empty.

    Returns:
        The text between the first and second '/', or "" when `path` is None,
        empty, or has no second segment.
    """
    if not path:
        # A missing menu used to raise AttributeError on None.split.
        return ""
    segments = path.split('/')
    return segments[1] if len(segments) > 1 else ""

In [3]:
def scrape_data_kabum_json(url, timeout=30):
    """Fetch one Kabum catalog API page and flatten it into product records.

    Args:
        url: Full URL of the catalog endpoint. The JSON response must carry
            the product list under the top-level "data" key.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A list of dicts, one per product, with the keys: id, categoria,
        subcategoria, nome, preco, preco_pix, descricao, openbox, imagem,
        site, url, data.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError: If the payload has no "data" key or a product lacks one of
            the required attributes (title, is_openbox, tag_description, id).
    """
    # Without a timeout a stalled connection would hang the notebook forever.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error body.
    response.raise_for_status()

    products = response.json()['data'] or []

    # One timestamp per call so every row of the same scrape shares it.
    scraped_at = datetime.now().strftime("%d-%m-%Y %H:%M:%S")

    return [_parse_kabum_product(product, scraped_at) for product in products]


def _parse_kabum_product(product, scraped_at):
    """Convert one raw product entry from the API into a flat record dict."""
    attrs = product["attributes"]

    # Tolerate a missing/null menu so the path helpers always get a string.
    menu_path = attrs.get("menu") or ""

    # Prices inside "offer" take precedence; fall back to the product-level
    # prices when there is no offer or the offer lacks the field.
    offer = attrs.get("offer") or {}
    price_pix = offer.get("price_with_discount")
    if price_pix is None:
        price_pix = attrs.get("price_with_discount")
    price = offer.get("price")
    if price is None:
        price = attrs.get("price")

    # The second image stays the preferred one; fall back to the first so
    # products with a single image are not stored with a blank "imagem".
    # NOTE(review): confirm which index holds the preferred image size.
    images = attrs.get("images") or []
    if len(images) > 1:
        image = images[1]
    elif images:
        image = images[0]
    else:
        image = ""

    product_id = product["id"]

    return {
        "id": product_id,
        "categoria": get_categoria(menu_path),
        "subcategoria": get_subcategoria(menu_path),
        "nome": attrs["title"],
        "preco": price,
        "preco_pix": price_pix,
        "descricao": attrs["tag_description"],
        # Stored as 0/1 to match the INTEGER column in the database.
        "openbox": 1 if attrs["is_openbox"] else 0,
        "imagem": image,
        "site": "kabum",
        "url": "https://www.kabum.com.br/produto/" + str(product_id),
        "data": scraped_at,
    }

### Tratamento dos dados

In [4]:
# Catalog API endpoints to scrape, one per hardware sub-category. Each URL is
# passed unchanged to scrape_data_kabum_json.
# The facet_filters value looks like URL-encoded base64 JSON holding the
# filters chosen on the site — TODO confirm before editing by hand.
# NOTE(review): only the first URL requests page_number=1; the others request
# page 2 or 3 (and "coolers" uses page_size=100 instead of 1000). If the API
# honors these values the first pages of those categories are never fetched —
# confirm whether this is intentional.
links = [
    'https://servicespub.prod.api.aws.grupokabum.com.br/catalog/v2/products-by-category/hardware/disco-rigido-hd?page_number=1&page_size=1000&facet_filters=eyJJbnRlcmZhY2UiOlsiU0FUQSJdfQ%3D%3D&sort=most_searched&include=gift',
    'https://servicespub.prod.api.aws.grupokabum.com.br/catalog/v2/products-by-category/hardware/memoria-ram?page_number=2&page_size=1000&facet_filters=eyJDb21wYXRpYmlsaWRhZGUiOlsiRGVza3RvcCJdLCJDYXBhY2lkYWRlIjpbIjE2IEdCICgxeCAxNkdCKSIsIjggR0IgKDF4IDhHQikiXX0%3D&sort=most_searched&include=gift',
    'https://servicespub.prod.api.aws.grupokabum.com.br/catalog/v2/products-by-category/hardware/ssd-2-5?page_number=3&page_size=1000&facet_filters=eyJjYXRlZ29yeSI6WyJIYXJkd2FyZSJdLCJDYXBhY2lkYWRlIGRlIEFybWF6ZW5hbWVudG8iOlsiMVRCIiwiMlRCIiwiNFRCIl19&sort=most_searched&include=gift',
    'https://servicespub.prod.api.aws.grupokabum.com.br/catalog/v2/products-by-category/hardware/coolers?page_number=2&page_size=100&facet_filters=eyJDb21wYXRpYmlsaWRhZGUiOlsiSW50ZWwiLCJBTUQiXX0%3D&sort=most_searched&include=gift',
    'https://servicespub.prod.api.aws.grupokabum.com.br/catalog/v2/products-by-category/hardware/placa-de-video-vga?page_number=2&page_size=1000&facet_filters=eyJjYXRlZ29yeSI6WyJIYXJkd2FyZSJdfQ%3D%3D&sort=most_searched&include=gift',
    'https://servicespub.prod.api.aws.grupokabum.com.br/catalog/v2/products-by-category/hardware/fontes?page_number=2&page_size=1000&facet_filters=eyJjYXRlZ29yeSI6WyJIYXJkd2FyZSJdLCJDYWJlYW1lbnRvIjpbIlNlbWkgTW9kdWxhciIsIkZ1bGwgTW9kdWxhciJdfQ%3D%3D&sort=most_searched&include=gift',
    
]

In [5]:
# Scrape every configured endpoint; each entry of `produtos` is the list of
# product records returned for the link at the same position in `links`.
produtos = [scrape_data_kabum_json(link) for link in links]

### Cria dataframe e CSV

In [6]:
# Flatten the per-link batches into one list of records and build the
# DataFrame in a single step. Building one frame per batch and concatenating
# them fails when there are no batches at all, and mixes column-less empty
# frames into the concat whenever a link returns no products.
registros = [registro for lote in produtos for registro in lote]

# A fresh DataFrame gets a 0..n-1 index, matching concat(ignore_index=True).
df_kabum = pd.DataFrame(registros)

In [7]:
# Preview the first rows of the scraped data (head() defaults to 5 rows).
df_kabum.head()

Unnamed: 0,id,categoria,subcategoria,nome,preco,preco_pix,descricao,openbox,imagem,site,url,data
0,100916,Hardware,Disco Rígido (HD),"HD Seagate 2TB BarraCuda, 3.5', SATA - ST2000D...",494.11,419.99,"Um desempenho robusto e confiabilidade, Cache ...",0,https://images.kabum.com.br/produtos/fotos/100...,kabum,https://www.kabum.com.br/produto/100916,16-06-2024 16:48:30
1,95803,Hardware,Disco Rígido (HD),"HD Seagate 4TB BarraCuda, 3.5', SATA - ST4000D...",823.52,699.99,O HD perfeito para armazenamento de todos os s...,0,https://images.kabum.com.br/produtos/fotos/958...,kabum,https://www.kabum.com.br/produto/95803,16-06-2024 16:48:30
2,472212,Hardware,Disco Rígido (HD),"HD Western Digital Purple, 2TB, 64mb, Sata 3 -...",505.87,429.99,Conte com a confiabilidade e desempenho em dis...,0,,kabum,https://www.kabum.com.br/produto/472212,16-06-2024 16:48:30
3,460466,Hardware,Disco Rígido (HD),"HD WD Red Plus, 4TB, 5400 RPM, 3.5', SATA - WD...",823.52,699.99,Com desempenho confiável e um tempo de ativida...,0,,kabum,https://www.kabum.com.br/produto/460466,16-06-2024 16:48:30
4,460467,Hardware,Disco Rígido (HD),"HD WD Red Plus, 6TB, 5400 RPM, 3.5', SATA - WD...",1117.64,949.99,Com desempenho confiável e um tempo de ativida...,0,https://images.kabum.com.br/produtos/fotos/460...,kabum,https://www.kabum.com.br/produto/460467,16-06-2024 16:48:30


### Armazena em database

In [8]:
# Make sure the output directory exists; exist_ok avoids the check-then-create
# race of a separate os.path.exists test.
os.makedirs('data', exist_ok=True)
data_path = os.path.join('data', 'mydb.db')

conn = sqlite3.connect(data_path)
try:
    # Create the destination table on first run; later runs keep the data.
    conn.execute("""
CREATE TABLE IF NOT EXISTS produtosKabum (
    idDB INTEGER PRIMARY KEY AUTOINCREMENT,
    id INTEGER,
    categoria VARCHAR(255),
    subcategoria VARCHAR(255),
    nome VARCHAR(255),
    preco REAL,
    preco_pix REAL,
    descricao VARCHAR(255),
    openBox INTEGER,
    imagem VARCHAR(255),
    site VARCHAR(255),
    url VARCHAR(255),
    data DATETIME
)
""")

    # Append this scrape's rows; idDB is filled in by AUTOINCREMENT.
    df_kabum.to_sql('produtosKabum', con=conn, if_exists='append', index=False)
    conn.commit()
finally:
    # Always release the database file, even if the insert above fails.
    conn.close()

# To read the table back into a DataFrame, reopen the connection first:
#   conn = sqlite3.connect(data_path)
#   df = pd.read_sql_query("SELECT * FROM produtosKabum", conn)
#   conn.close()



' SQl to df\nsql = pd.read_sql_query("SELECT * FROM produtosKabum", conn)\ndf = pd.DataFrame(sql)\n'