# Sallve

In [56]:
import sys
import os
import re
import json
import time
import unicodedata
from urllib.parse import urljoin, urlparse, parse_qs, urlencode
from pathlib import Path
from typing import List, Dict, Optional

import requests
import pandas as pd
from bs4 import BeautifulSoup

sys.path.append(os.path.abspath("/home/usuario/√Årea de trabalho/Dados/models"))

from skin import (
    SKIN_TYPE_CANONICAL_ORDER,
    SKIN_TYPE_SYNONYMS_PT,
)

from exclude import (
    EXCLUDE_KEYWORDS,
)

from ingredient import (
    INGREDIENTES_VALIDOS,
)

from benefits import (
    BENEFIT_SYNONYMS_PT,
    BENEFIT_CANONICAL_ORDER,
)

from category import (
    CATEGORY_CANONICAL_ORDER,
    CATEGORY_HINTS,
)

## Informações Iniciais



In [57]:
# Site root; relative product hrefs and image paths are resolved against it.
BASE_URL = "https://www.sallve.com.br"
# Collection page that get_product_links_from_collection paginates through.
COLLECTION_URL = "https://www.sallve.com.br/collections/loja"


# Skin-type label -> filtered collection URL. enrich_with_skin_types crawls
# each URL and tags the products whose names appear there with the label.
SKIN_FILTER_URLS = {
    "seca":      "https://www.sallve.com.br/collections/super-ativos?filter.p.m.filter.skin_type=pele+seca",
    "mista":     "https://www.sallve.com.br/collections/super-ativos?filter.p.m.filter.skin_type=pele+mista",
    "sens√≠vel":  "https://www.sallve.com.br/collections/super-ativos?filter.p.m.filter.skin_type=pele+sens%C3%ADvel",
    "normal":    "https://www.sallve.com.br/collections/super-ativos?filter.p.m.filter.skin_type=pele+normal",
    "oleosa":    "https://www.sallve.com.br/collections/super-ativos?filter.p.m.filter.skin_type=pele+oleosa",
    "todos os tipos": "https://www.sallve.com.br/collections/super-ativos?filter.p.m.filter.skin_type=todos+os+tipos+de+pele",
}

# Browser-like User-Agent header sent with every requests.get call in this file.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    )
}

# Folder (relative to the working directory) where product images are saved.
# It is created as a side effect of running this cell.
IMAGES_DIR = Path("images")
IMAGES_DIR.mkdir(parents=True, exist_ok=True)

## Utilitários

### Funções auxiliares para normalização de texto, remoção de acentos, tokenização de ingredientes, padronização de dados, sanitização de nomes de arquivos e formatação de preços.

In [None]:
def _strip_accents_lower(s: str) -> str:
    if not s:
        return ""
    s = "".join(c for c in unicodedata.normalize("NFD", s) if unicodedata.category(c) != "Mn")
    s = s.lower()
    s = s.replace("-", " ")
    s = re.sub(r"[^\w\s]", " ", s)
    s = re.sub(r"\s+", " ", s).strip()
    return s

def _safe_join_url(url: str) -> str:
    return "https:" + url if url.startswith("//") else url

def _tokenize_ingredientes(texto: str):
    if not texto:
        return []
    texto = texto.replace("\n", " ").replace("\r", " ")
    partes = re.split(r"[;,¬∑‚Ä¢|\u2022]|(?:\s{2,})|,|\.", texto)
    return [p.strip() for p in partes if p and p.strip()]

def _padroniza_por_lista(tokens, lista_validos):
    """Map raw ingredient tokens onto canonical names from ``lista_validos``.

    Both sides are compared after ``_strip_accents_lower`` normalization. A
    token matches the first valid name whose normalized form is a substring
    of the normalized token. Two special cases run before the generic match
    and apply only when their canonical target is in the valid list:

    * a token containing both "hialuronato" and "sodio" maps to the
      "acido hialuronico" entry;
    * a token containing "matrixyl" maps to the matrixyl peptide entry.

    Args:
        tokens: iterable of raw ingredient strings.
        lista_validos: iterable of canonical ingredient names.

    Returns:
        Canonical names in first-seen order, without duplicates.
    """
    valid_norm_map = {_strip_accents_lower(v): v for v in lista_validos}
    # A valid entry that normalizes to "" is a substring of every token and
    # would swallow real matches, so it is never used as a key.
    valid_norm_map.pop("", None)
    matrixyl_key = _strip_accents_lower("pept√≠deos matrixyl")
    padronizados, vistos = [], set()

    for tok in tokens:
        n = _strip_accents_lower(tok)
        if "hialuronato" in n and "sodio" in n and "acido hialuronico" in valid_norm_map:
            key = "acido hialuronico"
        elif "matrixyl" in n and matrixyl_key in valid_norm_map:
            # Guarded like the branch above: without the membership check the
            # lookup below raised KeyError when the entry was not in the list.
            key = matrixyl_key
        else:
            # Equality and prefix matches are both covered by the substring test.
            key = next((kn for kn in valid_norm_map if kn in n), None)
        if key and key not in vistos:
            padronizados.append(valid_norm_map[key])
            vistos.add(key)
    return padronizados

def _add_or_replace_page_param(url: str, page: int) -> str:

    parsed = urlparse(url)
    q = parse_qs(parsed.query, keep_blank_values=True)
    q["page"] = [str(page)]
    new_query = urlencode({k: v[0] if isinstance(v, list) and len(v) == 1 else v for k, v in q.items()}, doseq=True)
    return parsed._replace(query=new_query).geturl()

def _sanitize_filename(name: str) -> str:
    """Turn a product name into a lowercase, hyphen-separated file stem.

    The name is normalized with ``_strip_accents_lower`` and every run of
    characters outside ``a-z0-9`` becomes a single hyphen. Falls back to
    "produto" when nothing usable remains.
    """
    normalizado = _strip_accents_lower(name)
    # The "+" quantifier already collapses each run into one hyphen, so no
    # separate pass is needed to squeeze repeated hyphens.
    slug = re.sub(r"[^a-z0-9]+", "-", normalizado).strip("-")
    if not slug:
        return "produto"
    return slug

def _normalize_price_text(txt: str) -> str | None:
    if not txt:
        return None
    
    t = re.sub(r"\s+", " ", txt).strip().replace("\xa0", " ")
    
    m = re.search(r"(?:R\$)?\s*(\d{1,3}(?:\.\d{3})*,\d{2})", t)
    if m:
        num = m.group(1).replace(".", "").replace(",", ".")
        try:
            return f"{float(num):.2f}"
        except ValueError:
            return None
    
    return None

def should_exclude_product(product_name: str) -> bool:
    """Tell whether a product must be skipped based on its name.

    Returns True for a falsy name, or when any non-empty entry of
    ``EXCLUDE_KEYWORDS`` occurs in the name. The comparison is a substring
    test done in lowercase with accents removed.
    """
    if not product_name:
        return True

    def _sem_acentos(valor) -> str:
        # Drop combining marks after NFD decomposition; non-strings are coerced.
        texto = valor if isinstance(valor, str) else str(valor)
        decomposto = unicodedata.normalize("NFD", texto)
        return "".join(ch for ch in decomposto if unicodedata.category(ch) != "Mn")

    alvo = _sem_acentos(product_name.lower())
    return any(
        palavra and _sem_acentos(palavra.lower()) in alvo
        for palavra in EXCLUDE_KEYWORDS
    )

## Coleta de Links de Produtos Disponíveis

In [59]:
def get_product_links_from_collection(max_pages: int = 8):
    """Collect unique product URLs from the paginated collection page.

    Pages 1..``max_pages`` of ``COLLECTION_URL`` are fetched in order. Every
    anchor whose href contains "/products/" is resolved against ``BASE_URL``
    and kept once, in first-seen order. Crawling stops at the first non-200
    response or at the first exception (which is printed).

    Returns:
        List of absolute product URLs.
    """
    # A dict doubles as an insertion-ordered set of URLs.
    coletados = {}
    for page in range(1, max_pages + 1):
        url = f"{COLLECTION_URL}?page={page}"
        try:
            resposta = requests.get(url, headers=HEADERS, timeout=30)
            if resposta.status_code != 200:
                break
            pagina = BeautifulSoup(resposta.content, "html.parser")
            for anchor in pagina.find_all("a", href=True):
                destino = anchor["href"]
                if "/products/" not in destino:
                    continue
                coletados.setdefault(urljoin(BASE_URL, destino), None)
            # Pause between pages to avoid hammering the site.
            time.sleep(0.7)
        except Exception as e:
            print(f"[get_product_links_from_collection] Erro na p√°gina {page}: {e}")
            break
    return list(coletados)

## Ingredientes

In [60]:
def extract_ingredients(soup: BeautifulSoup) -> str:
    """Extract the product's ingredients as canonical names joined by "; ".

    Sources, all inside the ``div`` whose class matches
    ``tabcontent ... ingredients``:

    * the text of every ``<h2>``;
    * in the ``ingredients_resume`` div, the text after an "Ingredientes:"
      header (up to a following "Ingredientes em português:" header);
    * in the same div, the text after the "Ingredientes em português:" header.

    Collected tokens are filtered and then standardized against
    ``INGREDIENTES_VALIDOS`` by ``_padroniza_por_lista``.

    Returns:
        "; "-joined canonical ingredient names; "" when nothing matches.
    """
    ingredientes_brutos = []
    area_ing = soup.find("div", class_=re.compile(r"\btabcontent\b.*\bingredients\b", re.I))

    if area_ing:
        for h2 in area_ing.find_all("h2"):
            txt = h2.get_text(" ", strip=True)
            if txt:
                ingredientes_brutos.append(txt)

        resume = area_ing.find("div", class_=re.compile(r"\bingredients_resume\b", re.I))
        if resume:
            raw = resume.get_text("\n", strip=True)
            # Headers are located on the raw text itself. The earlier approach
            # searched a string normalized by _strip_accents_lower, which
            # removes ":" (so "ingredientes:" was never found) and whose
            # offsets do not line up with the raw text.
            cab_pt = re.search(
                r"ingredientes\s+em\s+portugu(?:\u00ea|e\u0302?)s\s*:", raw, re.I
            )
            cab_geral = re.search(r"ingredientes\s*:", raw, re.I)

            if cab_geral:
                fim = len(raw)
                # Stop the generic block where the Portuguese block begins.
                if cab_pt and cab_pt.start() >= cab_geral.end():
                    fim = cab_pt.start()
                ingredientes_brutos.extend(
                    _tokenize_ingredientes(raw[cab_geral.end():fim])
                )
            if cab_pt:
                ingredientes_brutos.extend(_tokenize_ingredientes(raw[cab_pt.end():]))

    tokens = []
    for item in ingredientes_brutos:
        if not item:
            continue
        t = item.strip().strip(":").strip()
        if not t:
            continue
        # Long separator-free strings are treated as prose, not ingredient names.
        if len(t.split()) > 8 and "," not in t and ";" not in t:
            continue
        tokens.append(t)

    padronizados = _padroniza_por_lista(tokens, INGREDIENTES_VALIDOS)
    return "; ".join(padronizados)

## Benefícios

In [61]:
def padroniza_beneficios(textos_beneficios: List[str]) -> List[str]:
    """Map free-text benefit snippets onto canonical benefit labels.

    A label is selected when any of its synonyms in ``BENEFIT_SYNONYMS_PT``
    occurs (after ``_strip_accents_lower`` normalization) as a substring of
    any of the given texts.

    Args:
        textos_beneficios: raw benefit texts scraped from the page.

    Returns:
        Matching labels ordered by ``BENEFIT_CANONICAL_ORDER`` when it is
        non-empty (labels missing from it go last, alphabetically), otherwise
        in alphabetical order. Empty list for empty input.
    """
    if not textos_beneficios:
        return []

    norm_syn = {}
    for canonico, patt_list in BENEFIT_SYNONYMS_PT.items():
        normalizados = (_strip_accents_lower(s) for s in patt_list if s)
        # A synonym that normalizes to "" is a substring of every text and
        # would select its label unconditionally, so it is dropped.
        norm_syn[canonico] = [p for p in normalizados if p]

    textos_norm = [_strip_accents_lower(txt) for txt in textos_beneficios]
    encontrados = {
        canonico
        for canonico, padroes in norm_syn.items()
        if any(p in n for n in textos_norm for p in padroes)
    }

    if BENEFIT_CANONICAL_ORDER:
        order_map = {name: i for i, name in enumerate(BENEFIT_CANONICAL_ORDER)}
        # The name is the tie-breaker, so unknown labels sort deterministically.
        return sorted(encontrados, key=lambda x: (order_map.get(x, 999), x))
    return sorted(encontrados)

def extract_beneficios(soup: BeautifulSoup) -> str:
    """Extract the product's benefits as canonical labels joined by "; ".

    Candidate texts come from ``<li>`` items inside ``<details>`` elements
    whose class matches "Differentials". Items that are purely numeric,
    contain one of the noise words, or exceed 200 characters are skipped.
    When that yields nothing, every ``<li>`` of at most 160 characters in the
    ``RegularMain__content`` article (or in the whole page) is used instead.
    Candidates are then mapped to labels by ``padroniza_beneficios``.
    """
    ruido = ["ponto", "pontos", "minha sallve"]
    candidatos = []

    blocos = soup.find_all("details", class_=re.compile(r"\bDifferentials\b", re.I))
    for bloco in blocos:
        for item in bloco.find_all("li"):
            texto = item.get_text(" ", strip=True)
            if not texto or len(texto) > 200:
                continue
            normalizado = _strip_accents_lower(texto)
            so_numero = re.fullmatch(r"\d+(?:[.,]\d+)?", normalizado)
            tem_ruido = any(palavra in normalizado for palavra in ruido)
            if so_numero or tem_ruido:
                continue
            candidatos.append(texto)

    if not candidatos:
        # Fallback: take short list items from the main article, or the page.
        principal = soup.find("article", class_=re.compile(r"\bRegularMain__content\b"))
        escopo = principal or soup
        textos = (li.get_text(" ", strip=True) for li in escopo.find_all("li"))
        candidatos.extend(t for t in textos if t and len(t) <= 160)

    return "; ".join(padroniza_beneficios(candidatos))

## Imagem

In [62]:
def _pick_best_from_srcset(srcset: str) -> str:
    best_url, best_w = None, -1
    for part in srcset.split(","):
        part = part.strip()
        if not part:
            continue
        m = re.match(r"(.+?)\s+(\d+)w", part)
        if m:
            url, w = m.group(1).strip(), int(m.group(2))
            if w > best_w:
                best_url, best_w = url, w
        else:
            if best_url is None:
                best_url = part
    return best_url

def extract_image_url(soup: BeautifulSoup) -> Optional[str]:
    """Find the product's main image and return it as an absolute URL.

    Lookup order: the ``<img>`` whose class matches "view__image"; then any
    ``<img>`` whose classes mention product/image/gallery/media/hero/view;
    then the ``og:image`` meta tag. For each image the best ``srcset`` entry
    is preferred over ``src``. Returns None when no usable URL is found.
    """

    def _fontes(tag) -> list:
        # Best srcset entry first, then the plain src.
        achadas = []
        if tag.get("srcset"):
            achadas.append(_pick_best_from_srcset(tag["srcset"]))
        if tag.get("src"):
            achadas.append(tag["src"])
        return achadas

    principal = soup.find("img", class_=re.compile(r"\bview__image\b", re.I))
    candidates = _fontes(principal) if principal else []

    if not candidates:
        for im in soup.find_all("img"):
            classes = " ".join(im.get("class", []))
            if re.search(r"(product|image|gallery|media|hero|view)", classes, re.I):
                candidates.extend(_fontes(im))

    if not candidates:
        meta = soup.find("meta", property="og:image")
        if meta and meta.get("content"):
            candidates.append(meta["content"])

    escolhida = next((u for u in candidates if u), None)
    if escolhida is None:
        return None
    return urljoin(BASE_URL, _safe_join_url(escolhida.strip()))

def _infer_ext_from_url(url: str) -> str:
    path = urlparse(url).path
    ext = os.path.splitext(path)[1].lower()
    if ext in [".jpg", ".jpeg", ".png", ".webp", ".gif"]:
        return ext if ext != ".jpeg" else ".jpg"
    return ".jpg"

def download_image(image_url: str, product_name: str) -> Optional[str]:
    """Download an image into ``IMAGES_DIR`` and return the file name used.

    The file name is derived from the product name plus the extension inferred
    from the URL; a numeric suffix ("-1", "-2", ...) is added while a file
    with that name already exists.

    Returns:
        The file name (not the full path), or None when ``image_url`` is
        falsy or anything fails (the failure is printed).
    """
    if not image_url:
        return None

    try:
        resposta = requests.get(image_url, headers=HEADERS, timeout=40)
        resposta.raise_for_status()
        ext = _infer_ext_from_url(image_url)
        base = _sanitize_filename(product_name)

        filename = f"{base}{ext}"
        sufixo = 0
        while (IMAGES_DIR / filename).exists():
            sufixo += 1
            filename = f"{base}-{sufixo}{ext}"

        (IMAGES_DIR / filename).write_bytes(resposta.content)
        return filename
    except Exception as e:
        print(f"[download_image] Falha ao baixar {image_url}: {e}")
        return None

## Preço e tamanho

In [63]:

def extract_price(soup: BeautifulSoup) -> Optional[str]:
    """Extract the product price as a normalized string.

    Tried in order, returning the first text that ``_normalize_price_text``
    can parse: a ``strong``/``span`` with a "TotalPrice" class, the
    "ProductPrice" element (its whole text, then its ``<strong>``), the first
    element whose class contains "price", and finally the whole page text.
    """
    for padrao in (r"\bTotalPrice\b", r"\bTotalPrice__CTA\b"):
        node = soup.find(["strong", "span"], class_=re.compile(padrao, re.I))
        if not node or not node.get_text(strip=True):
            continue
        preco = _normalize_price_text(node.get_text(" ", strip=True))
        if preco:
            return preco

    caixa = soup.find(class_=re.compile(r"\bProductPrice\b", re.I))
    if caixa:
        for alvo in (caixa, caixa.find("strong")):
            if not alvo:
                continue
            preco = _normalize_price_text(alvo.get_text(" ", strip=True))
            if preco:
                return preco

    generico = soup.find(class_=re.compile(r"price", re.I))
    if generico:
        preco = _normalize_price_text(generico.get_text(" ", strip=True))
        if preco:
            return preco

    # Last resort: the first price-looking value anywhere on the page.
    return _normalize_price_text(soup.get_text(" ", strip=True))

def extract_size(soup: BeautifulSoup) -> Optional[str]:
    """Extract the product size/quantity text.

    Returns the full text of the ``span`` with a "ProductWeight" class when
    present. Otherwise the first element whose class contains "weight",
    "size", "volume" or "quantity" (checked in that order) is searched for a
    number followed by a unit (ml, g, mg, kg, l, oz). None when nothing fits.
    """
    principal = soup.find("span", class_=re.compile(r"\bProductWeight\b", re.I))
    if principal:
        return principal.get_text(strip=True)

    medida = re.compile(r"\d+\,?\d*\s*(?:ml|g|mg|kg|l|oz)", re.I)
    for pista in ("weight", "size", "volume", "quantity"):
        node = soup.find(class_=re.compile(pista, re.I))
        if not node:
            continue
        texto = node.get_text(strip=True)
        if not texto:
            continue
        achado = medida.search(texto)
        if achado:
            return achado.group()
    return None

## Nome e subtítulo

In [64]:

def extract_subtitle(soup: BeautifulSoup) -> Optional[str]:
    """Return the product subtitle, truncated to 220 characters.

    The CSS selectors are tried in order; the first one that selects an
    element with non-empty text wins. None when no selector matches.
    """
    seletores = (
        "p.ProductSubtitle",
        "p.product-subtitle",
        ".product__subtitle",
        ".product__text",
        ".product__text.inline-richtext",
        ".ProductSummary",
        ".ProductDescription p",
    )
    for seletor in seletores:
        node = soup.select_one(seletor)
        if not node or not node.get_text(strip=True):
            continue
        return node.get_text(" ", strip=True)[:220]
    return None

def extract_name(soup: BeautifulSoup) -> Optional[str]:
    """Return the product name found on the page, or None.

    Lookups are tried in order: the ``span`` with id "ProductNameTitle", the
    first ``h1``, ``h2`` and ``title`` tags, then the first element whose
    class matches "product...title" or "name". The first non-empty text wins.
    """
    buscas = [
        ("span", {"id": "ProductNameTitle"}),
        ("h1", {}),
        ("h2", {}),
        ("title", {}),
        (None, {"class_": re.compile(r"product.*title", re.I)}),
        (None, {"class_": re.compile(r"\bname\b", re.I)}),
    ]
    for tag, filtros in buscas:
        node = soup.find(tag, **filtros)
        if not node:
            continue
        texto = node.get_text(strip=True)
        if texto:
            return texto
    return None

def fetch_soup(url: str) -> Optional[BeautifulSoup]:
    """Download ``url`` and return its parsed HTML.

    Returns None on a non-200 status or on any exception (which is printed).
    """
    try:
        resposta = requests.get(url, headers=HEADERS, timeout=30)
        sucesso = resposta.status_code == 200
        return BeautifulSoup(resposta.content, "html.parser") if sucesso else None
    except Exception as e:
        print(f"[fetch_soup] Erro em {url}: {e}")
        return None

## Categoria e Tipos de Pele

In [65]:
# Category name -> its position in CATEGORY_CANONICAL_ORDER; classify_category
# uses it as a sort key to pick the highest-priority matching category.
_CAT_ORDER_MAP = {c: i for i, c in enumerate(CATEGORY_CANONICAL_ORDER)}

def _norm_plain(s: str) -> str:
    """Normalize ``s`` with ``_strip_accents_lower``, treating falsy input as ""."""
    texto = s if s else ""
    return _strip_accents_lower(texto)

def classify_category(name: str, description: str | None = None) -> Optional[str]:
    """Classify a product into a category from its name and description.

    A category from ``CATEGORY_HINTS`` matches when any of its hints occurs
    (normalized) in the normalized "name description" text. Among matching
    categories the one ranked first in ``CATEGORY_CANONICAL_ORDER`` is
    returned; None when no category matches.
    """
    alvo = _norm_plain(f"{name or ''} {description or ''}")

    correspondencias = []
    for categoria, pistas in CATEGORY_HINTS.items():
        normalizadas = (_norm_plain(pista) for pista in pistas)
        if any(pista and pista in alvo for pista in normalizadas):
            correspondencias.append(categoria)

    if not correspondencias:
        return None
    # min() returns the first minimal item, like a stable sort followed by [0].
    return min(correspondencias, key=lambda c: _CAT_ORDER_MAP.get(c, 10_000))

# Canonical skin type -> its non-empty synonyms, each normalized with
# _strip_accents_lower so they can be matched against normalized text.
_SKIN_SYNONYMS_NORM = {
    canonico: [_strip_accents_lower(x) for x in lst if x]
    for canonico, lst in SKIN_TYPE_SYNONYMS_PT.items()
}
# Canonical skin type -> its position in SKIN_TYPE_CANONICAL_ORDER (sort key).
_SKIN_ORDER_MAP = {name: i for i, name in enumerate(SKIN_TYPE_CANONICAL_ORDER or [])}

def _classify_skin_types_from_strings(*strings: str) -> List[str]:
    """Detect canonical skin types mentioned in the given strings.

    The non-empty strings are joined and normalized; a skin type is detected
    when any of its normalized synonyms occurs in that text. The result is
    ordered by ``SKIN_TYPE_CANONICAL_ORDER`` and is empty when nothing matches.
    """
    texto = _strip_accents_lower(" ".join(filter(None, strings)))
    detectados = set()
    for canonico, sinonimos in _SKIN_SYNONYMS_NORM.items():
        for sinonimo in sinonimos:
            if sinonimo and sinonimo in texto:
                detectados.add(canonico)
                break
    return sorted(detectados, key=lambda tipo: _SKIN_ORDER_MAP.get(tipo, 10_000))


## Extração

In [1]:
def extract_product_data_from_soup(soup: BeautifulSoup, product_url: str, nome: str) -> Dict:
    """Build the product record for one parsed product page.

    Runs every field extractor on ``soup``, classifies the category and skin
    types from the textual fields, and downloads the main image.

    Args:
        soup: parsed product page.
        product_url: URL of the page (not used by the current implementation).
        nome: product name already extracted by the caller.

    Returns:
        Dict with keys marca, nome, subtitulo, categoria, quantidade, preco,
        ingredientes, beneficios, tipo_pele and imagem. Missing values are ""
        except ``subtitulo``, which may be None.
    """
    subtitulo = extract_subtitle(soup)
    beneficios = extract_beneficios(soup) or ""
    ingredientes = extract_ingredients(soup) or ""
    preco = extract_price(soup) or ""
    quantidade = extract_size(soup) or ""
    categoria = classify_category(nome, subtitulo) or ""
    tipos = _classify_skin_types_from_strings(nome, subtitulo or "", beneficios)

    # The image is downloaded last; "" is kept when there is no image URL or
    # the download fails.
    imagem = ""
    img_url = extract_image_url(soup)
    if img_url:
        imagem = download_image(img_url, nome) or ""

    return {
        "marca": "sallve",
        "nome": nome,
        "subtitulo": subtitulo,
        "categoria": categoria,
        "quantidade": quantidade,
        "preco": preco,
        "ingredientes": ingredientes,
        "beneficios": beneficios,
        "tipo_pele": "; ".join(tipos),
        "imagem": imagem,
    }

def scrape_sallve_products() -> List[Dict]:
    """Scrape every non-excluded product of the store collection.

    Collects product links (up to 8 collection pages), opens each page, and
    skips pages that fail to load, have no name, or whose name is rejected by
    ``should_exclude_product``. Progress is printed along the way.

    Returns:
        List of product dicts built by ``extract_product_data_from_soup``.
    """
    print("Iniciando webscraping da Sallve...")
    print("Coletando produtos em /collections/loja com pagina√ß√£o...")
    product_links = get_product_links_from_collection(max_pages=8)
    if not product_links:
        print("Nenhum produto encontrado na cole√ß√£o. (Checar HTML/seletores)")
        return []

    total = len(product_links)
    print(f"Encontrados {total} links de produto (antes de filtro).")
    products = []

    for i, url in enumerate(product_links, 1):
        print(f" [{i}/{total}] {url}")
        soup = fetch_soup(url)
        nome = extract_name(soup) if soup else None

        if not soup:
            print("\n Falha ao abrir p√°gina.")
        elif not nome:
            print("\n Nome n√£o encontrado.")
        elif should_exclude_product(nome):
            print(f"\nExclu√≠do por keyword (models): {nome}")
        else:
            products.append(extract_product_data_from_soup(soup, url, nome))
            print(f" \nOK: {nome}")
            # The pause only follows a successfully scraped product.
            time.sleep(0.7)

    print(f"Total p√≥s-filtro: {len(products)}")
    return products

def _collect_collection_products_with_pagination(collection_url: str, max_pages: int = 20):
    """Collect normalized product names listed on a paginated collection.

    For each page, every anchor whose href contains "/products/" contributes
    its text (or, when it has none, the ``alt`` of an image inside it),
    normalized with ``_strip_accents_lower``. Crawling stops at a non-200
    response, at a page with no named product anchors, or at an exception
    (which is printed).

    Returns:
        Set of normalized product names.
    """
    encontrados = set()
    for page in range(1, max_pages + 1):
        page_url = _add_or_replace_page_param(collection_url, page)
        try:
            resposta = requests.get(page_url, headers=HEADERS, timeout=30)
            if resposta.status_code != 200:
                break
            pagina = BeautifulSoup(resposta.content, "html.parser")

            nomes_na_pagina = 0
            for anchor in pagina.find_all("a", href=True):
                if "/products/" not in anchor["href"]:
                    continue
                rotulo = anchor.get_text(" ", strip=True) or ""
                if not rotulo:
                    # Image-only links: fall back to the image's alt text.
                    imagem = anchor.find("img", alt=True)
                    if imagem and imagem.get("alt"):
                        rotulo = imagem["alt"]
                if not rotulo:
                    continue
                nomes_na_pagina += 1
                encontrados.add(_strip_accents_lower(rotulo))

            if nomes_na_pagina == 0:
                break
            time.sleep(0.5)
        except Exception as e:
            print(f"[skin-type-collect] Erro na p√°gina: {page_url} -> {e}")
            break
    return encontrados

def enrich_with_skin_types(products: List[Dict]) -> List[Dict]:
    """Fill each product's ``tipo_pele`` using the site's skin-type filters.

    Products are matched to filter results by normalized name. Products
    listed under the "todos os tipos" filter get exactly that value and are
    not touched afterwards. For the remaining filters the label is appended
    to the product's existing types, ordered by ``SKIN_TYPE_CANONICAL_ORDER``
    when that is non-empty. Products left without any type receive
    "todos os tipos". The list is modified in place and also returned.
    """
    if not products:
        return products

    print("\nColetando tipos de pele via filtros da Sallve...")

    # Normalized product name -> positions in ``products`` carrying that name.
    posicoes_por_nome = {}
    for pos, produto in enumerate(products):
        chave = _strip_accents_lower(produto.get("nome", ""))
        if chave:
            posicoes_por_nome.setdefault(chave, set()).add(pos)

    def _posicoes_do_filtro(rotulo, filtro_url):
        # Crawl one filter page set and return the matching product positions.
        print(f" - Filtro '{rotulo}': {filtro_url}")
        nomes = _collect_collection_products_with_pagination(filtro_url, max_pages=20)
        print(f"   ¬∑ {len(nomes)} nome(s) coletado(s)")
        return [pos for nome in nomes for pos in posicoes_por_nome.get(nome, ())]

    ordem = None
    if SKIN_TYPE_CANONICAL_ORDER:
        ordem = {valor: i for i, valor in enumerate(SKIN_TYPE_CANONICAL_ORDER)}

    # First pass: the universal filter, so these products can be locked.
    universais = set()
    if "todos os tipos" in SKIN_FILTER_URLS:
        for pos in _posicoes_do_filtro("todos os tipos", SKIN_FILTER_URLS["todos os tipos"]):
            universais.add(pos)
            products[pos]["tipo_pele"] = "todos os tipos"
        time.sleep(0.6)

    # Second pass: the specific skin-type filters.
    for canonical, url in SKIN_FILTER_URLS.items():
        if canonical == "todos os tipos":
            continue  # handled in the first pass

        for pos in _posicoes_do_filtro(canonical, url):
            if pos in universais:
                continue  # universal products keep only "todos os tipos"

            atual = products[pos].get("tipo_pele", "") or ""
            tipos = [t.strip() for t in atual.split(";") if t.strip()]
            if canonical not in tipos:
                tipos.append(canonical)
            if ordem is not None:
                tipos = sorted(tipos, key=lambda x: ordem.get(x, 999))
            products[pos]["tipo_pele"] = "; ".join(tipos)
        time.sleep(0.6)

    # Products with no detected type default to the universal label.
    for produto in products:
        if not (produto.get("tipo_pele") or "").strip():
            produto["tipo_pele"] = "todos os tipos"

    return products

NameError: name 'BeautifulSoup' is not defined

## Arquivo e Main

In [67]:
def save_data(products_data: List[Dict]):
    """Write the products to ``sallve_products.json`` in the working directory.

    Only the known fields are kept for each product (missing ones become
    null). Nothing is written when ``products_data`` is empty.
    """
    if not products_data:
        print("Nenhum dado para salvar.")
        return

    campos = (
        "marca",
        "nome",
        "subtitulo",
        "categoria",
        "quantidade",
        "preco",
        "beneficios",
        "ingredientes",
        "tipo_pele",
        "imagem",
    )
    clean = [{campo: p.get(campo) for campo in campos} for p in products_data]

    # Only JSON is written here.
    with open("sallve_products.json", "w", encoding="utf-8") as f:
        json.dump(clean, f, ensure_ascii=False, indent=2)

    print(f"‚úÖ JSON salvo: sallve_products.json ({len(clean)} produtos)")

# Script entry point: scrape the products, enrich them with skin types from
# the site filters, then save the result as JSON.
if __name__ == "__main__":
    try:
        data = scrape_sallve_products()
        data = enrich_with_skin_types(data)
        save_data(data)
        print(f"\nConclu√≠do! Produtos extra√≠dos: {len(data)}")
        print(f"Imagens salvas em: {IMAGES_DIR.resolve()}")
    except Exception as e:
        # Top-level boundary: any failure is reported as a single message.
        print(f"\nERRO: {e}")

Iniciando webscraping da Sallve...
Coletando produtos em /collections/loja com pagina√ß√£o...
Encontrados 138 links de produto (antes de filtro).
 [1/138] https://www.sallve.com.br/products/bastao-antioxidante-para-olhos
Encontrados 138 links de produto (antes de filtro).
 [1/138] https://www.sallve.com.br/products/bastao-antioxidante-para-olhos
 
OK: Bast√£o Antiolheiras Antioxidante
 
OK: Bast√£o Antiolheiras Antioxidante
 [2/138] https://www.sallve.com.br/products/kit-tudinho
 [2/138] https://www.sallve.com.br/products/kit-tudinho

Exclu√≠do por keyword (models): Kit Tudinho
 [3/138] https://www.sallve.com.br/collections/kits/products/kit-basicao

Exclu√≠do por keyword (models): Kit Tudinho
 [3/138] https://www.sallve.com.br/collections/kits/products/kit-basicao

Exclu√≠do por keyword (models): Kit Basic√£o
 [4/138] https://www.sallve.com.br/products/protetor-solar-bastao-com-cor-antimanchas-fps-90

Exclu√≠do por keyword (models): Kit Basic√£o
 [4/138] https://www.sallve.com.br/prod

## Geração de CSV (Opcional)

Execute a célula abaixo apenas se precisar gerar o arquivo CSV a partir do JSON.

In [2]:
import csv
import json

def json_to_csv(json_file="alterado.json", csv_file="sallve_products.csv"):
    """
    Convert a saved products JSON file to CSV.

    Usage:
    - json_to_csv()  # uses the default file names
    - json_to_csv("my_file.json", "my_file.csv")  # custom file names

    Falsy field values (including None) are written as empty strings. When
    the JSON file does not exist, the ``*.json`` files in the working
    directory are listed instead. Any other error is printed, not raised.
    """
    cols = ["marca", "nome", "subtitulo", "categoria", "quantidade", "preco", "beneficios", "ingredientes", "tipo_pele", "imagem"]

    try:
        with open(json_file, "r", encoding="utf-8") as origem:
            data = json.load(origem)

        if not data:
            print(f"Nenhum dado encontrado no arquivo {json_file}")
            return

        with open(csv_file, "w", encoding="utf-8", newline="") as destino:
            writer = csv.DictWriter(destino, fieldnames=cols)
            writer.writeheader()
            # Rows are produced lazily, one at a time, as they are written.
            writer.writerows({k: (row.get(k) or "") for k in cols} for row in data)

        print(f"‚úÖ CSV gerado: {csv_file} ({len(data)} linhas)")
        print(f"üìÅ A partir do JSON: {json_file}")

    except FileNotFoundError:
        print(f"‚ùå Arquivo {json_file} n√£o encontrado!")
        print("üìÇ Arquivos JSON dispon√≠veis na pasta:")
        import glob
        disponiveis = glob.glob("*.json")
        if not disponiveis:
            print("   Nenhum arquivo .json encontrado")
        for nome_arquivo in disponiveis:
            print(f"   - {nome_arquivo}")
    except Exception as e:
        print(f"‚ùå Erro ao converter JSON para CSV: {e}")

# Usage example:
print("Como usar:")
print("json_to_csv()  # gera CSV do arquivo padr√£o")
print("json_to_csv('alterado.json')  # gera CSV do arquivo alterado.json")
print("json_to_csv('alterado.json', 'meu_arquivo.csv')  # personaliza ambos os nomes")

# Run one of the lines below (uncomment it):
# json_to_csv()  # default file
json_to_csv("alterado.json")  # your custom file

Como usar:
json_to_csv()  # gera CSV do arquivo padr√£o
json_to_csv('alterado.json')  # gera CSV do arquivo alterado.json
json_to_csv('alterado.json', 'meu_arquivo.csv')  # personaliza ambos os nomes
‚úÖ CSV gerado: sallve_products.csv (39 linhas)
üìÅ A partir do JSON: alterado.json
