# FEEL22.COM product scraper

In [None]:
import json

file_path = '/content/drive/MyDrive/Upwork/mahmoud/fell22_products.json'

# Read the JSON file
with open(file_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Display the first product
if data and isinstance(data, list):
    print("First product from the JSON file:")
    print(json.dumps(data[0], indent=2))
elif data and isinstance(data, dict):
    print("JSON file contains a dictionary. Displaying it:")
    print(json.dumps(data, indent=2))
else:
    print("JSON file is empty or in an unexpected format.")

First product from the JSON file:
{
  "id": 8797514039434,
  "title": "Revuele Perfect Hair Reboot Gift Set",
  "handle": "revuele-perfect-hair-reboot-gift-set",
  "body_html": "<p><strong>This Set Contains:</strong></p>\n<ul>\n<li>Revuele Perfect Hair Repair Shampoo</li>\n<li>Revuele Perfect Hair Repair Conditionar</li>\n<li>Revuele Argan Oil</li>\n<li>\n<strong>Free</strong> Revuele Ampoules</li>\n</ul>",
  "published_at": "2025-12-12T15:35:00+02:00",
  "created_at": "2025-12-09T20:40:29+02:00",
  "updated_at": "2025-12-14T13:18:52+02:00",
  "vendor": "Revuele",
  "product_type": "Hair Care Gifts and Sets",
  "tags": [
    "XMASGIFTS25"
  ],
  "variants": [
    {
      "id": 47436758515850,
      "title": "Default Title",
      "option1": "Default Title",
      "option2": null,
      "option3": null,
      "sku": "REVS18",
      "requires_shipping": true,
      "taxable": true,
      "featured_image": null,
      "available": true,
      "price": "28.00",
      "grams": 1,
      "com

In [None]:
BASE_URL = "https://feel22.com/products/"

# Ensure 'data' from daouk_products.json is loaded and is a list
if 'data' in globals() and isinstance(data, list):
    daouk_handles = [item['handle'] for item in data if 'handle' in item]
    FEEL22_URLS = [f"{BASE_URL}{handle}" for handle in daouk_handles]
    print(f"Generated {len(FEEL22_URLS)} URLs for Feel22.com.")
    # Display the first few generated URLs as a sanity check
    print("First 5 Feel22 URLs:")
    for i in range(min(5, len(FEEL22_URLS))):
        print(FEEL22_URLS[i])
else:
    print("Error: 'data' variable not found or not in expected format. Please ensure daouk_products.json was loaded correctly.")
    FEEL22_URLS = []


Generated 7930 URLs for Feel22.com.
First 5 Feel22 URLs:
https://feel22.com/products/revuele-perfect-hair-reboot-gift-set
https://feel22.com/products/revuele-purifying-clear-skin-gift-set
https://feel22.com/products/revuele-retinol-moonlift-gift-set
https://feel22.com/products/babyliss-hair-straightner-st089e
https://feel22.com/products/isdin-retinal-intense-holiday-gift-set


In [None]:
import requests
import re
import json
import html
import os
import time
from google.colab import files

# -----------------------------
# CONFIG
# -----------------------------
BACKUP_EVERY = 500
PROGRESS_FILE = "progress.txt"
BACKUP_PREFIX = "backup_"
FINAL_FILE = "final_results.json"
TIMEOUT = 15
DELAY = 0.25  # be polite

# -----------------------------
# FETCH FUNCTION
# -----------------------------
def fetch_product_data(url: str) -> dict:
    headers = {
        "User-Agent": "Mozilla/5.0",
        "Accept": "text/html",
    }

    res = requests.get(url, headers=headers, timeout=20)
    res.raise_for_status()
    html_text = res.text

    # Find ALL assignments
    pattern = re.compile(
        r'Shopify\.current_product\s*=\s*(\{[\s\S]*?\})\s*;',
        re.MULTILINE
    )

    matches = pattern.findall(html_text)
    if not matches:
        raise ValueError("Shopify.current_product not found")

    # Take the LAST meaningful assignment
    raw_obj = matches[-1]

    # Remove JS spread syntax {...{ JSON }}
    raw_obj = re.sub(r'^\{\s*\.\.\.\s*\{', '{', raw_obj)
    raw_obj = re.sub(r'\}\s*\}$', '}', raw_obj)

    # Decode HTML entities
    raw_obj = html.unescape(raw_obj)

    # Convert to Python dict
    product = json.loads(raw_obj)

    # Sanity check
    if not product.get("id"):
        raise ValueError("Parsed product is empty or invalid")

    return product

# -----------------------------
# LOAD PROGRESS
# -----------------------------
start_index = 0
if os.path.exists(PROGRESS_FILE):
    with open(PROGRESS_FILE, "r") as f:
        start_index = int(f.read().strip())

print(f"Starting from index: {start_index}")

# -----------------------------
# LOAD URL LIST
# -----------------------------
# urls = [...]  # YOUR 4100 URLs HERE
urls = FEEL22_URLS  # assume already defined

total = len(urls)
batch_data = []
all_files = []

# -----------------------------
# MAIN LOOP
# -----------------------------
for i in range(start_index, total):
    url = urls[i]
    print(f"Processing .... {url}")

    try:
        product = fetch_product_data(url)
        batch_data.append(product)
    except Exception:
        pass  # silent fail, resume-safe

    # Save progress
    with open(PROGRESS_FILE, "w") as f:
        f.write(str(i + 1))

    # Backup every N items
    if (i + 1) % BACKUP_EVERY == 0 or i == total - 1:
        batch_num = (i + 1) // BACKUP_EVERY + (1 if (i + 1) % BACKUP_EVERY else 0)
        backup_name = f"{BACKUP_PREFIX}{batch_num:03d}.json"

        with open(backup_name, "w", encoding="utf-8") as f:
            json.dump(batch_data, f, indent=2, ensure_ascii=False)

        files.download(backup_name)
        all_files.append(backup_name)
        batch_data = []

        print(f"Processed {i + 1}/{total}")

    time.sleep(DELAY)

# -----------------------------
# FINAL MERGE
# -----------------------------
final_results = []

for file in sorted(f for f in os.listdir() if f.startswith(BACKUP_PREFIX)):
    with open(file, "r", encoding="utf-8") as f:
        final_results.extend(json.load(f))

with open(FINAL_FILE, "w", encoding="utf-8") as f:
    json.dump(final_results, f, indent=2, ensure_ascii=False)

files.download(FINAL_FILE)

print(f"\n✅ DONE — {len(final_results)} products saved to {FINAL_FILE}")

Starting from index: 0
Processing .... https://feel22.com/products/revuele-perfect-hair-reboot-gift-set
Processing .... https://feel22.com/products/revuele-purifying-clear-skin-gift-set
Processing .... https://feel22.com/products/revuele-retinol-moonlift-gift-set
Processing .... https://feel22.com/products/babyliss-hair-straightner-st089e
Processing .... https://feel22.com/products/isdin-retinal-intense-holiday-gift-set
Processing .... https://feel22.com/products/isdin-salicylic-renewal-serum-holiday-gift-set
Processing .... https://feel22.com/products/isdin-hyaluronic-concentrate-serum-holiday-gift-set
Processing .... https://feel22.com/products/uriage-star-hydration-eau-thermal-serum-gift-set
Processing .... https://feel22.com/products/uriage-star-hydration-eau-thermal-gift-set
Processing .... https://feel22.com/products/uriage-christmas-age-absolu-gift-set
Processing .... https://feel22.com/products/uriage-magic-of-youth-day-night-cream-gift-set
Processing .... https://feel22.com/pr

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Processed 500/7930
Processing .... https://feel22.com/products/acm-az-ane-cream-15-azelaic-acid-30ml
Processing .... https://feel22.com/products/vichy-48-hours-mineral-deodorant-50ml
Processing .... https://feel22.com/products/ducray-diaseptyl-0-2-skin-purifying-spray-125ml
Processing .... https://feel22.com/products/ducray-ictyane-face-body-cleansing-shower-cream-200ml
Processing .... https://feel22.com/products/ducray-elution-rebalancing-shampoo-200ml
Processing .... https://feel22.com/products/ducray-dexyane-med-soothing-repair-cream-for-eczema-and-atopic-dermatitis-100ml
Processing .... https://feel22.com/products/avene-cleanance-detox-mask
Processing .... https://feel22.com/products/mustela-certified-organic-cleansing-gel-body-hair-400ml
Processing .... https://feel22.com/products/loreal-paris-sunrise-in-paradise-la-terra
Processing .... https://feel22.com/products/revuele-bioactive-collagen-elastin-line-filler-day-cream-50ml
Processing .... https://feel22.com/products/mustela-foa

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Processed 1000/7930
Processing .... https://feel22.com/products/round-lab-mugwort-calming-cleanser-25ml
Processing .... https://feel22.com/products/round-lab-soybean-nourishing-sheet-mask
Processing .... https://feel22.com/products/round-lab-soybean-nourishing-cream-50ml
Processing .... https://feel22.com/products/round-lab-soybean-nourishing-cleanser-150ml
Processing .... https://feel22.com/products/round-lab-soybean-panthenol-cleanser-150ml
Processing .... https://feel22.com/products/round-lab-1025-dokdo-eye-cream-30ml
Processing .... https://feel22.com/products/round-lab-1025-dokdo-cleansing-milk-200ml
Processing .... https://feel22.com/products/round-lab-baby-mild-sunscreen-60ml
Processing .... https://feel22.com/products/round-lab-camellia-deep-collagen-firming-sun-50ml
Processing .... https://feel22.com/products/round-lab-birch-juice-moisturizing-sun-stick-spf-50
Processing .... https://feel22.com/products/nacific-uyu-cream-toner-150ml
Processing .... https://feel22.com/products/

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Processed 1500/7930
Processing .... https://feel22.com/products/yves-saint-laurent-lines-liberated-waterproof-eyeliner-pencil
Processing .... https://feel22.com/products/yves-saint-laurent-lash-clash-mascara-brown
Processing .... https://feel22.com/products/loreal-professionnel-deep-molecular-repairing-leave-in-mask-for-damaged-hair
Processing .... https://feel22.com/products/loreal-professionnel-sulfate-free-molecular-repairing-shampoo-for-damaged-hair
Processing .... https://feel22.com/products/colgate-total-active-protection-clean-mint-toothpaste-125ml
Processing .... https://feel22.com/products/bioderma-sensibio-gel-moussant-cleansing-gel-500ml
Processing .... https://feel22.com/products/valentino-uomo-eau-de-toilette-for-men
Processing .... https://feel22.com/products/valentino-born-in-roma-uomo-coral-fantasy-eau-de-toilette-for-men
Processing .... https://feel22.com/products/lancome-idole-now-eau-de-parfum-for-women
Processing .... https://feel22.com/products/giorgio-armani-my-wa

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Processed 2000/7930
Processing .... https://feel22.com/products/burberry-her-elixir-for-women-eau-de-parfum
Processing .... https://feel22.com/products/burberry-brit-for-men-eau-de-toilette-100ml
Processing .... https://feel22.com/products/vichy-liftactiv-retinol-specialist-deep-wrinkle-and-anti-aging-serum-30ml-2
Processing .... https://feel22.com/products/viktor-and-rolf-spicebomb-metallic-musk-eau-de-parfum
Processing .... https://feel22.com/products/viktor-and-rolf-flowerbomb-extreme-eau-de-parfum-intense
Processing .... https://feel22.com/products/ralph-lauren-romance-eau-de-parfum-intense
Processing .... https://feel22.com/products/ralph-lauren-romance-elixir-eau-de-parfum-spray
Processing .... https://feel22.com/products/ralph-lauren-polo-67-eau-de-parfum
Processing .... https://feel22.com/products/ralph-lauren-polo-67-eau-de-toilette
Processing .... https://feel22.com/products/ralph-lauren-polo-eau-de-toilette
Processing .... https://feel22.com/products/mugler-angel-stellar-eau

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Processed 2500/7930
Processing .... https://feel22.com/products/lierac-hydragenist-rehydrating-gel-cream
Processing .... https://feel22.com/products/madica-swiss-niacinamide-face-serum-15ml
Processing .... https://feel22.com/products/maybelline-sunkisser-matte-liquid-blush-trio-bundle-free-pouch
Processing .... https://feel22.com/products/maybelline-sunkisser-matte-liquid-blush-duo-free-glasses-bundle
Processing .... https://feel22.com/products/shiseido-zen-eau-de-parfum
Processing .... https://feel22.com/products/collistar-precious-body-oil-firms-nourishes-150ml
Processing .... https://feel22.com/products/collistar-acqua-vetiver-eau-de-toilette-for-men-100ml
Processing .... https://feel22.com/products/collistar-acqua-attiva-eau-de-toilette-for-men-100ml
Processing .... https://feel22.com/products/collistar-purifying-cleansing-gel-200ml
Processing .... https://feel22.com/products/unleashia-sunset-dazzle-gloss-balm
Processing .... https://feel22.com/products/pupa-milano-fruit-lovers-sce

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Processed 3000/7930
Processing .... https://feel22.com/products/bassam-fattouh-the-blur-kit-contouring-and-highlighting-free-brush-blender
Processing .... https://feel22.com/products/bassam-fattouh-the-blur-kit-contouring-and-highlighting
Processing .... https://feel22.com/products/benefit-precisely-my-brow-pencil
Processing .... https://feel22.com/products/benefit-ka-brow-eyebrow-cream-gel-color
Processing .... https://feel22.com/products/anastasia-beverly-hills-lash-sculpt-lengthening-volumizing-mascara-deluxe-size
Processing .... https://feel22.com/products/anastasia-beverly-hills-lash-sculpt-lengthening-volumizing-mascara
Processing .... https://feel22.com/products/anastasia-beverly-hills-mini-loose-setting-powder-translucent
Processing .... https://feel22.com/products/anastasia-beverly-hills-lash-brag-mini-mascara
Processing .... https://feel22.com/products/anastasia-beverly-hills-liquid-liner
Processing .... https://feel22.com/products/frezyderm-face-serum-retinol
Processing ....

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Processed 3500/7930
Processing .... https://feel22.com/products/bourjois-nail-polish-fabuleux-gel
Processing .... https://feel22.com/products/bourjois-nail-polish-fabuleux-gel-10-top-coat
Processing .... https://feel22.com/products/bourjois-healthy-mix-clean-primer-new-packaging
Processing .... https://feel22.com/products/max-factor-eyefinity-24h-liquid-shadow
Processing .... https://feel22.com/products/bourjois-healthy-mix-sos-lip-oil
Processing .... https://feel22.com/products/svr-ampoule-refresh-eye-concentrate-15ml
Processing .... https://feel22.com/products/face-facts-pink-himalayan-salt-body-scrub-400g
Processing .... https://feel22.com/products/face-facts-brown-sugar-body-scrub-400g
Processing .... https://feel22.com/products/face-facts-coconut-facial-scrub-60ml
Processing .... https://feel22.com/products/face-facts-strawberry-facial-scrub-60ml
Processing .... https://feel22.com/products/face-facts-peach-facial-scrub-60ml
Processing .... https://feel22.com/products/face-facts-ce

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Processed 4000/7930
Processing .... https://feel22.com/products/atelier-beautanique-all-natural-shampoo-neroli
Processing .... https://feel22.com/products/atelier-beautanique-all-natural-shampoo-original
Processing .... https://feel22.com/products/lakme-duo-of-frizzcontrol-shampoo-cream-set
Processing .... https://feel22.com/products/lakme-duo-of-active-shampoo-fortifying-mask-set
Processing .... https://feel22.com/products/lakme-duo-k-bio-argan-oil-shampoo-set
Processing .... https://feel22.com/products/lakme-duo-bioargan-shampoo-set
Processing .... https://feel22.com/products/maybelline-lash-sensational-firework-washable-mascara
Processing .... https://feel22.com/products/lor-al-professionnel-serie-expert-metal-detox-duo-set
Processing .... https://feel22.com/products/lor-al-professionnel-serie-expert-scalp-advanced-duo-set
Processing .... https://feel22.com/products/loreal-professionnel-tecni-art-super-dust-volume-and-texture-powder-7g
Processing .... https://feel22.com/products/aza

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Processed 4500/7930
Processing .... https://feel22.com/products/ifran-tanning-oil
Processing .... https://feel22.com/products/afnan-set-supremacy-not-only-intense
Processing .... https://feel22.com/products/afnan-set-supremacy-purple-allure
Processing .... https://feel22.com/products/afnan-set-9pm
Processing .... https://feel22.com/products/afnan-set-ornament-purple
Processing .... https://feel22.com/products/afnan-turathi-homme-blue-set
Processing .... https://feel22.com/products/afnan-supremacy-in-heaven-eau-de-parfum-for-men-100ml
Processing .... https://feel22.com/products/afnan-supremacy-pink-eau-de-parfum-for-women-100ml
Processing .... https://feel22.com/products/afnan-turathi-homme-blue-90ml
Processing .... https://feel22.com/products/afnan-tribute-blue-travel-pack-eau-de-parfum-for-men-90ml
Processing .... https://feel22.com/products/afnan-supremacy-not-only-intense-eau-de-parfum-for-men-100ml
Processing .... https://feel22.com/products/mustela-123-vitamin-barrier-cream-50ml
P

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Processed 5000/7930
Processing .... https://feel22.com/products/yves-rocher-cherry-blooms-eau-de-toilette-100ml
Processing .... https://feel22.com/products/womensecret-intimate-daydream-gift-set-eau-de-parfum
Processing .... https://feel22.com/products/womensecret-body-mist-passionate-treasures-250ml
Processing .... https://feel22.com/products/womensecret-body-mist-pure-charm-250ml
Processing .... https://feel22.com/products/womensecret-body-mist-lady-tenderness-250ml
Processing .... https://feel22.com/products/womensecret-body-mist-forever-gold-250ml
Processing .... https://feel22.com/products/womensecret-body-mist-exotic-love-250ml
Processing .... https://feel22.com/products/womensecret-body-mist-daily-romance-250ml
Processing .... https://feel22.com/products/womensecret-body-mist-kiss-moments-250ml
Processing .... https://feel22.com/products/womensecret-intimate-daydream-eau-de-parfum-100ml
Processing .... https://feel22.com/products/womensecret-eau-its-fresh-eau-de-toilette-100ml
P

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Processed 5500/7930
Processing .... https://feel22.com/products/talika-genius-light-therapy-mask
Processing .... https://feel22.com/products/talika-eyebrow-liposourcils-mascara-brown
Processing .... https://feel22.com/products/talika-eye-calm-roll-on
Processing .... https://feel22.com/products/johnsons-facial-wipes-micellar-moisturizing-dry-blue-25-pieces
Processing .... https://feel22.com/products/johnsons-vita-rich-oil-infusion-body-wash
Processing .... https://feel22.com/products/johnsons-vita-rich-indulging-b-lotion-peach-yogurt-250ml
Processing .... https://feel22.com/products/johnsons-vita-rich-bright-b-lotion-pomegranate
Processing .... https://feel22.com/products/johnsons-indulging-shower-gel-peach-yogurt-250ml
Processing .... https://feel22.com/products/johnsons-vita-rich-nourishing-body-wash-250ml-with-cocoa-butter
Processing .... https://feel22.com/products/lancaster-sun-care-beauty-tan-deepener
Processing .... https://feel22.com/products/biolane-2-in-1-toothpaste-50ml
Proce

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Processed 6000/7930
Processing .... https://feel22.com/products/bioderma-photoderm-fluide-max-spf100-40ml
Processing .... https://feel22.com/products/axe-body-spray-gold-temptation-rock-150ml
Processing .... https://feel22.com/products/mustela-normal-skin-skin-freshener
Processing .... https://feel22.com/products/clarins-bright-plus-fresh-ampoule-vitamin-c-complex-8ml
Processing .... https://feel22.com/products/revuele-coconut-oil-hair-booster-30ml
Processing .... https://feel22.com/products/mancode-eau-de-toilette-for-men-ingenious-100ml
Processing .... https://feel22.com/products/mancode-eau-de-toilette-for-men-gallant-man100ml
Processing .... https://feel22.com/products/mancode-eau-de-toilette-for-men-blue-master-100ml
Processing .... https://feel22.com/products/bourjois-always-fabulous-concealer
Processing .... https://feel22.com/products/soskin-clarifying-day-fluid-spf-25
Processing .... https://feel22.com/products/nivea-pearl-beauty-deodorant-spray
Processing .... https://feel22.

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Processed 6500/7930
Processing .... https://feel22.com/products/magnoliabodysplashcaketopper250ml
Processing .... https://feel22.com/products/magnoliabodysplashorientalhaze250ml
Processing .... https://feel22.com/products/magnoliabodysplashcoastalsunsets250ml
Processing .... https://feel22.com/products/the-balm-tropics-trio-palette
Processing .... https://feel22.com/products/the-balm-to-the-rescue-moringa-tree-foaming-face
Processing .... https://feel22.com/products/the-balm-meet-matte-hughes-6-pieces-mini-kit-miami
Processing .... https://feel22.com/products/the-balm-furrowcious-brow-pencil
Processing .... https://feel22.com/products/the-balm-nuit-mascara
Processing .... https://feel22.com/products/the-balm-meet-matte-hughes-3-pieces-kit
Processing .... https://feel22.com/products/the-balm-meet-matte-hughes-6-pieces-mini-kit-15
Processing .... https://feel22.com/products/the-balm-meet-matte-hughes-6-pieces-mini-kit-13
Processing .... https://feel22.com/products/the-balm-meet-matte-hug

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Processed 7000/7930
Processing .... https://feel22.com/products/the-aloeleb-aloe-ha-moisturizer-cream
Processing .... https://feel22.com/products/ruby-rose-eyeshadow-palette-by-melu
Processing .... https://feel22.com/products/oils-of-nature-green-tea-box
Processing .... https://feel22.com/products/oils-of-nature-glow-him-box
Processing .... https://feel22.com/products/o-b-procomfort-tampons-16s
Processing .... https://feel22.com/products/johnsons-baby-oil-aloe-vera
Processing .... https://feel22.com/products/rimmel-lasting-finish-foundation-spf-20-30ml
Processing .... https://feel22.com/products/essence-gel-nail-colour
Processing .... https://feel22.com/products/azalia-rosemary-essential-oil
Processing .... https://feel22.com/products/azalia-rosemary-oil-100ml
Processing .... https://feel22.com/products/soskin-sun-fluid-spf50
Processing .... https://feel22.com/products/l-oreal-paris-glycolic-bright-glowing-day-cream-spf17-50ml
Processing .... https://feel22.com/products/ruby-rose-feels

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Processed 7500/7930
Processing .... https://feel22.com/products/azalia-eyebrows-lashes-balm
Processing .... https://feel22.com/products/azalia-castor-oil
Processing .... https://feel22.com/products/azalia-argan-oil
Processing .... https://feel22.com/products/azalia-avocado-oil
Processing .... https://feel22.com/products/azalia-hemp-seed-oil
Processing .... https://feel22.com/products/azalia-carrot-seed-oil
Processing .... https://feel22.com/products/azalia-rosehip-light-oil-hair-guru-120ml
Processing .... https://feel22.com/products/azalia-hair-loss-lemon-hair-guru-120ml
Processing .... https://feel22.com/products/the-organic-pharmacy-mother-bilberry-complex-cream-60g
Processing .... https://feel22.com/products/potion-kitchen-cocado-sun-tan-balm-100g
Processing .... https://feel22.com/products/potion-kitchen-lemongrass-essential-oil
Processing .... https://feel22.com/products/potion-kitchen-eucalyptus-essential-oil
Processing .... https://feel22.com/products/potion-kitchen-balance-hand

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Processed 7930/7930


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


✅ DONE — 7902 products saved to final_results.json


In [None]:
import shutil
import os

drive_path = "/content/drive/MyDrive/Upwork/mahmoud"

# Ensure the destination directory exists
os.makedirs(drive_path, exist_ok=True)

json_files_to_copy = [
    "/content/backup_001.json",
    "/content/backup_002.json",
    "/content/backup_003.json",
    "/content/backup_004.json",
    "/content/backup_005.json",
    "/content/backup_006.json",
    "/content/backup_007.json",
    "/content/backup_008.json",
    "/content/backup_009.json",
    "/content/backup_010.json",
    "/content/backup_011.json",
    "/content/backup_012.json",
    "/content/backup_013.json",
    "/content/backup_014.json",
    "/content/final_results.json"
]

for file_path in json_files_to_copy:
    if os.path.exists(file_path):
        try:
            shutil.copy(file_path, drive_path)
            print(f"Successfully copied: {file_path} to {drive_path}")
        except Exception as e:
            print(f"Error copying {file_path}: {e}")
    else:
        print(f"File not found: {file_path}")

Successfully copied: /content/backup_001.json to /content/drive/MyDrive/Upwork/mahmoud
Successfully copied: /content/backup_002.json to /content/drive/MyDrive/Upwork/mahmoud
Successfully copied: /content/backup_003.json to /content/drive/MyDrive/Upwork/mahmoud
Successfully copied: /content/backup_004.json to /content/drive/MyDrive/Upwork/mahmoud
Successfully copied: /content/backup_005.json to /content/drive/MyDrive/Upwork/mahmoud
Successfully copied: /content/backup_006.json to /content/drive/MyDrive/Upwork/mahmoud
Successfully copied: /content/backup_007.json to /content/drive/MyDrive/Upwork/mahmoud
Successfully copied: /content/backup_008.json to /content/drive/MyDrive/Upwork/mahmoud
Successfully copied: /content/backup_009.json to /content/drive/MyDrive/Upwork/mahmoud
Successfully copied: /content/backup_010.json to /content/drive/MyDrive/Upwork/mahmoud
Successfully copied: /content/backup_011.json to /content/drive/MyDrive/Upwork/mahmoud
Successfully copied: /content/backup_012.js

In [None]:
#@title FattalBeauty.com products scraper

import json

file_path = '/content/drive/MyDrive/Upwork/mahmoud/fattalbeauty_products.json'

# Read the JSON file
with open(file_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Display the first product
if data and isinstance(data, list):
    print("First product from the JSON file:")
    print(json.dumps(data[0], indent=2))
elif data and isinstance(data, dict):
    print("JSON file contains a dictionary. Displaying it:")
    print(json.dumps(data, indent=2))
else:
    print("JSON file is empty or in an unexpected format.")

BASE_URL = "https://fattalbeauty.com/products/"

# Ensure 'data' from daouk_products.json is loaded and is a list
if 'data' in globals() and isinstance(data, list):
    fattalbeauty_handles = [item['handle'] for item in data if 'handle' in item]
    FATTALBEAUTY_URLS = [f"{BASE_URL}{handle}" for handle in fattalbeauty_handles]
    print(f"Generated {len(FATTALBEAUTY_URLS)} URLs for Fattalbeauty.com.")
    # Display the first few generated URLs as a sanity check
    print("First 5 Feel22 URLs:")
    for i in range(min(5, len(FATTALBEAUTY_URLS))):
        print(FATTALBEAUTY_URLS[i])
else:
    print("Error: 'data' variable not found or not in expected format. Please ensure Fattalbeauty_products.json was loaded correctly.")
    FATTALBEAUTY_URLS = []

First product from the JSON file:
{
  "id": 8881004937365,
  "title": "Polo Exchange Accessories PXRI0210",
  "handle": "polo-exchange-accessories-pxri0210",
  "body_html": "<p>Polo Exchange Accessories PXRI0210</p>",
  "published_at": "2025-12-13T09:14:28+02:00",
  "created_at": "2025-08-26T12:09:22+03:00",
  "updated_at": "2025-12-14T13:20:35+02:00",
  "vendor": "Polo Exchange",
  "product_type": "Watches & Accessories",
  "tags": [
    "Acc Discounts",
    "Accessories for Her",
    "Ring",
    "Watches & Accessories"
  ],
  "variants": [
    {
      "id": 46341834375317,
      "title": "Default Title",
      "option1": "Default Title",
      "option2": null,
      "option3": null,
      "sku": "276434",
      "requires_shipping": true,
      "taxable": false,
      "featured_image": null,
      "available": false,
      "price": "25.00",
      "grams": 0,
      "compare_at_price": null,
      "position": 1,
      "product_id": 8881004937365,
      "created_at": "2025-08-26T12:09:22

In [None]:
# ============================================================
# FULL CONCURRENT SHOPIFY PRODUCT SCRAPER (COLAB READY)
# ============================================================

import os
import re
import json
import html
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
from threading import Lock
from google.colab import files

# ============================================================
# CONFIG
# ============================================================

BASE_DIR = "fattalbeauty_products"
os.makedirs(BASE_DIR, exist_ok=True)

BACKUP_EVERY = 500
PROGRESS_FILE = os.path.join(BASE_DIR, "progress.txt")
BACKUP_PREFIX = os.path.join(BASE_DIR, "backup_")
FINAL_FILE = os.path.join(BASE_DIR, "final_results.json")

TIMEOUT = 20
MAX_WORKERS = 8   # safe: 8–10

progress_lock = Lock()

# ============================================================
# FETCH FUNCTION (UNCHANGED LOGIC)
# ============================================================

def fetch_product_data(url: str) -> dict:
    headers = {
        "User-Agent": "Mozilla/5.0",
        "Accept": "text/html",
    }

    res = requests.get(url, headers=headers, timeout=20)
    res.raise_for_status()
    html_text = res.text

    # -------------------------------------------------
    # 1. QUICK AVAILABILITY CHECK
    # -------------------------------------------------
    if "window._themeProducts" not in html_text:
        return None

    # -------------------------------------------------
    # 2. EXTRACT JS OBJECT
    # window._themeProducts[ID] = { ... };
    # -------------------------------------------------
    pattern = re.compile(
        r'window\._themeProducts\[\d+\]\s*=\s*(\{[\s\S]*?\})\s*;',
        re.MULTILINE
    )

    match = pattern.search(html_text)
    if not match:
        return None

    raw_obj = match.group(1)

    # -------------------------------------------------
    # 3. CLEAN JS → JSON
    # -------------------------------------------------

    # Decode HTML entities
    raw_obj = html.unescape(raw_obj)

    # Remove trailing commas (JS allows it, JSON doesn't)
    raw_obj = re.sub(r',(\s*[}\]])', r'\1', raw_obj)

    # Quote object keys (id: -> "id":)
    raw_obj = re.sub(
        r'([{\s,])([a-zA-Z_][a-zA-Z0-9_]*)\s*:',
        r'\1"\2":',
        raw_obj
    )

    # Replace JS booleans/nulls
    raw_obj = raw_obj.replace("true", "true")
    raw_obj = raw_obj.replace("false", "false")
    raw_obj = raw_obj.replace("null", "null")

    # -------------------------------------------------
    # 4. PARSE JSON
    # -------------------------------------------------
    try:
        product = json.loads(raw_obj)
    except json.JSONDecodeError as e:
        raise ValueError(f"JSON parse failed: {e}")

    # -------------------------------------------------
    # 5. SANITY CHECK
    # -------------------------------------------------
    if not product.get("id") or not product.get("title"):
        return None

    return product

# ============================================================
# WORKER WRAPPER
# ============================================================

def fetch_worker(index, url):
    try:
        return index, fetch_product_data(url)
    except Exception:
        return index, None

# ============================================================
# LOAD PROGRESS
# ============================================================

start_index = 0
if os.path.exists(PROGRESS_FILE):
    with open(PROGRESS_FILE, "r") as f:
        start_index = int(f.read().strip())

print(f"Starting from index: {start_index}")

# ============================================================
# URL LIST
# ============================================================
urls = FATTALBEAUTY_URLS


total = len(urls)
batch_data = []

# ============================================================
# MAIN CONCURRENT LOOP
# ============================================================

with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
    futures = {
        executor.submit(fetch_worker, i, urls[i]): i
        for i in range(start_index, total)
    }

    for future in as_completed(futures):
        i, product = future.result()

        if product:
            batch_data.append(product)

        # ---- progress update ----
        with progress_lock:
            with open(PROGRESS_FILE, "w") as f:
                f.write(str(i + 1))

        # ---- backup ----
        if (i + 1) % BACKUP_EVERY == 0 or i == total - 1:
            batch_num = (i + 1) // BACKUP_EVERY + (1 if (i + 1) % BACKUP_EVERY else 0)
            backup_name = f"{BACKUP_PREFIX}{batch_num:03d}.json"

            with open(backup_name, "w", encoding="utf-8") as f:
                json.dump(batch_data, f, indent=2, ensure_ascii=False)

            files.download(backup_name)
            batch_data = []

            print(f"Processed {i + 1}/{total}")

# ============================================================
# FINAL MERGE
# ============================================================

final_results = []

for file in sorted(f for f in os.listdir(BASE_DIR) if f.startswith("backup_")):
    with open(os.path.join(BASE_DIR, file), "r", encoding="utf-8") as f:
        final_results.extend(json.load(f))

with open(FINAL_FILE, "w", encoding="utf-8") as f:
    json.dump(final_results, f, indent=2, ensure_ascii=False)

files.download(FINAL_FILE)

print(f"\n✅ DONE — {len(final_results)} products saved to {FINAL_FILE}")

Starting from index: 0


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Processed 500/2603


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Processed 1000/2603


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Processed 1500/2603


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Processed 2000/2603


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Processed 2500/2603


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Processed 2603/2603


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


✅ DONE — 2598 products saved to fattalbeauty_products/final_results.json


In [None]:
import shutil
import os

# SOURCE (Colab runtime)
SRC_FOLDER = "/content/fattalbeauty_products"

# DESTINATION (Google Drive)
DEST_PARENT = "/content/drive/MyDrive/Upwork/mahmoud"

# Safety check
if not os.path.isdir(SRC_FOLDER):
    raise FileNotFoundError(f"Source folder does not exist: {SRC_FOLDER}")

# Final destination path (folder itself is copied)
DEST_FOLDER = os.path.join(DEST_PARENT, os.path.basename(SRC_FOLDER))

# Remove existing folder (optional but recommended)
if os.path.exists(DEST_FOLDER):
    shutil.rmtree(DEST_FOLDER)

# Copy ENTIRE folder
shutil.copytree(SRC_FOLDER, DEST_FOLDER)

print("✅ Folder copied successfully")
print("📁 Location:", DEST_FOLDER)

✅ Folder copied successfully
📁 Location: /content/drive/MyDrive/Upwork/mahmoud/fattalbeauty_products


In [None]:
import requests
import re
import json
import html

def fetch_product_data_themeproducts(url: str) -> dict | None:
    headers = {
        "User-Agent": "Mozilla/5.0",
        "Accept": "text/html",
    }

    res = requests.get(url, headers=headers, timeout=20)
    res.raise_for_status()
    html_text = res.text

    # -------------------------------------------------
    # 1. QUICK AVAILABILITY CHECK
    # -------------------------------------------------
    if "window._themeProducts" not in html_text:
        return None

    # -------------------------------------------------
    # 2. EXTRACT JS OBJECT
    # window._themeProducts[ID] = { ... };
    # -------------------------------------------------
    pattern = re.compile(
        r'window\._themeProducts\[\d+\]\s*=\s*(\{[\s\S]*?\})\s*;',
        re.MULTILINE
    )

    match = pattern.search(html_text)
    if not match:
        return None

    raw_obj = match.group(1)

    # -------------------------------------------------
    # 3. CLEAN JS → JSON
    # -------------------------------------------------

    # Decode HTML entities
    raw_obj = html.unescape(raw_obj)

    # Remove trailing commas (JS allows it, JSON doesn't)
    raw_obj = re.sub(r',(\s*[}\]])', r'\1', raw_obj)

    # Quote object keys (id: -> "id":)
    raw_obj = re.sub(
        r'([{\s,])([a-zA-Z_][a-zA-Z0-9_]*)\s*:',
        r'\1"\2":',
        raw_obj
    )

    # Replace JS booleans/nulls
    raw_obj = raw_obj.replace("true", "true")
    raw_obj = raw_obj.replace("false", "false")
    raw_obj = raw_obj.replace("null", "null")

    # -------------------------------------------------
    # 4. PARSE JSON
    # -------------------------------------------------
    try:
        product = json.loads(raw_obj)
    except json.JSONDecodeError as e:
        raise ValueError(f"JSON parse failed: {e}")

    # -------------------------------------------------
    # 5. SANITY CHECK
    # -------------------------------------------------
    if not product.get("id") or not product.get("title"):
        return None

    return product

product = fetch_product_data_themeproducts("https://fattalbeauty.com/products/calvin-klein-2-pack-modern-cotton-lounge-white-t-shirts")

if product:
    print(json.dumps(product, indent=2))
else:
    print("❌ No product data found")


{
  "has_3d_model": false,
  "id": 7934081925269,
  "title": "Calvin Klein 2 Pack Modern Cotton Lounge White T-Shirts",
  "available": true,
  "handle": "calvin-klein-2-pack-modern-cotton-lounge-white-t-shirts",
  "variants": [
    {
      "id": 43518768414869,
      "title": "White / S",
      "option1": "White",
      "option2": "S",
      "option3": null,
      "sku": "206445",
      "requires_shipping": true,
      "taxable": false,
      "featured_image": null,
      "available": true,
      "name": "Calvin Klein 2 Pack Modern Cotton Lounge White T-Shirts - White / S",
      "public_title": "White / S",
      "options": [
        "White",
        "S"
      ],
      "price": 6200,
      "weight": 0,
      "compare_at_price": null,
      "inventory_management": "shopify",
      "barcode": null,
      "requires_selling_plan": false,
      "selling_plan_allocations": []
    },
    {
      "id": 43518768447637,
      "title": "White / M",
      "option1": "White",
      "option2": "M",

# FATTALONLINE.COM SCRAPER

In [None]:
import requests
import json
from bs4 import BeautifulSoup

def fetch_product_data_productview(url: str) -> dict | None:
    headers = {
        "User-Agent": "Mozilla/5.0",
        "Accept": "text/html",
    }

    res = requests.get(url, headers=headers, timeout=20)
    res.raise_for_status()

    soup = BeautifulSoup(res.text, "lxml")

    # -------------------------------------------------
    # 1. FIND productView-options DIV
    # -------------------------------------------------
    options_div = soup.select_one("div.productView-options")
    if not options_div:
        return None

    # -------------------------------------------------
    # 2. FIND NEAREST JSON SCRIPT
    # -------------------------------------------------
    script_tag = options_div.find_next("script", type="application/json")
    if not script_tag or not script_tag.string:
        return None

    raw_json = script_tag.string.strip()

    # -------------------------------------------------
    # 3. PARSE JSON
    # -------------------------------------------------
    try:
        product = json.loads(raw_json)
    except json.JSONDecodeError as e:
        raise ValueError(f"JSON parse failed: {e}")

    # -------------------------------------------------
    # 5. SANITY CHECK
    # -------------------------------------------------
    if not product[0].get("id") or not product[0].get("title"):
        return None

    return product


In [None]:
product = fetch_product_data_productview("https://fattalonline.com/products/max-factor-lasting-performance")
print(json.dumps(product, indent=2))

[
  {
    "id": 44771271377189,
    "title": "100-fair",
    "option1": "100-fair",
    "option2": null,
    "option3": null,
    "sku": "65448",
    "requires_shipping": true,
    "taxable": false,
    "featured_image": {
      "id": 41116622291237,
      "product_id": 8190333124901,
      "position": 1,
      "created_at": "2023-03-27T17:44:16+03:00",
      "updated_at": "2023-03-27T17:44:16+03:00",
      "alt": null,
      "width": 1080,
      "height": 1080,
      "src": "//fattalonline.com/cdn/shop/products/65448.png?v=1679928256",
      "variant_ids": [
        44771271377189
      ]
    },
    "available": true,
    "name": "Max Factor Lasting Performance - 100-fair",
    "public_title": "100-fair",
    "options": [
      "100-fair"
    ],
    "price": 977,
    "weight": 0,
    "compare_at_price": 1221,
    "inventory_management": "shopify",
    "barcode": null,
    "featured_media": {
      "alt": null,
      "id": 33468332179749,
      "position": 1,
      "preview_image": {
 

In [None]:
# #@title Parsing the products json into dataframe

# import json

file_path = '/content/drive/MyDrive/Upwork/mahmoud/fattalbeauty/final_results.json'
# file_path = '/content/fattalbeauty_products.json'

# Read the JSON file
with open(file_path, 'r', encoding='utf-8') as f:
    data2 = json.load(f)

# print(json.dumps(data[0], indent=2))
len(data2)

2598

In [None]:
import pandas as pd

product_info = []
for product in data:
    product_info.append({
        'product_id': product.get('id'),
        'vendor': product.get('vendor'),
        'product_type': product.get('product_type')
    })

df_product_vendor = pd.DataFrame(product_info)
display(df_product_vendor.head())

Unnamed: 0,product_id,vendor
0,8881004937365,Polo Exchange
1,8983808409749,Versace
2,8983808376981,Versace
3,8983808344213,Versace
4,8983808311445,Versace


In [None]:
print(json.dumps(data[2], indent=2))

{
  "id": 8983808376981,
  "title": "Versace Eros Energy Gift Set",
  "handle": "versace-eros-energy-gift-set",
  "body_html": "<p>Coffret Versace Eros Energy Edp100+Ts10+Gwp</p>",
  "published_at": "2025-12-04T09:48:10+02:00",
  "created_at": "2025-12-03T09:54:00+02:00",
  "updated_at": "2025-12-14T13:20:35+02:00",
  "vendor": "Versace",
  "product_type": "Fragrances",
  "tags": [
    "Eau de Parfum",
    "Frag Sets",
    "Fragrances",
    "Fragrances for Him",
    "Gift Sets",
    "Gifts for Him"
  ],
  "variants": [
    {
      "id": 46692083499157,
      "title": "Default Title",
      "option1": "Default Title",
      "option2": null,
      "option3": null,
      "sku": "293147",
      "requires_shipping": true,
      "taxable": false,
      "featured_image": null,
      "available": true,
      "price": "114.75",
      "grams": 0,
      "compare_at_price": "135.00",
      "position": 1,
      "product_id": 8983808376981,
      "created_at": "2025-12-03T09:54:00+02:00",
      "upd

In [None]:
combined_data = []
data2_lookup = {product.get('id'): product for product in data2}

for product_from_data1 in data:
    product_id = product_from_data1.get('id')
    if product_id in data2_lookup:
        # Get the product from data2 using the lookup
        product_from_data2 = data2_lookup[product_id]

        # Create a copy of the product from data1 to modify
        modified_product = product_from_data1.copy()

        # Replace the 'variants' from data1 with those from data2
        if 'variants' in product_from_data2:
            modified_product['variants'] = product_from_data2['variants']

        combined_data.append(modified_product)
    else:
        # If no match in data2, just append the original product from data1
        combined_data.append(product_from_data1)

# Display the first few combined products to verify
print("First 3 combined products with updated variants:")
for i in range(min(3, len(combined_data))):
    print(json.dumps(combined_data[i], indent=2))

print(f"\nTotal combined products: {len(combined_data)}")

First 3 combined products with updated variants:
{
  "id": 8881004937365,
  "title": "Polo Exchange Accessories PXRI0210",
  "handle": "polo-exchange-accessories-pxri0210",
  "body_html": "<p>Polo Exchange Accessories PXRI0210</p>",
  "published_at": "2025-12-13T09:14:28+02:00",
  "created_at": "2025-08-26T12:09:22+03:00",
  "updated_at": "2025-12-14T13:20:35+02:00",
  "vendor": "Polo Exchange",
  "product_type": "Watches & Accessories",
  "tags": [
    "Acc Discounts",
    "Accessories for Her",
    "Ring",
    "Watches & Accessories"
  ],
  "variants": [
    {
      "id": 46341834375317,
      "title": "Default Title",
      "option1": "Default Title",
      "option2": null,
      "option3": null,
      "sku": "276434",
      "requires_shipping": true,
      "taxable": false,
      "featured_image": null,
      "available": false,
      "name": "Polo Exchange Accessories PXRI0210",
      "public_title": null,
      "options": [
        "Default Title"
      ],
      "price": 2500,
  

In [None]:
import pandas as pd

BASE_URL = "https://fattalbeauty.com/products/"

def flatten_products(products: list[dict]) -> pd.DataFrame:
    rows = []

    for product in products:
        product_id = product.get("id")
        product_title = product.get("title", "")
        vendor = product.get("vendor", "")
        product_type = product.get("product_type", "") # Corrected from 'type' to 'product_type'
        handle = BASE_URL + product.get("handle", "")
        description = product.get("body_html", "")

        # SAFELY resolve default image (product level image)
        images = product.get("images") or []
        default_image = images[0].get('src') if images and len(images) > 0 else None # Corrected to get 'src'

        # Prepare default values
        option_colors = []
        option_sizes = []

        # Options can be dict OR list
        options = product.get("options", {})

        if isinstance(options, dict):
            option_colors = options.get("color", [])
            option_sizes = options.get("size", [])
        elif isinstance(options, list):
            for opt in options:
                if isinstance(opt, dict): # Added check: Ensure 'opt' is a dictionary
                    name = (opt.get("name") or "").lower()
                    if name == "color":
                        option_colors = opt.get("values", [])
                    if name == "size":
                        option_sizes = opt.get("values", [])

        for variant in product.get("variants", []):
            variant_title = variant.get("title") or product_title

            # Replace default title
            if variant_title.lower() == "default title":
                variant_title = product_title

            # Fallback to product image (corrected logic)
            variant_featured_image_data = variant.get("featured_image")
            if variant_featured_image_data and variant_featured_image_data.get('src'):
                image = variant_featured_image_data.get('src')
            else:
                image = default_image

            color = variant_title if variant_title in option_colors else None
            size = variant_title if variant_title in option_sizes else None

            # Safely get and convert price
            raw_price = variant.get("price")
            try:
                price = float(raw_price) / 100 if raw_price is not None else 0.0
            except ValueError:
                price = 0.0 # Default to 0.0 if conversion fails

            rows.append({
                "product_id": product_id,
                "product_title": product_title,
                "vendor": vendor,
                "product_type": product_type,
                "handle": handle,
                "description": description,
                "variant_id": variant.get("id"), # Changed to use 'id' as 'variant_id'
                "variant_title": variant_title,
                "sku": variant.get("sku"),
                "price": price,
                "color": color,
                "size": size,
                "available": variant.get("available"),
                "barcode": variant.get("barcode"),
                "image": image
            })

    return pd.DataFrame(rows)


df = flatten_products(combined_data)

# Display as table in Colab (Excel-ready)
df.head(50)

Unnamed: 0,product_id,product_title,vendor,product_type,handle,description,variant_id,variant_title,sku,price,color,size,available,barcode,image
0,8881004937365,Polo Exchange Accessories PXRI0210,Polo Exchange,Watches & Accessories,https://fattalbeauty.com/products/polo-exchang...,<p>Polo Exchange Accessories PXRI0210</p>,46341834375317,Polo Exchange Accessories PXRI0210,276434,25.0,,,False,,https://cdn.shopify.com/s/files/1/0602/5668/21...
1,8983808409749,Versace Bright Crystal Gift Set,Versace,Fragrances,https://fattalbeauty.com/products/versace-brig...,<p>Coffret Versace Bright Crystal F. Edt90+Bsg...,46692083531925,Versace Bright Crystal Gift Set,293142,112.2,,,True,,https://cdn.shopify.com/s/files/1/0602/5668/21...
2,8983808376981,Versace Eros Energy Gift Set,Versace,Fragrances,https://fattalbeauty.com/products/versace-eros...,<p>Coffret Versace Eros Energy Edp100+Ts10+Gwp...,46692083499157,Versace Eros Energy Gift Set,293147,114.75,,,True,,https://cdn.shopify.com/s/files/1/0602/5668/21...
3,8983808344213,Versace Pour Homme Gift Set,Versace,Fragrances,https://fattalbeauty.com/products/versace-pour...,<p>Coffret Versace Pour Homme Edt100+Ts10+Gwp</p>,46692083466389,Versace Pour Homme Gift Set,293151,104.55,,,True,,https://cdn.shopify.com/s/files/1/0602/5668/21...
4,8983808311445,Versace Eros Flame Gift Set,Versace,Fragrances,https://fattalbeauty.com/products/versace-eros...,<p>Coffret Versace Eros Flame H. Edp100+Ts10+G...,46692083237013,Versace Eros Flame Gift Set,293152,125.8,,,True,,https://cdn.shopify.com/s/files/1/0602/5668/21...
5,8983808245909,Versace Dylan Purple Gift Set,Versace,Fragrances,https://fattalbeauty.com/products/versace-dyla...,<p>Coffret Versace Dylan Purple F. Edp100+Bsg1...,46692081533077,Versace Dylan Purple Gift Set,293153,133.45,,,True,,https://cdn.shopify.com/s/files/1/0602/5668/21...
6,8983808213141,Versace Eros Femme Gift Set,Versace,Fragrances,https://fattalbeauty.com/products/versace-eros...,<p>Coffret Versace Eros F. Edp100+Bsg100+Bl100...,46692081500309,Versace Eros Femme Gift Set,293156,112.2,,,True,,https://cdn.shopify.com/s/files/1/0602/5668/21...
7,8983808180373,Versace Dylan Blue Gift Set,Versace,Fragrances,https://fattalbeauty.com/products/versace-dyla...,<p>Coffret Versace Dylan Blue H. Edt100+Ts10+G...,46692081467541,Versace Dylan Blue Gift Set,293157,112.2,,,True,,https://cdn.shopify.com/s/files/1/0602/5668/21...
8,8983808147605,Versace Eros Najim Gift Set,Versace,Fragrances,https://fattalbeauty.com/products/versace-eros...,<p>Coffret Versace Eros Najim Parfum100+Ts10+G...,46692081434773,Versace Eros Najim Gift Set,293158,126.65,,,True,,https://cdn.shopify.com/s/files/1/0602/5668/21...
9,8983808082069,Tommy Hilfiger For Men Gift Set,Tommy Hilfiger,Fragrances,https://fattalbeauty.com/products/tommy-hilfig...,<p>Coffret Tommy Hilfiger Men Edt100+Bw100</p>,46692081369237,Tommy Hilfiger For Men Gift Set,293816,59.5,,,True,,https://cdn.shopify.com/s/files/1/0602/5668/21...


In [None]:
df.to_csv("fattalbeauty_products.csv", index=False)

In [None]:
product_id_to_find = 7934078353557
found_product = None

for product in combined_data:
    if product.get('id') == product_id_to_find:
        found_product = product
        break

if found_product:
    print(json.dumps(found_product, indent=2))
else:
    print(f"Product with ID {product_id_to_find} not found in combined_data.")

{
  "id": 7934078353557,
  "title": "Bassam Fattouh Two Way Cake Powder Foundation",
  "handle": "bassam-fattouh-two-way-cake-powder-foundation",
  "body_html": "<p>This revolutionary long lasting, creamy yet super light foundation seamlessly conceals fine lines and imperfections ensuring a naturally luminous and silky complexion complete with an impeccable matte finish.</p>",
  "published_at": "2025-12-03T14:29:35+02:00",
  "created_at": "2023-07-05T00:32:09+03:00",
  "updated_at": "2025-12-14T13:20:35+02:00",
  "vendor": "Bassam Fattouh",
  "product_type": "Make up",
  "tags": [
    "Face",
    "Foundation",
    "Make up"
  ],
  "variants": [
    {
      "id": 43518758224021,
      "title": "S2",
      "option1": "S2",
      "option2": null,
      "option3": null,
      "sku": "188798",
      "requires_shipping": true,
      "taxable": false,
      "featured_image": {
        "id": 37967424585877,
        "product_id": 7934078353557,
        "position": 1,
        "created_at": "2023

In [None]:
import requests
import json
import time

all_products = []
page_number = 1
page_limit = 250 # Shopify API default limit

while True:
    print(f"Fetching page {page_number}...")
    url = f"https://beirutfreezone.com//products.json?page={page_number}&limit={page_limit}"

    try:
        res = requests.get(url)
        res.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
        data = res.json()
        products = data.get('products', [])

        all_products.extend(products)

        if len(products) < page_limit:
            print(f"Found {len(products)} products on page {page_number}. Assuming this is the last page.")
            break

        print(f"Found {len(products)} products on page {page_number}.")
        page_number += 1
        time.sleep(0.5) # Be kind to the server

    except requests.exceptions.RequestException as e:
        print(f"Error fetching data from {url}: {e}")
        break
    except json.JSONDecodeError:
        print(f"Error decoding JSON from {url}. Response content: {res.text[:200]}...")
        break

print(f"\nTotal products scraped: {len(all_products)}")

# Optionally, save to a JSON file
file_path = 'beirutfreezone_products.json'
with open(file_path, 'w', encoding='utf-8') as f:
    json.dump(all_products, f, indent=4)
print(f"All products saved to {file_path}")

Fetching page 1...
Found 250 products on page 1.
Fetching page 2...
Found 250 products on page 2.
Fetching page 3...
Found 250 products on page 3.
Fetching page 4...
Found 250 products on page 4.
Fetching page 5...
Found 250 products on page 5.
Fetching page 6...
Found 250 products on page 6.
Fetching page 7...
Found 250 products on page 7.
Fetching page 8...
Found 34 products on page 8. Assuming this is the last page.

Total products scraped: 1784
All products saved to beirutfreezone_products.json


### Read `beirutfreezone_products.json` and display first 5 rows.

In [2]:
import json
import pandas as pd

file_path = '/content/beirutfreezone_products.json'

with open(file_path, 'r', encoding='utf-8') as f:
    data_beirutfreezone = json.load(f)

data_beirutfreezone[0]
# df_beirutfreezone = pd.DataFrame(data_beirutfreezone)
# display(df_beirutfreezone.head())

{'id': 8007851507800,
 'title': 'Lacoste Original Eau De Toilette Pour Homme - 2 Sizes',
 'handle': 'lacoste-original-eau-de-toilette-pour-homme-50ml',
 'body_html': '<p>Experience the iconic scent of <strong>Lacoste Original Eau De Toilette</strong>. This 50ml bottle is perfect for the modern man, crafted with a blend of fresh and masculine notes. The perfect way to make a statement and exude confidence. Grab yours now and elevate your fragrance game.</p>',
 'published_at': '2025-12-16T11:18:30+03:00',
 'created_at': '2024-06-28T23:18:48+03:00',
 'updated_at': '2025-12-19T18:36:49+03:00',
 'vendor': 'lacoste',
 'product_type': 'perfume',
 'tags': ['All Products', 'for him', 'lacoste', 'men fragrance', 'perfume'],
 'variants': [{'id': 43526761087064,
   'title': '50ml',
   'option1': '50ml',
   'option2': None,
   'option3': None,
   'sku': None,
   'requires_shipping': True,
   'taxable': True,
   'featured_image': None,
   'available': False,
   'price': '65.00',
   'grams': 0,
   'c

In [4]:
import pandas as pd

BASE_URL = "https://beirutfreezone.com/products/"

def flatten_products(products: list[dict]) -> pd.DataFrame:
    rows = []

    for product in products:
        product_id = product.get("id")
        product_title = product.get("title", "")
        vendor = product.get("vendor", "")
        product_type = product.get("product_type", "") # Corrected from 'type' to 'product_type'
        handle = BASE_URL + product.get("handle", "")
        description = product.get("body_html", "")

        # SAFELY resolve default image (product level image)
        images = product.get("images") or []
        default_image = images[0].get('src') if images and len(images) > 0 else None # Corrected to get 'src'

        # Prepare default values
        option_colors = []
        option_sizes = []

        # Options can be dict OR list
        options = product.get("options", {})

        if isinstance(options, dict):
            option_colors = options.get("color", [])
            option_sizes = options.get("size", [])
        elif isinstance(options, list):
            for opt in options:
                if isinstance(opt, dict): # Added check: Ensure 'opt' is a dictionary
                    name = (opt.get("name") or "").lower()
                    if name == "color":
                        option_colors = opt.get("values", [])
                    if name == "size":
                        option_sizes = opt.get("values", [])

        for variant in product.get("variants", []):
            variant_title = variant.get("title") or product_title

            # Replace default title
            if variant_title.lower() == "default title":
                variant_title = product_title

            # Fallback to product image (corrected logic)
            variant_featured_image_data = variant.get("featured_image")
            if variant_featured_image_data and variant_featured_image_data.get('src'):
                image = variant_featured_image_data.get('src')
            else:
                image = default_image

            color = variant_title if variant_title in option_colors else None
            size = variant_title if variant_title in option_sizes else None

            # Safely get and convert price
            raw_price = variant.get("price")
            try:
                price = float(raw_price)
            except ValueError:
                price = 0.0 # Default to 0.0 if conversion fails

            rows.append({
                "product_id": product_id,
                "product_title": product_title,
                "vendor": vendor,
                "product_type": product_type,
                "handle": handle,
                "description": description,
                "variant_id": variant.get("id"), # Changed to use 'id' as 'variant_id'
                "variant_title": variant_title,
                "sku": variant.get("sku"),
                "price": price,
                "color": color,
                "size": size,
                "available": variant.get("available"),
                "barcode": variant.get("barcode"),
                "image": image
            })

    return pd.DataFrame(rows)


df = flatten_products(data_beirutfreezone)

# Display as table in Colab (Excel-ready)
df.head(50)

Unnamed: 0,product_id,product_title,vendor,product_type,handle,description,variant_id,variant_title,sku,price,color,size,available,barcode,image
0,8007851507800,Lacoste Original Eau De Toilette Pour Homme - ...,lacoste,perfume,https://beirutfreezone.com/products/lacoste-or...,<p>Experience the iconic scent of <strong>Laco...,43526761087064,50ml,,65.0,,50ml,False,,https://cdn.shopify.com/s/files/1/0589/8613/56...
1,8007851507800,Lacoste Original Eau De Toilette Pour Homme - ...,lacoste,perfume,https://beirutfreezone.com/products/lacoste-or...,<p>Experience the iconic scent of <strong>Laco...,43526761119832,100ml,,95.0,,100ml,False,,https://cdn.shopify.com/s/files/1/0589/8613/56...
2,8550740492376,Burberry Hero Eau De Parfum Pour Homme - 100ml,burberry,perfume,https://beirutfreezone.com/products/burberry-h...,<p><strong>Burberry Hero Eau De Parfum Pour Ho...,43519307677784,Burberry Hero Eau De Parfum Pour Homme - 100ml,,125.0,,,True,,https://cdn.shopify.com/s/files/1/0589/8613/56...
3,7813159223384,Bulgari Omnia Crystalline Women's Travel Set,bulgari,perfume,https://beirutfreezone.com/products/bvlgari-om...,<p><strong>Bulgari Omnia Crystalline Women's T...,41540898783320,Bulgari Omnia Crystalline Women's Travel Set,,135.0,,,True,,https://cdn.shopify.com/s/files/1/0589/8613/56...
4,8550740459608,Jimmy Choo Women's Gift Set,jimmy choo,perfume,https://beirutfreezone.com/products/jimmy-choo...,<p>Elevate your fragrance collection with this...,43519307612248,Jimmy Choo Women's Gift Set,,95.0,,,True,,https://cdn.shopify.com/s/files/1/0589/8613/56...
5,7813027692632,Givenchy Gentleman Men's Gift Set,givenchy,perfume,https://beirutfreezone.com/products/givenchy-g...,<p><strong>Givenchy Gentleman Men's Gift Set</...,41540639850584,Givenchy Gentleman Men's Gift Set,,125.0,,,True,,https://cdn.shopify.com/s/files/1/0589/8613/56...
6,8547034529880,Jean Paul Gaultier Divine Elixir Eau De Parfum...,jean paul gaultier,perfume,https://beirutfreezone.com/products/jean-paul-...,<p><strong>Jean Paul Gaultier Divine Elixir</s...,43508841316440,Jean Paul Gaultier Divine Elixir Eau De Parfum...,,155.0,,,True,,https://cdn.shopify.com/s/files/1/0589/8613/56...
7,8532532789336,Prada Luna Rossa Sport Eau De Toilette Pour Ho...,prada,perfume,https://beirutfreezone.com/products/prada-luna...,<p><strong>Prada Luna Rossa Sport</strong> cap...,43415310762072,Prada Luna Rossa Sport Eau De Toilette Pour Ho...,,115.0,,,True,,https://cdn.shopify.com/s/files/1/0589/8613/56...
8,8547033677912,Azzaro Wanted Men's Gift Set,azzaro,perfume,https://beirutfreezone.com/products/azzaro-wan...,<p><strong>Azzaro Wanted Men's Gift Set</stron...,43508840071256,Azzaro Wanted Men's Gift Set,,115.0,,,True,,https://cdn.shopify.com/s/files/1/0589/8613/56...
9,8547033710680,Trussardi Uomo Men's Gift Set,trussardi,perfume,https://beirutfreezone.com/products/trussardi-...,<p><strong>Trussardi Uomo Men's Gift Set</stro...,43508840169560,Trussardi Uomo Men's Gift Set,,95.0,,,True,,https://cdn.shopify.com/s/files/1/0589/8613/56...


In [7]:
# df.to_csv("beirutfreezone_products.csv", index=False)
df.to_excel("beirutfreezone_products.xlsx", index=False)