In [4]:
import requests
import json
import time

all_products = []
page_number = 1
page_limit = 250 # Shopify API default limit

while True:
    print(f"Fetching page {page_number}...")
    url = f"https://jacquesmariemage.com/products.json?page={page_number}&limit={page_limit}"

    try:
        res = requests.get(url)
        res.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
        data = res.json()
        products = data.get('products', [])

        all_products.extend(products)

        if len(products) < page_limit:
            print(f"Found {len(products)} products on page {page_number}. Assuming this is the last page.")
            break

        print(f"Found {len(products)} products on page {page_number}.")
        page_number += 1
        time.sleep(0.5) # Be kind to the server

    except requests.exceptions.RequestException as e:
        print(f"Error fetching data from {url}: {e}")
        break
    except json.JSONDecodeError:
        print(f"Error decoding JSON from {url}. Response content: {res.text[:200]}...")
        break

print(f"\nTotal products scraped: {len(all_products)}")

# Optionally, save to a JSON file
file_path = 'jacquesmariemage.com_products.json'
with open(file_path, 'w', encoding='utf-8') as f:
    json.dump(all_products, f, indent=4)
print(f"All products saved to {file_path}")

Fetching page 1...
Found 239 products on page 1. Assuming this is the last page.

Total products scraped: 239
All products saved to jacquesmariemage.com_products.json


In [None]:
pip install json5

Collecting json5
  Downloading json5-0.12.1-py3-none-any.whl.metadata (36 kB)
Downloading json5-0.12.1-py3-none-any.whl (36 kB)
Installing collected packages: json5
Successfully installed json5-0.12.1


In [None]:
import requests
import json5
from bs4 import BeautifulSoup


def extract_js_object(text, key):
    start = text.find(f"{key}:")
    if start == -1:
        return None

    brace_start = text.find("{", start)
    depth = 0

    for i in range(brace_start, len(text)):
        if text[i] == "{":
            depth += 1
        elif text[i] == "}":
            depth -= 1
            if depth == 0:
                return text[brace_start:i + 1]

    return None


url = "https://designerdaydream.com/products/alexander-mcqueen-am0260o-gold-metal-aviator-frames"
html = requests.get(url).text
soup = BeautifulSoup(html, "html.parser")

script_text = next(
    s.string for s in soup.find_all("script")
    if s.string and "window.DCART" in s.string and "barcode" in s.string
)

product_js = extract_js_object(script_text, "product")

# ✅ Parse JS safely
product_data = json5.loads(product_js)

print(product_data["title"])
print(product_data["variants"][0]["barcode"])

Alexander McQueen AM0260O Gold Metal Aviator Frames
889652283975


In [None]:
import requests
import json5
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor, as_completed


# =========================
# CONFIG
# =========================

BASE_URL = "https://designerdaydream.com/products/"
HEADERS = {
    "User-Agent": "Mozilla/5.0"
}
TIMEOUT = 20
WORKERS = 10


# =========================
# HELPERS
# =========================

def extract_js_object(text, key):
    """
    Extract JS object like: product: { ... }
    using brace counting (safe for nested objects)
    """
    start = text.find(f"{key}:")
    if start == -1:
        return None

    brace_start = text.find("{", start)
    if brace_start == -1:
        return None

    depth = 0
    for i in range(brace_start, len(text)):
        if text[i] == "{":
            depth += 1
        elif text[i] == "}":
            depth -= 1
            if depth == 0:
                return text[brace_start:i + 1]

    return None


# =========================
# PRIMARY: Shopify product.js
# =========================

def get_product_js(handle):
    """
    Fetch clean Shopify JSON:
    /products/{handle}.js
    """
    url = f"{BASE_URL}{handle}.js"
    r = requests.get(url, headers=HEADERS, timeout=TIMEOUT)

    if r.status_code != 200:
        raise RuntimeError(f"product.js HTTP {r.status_code}")

    if not r.headers.get("content-type", "").startswith("application/json"):
        raise RuntimeError("product.js not JSON")

    data = r.json()

    # Safety check
    if data.get("handle") != handle:
        raise RuntimeError("product.js handle mismatch")

    return {
        "url": url,
        "status": "success",
        "source": "product.js",
        "data": data
    }


# =========================
# FALLBACK: DCART parsing
# =========================

def get_product_dcart(handle):
    """
    Fallback parser for broken Shopify themes using window.DCART
    """
    url = f"{BASE_URL}{handle}"
    r = requests.get(url, headers=HEADERS, timeout=TIMEOUT)
    r.raise_for_status()

    soup = BeautifulSoup(r.text, "html.parser")

    candidates = []

    for s in soup.find_all("script"):
        text = s.string
        if not text:
            continue
        if "window.DCART" not in text:
            continue
        if "product:" not in text:
            continue

        product_js = extract_js_object(text, "product")
        if not product_js:
            continue

        try:
            product = json5.loads(product_js)
            candidates.append(product)
        except Exception:
            continue

    if not candidates:
        raise RuntimeError("DCART product not found")

    # Pick correct product by handle
    for product in candidates:
        if product.get("handle") == handle:
            return {
                "url": url,
                "status": "success",
                "source": "dcart",
                "data": product
            }

    raise RuntimeError(
        f"DCART mismatch (found {[p.get('handle') for p in candidates]})"
    )


# =========================
# UNIFIED FETCH
# =========================

def fetch_product(product):
    """
    product: dict containing at least { "handle": ... }
    """
    handle = product["handle"]

    # 1) Try clean JSON endpoint
    try:
        return get_product_js(handle)
    except Exception:
        pass

    # 2) Fallback to DCART
    return get_product_dcart(handle)


# =========================
# CONCURRENT RUNNER
# =========================

def fetch_all_products(all_products, workers=WORKERS):
    results = []

    with ThreadPoolExecutor(max_workers=workers) as executor:
        futures = {
            executor.submit(fetch_product, p): p["handle"]
            for p in all_products
            if "handle" in p
        }

        for future in as_completed(futures):
            handle = futures[future]
            try:
                result = future.result()
                results.append(result)
                print(f"✅ {handle} ({result['source']})")
            except Exception as e:
                print(f"❌ {handle} → {e}")

    return results

In [None]:
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed


# =========================
# CONFIG
# =========================

BASE_URL = "https://designerdaydream.com/products/"
HEADERS = {
    "User-Agent": "Mozilla/5.0"
}
TIMEOUT = 20
WORKERS = 10


# =========================
# PRODUCT.JS FETCHER
# =========================

def get_product_js(handle):
    """
    Fetch Shopify product JSON via:
    /products/{handle}.js
    """
    url = f"{BASE_URL}{handle}.js"

    r = requests.get(url, headers=HEADERS, timeout=TIMEOUT)

    if r.status_code != 200:
        raise RuntimeError(f"HTTP {r.status_code}")

    if not r.headers.get("content-type", "").startswith("application/json"):
        raise RuntimeError("Not JSON response")

    data = r.json()

    # Safety check
    if data.get("handle") != handle:
        raise RuntimeError(
            f"Handle mismatch (expected {handle}, got {data.get('handle')})"
        )

    return {
        "url": url,
        "status": "success",
        "data": data
    }


# =========================
# UNIFIED FETCH
# =========================

def fetch_product(product):
    """
    product must contain: { "handle": ... }
    """
    handle = product["handle"]
    return get_product_js(handle)


# =========================
# CONCURRENT RUNNER
# =========================

def fetch_all_products(all_products, workers=WORKERS):
    results = []

    with ThreadPoolExecutor(max_workers=workers) as executor:
        futures = {
            executor.submit(fetch_product, p): p["handle"]
            for p in all_products
            if "handle" in p
        }

        for future in as_completed(futures):
            handle = futures[future]
            try:
                result = future.result()
                results.append(result)
                print(f"✅ {handle}")
            except Exception as e:
                print(f"❌ {handle} → {e}")

    return results


# =========================
# EXAMPLE USAGE
# =========================

if __name__ == "__main__":
    products_data = fetch_all_products(all_products, workers=10)

    print("\nFetched:", len(products_data), "products")

❌ loewe-lw40156u-cat-eye-sunglasses-in-brown → Not JSON response
❌ loewe-lw40127u-anagram-sunglasses-in-black → Not JSON response
❌ celine-cl4006in-thin-shadow-sunglasses-in-blue → Not JSON response
❌ loewe-lw40127i-anagram-sunglasses-in-black → Not JSON response
❌ loewe-lw50068i-anagram-round-frames-in-transparent-brown → Not JSON response
❌ loewe-lw40154u-anagram-sunglasses-in-black → Not JSON response
❌ loewe-lw50069i-anagram-frames-in-black-horn → Not JSON response
❌ loewe-lw50068i-anagram-round-frames-in-black → Not JSON response
❌ loewe-lw50076i-anagram-frames-in-black → Not JSON response
❌ loewe-lw50038u-anagram-frames-in-gold → Not JSON response
❌ loewe-lw50076i-anagram-eyeglasses-frames-in-beige → Not JSON response
❌ loewe-lw40115u-cat-eye-sunglasses-in-gold-mirror → Not JSON response
❌ loewe-lw40102f-anagram-sunglasses-in-dark-havana-brown → Not JSON response
❌ loewe-lw50069i-anagram-frames-in-blonde-havana → Not JSON response
❌ loewe-lw40134u-oversized-square-sunglasses-in-h

In [7]:
import asyncio
import aiohttp


# =========================
# CONFIG
# =========================

BASE_URL = "https://jacquesmariemage.com/products/"
HEADERS = {
    "User-Agent": "Mozilla/5.0"
}
TIMEOUT = aiohttp.ClientTimeout(total=20)
CONCURRENCY = 10


# =========================
# PRODUCT.JS FETCHER
# =========================

async def get_product_js(session, handle, semaphore):
    url = f"{BASE_URL}{handle}.js"

    async with semaphore:
        async with session.get(url) as resp:
            if resp.status != 200:
                raise RuntimeError(f"HTTP {resp.status}")

            # content_type = resp.headers.get("Content-Type", "")
            # if not content_type.startswith("application/json"):
            #     raise RuntimeError("Not JSON response")

            data = await resp.json()

            # Safety check
            if data.get("handle") != handle:
                raise RuntimeError(
                    f"Handle mismatch (expected {handle}, got {data.get('handle')})"
                )

            return {
                "url": url,
                "status": "success",
                "data": data
            }


# =========================
# SINGLE PRODUCT WRAPPER
# =========================

async def fetch_product(session, product, semaphore):
    handle = product["handle"]
    result = await get_product_js(session, handle, semaphore)
    print(f"✅ {handle}")
    return result


# =========================
# ASYNC RUNNER
# =========================

async def fetch_all_products(all_products, concurrency=CONCURRENCY):
    semaphore = asyncio.Semaphore(concurrency)
    results = []

    async with aiohttp.ClientSession(
        headers=HEADERS,
        timeout=TIMEOUT
    ) as session:

        tasks = [
            fetch_product(session, product, semaphore)
            for product in all_products
            if "handle" in product
        ]

        for coro in asyncio.as_completed(tasks):
            try:
                result = await coro
                results.append(result)
            except Exception as e:
                print(f"❌ {e}")

    return results


# =========================
# ENTRY POINT
# =========================

if __name__ == "__main__":

    products_data = await fetch_all_products(all_products, concurrency=10)


    print(f"\nFetched {len(products_data)} products")

❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429
❌ HTTP 429

In [8]:
import requests
import time


# =========================
# CONFIG
# =========================

BASE_URL = "https://jacquesmariemage.com/products/"
HEADERS = {
    "User-Agent": "Mozilla/5.0"
}
TIMEOUT = 20
SLEEP_SECONDS = 1


# =========================
# PRODUCT.JS FETCHER
# =========================

def get_product_js(handle):
    """
    Fetch Shopify product JSON via:
    /products/{handle}.js
    """
    url = f"{BASE_URL}{handle}.js"

    print(f"REQUESTED: {url}")

    r = requests.get(url, headers=HEADERS, timeout=TIMEOUT)

    print(f"FINAL URL: {r.url}")

    if r.status_code != 200:
        raise RuntimeError(f"HTTP {r.status_code}")

    # if not r.headers.get("content-type", "").startswith("application/json"):
    #     raise RuntimeError("Not JSON response")

    data = r.json()

    # Safety check
    if data.get("handle") != handle:
        raise RuntimeError(
            f"Handle mismatch (expected {handle}, got {data.get('handle')})"
        )

    return data


# =========================
# SEQUENTIAL RUNNER
# =========================

def fetch_all_products(all_products):
    results = []

    for product in all_products:
        if "handle" not in product:
            continue

        handle = product["handle"]

        try:
            result = get_product_js(handle)
            results.append(result)
            print(f"✅ {handle}")
        except Exception as e:
            print(f"❌ {handle} → {e}")

        # Respect Shopify rate limits
        time.sleep(SLEEP_SECONDS)

    return results


# =========================
# ENTRY POINT
# =========================

if __name__ == "__main__":
    products_data = fetch_all_products(all_products)
    print(f"\nFetched {len(products_data)} products")

REQUESTED: https://jacquesmariemage.com/products/eichler.js
FINAL URL: https://jacquesmariemage.com/products/eichler.js
✅ eichler
REQUESTED: https://jacquesmariemage.com/products/petrus.js
FINAL URL: https://jacquesmariemage.com/products/petrus.js
✅ petrus
REQUESTED: https://jacquesmariemage.com/products/petrus-rx.js
FINAL URL: https://jacquesmariemage.com/products/petrus-rx.js
✅ petrus-rx
REQUESTED: https://jacquesmariemage.com/products/bojo-for-jmm-sunrise-hozho-ring.js
FINAL URL: https://jacquesmariemage.com/products/bojo-for-jmm-sunrise-hozho-ring.js
✅ bojo-for-jmm-sunrise-hozho-ring
REQUESTED: https://jacquesmariemage.com/products/bojo-for-jmm-to-blessings-pendant.js
FINAL URL: https://jacquesmariemage.com/products/bojo-for-jmm-to-blessings-pendant.js
✅ bojo-for-jmm-to-blessings-pendant
REQUESTED: https://jacquesmariemage.com/products/bojo-for-jmm-t-aala-i-niidli-bill-clip.js
FINAL URL: https://jacquesmariemage.com/products/bojo-for-jmm-t-aala-i-niidli-bill-clip.js
✅ bojo-for-jmm-

In [9]:
import json

file_path = 'jacquesmariemage.json'
with open(file_path, 'w', encoding='utf-8') as f:
    json.dump(products_data, f, indent=4)
print(f"All products data saved to {file_path}")

All products data saved to jacquesmariemage.json


In [10]:
products_data[0]

{'id': 8091245379756,
 'title': 'EICHLER',
 'handle': 'eichler',
 'description': '<p>Strong, feminine, and geometric, these limited-edition spectacles with a small fit feature custom ball hinges, ‘Lever’ wirecores, and our signature polished metal details.</p>',
 'published_at': '2025-12-18T19:39:38-08:00',
 'created_at': '2024-10-24T17:27:15-07:00',
 'vendor': 'Jacques Marie Mage',
 'type': 'SUNGLASSES',
 'tags': ['Circa',
  'JMM Free Frame Eligible',
  'NEW ARRIVALS',
  'shopall',
  'Sunglasses',
  'Womens'],
 'price': 96000,
 'price_min': 96000,
 'price_max': 96000,
 'available': True,
 'price_varies': False,
 'compare_at_price': None,
 'compare_at_price_min': 0,
 'compare_at_price_max': 0,
 'compare_at_price_varies': False,
 'variants': [{'id': 43910952485036,
   'title': '11A-NOIR 11 / JET GRADIENT',
   'option1': '11A-NOIR 11 / JET GRADIENT',
   'option2': None,
   'option3': None,
   'sku': 'J-ESN-QE-S6011A-OS',
   'requires_shipping': True,
   'taxable': True,
   'featured_imag

In [None]:
my_products = [product['data'] for product in products_data]
print(f"Created 'my_products' list with {len(my_products)} items.")

Created 'my_products' list with 1810 items.


In [11]:
# Display the first item in my_products to verify
import pprint
pprint.pprint(products_data[0])

{'available': True,
 'compare_at_price': None,
 'compare_at_price_max': 0,
 'compare_at_price_min': 0,
 'compare_at_price_varies': False,
 'created_at': '2024-10-24T17:27:15-07:00',
 'description': '<p>Strong, feminine, and geometric, these limited-edition '
                'spectacles with a small fit feature custom ball hinges, '
                '‘Lever’ wirecores, and our signature polished metal '
                'details.</p>',
 'featured_image': '//cdn.shopify.com/s/files/1/1757/2001/files/Eichler_Noir11_Front-Cropped_5377x3585_a0i4ai.jpg?v=1766109143',
 'handle': 'eichler',
 'id': 8091245379756,
 'images': ['//cdn.shopify.com/s/files/1/1757/2001/files/Eichler_Noir11_Front-Cropped_5377x3585_a0i4ai.jpg?v=1766109143',
            '//cdn.shopify.com/s/files/1/1757/2001/files/Eichler_Samoa_Front-Cropped_5377x3585_gp01rj.jpg?v=1766109205',
            '//cdn.shopify.com/s/files/1/1757/2001/files/Eichler_Sulfur_Front-Cropped_5377x3585_n4xrlz.jpg?v=1766109280',
            '//cdn.shopif

In [13]:
import pandas as pd

BASE_URL = "https://jacquesmariemage.com/products/"

def flatten_products(products: list[dict]) -> pd.DataFrame:
    rows = []

    for product in products:
        product_id = product.get("id")
        product_title = product.get("title", "")
        vendor = product.get("vendor", "")
        product_type = product.get("type", "") # Corrected from 'product_type' to 'type'
        handle = BASE_URL + product.get("handle", "")
        description = product.get("description", "")

        # SAFELY resolve default image (product level image)
        images = product.get("images") or []
        default_image = images[0] if images else None # Corrected to directly use the URL string

        # Prepare default values
        option_colors = []
        option_sizes = []

        # Options can be dict OR list
        options = product.get("options", {})

        if isinstance(options, dict):
            option_colors = options.get("color", [])
            option_sizes = options.get("size", [])
        elif isinstance(options, list):
            for opt in options:
                if isinstance(opt, dict): # Added check: Ensure 'opt' is a dictionary
                    name = (opt.get("name") or "").lower()
                    if name == "color":
                        option_colors = opt.get("values", [])
                    if name == "size":
                        option_sizes = opt.get("values", [])

        for variant in product.get("variants", []):
            variant_title = variant.get("title") or product_title

            # Replace default title
            if variant_title.lower() == "default title":
                variant_title = product_title

            # Fallback to product image (corrected logic)
            variant_featured_image = variant.get("featured_image") # This will be a string URL or None
            if variant_featured_image: # Check if it's not None or empty string
                image = variant_featured_image
            else:
                image = default_image

            color = variant_title if variant_title in option_colors else None
            size = variant_title if variant_title in option_sizes else None

            # Safely get and convert price
            raw_price = variant.get("price")
            # raw_compare_at_price = variant.get("compare_at_price")

            # Handle price conversion
            try:
                price = float(raw_price) / 100 if raw_price is not None else 0.0
                # compare_at_price = float(raw_compare_at_price) / 100 if raw_compare_at_price is not None else 0.0
            except ValueError:
                price = 0.0 # Default to 0.0 if conversion fails
                # compare_at_price = 0.0 # Default to 0.0 if conversion fails

            rows.append({
                "product_id": product_id,
                "product_title": product_title,
                "vendor": vendor,
                "product_type": product_type,
                "handle": handle,
                "description": description,
                "variant_id": variant.get("id"), # Changed to use 'id' as 'variant_id'
                "variant_title": variant_title,
                "sku": variant.get("sku"),
                "price": price,
                # "compare_at_price": compare_at_price,
                "color": color,
                "size": size,
                "available": variant.get("available"),
                "barcode": variant.get("barcode"),
                "image": image
            })

    return pd.DataFrame(rows)

df = flatten_products(products_data)

# Display as table in Colab (Excel-ready)
df.head(50)

Unnamed: 0,product_id,product_title,vendor,product_type,handle,description,variant_id,variant_title,sku,price,color,size,available,barcode,image
0,8091245379756,EICHLER,Jacques Marie Mage,SUNGLASSES,https://jacquesmariemage.com/products/eichler,"<p>Strong, feminine, and geometric, these limi...",43910952485036,11A-NOIR 11 / JET GRADIENT,J-ESN-QE-S6011A-OS,960.0,11A-NOIR 11 / JET GRADIENT,,True,810160927846.0,"{'id': 41225200566444, 'product_id': 809124537..."
1,8091245379756,EICHLER,Jacques Marie Mage,SUNGLASSES,https://jacquesmariemage.com/products/eichler,"<p>Strong, feminine, and geometric, these limi...",43910952452268,15D-SAMOA / SIENNA CR39,J-ESN-QE-P5015D-OS,960.0,15D-SAMOA / SIENNA CR39,,True,810160927839.0,"{'id': 41225202565292, 'product_id': 809124537..."
2,8091245379756,EICHLER,Jacques Marie Mage,SUNGLASSES,https://jacquesmariemage.com/products/eichler,"<p>Strong, feminine, and geometric, these limi...",43910952517804,15Z-SULFUR / DOVE CR39,J-ESN-QE-S6015Z-OS,960.0,15Z-SULFUR / DOVE CR39,,True,810160927853.0,"{'id': 41225211117740, 'product_id': 809124537..."
3,8091245379756,EICHLER,Jacques Marie Mage,SUNGLASSES,https://jacquesmariemage.com/products/eichler,"<p>Strong, feminine, and geometric, these limi...",43910952550572,14N-CHERRY / LAVENDER CR39,J-ESN-QE-S7014N-OS,960.0,14N-CHERRY / LAVENDER CR39,,True,810160927877.0,"{'id': 41225214361772, 'product_id': 809124537..."
4,8091245346988,PETRUS,Jacques Marie Mage,SUNGLASSES,https://jacquesmariemage.com/products/petrus,,43910952353964,01-NOIR / CERULEAN CR39,J-ESN-PR-P6001-OS,960.0,01-NOIR / CERULEAN CR39,,True,810160926856.0,"{'id': 41194760831148, 'product_id': 809124534..."
5,8091245346988,PETRUS,Jacques Marie Mage,SUNGLASSES,https://jacquesmariemage.com/products/petrus,,43910952321196,13V-BEAU / NIMBUS CR39,J-ESN-PR-P5013V-OS,960.0,13V-BEAU / NIMBUS CR39,,True,810160926849.0,"{'id': 41194768793772, 'product_id': 809124534..."
6,8091245346988,PETRUS,Jacques Marie Mage,SUNGLASSES,https://jacquesmariemage.com/products/petrus,,43910952386732,14Q-BRUT / SPRUCE CR39,J-ESN-PR-T1014Q-OS,960.0,14Q-BRUT / SPRUCE CR39,,True,810160926863.0,"{'id': 41198213922988, 'product_id': 809124534..."
7,8091245346988,PETRUS,Jacques Marie Mage,SUNGLASSES,https://jacquesmariemage.com/products/petrus,,43910952419500,20-ROVER / LIGHT BOTTLE GREEN CR39,J-ESN-PR-T3020-OS,960.0,20-ROVER / LIGHT BOTTLE GREEN CR39,,True,810160926870.0,"{'id': 41201331142828, 'product_id': 809124534..."
8,8431697363116,PETRUS RX,Jacques Marie Mage,OPTICAL,https://jacquesmariemage.com/products/petrus-rx,,45135853682860,01-NOIR / SUPERLIGHT GREY CR39,J-ERX-PR-P6001-OS,960.0,01-NOIR / SUPERLIGHT GREY CR39,,True,,"{'id': 41202187075756, 'product_id': 843169736..."
9,8431697363116,PETRUS RX,Jacques Marie Mage,OPTICAL,https://jacquesmariemage.com/products/petrus-rx,,45135853715628,13V-BEAU / SUPERLIGHT BRONZE CR39,J-ERX-PR-P5013V-OS,960.0,13V-BEAU / SUPERLIGHT BRONZE CR39,,True,,"{'id': 41202195366060, 'product_id': 843169736..."


In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1813 entries, 0 to 1812
Data columns (total 16 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   product_id        1813 non-null   int64  
 1   product_title     1813 non-null   object 
 2   vendor            1813 non-null   object 
 3   product_type      1813 non-null   object 
 4   handle            1813 non-null   object 
 5   description       1813 non-null   object 
 6   variant_id        1813 non-null   int64  
 7   variant_title     1813 non-null   object 
 8   sku               1446 non-null   object 
 9   price             1813 non-null   float64
 10  compare_at_price  1813 non-null   float64
 11  color             7 non-null      object 
 12  size              3 non-null      object 
 13  available         1813 non-null   bool   
 14  barcode           1437 non-null   object 
 15  image             1813 non-null   object 
dtypes: bool(1), float64(2), int64(2), object(1

In [None]:
df.to_csv('designerdaydream_data.csv', index=False)
df.to_excel('designerdaydream_data.xlsx', index=False)

In [3]:
import requests
import json
import time

URL = "https://www.mytheresa.com/api"

PAGE_SIZE = 60
SLUGS = [
    "/accessories/sunglasses",
    "/accessories/glasses",
]

session = requests.Session()

HEADERS = {
    "accept": "*/*",
    "accept-language": "en",
    "content-type": "text/plain;charset=UTF-8",
    "origin": "https://www.mytheresa.com",
    "referer": "https://www.mytheresa.com/int/en/women/accessories/sunglasses",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36 Edg/143.0.0.0",

    "x-country": "ID",
    "x-geo": "ID",
    "x-region": "JB",
    "x-store": "INT",
    "x-section": "women",
    "x-op": "ntr",
    "x-nsu": "false",
}

QUERY = """query XProductListingPageQuery($page: Int, $size: Int, $slug: String, $sort: String) {
  xProductListingPage(page: $page, size: $size, slug: $slug, sort: $sort) {
    pagination {
      currentPage
      totalPages
      totalItems
    }
    products {
      name
      slug
      sku
      designer
      department
      hasStock
      isPurchasable
      price {
        currencyCode
        regular
        original
        percentage
      }
    }
  }
}"""


def fetch_page(page, slug):
    payload = {
        "query": QUERY,
        "variables": {
            "page": page,
            "size": PAGE_SIZE,
            "slug": slug,
            "sort": None
        }
    }

    response = session.post(
        URL,
        headers=HEADERS,
        data=json.dumps(payload),
        timeout=30
    )
    response.raise_for_status()
    return response.json()


def main():
    all_products = []

    for slug in SLUGS:
        print(f"\n🔍 Fetching slug: {slug}")
        page = 1

        while True:
            print(f"  → Page {page}")
            data = fetch_page(page, slug)

            if "errors" in data:
                raise Exception(data["errors"])

            listing = data["data"]["xProductListingPage"]
            products = listing["products"]

            if not products:
                print("  ⚠️ No products returned, stopping.")
                break

            for p in products:
                p["_source_slug"] = slug
                p["_page"] = page
                all_products.append(p)

            # Stop condition
            if len(products) < PAGE_SIZE:
                print(f"  ✅ Last page reached ({len(products)} items)")
                break

            page += 1
            time.sleep(1)  # polite delay

    # Save final output
    with open("mytheresa_all_products.json", "w", encoding="utf-8") as f:
        json.dump(all_products, f, indent=2, ensure_ascii=False)

    print(f"\n🎉 Done. Total products collected: {len(all_products)}")


if __name__ == "__main__":
    main()



🔍 Fetching slug: /accessories/sunglasses
  → Page 1
  → Page 2
  → Page 3
  → Page 4
  → Page 5
  → Page 6
  → Page 7
  → Page 8
  → Page 9
  → Page 10
  → Page 11
  → Page 12
  ✅ Last page reached (44 items)

🔍 Fetching slug: /accessories/glasses
  → Page 1
  → Page 2
  ✅ Last page reached (17 items)

🎉 Done. Total products collected: 781
