### Imports

In [23]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import json
import time

### Woolworths weekly catalogue scraper

In [34]:
# Fetch the catalogue "view" page from the SaleFinder embed API and build a
# {category_id: category_name} lookup from the navigation links it contains.

# API endpoint to get category list (use "view" endpoint)
url_view = "https://embed.salefinder.com.au/productlist/view/62005/"

# Parameters for the request
# NOTE(review): the token is hard-coded; if it is not a public embed token,
# load it from an environment variable instead of committing it.
params = {
    'locationId': '4679',
    'token': '570f5c4a44505b5f51477f531a03180a0e0b1c1362352b2e21363226253968717d7a787d6468626562612b',
    'saleGroup': '0',
    'rows_per_page': '10',
}

# Headers to make our request look like a real browser
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
    'Accept': '*/*',
}

# Fetch the main catalogue page. The timeout stops the cell from hanging
# forever on a stalled connection, and raise_for_status() surfaces HTTP errors
# directly instead of as a confusing JSON parse failure further down.
response = requests.get(url_view, params=params, headers=headers, timeout=30)
response.raise_for_status()

# The JSON object arrives inside a wrapper (presumably JSONP-style
# parentheses), so keep only the span from the first '{' to the last '}'
# rather than blindly dropping one character from each end.
raw_text = response.text
json_start = raw_text.find('{')
json_end = raw_text.rfind('}')
if json_start == -1 or json_end < json_start:
    raise ValueError("Catalogue response does not contain a JSON object")
json_text = raw_text[json_start:json_end + 1]
data = json.loads(json_text)

print(f"Fetched: {data['saleName']}")
print(f"Valid: {data['startDate']} to {data['endDate']}\n")

# Parse the HTML to extract categories
soup = BeautifulSoup(data['content'], 'html.parser')
categories = {}

for link in soup.find_all('a', class_='sf-navcategory-link'):
    href = link.get('href', '')
    if 'categoryId=' in href:
        # The id is whatever sits between 'categoryId=' and the next '&'
        category_id = href.split('categoryId=')[1].split('&')[0]
        category_name = link.text.strip()
        categories[category_id] = category_name

# An empty lookup would make the scraping loop a silent no-op, so say so here.
if not categories:
    print("Warning: no category links found - the page markup may have changed.")

Fetched: Weekly Catalogue NSW
Valid: 2025-11-05T00:00:00 to 2025-11-11T23:59:59



In [35]:
# Scrape every category found in `categories` and collect one record per
# product (category, stock code, name, sale price, original price, savings).
url_category = "https://embed.salefinder.com.au/productlist/category/62005/"


def _parse_price(text):
    """Convert a price label such as '$1,299.50' to a float.

    Returns None when the label is not a plain dollar amount, so the caller
    can decide how to handle it instead of the whole scrape aborting.
    """
    cleaned = text.replace('$', '').replace(',', '').strip()
    try:
        return float(cleaned)
    except ValueError:
        return None


# Request parameters that are identical for every category
base_params = {
    'locationId': '4679',
    'token': '570f5c4a44505b5f51477f531a03180a0e0b1c1362352b2e21363226253968717d7a787d6468626562612b',
    'saleGroup': '0',
}

# List to store ALL products from ALL categories
all_products = []

# Loop through each category
for category_id, category_name in categories.items():
    print(f"Scraping {category_name}...", end=" ")

    # Parameters for this specific category.
    # NOTE(review): rows_per_page is fixed at 500 and no further pages are
    # requested, so a larger category would presumably be truncated.
    params = {
        **base_params,
        'categoryId': category_id,  # Specific category
        'rows_per_page': '500',
    }

    # Make the API request; fail fast on a stalled connection or HTTP error
    response = requests.get(url_category, params=params, headers=headers, timeout=30)
    response.raise_for_status()

    # Keep only the outermost {...} of the wrapped JSON payload
    raw_text = response.text
    json_text = raw_text[raw_text.find('{'):raw_text.rfind('}') + 1]
    data = json.loads(json_text)

    # Parse the HTML
    soup = BeautifulSoup(data['content'], 'html.parser')

    category_count = 0  # products stored for this category
    skipped_count = 0   # products dropped because the sale price was unreadable

    # Extract products
    for product in soup.find_all('div', class_='shelfProductStamp'):
        name_tag = product.find('span', class_='sf-item-heading')
        sale_price_tag = product.find('span', class_='sf-pricedisplay')
        savings_tag = product.find('span', class_='sf-regprice')

        # A product without a name or a sale price cannot be recorded
        if not name_tag or not sale_price_tag:
            continue

        name = name_tag.text.strip()
        stock_code = product.get('data-stockcode')

        sale_value = _parse_price(sale_price_tag.text)
        if sale_value is None:
            skipped_count += 1
            continue

        # A missing or unreadable savings label is recorded as "no savings"
        savings_value = _parse_price(savings_tag.text) if savings_tag else None
        if savings_value is None:
            savings_value = 0.0

        # Round to cents so float addition noise (e.g. 3.3000000000000003)
        # does not leak into the stored price
        original_value = round(sale_value + savings_value, 2)

        all_products.append({
            'category': category_name,
            'stock_code': stock_code,
            'name': name,
            'sale_price': sale_value,
            'original_price': original_value,
            'savings': savings_value
        })
        category_count += 1

    skipped_note = f" ({skipped_count} skipped: unreadable price)" if skipped_count else ""
    print(f"{category_count} products{skipped_note}")
    time.sleep(0.3)  # Be nice to the server

# Create DataFrame. Naming the columns keeps the schema stable even when
# nothing was scraped, so later cells that use df['category'] still work.
df = pd.DataFrame(
    all_products,
    columns=['category', 'stock_code', 'name', 'sale_price', 'original_price', 'savings'],
)

Scraping Baby... 18 products
Scraping Bakery... 22 products
Scraping Baking... 8 products
Scraping Beauty... 36 products
Scraping Biscuits & Snacks... 17 products
Scraping Breakfast Foods... 4 products
Scraping Canned & Packet food... 16 products
Scraping Clothing... 1 products
Scraping Condiments... 3 products
Scraping Confectionery... 21 products
Scraping Cooking, Seasoning & Gravy... 3 products
Scraping Dairy... 18 products
Scraping Deli & Chilled... 32 products
Scraping Desserts... 6 products
Scraping Drinks... 26 products
Scraping Frozen Food... 26 products
Scraping Fruit & Vegetables... 14 products
Scraping Health & Wellbeing... 26 products
Scraping Health Foods... 9 products
Scraping Home & Outdoor... 82 products
Scraping Household Cleaning... 12 products
Scraping International Foods... 2 products
Scraping Jams & Spreads... 3 products
Scraping Meat... 32 products
Scraping Pet care... 16 products
Scraping Seafood... 6 products
Scraping Stationery & Media... 10 products
Scraping T

In [42]:
# Rebuild the frame from the scraped records so this cell starts from a clean copy
df = pd.DataFrame(all_products)

# Show one real product per category. groupby(...).first() takes the first
# non-null value of each column independently, so it can stitch together
# fields from different products; drop_duplicates keeps whole rows intact.
# Sorting by category reproduces the ordering groupby would have given.
df.drop_duplicates(subset='category', keep='first').sort_values('category').reset_index(drop=True)

Unnamed: 0,category,stock_code,name,sale_price,original_price,savings
0,Baby,568656.0,Huggies Ultra Dry Nappy Pants Pk 24-36,13.0,22.0,9.0
1,Bakery,320195.0,Mission Mini Wrap Varieties 384g Pk 8,2.75,5.5,2.75
2,Baking,,Christmas Cupcake Kit Pk 12,3.5,3.5,0.0
3,Beauty,1079.0,Rexona Antiperspirant Deodorant 250ml,4.25,8.5,4.25
4,Biscuits & Snacks,756795.0,The Natural Chip Co. Potato Chips 175g,2.5,5.0,2.5
5,Breakfast Foods,202915.0,Kellogg’s Crunchy Nut Corn Flakes 640g or Froo...,5.0,10.0,5.0
6,Canned & Packet food,48883.0,Fantastic Noodles Bowl 85g,1.0,2.5,1.5
7,Clothing,109958.0,Bonds Zippy Wondersuit Assorted Pk 1,15.6,26.0,10.4
8,Condiments,618612.0,Leggo’s Fresh Pasta Sauce Varieties 450g – Fro...,5.0,7.5,2.5
9,Confectionery,813898.0,Cadbury Old Gold 175-180g,4.0,8.0,4.0


### Australian food guidelines