In [1]:
import requests
from bs4 import BeautifulSoup
import random
import pandas as pd

In [9]:
collection_url = "https://www.sportsfuel.co.nz/collections/protein-powder?sortBy=best_match&page={}"
response = requests.get(collection_url)

In [11]:
response

<Response [200]>

In [15]:
import time

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                  "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}
all_products = []


for page in range(1, 4): 
    print(f"Scraping page {page}...")
    url = collection_url.format(page)
    response = requests.get(url, headers=headers)
    
    if response.status_code != 200:
        print(f"Failed to fetch page {page}")
        break

    soup = BeautifulSoup(response.text, "html.parser")
    product_cards = soup.find_all("a", class_="Product_product__wb3tB")

    for card in product_cards:
        try:
            
            product_name = card.find("div", class_="Product_product__title__8lGN8").text.strip()
            product_type = card.find("div", class_="Product_product__type__Nk6Ig").text.strip()
        
            # Price
            price = card.find("span", class_="Product_product__price__5Hug4").text.strip()
            compare_price_span = card.find("span", class_="Product_product__compare-at__kgfns")
            compare_price = compare_price_span.text.strip() if compare_price_span else None

            # Flavours
            options = card.find("div", class_="Product_product__options__IsdYk")
            options_text = options.text.strip() if options else None

            all_products.append({
                "name": product_name,
                "type": product_type,
                "price": price,
                "compare_price": compare_price,
                "options": options_text
            })

        except Exception as e:
            print("Error parsing product:", e)
            continue

    time.sleep(random.uniform(2, 5)) 

print(f"\nScraped {len(all_products)} products.\n")

# first 3 products
for product in all_products[:3]:
    print(product)


Scraping page 1...
Scraping page 2...
Scraping page 3...

Scraped 0 products.



#### SportsFuel.co.nz loads data dynamically ie. JavaScript runs in the browser to fill in content after the initial page loads.
#### One of the solutions is to use a headless browser (can render the web pages as they would appear in a normal browser, and execute the JavaScript code.) libraries to work with a headless browser are Selenium or Puppeteer.
#### Selenium mimics the actions of a real user interacting with a website, making it an excellent choice for scraping dynamic pages that rely heavily on JavaScript.

In [17]:
pip install selenium


Note: you may need to restart the kernel to use updated packages.


In [3]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import time
import random

# headless mode means that the browser window wouldn’t be visible.
options = Options()
options.headless = True
driver = webdriver.Chrome(options=options)

base_url = "https://www.sportsfuel.co.nz/collections/protein-powder?sort_by=best_match&page={}"
all_products = []


for page in range(1, 4): 
    url = base_url.format(page)
    print(f"Scraping page {page}...")
    driver.get(url)
    # To avoid detection as a bot, used random delays
    time.sleep(random.uniform(3, 5)) 

    soup = BeautifulSoup(driver.page_source, "html.parser")
    product_cards = soup.find_all("a", class_="Product_product__wb3tB")

    for card in product_cards:
        try:
            product_name = card.find("div", class_="Product_product__title__8lGN8").text.strip()
            product_type = card.find("div", class_="Product_product__type__Nk6Ig").text.strip()
            product_url = "https://www.sportsfuel.co.nz" + card["href"]

            price = card.find("span", class_="Product_product__price__5Hug4").text.strip()
            compare_price_span = card.find("span", class_="Product_product__compare-at__kgfns")
            compare_price = compare_price_span.text.strip() if compare_price_span else None

            options_div = card.find("div", class_="Product_product__options__IsdYk")
            options = options_div.text.strip() if options_div else None


            all_products.append({
                "name": product_name,
                "type": product_type,
                "price": price,
                "compare_price": compare_price,
                "options": options,
                "url": product_url
            })

        except Exception as e:
            print("Product not found", e)
            continue

driver.quit()

print(f"\nScraped {len(all_products)} products.\n")
for p in all_products[:3]:
    print(p)


Scraping page 1...
Scraping page 2...
Scraping page 3...

Scraped 120 products.

{'name': 'Optimum Nutrition Gold Standard 100% Whey Protein Powder', 'type': 'Whey Protein', 'price': '$47.95', 'compare_price': '$59.94', 'options': '5 Sizes | 22 Flavours', 'url': 'https://www.sportsfuel.co.nz/products/optimum-gold-standard-100-whey-protein'}
{'name': 'Clean Nutrition Whey Protein Powder 1kg', 'type': 'Whey Protein', 'price': '$49.46', 'compare_price': '$54.95', 'options': '7 Flavours', 'url': 'https://www.sportsfuel.co.nz/products/clean-nutrition-whey-protein-1kg'}
{'name': 'Clean Nutrition Whey Protein Powder 2kg', 'type': 'Whey Protein', 'price': '$98.91', 'compare_price': '$109.90', 'options': '1 Sizes', 'url': 'https://www.sportsfuel.co.nz/products/clean-nutrition-whey-protein-2kg'}


In [80]:
import pandas as pd

df = pd.DataFrame(all_products)

pd.set_option("display.max_rows", None)
df


Unnamed: 0,name,type,price,compare_price,options,url
0,Optimum Nutrition Gold Standard 100% Whey Prot...,Whey Protein,$47.95,$59.94,5 Sizes | 22 Flavours,https://www.sportsfuel.co.nz/products/optimum-...
1,Clean Nutrition Whey Protein Powder 1kg,Whey Protein,$49.46,$54.95,7 Flavours,https://www.sportsfuel.co.nz/products/clean-nu...
2,Clean Nutrition Whey Protein Powder 2kg,Whey Protein,$98.91,$109.90,1 Sizes,https://www.sportsfuel.co.nz/products/clean-nu...
3,Optimum Nutrition Gold Standard 100% Whey Isolate,Whey Protein Isolate,$79.95,$99.94,3 Sizes | 3 Flavours,https://www.sportsfuel.co.nz/products/optimum-...
4,Clean Nutrition Pea Protein Powder 1kg,Plant Protein,$37.71,$41.90,4 Flavours,https://www.sportsfuel.co.nz/products/clean-nu...
5,MuscleTech Nitro Tech 100% Whey Gold Protein P...,Whey Protein,$125.96,$139.95,1 Sizes | 7 Flavours,https://www.sportsfuel.co.nz/products/nitro-te...
6,MuscleTech IsoWhey Protein Powder,Whey Protein Isolate,$74.66,$82.95,2 Sizes | 2 Flavours,https://www.sportsfuel.co.nz/products/musclete...
7,Rule 1 Whey Blend Protein Powder,Whey Protein,$27.97,$39.95,4 Sizes | 11 Flavours,https://www.sportsfuel.co.nz/products/rule-one...
8,Rule 1 Whey Isolate Protein Powder,Whey Protein Isolate,$34.97,$49.95,3 Sizes | 11 Flavours,https://www.sportsfuel.co.nz/products/r1-prote...
9,Musashi Shred & Burn Protein Powder,Whey Protein,$27.19,$33.99,3 Sizes | 2 Flavours,https://www.sportsfuel.co.nz/products/musashi-...


### 1. Price per gram for a product

In [19]:
import pandas as pd
import re

# Extract weight from name
def extract_grams(name):
    name = name.lower()
    match = re.search(r'(\d+\.?\d*)\s*(kg|g|lb|lbs)', name)
    if not match:
        return None
    quantity, unit = match.groups()
    quantity = float(quantity)
    
    if unit == 'kg':
        return quantity * 1000
    elif unit == 'g':
        return quantity
    elif unit in ['lb', 'lbs']:
        return quantity * 453.592
    return None

df['weight_g'] = df['name'].apply(extract_grams)



In [21]:
df

Unnamed: 0,name,type,price,compare_price,options,url,weight_g
0,Optimum Nutrition Gold Standard 100% Whey Prot...,Whey Protein,$47.95,$59.94,5 Sizes | 22 Flavours,https://www.sportsfuel.co.nz/products/optimum-...,
1,Clean Nutrition Whey Protein Powder 1kg,Whey Protein,$49.46,$54.95,7 Flavours,https://www.sportsfuel.co.nz/products/clean-nu...,1000.000
2,Clean Nutrition Whey Protein Powder 2kg,Whey Protein,$98.91,$109.90,1 Sizes,https://www.sportsfuel.co.nz/products/clean-nu...,2000.000
3,Optimum Nutrition Gold Standard 100% Whey Isolate,Whey Protein Isolate,$79.95,$99.94,3 Sizes | 3 Flavours,https://www.sportsfuel.co.nz/products/optimum-...,
4,Clean Nutrition Pea Protein Powder 1kg,Plant Protein,$37.71,$41.90,4 Flavours,https://www.sportsfuel.co.nz/products/clean-nu...,1000.000
...,...,...,...,...,...,...,...
115,Nothing Naughty Collagen Bar,Collagen,$2.70,$3.00,2 Sizes,https://www.sportsfuel.co.nz/products/nothing-...,
116,MuscleTech Mass Tech Elite,Mass Gainer,$65.97,$119.95,2 Sizes | 5 Flavours,https://www.sportsfuel.co.nz/products/musclete...,
117,EHP Labs IsoPept Whey Protein Powder 2lb,Whey Protein Isolate,$67.95,$84.94,4 Flavours,https://www.sportsfuel.co.nz/products/ehp-labs...,907.184
118,EHP Labs Blessed Plant Protein 2lb,Plant Protein,$59.95,$74.94,7 Flavours,https://www.sportsfuel.co.nz/products/ehp-bles...,907.184


In [23]:
df['discounted_price'] = df['price'].replace('[\$,]', '', regex=True).astype(float)

# Calculate price per gram
df['price_per_g'] = df.apply(lambda row: round(row['discounted_price'] / row['weight_g'], 3) if row['weight_g'] else None, axis=1)

# Show result
print(df[['name', 'discounted_price', 'weight_g', 'price_per_g']])


                                                  name  discounted_price  \
0    Optimum Nutrition Gold Standard 100% Whey Prot...             47.95   
1              Clean Nutrition Whey Protein Powder 1kg             49.46   
2              Clean Nutrition Whey Protein Powder 2kg             98.91   
3    Optimum Nutrition Gold Standard 100% Whey Isolate             79.95   
4               Clean Nutrition Pea Protein Powder 1kg             37.71   
..                                                 ...               ...   
115                       Nothing Naughty Collagen Bar              2.70   
116                         MuscleTech Mass Tech Elite             65.97   
117           EHP Labs IsoPept Whey Protein Powder 2lb             67.95   
118                 EHP Labs Blessed Plant Protein 2lb             59.95   
119                  Clean Nutrition Fava Bean Protein             31.46   

     weight_g  price_per_g  
0         NaN          NaN  
1    1000.000        0.049  


  df['discounted_price'] = df['price'].replace('[\$,]', '', regex=True).astype(float)


In [29]:
# Count NaNs in each column
print(df.isna().sum())
df[df['weight_g'].isna()]


name                 0
type                 0
price                0
compare_price        0
options              0
url                  0
weight_g            47
discounted_price     0
price_per_g         47
dtype: int64


Unnamed: 0,name,type,price,compare_price,options,url,weight_g,discounted_price,price_per_g
0,Optimum Nutrition Gold Standard 100% Whey Prot...,Whey Protein,$47.95,$59.94,5 Sizes | 22 Flavours,https://www.sportsfuel.co.nz/products/optimum-...,,47.95,
3,Optimum Nutrition Gold Standard 100% Whey Isolate,Whey Protein Isolate,$79.95,$99.94,3 Sizes | 3 Flavours,https://www.sportsfuel.co.nz/products/optimum-...,,79.95,
6,MuscleTech IsoWhey Protein Powder,Whey Protein Isolate,$74.66,$82.95,2 Sizes | 2 Flavours,https://www.sportsfuel.co.nz/products/musclete...,,74.66,
7,Rule 1 Whey Blend Protein Powder,Whey Protein,$27.97,$39.95,4 Sizes | 11 Flavours,https://www.sportsfuel.co.nz/products/rule-one...,,27.97,
8,Rule 1 Whey Isolate Protein Powder,Whey Protein Isolate,$34.97,$49.95,3 Sizes | 11 Flavours,https://www.sportsfuel.co.nz/products/r1-prote...,,34.97,
9,Musashi Shred & Burn Protein Powder,Whey Protein,$27.19,$33.99,3 Sizes | 2 Flavours,https://www.sportsfuel.co.nz/products/musashi-...,,27.19,
12,Mitchells Nutrition Bone Broth Protein Powder,Beef Protein,$5.94,$6.60,2 Sizes | 5 Flavours,https://www.sportsfuel.co.nz/products/mitchell...,,5.94,
14,PVL ISOGOLD Premium Whey Isolate Protein Powder,Whey Protein Isolate,$152.96,$169.95,1 Sizes | 8 Flavours,https://www.sportsfuel.co.nz/products/pvl-isog...,,152.96,
16,Dymatize ISO 100 Whey Isolate Protein Powder,Whey Protein Isolate,$55.21,$84.94,2 Sizes | 10 Flavours,https://www.sportsfuel.co.nz/products/dymatize...,,55.21,
24,Optimum Nutrition Gold Standard 100% Whey Prot...,Whey Protein,$47.95,$59.94,5 Sizes | 22 Flavours,https://www.sportsfuel.co.nz/products/optimum-...,,47.95,


In [44]:
# removing rows with missing values
df_cleaned = df.dropna()
df_cleaned.head()

Unnamed: 0,name,type,price,compare_price,options,url,weight_g,discounted_price,price_per_g
1,Clean Nutrition Whey Protein Powder 1kg,Whey Protein,$49.46,$54.95,7 Flavours,https://www.sportsfuel.co.nz/products/clean-nu...,1000.0,49.46,0.049
2,Clean Nutrition Whey Protein Powder 2kg,Whey Protein,$98.91,$109.90,1 Sizes,https://www.sportsfuel.co.nz/products/clean-nu...,2000.0,98.91,0.049
4,Clean Nutrition Pea Protein Powder 1kg,Plant Protein,$37.71,$41.90,4 Flavours,https://www.sportsfuel.co.nz/products/clean-nu...,1000.0,37.71,0.038
5,MuscleTech Nitro Tech 100% Whey Gold Protein P...,Whey Protein,$125.96,$139.95,1 Sizes | 7 Flavours,https://www.sportsfuel.co.nz/products/nitro-te...,2267.96,125.96,0.056
10,Clean Nutrition Pea Protein Powder 2kg,Plant Protein,$75.42,$83.80,1 Sizes,https://www.sportsfuel.co.nz/products/clean-nu...,2000.0,75.42,0.038


In [46]:
df_cleaned = df_cleaned.rename(columns={
    'name':'Product Name',
    'type':'Type',
    'price':'Disc_Price',
    'compare_price':'Original Price',
    'options' : 'Options',
    'url': 'URL',
    'weight_g':'Weight_in_grams',
    'price_per_g':'Price_per_gram'
})
#del df_cleaned['discounted_price']
df_cleaned.head()


Unnamed: 0,Product Name,Type,Disc_Price,Original Price,Options,URL,Weight_in_grams,discounted_price,Price_per_gram
1,Clean Nutrition Whey Protein Powder 1kg,Whey Protein,$49.46,$54.95,7 Flavours,https://www.sportsfuel.co.nz/products/clean-nu...,1000.0,49.46,0.049
2,Clean Nutrition Whey Protein Powder 2kg,Whey Protein,$98.91,$109.90,1 Sizes,https://www.sportsfuel.co.nz/products/clean-nu...,2000.0,98.91,0.049
4,Clean Nutrition Pea Protein Powder 1kg,Plant Protein,$37.71,$41.90,4 Flavours,https://www.sportsfuel.co.nz/products/clean-nu...,1000.0,37.71,0.038
5,MuscleTech Nitro Tech 100% Whey Gold Protein P...,Whey Protein,$125.96,$139.95,1 Sizes | 7 Flavours,https://www.sportsfuel.co.nz/products/nitro-te...,2267.96,125.96,0.056
10,Clean Nutrition Pea Protein Powder 2kg,Plant Protein,$75.42,$83.80,1 Sizes,https://www.sportsfuel.co.nz/products/clean-nu...,2000.0,75.42,0.038


In [54]:
# Creating 2 columns- NUmber of Sizes available for a product, Flavours it offers.

# First, remove leading/trailing spaces
df_cleaned['Options'] = df_cleaned['Options'].str.strip()

# Split the column into 'size_options' and 'flavour_options'
df_cleaned['Size Options'] = df_cleaned['Options'].str.extract(r'(\d+)\s+Size[s]?', expand=False).astype(float)
df_cleaned['Flavours'] = df_cleaned['Options'].str.extract(r'(\d+)\s+Flavour[s]?', expand=False).astype(float)

In [72]:
df_cleaned[['Size Options', 'Flavours']] = df_cleaned[['Size Options', 'Flavours']].fillna(0)

# Reset your DataFrame index 
df_cleaned = df_cleaned.reset_index(drop=True) 
df_cleaned.index = df_cleaned.index + 1 
df_cleaned.head()

Unnamed: 0,Product Name,Type,Disc_Price,Original Price,URL,Weight_in_grams,discounted_price,Price_per_gram,Size Options,Flavours
1,Clean Nutrition Whey Protein Powder 1kg,Whey Protein,$49.46,$54.95,https://www.sportsfuel.co.nz/products/clean-nu...,1000.0,49.46,0.049,0.0,7.0
2,Clean Nutrition Whey Protein Powder 2kg,Whey Protein,$98.91,$109.90,https://www.sportsfuel.co.nz/products/clean-nu...,2000.0,98.91,0.049,1.0,0.0
3,Clean Nutrition Pea Protein Powder 1kg,Plant Protein,$37.71,$41.90,https://www.sportsfuel.co.nz/products/clean-nu...,1000.0,37.71,0.038,0.0,4.0
4,MuscleTech Nitro Tech 100% Whey Gold Protein P...,Whey Protein,$125.96,$139.95,https://www.sportsfuel.co.nz/products/nitro-te...,2267.96,125.96,0.056,1.0,7.0
5,Clean Nutrition Pea Protein Powder 2kg,Plant Protein,$75.42,$83.80,https://www.sportsfuel.co.nz/products/clean-nu...,2000.0,75.42,0.038,1.0,0.0


In [74]:
df_cleaned.to_csv("SportsFuel_products.csv", index=False)