In [1]:
#!pip install selenium webdriver-manager
import time
import re
from pprint import pprint
from urllib.parse import urlparse
from bs4 import BeautifulSoup
import pandas as pd

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
import time
import re
from pprint import pprint
from urllib.parse import urlparse
from bs4 import BeautifulSoup

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager


# ---------- Selenium driver setup ----------

def make_driver(headless: bool = True) -> webdriver.Chrome:
    options = Options()

    if headless:
        options.add_argument("--headless=new")

    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    options.add_argument("--window-size=1280,800")
    options.add_argument(
        "user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    )

    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=options)
    return driver


# ---------- Helpers for slug-based parsing ----------

def parse_brand_desc_size_from_slug(url: str):
    """
    Parse brand, description, size from Sheng Siong product URL slug.
    Example slug: 'ayam-brand-pulled-chicken-mayonnaise-150-g'
      -> brand='Ayam Brand'
      -> size='150 g'
      -> description='Pulled Chicken - Mayonnaise'
    """
    path = urlparse(url).path  # e.g. '/product/ayam-brand-pulled-chicken-mayonnaise-150-g'
    slug = path.rsplit("/", 1)[-1]  # 'ayam-brand-pulled-chicken-mayonnaise-150-g'
    slug = slug.strip("/")

    tokens = [t for t in slug.split("-") if t]  # ['ayam','brand','pulled','chicken','mayonnaise','150','g']

    if len(tokens) < 3:
        return None, None, None  # not enough info

    # --- detect size from the end (number + unit) ---
    size = None
    size_units = {"ml", "l", "g", "kg", "pcs", "pc", "s", "pack"}

    # look for pattern: number + unit at the tail
    size_start_idx = None
    for i in range(len(tokens) - 2, -1, -1):
        if re.fullmatch(r"\d+(\.\d+)?", tokens[i]):  # number
            if i + 1 < len(tokens) and tokens[i + 1].lower() in size_units:
                size_start_idx = i
                break

    core_tokens = tokens[:]
    if size_start_idx is not None:
        num = tokens[size_start_idx]
        unit = tokens[size_start_idx + 1]
        size = f"{num} {unit}"
        # remove them from core tokens
        core_tokens = tokens[:size_start_idx]

    # If we didn't find a size, core_tokens stays as full tokens

    # --- brand = first 2 tokens (simple & works for 'Ayam Brand') ---
    if len(core_tokens) >= 2:
        brand_tokens = core_tokens[:2]
        desc_tokens = core_tokens[2:]
    else:
        brand_tokens = core_tokens[:1]
        desc_tokens = core_tokens[1:]

    brand = " ".join(t.capitalize() for t in brand_tokens) if brand_tokens else None

    # --- description = remaining tokens, with " - " before last word ---
    description = None
    if desc_tokens:
        if len(desc_tokens) == 1:
            description = desc_tokens[0].capitalize()
        else:
            head = " ".join(t.capitalize() for t in desc_tokens[:-1])
            tail = desc_tokens[-1].capitalize()
            description = f"{head} - {tail}"

    return brand, description, size


def extract_section(info_text: str, header_keywords: list[str]) -> str | None:
    """
    Extract a text block under a heading that contains any of header_keywords.
    Generic: may or may not find something depending on the page.
    """
    lines = [ln.strip() for ln in info_text.splitlines()]
    section_lines: list[str] = []
    in_section = False

    for line in lines:
        if not in_section:
            if any(h.lower() in line.lower() for h in header_keywords):
                in_section = True
            continue

        if any(h.lower() in line.lower() for h in ["description", "key information", "key info", "ingredients", "nutrition"]):
            break

        if not line:
            continue

        cleaned = line.lstrip("•*- ").strip()
        if cleaned:
            section_lines.append(cleaned)

    if not section_lines:
        return None

    return " ".join(section_lines)


# ---------- Main Sheng Siong fetcher (Selenium) ----------

def fetch_shengsiong_product_selenium(driver, url: str) -> dict | None:
    """
    Fetch product info from a Sheng Siong product page using Selenium.

    Returns dict with fields mirroring FairPrice:
        - url
        - name
        - brand
        - size
        - current_price
        - original_price
        - promo_text
        - rating
        - description
        - key_information
        - ingredients
    """
    print(f"\n[DEBUG] Selenium requesting: {url}")
    driver.get(url)
    time.sleep(3)  # wait for JS

    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")
    full_text = soup.get_text("\n", strip=True)

    # --- Name from <h1> (full visual title) ---
    name_el = soup.select_one("h1")
    name = name_el.get_text(strip=True) if name_el else None
    print(f"[DEBUG] Parsed name (from <h1>): {name!r}")

    # --- Brand / Description / Size from URL slug ---
    brand, description_from_slug, size = parse_brand_desc_size_from_slug(url)
    print(f"[DEBUG] brand from slug: {brand!r}")
    print(f"[DEBUG] desc from slug: {description_from_slug!r}")
    print(f"[DEBUG] size from slug: {size!r}")

    # For description we prefer the slug-derived one (Pulled Chicken - Mayonnaise)
    description = description_from_slug or name

    # --- Price (current) ---
    current_price = None

    # Try elements whose class name contains 'price'
    price_candidates = soup.select("[class*='price']")
    for el in price_candidates:
        txt = el.get_text(" ", strip=True)
        m = re.search(r"S\$ ?(\d+(?:\.\d{1,2})?)", txt)
        if not m:
            m = re.search(r"\$ ?(\d+(?:\.\d{1,2})?)", txt)
        if m:
            current_price = float(m.group(1))
            print(f"[DEBUG] Price from price-class element: {current_price}")
            break

    # Fallback: scan full visible text
    if current_price is None:
        m = re.search(r"S\$ ?(\d+(?:\.\d{1,2})?)", full_text)
        if not m:
            m = re.search(r"\$ ?(\d+(?:\.\d{1,2})?)", full_text)
        if m:
            current_price = float(m.group(1))
            print(f"[DEBUG] Price from regex fallback: {current_price}")
        else:
            print("[DEBUG] Could not find any price in rendered text")

    # --- Original price (U.P. / Usual price) ---
    original_price = None
    up_match = re.search(r"(U\.?P\.?|Usual price)[^0-9]*?(\d+(?:\.\d{1,2})?)", full_text, re.IGNORECASE)
    if up_match:
        try:
            original_price = float(up_match.group(2))
            if current_price is not None and abs(original_price - current_price) < 1e-6:
                original_price = None
        except ValueError:
            original_price = None

    # --- Rating (very unlikely) ---
    rating = None
    for line in full_text.splitlines():
        if "rating" in line.lower():
            m = re.search(r"(\d+(\.\d)?)", line)
            if m:
                try:
                    rating = float(m.group(1))
                except ValueError:
                    rating = None
            break

    # --- Promo text ---
    promo_text = None
    promo_pattern = re.compile(
        r"(Save\s*\$\d+(\.\d{1,2})?)|"
        r"(\bBuy\s*\d+.*for\s*\$\d+)|"
        r"(\bAny\s*\d+.*for\s*\$\d+)|"
        r"(\d{1,2}%\s*off)",
        re.IGNORECASE,
    )

    for el in soup.select("div, span, p"):
        txt = el.get_text(" ", strip=True)
        if promo_pattern.search(txt):
            promo_text = txt
            break

    if promo_text is None:
        if current_price is not None:
            promo_text = f"Regular price ${current_price:.2f} (no promotion)"
        else:
            promo_text = "No promotion information found"

    # --- Text sections (best-effort) ---
    key_information = extract_section(full_text, ["Key Information", "Key info"])
    ingredients = extract_section(full_text, ["Ingredients"])

    # If slug gave nothing, try generic description extraction
    if description is None:
        description = extract_section(full_text, ["Description"])

    return {
        "url": url,
        "name": name,
        "brand": brand,
        "size": size,
        "current_price": current_price,
        "original_price": original_price,
        "promo_text": promo_text,
        "rating": rating,
        "description": description,
        "key_information": key_information,
        "ingredients": ingredients,
    }


# ---------- Crawler wrapper ----------

def xxxxcrawl_shengsiong_products_selenium(urls: list[str]) -> list[dict]:
    driver = make_driver(headless=True)
    results = []
    try:
        for url in urls:
            info = fetch_shengsiong_product_selenium(driver, url)
            results.append(info)
            time.sleep(2)  # polite delay
    finally:
        driver.quit()
    return results


# ---------- Example test run ----------

if __name__ == "__main__":
    SHENGSIONG_URLS = [
    "https://shengsiong.com.sg/product/royal-umbrella-thai-mixed-rice-2-kg",
    #"https://shengsiong.com.sg/product/allswell-starfruit-replenish-1-l",
    #"https://shengsiong.com.sg/product/premier-2ply-deluxe-kitchen-towel-6-x-60-pcs"
    #"https://shengsiong.com.sg/product/allswell-starfruit-replenish-1-l",
    #"https://shengsiong.com.sg/product/cheong-chan-thick-caramel-sauce-740-ml",
    # 1. Ayam Brand Pulled Chicken – Mayonnaise 150g
    #"https://shengsiong.com.sg/product/ayam-brand-pulled-chicken-mayonnaise-150-g",
    # 2. Ayam Brand Baked Beans – Tomato Sauce 230g
    #"https://shengsiong.com.sg/product/ayam-brand-baked-beans-tomato-sauce-230-g",
    # 3. UFC Refresh 100% Natural Coconut Water 1L
    #"https://shengsiong.com.sg/product/ufc-refresh-100-natural-coconut-water-1-l",
    # 4. Marigold HL Milk – Plain 946ml
    #"https://shengsiong.com.sg/product/marigold-hl-milk-plain-946-ml",
    # 5. Greenfields Fresh Milk – Regular 1.89L
    #"https://shengsiong.com.sg/product/greenfields-fresh-milk-regular-1-89-l",
    # 6. Dove Body Wash – Beauty Nourishing 1L
    #"https://shengsiong.com.sg/product/dove-body-wash-beauty-nourishing-1-l",
    # 7. Lifebuoy Antibacterial Body Wash – Cool Fresh ~950ml
    #"https://shengsiong.com.sg/product/lifebuoy-antibacterial-body-wash-cool-fresh-950-ml",
    # 8. Head & Shoulders Anti-Dandruff Shampoo – Smooth & Silky 650ml
    #"https://shengsiong.com.sg/product/head-shoulders-anti-dandruff-shampoo-smooth-silky-650-ml",
    # 9. Sunsilk Hair Shampoo – Smooth & Manageable 650ml
    #"https://shengsiong.com.sg/product/sunsilk-hair-shampoo-smooth-manageable-650-ml",
    # 10. Dynamo Power Gel Laundry Detergent – Regular 2.7kg
    #"https://shengsiong.com.sg/product/dynamo-power-gel-laundry-detergent-regular-2-7-kg",
    ]

    data = crawl_shengsiong_products_selenium(SHENGSIONG_URLS)
    print()
    for x in data:
        del x['promo_text']
        #pprint(x)
    #del data['promo_text']
    print("\n=== Final parsed data ===")


[DEBUG] Selenium requesting: https://shengsiong.com.sg/product/royal-umbrella-thai-mixed-rice-2-kg
[DEBUG] Parsed name (from <h1>): None
[DEBUG] brand from slug: 'Royal Umbrella'
[DEBUG] desc from slug: 'Thai Mixed - Rice'
[DEBUG] size from slug: '2 kg'
[DEBUG] Price from price-class element: 6.55


=== Final parsed data ===


In [4]:
columns = ['description', 'brand',	'size',	'current_price', 'original_price',	'rating', 'url']

df = pd.DataFrame(data)
df[columns]

Unnamed: 0,description,brand,size,current_price,original_price,rating,url
0,Thai Mixed - Rice,Royal Umbrella,2 kg,6.55,2.0,,https://shengsiong.com.sg/product/royal-umbrel...


In [11]:
def fetch_shengsiong_product_selenium(driver, url: str) -> dict | None:

    """
    Fetch product info from a Sheng Siong product page using Selenium.

    Returns dict with fields mirroring FairPrice:
        - url
        - name
        - brand
        - size
        - current_price
        - original_price
        - promo_text
        - rating
        - description
        - key_information
        - ingredients
    """
    print(f"\n[DEBUG] Selenium requesting: {url}")
    driver.get(url)
    time.sleep(3)  # wait for JS

    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")
    full_text = soup.get_text("\n", strip=True)

    # --- Name from <h1> (full visual title) ---
    name_el = soup.select_one("h1")
    name = name_el.get_text(strip=True) if name_el else None
    print(f"[DEBUG] Parsed name (from <h1>): {name!r}")

    # --- Brand / Description / Size from URL slug ---
    brand, description_from_slug, size = parse_brand_desc_size_from_slug(url)
    print(f"[DEBUG] brand from slug: {brand!r}")
    print(f"[DEBUG] desc from slug: {description_from_slug!r}")
    print(f"[DEBUG] size from slug: {size!r}")

    description = description_from_slug or name

    # ---------- Price (current + original) ----------
    current_price = None
    original_price = None

    # Define price regex pattern
    price_pattern = re.compile(r'(?:S?\$|S\$\s*)(\d+\.\d{1,2}|\d+)')

    # FIRST: Look for the FIRST strikethrough price (most likely the main product's original price)
    strike_price_elements = soup.find_all(['s', 'del', 'strike'])
    
    if strike_price_elements:
        # Take only the FIRST strikethrough element
        first_strike_el = strike_price_elements[0]
        txt = first_strike_el.get_text(" ", strip=True)
        m = price_pattern.search(txt)
        if m:
            try:
                strike_price = float(m.group(1))
                # Only accept reasonable prices
                if 0.1 <= strike_price <= 200:
                    original_price = strike_price
                    print(f"[DEBUG] First strikethrough price (original): ${original_price:.2f}")
                else:
                    print(f"[DEBUG] First strikethrough price out of range: ${strike_price:.2f}")
            except ValueError:
                print(f"[DEBUG] Could not parse first strikethrough price: {txt}")
        
        # Log any additional strikethrough prices for debugging
        if len(strike_price_elements) > 1:
            print(f"[DEBUG] Found {len(strike_price_elements)} strikethrough elements total")
            for i, el in enumerate(strike_price_elements[1:], 2):
                txt = el.get_text(" ", strip=True)
                m = price_pattern.search(txt)
                if m:
                    try:
                        extra_price = float(m.group(1))
                        #print(f"[DEBUG]   Striketrhough #{i}: ${extra_price:.2f} - '{txt[:50]}...'")
                    except ValueError:
                        pass

    # SECOND: Look for current price (non-strikethrough)
    # Try to find prices in the main product area first
    product_container = soup.find(['div', 'section'], class_=re.compile(r'product|detail|main', re.I))
    
    if product_container:
        # Look for price elements in product container that are NOT strikethrough
        price_elements = product_container.find_all(['div', 'span', 'p'], 
                                                   class_=re.compile(r'price|Price|PRICE'))
        for el in price_elements:
            if el.name not in ['s', 'del', 'strike'] and not el.find_parents(['s', 'del', 'strike']):
                txt = el.get_text(" ", strip=True)
                m = price_pattern.search(txt)
                if m:
                    try:
                        price_val = float(m.group(1))
                        if 0.1 <= price_val <= 200:
                            current_price = price_val
                            print(f"[DEBUG] Current price from product area: ${current_price:.2f}")
                            break
                    except ValueError:
                        pass
    
    # Fallback: look for any non-strikethrough price
    if current_price is None:
        price_candidates = soup.select("[class*='price']:not(s):not(del):not(strike)")
        for el in price_candidates:
            txt = el.get_text(" ", strip=True)
            m = price_pattern.search(txt)
            if m:
                try:
                    price_val = float(m.group(1))
                    if 0.1 <= price_val <= 200:
                        current_price = price_val
                        print(f"[DEBUG] Current price from any price-class: ${current_price:.2f}")
                        break
                except ValueError:
                    pass
    
    # Fallback: scan text around product name
    if current_price is None and name:
        # Find the text around the product name
        search_idx = full_text.lower().find(name.lower())
        if search_idx != -1:
            start = max(0, search_idx - 200)
            end = min(len(full_text), search_idx + len(name) + 200)
            context = full_text[start:end]
            
            m = price_pattern.search(context)
            if m:
                try:
                    price_val = float(m.group(1))
                    if 0.1 <= price_val <= 200:
                        current_price = price_val
                        print(f"[DEBUG] Current price near product name: ${current_price:.2f}")
                except ValueError:
                    pass
    
    # Final fallback: find first price in full text
    if current_price is None:
        m = price_pattern.search(full_text)
        if m:
            try:
                price_val = float(m.group(1))
                if 0.1 <= price_val <= 200:
                    current_price = price_val
                    print(f"[DEBUG] Current price from full text: ${current_price:.2f}")
            except ValueError:
                pass
    
    # THIRD: If we have original_price but no current_price, or vice versa, try to find the other
    if original_price and not current_price:
        # Look for a current price that's lower than the original
        all_prices = []
        text_elements = soup.find_all(['div', 'span', 'p'])
        for el in text_elements:
            txt = el.get_text(" ", strip=True)
            matches = price_pattern.findall(txt)
            for val in matches:
                try:
                    price_val = float(val)
                    if 0.1 <= price_val <= 200 and price_val not in all_prices:
                        all_prices.append(price_val)
                except ValueError:
                    pass
        
        if all_prices:
            # Filter for prices lower than original
            lower_prices = [p for p in all_prices if p < original_price - 0.001]
            if lower_prices:
                current_price = max(lower_prices)  # Take the highest lower price
                print(f"[DEBUG] Found current price lower than original: ${current_price:.2f}")
    
    elif current_price and not original_price:
        # Look for strikethrough prices we might have missed
        # Or look for "Was $X.XX" pattern
        was_pattern = re.compile(r'(?:was|u\.?p\.?)\s*[Ss]?\$?\s*(\d+\.\d{2})', re.IGNORECASE)
        was_match = was_pattern.search(full_text)
        if was_match:
            try:
                was_price = float(was_match.group(1))
                if was_price > current_price + 0.001:
                    original_price = was_price
                    print(f"[DEBUG] Found original price from Was/U.P.: ${original_price:.2f}")
            except ValueError:
                pass
    
    # FOURTH: Sanity check - original should be higher than current
    if original_price and current_price:
        if original_price <= current_price:
            print(f"[DEBUG] Original price ${original_price:.2f} <= current ${current_price:.2f}, ignoring original")
            original_price = None
        elif original_price > current_price * 3:
            print(f"[DEBUG] Original price ${original_price:.2f} > 3x current ${current_price:.2f}, might be wrong")
            # Could be a bulk price, but let's keep it with a warning
    
    print(f"[DEBUG] Final prices -> current=${current_price}, original=${original_price}")

    # ---------- Rating (very unlikely) ----------
    rating = None
    for line in full_text.splitlines():
        if "rating" in line.lower():
            m = re.search(r"(\d+(\.\d)?)", line)
            if m:
                try:
                    rating = float(m.group(1))
                except ValueError:
                    rating = None
            break

    # ---------- Promo text ----------
    promo_text = None
    promo_pattern = re.compile(
        r"(Save\s*\$\d+(\.\d{1,2})?)|"
        r"(\bBuy\s*\d+.*for\s*\$\d+)|"
        r"(\bAny\s*\d+.*for\s*\$\d+)|"
        r"(\d{1,2}%\s*off)|"
        r"(\bDiscounted\b|\bOffer\b|\bPromotion\b)",
        re.IGNORECASE,
    )

    for el in soup.select("div, span, p"):
        txt = el.get_text(" ", strip=True)
        if promo_pattern.search(txt):
            promo_text = txt
            break

    # Check if there's a discount (original_price > current_price)
    if original_price and current_price and original_price > current_price:
        discount_percent = round((1 - current_price / original_price) * 100)
        if not promo_text:
            promo_text = f"{discount_percent}% off - Was ${original_price:.2f}, Now ${current_price:.2f}"
        else:
            promo_text = f"{promo_text} ({discount_percent}% off)"
    elif not promo_text:
        if current_price is not None:
            promo_text = f"Regular price ${current_price:.2f}"
        else:
            promo_text = "No promotion information found"

    # ---------- Text sections (best-effort) ----------
    key_information = extract_section(full_text, ["Key Information", "Key info"])
    ingredients = extract_section(full_text, ["Ingredients"])

    if description is None:
        description = extract_section(full_text, ["Description"])

    return {
        "url": url,
        "name": name,
        "brand": brand,
        "size": size,
        "current_price": current_price,
        "original_price": original_price,
        "promo_text": promo_text,
        "rating": rating,
        "description": description,
        "key_information": key_information,
        "ingredients": ingredients,
    }

In [12]:
def crawl_shengsiong_products_selenium(urls: list[str]) -> list[dict]:
    driver = make_driver(headless=True)
    results = []
    try:
        for url in urls:
            info = fetch_shengsiong_product_selenium(driver, url)
            results.append(info)
            time.sleep(2)  # polite delay
    finally:
        driver.quit()
    return results

In [13]:
if __name__ == "__main__":
    SHENGSIONG_URLS = [
    "https://shengsiong.com.sg/product/royal-umbrella-thai-mixed-rice-2-kg",
    "https://shengsiong.com.sg/product/abc-extra-stout-500-ml",
    "https://shengsiong.com.sg/product/clear-men-3in1-shampoo-bodywash-active-cool-618-ml"

    ]

    store = []
    data = crawl_shengsiong_products_selenium(SHENGSIONG_URLS)
    print()
    for x in data:
        del x['promo_text']
        #pprint(x)
    #del data['promo_text']
    store.append(x)
    print("\n=== Final parsed data ===")


[DEBUG] Selenium requesting: https://shengsiong.com.sg/product/royal-umbrella-thai-mixed-rice-2-kg
[DEBUG] Parsed name (from <h1>): None
[DEBUG] brand from slug: 'Royal Umbrella'
[DEBUG] desc from slug: 'Thai Mixed - Rice'
[DEBUG] size from slug: '2 kg'
[DEBUG] First strikethrough price (original): $8.34
[DEBUG] Found 42 strikethrough elements total
[DEBUG] Current price from product area: $6.55
[DEBUG] Final prices -> current=$6.55, original=$8.34

[DEBUG] Selenium requesting: https://shengsiong.com.sg/product/abc-extra-stout-500-ml
[DEBUG] Parsed name (from <h1>): None
[DEBUG] brand from slug: 'Abc Extra'
[DEBUG] desc from slug: 'Stout'
[DEBUG] size from slug: '500 ml'
[DEBUG] First strikethrough price (original): $4.13
[DEBUG] Found 27 strikethrough elements total
[DEBUG] Current price from product area: $5.95
[DEBUG] Original price $4.13 <= current $5.95, ignoring original
[DEBUG] Final prices -> current=$5.95, original=$None

[DEBUG] Selenium requesting: https://shengsiong.com.sg

In [14]:
data

[{'url': 'https://shengsiong.com.sg/product/royal-umbrella-thai-mixed-rice-2-kg',
  'name': None,
  'brand': 'Royal Umbrella',
  'size': '2 kg',
  'current_price': 6.55,
  'original_price': 8.34,
  'rating': None,
  'description': 'Thai Mixed - Rice',
  'key_information': None,
  'ingredients': None},
 {'url': 'https://shengsiong.com.sg/product/abc-extra-stout-500-ml',
  'name': None,
  'brand': 'Abc Extra',
  'size': '500 ml',
  'current_price': 5.95,
  'original_price': None,
  'rating': None,
  'description': 'Stout',
  'key_information': None,
  'ingredients': None},
 {'url': 'https://shengsiong.com.sg/product/clear-men-3in1-shampoo-bodywash-active-cool-618-ml',
  'name': None,
  'brand': 'Clear Men',
  'size': '618 ml',
  'current_price': 9.75,
  'original_price': 16.5,
  'rating': None,
  'description': '3in1 Shampoo Bodywash Active - Cool',
  'key_information': None,
  'ingredients': None}]

In [15]:
import numpy as np

store = data
for item in store:
    pprint(item)

    if " | NTUC FairPrice" in item['description']:
        print(item['description'])
        desc = item['description'].split(" | ")[0]
        item['description'] = desc

    if " - " in item['brand']:
        print(item['brand'])
        desc = item['brand'].split(" - ")[0]
        item['brand'] = desc

    if item['original_price']==None:
        item['original_price'] = item['current_price']
    
    print("-"*110)
    print()

{'brand': 'Royal Umbrella',
 'current_price': 6.55,
 'description': 'Thai Mixed - Rice',
 'ingredients': None,
 'key_information': None,
 'name': None,
 'original_price': 8.34,
 'rating': None,
 'size': '2 kg',
 'url': 'https://shengsiong.com.sg/product/royal-umbrella-thai-mixed-rice-2-kg'}
--------------------------------------------------------------------------------------------------------------

{'brand': 'Abc Extra',
 'current_price': 5.95,
 'description': 'Stout',
 'ingredients': None,
 'key_information': None,
 'name': None,
 'original_price': None,
 'rating': None,
 'size': '500 ml',
 'url': 'https://shengsiong.com.sg/product/abc-extra-stout-500-ml'}
--------------------------------------------------------------------------------------------------------------

{'brand': 'Clear Men',
 'current_price': 9.75,
 'description': '3in1 Shampoo Bodywash Active - Cool',
 'ingredients': None,
 'key_information': None,
 'name': None,
 'original_price': 16.5,
 'rating': None,
 'size': '618

In [16]:
columns = ['description', 'brand',	'size',	'current_price', 'original_price',	'rating', 'url']

df = pd.DataFrame(data)
df[columns]

Unnamed: 0,description,brand,size,current_price,original_price,rating,url
0,Thai Mixed - Rice,Royal Umbrella,2 kg,6.55,8.34,,https://shengsiong.com.sg/product/royal-umbrel...
1,Stout,Abc Extra,500 ml,5.95,5.95,,https://shengsiong.com.sg/product/abc-extra-st...
2,3in1 Shampoo Bodywash Active - Cool,Clear Men,618 ml,9.75,16.5,,https://shengsiong.com.sg/product/clear-men-3i...


In [17]:
store

[{'url': 'https://shengsiong.com.sg/product/royal-umbrella-thai-mixed-rice-2-kg',
  'name': None,
  'brand': 'Royal Umbrella',
  'size': '2 kg',
  'current_price': 6.55,
  'original_price': 8.34,
  'rating': None,
  'description': 'Thai Mixed - Rice',
  'key_information': None,
  'ingredients': None},
 {'url': 'https://shengsiong.com.sg/product/abc-extra-stout-500-ml',
  'name': None,
  'brand': 'Abc Extra',
  'size': '500 ml',
  'current_price': 5.95,
  'original_price': 5.95,
  'rating': None,
  'description': 'Stout',
  'key_information': None,
  'ingredients': None},
 {'url': 'https://shengsiong.com.sg/product/clear-men-3in1-shampoo-bodywash-active-cool-618-ml',
  'name': None,
  'brand': 'Clear Men',
  'size': '618 ml',
  'current_price': 9.75,
  'original_price': 16.5,
  'rating': None,
  'description': '3in1 Shampoo Bodywash Active - Cool',
  'key_information': None,
  'ingredients': None}]