In [1]:
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd

In [2]:
baseurl = "https://www.liquorland.com.au"
url = "https://www.liquorland.com.au/offers"


In [None]:

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
}

# Create a webdriver instance (I'll use Firefox in this example)
driver = webdriver.Firefox()

# Send a request to the URL and create a BeautifulSoup object
driver.get(url)
soup = BeautifulSoup(driver.page_source, "html.parser")


In [None]:
# Find all product lists
prodlist = soup.find_all("div", "product-list")

# Initialize a set to store unique product links
prodlinks = set()

# Extract product links from each product list
for item in prodlist:
    for link in item.find_all('a', href=True):
        prodlinks.add(baseurl + link['href'])

# Convert the set to a list to make indexing easier
prodlinks = list(prodlinks)



In [4]:
data = []

# Loop through each product link and extract the required information
for link in prodlinks:
    driver.get(link)
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Use error handling to avoid AttributeError for ProductBrand
    try:
        product_brand_element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "product-brand"))
        )
        ProductBrand = product_brand_element.text
    except Exception:
        ProductBrand = "N/A"

    # Use error handling to avoid AttributeError for ProductPrice
    try:
        product_price_element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "PriceTag.zero-cents.current.primary"))
        )
        ProductPrice = product_price_element.find_element(By.CLASS_NAME, "dollarAmount").text
    except Exception:
        ProductPrice = "N/A"

    # Use error handling to avoid AttributeError for ProductName
    try:
        product_name_element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "product-name"))
        )
        ProductName = product_name_element.text
    except Exception:
        ProductName = "N/A"

    GTIN = "N/A"  # Set a default value in case GTIN is not found
    product_properties = soup.find("ul", class_="product-properties two-col")
    if product_properties:
        li_elements = product_properties.find_all("li")
        for li in li_elements:
            key_element = li.find("span", class_="key")
            if key_element and key_element.text.strip() == "GTIN":
                # Use try-except to handle any potential errors in extracting GTIN
                try:
                    GTIN = li.find("span", class_="val").text.strip()
                except AttributeError:
                    GTIN = "N/A"
                break  

    Liquor = {"Brand": ProductBrand, "Title": ProductName, "Price": ProductPrice, "GTIN": GTIN}
    data.append(Liquor) 

    print(Liquor)

driver.quit()

{'Brand': 'Tooheys', 'Title': 'New Block Can 375mL', 'Price': '59', 'GTIN': 'N/A'}
{'Brand': 'Jack Daniels', 'Title': 'Tennessee Whiskey 1 Litre', 'Price': '72', 'GTIN': '082184045367'}
{'Brand': 'XXXX', 'Title': 'Gold Block Can 375mL', 'Price': '56', 'GTIN': 'N/A'}
{'Brand': 'Vodka O', 'Title': '1 Litre', 'Price': '56', 'GTIN': '9329982019888'}
{'Brand': 'Carlton', 'Title': 'Dry Bottle 330mL', 'Price': '48', 'GTIN': '9320000504416'}
{'Brand': 'Botter', 'Title': 'Prosecco DOC 750mL', 'Price': '14', 'GTIN': '8008863057812'}
{'Brand': 'Baileys', 'Title': 'Irish Cream 1 Litre', 'Price': '49', 'GTIN': '5011013100118'}
{'Brand': 'Tooheys', 'Title': 'Extra Dry Block Can 375mL', 'Price': '59', 'GTIN': 'N/A'}
{'Brand': 'Zilzie', 'Title': 'Estate Pinot Grigio 750mL', 'Price': '12', 'GTIN': 'N/A'}
{'Brand': 'Pepperjack', 'Title': 'Shiraz 375mL', 'Price': '13', 'GTIN': 'N/A'}
{'Brand': 'Botter', 'Title': 'Prosecco DOC 750mL', 'Price': '14', 'GTIN': 'N/A'}
{'Brand': 'Johnnie Walker', 'Title': 'Red

{'Brand': 'Tooheys', 'Title': 'Extra Dry Block Can 375mL', 'Price': '59', 'GTIN': '9311890249358'}
{'Brand': 'Whispers', 'Title': 'Sparkling White 750mL', 'Price': '5', 'GTIN': 'N/A'}
{'Brand': 'Jamiesons Run', 'Title': 'Grazier Chardonnay 750mL', 'Price': '6', 'GTIN': '9300770003661'}
[{'Brand': 'Tooheys', 'Title': 'New Block Can 375mL', 'Price': '59', 'GTIN': 'N/A'}, {'Brand': 'Jack Daniels', 'Title': 'Tennessee Whiskey 1 Litre', 'Price': '72', 'GTIN': '082184045367'}, {'Brand': 'XXXX', 'Title': 'Gold Block Can 375mL', 'Price': '56', 'GTIN': 'N/A'}, {'Brand': 'Vodka O', 'Title': '1 Litre', 'Price': '56', 'GTIN': '9329982019888'}, {'Brand': 'Carlton', 'Title': 'Dry Bottle 330mL', 'Price': '48', 'GTIN': '9320000504416'}, {'Brand': 'Botter', 'Title': 'Prosecco DOC 750mL', 'Price': '14', 'GTIN': '8008863057812'}, {'Brand': 'Baileys', 'Title': 'Irish Cream 1 Litre', 'Price': '49', 'GTIN': '5011013100118'}, {'Brand': 'Tooheys', 'Title': 'Extra Dry Block Can 375mL', 'Price': '59', 'GTIN': '

In [18]:
df = pd.DataFrame(data)
df.to_excel("Liquor_data.xlsx",index = False)