In [4]:
# Import dependencies
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
import os
import time
import re
from bs4 import BeautifulSoup
import pandas as pd
import requests
from urllib.parse import quote
import logging
import sys
from dotenv import load_dotenv
from apify_client import ApifyClient

In [5]:
# --- 1. SET UP LOGGING (The Eyes of the Engineer) ---
# This creates a file called 'pipeline.log' that saves the history of your script.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("pipeline.log"), # Save to file
        logging.StreamHandler(sys.stdout)    # Print to screen
    ]
)

# Load secrets
relative_path = "../../../../.env"
load_dotenv(dotenv_path=relative_path)

# Retrieve configuration
WEBHOOK_URL = os.getenv("DISCORD_WEBHOOK_URL")

# Safety check! Never run a pipeline with missing config.
if not WEBHOOK_URL:
    logging.error("CRITICAL: Webhook URL not found. Check your .env file.")
    exit(1) # Stop the program immediately

# Initialize Chrome WebDriver
browser = webdriver.Chrome(
    service=Service(ChromeDriverManager().install())
)

2026-02-07 02:49:38,147 - ERROR - CRITICAL: Webhook URL not found. Check your .env file.
2026-02-07 02:49:38,166 - INFO - Get LATEST chromedriver version for google-chrome
2026-02-07 02:49:38,287 - INFO - About to download new driver from https://chromedriver.storage.googleapis.com/114.0.5735.90/chromedriver_linux64.zip
2026-02-07 02:49:38,393 - INFO - Driver downloading response is 200
2026-02-07 02:49:38,622 - INFO - Get LATEST chromedriver version for google-chrome
2026-02-07 02:49:38,846 - INFO - Get LATEST chromedriver version for google-chrome
2026-02-07 02:49:38,967 - INFO - Driver has been saved in cache [/root/.wdm/drivers/chromedriver/linux64/114.0.5735.90]


WebDriverException: Message: Service /root/.wdm/drivers/chromedriver/linux64/114.0.5735.90/chromedriver unexpectedly exited. Status code was: 127


In [23]:
# Setup search parameters
city = 'taguig'
radius = '10' # in KM
product = 'arctis nova pro'
days_listed = 1
keywords = [
    'Karting',
    'Kart rim',
    'Kart frame',
    'Kart wheel hub'
]

In [4]:
# Set up base URL
url = f"https://www.facebook.com/marketplace/{quote(city)}/search?sortBy=best_match&query={quote(product)}&exact=false&radius_in_km={quote(radius)}"

browser.get(url)
browser.maximize_window()

In [5]:
# Find the close button on the login pop-up
try:
    close_button = browser.find_element(By.XPATH, '//div[@aria-label="Close" and @role="button"]')
    close_button.click()
    print("Close button clicked!")
except:
    print("Could not find or click the close button!")
    pass

Close button clicked!


In [47]:
"""
# SKIP THIS STEP FOR NOW!
# Scroll down the page to load all results
try:
    # Get the initial scroll position
    last_height = browser.execute_script("return document.body.scrollHeight")

    while True:
        # Scroll down to the bottom of the page using Javascript
        browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        # Get the new scroll position
        new_height = browser.execute_script("return document.body.scrollHeight")

        # Check if we've reached the bottom
        if new_height == last_height:
            break

        last_height = new_height
        print("scrolled")
except Exception as e:
    print(f"An error occurred: {e}")

# Problem encountered: It won't let you scroll further below without logging in
# Possible Solution: Log-in using your own personal fb
#   Upon checking, we'll need to do a cookie injector to log-in because logging-in without history or cache
#   will ban the account used.
# Without scrolling, I think I can get the Top 10 if I maximize window.
"""

'\n# SKIP THIS STEP FOR NOW!\n# Scroll down the page to load all results\ntry:\n    # Get the initial scroll position\n    last_height = browser.execute_script("return document.body.scrollHeight")\n\n    while True:\n        # Scroll down to the bottom of the page using Javascript\n        browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")\n\n        # Get the new scroll position\n        new_height = browser.execute_script("return document.body.scrollHeight")\n\n        # Check if we\'ve reached the bottom\n        if new_height == last_height:\n            break\n\n        last_height = new_height\n        print("scrolled")\nexcept Exception as e:\n    print(f"An error occurred: {e}")\n\n# Problem encountered: It won\'t let you scroll further below without logging in\n# Possible Solution: Log-in using your own personal fb\n#   Upon checking, we\'ll need to do a cookie injector to log-in because logging-in without history or cache\n#   will ban the account used.

In [6]:
# Retrieve the HTML
html = browser.page_source

# Use BeautifulSoup to parse the HTML
soup = BeautifulSoup(html, 'html.parser')

# Close the browser
browser.close()

In [None]:
"""
# Saving the HTML to avoid repeating the bot-like behaviors in fb
# Parameters used: {
#   'city': 'taguig',
#   'product': 'arctis nova pro',
#   'radius': 10
# }
location = 'D:\Documents\Programming\Python\Marketplace to Discord'
with open(location + '\sample.html', "w", encoding='utf-8') as html_file:
    html_file.write(html)

# Run the code below to replace the whole process above (Start up to loading the HTML file)
file_location = 'D:\Documents\Programming\Python\Marketplace to Discord\sample.html'
with open(file_location, 'r', encoding='utf-8') as f:
    html_loaded = f.read()

# Load the html file into Beautiful Soup
soup_loaded = BeautifulSoup(html_loaded, 'html.parser')
"""

  location = 'D:\Documents\Programming\Python\Marketplace to Discord'
  with open(location + '\sample.html', "w", encoding='utf-8') as html_file:


In [25]:
# Find all link elements
links = soup.find_all('a')

# Only keep items where the text matches our search term
product_links = [link for link in links if product.lower() in link.text.lower()]

# Create empty list to store product data
product_data = []

# Store the items url and text into a list of dictionaries
for product_link in product_links:
    url = product_link.get('href')
    text = '\n'.join(product_link.stripped_strings)
    product_data.append({'text': text, 'url': url})

In [26]:
product_data

[{'text': 'PHP10,000\nPHP14,000\nOriginal steelseries Arctis Nova PRO Wireless  Headphones\nManila, NCR',
  'url': '/marketplace/item/1074067744700000/?ref=search&referral_code=null&referral_story_type=post&__tn__=!%3AD'}]

In [13]:
# Create an empty list to store product data
extracted_data = []

for item in product_data:
    # Separating the lines using the newline characters
    lines = item['text'].split('\n')

    # Regular expression to find numeric values for our price
    numeric_pattern = re.compile('\d[\d,.]*')

    #Extracting prices
    # Iterate through lines to find the first line with numbers
    for line in lines:
        match = numeric_pattern.search(line)
        if match:
            # Extract the first numeric value found
            price_str = match.group()
            # Convert price to float (handle commas)
            price = float(price_str.replace(',',''))
            break
    
    if price:
        print(f'Price extracted: {price}')
    else:
        print('price not found')

    # Extracting title
    title = lines[-2]

    # Extracting location
    location = lines[-1]

    # Add extracted data to a list of dictionaries
    extracted_data.append({
        'title': title,
        'price': price,
        'location': location,
        # Remove the search query from the urls using regular expression
        'url': re.sub(r'\?.*', '', item['url'])
    })

Price extracted: 10000.0


  numeric_pattern = re.compile('\d[\d,.]*')


In [14]:
# Convert extracted data into a Pandas Dataframe
items_df = pd.DataFrame(extracted_data)

# Sort the Dataframe by the price column in ascending order
sorted_df = items_df.sort_values(by='price')

# Get the 10 cheapest entries
cheapest = sorted_df.head(10)

In [16]:
cheapest

Unnamed: 0,title,price,location,url
0,Original steelseries Arctis Nova PRO Wireless ...,10000.0,"Manila, NCR",/marketplace/item/1074067744700000/


In [None]:
# I'll try to use Discord webhooks to avoid Self-Botting, which is against Terms of Service (ToS)
# I need to use WEBHOOK_URL from my loaded secret
def post_to_discord(content):
    """
    Posts a message to Discord using a webhook.
    This is fully legal and encouraged by Discord.
    """
    data = {
        "content": content
    }

    # HTTP POST Request
    result = requests.post(WEBHOOK_URL, json=data)

    try:
        result = requests.post(WEBHOOK_URL, json=data, timeout=10) # Always add a timeout!
        result.raise_for_status()
    except requests.exceptions.Timeout:
        logging.error("Request timed out. Discord might be down.")
    except requests.exceptions.ConnectionError:
        logging.error("Connection error. Check your internet.")
    except requests.exceptions.HTTPError as err:
        logging.error(f"HTTP Error: {err}")
    else:
        # This runs only if NO exception occurred
        print(f"Success! Status Code: {result.status_code}")

In [None]:
# Execution Phase
logging.info("--- Pipeline Started ---")
# Create an empty message
message = ""

# Iterate over each row in the DataFrame containing the 10 cheapest items
for index, row in cheapest.iterrows():
    # Append the title, price, and URL of each item to the message string
    message += f"Title: {row['title']}\nPrice: {row['price']}\nLocation: {row['location']}\nURL: {row['url']}\n\n"
    post_to_discord(message)
    # CRITICAL: Sleep to avoid hitting Discord Rate Limits (30 requests/min)
    time.sleep(2)

logging.info("--- Pipeline Finished ---")

Payload delivered successfully, code 204
Consummatum est!
