In [1]:
!pip install -q selenium beautifulsoup4 webdriver-manager

In [2]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
import time
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import pandas as pd
import json
import os
from collections import deque
import urllib.parse # to manipulate urls


In [3]:
# Chrome options
chrome_options = Options()
chrome_options.add_argument("--disable-http2")
chrome_options.add_argument("--incognito")
chrome_options.add_argument("--ignore-certificate-errors")
chrome_options.add_argument("--enable-features=NetworkServiceInProcess")
chrome_options.add_argument(
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36")
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
# chrome_options.add_argument('--headless')

In [4]:
def wait_for_page_to_load(driver, wait):
    title = driver.title
    try:
        wait.until(
            lambda d: d.execute_script("return document.readyState") == "complete"
        )
        print(f"The Webpage {title} got fully loaded.")
    except:
        print(f"The Webpage {title} did not get loaded.")

In [13]:
def scraper(base_url="https://www.occamsadvisory.com/"):
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)
    wait = WebDriverWait(driver, 10)
    driver.maximize_window()
    
    data  = [] # to store: {'url':str, 'content':str}
    visited = set() # to track already visited urls
    queue = deque([base_url]) # starting with homepage
    
    print("Starting scraping .....")
    while queue:
        current_url = queue.popleft() # getting next url (bfs)
        if current_url in visited:
            continue
        try:
            print(f"DEBUG: scraping url -> {current_url}")
            driver.get(current_url)
            wait_for_page_to_load(driver, wait)
            time.sleep(2) # extra delay for js rendering
            
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            content = soup.get_text(separator="\n", strip=True)
            
            # adding url to data
            data.append({"url": current_url, "content": content})
            visited.add(current_url)
            
            # PREVIEW
            print(f"content length: {len(content)} chars\n")
            print(f"content snippet: {content[:200]}\n")
            print("-"*50 + "\n")
            
            # Find internal links 
            links = soup.find_all('a', href=True)
            for a in links:
                href = a['href']
                if not href or href.startswith("#") or href.startswith("mailto:") or href.startswith("tel:"):
                    continue # skipping achors, emails, phone numbers
                full_url = urllib.parse.urljoin(base_url, href)
                if "/blog/" in full_url:
                    print(f"DEBUG: skipping blog page -> {full_url}")
                    continue
                if "/podcasts" in full_url:
                    print(f"DEBUG: skipping podcast -> {full_url}")
                    continue
                if full_url.startswith(base_url) and full_url not in visited and full_url not in queue:
                    queue.append(full_url)
                    print(f"DEBUG: adding to the queue -> {full_url}")
                    
        except Exception as e:
            print(f"Can't scrape -> {current_url}: {e}\n")
            
    driver.quit()
    print(f"Scarping completed......")
    print(f"No of pages scraped: {len(data)}")
    return data

In [14]:
# exectuing scraping function
scraped_data = scraper()

Starting scraping .....
DEBUG: scraping url -> https://www.occamsadvisory.com/
The Webpage Occams Advisory | Global Financing Advisory & Professional Services got fully loaded.
content length: 37859 chars

content snippet: Occams Advisory | Global Financing Advisory & Professional Services
Career
Blog
Awards
About
Services
Services
BSGI
Business Services & Growth Incubation
FTPS
Financial Technology & Payment
          

--------------------------------------------------

DEBUG: adding to the queue -> https://www.occamsadvisory.com/career
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: adding to the queue -> https://www.occamsadvisory.com/awards
DEBUG: adding to the queue -> https://www.occamsadvisory.com/about
DEBUG: adding to the queue -> https://www.occamsadvisory.com/our-services
DEBUG: adding to the queue -> https://www.occamsadvisory.com/business-services-growth-incubation
DEBUG: adding to the queue -> https://www.occamsadvisory.com/financial-technology-payme

The Webpage Your Trusted Partner for Financial Advisory - Occams Advisory got fully loaded.
content length: 32424 chars

content snippet: Your Trusted Partner for Financial Advisory - Occams Advisory
Career
Blog
Awards
About
Services
Services
BSGI
Business Services & Growth Incubation
FTPS
Financial Technology & Payment
                

--------------------------------------------------

DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/press-release/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentio

The Webpage Capital Markets & Investment Banking Solutions | Occam's Advisory got fully loaded.
content length: 49635 chars

content snippet: Capital Markets & Investment Banking Solutions | Occam's Advisory
Career
Blog
Awards
About
Services
Services
BSGI
Business Services & Growth Incubation
FTPS
Financial Technology & Payment
            

--------------------------------------------------

DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/press-release/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-me

The Webpage Process Efficiency, Compliance & Tax Planning | Occam's Advisory got fully loaded.
content length: 47252 chars

content snippet: Process Efficiency, Compliance & Tax Planning | Occam's Advisory
Career
Blog
Awards
About
Services
Services
BSGI
Business Services & Growth Incubation
FTPS
Financial Technology & Payment
             

--------------------------------------------------

DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/press-release/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-men

The Webpage Customized Payment Risk Management & Analytics | Occam's Advisory got fully loaded.
content length: 45176 chars

content snippet: Customized Payment Risk Management & Analytics | Occam's Advisory
Career
Blog
Awards
About
Services
Services
BSGI
Business Services & Growth Incubation
FTPS
Financial Technology & Payment
            

--------------------------------------------------

DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/press-release/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-me

The Webpage Sell-Side M&A Services | Maximize Value with Occam's Advisory got fully loaded.
content length: 45396 chars

content snippet: Sell-Side M&A Services | Maximize Value with Occam's Advisory
Career
Blog
Awards
About
Services
Services
BSGI
Business Services & Growth Incubation
FTPS
Financial Technology & Payment
                

--------------------------------------------------

DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/press-release/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentio

The Webpage Maximize Your Savings with the Research & Development Tax Credit for 2024 got fully loaded.
content length: 19436 chars

content snippet: Maximize Your Savings with the Research & Development Tax Credit for 2024
Career
Blog
Awards
About
Services
Services
BSGI
Business Services & Growth Incubation
FTPS
Financial Technology & Payment
    

--------------------------------------------------

DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/press-release/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/

The Webpage  got fully loaded.
content length: 13101 chars

content snippet: Career
Blog
Awards
About
Services
Services
BSGI
Business Services & Growth Incubation
FTPS
Financial Technology & Payment
                                                                    Solutions


--------------------------------------------------

DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/press-release/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions
DEBUG: skipping blog page -> https://occamsadvisory.com/bl

The Webpage Occam's Advisory FAQs | Answers to Your Business Consulting Questions got fully loaded.
content length: 9500 chars

content snippet: Occam's Advisory FAQs | Answers to Your Business Consulting Questions
Career
Blog
Awards
About
Services
Services
BSGI
Business Services & Growth Incubation
FTPS
Financial Technology & Payment
        

--------------------------------------------------

DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/press-release/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media

The Webpage  got fully loaded.
content length: 9043 chars

content snippet: Career
Blog
Awards
About
Services
Services
BSGI
Business Services & Growth Incubation
FTPS
Financial Technology & Payment
                                                                    Solutions


--------------------------------------------------

DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/press-release/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions
DEBUG: skipping blog page -> https://occamsadvisory.com/blo

The Webpage Financial Technology & Payment Solutions | Occam's Advisory got fully loaded.
content length: 49498 chars

content snippet: Financial Technology & Payment Solutions | Occam's Advisory
Career
Blog
Awards
About
Services
Services
BSGI
Business Services & Growth Incubation
FTPS
Financial Technology & Payment
                  

--------------------------------------------------

DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/press-release/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions

The Webpage Maximize Your Business Profits with Our Tax Credit Services got fully loaded.
content length: 49147 chars

content snippet: Maximize Your Business Profits with Our Tax Credit Services
Career
Blog
Awards
About
Services
Services
BSGI
Business Services & Growth Incubation
FTPS
Financial Technology & Payment
                  

--------------------------------------------------

DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/press-release/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions

The Webpage  got fully loaded.
content length: 10184 chars

content snippet: Career
Blog
Awards
About
Services
Services
BSGI
Business Services & Growth Incubation
FTPS
Financial Technology & Payment
                                                                    Solutions


--------------------------------------------------

DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/press-release/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions
DEBUG: skipping blog page -> https://occamsadvisory.com/bl

The Webpage Strategic Leadership & Articulate Thinking (SLAT) got fully loaded.
content length: 45133 chars

content snippet: Strategic Leadership & Articulate Thinking (SLAT)
Career
Blog
Awards
About
Services
Services
BSGI
Business Services & Growth Incubation
FTPS
Financial Technology & Payment
                            

--------------------------------------------------

DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/press-release/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions
DEBUG: sk

The Webpage Branding and Marketing Acumen and Performance (BMAP) got fully loaded.
content length: 55309 chars

content snippet: Branding and Marketing
    Acumen and Performance (BMAP)
Career
Blog
Awards
About
Services
Services
BSGI
Business Services & Growth Incubation
FTPS
Financial Technology & Payment
                     

--------------------------------------------------

DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/press-release/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions
DEBUG:

The Webpage  got fully loaded.
content length: 9921 chars

content snippet: Career
Blog
Awards
About
Services
Services
BSGI
Business Services & Growth Incubation
FTPS
Financial Technology & Payment
                                                                    Solutions


--------------------------------------------------

DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/press-release/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions
DEBUG: skipping blog page -> https://occamsadvisory.com/blo

The Webpage Occams Advisory | Global Financing Advisory & Professional Services got fully loaded.
content length: 10770 chars

content snippet: Occams Advisory | Global Financing Advisory & Professional Services
Career
Blog
Awards
About
Services
Services
BSGI
Business Services & Growth Incubation
FTPS
Financial Technology & Payment
          

--------------------------------------------------

DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/press-release/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-

The Webpage  got fully loaded.
content length: 9200 chars

content snippet: Career
Blog
Awards
About
Services
Services
BSGI
Business Services & Growth Incubation
FTPS
Financial Technology & Payment
                                                                    Solutions


--------------------------------------------------

DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/press-release/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions
DEBUG: skipping blog page -> https://occamsadvisory.com/blo

The Webpage  got fully loaded.
content length: 8568 chars

content snippet: Career
Blog
Awards
About
Services
Services
BSGI
Business Services & Growth Incubation
FTPS
Financial Technology & Payment
                                                                    Solutions


--------------------------------------------------

DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/press-release/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions
DEBUG: skipping blog page -> https://occamsadvisory.com/blo

The Webpage Privacy Policy | Occams Advisory got fully loaded.
content length: 27301 chars

content snippet: Privacy Policy | Occams Advisory
Career
Blog
Awards
About
Services
Services
BSGI
Business Services & Growth Incubation
FTPS
Financial Technology & Payment
                                             

--------------------------------------------------

DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/press-release/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions
DEBUG: skipping blog page 

The Webpage Related Employee Retention Credits: Determining Which Employers are Eligible to Claim the Employee Retention Credit FAQs | Occams Advisory got fully loaded.
content length: 17424 chars

content snippet: Related Employee Retention Credits: Determining Which Employers are Eligible to Claim the Employee Retention Credit FAQs | Occams Advisory
Career
Blog
Awards
About
Services
Services
BSGI
Business Serv

--------------------------------------------------

DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/press-release/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podca

The Webpage Related Employee Retention Credits: Amount of Allocable Qualified Health Plan Expenses FAQs | Occams Advisory got fully loaded.
content length: 26054 chars

content snippet: Related Employee Retention Credits: Amount of Allocable Qualified Health Plan Expenses FAQs | Occams Advisory
Career
Blog
Awards
About
Services
Services
BSGI
Business Services & Growth Incubation
FTPS

--------------------------------------------------

DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/press-release/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page

The Webpage Brand Building, Mobile Marketing & Data Analytics | Occam's Advisory got fully loaded.
content length: 46790 chars

content snippet: Brand Building, Mobile Marketing & Data Analytics | Occam's Advisory
Career
Blog
Awards
About
Services
Services
BSGI
Business Services & Growth Incubation
FTPS
Financial Technology & Payment
         

--------------------------------------------------

DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/press-release/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media

The Webpage  got fully loaded.
content length: 10423 chars

content snippet: Career
Blog
Awards
About
Services
Services
BSGI
Business Services & Growth Incubation
FTPS
Financial Technology & Payment
                                                                    Solutions


--------------------------------------------------

DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/press-release/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions
DEBUG: skipping blog page -> https://occamsadvisory.com/bl

The Webpage World Culture Festival by Art of Living got fully loaded.
content length: 11248 chars

content snippet: World Culture Festival by Art of Living
Career
Blog
Awards
About
Services
Services
BSGI
Business Services & Growth Incubation
FTPS
Financial Technology & Payment
                                      

--------------------------------------------------

DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/press-release/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions
DEBUG: skipping blo

The Webpage Greater Hollywood Chamber of Commerce got fully loaded.
content length: 10566 chars

content snippet: Greater Hollywood Chamber of Commerce
Career
Blog
Awards
About
Services
Services
BSGI
Business Services & Growth Incubation
FTPS
Financial Technology & Payment
                                        

--------------------------------------------------

DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/press-release/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions
DEBUG: skipping blog 

The Webpage  got fully loaded.
content length: 8568 chars

content snippet: Career
Blog
Awards
About
Services
Services
BSGI
Business Services & Growth Incubation
FTPS
Financial Technology & Payment
                                                                    Solutions


--------------------------------------------------

DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/press-release/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions
DEBUG: skipping blog page -> https://occamsadvisory.com/blo

The Webpage  got fully loaded.
content length: 8568 chars

content snippet: Career
Blog
Awards
About
Services
Services
BSGI
Business Services & Growth Incubation
FTPS
Financial Technology & Payment
                                                                    Solutions


--------------------------------------------------

DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/press-release/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions
DEBUG: skipping blog page -> https://occamsadvisory.com/blo

The Webpage Occams Advisory | Global Financing Advisory & Professional Services got fully loaded.
content length: 10412 chars

content snippet: Occams Advisory | Global Financing Advisory & Professional Services
Career
Blog
Awards
About
Services
Services
BSGI
Business Services & Growth Incubation
FTPS
Financial Technology & Payment
          

--------------------------------------------------

DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/press-release/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-

The Webpage  got fully loaded.
content length: 8568 chars

content snippet: Career
Blog
Awards
About
Services
Services
BSGI
Business Services & Growth Incubation
FTPS
Financial Technology & Payment
                                                                    Solutions


--------------------------------------------------

DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/press-release/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/
DEBUG: skipping podcast -> https://www.occamsadvisory.com/podcasts
DEBUG: skipping blog page -> https://occamsadvisory.com/blog/media-mentions
DEBUG: skipping blog page -> https://occamsadvisory.com/blo

In [15]:
print(f'TOTAL URLs scraped: {len(scraped_data)}')

TOTAL URLs scraped: 156


In [20]:
for i in scraped_data[:3]:
    print(f"URL: {i['url']}\n Snippet: {i['content'][:10000]}\n {'-'*50}")

URL: https://www.occamsadvisory.com/
 Snippet: Occams Advisory | Global Financing Advisory & Professional Services
Career
Blog
Awards
About
Services
Services
BSGI
Business Services & Growth Incubation
FTPS
Financial Technology & Payment
                                                                    Solutions
CMIB
Capital Market & Investment Banking
TC
Tax Advisory and Tax Credits
Employer of
                                                                    Record (EOR)
We handle payroll, contracts, taxes, and benefits, so
                                                                    you can focus on growth.
Structuring,
                                                                    Incorporation & Accounting Advisory
Expert guidance for business formation, legal
                                                                    structuring, and financial setup.
Process
                                                                    Efficiency, Compliance, Tax Pla

In [21]:
with open('occams_scraped_data.json', 'w') as f:
    json.dump(scraped_data, f, indent=4)
print("saved as json ....")

saved as json ....
