In [8]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape the title and link of every blog post listed on the PriceLabs
# category archive pages, then build a de-duplicated DataFrame of them.
#
# Results:
#   all_blog_posts -- list of {"title": ..., "link": ...} dicts, one per
#                     post heading found (may contain duplicates, since a
#                     post can be listed under several categories).
#   df             -- DataFrame of all_blog_posts with columns title, link.
#   df_links       -- df with duplicate (title, link) rows removed and a
#                     fresh 0..n-1 index.
#
# Network errors (including timeouts) raised by requests are not caught and
# propagate to the caller; posts gathered so far remain in all_blog_posts.
all_blog_posts = []

categories = ['hosts', 'property-managers', 'revenue-managers', 'hotels', 'market-insights',
              'pricelabs-product-updates', 'data-science-at-pricelabs', 'pricelabs-news']

# Upper bound on the pages probed per category; the loop normally stops
# earlier, at the first response that is not HTTP 200.
MAX_PAGES = 100

# Seconds to wait for the server. Without a timeout requests waits forever,
# so a single stalled connection would hang the whole scrape.
REQUEST_TIMEOUT = 10

# One session reuses the underlying connection across all page requests.
with requests.Session() as session:
    for category in categories:
        for page_num in range(1, MAX_PAGES + 1):
            # Step 1: Construct the URL for the current page
            url = f"https://hello.pricelabs.co/category/{category}/page/{page_num}/"

            # Step 2: Send a GET request to the webpage
            response = session.get(url, timeout=REQUEST_TIMEOUT)

            # Step 3: Stop paging through this category on any non-200 reply.
            if response.status_code != 200:
                if response.status_code == 404:
                    print(f"Page {page_num} does not exist. Stopping the loop.")
                else:
                    # Rate limiting or a server error is not a missing page;
                    # report the real status so truncated results are noticed.
                    print(
                        f"Page {page_num} of {category} returned HTTP "
                        f"{response.status_code}. Stopping the loop."
                    )
                break

            # Step 4: Parse the webpage content with BeautifulSoup
            soup = BeautifulSoup(response.content, "html.parser")

            # Step 5: Find all the post-title headings on the page
            post_elements = soup.find_all("h3", class_="elementor-post__title")

            # Step 6: Extract the link and title from each heading
            for post in post_elements:
                anchor = post.find("a")
                # Skip headings without a usable link instead of crashing on
                # None["href"] or a missing href attribute.
                if anchor is None or not anchor.get("href"):
                    continue
                all_blog_posts.append({
                    "title": anchor.get_text(strip=True),
                    "link": anchor["href"],
                })

            # Optional: Print progress
            print(f"Scraped page {page_num} of {category}")

# Naming the columns explicitly keeps drop_duplicates(subset=...) from raising
# KeyError when nothing was scraped and the list is empty.
df = pd.DataFrame(all_blog_posts, columns=["title", "link"])
df_links = df.drop_duplicates(subset=['title', 'link']).reset_index(drop=True)
df_links



Scraped page 1 of hosts
Scraped page 2 of hosts
Scraped page 3 of hosts
Scraped page 4 of hosts
Scraped page 5 of hosts
Scraped page 6 of hosts
Scraped page 7 of hosts
Scraped page 8 of hosts
Scraped page 9 of hosts
Scraped page 10 of hosts
Page 11 does not exist. Stopping the loop.
Scraped page 1 of property-managers
Scraped page 2 of property-managers
Scraped page 3 of property-managers
Scraped page 4 of property-managers
Page 5 does not exist. Stopping the loop.
Scraped page 1 of revenue-managers
Page 2 does not exist. Stopping the loop.
Scraped page 1 of hotels
Page 2 does not exist. Stopping the loop.
Scraped page 1 of market-insights
Scraped page 2 of market-insights
Page 3 does not exist. Stopping the loop.
Scraped page 1 of pricelabs-product-updates
Scraped page 2 of pricelabs-product-updates
Scraped page 3 of pricelabs-product-updates
Scraped page 4 of pricelabs-product-updates
Page 5 does not exist. Stopping the loop.
Scraped page 1 of data-science-at-pricelabs
Page 2 does not exist. Stopping the loop.

Unnamed: 0,title,link
0,What is Airbnb Superhost & How to Become a Sup...,https://hello.pricelabs.co/airbnb-superhost/
1,Instant Booking: All You Need To Know About It,https://hello.pricelabs.co/instant-booking-all...
2,15 Airbnb Hosting Tips To Make Your Listing Su...,https://hello.pricelabs.co/airbnb-hosting-tips/
3,Vrbo Listing Requirements: Everything Hosts Ne...,https://hello.pricelabs.co/vrbo-listing-requir...
4,5 Steps to Success in Airbnb Investing for Beg...,https://hello.pricelabs.co/airbnb-investing-fo...
...,...,...
247,PriceLabs Launches Spanish Version for Dynamic...,https://hello.pricelabs.co/pricelabs-spanish-d...
248,PriceLabs Launches Spanish-Language Version of...,https://hello.pricelabs.co/pricelabs-launches-...
249,Welcoming Rental Scale-Up and Thibault Masson ...,https://hello.pricelabs.co/pricelabs-joins-han...
250,How vacation rental managers benefit from the ...,https://hello.pricelabs.co/pricelabs-x-key-dat...
