In [3]:
import requests
from bs4 import BeautifulSoup
import re
import time


class WebCrawler:
    def __init__(self, url, max_depth=1, max_pages=100):
        self.url = url
        self.max_depth = max_depth
        self.max_pages = max_pages
        self.visited = set()
        self.pages = []

    def crawl(self):
        self.pages.append(self.url)
        self.visited.add(self.url)

        while self.pages and len(self.pages) <= self.max_pages:
            current_page = self.pages.pop(0)
            print(f"Crawling: {current_page}")

            try:
                response = requests.get(current_page)
                response.raise_for_status()

                soup = BeautifulSoup(response.content, 'html.parser')

                for link in soup.find_all('a', href=True):
                    href = link['href']

                    if href.startswith('http'):
                        href = re.sub(r'^https?://', '', href)

                    if href not in self.visited and href not in self.pages:
                        self.pages.append(href)
                        self.visited.add(href)

                        if self.max_depth > 0:
                            self.crawl()

            except requests.exceptions.RequestException as e:
                print(f"Error: {e}")

    def get_feed(self):
        feed = []
        for page in self.pages:
            response = requests.get(page)
            response.raise_for_status()

            soup = BeautifulSoup(response.content, 'html.parser')

            for title in soup.find_all('title'):
                feed.append(title.text)

        return feed


crawler = WebCrawler(url='https://blog.funning.top/article/112250.html',
                     max_depth=2,
                     max_pages=50)
crawler.crawl()

feed = crawler.get_feed()
print(feed)

Crawling: https://blog.funning.top/article/112250.html
Crawling: v1.funning.top/
Error: Invalid URL 'v1.funning.top/': No scheme supplied. Perhaps you meant https://v1.funning.top/?
Crawling: https://blog.funning.top/article/112250.html
Crawling: funning.x10.mx
Error: Invalid URL 'funning.x10.mx': No scheme supplied. Perhaps you meant https://funning.x10.mx?
Crawling: https://blog.funning.top/article/112250.html
Crawling: hot.funning.top/
Error: Invalid URL 'hot.funning.top/': No scheme supplied. Perhaps you meant https://hot.funning.top/?
Crawling: https://blog.funning.top/article/112250.html
Crawling: gemini.funning.top/
Error: Invalid URL 'gemini.funning.top/': No scheme supplied. Perhaps you meant https://gemini.funning.top/?
Crawling: https://blog.funning.top/article/112250.html
Crawling: lobe-chat.funning.top/
Error: Invalid URL 'lobe-chat.funning.top/': No scheme supplied. Perhaps you meant https://lobe-chat.funning.top/?
Crawling: https://blog.funning.top/article/112250.html
Cr


KeyboardInterrupt

