In [6]:
import requests
import time
from bs4 import BeautifulSoup

def get_web_page(url):
    resp = requests.get(
        url = url,
        cookies = {'over18': '1'} # 18+ 驗證
    )

    if resp.status_code != 200: # HTTP status code 200 OK 請求成功
        print("fail to get:", resp.url)
        return None
    else:
        return resp.text

def get_articles(dom, date):
    
    # 解析 html dom 架構
    soup = BeautifulSoup(dom, 'lxml')
    
    # 取得上一頁的連結
    paging_div = soup.find('div', 'btn-group btn-group-paging') 
    prev_url = paging_div.find_all('a')[1]['href']

    # 儲存取得的文章
    articles = []
    
    # 取文章
    divs = soup.find_all('div', 'r-ent')
    for d in divs:
        
        if d.find('div','date').text.strip() == date: # 日期符合條件
            
            # 取推文數
            push_count = 0 
            push_str = d.find('div', 'nrec').text
            if push_str:
                try:
                    push_count = int(push_str)
                except ValueError:
                    if push_str == '爆':
                        push_count = 99
                    elif push_str.startswith('X'):
                        push_count = -1
            
            # 取文章連結、標題、作者
            if d.find('a'):
                href = d.find('a')['href']
                title = d.find('div', 'title').text.strip()
                author = d.find('div', 'author').text if d.find('div', 'author').text else ''
            
                articles.append({
                    'title': title,
                    'author': author,
                    'push count': push_count,
                    'href': 'https://www.ptt.cc/' + href
                })
    return articles, prev_url


def main():
    
    current_page = get_web_page('https://www.ptt.cc/' + 'bbs/gossiping/index.html')

    if current_page:

        articles = []

        # 取今天日期
        today = time.strftime('%m/%d').lstrip('0')

        # 目前頁面的今天文章
        current_articles, prev_url = get_articles(current_page, today)

        while current_articles:
            articles += current_articles
            current_page = get_web_page('https://www.ptt.cc/' + prev_url)
            current_articles, prev_url = get_articles(current_page, today)

        # 統整
        print('今天有', len(articles), '篇文章')
        push = 30
        print('熱門文章（> %d 推）：' % (push))
        
        hot = 0
        for i in articles:       
            if int(i['push count']) > push:
                print(i)
                hot = hot + 1

        print('熱門文章（> %d 推）： 共'% (push), hot, '篇')    

In [7]:
main()

今天有 74 篇文章
熱門文章（> 30 推）：
{'title': '[問卦] 葡萄牙的戰犯是誰？', 'author': 'ijk77692', 'push count': 46, 'href': 'https://www.ptt.cc//bbs/Gossiping/M.1670691051.A.81E.html'}
{'title': '[爆卦] 葡萄牙回家囉', 'author': 'knight45683', 'push count': 37, 'href': 'https://www.ptt.cc//bbs/Gossiping/M.1670691403.A.2A2.html'}
熱門文章（> 30 推）： 共 2 篇
