In [65]:
import csv
import json

import requests
from bs4 import BeautifulSoup

# Scrape index pages of PTT's Rent_ya board and collect posts whose title
# contains `keyword` but not "男", keeping only the first post seen per title.
# Results are printed as they are found and accumulated in `matched_posts`
# as dicts with the keys 'Title', 'Author' and 'Date'.

# URL template for the board's index pages; `{}` is filled with a page number
# (or left empty for the board's default index page).
url_template = 'https://www.ptt.cc/bbs/Rent_ya/index{}.html'

# Seconds to wait on any single HTTP request, so that a stalled connection
# cannot hang the cell indefinitely.
REQUEST_TIMEOUT = 10

# Inclusive range of index page numbers to scan.
# NOTE(review): the original comment described this as walking back from the
# newest page, but the loop requests index1.html .. index23.html. On PTT the
# low index numbers are presumably the oldest pages -- confirm which range is
# actually wanted.
FIRST_PAGE = 1
LAST_PAGE = 23

# Use a single Session so that whatever the age-confirmation request sets
# (e.g. cookies) is sent with every later request.
session = requests.Session()
payload = {'from': url_template.format(''), 'yes': 'yes'}
session.post('https://www.ptt.cc/ask/over18', data=payload, timeout=REQUEST_TIMEOUT)

# Looking for rooms in the Da'an district, so the search keyword is "大安".
keyword = "大安"

# Matched posts, in the order they were found.
matched_posts = []
# Titles already stored; gives a constant-time duplicate check instead of
# rebuilding a list of every stored title for each candidate post.
seen_titles = set()

for page in range(FIRST_PAGE, LAST_PAGE + 1):
    url = url_template.format(page)

    # Fetch the page; report transport errors (timeouts, connection failures)
    # and move on rather than aborting the whole scan.
    try:
        response = session.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print(f'Failed to retrieve page {page}:', exc)
        continue

    # A 404 means the page does not exist, so stop scanning.
    if response.status_code == 404:
        break

    # Any other non-200 status is reported and the page is skipped.
    if response.status_code != 200:
        print(f'Failed to retrieve page {page}. Status code:', response.status_code)
        continue

    # Parse the page and pick out the post rows (elements with class "r-ent").
    soup = BeautifulSoup(response.text, 'html.parser')
    posts = soup.find_all(class_='r-ent')

    for post in posts:
        title_tag = post.find(class_='title')
        author_tag = post.find(class_='author')
        date_tag = post.find(class_='date')

        # Skip rows that are missing any expected element instead of raising
        # AttributeError on `None.text`.
        if title_tag is None or author_tag is None or date_tag is None:
            continue

        title = title_tag.text.strip()
        # Author and date are stored exactly as they appear in the page
        # (not stripped), matching the previously saved output.
        author = author_tag.text
        date = date_tag.text

        # Keep the post only if the title contains the keyword, does not
        # contain "男", and has not been stored already.
        if keyword not in title or "男" in title or title in seen_titles:
            continue

        seen_titles.add(title)

        print(f'Title: {title}')
        print(f'Author: {author}')
        print(f'Date: {date}')
        print('\n')

        matched_posts.append({'Title': title, 'Author': author, 'Date': date})

Title: [徵/新北/新店] 板橋、文山、中正、大安皆可
Author: lingmygt
Date:  2/18


Title: [無/台北/大安] 師大泰順街乾淨便利公寓雅房
Author: eva79528
Date:  2/27


Title: [無/台北/大安] 近師大夜市家庭式雅房
Author: bxzz
Date:  3/08


Title: [女/台北/大安] 雅房出租
Author: oasiis
Date:  3/16


Title: [無/台北/大安] 古亭冷氣雅房 生活機能超方便
Author: dogswang
Date:  3/27


Title: [女/台北/大安] 六張犁分租公寓5000雅房徵室友 機能佳
Author: gemini0602
Date:  3/31


Title: [無/台北/大安] 信義路三段雅房出租(近大安站)
Author: smartvision
Date:  6/07


Title: [無/台北/大安] 家庭式雅房徵室友
Author: castlepig
Date:  6/13


Title: [無/台北/大安] 近台大後門 師大夜市附近靜巷
Author: bxzz
Date:  6/20


Title: [女/台北/大安] 近捷運東門古亭站 師大台大旁
Author: lecl
Date:  7/19


Title: [女/台北/大安] ★★師大小木吉★★
Author: holytwo
Date:  7/28


Title: [女/台北/大安] 捷運古亭站 近台大師大 含水電
Author: sv
Date:  8/08


Title: [無/台北/大安] 泰順雅房 近師大,台大,大安公똠
Author: ccharlie
Date:  8/23


Title: [徵/台北/大安] 東區上班愛乾淨女生找房
Author: chichier
Date:  9/01


Title: [無/台北/大安] 泰順雅房 近師大,台大,大安公園
Author: ccharlie
Date:  9/03


Title: [女/台北/大安] [和式雅房6500]近師大台大淡大
Author: mycorrhiza
Date:  9/03


Title: [女/台北/大安] 近台電

In [67]:
# Persist `matched_posts` (a list of dicts with the keys 'Title', 'Author' and
# 'Date', built by the scraping cell) to disk as both CSV and JSON.
# Requires the standard-library `csv` and `json` modules to be imported before
# this cell runs.

# --- CSV export (written with UTF-16 encoding) ---
csv_filename = 'matched_posts.csv'
fieldnames = ['Title', 'Author', 'Date']

# newline='' leaves line-ending handling to the csv writer.
with open(csv_filename, 'w', newline='', encoding='utf-16') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    # Header row first, then one row per matched post.
    writer.writeheader()
    writer.writerows(matched_posts)

print(f'Saved matched posts to {csv_filename}')


# --- JSON export ---
# ensure_ascii=False writes non-ASCII characters as-is instead of \u escapes.
json_filename = 'matched_posts.json'
with open(json_filename, 'w', encoding='utf-8') as jsonfile:
    json.dump(matched_posts, jsonfile, ensure_ascii=False, indent=2)

print(f'Saved matched posts to {json_filename}')

Saved matched posts to matched_posts.csv
Saved matched posts to matched_posts.json
