# 範例

以正確的副檔名下載網頁中的圖片
範例網頁:https://www.ptt.cc/bbs/Beauty/M.1574854555.A.E5C.html

Note：因為 PTT 會詢問「是否滿 18 歲」，這邊可以用 cookies 繞過

`requests.get(URL, cookies={'over18': '1'}`

In [1]:
import requests
import os

from bs4 import BeautifulSoup
from PIL import Image

In [2]:
url = 'https://www.ptt.cc/bbs/Beauty/M.1574854555.A.E5C.html'
resp = requests.get(url, cookies={'over18': '1'})
soup = BeautifulSoup(resp.text)

In [3]:
# 決定要儲存的資料夾
output_dir = 'downloads'

# 假如資料夾不存在就新增一個資料夾
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# 定位所有圖片的 tag
image_tags = soup.find(id='main-content').findChildren('a', recursive=False)
for img_tag in image_tags:
    # 取得所有圖片在第三方服務的 id
    if 'imgur' not in img_tag['href']:
        continue
    img_id = img_tag['href'].split('/')[-1]
    # 組合圖片而非網站的網址
    img_url = 'https://i.imgur.com/{}.jpg'.format(img_id)
    # 對圖片送出請求
    with requests.get(img_url, stream=True) as r:
        r.raise_for_status()
        # 檢查圖片副檔名
        img = Image.open(r.raw)
        img_savename = '{outdir}/{img_id}.{img_ext}'.format(
            outdir=output_dir, img_id=img_id, img_ext=img.format.lower())
        img.save(img_savename)
        print('Save image {}'.format(img_savename))

Save image downloads/Yvqa9MC.jpg.jpeg
Save image downloads/4aqPUFf.jpg.jpeg
Save image downloads/hG9seBk.jpg.jpeg
Save image downloads/2M5b3z4.jpg.jpeg
Save image downloads/ncyh1pY.jpg.jpeg
Save image downloads/Z8L1vqU.jpg.jpeg
Save image downloads/tJyl8gf.jpg.jpeg
Save image downloads/hwry1vC.jpg.jpeg
Save image downloads/WaPmVsl.jpg.jpeg
Save image downloads/u3a9fBO.jpg.jpeg


In [4]:
test = soup.find(id='main-content')

In [16]:
test.find_all('a', recursive = False)

[<a href="https://www.oricon.co.jp/special/53912/" rel="nofollow" target="_blank">https://www.oricon.co.jp/special/53912/</a>,
 <a href="https://i.imgur.com/Yvqa9MC.jpg" rel="nofollow" target="_blank">https://i.imgur.com/Yvqa9MC.jpg</a>,
 <a href="https://i.imgur.com/4aqPUFf.jpg" rel="nofollow" target="_blank">https://i.imgur.com/4aqPUFf.jpg</a>,
 <a href="https://i.imgur.com/hG9seBk.jpg" rel="nofollow" target="_blank">https://i.imgur.com/hG9seBk.jpg</a>,
 <a href="https://i.imgur.com/2M5b3z4.jpg" rel="nofollow" target="_blank">https://i.imgur.com/2M5b3z4.jpg</a>,
 <a href="https://i.imgur.com/ncyh1pY.jpg" rel="nofollow" target="_blank">https://i.imgur.com/ncyh1pY.jpg</a>,
 <a href="https://i.imgur.com/Z8L1vqU.jpg" rel="nofollow" target="_blank">https://i.imgur.com/Z8L1vqU.jpg</a>,
 <a href="https://i.imgur.com/tJyl8gf.jpg" rel="nofollow" target="_blank">https://i.imgur.com/tJyl8gf.jpg</a>,
 <a href="https://i.imgur.com/hwry1vC.jpg" rel="nofollow" target="_blank">https://i.imgur.com/hw

In [13]:
test.findChildren('a', recursive=False)

[<a href="https://www.oricon.co.jp/special/53912/" rel="nofollow" target="_blank">https://www.oricon.co.jp/special/53912/</a>,
 <a href="https://i.imgur.com/Yvqa9MC.jpg" rel="nofollow" target="_blank">https://i.imgur.com/Yvqa9MC.jpg</a>,
 <a href="https://i.imgur.com/4aqPUFf.jpg" rel="nofollow" target="_blank">https://i.imgur.com/4aqPUFf.jpg</a>,
 <a href="https://i.imgur.com/hG9seBk.jpg" rel="nofollow" target="_blank">https://i.imgur.com/hG9seBk.jpg</a>,
 <a href="https://i.imgur.com/2M5b3z4.jpg" rel="nofollow" target="_blank">https://i.imgur.com/2M5b3z4.jpg</a>,
 <a href="https://i.imgur.com/ncyh1pY.jpg" rel="nofollow" target="_blank">https://i.imgur.com/ncyh1pY.jpg</a>,
 <a href="https://i.imgur.com/Z8L1vqU.jpg" rel="nofollow" target="_blank">https://i.imgur.com/Z8L1vqU.jpg</a>,
 <a href="https://i.imgur.com/tJyl8gf.jpg" rel="nofollow" target="_blank">https://i.imgur.com/tJyl8gf.jpg</a>,
 <a href="https://i.imgur.com/hwry1vC.jpg" rel="nofollow" target="_blank">https://i.imgur.com/hw

In [12]:
test.findChildren('a')

[<a href="https://www.oricon.co.jp/special/53912/" rel="nofollow" target="_blank">https://www.oricon.co.jp/special/53912/</a>,
 <a href="https://i.imgur.com/Yvqa9MC.jpg" rel="nofollow" target="_blank">https://i.imgur.com/Yvqa9MC.jpg</a>,
 <a href="//imgur.com/Yvqa9MC"></a>,
 <a href="https://i.imgur.com/4aqPUFf.jpg" rel="nofollow" target="_blank">https://i.imgur.com/4aqPUFf.jpg</a>,
 <a href="//imgur.com/4aqPUFf"></a>,
 <a href="https://i.imgur.com/hG9seBk.jpg" rel="nofollow" target="_blank">https://i.imgur.com/hG9seBk.jpg</a>,
 <a href="//imgur.com/hG9seBk"></a>,
 <a href="https://i.imgur.com/2M5b3z4.jpg" rel="nofollow" target="_blank">https://i.imgur.com/2M5b3z4.jpg</a>,
 <a href="//imgur.com/2M5b3z4"></a>,
 <a href="https://i.imgur.com/ncyh1pY.jpg" rel="nofollow" target="_blank">https://i.imgur.com/ncyh1pY.jpg</a>,
 <a href="//imgur.com/ncyh1pY"></a>,
 <a href="https://i.imgur.com/Z8L1vqU.jpg" rel="nofollow" target="_blank">https://i.imgur.com/Z8L1vqU.jpg</a>,
 <a href="//imgur.com/