### 웹툰 이미지를 다운로드하여 로컬에 저장하기

In [17]:
import requests
import os

req_header = {
    'referer' : 'https://comic.naver.com/webtoon/detail?titleId=753478&no=73&weekday=fri'
}

img_url_list = {
    'https://image-comic.pstatic.net/webtoon/753478/73/20220106144816_0f69de276fca4a9e6ff0d7490f55b315_IMAG01_1.jpg',
    'https://image-comic.pstatic.net/webtoon/753478/73/20220106144816_0f69de276fca4a9e6ff0d7490f55b315_IMAG01_2.jpg',
    'https://image-comic.pstatic.net/webtoon/753478/73/20220106144816_0f69de276fca4a9e6ff0d7490f55b315_IMAG01_3.jpg'
}
for img_url in img_url_list:
    res = requests.get(img_url,headers = req_header)
    print(res.status_code)
    
    img_data = res.content
    fileName = os.path.basename(img_url)
    fileName = 'data/'+fileName
    with open(fileName, 'wb') as file:
        print(f'write to file {fileName}({len(img_data)}bytes')
        file.write(img_data)
          

200
write to file data/20220106144816_0f69de276fca4a9e6ff0d7490f55b315_IMAG01_2.jpg(179361bytes
200
write to file data/20220106144816_0f69de276fca4a9e6ff0d7490f55b315_IMAG01_1.jpg(135069bytes
200
write to file data/20220106144816_0f69de276fca4a9e6ff0d7490f55b315_IMAG01_3.jpg(159960bytes


### 웹툰의 특정회차의 모든 image 다운로드 하기
* Attribute Selector 를 사용하여 jpg파일명을 모두 추출하고 리스트에 저장하기
* 리스트를 순회하면서 image다운받기

In [25]:
import requests
from bs4 import BeautifulSoup

main_url = 'https://comic.naver.com/webtoon/detail?titleId=764040&no=54&weekday=fri'
res = requests.get(main_url)

if res.ok:
    soup = BeautifulSoup(res.text, 'html.parser')
    img_tag = soup.select("img[src$='.jpg']")
    print(len(img_tag), type(img_tag))
    
    img_url_list = []
    for img_tag in img_tag:
        # print(type(img_tag),img_tag)
        img_url = img_tag['src']
        img_url_list.append(img_url)

print(len(img_url_list))
print(img_url_list)
    

72 <class 'bs4.element.ResultSet'>
72
['https://shared-comic.pstatic.net/thumb/webtoon/764040/thumbnail/thumbnail_IMAG04_8afc9026-2558-49e9-b675-93381d9ef009.jpg', 'https://image-comic.pstatic.net/webtoon/764040/54/20220127142619_b148e6dfbf0f74b0c384744f9e46035c_IMAG01_1.jpg', 'https://image-comic.pstatic.net/webtoon/764040/54/20220127142619_b148e6dfbf0f74b0c384744f9e46035c_IMAG01_2.jpg', 'https://image-comic.pstatic.net/webtoon/764040/54/20220127142619_b148e6dfbf0f74b0c384744f9e46035c_IMAG01_3.jpg', 'https://image-comic.pstatic.net/webtoon/764040/54/20220127142619_b148e6dfbf0f74b0c384744f9e46035c_IMAG01_4.jpg', 'https://image-comic.pstatic.net/webtoon/764040/54/20220127142619_b148e6dfbf0f74b0c384744f9e46035c_IMAG01_5.jpg', 'https://image-comic.pstatic.net/webtoon/764040/54/20220127142619_b148e6dfbf0f74b0c384744f9e46035c_IMAG01_6.jpg', 'https://image-comic.pstatic.net/webtoon/764040/54/20220127142619_b148e6dfbf0f74b0c384744f9e46035c_IMAG01_7.jpg', 'https://image-comic.pstatic.net/webto

### 디렉토리 생성하는 2가지 함수
- os.makedir()/os.makedirs()
- os.makedir() 한개의폴더만

In [26]:
# img 디렉토리 생성하기

import os


dir_path = 'img'

# img디렉토리가 없으면
if not os.path.isdir(dir_path):
    os.makedir(dir_path)

In [29]:
#리스트를 순회하면서 image다운받기

for idx, img_url in enumerate(img_url_list,1):
    print(f'다운로드 번호 {idx} url {img_url}')
    req_header = {'referer' : main_url}
    res = requests.get(img_url, headers = req_header)
    if res.ok:
        img_data = res.content
        file_name = os.path.basename(img_url)
        file_name = 'img/'+file_name
        with open(file_name, 'wb') as file:
            file.write(img_data)

다운로드 번호 1 url https://shared-comic.pstatic.net/thumb/webtoon/764040/thumbnail/thumbnail_IMAG04_8afc9026-2558-49e9-b675-93381d9ef009.jpg
다운로드 번호 2 url https://image-comic.pstatic.net/webtoon/764040/54/20220127142619_b148e6dfbf0f74b0c384744f9e46035c_IMAG01_1.jpg
다운로드 번호 3 url https://image-comic.pstatic.net/webtoon/764040/54/20220127142619_b148e6dfbf0f74b0c384744f9e46035c_IMAG01_2.jpg
다운로드 번호 4 url https://image-comic.pstatic.net/webtoon/764040/54/20220127142619_b148e6dfbf0f74b0c384744f9e46035c_IMAG01_3.jpg
다운로드 번호 5 url https://image-comic.pstatic.net/webtoon/764040/54/20220127142619_b148e6dfbf0f74b0c384744f9e46035c_IMAG01_4.jpg
다운로드 번호 6 url https://image-comic.pstatic.net/webtoon/764040/54/20220127142619_b148e6dfbf0f74b0c384744f9e46035c_IMAG01_5.jpg
다운로드 번호 7 url https://image-comic.pstatic.net/webtoon/764040/54/20220127142619_b148e6dfbf0f74b0c384744f9e46035c_IMAG01_6.jpg
다운로드 번호 8 url https://image-comic.pstatic.net/webtoon/764040/54/20220127142619_b148e6dfbf0f74b0c384744f9e46035c_IM

In [31]:
import shutil
import os

dir_path = 'img'

if os.path.exists(dir_path):
    shutil.rmtree(dir_path)

### 웹툰의 타이틀과 특정회차 url을 아규먼트 받아서 다운로드하는 함수 구하기

In [57]:
def download_image(title, round_url):
    import requests
    from bs4 import BeautifulSoup
    import os
    import shutil
    
    #img 폴더가 있으면 삭제하고
    dir_path = 'img'
    if os.path.exists(dir_path):
        shutil.rmtree(dir_path)
    if not os.path.exists(dir_path):
        title_path = os.path.join(dir_path, title)
        print(title_path)
        os.makedirs(title_path)
        
    # res text - 소스보기 F12
    # img url 목록을 알아내기 위한 요청 보내기
    res = requests.get(round_url)
    if res.ok:
        soup = BeautifulSoup(res.text, 'html.parser')
        img_tag = soup.select("img[src$='.jpg']")
        print(len(img_tag), type(img_tag))
        
        img_url_list = []
        for img_tag in img_tag:
            # print(type(img_tag),img_tag)
            img_url = img_tag['src']

            res_img = requests.get(img_url,headers =req_header)
            if res_img.ok:
                #image 데이터 가져오기
                img_data = res_img.content
                file_name = title_path +'\\'+ os.path.basename(img_url)
            with open(file_name, 'wb') as file:
                print(f'{file_name} ({len(img_data)}) byte')
                file.write(img_data)


In [58]:
download_image('외모지상주의', 'https://comic.naver.com/webtoon/list?titleId=641253&weekday=fri')

img\외모지상주의
19 <class 'bs4.element.ResultSet'>
img\외모지상주의\thumbnail_IMAG06_7c62962d-f52d-4757-b640-4d2abcc05b29.jpg (11555) byte
img\외모지상주의\thumbnail_202x120_51e18012-17a9-4c52-a49d-802263ac3f4b.jpg (9604) byte
img\외모지상주의\thumbnail_202x120_e93667d3-2410-4b26-b974-0f013ceca2d2.jpg (23222) byte
img\외모지상주의\thumbnail_202x120_23c9a01a-ee03-48aa-a883-4f64515fb205.jpg (31457) byte
img\외모지상주의\thumbnail_202x120_cf5cf4ac-d6d6-4cb2-bb8b-d3a4398a4314.jpg (20934) byte
img\외모지상주의\thumbnail_202x120_4a780a9f-72d2-4ba1-9b9e-33974ac88472.jpg (6389) byte
img\외모지상주의\thumbnail_202x120_f521fa5e-f35a-4ea2-b861-c6dfd73354d1.jpg (14705) byte
img\외모지상주의\thumbnail_202x120_47357aba-1428-4a46-aa66-07471b622133.jpg (17589) byte
img\외모지상주의\thumbnail_202x120_5f3a141f-3a2a-4f13-b2c3-47ee791ee97d.jpg (17613) byte
img\외모지상주의\thumbnail_202x120_ef1682e0-254e-43f3-90f1-a5fab527739d.jpg (17384) byte
img\외모지상주의\thumbnail_202x120_0b67531e-1798-41db-92a8-c7197781e884.jpg (18004) byte
img\외모지상주의\thumbnail_IMAG10_d4d1a799-d193-4e

## Image File Upload
* http://httpbin.org/post URL로 요청을 보내서 다운로드 받은 img 파일을 업로드 하기
* requests.post()함수를 사용하고, file속성에 image data를 지정한다.
* file-(optional)Directonary of 'filename':file-like-objects for multiple encoding upload.

In [1]:
import requests

upload_file_dict = {
    'img1' : open('data\\f1.jpg','rb'),
    'img2' : open('data\\f2.jpg','rb'),
    'img3' : open('data\\f3.jpg','rb'),
}

url = 'http://httpbin.org/post'
res = requests.post(url, file = upload_file_dict)
print(res.status_code)

TypeError: request() got an unexpected keyword argument 'file'

In [12]:
def webtoon_image(title, round_url):
    import requests
    from bs4 import BeautifulSoup
    import os
    import shutil

    header = { 'referer' : round_url }

    path= 'img'
    title_path = os.path.join(path, title)
    os.makedirs(title_path)

    res = requests.get(round_url)
    if res.ok:
        soup = BeautifulSoup(res.text, 'html.parser')
        img_tag = soup.select("img[src$='.jpg']")

        img_url_list = []
        for img_tag in img_tag:
            img_url = img_tag['src']

            res_img = requests.get(img_url,headers =header)
            if res_img.ok:
                #image 데이터 가져오기
                img_data = res_img.content
                file_name = title_path+'\\'+ os.path.basename(img_url)
            with open(file_name, 'wb') as file:
                print(f'{file_name} ({len(img_data)}) byte')
                file.write(img_data)

num=input("이미지 복사할려는 화는?")
mUrl ='https://comic.naver.com/webtoon/list?titleId=748105&no='+num+'&weekday=thu'

webtoon_image('독립일기', mUrl)


img\독립일기\thumbnail_IMAG06_fa3bf10d-1b8f-40cd-a8eb-01caf9bbc3e4.jpg (9905) byte
img\독립일기\thumbnail_202x120_0d81306a-5cc2-40cc-95b7-26960d56af21.jpg (26367) byte
img\독립일기\thumbnail_202x120_60e88c76-8eab-488b-8e85-5f8a98cee8f3.jpg (27661) byte
img\독립일기\thumbnail_202x120_b3ebf09a-c025-423e-8dfe-852b00f91616.jpg (21009) byte
img\독립일기\thumbnail_202x120_0b8733fd-f346-4819-93db-9501c5b36d95.jpg (13449) byte
img\독립일기\thumbnail_202x120_eba13c9e-3648-459b-8556-84ea50892936.jpg (19687) byte
img\독립일기\thumbnail_202x120_c95ea04c-9f4f-402f-ae88-c5b5891afc2b.jpg (23417) byte
img\독립일기\thumbnail_202x120_c5cb222e-086e-4d41-99a6-6131a3e06dbf.jpg (17850) byte
img\독립일기\thumbnail_202x120_a9145bf1-8a2b-45f3-90cd-a62ad6b95c7a.jpg (11585) byte
img\독립일기\thumbnail_202x120_3bc6f5c9-3ca5-4872-b20b-71da679db2fc.jpg (23730) byte
img\독립일기\thumbnail_202x120_17f9c9d9-7980-4a32-9f29-5120600e5806.jpg (23228) byte
img\독립일기\thumbnail_IMAG10_7be4eb2a-35e4-4c0f-ac4f-70cee159ccbd.jpg (10321) byte
img\독립일기\thumbnail_IMAG10_9a5f9