### 1. requests 모듈 사용방법

In [5]:
pip show requests

Name: requests
Version: 2.26.0
Summary: Python HTTP for Humans.
Home-page: https://requests.readthedocs.io
Author: Kenneth Reitz
Author-email: me@kennethreitz.org
License: Apache 2.0
Location: c:\users\ldy\appdata\local\programs\python\python310\lib\site-packages
Requires: certifi, charset-normalizer, idna, urllib3
Required-by: 
Note: you may need to restart the kernel to use updated packages.


In [3]:
import requests
res = requests.get("https://naver.com")
print("res: ", res.status_code)

print(len(res.text))

res:  200
198434


In [4]:
# 파일로 저장
with open('crawledPage1.html', 'w', encoding='utf8') as f:
    f.write(res.text)   

In [None]:
print(res.content)

### 2. beautifulSoup4 모듈 사용방법

In [60]:
import requests
from bs4 import BeautifulSoup

url = 'https://comic.naver.com/webtoon/weekday'     # 네이버 웹툰
res = requests.get(url)
print(res.status_code)
res.raise_for_status()
soup = BeautifulSoup(res.text, 'lxml')

200


#### 태그로 값 가져오기

In [15]:
print(soup.title)                                   # title 태그 값 가져오기

<title>네이버 웹툰 &gt; 요일별  웹툰 &gt; 전체웹툰</title>


In [9]:
print(soup.a)                                       # 가장 먼저 발견되는 a 태그 가져오기

<a href="#menu" onclick="document.getElementById('menu').tabIndex=-1;document.getElementById('menu').focus();return false;"><span>메인 메뉴로 바로가기</span></a>


In [14]:
print(soup.title.get_text())                        # title 태그에서 텍스트만 추출하기

네이버 웹툰 > 요일별  웹툰 > 전체웹툰


In [10]:
print(soup.a.attrs)                                 # a 태그의 속성 가져오기

{'href': '#menu', 'onclick': "document.getElementById('menu').tabIndex=-1;document.getElementById('menu').focus();return false;"}


In [11]:
print(soup.a['href'])                               # a 태그의 'href' 속성값 가져오기

#menu


In [12]:
print(soup.a.get_text())                            # a 태그의 innerHTML값 가져오기

메인 메뉴로 바로가기


#### find()로 값 가져오기

In [22]:
print(soup.find('a'))

<a href="#menu" onclick="document.getElementById('menu').tabIndex=-1;document.getElementById('menu').focus();return false;"><span>메인 메뉴로 바로가기</span></a>


In [16]:
# soun.find() 함수: 태그와 속성으로 [웹툰 올리기] 버튼 가져오기
print(soup.find('a', attrs={'class':'Nbtn_upload'}))    # a태그 중 class값이 'Nbtn_upload'인 요소
print(soup.find(attrs={'class':'Nbtn_upload'}))         # class값이 'Nbtn_upload'인 요소

<a class="Nbtn_upload" href="/mypage/myActivity" onclick="nclk_v2(event,'olk.upload');">웹툰 올리기</a>
<a class="Nbtn_upload" href="/mypage/myActivity" onclick="nclk_v2(event,'olk.upload');">웹툰 올리기</a>


In [23]:
# inner 텍스트로 가져오기
print(soup.find('a', text='참교육'))

<a class="title" href="/webtoon/list?titleId=758037&amp;weekday=mon" onclick="nclk_v2(event,'thm*m.tit','','1')" title="참교육">참교육</a>


In [61]:
# 인기 급상승 웹툰 목록 가져오기
rank1 = soup.find('li', attrs={"class":"rank01"})
print(rank1.a.text)

싸움독학-138화 : 적인가?!


In [None]:
# 요일별 전체 웹툰의 모든 제목 가져오기
import requests
from bs4 import BeautifulSoup

url = 'https://comic.naver.com/webtoon/weekday'     
res = requests.get(url)
res.raise_for_status()

# soup = BeautifulSoup(res.content, 'html.parser')
soup = BeautifulSoup(res.text, 'lxml')

# a 태그 중 class='title'인 모든 요소 가져오기
webtoon_title = soup.findAll('a', attrs={'class':'title'})     

for i in webtoon_title:
    print(i.get_text(), end=', ')

In [75]:
# 인기급상승 웹툰 목록 가져오기
import requests
from bs4 import BeautifulSoup

url = 'https://comic.naver.com/webtoon/weekday'     
res = requests.get(url)
res.raise_for_status()

soup = BeautifulSoup(res.text, 'lxml')

webtoon_list = soup.find('ol', attrs={'id':'realTimeRankFavorite'})
# print(type(webtoon_list))
# print(webtoon_list)
lst = webtoon_list.findAll('li')
# print(lst[1])
for i in lst:
  print(i.a.get_text())
  


싸움독학-138화 : 적인가?!
독립일기-시즌2 86화 미친 습도와 미친 호르몬
입학용병-90화
이번 생도 잘 부탁해-102화
투신전생기-57화
사형소년-10화_저승사자
약한영웅-201화
2022 스크롤금지-21.거꾸로 서는 자 - 단우 작가
존망코인-39화 왓유어네임?
수희0(tngmlek0)-57화


### 3. 네이버 웹툰 크롤링

In [50]:
# 목요 웹툰 '독립일기' 페이지의 최신 목록 가져오기
import requests
from bs4 import BeautifulSoup

url = 'https://comic.naver.com/webtoon/list?titleId=748105&weekday=thu'     
res = requests.get(url)
res.raise_for_status()

soup = BeautifulSoup(res.content, 'html.parser')

# a 태그 중 class='title'인 모든 요소 가져오기
rows = soup.findAll('td', attrs={'class':'title'})    
# print(rows)
print(rows[0].a.get_text())                       # 첫 번째 요소만 출력해보기
link = rows[0].a['href']
print(link)
print('https://comic.naver.com/' + link)

for row in rows:
    title = row.a.get_text()
    link = row.a['href']
    print(title, 'https://comic.naver.com/' + link)

시즌2 86화 미친 습도와 미친 호르몬
/webtoon/detail?titleId=748105&no=189&weekday=thu
https://comic.naver.com//webtoon/detail?titleId=748105&no=189&weekday=thu
시즌2 86화 미친 습도와 미친 호르몬 https://comic.naver.com//webtoon/detail?titleId=748105&no=189&weekday=thu
시즌2 85화 빨래에서 뭔가 나왔다 https://comic.naver.com//webtoon/detail?titleId=748105&no=188&weekday=thu
시즌2 84화 어서 와 우리 집은 처음이지 https://comic.naver.com//webtoon/detail?titleId=748105&no=187&weekday=thu
시즌2 83화 이사 준비 https://comic.naver.com//webtoon/detail?titleId=748105&no=186&weekday=thu
시즌2 82화 추억 팔이 https://comic.naver.com//webtoon/detail?titleId=748105&no=185&weekday=thu
시즌2 81화 잠자는 집 속의 강아지 https://comic.naver.com//webtoon/detail?titleId=748105&no=184&weekday=thu
시즌2 80화 제주여행 6일차 https://comic.naver.com//webtoon/detail?titleId=748105&no=183&weekday=thu
시즌2 79화 제주여행 5일차 https://comic.naver.com//webtoon/detail?titleId=748105&no=182&weekday=thu
시즌2 78화 제주여행 4일차 https://comic.naver.com//webtoon/detail?titleId=748105&no=181&weekday=thu
시즌2 77화 제주여행 3일차 https

In [51]:
# 목요 웹툰 '독립일기'의 평점 평균 계산하기
import requests
from bs4 import BeautifulSoup

url = 'https://comic.naver.com/webtoon/list?titleId=748105&weekday=thu'     
res = requests.get(url)
res.raise_for_status()

soup = BeautifulSoup(res.content, 'html.parser')

rows = soup.find_all('div', attrs={'class':'rating_type'})    

sum_rating = 0
for row in rows:
    rating = row.find('strong').get_text()
    print(rating)
    sum_rating += float(rating)

print(f'평점 평균: {sum_rating / len(rows)}')

9.96
9.96
9.97
9.98
9.98
9.97
9.97
9.97
9.97
9.97
평점 평균: 9.97


In [52]:
# 목요 웹툰 '독립일기'의 1화 페이지 목록 출력하기
import requests
from bs4 import BeautifulSoup

url = 'https://comic.naver.com/webtoon/list?titleId=748105&weekday=thu&page=19'     
res = requests.get(url)
print(res.status_code)
res.raise_for_status()

soup = BeautifulSoup(res.text, 'lxml')

rows = soup.find_all('td', attrs={'class':'title'})    

for row in rows:
    title = row.find('a').get_text()
    link = 'https://comic.naver.com' + row.find('a')['href']
    print(title, link)

200
8화 1인 가구 요리 꿀팁 https://comic.naver.com/webtoon/detail?titleId=748105&no=9&weekday=thu
7화 집들이 선물 https://comic.naver.com/webtoon/detail?titleId=748105&no=8&weekday=thu
6화 나홀로 집에 https://comic.naver.com/webtoon/detail?titleId=748105&no=7&weekday=thu
5화 이사 셋째 날 https://comic.naver.com/webtoon/detail?titleId=748105&no=6&weekday=thu
4화 이사 둘째 날 https://comic.naver.com/webtoon/detail?titleId=748105&no=5&weekday=thu
3화 이사 첫날 https://comic.naver.com/webtoon/detail?titleId=748105&no=4&weekday=thu
2화 부동산 대모험 https://comic.naver.com/webtoon/detail?titleId=748105&no=3&weekday=thu
1화 나도 혼자 산다 https://comic.naver.com/webtoon/detail?titleId=748105&no=2&weekday=thu
예고편 https://comic.naver.com/webtoon/detail?titleId=748105&no=1&weekday=thu


In [55]:
# 목요 웹툰 '독립일기'의 모든 회차 목록 출력하기
import requests
from bs4 import BeautifulSoup

for i in range(19, 0, -1):
    url = 'https://comic.naver.com/webtoon/list?titleId=748105&weekday=thu&page='     
    res = requests.get(url + str(i))
    res.raise_for_status()

    soup = BeautifulSoup(res.text, 'lxml')

    rows = soup.find_all('td', attrs={'class':'title'})    

    for row in rows:
        title = row.find('a').get_text()
        link = 'https://comic.naver.com' + row.find('a')['href']
        print(title, link)

8화 1인 가구 요리 꿀팁 https://comic.naver.com/webtoon/detail?titleId=748105&no=9&weekday=thu
7화 집들이 선물 https://comic.naver.com/webtoon/detail?titleId=748105&no=8&weekday=thu
6화 나홀로 집에 https://comic.naver.com/webtoon/detail?titleId=748105&no=7&weekday=thu
5화 이사 셋째 날 https://comic.naver.com/webtoon/detail?titleId=748105&no=6&weekday=thu
4화 이사 둘째 날 https://comic.naver.com/webtoon/detail?titleId=748105&no=5&weekday=thu
3화 이사 첫날 https://comic.naver.com/webtoon/detail?titleId=748105&no=4&weekday=thu
2화 부동산 대모험 https://comic.naver.com/webtoon/detail?titleId=748105&no=3&weekday=thu
1화 나도 혼자 산다 https://comic.naver.com/webtoon/detail?titleId=748105&no=2&weekday=thu
예고편 https://comic.naver.com/webtoon/detail?titleId=748105&no=1&weekday=thu
18화 엄마의 방문 https://comic.naver.com/webtoon/detail?titleId=748105&no=19&weekday=thu
17화 브라이덜 샤워 https://comic.naver.com/webtoon/detail?titleId=748105&no=18&weekday=thu
16화 점잖개 https://comic.naver.com/webtoon/detail?titleId=748105&no=17&weekday=thu
15화 빨래 https://comic.

In [56]:
# 목요 웹툰 '독립일기'의 모든 회차 목록 출력하기
import requests
from bs4 import BeautifulSoup

for i in range(19, 0, -1):
    url = 'https://comic.naver.com/webtoon/list?titleId=748105&weekday=thu&page='     
    res = requests.get(url + str(i))
    res.raise_for_status()

    soup = BeautifulSoup(res.text, 'lxml')

    rows = soup.find_all('td', attrs={'class':'title'})    

    lst = []
    for row in rows:
        title = row.find('a').get_text()
        link = 'https://comic.naver.com' + row.find('a')['href']
        # print(title, link)
        lst.append([title, link])
    lst.reverse()
    for i in lst:
        print(i[0], i[1])

예고편 https://comic.naver.com/webtoon/detail?titleId=748105&no=1&weekday=thu
1화 나도 혼자 산다 https://comic.naver.com/webtoon/detail?titleId=748105&no=2&weekday=thu
2화 부동산 대모험 https://comic.naver.com/webtoon/detail?titleId=748105&no=3&weekday=thu
3화 이사 첫날 https://comic.naver.com/webtoon/detail?titleId=748105&no=4&weekday=thu
4화 이사 둘째 날 https://comic.naver.com/webtoon/detail?titleId=748105&no=5&weekday=thu
5화 이사 셋째 날 https://comic.naver.com/webtoon/detail?titleId=748105&no=6&weekday=thu
6화 나홀로 집에 https://comic.naver.com/webtoon/detail?titleId=748105&no=7&weekday=thu
7화 집들이 선물 https://comic.naver.com/webtoon/detail?titleId=748105&no=8&weekday=thu
8화 1인 가구 요리 꿀팁 https://comic.naver.com/webtoon/detail?titleId=748105&no=9&weekday=thu
9화 벌레 퇴치 https://comic.naver.com/webtoon/detail?titleId=748105&no=10&weekday=thu
10화 창문 필름 셀프 시공 https://comic.naver.com/webtoon/detail?titleId=748105&no=11&weekday=thu
11화 밥공기 딜레마 https://comic.naver.com/webtoon/detail?titleId=748105&no=12&weekday=thu
12화 수챗구멍 청소 htt

In [76]:
# 목요 웹툰 '독립일기'의 모든 회차 정보 csv 파일로 저장하기
import requests
from bs4 import BeautifulSoup
import csv

# 결과 csv 파일 생성
file_name = 'webtoon.csv'
f = open(file_name, 'w', encoding='utf-8-sig', newline='')
writer = csv.writer(f)

for i in range(19, 0, -1):
    url = 'https://comic.naver.com/webtoon/list?titleId=748105&weekday=thu&page='     
    res = requests.get(url + str(i))
    res.raise_for_status()

    soup = BeautifulSoup(res.text, 'lxml')

    rows = soup.find_all('td', attrs={'class':'title'})    

    lst = []
    for row in rows:
        title = row.find('a').get_text()
        link = 'https://comic.naver.com' + row.find('a')['href']
        # print(title, link)
        lst.append([title, link])
    lst.reverse()
    
    for i in lst:
        writer.writerow([i[0], i[1]])
        # writer.writerow([title, link])

f.close()