forked from zzsza/github-action-with-python
-
Notifications
You must be signed in to change notification settings - Fork 0
/
crawling_yes24.py
39 lines (29 loc) · 1014 Bytes
/
crawling_yes24.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import html
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
def parsing_beautifulsoup(url, timeout=10):
    """
    Fetch a page over HTTP and parse it with BeautifulSoup.

    :param url: URL to fetch and parse (in this project, a YES24 link)
    :param timeout: seconds to wait for the server before giving up
    :return: BeautifulSoup object built from the response text
    :raises requests.exceptions.RequestException: if the request fails
        or the timeout elapses
    """
    # requests has no default timeout: without one, an unresponsive server
    # would block this call (and whatever runs the crawler) indefinitely.
    response = requests.get(url, timeout=timeout)
    return BeautifulSoup(response.text, 'html.parser')
def extract_book_data(soup, url_prefix="http://www.yes24.com"):
    """
    Build an HTML snippet listing the books found in a parsed page.

    Every element matching ``.goodsTxtInfo`` yields one output line of the
    form ``<a href="URL">TITLE</a>, PRICE<br/>``. The title is the text of
    the element's first anchor, the link is the ``href`` of its second
    anchor, and the price is the text of its first ``.priceB`` element.

    :param soup: BeautifulSoup object (anything exposing ``select``)
    :param url_prefix: base URL that relative ``href`` values are resolved
        against
    :return: contents (str); an empty string when no usable entry is found
    """
    rows = []
    for new_book in soup.select(".goodsTxtInfo"):
        anchors = new_book.select("a")
        prices = new_book.select(".priceB")

        # Skip entries that lack the pieces we read, so one malformed
        # listing does not abort the whole extraction with an IndexError.
        if len(anchors) < 2 or not prices:
            continue
        href = anchors[1].attrs.get("href")
        if not href:
            continue

        # urljoin copes with absolute hrefs and with relative ones that
        # have no leading slash; plain concatenation does not.
        url = urljoin(url_prefix, href)

        # The pieces come from a scraped page, so escape them before
        # embedding them in markup, and quote the attribute value.
        safe_url = html.escape(url, quote=True)
        safe_name = html.escape(anchors[0].text, quote=False)
        safe_price = html.escape(prices[0].text, quote=False)

        rows.append(f'<a href="{safe_url}">{safe_name}</a>, {safe_price}<br/>\n')

    return "".join(rows)