[컴퓨터 공학 - 예스24](https://www.yes24.com/24/Category/Display/001001003031?FetchSize=100&PageNumber=71)

In [None]:
import requests
from bs4 import BeautifulSoup
import sqlite3
import time

# Maps a category display name to the code that is sent as the `dispNo`
# request parameter when crawling that category.
# Entries that are commented out are disabled; uncomment a line to include
# that category in the crawl. The trailing comment is an English gloss of
# the Korean category name.
category_dict = {
    # "게임": "001001003027",  # Games
    # "그래픽/디자인/멀티미디어": "001001003028",  # Graphics / Design / Multimedia
    # "네트워크/해킹/보안": "001001003024",  # Network / Hacking / Security
    # "모바일 프로그래밍": "001001003023",  # Mobile Programming
    # "모바일/태블릿/SNS": "001001003021",  # Mobile / Tablet / SNS
    # "오피스 활용": "001001003029",  # Office Applications
    # "웹사이트": "001001003020",  # Websites
    # "인공지능": "001001003032",  # Artificial Intelligence
    # "인터넷 비즈니스": "001001003026",  # Internet Business
    "컴퓨터 공학": "001001003031",  # Computer Engineering
    # "컴퓨터 수험서": "001001003030",  # Computer Exam Prep
    # "컴퓨터 입문/활용": "001001003019",  # Computer Basics / Usage
    # "프로그래밍 언어": "001001003022",  # Programming Languages
    # "OS/데이터베이스": "001001003025",  # OS / Databases
}

def get_text_safe(soup_element):
    """Return the whitespace-stripped ``.text`` of an element, or ``""``.

    A falsy ``soup_element`` (such as the ``None`` produced when a selector
    matches nothing) yields an empty string instead of raising.
    """
    if not soup_element:
        return ""
    return soup_element.text.strip()

def get_href_safe(soup_element):
    """Return the whitespace-stripped ``href`` of an element, or ``""``.

    An empty string is returned when the element itself is falsy or when it
    has no (or an empty) ``href`` value.
    """
    if not soup_element:
        return ""
    href = soup_element.get("href")
    if not href:
        return ""
    return href.strip()

def _parse_book_item(item, category: str, page: int) -> dict:
    """Build one book record from a single ``div.item_info`` element.

    Every field is read through ``get_text_safe`` / ``get_href_safe``, so a
    selector that matches nothing produces ``""`` rather than an error.
    Keys are emitted in the same order as the columns of the ``books`` table.
    """
    title_tag = item.select_one("a.gd_name")
    return {
        "category": category,
        "title": get_text_safe(title_tag),
        "book_url": get_href_safe(title_tag),
        "author": get_text_safe(item.select_one(".info_auth a")),
        "publisher": get_text_safe(item.select_one(".info_pub a")),
        "pub_date": get_text_safe(item.select_one(".info_date")),
        "sale_price": get_text_safe(item.select_one(".txt_num em.yes_b")),
        "original_price": get_text_safe(item.select_one(".txt_num.dash em.yes_m")),
        # Newlines inside the point text are removed before the final strip.
        "point": get_text_safe(item.select_one("span.yPoint")).replace("\n", "").strip(),
        "sales_index": get_text_safe(item.select_one("span.saleNum")),
        "rating": get_text_safe(item.select_one("span.rating_grade em.yes_b")),
        "reviews": get_text_safe(item.select_one("span.rating_rvCount")),
        "delivery": get_text_safe(item.select_one("div.info_deli span.deli_date")),
        "summary": get_text_safe(item.select_one("div.info_read")),
        "used_info": get_text_safe(item.select_one("div.info_relG a[href*='UsedShopHub']")),
        "ebook_info": get_text_safe(item.select_one("div.info_relG a[href*='ebook']")),
        "page": page,
    }


def fetch_books_from_page(dispNo: str, category: str, page: int, *, timeout: float = 10.0):
    """Fetch one listing page of a category and parse it into book records.

    Args:
        dispNo: Category code sent as the ``dispNo`` query parameter.
        category: Category display name stored on every returned record.
        page: Page number to request; also stored on every record.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
            Without it a stalled connection would block the crawl forever.

    Returns:
        A list with one dict per ``div.item_info`` element found in the
        response, or an empty list when the page contains none.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the timeout elapses.
    """
    url = "https://www.yes24.com/product/category/CategoryProductContents"
    params = {
        "dispNo": dispNo,
        "order": "SINDEX_ONLY",
        "addOptionTp": "0",
        "page": page,
        "size": "120",
        "statGbYn": "N",
        "viewMode": "",
        "_options": "",
        "directDelvYn": "",
        "usedTp": "0",
        "elemNo": "0",
        "elemSeq": "0",
        "seriesNumber": "0",
    }
    headers = {
        "User-Agent": "Mozilla/5.0"
    }

    response = requests.get(url, params=params, headers=headers, timeout=timeout)
    # NOTE(review): the HTTP status is not checked, so an error page without
    # any div.item_info is indistinguishable from an empty (last) page.
    soup = BeautifulSoup(response.text, "html.parser")

    return [
        _parse_book_item(item, category, page)
        for item in soup.select("div.item_info")
    ]

# Columns of the books table (excluding the autoincrement id), in table order.
_BOOK_COLUMNS = (
    "category", "title", "book_url", "author", "publisher", "pub_date",
    "sale_price", "original_price", "point", "sales_index", "rating",
    "reviews", "delivery", "summary", "used_info", "ebook_info", "page",
)

# Named placeholders bind each value by key, so the insertion order of the
# book dicts no longer decides which column a value lands in.
_INSERT_BOOK_SQL = "INSERT INTO books ({}) VALUES ({})".format(
    ", ".join(_BOOK_COLUMNS),
    ", ".join(":" + column for column in _BOOK_COLUMNS),
)


def save_books_to_db(book_list, db_name="yes24_books.db"):
    """Append book records to the ``books`` table of a SQLite database.

    The table is created on first use. All rows of one call are written in a
    single transaction: it is committed on success and rolled back if any
    statement fails. The connection is always closed.

    Args:
        book_list: Iterable of dicts holding a value for every name in
            ``_BOOK_COLUMNS``; extra keys are ignored. May be empty.
        db_name: Path of the SQLite database file.

    Raises:
        sqlite3.Error: If a record lacks a required key or a statement fails.
    """
    conn = sqlite3.connect(db_name)
    try:
        # The connection context manager commits on success and rolls back
        # on error; it does not close the connection, hence the finally.
        with conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS books (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    category TEXT,
                    title TEXT,
                    book_url TEXT,
                    author TEXT,
                    publisher TEXT,
                    pub_date TEXT,
                    sale_price TEXT,
                    original_price TEXT,
                    point TEXT,
                    sales_index TEXT,
                    rating TEXT,
                    reviews TEXT,
                    delivery TEXT,
                    summary TEXT,
                    used_info TEXT,
                    ebook_info TEXT,
                    page INTEGER
                )
            """)
            conn.executemany(_INSERT_BOOK_SQL, book_list)
    finally:
        conn.close()

# Entry point: crawl every enabled category page by page and store each page.
if __name__ == "__main__":
    for category, dispNo in category_dict.items():
        print(f"\n🔍 카테고리: {category} (dispNo={dispNo}) 수집 시작")
        page = 1
        while True:
            print(f"  - {page}페이지 크롤링 중...")
            books = fetch_books_from_page(dispNo, category, page)
            if books:
                # Persist this page, then pause before requesting the next one.
                save_books_to_db(books)
                print(f"  ✔ {len(books)}건 저장 완료 (Page {page})")
                page += 1
                time.sleep(2)
                continue
            # An empty result is treated as the end of this category.
            print(f"  ✅ 마지막 페이지 도달: {page-1}페이지까지 완료")
            break

In [None]:
import pandas as pd

# Open the SQLite database file.
conn = sqlite3.connect("yes24_books.db")
try:
    # Load every row of the books table into a DataFrame.
    df = pd.read_sql_query("SELECT * FROM books", conn)
finally:
    # Close the connection even when the query raises
    # (for example if the books table does not exist).
    conn.close()

print(df.shape)
# Preview the first rows.
df.head()