In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def _element_text(element, default='N/A'):
    """Return the stripped text of a parsed element, or *default* when it is missing."""
    if element is None:
        return default
    return element.text.strip()


def scrape_amazon_books(url, timeout=10):
    """Scrape book entries from an Amazon search-results page.

    Args:
        url: Address of the search-results page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        A list with one dict per search result, with the keys ``'Title'``,
        ``'Author'``, ``'Price'``, ``'Rating'`` and ``'Link'``. Any field
        that cannot be located in the markup is set to ``'N/A'``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status, so
            an error page is never parsed as if it were a result page.
        requests.RequestException: On connection failures or timeouts.
    """
    # Local import keeps this block self-contained within the file.
    from urllib.parse import urljoin

    # Browser-like headers sent with the request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept-Language': 'en-US, en;q=0.5'
    }

    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on error responses instead of silently returning no rows.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    books = []

    # Every search result is a <div data-component-type="s-search-result">.
    for item in soup.find_all('div', {'data-component-type': 's-search-result'}):
        # Title and detail-page link both live under the result's <h2>.
        title = 'N/A'
        link = 'N/A'
        title_element = item.find('h2', class_='a-size-mini')
        if title_element is not None:
            # The <h2> may lack a <span>; fall back to 'N/A' rather than crash.
            title = _element_text(title_element.find('span'))
            link_element = title_element.find('a', class_='a-link-normal')
            href = link_element.get('href') if link_element is not None else None
            if href:
                # urljoin handles relative and already-absolute hrefs alike.
                link = urljoin('https://www.amazon.com', href)

        # Author. NOTE: a multi-class string passed to class_ matches only an
        # identical class attribute value, so this lookup is sensitive to
        # class order and any extra classes in the markup.
        author = _element_text(
            item.find('a', class_='a-size-base a-link-normal s-underline-text s-underline-link-text s-link-style')
        )

        # Price. Only the 'a-price-whole' element is read — presumably the
        # integer part of the price; verify whether the fraction is needed.
        price = _element_text(item.find('span', class_='a-price-whole'))

        # Rating: keep the first whitespace-separated token of the icon's alt
        # text, guarding against an empty string.
        rating_element = item.find('span', class_='a-icon-alt')
        rating_tokens = rating_element.text.split() if rating_element is not None else []
        rating = rating_tokens[0] if rating_tokens else 'N/A'

        books.append({
            'Title': title,
            'Author': author,
            'Price': price,
            'Rating': rating,
            'Link': link
        })

    return books

# Amazon search-results page for the keyword "python".
url = "https://www.amazon.com/s?k=python&ref=nb_sb_noss"

# Download the page and turn every search result into a dict.
book_data = scrape_amazon_books(url)

# Tabulate the records and write them to CSV without the index column.
df = pd.DataFrame(data=book_data)
df.to_csv(path_or_buf='amazon_python_books.csv', index=False)

# Report how many records were collected and where they were saved.
print(f"총 {len(book_data)}개의 도서 정보가 수집되어 'amazon_python_books.csv' 파일로 저장되었습니다.")

In [None]:
# Bare expression as the last line of a notebook cell: shows the scraped DataFrame.
df