In [24]:
import requests
from bs4 import BeautifulSoup
import csv

In [25]:
def extract_page_data(soup):
    items = soup.find_all('dl', {'class': 'baby-product-wrap'})
    data = []
    for item in items:
        # 제품 이름
        name_tag = item.find('div', {'class': 'name'})
        item_name = name_tag.get_text(strip=True) if name_tag else 'No Title'

        # 할인율
        discount_tag = item.find('span', {'class': 'discount-percentage'})
        discount = discount_tag.text.strip() if discount_tag else 'No discount'

        # 원래 가격
        original_price_tag = item.find('del', {'class': 'base-price'})
        original_price = original_price_tag.text.strip() if original_price_tag else 'No original price'

        # 판매 가격
        sale_price_tag = item.find('strong', {'class': 'price-value'})
        sale_price = sale_price_tag.text.strip() if sale_price_tag else 'No sale price'

        # 배송 정보
        delivery_tag = item.find('div', {'class': 'delivery'})
        delivery_info = delivery_tag.text.strip() if delivery_tag else 'No delivery info'

        # 리뷰 점수와 총 리뷰 수
        rating_tag = item.find('div', {'class': 'rating-star'})
        if rating_tag:
            rating_score = rating_tag.find('em', {'class': 'rating'}).get('style', '').split(':')[1].strip('%')
            total_reviews = rating_tag.find('span', {'class': 'rating-total-count'}).text.strip('()')
        else:
            rating_score, total_reviews = 'No rating', 'No reviews'
        
        data.append([item_name, discount, original_price, sale_price, delivery_info, rating_score, total_reviews])
    return data


In [26]:
def scrape_coupang(url, pages_to_scrape, headers):
    all_data = []
    for page in range(1, pages_to_scrape + 1):
        print(f'Scraping page {page}...')

        # 페이지 URL 구성
        page_url = f'{url}?page={page}'

        # 페이지 내용 요청 (헤더 포함)
        response = requests.get(page_url, headers=headers)
        if response.status_code == 200:
            soup = BeautifulSoup(response.content, 'html.parser')
            page_data = extract_page_data(soup)
            all_data.extend(page_data)
        else:
            print(f"Failed to retrieve page {page}. Status code: {response.status_code}")

    return all_data

In [27]:
url = 'https://www.coupang.com/np/categories/221934'
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Accept-Language': 'ko-KR,ko;q=0.8,en-US;q=0.5,en;q=0.3'
}
pages_to_scrape = 300

# Scraping data
data = scrape_coupang(url, pages_to_scrape, headers)


Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping page 6...
Scraping page 7...
Scraping page 8...
Scraping page 9...
Scraping page 10...
Scraping page 11...
Scraping page 12...
Scraping page 13...
Scraping page 14...
Scraping page 15...
Scraping page 16...
Scraping page 17...
Scraping page 18...
Scraping page 19...
Scraping page 20...
Scraping page 21...
Scraping page 22...
Scraping page 23...
Scraping page 24...
Scraping page 25...
Scraping page 26...
Scraping page 27...
Scraping page 28...
Scraping page 29...
Scraping page 30...
Scraping page 31...
Scraping page 32...
Scraping page 33...
Scraping page 34...
Scraping page 35...
Scraping page 36...
Scraping page 37...
Scraping page 38...
Scraping page 39...
Scraping page 40...
Scraping page 41...
Scraping page 42...
Scraping page 43...
Scraping page 44...
Scraping page 45...
Scraping page 46...
Scraping page 47...
Scraping page 48...
Scraping page 49...
Scraping page 50...
Scraping 

In [30]:
# 데이터를 CSV 파일로 저장
with open('coupang_baby.csv', 'w', newline='', encoding='UTF-8-sig') as file:
    writer = csv.writer(file)
    writer.writerow(['Item Name', 'Discount', 'Original Price', 'Sale Price', 'Delivery Info', 'Rating Score', 'Total Reviews'])
    writer.writerows(data)

print('Scraping completed and data saved to newitems.csv')


Scraping completed and data saved to newitems.csv
