In [1]:
from bs4 import BeautifulSoup as bs
from pathlib import Path
import time
import os
import re
import requests as rq
import json

# Example contents of json/headers.json:
'''{
    "headers": {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36",
        "accept-language": "en-US,en;q=0.5",
        "accept-encoding": "gzip, deflate, br",
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "referer": "https://www.example.com"
    }
}'''
def get_headers(key, default_value=None):
    """Return the value stored under ``key`` in ``json/headers.json``.

    The file is opened relative to the current working directory and parsed
    as a UTF-8 JSON document on every call.

    Args:
        key: Top-level key to look up in the parsed JSON document.
        default_value: Fallback returned when ``key`` cannot be looked up.
            ``None`` (the default) means "no fallback"; any other value,
            including falsy ones such as ``{}`` or ``''``, is returned as-is.

    Returns:
        The JSON value stored under ``key``, or ``default_value`` when the
        lookup fails and a fallback was supplied.

    Raises:
        EnvironmentError: If ``key`` cannot be looked up and no fallback was
            given.
    """
    JSON_FILE = 'json/headers.json'

    with open(JSON_FILE, 'r', encoding='UTF-8') as file:
        headers = json.load(file)

    try:
        return headers[key]
    except (LookupError, TypeError):
        # Only a failed lookup (missing key, or a JSON document that is not
        # a mapping) falls through to the fallback handling below; unrelated
        # exceptions are no longer swallowed.
        pass

    if default_value is not None:
        return default_value
    raise EnvironmentError(f'Set the {key}')

class Coupang:
    """Interactive scraper for the review pages of a Coupang product.

    ``main`` prompts for a product URL and a page count, then downloads and
    parses that many review pages into plain dictionaries.
    """

    # Seconds to pause after each review-page request.
    REQUEST_DELAY = 1

    # CSS selector matching one review entry on a review page.
    ARTICLE_SELECTOR = 'article.sdp-review__article__list'

    @staticmethod
    def get_product_code(url):
        """Extract the product code from a product URL.

        Args:
            url: Product page URL.

        Returns:
            str: The text after the last ``products/`` in ``url``, cut at the
            first ``?``.
        """
        return url.split('products/')[-1].split('?')[0]

    def __init__(self):
        # Request headers are read from the "headers" entry of the JSON file
        # loaded by get_headers().
        self.headers = get_headers(key='headers')

    def main(self):
        """Prompt for a product URL and a page count, then scrape the reviews.

        Returns:
            list: One list of review dictionaries (see ``fetch``) per
            requested page, in page order.
        """
        # Product page URL entered by the user.
        product_url = self.input_review_url()

        # Product code embedded in that URL.
        prod_code = self.get_product_code(url=product_url)

        # Number of review pages to request (asked after the URL).
        page_count = self.input_page_count()

        # One review-endpoint URL per requested page.
        page_urls = [
            f'https://www.coupang.com/vp/product/reviews?productId={prod_code}&page={page}&size=5&sortBy=ORDER_SCORE_ASC&ratings=&q=&viRoleCode=3&ratingSummary=true'
            for page in range(1, page_count + 1)
        ]

        # Send the product page as the referer of every review request.
        self.headers['referer'] = product_url

        with rq.Session() as session:
            return [self.fetch(url=url, session=session) for url in page_urls]

    def fetch(self, url, session):
        """Download one review page and parse every review found on it.

        Args:
            url: Review-page URL to request.
            session: Object whose ``get(url=..., headers=...)`` returns a
                context-manager response exposing ``.text`` (``main`` passes
                a ``requests.Session``).

        Returns:
            list: One dict per review with the keys ``prod_name``,
            ``user_name``, ``rating``, ``headline``, ``review_content`` and
            ``answer``. Each dict is also printed as it is parsed.
        """
        with session.get(url=url, headers=self.headers) as response:
            soup = bs(response.text, 'html.parser')

        save_data = []
        # The article list is selected once instead of once per review.
        for article in soup.select(self.ARTICLE_SELECTOR):
            dict_data = self._parse_article(article)
            save_data.append(dict_data)
            print(dict_data, '\n')

        # Pause between page requests.
        time.sleep(self.REQUEST_DELAY)

        return save_data

    @classmethod
    def _parse_article(cls, article):
        """Turn one review element into a dict of its displayed fields."""
        # Reviewer name.
        user_name = cls._text_or_default(
            article.select_one('span.sdp-review__article__list__info__user__name'),
            '-',
        )

        # Star rating.
        rating = cls._parse_rating(
            article.select_one('div.sdp-review__article__list__info__product-info__star-orange')
        )

        # Name of the purchased product.
        prod_name = cls._text_or_default(
            article.select_one('div.sdp-review__article__list__info__product-info__name'),
            '-',
        )

        # Headline (title).
        headline = cls._text_or_default(
            article.select_one('div.sdp-review__article__list__headline'),
            '등록된 헤드라인이 없습니다',
        )

        # Review body, with newlines and tabs removed.
        content_node = article.select_one('div.sdp-review__article__list__review > div')
        if content_node is None:
            content_text = ''
        else:
            content_text = re.sub('[\n\t]', '', content_node.text.strip())
        review_content = content_text or '등록된 리뷰내용이 없습니다'

        # Taste-satisfaction survey answer.
        answer = cls._text_or_default(
            article.select_one('span.sdp-review__article__list__survey__row__answer'),
            '맛 평가 없음',
        )

        return {
            'prod_name': prod_name,
            'user_name': user_name,
            'rating': rating,
            'headline': headline,
            'review_content': review_content,
            'answer': answer,
        }

    @staticmethod
    def _text_or_default(node, default):
        """Return ``node``'s stripped text, or ``default`` when it is missing or blank."""
        if node is None:
            return default
        return node.text.strip() or default

    @staticmethod
    def _parse_rating(node):
        """Return the integer ``data-rating`` of ``node``; 0 when absent or not numeric."""
        if node is None:
            return 0
        try:
            return int(node.attrs.get('data-rating', 0))
        except (TypeError, ValueError):
            return 0

    @staticmethod
    def clear_console():
        """Clear the terminal: ``cls`` when ``os.name`` is 'nt'/'dos', else ``clear``."""
        command = 'cls' if os.name in ('nt', 'dos') else 'clear'
        os.system(command)

    def input_review_url(self):
        """Prompt until a non-blank product URL is entered.

        Returns:
            str: The entered URL with surrounding whitespace removed.
        """
        self.clear_console()

        while True:
            # Review URL
            review_url = input('원하시는 상품의 URL 주소를 입력해주세요\n\nEx)\nhttps://www.coupang.com/vp/products/7335597976?itemId=18741704367&vendorItemId=85873964906&q=%ED%9E%98%EB%82%B4%EB%B0%94+%EC%B4%88%EC%BD%94+%EC%8A%A4%EB%8B%88%EC%BB%A4%EC%A6%88&itemsCount=36&searchId=0c5c84d537bc41d1885266961d853179&rank=2&isAddedCart=\n\n:').strip()
            if review_url:
                return review_url

            # Clear first, then print, so the message is still visible when
            # the prompt is shown again.
            self.clear_console()
            print('URL 주소가 입력되지 않았습니다')

    def input_page_count(self):
        """Prompt until a positive whole number of pages is entered.

        Returns:
            int: Number of review pages to fetch (always >= 1).
        """
        self.clear_console()

        while True:
            page_count = input('페이지 수를 입력하세요\n\n:').strip()
            if not page_count:
                print('페이지 수가 입력되지 않았습니다\n')
                continue

            try:
                pages = int(page_count)
            except ValueError:
                # Non-numeric input re-prompts instead of crashing.
                print('페이지 수는 숫자로 입력해주세요\n')
                continue

            if pages < 1:
                print('페이지 수는 1 이상이어야 합니다\n')
                continue

            return pages

In [6]:
# Scratch check: the value displayed for this cell is `int`, i.e. os.system()
# hands back an integer rather than the command's output.
# NOTE(review): 'cls' is the command clear_console() selects only when os.name
# is 'nt'/'dos'; presumably this cell was run on Windows — confirm.
type(os.system('cls'))

int