## amazon.co.jp에서 특정 키워드 도서 목록 수집하기

https://www.amazon.co.jp/s?k=python&crid=NTE3QB916YZK&sprefix=python%2Caps%2C179&ref=nb_sb_noss_1

In [None]:
!pip install -q googletrans==4.0.0-rc1

https://github.com/ssut/py-googletrans

In [None]:
from googletrans import Translator

# Create the translator object (this module-level `translator` is reused
# by the scraping cell further down when it translates book titles)
translator = Translator()

# Text to translate (a sample Japanese book title)
text_to_translate = "スッキリわかるPython入門 第2版 (スッキリわかる入門シリーズ)"

# Translate to Korean; the source language is given explicitly as Japanese
translated_text = translator.translate(text_to_translate, src='ja', dest='ko')

# Print the original text and the `.text` of the translation result
print(f"Original text: {text_to_translate}")
print(f"Translated text: {translated_text.text}")

In [None]:
def translate_ja_to_ko(japanese_text, translator=None):
    """Translate Japanese text into Korean.

    Args:
        japanese_text: The Japanese text to translate.
        translator: Optional object exposing
            ``translate(text, src=..., dest=...)`` whose result has a
            ``.text`` attribute. When omitted, a new ``Translator`` is
            created for this call; pass one in to reuse it across many calls.

    Returns:
        The Korean translation as a string, or ``""`` when the input is
        ``None``, empty, or whitespace-only.
    """
    # Nothing to translate: skip building a client and the network round-trip.
    if not japanese_text or not japanese_text.strip():
        return ""

    if translator is None:
        translator = Translator()

    # Source and destination languages are fixed: Japanese -> Korean.
    translated_text = translator.translate(japanese_text, src='ja', dest='ko')

    return translated_text.text

# Sample Japanese text used to exercise the helper
japanese_text = "スッキリわかるPython入門 第2版 (スッキリわかる入門シリーズ)"

# Call the function and print the original text alongside its translation
translated_text = translate_ja_to_ko(japanese_text)
print(f"일본어 원본 텍스트: {japanese_text}")
print(f"한국어 번역 결과: {translated_text}")

In [None]:
import requests
from bs4 import BeautifulSoup
import time
import random

def get_amazon_page(url, max_retries=5):
    """Fetch a page with browser-like headers, retrying on failure.

    Each attempt uses a 10 second timeout. After a failed attempt the
    function waits ``attempt + 1`` seconds (1, 2, 3, ...) before trying
    again; no wait happens after the final attempt.

    Args:
        url: The URL to request.
        max_retries: Maximum number of attempts to make.

    Returns:
        The response body as text when a request returns HTTP 200.

    Raises:
        RuntimeError: If no attempt returned HTTP 200 (including when
            ``max_retries`` is 0). The message includes the last problem seen.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept-Language': 'en-US,en;q=0.9',
        # 'br' is deliberately not advertised: requests only decodes Brotli
        # when an optional brotli package is installed, and otherwise
        # response.text would be undecoded compressed bytes.
        'Accept-Encoding': 'gzip, deflate',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Connection': 'keep-alive',
        'Referer': 'https://www.amazon.co.jp/'
    }

    last_problem = "no attempts were made"
    for attempt in range(max_retries):
        try:
            response = requests.get(url, headers=headers, timeout=10)
        except requests.RequestException as e:
            last_problem = f"Request failed: {e}"
        else:
            if response.status_code == 200:
                return response.text
            if response.status_code == 503:
                last_problem = "Received 503 error"
            else:
                last_problem = f"Received status code {response.status_code}"

        if attempt + 1 < max_retries:
            # Linear back-off: wait one second longer after each failure.
            delay = attempt + 1
            print(f"{last_problem}. Retrying in {delay} seconds...")
            time.sleep(delay)
        else:
            # Final attempt: report it, but do not sleep just to fail afterwards.
            print(f"{last_problem}. No retries left.")

    raise RuntimeError(
        f"Failed to retrieve the page after multiple attempts (last problem: {last_problem})"
    )

def extract_book_info(html_content):
    """Collect title/price pairs from search-result HTML.

    Every ``div`` with class ``s-result-item`` is inspected for a title
    span and a whole-price span; results lacking either one are skipped.

    Args:
        html_content: HTML markup of the search results page.

    Returns:
        A list of dicts with the keys ``'Title'`` and ``'Price'``, each
        holding the stripped text of the matching span, in document order.
    """
    parsed = BeautifulSoup(html_content, 'html.parser')

    # Lazily pair up the (title span, price span) lookups for each result.
    span_pairs = (
        (
            result.find('span', class_='a-size-medium a-color-base a-text-normal'),
            result.find('span', class_='a-price-whole'),
        )
        for result in parsed.find_all('div', class_='s-result-item')
    )

    # Keep only the results where both spans were found.
    return [
        {'Title': title_tag.text.strip(), 'Price': price_tag.text.strip()}
        for title_tag, price_tag in span_pairs
        if title_tag and price_tag
    ]

# Amazon Japan Python books search URL
url = "https://www.amazon.co.jp/s?k=python&i=stripbooks&crid=2B22VXGMPF3RO&sprefix=python%2Cstripbooks%2C183&ref=nb_sb_noss_1"

try:
    html_content = get_amazon_page(url)
    book_info = extract_book_info(html_content)

    # Say so explicitly when nothing was parsed instead of printing nothing.
    # NOTE(review): an empty list presumably means the page did not contain
    # the expected result markup (layout change or a different page served).
    if not book_info:
        print("No books found on the page.")

    for book in book_info:
        print(f"Title: {book['Title']}")
        # Translate the Japanese title into Korean using the module-level
        # `translator` created in an earlier cell. A failure for one title
        # must not abort the rest of the listing; the exception types the
        # translator can raise are not visible here, so catch broadly.
        try:
            translated_text = translator.translate(book['Title'], src='ja', dest='ko')
            print(f"한국어 제목: {translated_text.text}")
        except Exception as e:
            print(f"한국어 제목: (translation failed: {e})")
        print(f"Price: {book['Price']}")
        print("---")
except Exception as e:
    # Top-level boundary of the script: report fetch/parse failures and stop.
    print(f"An error occurred: {e}")