In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import time
import pandas as pd
from datetime import datetime
import asyncio  # 이 줄을 추가
# Browser configuration: Chrome options shared by the scraping session.
chrome_options = Options()

# Command-line switches, registered in the order listed.
for _chrome_switch in (
    '--headless',  # headless mode
    '--no-sandbox',
    '--disable-dev-shm-usage',
    '--disable-gpu',  # original author's note: needed on Linux
    '--window-size=1920,1080',
    '--remote-debugging-port=9222',  # debugging port
    '--disable-blink-features=AutomationControlled',
    '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
):
    chrome_options.add_argument(_chrome_switch)

# Additional (experimental) settings, also registered in the order listed.
for _option_name, _option_value in (
    ('excludeSwitches', ['enable-automation']),
    ('useAutomationExtension', False),
):
    chrome_options.add_experimental_option(_option_name, _option_value)

In [2]:
def get_pdf_download_url(file_name):
    """Build the download URL for a disclosure document.

    Args:
        file_name: Value sent as the ``file_name`` query parameter.

    Returns:
        The ``download_chk.asp`` URL with ``file_name`` percent-encoded
        (UTF-8), so the value always stays a single, intact query parameter.
    """
    # Function-scope import keeps this helper self-contained.
    from urllib.parse import quote

    base_url = "https://www.hanwhalife.com/main/disclosure/goods/download_chk.asp"
    # Without encoding, a space, '&', '#' or non-ASCII character in the name
    # would truncate or corrupt the query string of the generated link.
    # NOTE(review): assumes the API returns raw (not already percent-encoded)
    # file names - confirm against a real response.
    encoded_name = quote(str(file_name), safe='')
    return f"{base_url}?file_name={encoded_name}"

async def get_product_details(driver, sell_type, goods_type):
    """Request the product list for one sell-type / goods-type pair.

    Runs a JavaScript ``fetch`` POST against ``getList.do`` inside the page
    via ``driver.execute_async_script`` and returns that call's result: the
    JSON-decoded response, or ``{'error': <message>}`` when the fetch promise
    rejects.

    Declared ``async`` although the body contains no ``await``; callers must
    still await the returned coroutine.

    Args:
        driver: Object exposing ``execute_async_script``.
        sell_type: Sent as the ``sellType`` form field.
        goods_type: Sent as the ``goodsType`` form field.
    """
    # The last script argument is the completion callback supplied by the
    # driver; the positional arguments before it are sell_type and goods_type.
    list_request_script = """
        var done = arguments[arguments.length - 1];
        
        fetch('/main/disclosure/goods/goodslist/getList.do', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/x-www-form-urlencoded',
                'X-Requested-With': 'XMLHttpRequest'
            },
            body: new URLSearchParams({
                'PType': '1',
                'sellFlag': 'Y',
                'sellType': arguments[0],
                'goodsType': arguments[1]
            })
        })
        .then(response => response.json())
        .then(data => done(data))
        .catch(error => done({error: error.message}));
    """
    response = driver.execute_async_script(
        list_request_script,
        sell_type,
        goods_type,
    )
    return response

def get_product_specific_info(driver, idx, sell_type, goods_type):
    """Request the detail record of a single product.

    Runs a JavaScript ``fetch`` POST against ``getGoodsInfo.do`` inside the
    page via ``driver.execute_async_script``.

    Args:
        driver: Object exposing ``execute_async_script``.
        idx: Sent as the ``goodsIndex`` form field.
        sell_type: Sent as the ``sellType`` form field.
        goods_type: Sent as the ``goodsType`` form field.

    Returns:
        Whatever the script hands back (the JSON-decoded response, or
        ``{'error': <message>}`` when the fetch promise rejects), or ``None``
        if the driver call itself raises; the error is printed in that case.
    """
    try:
        # Request the data with JavaScript (fetch API). Script arguments are
        # passed as sell_type, goods_type, idx; the driver appends the
        # completion callback as the last argument.
        goods_info_script = """
            var done = arguments[arguments.length - 1];
            
            fetch('/main/disclosure/goods/goodslist/getGoodsInfo.do', {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/x-www-form-urlencoded',
                    'X-Requested-With': 'XMLHttpRequest'
                },
                body: new URLSearchParams({
                    'PType': '1',
                    'sellFlag': 'Y',
                    'sellType': arguments[0],
                    'goodsType': arguments[1],
                    'goodsIndex': arguments[2]
                })
            })
            .then(response => response.json())
            .then(data => done(data))
            .catch(error => done({error: error.message}));
        """
        return driver.execute_async_script(
            goods_info_script, sell_type, goods_type, idx
        )
    except Exception as exc:
        # Best effort: report the failure and let the caller skip this product.
        print(f"세부 정보 요청 중 오류: {exc}")
        return None

try:
    # 브라우저 시작
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=chrome_options)
    wait = WebDriverWait(driver, 30)
    
    # URL 설정
    url = 'https://www.hanwhalife.com/main/disclosure/goods/goodslist/DF_GDGL000_P10000.do'
    driver.get(url)
    print("페이지 접속 완료")
    
    # JavaScript 로딩 대기
    wait.until(EC.presence_of_element_located((By.ID, "LIST_GRID1")))
    time.sleep(3)
    
    # fetch API를 사용한 데이터 요청
    js_code = """
        var done = arguments[arguments.length - 1];
        
        fetch('/main/disclosure/goods/goodslist/getList.do', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/x-www-form-urlencoded',
                'X-Requested-With': 'XMLHttpRequest',
                'Accept': 'application/json, text/javascript, */*; q=0.01'
            },
            body: new URLSearchParams({
                'PType': '1',
                'sellFlag': 'Y',
                'schText': ''
            })
        })
        .then(response => response.json())
        .then(data => done(data))
        .catch(error => done({error: error.message}));
    """
    
    print("데이터 요청 시작")
    data = driver.execute_async_script(js_code)
    print("응답 데이터:", data)
    
    if isinstance(data, dict) and not 'error' in data:
        if 'list1' in data:
            excel_data = []
            
            for product_type in data['list1']:
                sell_type = product_type['SELL_TYPE']
                goods_type = product_type['GOODS_TYPE']
                category = f"{product_type['SELL_TYPE_NM']} {product_type['GOODS_TYPE_NM']}"
                print(f"\n처리 중: {category}")
                
                # 상품 목록 가져오기
                details = await get_product_details(driver, sell_type, goods_type)
                
                if details and 'list2' in details:
                    print(f"발견된 상품 수: {len(details['list2'])}")
                    
                    # 각 상품별로 처리
                    for product in details['list2']:
                        print(f"\n상품 처리 중: {product['GOODS_NAME']} (IDX: {product['IDX']})")
                        
                        try:
                            # 상품별 세부 정보 가져오기
                            specific_info = get_product_specific_info(
                                driver, 
                                product['IDX'],
                                sell_type,
                                goods_type
                            )
                            
                            if specific_info and isinstance(specific_info, dict):
                                if 'list3' in specific_info and specific_info['list3']:
                                    for doc in specific_info['list3']:
                                        row = {
                                            '판매구분': '판매중' if doc.get('SELL_END_DT', '').strip() == '' else '판매중지',
                                            '판매사': '한화생명',
                                            '분류': category,
                                            '상품명': product['GOODS_NAME'],
                                            '판매기간': f"{doc.get('SELL_START_DT', '')} ~ {'현재' if doc.get('SELL_END_DT', '').strip() == '' else doc['SELL_END_DT']}",
                                            '요약서': get_pdf_download_url(doc.get('FILE_NAME1', '')) if doc.get('FILE_NAME1') else 'X',
                                            '방법서': get_pdf_download_url(doc.get('FILE_NAME2', '')) if doc.get('FILE_NAME2') else 'X',
                                            '약관': get_pdf_download_url(doc.get('FILE_NAME3', '')) if doc.get('FILE_NAME3') else 'X'
                                        }
                                        excel_data.append(row)
                                        print(f"추가된 행: {row}")
                                else:
                                    print(f"상품 {product['GOODS_NAME']}의 세부 정보(list3)가 없습니다.")
                            else:
                                print(f"상품 {product['GOODS_NAME']}의 세부 정보 응답이 올바르지 않습니다.")
                        
                        except Exception as e:
                            print(f"상품 처리 중 오류 발생: {str(e)}")
                        
                        time.sleep(1)  # 개별 상품 요청 간격
                    
                    time.sleep(2)  # 상품 유형별 요청 간격
            
            if excel_data:
                # DataFrame 생성 및 저장
                df = pd.DataFrame(excel_data)
                current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
                excel_filename = f'hanwhalife_products_{current_time}.xlsx'
                
                with pd.ExcelWriter(excel_filename, engine='openpyxl') as writer:
                    df.to_excel(writer, index=False, sheet_name='상품목록')
                    
                    # 열 너비 자동 조정
                    worksheet = writer.sheets['상품목록']
                    for idx, col in enumerate(df.columns):
                        max_length = max(
                            df[col].astype(str).apply(len).max(),
                            len(col)
                        )
                        worksheet.column_dimensions[chr(65 + idx)].width = max_length + 2

                print(f"\n데이터가 {excel_filename}에 저장되었습니다.")
                print(f"\n총 {len(df)}개의 상품 정보가 수집되었습니다.")
            else:
                print("\n수집된 데이터가 없습니다.")

except Exception as e:
    print(f"에러 발생: {str(e)}")
    if 'driver' in locals():
        print("현재 페이지 소스:")
        print(driver.page_source[:1000])

finally:
    if 'driver' in locals():
        driver.quit()

페이지 접속 완료
데이터 요청 시작
응답 데이터: {'list1': [{'GOODS_TYPE': 'GA', 'GOODS_TYPE_NM': '보장성', 'SELL_TYPE': 'SA', 'SELL_TYPE_NM': '개인'}, {'GOODS_TYPE': 'GB', 'GOODS_TYPE_NM': '연금', 'SELL_TYPE': 'SA', 'SELL_TYPE_NM': '개인'}, {'GOODS_TYPE': 'GC', 'GOODS_TYPE_NM': '생사 혼합', 'SELL_TYPE': 'SA', 'SELL_TYPE_NM': '개인'}, {'GOODS_TYPE': 'GF', 'GOODS_TYPE_NM': '장애인 전용', 'SELL_TYPE': 'SA', 'SELL_TYPE_NM': '개인'}, {'GOODS_TYPE': 'GA', 'GOODS_TYPE_NM': '보장성', 'SELL_TYPE': 'SB', 'SELL_TYPE_NM': '단체'}, {'GOODS_TYPE': 'GB', 'GOODS_TYPE_NM': '연금', 'SELL_TYPE': 'SB', 'SELL_TYPE_NM': '단체'}, {'GOODS_TYPE': 'GC', 'GOODS_TYPE_NM': '생사 혼합', 'SELL_TYPE': 'SB', 'SELL_TYPE_NM': '단체'}, {'GOODS_TYPE': 'GE', 'GOODS_TYPE_NM': '퇴직', 'SELL_TYPE': 'SB', 'SELL_TYPE_NM': '단체'}, {'GOODS_TYPE': 'GB', 'GOODS_TYPE_NM': '연금', 'SELL_TYPE': 'SC', 'SELL_TYPE_NM': '방카 슈랑스'}, {'GOODS_TYPE': 'GA', 'GOODS_TYPE_NM': '보장성', 'SELL_TYPE': 'SD', 'SELL_TYPE_NM': '특약'}, {'GOODS_TYPE': 'GB', 'GOODS_TYPE_NM': '연금', 'SELL_TYPE': 'SD', 'SELL_TYPE_NM': '특약'}