In [None]:
import requests
import pandas as pd
import time
from datetime import datetime

def get_floor_number(floor_info):
    """Convert a ``"<floor>/<total>"`` string into a numeric floor.

    The text before the first ``/`` is interpreted as the floor. The labels
    '저', '중' and '고' are mapped to the representative floors 1, 7 and 12;
    anything else is parsed as an integer.

    Args:
        floor_info: Floor description such as ``"5/15"`` or ``"중/15"``.

    Returns:
        The floor as an ``int``, or ``None`` when ``floor_info`` is empty,
        is not a string, has no ``/``, or its floor part is neither a known
        label nor an integer.
    """
    if not isinstance(floor_info, str) or '/' not in floor_info:
        return None

    # Mid floors are generally floors 4-7, so they are treated as floor 7;
    # high floors are generally floor 12 or above, so they are treated as 12.
    label_to_floor = {'저': 1, '중': 7, '고': 12}

    # partition() keeps only the text before the first '/', so a value with
    # extra separators cannot fail the way a two-target unpack of split() does.
    floor = floor_info.partition('/')[0].strip()
    if floor in label_to_floor:
        return label_to_floor[floor]

    try:
        return int(floor)
    except ValueError:
        return None

def convert_area_to_pyeong(area_m2):
    """Convert an area given in square metres to pyeong.

    The input is divided by 3.3058, the number of square metres this script
    uses for one pyeong.
    """
    square_metres_per_pyeong = 3.3058
    area_pyeong = area_m2 / square_metres_per_pyeong
    return area_pyeong

def convert_price_to_number(price_str):
    """Convert a price string such as ``"3억 5,000"`` into a sortable number.

    Each whitespace-separated token is parsed. A figure followed by '억' is
    multiplied by 10000; any other figure is added as-is. Thousands
    separators (commas) are ignored. A token that carries digits directly
    after '억' (``"3억5,000"``) is handled the same as the spaced form.

    Args:
        price_str: The price text, or an empty/``None`` value.

    Returns:
        The combined integer value, or ``float('inf')`` when ``price_str`` is
        empty so that listings without a price sort last.

    Raises:
        ValueError: If a token contains text that is not a number.
    """
    if not price_str:
        return float('inf')

    total = 0
    for part in price_str.split():
        eok, marker, remainder = part.partition('억')
        if marker:
            total += int(eok.replace(',', '')) * 10000
            # Digits glued to the marker ("3억5,000") belong to the smaller
            # unit and must not be fed into the 억 multiplier.
            if remainder:
                total += int(remainder.replace(',', ''))
        else:
            total += int(part.replace(',', ''))

    return total

def get_real_estate_data(complex_id):
    """Fetch listings for one complex and return the cheapest per size group.

    Requests pages 1-5 of the article list for ``complex_id``, keeps listings
    whose floor (as resolved by ``get_floor_number``) is 4 or higher, and
    splits them into two groups by area: 23-27 pyeong and 32 pyeong or more.
    The lowest-priced listing of each non-empty group is returned.

    Network errors, empty bodies and undecodable JSON are reported with
    ``print`` and the page is skipped; a page without listings ends paging.

    Args:
        complex_id: Identifier of the complex, interpolated into the URL.

    Returns:
        A list with zero, one or two dicts (small group first) holding the
        keys '아파트명', '가격', '면적(평)', '층수', '방향' and '특징'.
    """
    # NOTE(review): these cookies and the bearer token are hard-coded session
    # values. The token is a JWT carrying an `exp` claim, so it presumably
    # stops working after that time and must be refreshed - confirm, and
    # consider loading credentials from configuration instead of source.
    cookies = {
        'NAC': 'fy5GCABTmweeE',
        'NNB': 'POQIQ3VVBGUGO',
        'nhn.realestate.article.rlet_type_cd': 'A01',
        'nhn.realestate.article.trade_type_cd': '""',
        'nhn.realestate.article.ipaddress_city': '4100000000',
        '_fwb': '2822mnjwK7KrhQ4ppK031c.1740097918618',
        'landHomeFlashUseYn': 'Y',
        'NACT': '1',
        'SRT30': '1740276799',
        'SRT5': '1740276799',
        'REALESTATE': 'Sun%20Feb%2023%202025%2011%3A13%3A36%20GMT%2B0900%20(Korean%20Standard%20Time)',
        'BUC': 'QuCnJVKpftzR-H_rr7vo3TIAIol2aZu562bx4tTBtZs=',
    }

    headers = {
        'accept': '*/*',
        'accept-language': 'ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7',
        'authorization': 'Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpZCI6IlJFQUxFU1RBVEUiLCJpYXQiOjE3NDAyNzY4MTYsImV4cCI6MTc0MDI4NzYxNn0.eIaOAg5jFR_cprKYpljfa3gLn_h5eOiZZEKXt2XA91w',
        'priority': 'u=1, i',
        # Point the referer at the complex actually being requested rather
        # than one fixed complex page.
        'referer': f'https://new.land.naver.com/complexes/{complex_id}',
        'sec-ch-ua': '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
    }

    request_timeout = 10  # seconds; without it a stalled connection blocks forever

    small_data = []  # listings of 23-27 pyeong
    large_data = []  # listings of 32 pyeong or more

    for page in range(1, 6):
        # Throttle before every request after the first, so that failed pages
        # are rate-limited too.
        if page > 1:
            time.sleep(1)

        url = f'https://new.land.naver.com/api/articles/complex/{complex_id}?realEstateType=APT&tradeType=A1&page={page}'

        try:
            response = requests.get(
                url, cookies=cookies, headers=headers, timeout=request_timeout
            )
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(f"❌ 요청 오류: {e}")
            continue

        if not response.text.strip():
            print(f"🚨 빈 응답 (단지 ID: {complex_id}, 페이지: {page})")
            continue

        # Decode in its own try block: recent requests releases raise a JSON
        # error that is also a RequestException, which would otherwise be
        # reported as a request failure and hide the response body.
        try:
            data = response.json()
        except ValueError as e:
            print(f"❌ JSON 변환 오류: {e}")
            print("응답 내용:", response.text)
            continue

        articles = data.get('articleList') if isinstance(data, dict) else None
        if not articles:
            print(f"⚠️ 매물이 없습니다. (단지 ID: {complex_id}, 페이지: {page})")
            break

        for article in articles:
            floor_info = article.get('floorInfo', '')
            floor = get_floor_number(floor_info)
            if floor is None or floor < 4:
                continue

            # `or 0` also covers an explicit null area in the payload.
            area_pyeong = round(convert_area_to_pyeong(article.get('area1') or 0), 1)
            if 23 <= area_pyeong <= 27:
                bucket = small_data
            elif area_pyeong >= 32:
                bucket = large_data
            else:
                continue

            price = article.get('dealOrWarrantPrc', '')
            try:
                sort_price = convert_price_to_number(price)
            except ValueError:
                # An unparseable price must not abort the page; rank the
                # listing last, the same way an empty price is ranked.
                sort_price = float('inf')

            bucket.append({
                '아파트명': article.get('articleName', ''),
                '가격': price,
                '가격_정렬용': sort_price,
                '면적(평)': area_pyeong,
                '층수': floor_info,
                '방향': article.get('direction', ''),
                '특징': article.get('articleFeatureDesc', '정보 없음'),
            })

    result = []
    for group in (small_data, large_data):
        if group:
            cheapest = min(group, key=lambda item: item['가격_정렬용'])
            # Drop the internal sort key from the reported row.
            result.append(
                {key: value for key, value in cheapest.items() if key != '가격_정렬용'}
            )

    return result

def process_all_apartments():
    """Collect listings for every complex in the Excel list and save them.

    Reads 'aptlist.xlsx' (prompting for another path if it is missing), calls
    ``get_real_estate_data`` for each value in the 'complex_id' column with a
    2-second pause between complexes, and writes the combined rows to
    'real_estate_data.xlsx'. Any exception is reported with ``print`` rather
    than propagated, since this is the top-level entry point of the script.
    """
    try:
        file_path = 'aptlist.xlsx'
        try:
            apt_df = pd.read_excel(file_path)
        except FileNotFoundError:
            file_path = input("Excel 파일을 찾을 수 없습니다. 직접 경로 입력: ")
            apt_df = pd.read_excel(file_path)

        all_results = []
        total_apts = len(apt_df)

        print("\n📌 데이터 수집 시작...")

        # Iterate the column itself and count positions with enumerate:
        # iterrows() does not preserve dtypes across a row, and its index
        # label is not guaranteed to be the 0-based row position.
        for position, raw_id in enumerate(apt_df['complex_id'], start=1):
            if pd.isna(raw_id):
                print(f"⚠️ [{position}/{total_apts}] 단지 ID가 비어 있어 건너뜁니다.")
                continue

            # A column read as float would stringify as '109412.0' and
            # produce a wrong URL, so whole-number floats become ints first.
            if isinstance(raw_id, float) and raw_id.is_integer():
                raw_id = int(raw_id)
            complex_id = str(raw_id).strip()

            print(f"▶ [{position}/{total_apts}] 단지 ID: {complex_id} 처리 중...")

            data = get_real_estate_data(complex_id)
            if data:
                all_results.extend(data)

            time.sleep(2)

        if all_results:
            df = pd.DataFrame(all_results)
            output_file = 'real_estate_data.xlsx'
            df.to_excel(output_file, index=False)
            print(f"\n✅ 수집 완료! 데이터가 '{output_file}' 파일에 저장되었습니다.")
        else:
            print("\n❌ 저장할 데이터가 없습니다.")

    except Exception as e:
        print(f"오류 발생: {e}")

if __name__ == "__main__":
    # Script entry point: print a banner with the start time, then collect.
    started_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print("\n🏠 네이버 부동산 데이터 수집기 실행")
    print(f"⏳ 실행 날짜: {started_at}")
    process_all_apartments()



🏠 네이버 부동산 데이터 수집기 실행
⏳ 실행 날짜: 2025-02-24 22:12:03

📌 데이터 수집 시작...
▶ [1/13] 단지 ID: 109412 처리 중...
❌ 요청 오류: Expecting value: line 1 column 1 (char 0)
❌ 요청 오류: Expecting value: line 1 column 1 (char 0)
❌ 요청 오류: Expecting value: line 1 column 1 (char 0)
❌ 요청 오류: Expecting value: line 1 column 1 (char 0)
❌ 요청 오류: Expecting value: line 1 column 1 (char 0)
▶ [2/13] 단지 ID: 115708 처리 중...
❌ 요청 오류: Expecting value: line 1 column 1 (char 0)
❌ 요청 오류: Expecting value: line 1 column 1 (char 0)


KeyboardInterrupt: 