In [None]:
import requests
import pandas as pd
from datetime import datetime
import json
import re

def get_floor_number(floor_info):
    """Extract the listing's floor from a ``'<floor>/<total>'`` label.

    Args:
        floor_info: Floor label such as ``'5/15'`` or ``'저/15'``.

    Returns:
        The floor as an ``int``; ``1`` when the floor part is ``'저'``;
        ``None`` when the label is empty, is not a string, has no ``'/'``,
        or the floor part is not an integer.
    """
    if not isinstance(floor_info, str) or '/' not in floor_info:
        return None

    # partition() never fails on labels containing extra '/' characters,
    # unlike unpacking the result of split('/') into exactly two names.
    floor = floor_info.partition('/')[0].strip()
    if floor == '저':
        return 1
    try:
        return int(floor)
    except ValueError:
        return None

def convert_area_to_pyeong(area_m2):
    """Convert an area given in square metres to pyeong.

    The conversion divides by 3.3058 (square metres per pyeong).
    """
    square_metres_per_pyeong = 3.3058
    pyeong = area_m2 / square_metres_per_pyeong
    return pyeong

def convert_price_to_number(price_str):
    """Convert a Korean price label to a sortable number.

    The label is read as ``'<N>억 <M>'`` where the 억 part counts for
    ``N * 10000`` and the remainder is added as-is, e.g.
    ``'10억 7,000' -> 107000``. Either part may be absent, and commas and
    whitespace are ignored, so ``'10억'``, ``'7,000'`` and ``'10억7,000'``
    are all accepted.

    Args:
        price_str: Price label; may be empty or ``None``.

    Returns:
        The price as an ``int``, or ``float('inf')`` when the label is
        empty or cannot be parsed, so that such listings sort last.
    """
    import unicodedata  # local import: only this function needs it

    if not price_str:
        return float('inf')

    # Normalise to NFC so a decomposed (jamo-sequence) '억' still matches the
    # precomposed literal below, then strip thousands separators and spaces.
    compact = unicodedata.normalize('NFC', str(price_str))
    compact = re.sub(r'[\s,]', '', compact)
    if not compact:
        return float('inf')

    match = re.fullmatch(r'(?:(\d+)억)?(\d*)', compact)
    if match is None:
        return float('inf')

    eok_part, remainder_part = match.groups()
    return int(eok_part or 0) * 10000 + int(remainder_part or 0)

def get_real_estate_data(complex_id, min_pyeong, max_pyeong, *,
                         max_pages=5, min_floor=4, top_n=5, timeout=10):
    """Fetch sale listings for a complex and return the cheapest matches.

    Requests result pages 1..``max_pages`` of the article list for
    ``complex_id``, keeps listings whose rounded area in pyeong lies in
    ``[min_pyeong, max_pyeong]`` and whose parsed floor is at least
    ``min_floor``, then returns the ``top_n`` cheapest of them.

    Args:
        complex_id: Complex identifier interpolated into the request URL.
        min_pyeong: Inclusive lower bound on the rounded area in pyeong.
        max_pyeong: Inclusive upper bound on the rounded area in pyeong.
        max_pages: Number of result pages to request at most.
        min_floor: Minimum floor; listings with an unknown floor are dropped.
        top_n: Maximum number of listings returned.
        timeout: Per-request timeout in seconds passed to ``requests.get``.

    Returns:
        A list of dicts (one per listing, cheapest first), or ``[]`` when
        nothing matched. Request and JSON errors are printed and the
        affected page is skipped; they are not raised.
    """
    # NOTE(review): the session cookies and bearer token below are hard-coded.
    # The token looks like a JWT with an expiry claim, so requests will
    # presumably start failing once it lapses -- consider loading these from
    # configuration instead of source.
    cookies = {
        'NAC': 'fy5GCABTmweeE',
        'NNB': 'POQIQ3VVBGUGO',
        'nhn.realestate.article.rlet_type_cd': 'A01',
        'nhn.realestate.article.trade_type_cd': '""',
        'nhn.realestate.article.ipaddress_city': '4100000000',
        '_fwb': '2822mnjwK7KrhQ4ppK031c.1740097918618',
        'landHomeFlashUseYn': 'Y',
        'NACT': '1',
        'SRT30': '1740276799',
        'SRT5': '1740276799',
        'REALESTATE': 'Sun%20Feb%2023%202025%2011%3A13%3A36%20GMT%2B0900%20(Korean%20Standard%20Time)',
        'BUC': 'QuCnJVKpftzR-H_rr7vo3TIAIol2aZu562bx4tTBtZs=',
    }

    headers = {
        'accept': '*/*',
        'accept-language': 'ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7',
        'authorization': 'Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpZCI6IlJFQUxFU1RBVEUiLCJpYXQiOjE3NDAyNzY4MTYsImV4cCI6MTc0MDI4NzYxNn0.eIaOAg5jFR_cprKYpljfa3gLn_h5eOiZZEKXt2XA91w',
        'priority': 'u=1, i',
        'referer': f'https://new.land.naver.com/complexes/{complex_id}',
        'sec-ch-ua': '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
    }

    all_data = []
    for page in range(1, max_pages + 1):
        url = f'https://new.land.naver.com/api/articles/complex/{complex_id}?realEstateType=APT%3AABYG%3AJGC%3APRE&tradeType=A1&tag=%3A%3A%3A%3A%3A%3A%3A%3A&rentPriceMin=0&rentPriceMax=900000000&priceMin=0&priceMax=900000000&areaMin=0&areaMax=900000000&oldBuildYears&recentlyBuildYears&minHouseHoldCount&maxHouseHoldCount&showArticle=true&sameAddressGroup=true&minMaintenanceCost&maxMaintenanceCost&priceType=RETAIL&directions=&complexNo={complex_id}&buildingNos=&areaNos=&type=list&order=prc&page={page}'

        # Only the network call and JSON decoding sit inside the try, so the
        # handlers below cannot mask errors from the row-building code.
        try:
            # Without a timeout, requests waits indefinitely on a stalled server.
            response = requests.get(url, cookies=cookies, headers=headers, timeout=timeout)
            response.raise_for_status()
            data = response.json()
        except requests.exceptions.RequestException as e:
            print(f"데이터 수집 중 오류 발생: {e}")
            continue
        except json.JSONDecodeError as e:
            print(f"JSON 파싱 오류: {e}")
            continue

        articles = data.get('articleList') if isinstance(data, dict) else None
        if not articles:
            # An empty or missing list means there are no further pages.
            break

        for article in articles:
            floor = get_floor_number(article.get('floorInfo', ''))
            if floor is None or floor < min_floor:
                continue

            # `or 0` also covers an explicit null area in the payload.
            area_pyeong = convert_area_to_pyeong(article.get('area1') or 0)
            if not (min_pyeong <= round(area_pyeong) <= max_pyeong):
                continue

            price_label = article.get('dealOrWarrantPrc', '')
            try:
                sort_price = convert_price_to_number(price_label)
            except ValueError:
                # One unparseable price must not abort the whole collection;
                # treat it as unknown so the listing sorts last.
                print(f"가격 해석 실패: {price_label}")
                sort_price = float('inf')

            article_no = article.get('articleNo', '')
            all_data.append({
                # Spreadsheet formulas need double-quoted string arguments;
                # single quotes are not string delimiters there.
                '매물번호': f'=HYPERLINK("https://new.land.naver.com/articles/{article_no}", "{article_no}")',
                '아파트명': article.get('articleName', ''),
                '거래유형': '매매',
                '가격': price_label,
                '가격_정렬용': sort_price,
                '면적(m²)': article.get('area1', ''),
                '면적(평)': round(area_pyeong, 1),
                '층수': floor,
                '방향': article.get('direction', ''),
                '특징': article.get('articleFeatureDesc', '정보 없음'),
                '건물명': article.get('buildingName', ''),
                '등록일': article.get('articleConfirmYmd', '')
            })

    # Sort by price and keep only the cheapest `top_n`.
    cheapest = sorted(all_data, key=lambda row: row['가격_정렬용'])[:top_n]
    for row in cheapest:
        del row['가격_정렬용']  # helper field used only for sorting
    return cheapest

def save_to_csv(data):
    """Write the listings to a timestamped CSV file and print a summary.

    When ``data`` is empty a notice is printed and nothing is written.
    Otherwise the rows are saved as ``real_estate_data_<timestamp>.csv``
    (UTF-8 with BOM, no index column) and a subset of the columns is
    printed to stdout.
    """
    if data:
        frame = pd.DataFrame(data)
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f'real_estate_data_{stamp}.csv'
        frame.to_csv(filename, index=False, encoding='utf-8-sig')

        summary_columns = ['아파트명', '가격', '면적(평)', '층수', '방향', '특징']
        print("\n가장 저렴한 5개 매물:")
        print(frame[summary_columns].to_string())
        print(f"\n상세 데이터가 {filename}에 저장되었습니다.")
    else:
        print("저장할 데이터가 없습니다.")

if __name__ == "__main__":
    # Interactive entry point: ask for the complex ID and an area band,
    # then collect the listings and save them.
    complex_id = input("아파트 단지 ID를 입력하세요: ")

    for menu_line in ("\n면적 선택:", "1. 23평 ~ 27평", "2. 30평 이상"):
        print(menu_line)
    choice = input("번호를 선택하세요 (1 또는 2): ")

    # Menu choice -> (min_pyeong, max_pyeong); anything else uses the default.
    default_range = (23, 27)
    pyeong_ranges = {"1": (23, 27), "2": (30, 1000)}
    if choice not in pyeong_ranges:
        print("잘못된 입력입니다. 기본값(23~27평)으로 진행합니다.")
    min_pyeong, max_pyeong = pyeong_ranges.get(choice, default_range)

    print("\n데이터 수집을 시작합니다...")
    data = get_real_estate_data(complex_id, min_pyeong, max_pyeong)
    save_to_csv(data)

면적 선택:
1. 23평 ~ 27평
2. 30평 이상
데이터 수집을 시작합니다...


ValueError: invalid literal for int() with base 10: '10억'