In [9]:
import requests
import json
import re
import sys
from bs4 import BeautifulSoup

soup = BeautifulSoup('html.parser')
soup

<html><body><p>html.parser</p></body></html>

In [10]:
# 인천광역시 10개 구/군 법정동 코드 (앞 5자리)
INCHEON_DISTRICTS = {"강화군": "28710",
     "계양구": "28245",
     "남동구": "28200",
     "동구": "28140",
     "미추홀구": "28177",  # (구)남구
    "부평구": "28237",
    "서구": "28260",
     "연수구": "28185",
    "옹진군": "28720",
     "중구": "28110"
 }

In [42]:
import time
import requests
from bs4 import BeautifulSoup
import pandas as pd

BASE_URL = "http://apis.data.go.kr/1613000/RTMSDataSvcAptTradeDev/getRTMSDataSvcAptTradeDev"
SERVICE_KEY = "YPxXvm/3jO5hggkbdJFWnhh8IC3VdMlkRvZHh1pkG8eZxXw12ymm4OAW10urfUVN++TzkurcrZ5Os3Gr9P8Kwg=="
URL = f"{BASE_URL}"

# 인천 구별 데이터 저장할 딕셔너리
incheon_real_estate_data = {}

# 크롤링할 기간 설정
start_year = 2025
start_month = 1
end_year = 2025
end_month = 2


for year in range(start_year, end_year + 1):
    for month in range(1, 13):
        if year == start_year and month < start_month:
            continue
        if year == end_year and month > end_month:
            break

        deal_ymd = f"{year}{month:02}"  # 거래 연월 (YYYYMM 형식)
        print(f"⏳ {deal_ymd} 데이터 수집 시작")
    
    # 각 구별 데이터 요청       
    for gu, code in INCHEON_DISTRICTS.items():
        print(f"📌 {gu} ({code}) 데이터 요청 중...")

        params = {
            "LAWD_CD": code,
            "DEAL_YMD": deal_ymd,  # 거래 연월
            "serviceKey": SERVICE_KEY,
            "pageNo": 1,
            "numOfRows": 999
        }

        response = requests.get(URL, params=params)
        response.encoding = "utf-8"
        response.text
        
        # 응답 상태 코드 체크
        if response.status_code == 200:
            print(f"✅ {gu} 응답 상태: {response.status_code}")
            soup = BeautifulSoup(response.text, "xml")

            # 응답 XML 구조 확인
            print(f"📊 {gu} 응답 데이터 샘플:")
            print(soup.prettify()[:1000])  # 데이터 일부 출력해서 확인

            # 'item' 태그가 있는지 확인
            items = soup.find_all("item")
            if not items:
                print(f"⚠️ {gu}에 거래 데이터가 없습니다.")
            else:
                incheon_real_estate_data[gu] = items
                print(f"✅ {gu} 데이터 수집 완료 ({len(items)}건)")

                # 데이터 추출 및 검증
                all_data = []  # 구별 데이터 리스트 초기화
                for item in items:
                    data = {
                        "구": gu,
                        "거래금액": item.find("거래금액").text.strip() if item.find("거래금액") else None,
                        "건축년도": item.find("건축년도").text.strip() if item.find("건축년도") else None,
                        "법정동": item.find("법정동").text.strip() if item.find("법정동") else None,
                        "아파트": item.find("아파트").text.strip() if item.find("아파트") else None,
                        "전용면적": item.find("전용면적").text.strip() if item.find("전용면적") else None,
                        "층": item.find("층").text.strip() if item.find("층") else None,
                        "거래일": f"{item.find('년').text}-{item.find('월').text}-{item.find('일').text}"
                        if item.find('년') and item.find('월') and item.find('일') else None
                    }

                    # 데이터 유효성 검사
                    if None not in data.values():  # 모든 값이 유효한 경우에만 리스트에 추가
                        all_data.append(data)

                # DataFrame 변환
                if all_data:
                    df = pd.DataFrame(all_data)
                    print(f"✅ {gu} DataFrame 생성 완료")
                    print(df.head())

        else:
            print(f"❌ {gu} 요청 실패 (Status Code: {response.status_code})")
        
        time.sleep(0.5)  # API 요청 간격 조절

print("📦 데이터 수집 완료")

⏳ 202501 데이터 수집 시작
⏳ 202502 데이터 수집 시작
📌 강화군 (28710) 데이터 요청 중...
✅ 강화군 응답 상태: 200
📊 강화군 응답 데이터 샘플:
<?xml version="1.0" encoding="utf-8"?>
<response>
 <header>
  <resultCode>
   000
  </resultCode>
  <resultMsg>
   OK
  </resultMsg>
 </header>
 <body>
  <items>
   <item>
    <aptDong>
    </aptDong>
    <aptNm>
     용진1
    </aptNm>
    <aptSeq>
     28710-1
    </aptSeq>
    <bonbun>
     0623
    </bonbun>
    <bubun>
     0000
    </bubun>
    <buildYear>
     1989
    </buildYear>
    <buyerGbn>
     개인
    </buyerGbn>
    <cdealDay>
    </cdealDay>
    <cdealType>
    </cdealType>
    <dealAmount>
     9,250
    </dealAmount>
    <dealDay>
     25
    </dealDay>
    <dealMonth>
     2
    </dealMonth>
    <dealYear>
     2025
    </dealYear>
    <dealingGbn>
     직거래
    </dealingGbn>
    <estateAgentSggNm>
    </estateAgentSggNm>
    <excluUseAr>
     67.875
    </excluUseAr>
    <floor>
     5
    </floor>
    <jibun>
     623
    </jibun>
    <landCd>
     1
    </landCd>
    <la

In [43]:
import pandas as pd
import xml.etree.ElementTree as ET

df_incheon_real_estate_data = pd.DataFrame()

for gu, items in incheon_real_estate_data.items():
    for item in items:
        # XML 데이터를 파싱하여 Element 객체로 변환
        xml_data = str(item)

        # XML 파싱
        root = ET.fromstring(xml_data)

        # 각 항목을 딕셔너리 형태로 추출하여 리스트에 저장
        data = {
            'aptDong': root.find('aptDong').text if root.find('aptDong') is not None else '',
            'aptNm': root.find('aptNm').text if root.find('aptNm') is not None else '',
            'aptSeq': root.find('aptSeq').text if root.find('aptSeq') is not None else '',
            'bonbun': root.find('bonbun').text if root.find('bonbun') is not None else '',
            'bubun': root.find('bubun').text if root.find('bubun') is not None else '',
            'buildYear': root.find('buildYear').text if root.find('buildYear') is not None else '',
            'buyerGbn': root.find('buyerGbn').text if root.find('buyerGbn') is not None else '',
            'cdealDay': root.find('cdealDay').text if root.find('cdealDay') is not None else '',
            'cdealType': root.find('cdealType').text if root.find('cdealType') is not None else '',
            'dealAmount': root.find('dealAmount').text if root.find('dealAmount') is not None else '',
            'dealDay': root.find('dealDay').text if root.find('dealDay') is not None else '',
            'dealMonth': root.find('dealMonth').text if root.find('dealMonth') is not None else '',
            'dealYear': root.find('dealYear').text if root.find('dealYear') is not None else '',
            'dealingGbn': root.find('dealingGbn').text if root.find('dealingGbn') is not None else '',
            'estateAgentSggNm': root.find('estateAgentSggNm').text if root.find('estateAgentSggNm') is not None else '',
            'excluUseAr': root.find('excluUseAr').text if root.find('excluUseAr') is not None else '',
            'floor': root.find('floor').text if root.find('floor') is not None else '',
            'jibun': root.find('jibun').text if root.find('jibun') is not None else '',
            'landCd': root.find('landCd').text if root.find('landCd') is not None else '',
            'landLeaseholdGbn': root.find('landLeaseholdGbn').text if root.find('landLeaseholdGbn') is not None else '',
            'rgstDate': root.find('rgstDate').text if root.find('rgstDate') is not None else '',
            'roadNm': root.find('roadNm').text if root.find('roadNm') is not None else '',
            'roadNmBonbun': root.find('roadNmBonbun').text if root.find('roadNmBonbun') is not None else '',
            'roadNmBubun': root.find('roadNmBubun').text if root.find('roadNmBubun') is not None else '',
            'roadNmCd': root.find('roadNmCd').text if root.find('roadNmCd') is not None else '',
            'roadNmSeq': root.find('roadNmSeq').text if root.find('roadNmSeq') is not None else '',
            'roadNmSggCd': root.find('roadNmSggCd').text if root.find('roadNmSggCd') is not None else '',
            'roadNmbCd': root.find('roadNmbCd').text if root.find('roadNmbCd') is not None else '',
            'sggCd': root.find('sggCd').text if root.find('sggCd') is not None else '',
            'slerGbn': root.find('slerGbn').text if root.find('slerGbn') is not None else '',
            'umdCd': root.find('umdCd').text if root.find('umdCd') is not None else '',
            'umdNm': root.find('umdNm').text if root.find('umdNm') is not None else ''
        }

        # 데이터프레임으로 변환
        df = pd.DataFrame([data])

        # 기존 데이터프레임에 합침
        df_incheon_real_estate_data = pd.concat([df_incheon_real_estate_data, df], ignore_index=True)

# 결과 출력
df_incheon_real_estate_data

Unnamed: 0,aptDong,aptNm,aptSeq,bonbun,bubun,buildYear,buyerGbn,cdealDay,cdealType,dealAmount,...,roadNmBonbun,roadNmBubun,roadNmCd,roadNmSeq,roadNmSggCd,roadNmbCd,sggCd,slerGbn,umdCd,umdNm
0,,용진1,28710-1,0623,0000,1989,개인,,,9250,...,00027,00000,4271375,01,28710,0,28710,개인,25022,강화읍 관청리
1,,강화2차세광엔리치빌,28710-245,0619,0000,2004,개인,,,17900,...,00253,00001,3150025,02,28710,0,28710,개인,31025,선원면 창리
2,,강화서희스타힐스1단지,28710-496,0456,0000,2024,개인,,,22500,...,00162,00031,3150025,00,28710,0,28710,개인,31025,선원면 창리
3,,강화2차세광엔리치빌,28710-245,0619,0000,2004,개인,,,24300,...,00253,00001,3150025,02,28710,0,28710,개인,31025,선원면 창리
4,,용진3,28710-5,0394,0001,1991,개인,,,8700,...,00009,00002,4271060,00,28710,0,28710,개인,25025,강화읍 갑곳리
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2003,,하늘도시우미린1단지,28110-833,1881,0001,2012,개인,,,27000,...,00351,00000,3149078,01,28110,0,28110,개인,14500,중산동
2004,,운서SKVIEWskycityⅡ,28110-937,1598,0001,2022,개인,,,48800,...,00249,00000,3149060,00,28110,,28110,개인,14600,운남동
2005,,e편한세상영종국제도시오션하임,28110-898,1887,0004,2018,개인,,,47500,...,00112,00000,3149082,00,28110,0,28110,개인,14500,중산동
2006,15,라이프비취2차,28110-28,0027,0107,1981,개인,,,8000,...,00047,00000,4247395,01,28110,0,28110,개인,11800,항동7가


In [44]:
real_estate_schema = {
    "sggCd": "법정동시군구코드",
    "umdCd": "법정동읍면동코드",
    "landCd": "법정동지번코드",
    "bonbun": "법정동본번코드",
    "bubun": "법정동부번코드",
    "roadNm": "도로명",
    "roadNmSggCd": "도로명시군구코드",
    "roadNmCd": "도로명코드",
    "roadNmSeq": "도로명일련번호코드",
    "roadNmbCd": "도로명지상지하코드",
    "roadNmBonbun": "도로명건물본번호코드",
    "roadNmBubun": "도로명건물부번호코드",
    "umdNm": "법정동",
    "aptNm": "아파트명",
    "jibun": "지번",
    "excluUseAr": "전용면적",
    "dealYear": "계약년도",
    "dealMonth": "계약월",
    "dealDay": "계약일",
    "dealAmount": "거래금액(만원)",
    "floor": "층",
    "buildYear": "건축년도",
    "aptSeq": "단지 일련번호",
    "cdealType": "해제여부",
    "cdealDay": "해제사유발생일",
    "dealingGbn": "거래유형(중개 및 직거래 여부)",
    "estateAgentSggNm": "중개사소재지(시군구 단위)",
    "rgstDate": "등기일자",
    "aptDong": "아파트 동명",
    "slerGbn": "거래주체정보_매도자(개인/법인/공공기관/기타)",
    "buyerGbn": "거래주체정보_매수자(개인/법인/공공기관/기타)",
    "landLeaseholdGbn": "토지임대부 아파트 여부"
}


In [45]:
df_incheon_real_estate_data.rename(columns=real_estate_schema, inplace=True)
df_incheon_real_estate_data.head()

Unnamed: 0,아파트 동명,아파트명,단지 일련번호,법정동본번코드,법정동부번코드,건축년도,거래주체정보_매수자(개인/법인/공공기관/기타),해제사유발생일,해제여부,거래금액(만원),...,도로명건물본번호코드,도로명건물부번호코드,도로명코드,도로명일련번호코드,도로명시군구코드,도로명지상지하코드,법정동시군구코드,거래주체정보_매도자(개인/법인/공공기관/기타),법정동읍면동코드,법정동
0,,용진1,28710-1,623,0,1989,개인,,,9250,...,27,0,4271375,1,28710,0,28710,개인,25022,강화읍 관청리
1,,강화2차세광엔리치빌,28710-245,619,0,2004,개인,,,17900,...,253,1,3150025,2,28710,0,28710,개인,31025,선원면 창리
2,,강화서희스타힐스1단지,28710-496,456,0,2024,개인,,,22500,...,162,31,3150025,0,28710,0,28710,개인,31025,선원면 창리
3,,강화2차세광엔리치빌,28710-245,619,0,2004,개인,,,24300,...,253,1,3150025,2,28710,0,28710,개인,31025,선원면 창리
4,,용진3,28710-5,394,1,1991,개인,,,8700,...,9,2,4271060,0,28710,0,28710,개인,25025,강화읍 갑곳리


In [46]:
# 거래금액 칼럼 데이터타입 변환환
df_incheon_real_estate_data['거래금액(만원)'] = df_incheon_real_estate_data['거래금액(만원)'].str.replace(",", "").astype(int)

In [16]:
# 거래금액을 평균 값으로 그룹화
grouped_incheon_2020 = df_incheon_real_estate_data.groupby(["법정동"], as_index=False).agg({"거래금액(만원)": "mean"}).rename(columns={"거래금액(만원)": "거래금액_2020"}) 
grouped_incheon_2020

Unnamed: 0,법정동,거래금액_2020
0,가정동,37979.729730
1,가좌동,19177.313433
2,간석동,21888.982456
3,갈산동,21862.500000
4,강화읍 갑곳리,15033.333333
...,...,...
78,청학동,20770.454545
79,학익동,26708.737500
80,항동7가,17028.571429
81,화수동,11440.000000


In [23]:
grouped_incheon_2021 = df_incheon_real_estate_data.groupby(["법정동"], as_index=False).agg({"거래금액(만원)": "mean"}).rename(columns={"거래금액(만원)": "거래금액_2021"}) 
grouped_incheon_2021

Unnamed: 0,법정동,거래금액_2021
0,가정동,34805.611111
1,가좌동,22352.941176
2,간석동,23840.294118
3,갈산동,39540.000000
4,강화읍 갑곳리,13500.000000
...,...,...
66,청학동,28910.000000
67,학익동,33760.894737
68,항동7가,24053.571429
69,화수동,32500.000000


In [29]:
grouped_incheon_2022 = df_incheon_real_estate_data.groupby(["법정동"], as_index=False).agg({"거래금액(만원)": "mean"}).rename(columns={"거래금액(만원)": "거래금액_2022"}) 
grouped_incheon_2022

Unnamed: 0,법정동,거래금액_2022
0,가정동,39960.000000
1,가좌동,22394.117647
2,간석동,25424.444444
3,갈산동,27514.285714
4,강화읍 갑곳리,7575.000000
...,...,...
63,청천동,31571.428571
64,청학동,29952.333333
65,학익동,27118.750000
66,항동7가,14600.000000


In [35]:
grouped_incheon_2023 = df_incheon_real_estate_data.groupby(["법정동"], as_index=False).agg({"거래금액(만원)": "mean"}).rename(columns={"거래금액(만원)": "거래금액_2023"}) 
grouped_incheon_2023

Unnamed: 0,법정동,거래금액_2023
0,가정동,46640.000000
1,가좌동,24351.515152
2,간석동,30434.210526
3,갈산동,27821.052632
4,강화읍 갑곳리,16100.000000
...,...,...
67,청학동,19728.571429
68,학익동,31759.615385
69,항동7가,17500.000000
70,화수동,18700.000000


In [41]:
grouped_incheon_2024 = df_incheon_real_estate_data.groupby(["법정동"], as_index=False).agg({"거래금액(만원)": "mean"}).rename(columns={"거래금액(만원)": "거래금액_2024"}) 
grouped_incheon_2024

Unnamed: 0,법정동,거래금액_2024
0,가정동,54057.142857
1,가좌동,26907.142857
2,간석동,25885.966667
3,갈산동,25206.666667
4,강화읍 갑곳리,14800.000000
...,...,...
70,청학동,23400.000000
71,학익동,27188.181818
72,항동7가,16687.500000
73,화수동,20000.000000


In [47]:
grouped_incheon_2025 = df_incheon_real_estate_data.groupby(["법정동"], as_index=False).agg({"거래금액(만원)": "mean"}).rename(columns={"거래금액(만원)": "거래금액_2025"}) 
grouped_incheon_2025

Unnamed: 0,법정동,거래금액_2025
0,가정동,42131.081081
1,가좌동,26771.875000
2,간석동,30777.777778
3,갈산동,25729.166667
4,강화읍 갑곳리,16925.000000
...,...,...
70,청학동,23660.000000
71,학익동,27720.909091
72,항동7가,15625.000000
73,화수동,22900.000000


In [48]:
incheon = pd.merge(grouped_incheon_2020, grouped_incheon_2021, on='법정동', how='outer')
incheon = pd.merge(incheon, grouped_incheon_2022, on='법정동', how='outer')
incheon = pd.merge(incheon, grouped_incheon_2023, on='법정동', how='outer')
incheon = pd.merge(incheon, grouped_incheon_2024, on='법정동', how='outer')
incheon = pd.merge(incheon, grouped_incheon_2025, on='법정동', how='outer')
incheon

Unnamed: 0,법정동,거래금액_2020,거래금액_2021,거래금액_2022,거래금액_2023,거래금액_2024,거래금액_2025
0,가정동,37979.729730,34805.611111,39960.000000,46640.000000,54057.142857,42131.081081
1,가좌동,19177.313433,22352.941176,22394.117647,24351.515152,26907.142857,26771.875000
2,간석동,21888.982456,23840.294118,25424.444444,30434.210526,25885.966667,30777.777778
3,갈산동,21862.500000,39540.000000,27514.285714,27821.052632,25206.666667,25729.166667
4,강화읍 갑곳리,15033.333333,13500.000000,7575.000000,16100.000000,14800.000000,16925.000000
...,...,...,...,...,...,...,...
84,청학동,20770.454545,28910.000000,29952.333333,19728.571429,23400.000000,23660.000000
85,학익동,26708.737500,33760.894737,27118.750000,31759.615385,27188.181818,27720.909091
86,항동7가,17028.571429,24053.571429,14600.000000,17500.000000,16687.500000,15625.000000
87,화수동,11440.000000,32500.000000,,18700.000000,20000.000000,22900.000000
