In [1]:
from IPython.display import HTML, display

# Notebook-wide CSS overrides: container width, code/input fonts, output font
# size and weight, prompt width, TOC padding, and list/DataFrame font sizes.
NOTEBOOK_CSS = """
<style>
div.container{width:86% !important;}
div.cell.code_cell.rendered{width:100%;}
div.CodeMirror {font-family:Consolas; font-size:12pt;}
div.output {font-size:15pt; font-weight:bold;}
div.input {font-family:Consolas; font-size:12pt;}
div.prompt {min-width:70px;}
div#toc-wrapper{padding-top:120px;}
div.text_cell_render ul li{font-size:12pt;padding:5px;}
table.dataframe{font-size:15px;}
</style>
"""

display(HTML(NOTEBOOK_CSS))

# 유동인구 데이터 수집

In [3]:
# 필요 라이브러리 호출
import os
import time
import warnings
import xml.etree.ElementTree as ET

import dotenv
import matplotlib.pyplot as plt
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [4]:
# Suppress every warning raised from this point on in the session.
warnings.simplefilter(action="ignore")

In [5]:
# Load the API credentials (python-dotenv reads them into the process
# environment), then pick up the key used for the Seoul open-data API requests.
dotenv.load_dotenv()
API_KEY = os.getenv('SEOUL_DATA')

# Fail fast: os.getenv returns None for a missing variable, and None would
# otherwise be interpolated into every request URL as the literal text "None",
# surfacing much later as an unrelated parsing error.
if not API_KEY:
    raise RuntimeError(
        "Environment variable SEOUL_DATA is not set; "
        "define it in .env or the environment before collecting data."
    )

In [6]:
# Spreadsheet listing the major Seoul places to query through the API.
places_file = 'data/서울시 주요 120장소 목록.xlsx'
df = pd.read_excel(places_file)

# Display the table to check each place's name and category.
df

Unnamed: 0,CATEGORY,NO,AREA_CD,AREA_NM,ENG_NM
0,관광특구,1,POI001,강남 MICE 관광특구,Gangnam MICE Special Tourist Zone
1,관광특구,2,POI002,동대문 관광특구,Dongdaemun Fashion Town Special Tourist Zone
2,관광특구,3,POI003,명동 관광특구,Myeong-dong Namdaemun Bukchang-dong Da-dong Mu...
3,관광특구,4,POI004,이태원 관광특구,Itaewon Special Tourist Zone
4,관광특구,5,POI005,잠실 관광특구,Jamsil Special Tourist Zone
...,...,...,...,...,...
115,공원,116,POI124,서대문독립공원,Seodaemun Independence Park
116,공원,117,POI125,안양천,Anyangcheon River
117,공원,118,POI126,여의서로,Yeouiseoro
118,공원,119,POI127,올림픽공원,Olympic Park


In [7]:
# Place names as a plain list, so a for loop can request data for each place in turn.
spot_name = df.loc[:, 'AREA_NM'].to_list()

In [15]:
# Collect one snapshot for every place in spot_name.
# FIELD_TAGS maps each output column to the tag it is read from in the response.
FIELD_TAGS = {
    '장소구분': "AREA_NM",
    '최소유동인구': "AREA_PPLTN_MIN",
    '최대유동인구': "AREA_PPLTN_MAX",
    '습도': "HUMIDITY",
    '기온': "TEMP",
    '날씨': "SKY_STTS",
    '미세먼지지수': "AIR_IDX_MVL",
    '정보수집시간': "PPLTN_TIME",
}

data_list = []
for spot in spot_name:
    url = f"http://openapi.seoul.go.kr:8088/{API_KEY}/xml/citydata/1/5/" + spot
    try:
        # The timeout keeps one unresponsive request from hanging the whole cell;
        # raise_for_status turns HTTP error statuses into exceptions.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        # The request URL (and therefore the API key) can appear in the
        # exception text, so redact it before it lands in the cell output.
        reason = str(e).replace(str(API_KEY), "***")
        print(f"[{spot}] 수집 실패: {reason}")
        continue

    soup = BeautifulSoup(response.text, features='html.parser')
    nodes = {column: soup.select_one(tag) for column, tag in FIELD_TAGS.items()}

    # select_one returns None when a tag is absent; skip the place instead of
    # crashing on `.text` so the remaining places are still collected.
    missing = [FIELD_TAGS[column] for column, node in nodes.items() if node is None]
    if missing:
        print(f"[{spot}] 수집 실패: missing tags {missing}")
        continue

    data_list.append({column: node.text for column, node in nodes.items()})

In [16]:
# Tabulate the collected records for inspection.
pd.DataFrame(data=data_list)

Unnamed: 0,AREA_NM,AREA_PPLTN_MIN,AREA_PPLTN_MAX,HUMIDITY,TEMP,SKY_STTS,AIR_IDX_MVL,PPLTN_TIME
0,강남 MICE 관광특구,16000,18000,46,30.9,맑음,76.0,2025-06-15 12:55
1,동대문 관광특구,20000,22000,47,31.8,구름많음,87.0,2025-06-15 12:55
2,명동 관광특구,38000,40000,47,31.8,구름많음,87.0,2025-06-15 12:55
3,이태원 관광특구,8000,8500,49,30.1,구름많음,78.0,2025-06-15 12:55
4,잠실 관광특구,74000,76000,46,32.2,구름많음,73.0,2025-06-15 12:55
...,...,...,...,...,...,...,...,...
115,서대문독립공원,1000,1500,47,31.8,구름많음,87.0,2025-06-15 12:55
116,안양천,1000,1500,42,31.4,구름많음,81.0,2025-06-15 12:55
117,여의서로,200,300,50,29.3,맑음,79.0,2025-06-15 12:55
118,올림픽공원,18000,20000,46,32.2,구름많음,73.0,2025-06-15 12:55


In [None]:
import time
import datetime
import requests
from bs4 import BeautifulSoup
import pandas as pd
import os
# Output locations: the cumulative CSV file and the folder for per-run backups.
base_output, backup_folder = "seoul_population_data.csv", "backups"

# Create the backup folder if it does not exist yet (no error when it already does).
os.makedirs(name=backup_folder, exist_ok=True)

def save_data(data_list, output_path=None, backup_dir=None):
    """Append one batch of records to the cumulative CSV and write a backup copy.

    Args:
        data_list: List of record dicts gathered in a single collection run.
        output_path: CSV file that accumulates every batch. Defaults to the
            module-level ``base_output``.
        backup_dir: Folder that receives one timestamped CSV per call.
            Defaults to the module-level ``backup_folder``.

    Returns:
        None. An empty ``data_list`` is ignored and no file is touched.
    """
    if output_path is None:
        output_path = base_output
    if backup_dir is None:
        backup_dir = backup_folder

    # An empty batch would create a header-less CSV; every later append is
    # written without a header, so the file would never get column names.
    if not data_list:
        return

    df = pd.DataFrame(data_list)

    # Write the header only when starting the file (missing or zero bytes);
    # otherwise append the rows alone.
    is_new_file = not os.path.exists(output_path) or os.path.getsize(output_path) == 0
    df.to_csv(
        output_path,
        mode='w' if is_new_file else 'a',
        index=False,
        encoding='utf-8-sig',
        header=is_new_file,
    )

    # Per-run backup. The name has minute resolution, so calls within the same
    # minute overwrite the same backup file.
    os.makedirs(backup_dir, exist_ok=True)
    timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M')
    backup_file = os.path.join(backup_dir, f"backup_{timestamp}.csv")
    df.to_csv(backup_file, index=False, encoding='utf-8-sig')

# Repeat the collection for one year at one run per hour (24 * 365 = 8760 runs).
# For a quick test, lower TOTAL_RUNS (e.g. to 3).
TOTAL_RUNS = 8760
INTERVAL_SECONDS = 3600

for i in range(TOTAL_RUNS):
    cycle_start = time.monotonic()
    data_list = []

    for spot in spot_name:
        try:
            url = f"http://openapi.seoul.go.kr:8088/{API_KEY}/xml/citydata/1/5/{spot}"
            response = requests.get(url, timeout=10)
            # Report HTTP error statuses as such instead of as a parsing failure.
            response.raise_for_status()
            soup = BeautifulSoup(response.text, features='html.parser')

            data = {
                '장소구분': soup.select_one("AREA_NM").text,
                '최소유동인구': soup.select_one("AREA_PPLTN_MIN").text,
                '최대유동인구': soup.select_one("AREA_PPLTN_MAX").text,
                '습도': soup.select_one("HUMIDITY").text,
                '기온': soup.select_one("TEMP").text,
                '날씨': soup.select_one("SKY_STTS").text,
                '미세먼지지수': soup.select_one("AIR_IDX_MVL").text,
                '정보수집시간': soup.select_one("PPLTN_TIME").text,
                '수집시각': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                '장소이름': spot
            }
            data_list.append(data)

        except Exception as e:
            # Best-effort by design: one failing place must not stop the run.
            # The request URL (and therefore the API key) can appear in the
            # exception text, so redact it before printing.
            reason = str(e).replace(str(API_KEY), "***")
            print(f"[{spot}] 수집 실패: {reason}")

    save_data(data_list)

    print(f"[{i+1}/{TOTAL_RUNS}] ✅ 저장 및 백업 완료: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    # Wait for the remainder of the interval so runs stay one hour apart
    # regardless of how long collection took; skip the wait after the last run.
    if i + 1 < TOTAL_RUNS:
        elapsed = time.monotonic() - cycle_start
        time.sleep(max(0.0, INTERVAL_SECONDS - elapsed))

[1/8760] ✅ 저장 및 백업 완료: 2025-06-15 16:59:01
