In [4]:

from selenium import webdriver
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select, WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from datetime import datetime as dt
from datetime import timedelta
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoAlertPresentException, TimeoutException


# 팝업창 에러 제어
from selenium.common.exceptions import NoAlertPresentException
import warnings
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt
import time
import os
import shutil



## 창 닫힘 방지 옵션
options = Options()
options.add_experimental_option("detach", True)

url = "https://tmacs.kotsa.or.kr/web/TG/TG300/TG3100/Tg2127.jsp?mid=S1810"   

driver = webdriver.Chrome(options=options)
driver.get(url)

html = driver.page_source
soup = BeautifulSoup(html, 'html.parser')


wait = WebDriverWait(driver, 10)


# ---다운로드 폴더에 새로운 파일이 생길때까지 기다리는 함수에 적용하기 위해서 자신의 다운로드 폴더 경로를 하기!
# # 예시: DOWNLOAD_DIR = "C:/Users/YourName/Downloads"
DOWNLOAD_DIR = r"C:\Users\mstel\Downloads"


def handle_no_data_popup(driver):
    """팝업창에서 '검색 결과가 없습니다' 메시지를 닫고 True/False 반환"""
    try:
        WebDriverWait(driver, 1).until(EC.alert_is_present())
        alert = driver.switch_to.alert
        msg = alert.text
        alert.accept()
        print(f"⚠️ 팝업 메시지: {msg}")
        return "검색 결과가 없습니다" in msg
    except (TimeoutException, NoAlertPresentException):
        return False
 

def wait_for_download_complete(download_dir, timeout=30):
    """다운로드 폴더에 새로운 파일이 생길 때까지 대기"""
    before_files = set(os.listdir(download_dir))
    elapsed = 0
    while elapsed < timeout:
        time.sleep(1)
        after_files = set(os.listdir(download_dir))
        new_files = after_files - before_files
        if new_files:
            print(f"✅ 다운로드 완료: {new_files}")
            return list(new_files)[0]  # 새로 생긴 파일 이름 반환
        elapsed += 1
    raise TimeoutError("다운로드가 완료되지 않았습니다.")




def click_detail_button(driver, wait):
    """조회 결과에서 사고현황(세부) 버튼 클릭 → 팝업 열림"""
    detailed_btn = wait.until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, "div.btn_box.type02 > a.btn.morebtn"))
    )
    driver.execute_script("arguments[0].scrollIntoView(true);", detailed_btn)
    driver.execute_script("arguments[0].click();", detailed_btn)
    


import pandas as pd

def handle_popup_download(driver, wait):
    """팝업 창 전환 → 기준년도 파싱 → 다운로드 → 엑셀에 기준년도 컬럼 추가 → 파일명 통일 저장 → 닫기"""
    main_window = driver.current_window_handle

    # 팝업으로 전환
    wait.until(lambda d: len(d.window_handles) > 1)
    for handle in driver.window_handles:
        if handle != main_window:
            driver.switch_to.window(handle)
            break

    time.sleep(2)

    # 두 번째 팝업 처리
    handle_no_data_popup(driver)

    # ------------------------------
    # 기준년도 파싱
    # ------------------------------
    popup_html = driver.page_source
    soup = BeautifulSoup(popup_html, 'html.parser')
    기준년도 = soup.select_one(
        "#new_popup > div.pop_cont > table > tbody > tr > td > dl:nth-child(1) > dd"
    ).text.strip()

    # ------------------------------
    # 엑셀 다운로드
    # ------------------------------
    download_btn = wait.until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, "div.btn_box.type02.mar_t0 > a"))
    )
    driver.execute_script("arguments[0].click();", download_btn)
    downloaded_file = wait_for_download_complete(DOWNLOAD_DIR, timeout=60)

    # ------------------------------
    # 엑셀에 기준년도 컬럼 추가
    # ------------------------------
    downloaded_path = os.path.join(DOWNLOAD_DIR, downloaded_file)
    df = pd.read_excel(downloaded_path)
    df['기준년도'] = 기준년도

    # ------------------------------
    # 파일명 통일 (자동 번호 붙이기)
    # ------------------------------
    base_filename = "사고분석-지역별.xlsx"
    save_path = os.path.join(DOWNLOAD_DIR, base_filename)

    if os.path.exists(save_path):
        i = 1
        while True:
            numbered_path = os.path.join(DOWNLOAD_DIR, f"사고분석-지역별 ({i}).xlsx")
            if not os.path.exists(numbered_path):
                save_path = numbered_path
                break
            i += 1

    df.to_excel(save_path, index=False)
    print(f"✅ 최종 저장 완료: {save_path}")

    # 팝업 닫기 후 메인으로 복귀
    driver.close()
    driver.switch_to.window(main_window)




    




In [5]:


# 드롭다운 요소 찾기
year_select = Select(driver.find_element(By.ID, "Year"))
years = year_select.options

# 2016~2023 범위만 value로 가져오기
filtered_years = sorted([
    opt.get_attribute("value") 
    for opt in years 
    if 2016 <= int(opt.get_attribute("value")) <= 2023
], key=lambda x: int(x))  # 숫자 순 정렬

# 순서대로 선택
for year_value in filtered_years:
    year_select = Select(driver.find_element(By.ID, "Year"))
    year_select.select_by_value(year_value)

    # --- 광역 반복 ---
    sido_select = Select(driver.find_element(By.ID, "sido"))
    sidos = [opt.get_attribute("value") for opt in sido_select.options if opt.get_attribute("value")]

    for sido_val in sidos:
        sido_select = Select(driver.find_element(By.ID, "sido"))
        sido_select.select_by_value(sido_val)
        sido_text = sido_select.first_selected_option.text

        # --- 기초 반복 ---
        wait.until(EC.presence_of_element_located((By.ID, "jijace")))
        jijace_select = Select(driver.find_element(By.ID, "jijace"))
        jijaces = [opt.get_attribute("value") for opt in jijace_select.options if opt.get_attribute("value")]

        for jijace_val in jijaces:
            jijace_select = Select(driver.find_element(By.ID, "jijace"))
            jijace_select.select_by_value(jijace_val)
            jijace_text = jijace_select.first_selected_option.text

            # 조회 버튼 클릭
            download_btn = driver.find_element(By.CSS_SELECTOR, "div.btn_wrap > a")
            download_btn.click()

            # 첫 번째 팝업 처리
            if handle_no_data_popup(driver):
                print(f"[⚠️ 데이터 없음] {year_value}_{sido_text}_{jijace_text} → 건너뜀")
                continue

            # 2️⃣ 사고현황 버튼 클릭
            click_detail_button(driver, wait)
            time.sleep(1)

            # 3️⃣ 팝업에서 다운로드 + 기준년도 컬럼 추가 + 파일명 통일 저장
            handle_popup_download(driver, wait)

            time.sleep(2)  # 안정화 대기



✅ 다운로드 완료: {'사고다발지점 상세정보.xls'}
✅ 최종 저장 완료: C:\Users\mstel\Downloads\사고분석-지역별.xlsx
✅ 다운로드 완료: {'사고다발지점 상세정보 (1).xls'}
✅ 최종 저장 완료: C:\Users\mstel\Downloads\사고분석-지역별 (1).xlsx


InvalidSessionIdException: Message: invalid session id: session deleted as the browser has closed the connection
from disconnected: not connected to DevTools
  (Session info: chrome=138.0.7204.169); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#invalidsessionidexception
Stacktrace:
	GetHandleVerifier [0x0x7ff765d7e415+77285]
	GetHandleVerifier [0x0x7ff765d7e470+77376]
	(No symbol) [0x0x7ff765b49a6a]
	(No symbol) [0x0x7ff765b35cf5]
	(No symbol) [0x0x7ff765b5a7fa]
	(No symbol) [0x0x7ff765bcfc05]
	(No symbol) [0x0x7ff765bf0192]
	(No symbol) [0x0x7ff765bc83e3]
	(No symbol) [0x0x7ff765b91521]
	(No symbol) [0x0x7ff765b922b3]
	GetHandleVerifier [0x0x7ff766061efd+3107021]
	GetHandleVerifier [0x0x7ff76605c29d+3083373]
	GetHandleVerifier [0x0x7ff76607bedd+3213485]
	GetHandleVerifier [0x0x7ff765d9884e+184862]
	GetHandleVerifier [0x0x7ff765da055f+216879]
	GetHandleVerifier [0x0x7ff765d87084+113236]
	GetHandleVerifier [0x0x7ff765d87239+113673]
	GetHandleVerifier [0x0x7ff765d6e298+11368]
	BaseThreadInitThunk [0x0x7ffb61411fe4+20]
	RtlUserThreadStart [0x0x7ffb6385ef91+33]


In [18]:
# 파일 시간 순으로 정렬
fpath = "./data3"

if os.path.exists(f'{fpath}/사고분석-지역별.xlsx'):
	os.rename(f'{fpath}/사고분석-지역별.xlsx', f'{fpath}/사고분석-지역별 (0).xlsx')

files_with_time = [(f, os.path.getmtime(os.path.join(fpath, f)))
                   for f in os.listdir(fpath) if os.path.isfile(os.path.join(fpath, f))]

files_with_time.sort(key=lambda x: x[1])

sorted_files = [f[0] for f in files_with_time]
print(sorted_files)

['사고분석-지역별 (0).xlsx', '사고분석-지역별 (1).xlsx', 'test.csv']


In [11]:
# 파일 합치기
xlsx_files = [f"{fpath}/{f}" for f in sorted_files]

df = pd.read_excel(xlsx_files[0], header=[0,1,2])
for f in xlsx_files[1:]:
  tmp = pd.read_excel(f, header=[0,1,2])
  df = pd.concat([df, tmp])

df.reset_index(drop=True, inplace=True)

In [12]:
# 결과 확인
print(df.head())
df.tail()

  사고다발지점 상세정보         Unnamed: 1         Unnamed: 2 Unnamed: 3  \
          지자체 Unnamed: 1_level_1                지점명    발생건수(건)   
           광역                 기초 Unnamed: 2_level_2         건수   
0          서울                강남구           신논현역 사거리         80   
1          서울                강남구             학동역사거리         79   
2          서울                강남구    하나저축은행 강남지점(남쪽)         54   
3          서울                강남구     영동세브란스(강남세브란스)         47   
4          서울                강남구          도산공원앞 사거리         49   

          Unnamed: 4         Unnamed: 5         Unnamed: 6         Unnamed: 7  \
  Unnamed: 4_level_1 Unnamed: 5_level_1 Unnamed: 6_level_1 Unnamed: 7_level_1   
                  사망                 중상                 경상                 부상   
0                  1                 16                 53                 10   
1                  0                 13                 57                  9   
2                  0                 13                 37        

Unnamed: 0_level_0,사고다발지점 상세정보,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,기준년도
Unnamed: 0_level_1,지자체,Unnamed: 1_level_1,지점명,발생건수(건),Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,사상자수(명),...,Unnamed: 15_level_1,Unnamed: 16_level_1,ACM_PLC_NO,SIDO_CD,JIJACE_CD,MOTOCY_CNT,CYCLE_CNT,WALK_CNT,DRK,2016년
Unnamed: 0_level_2,광역,기초,Unnamed: 2_level_2,건수,사망,중상,경상,부상,대형\n사고,사망,...,심각도,통합지수,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,2016년
57,서울,강동구,천호사거리(천호역 앞),11,0,4,6,1,0,0,...,2.033333,1.833333,2016110000000000.0,11000.0,11740.0,1.0,0.0,0.0,1.0,2016년
58,서울,강동구,길동 사거리(휴다임타워),7,0,2,5,0,0,0,...,2.566667,1.926667,2016110000000000.0,11000.0,11740.0,1.0,0.0,0.0,2.0,2016년
59,서울,강동구,천호역 삼거리(2번출구 북쪽),14,0,3,10,1,0,0,...,1.7,1.7,2016110000000000.0,11000.0,11740.0,1.0,2.0,0.0,0.0,2016년
60,서울,강동구,길동 사거리(휴다임타워),7,0,1,5,1,0,0,...,1.533333,1.213333,2016110000000000.0,11000.0,11740.0,1.0,0.0,0.0,1.0,2016년
61,,,합계,697,7,245,409,36,0,7,...,,,,,,,,,,2016년


In [16]:
# csv 파일로 내보내기 - 파일명 확인.
df.to_csv("./data3/test.csv", index=False)

In [17]:
import pandas as pd
# ------------- 중복 체크 -------------
df_2016_2017 = pd.read_csv("test.csv", header=[0,1,2])
df_2016_2017

Unnamed: 0_level_0,사고다발지점 상세정보,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,기준년도
Unnamed: 0_level_1,지자체,Unnamed: 1_level_1,지점명,발생건수(건),Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,사상자수(명),...,Unnamed: 15_level_1,Unnamed: 16_level_1,ACM_PLC_NO,SIDO_CD,JIJACE_CD,MOTOCY_CNT,CYCLE_CNT,WALK_CNT,DRK,2016년
Unnamed: 0_level_2,광역,기초,Unnamed: 2_level_2,건수,사망,중상,경상,부상,대형\n사고,사망,...,심각도,통합지수,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,2016년
0,서울,강남구,신논현역 사거리,80,1,16,53,10,0,1,...,13.033333,11.566667,2.016110e+15,11000.0,11680.0,3.0,0.0,0.0,9.0,2016년
1,서울,강남구,학동역사거리,79,0,13,57,9,0,0,...,12.466667,10.973333,2.016110e+15,11000.0,11680.0,3.0,0.0,0.0,12.0,2016년
2,서울,강남구,하나저축은행 강남지점(남쪽),54,0,13,37,4,0,0,...,11.966667,9.873333,2.016110e+15,11000.0,11680.0,6.0,0.0,0.0,11.0,2016년
3,서울,강남구,영동세브란스(강남세브란스),47,0,19,27,1,0,0,...,12.033333,10.073333,2.016110e+15,11000.0,11680.0,2.0,1.0,0.0,15.0,2016년
4,서울,강남구,도산공원앞 사거리,49,0,14,33,2,0,0,...,11.466667,9.506667,2.016110e+15,11000.0,11680.0,3.0,0.0,0.0,12.0,2016년
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,서울,강동구,천호사거리(천호역 앞),11,0,4,6,1,0,0,...,2.033333,1.833333,2.016110e+15,11000.0,11740.0,1.0,0.0,0.0,1.0,2016년
58,서울,강동구,길동 사거리(휴다임타워),7,0,2,5,0,0,0,...,2.566667,1.926667,2.016110e+15,11000.0,11740.0,1.0,0.0,0.0,2.0,2016년
59,서울,강동구,천호역 삼거리(2번출구 북쪽),14,0,3,10,1,0,0,...,1.700000,1.700000,2.016110e+15,11000.0,11740.0,1.0,2.0,0.0,0.0,2016년
60,서울,강동구,길동 사거리(휴다임타워),7,0,1,5,1,0,0,...,1.533333,1.213333,2.016110e+15,11000.0,11740.0,1.0,0.0,0.0,1.0,2016년
