In [2]:
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
import pandas as pd
import time
import re
import os

import warnings
warnings.filterwarnings('ignore')

# Raised by driver.switch_to.alert when no alert (popup) is open; caught in the scraping loop
from selenium.common.exceptions import NoAlertPresentException

In [3]:
# TAAS GIS map page that the rest of the notebook drives.
url = "https://taas.koroad.or.kr/gis/mcm/mcl/initMap.do?menuId=GIS_GMP_STS_RSN"

# Chrome is started with the experimental "detach" option switched on
# (presumably so the browser window outlives the cell that launched it -- confirm).
options = Options()
options.add_experimental_option("detach", True)

driver = webdriver.Chrome(options=options)
driver.get(url)

# Snapshot of the page as first loaded; later cells read the year <option>s from it.
html = driver.page_source
soup = BeautifulSoup(markup=html, features='html.parser')

In [None]:
def get_files(wait_seconds=10):
    """Trigger the download in the most recently opened window, then return to the main window.

    Uses the notebook-level ``driver``. The last entry of ``driver.window_handles``
    is treated as the popup; the first element matching ``body > div > input`` in it
    is clicked, the function sleeps so the download can finish, and the popup is
    closed.

    Parameters
    ----------
    wait_seconds : int or float, default 10
        Seconds to sleep after clicking the download element.

    Notes
    -----
    * If only one window is open (no popup appeared) nothing is clicked and nothing
      is closed -- closing here would close the main window.
    * The popup is closed and focus is returned to the first window even when the
      download element is missing or the click raises.
    """
    handles = driver.window_handles
    if len(handles) < 2:
        # No popup to work with; leave the main window untouched.
        return

    main_window = handles[0]
    driver.switch_to.window(handles[-1])
    try:
        download = "body > div > input"
        buttons = driver.find_elements(By.CSS_SELECTOR, download)
        if buttons:
            buttons[0].click()
            # Fixed pause: gives the browser time to finish saving the file.
            time.sleep(wait_seconds)
    finally:
        # Always close the popup and hand control back to the main window,
        # otherwise the caller's next find_element would run against the popup.
        driver.close()
        driver.switch_to.window(main_window)

In [None]:
"""
Collect accident data for 2014 ~ 2024
Nationwide, with every accident type selected

Selectors
Accident year : #ptsRafYearStart ~ #ptsRafYearEnd
Province (시도) : #ptsRafSido
City/district (시군구) : #ptsRafSigungu
Accident type : #ptsRafCh1AccidentContent > li:nth-child(n) > input[type=checkbox]     * n = 1 ~ 4


* Each search result is saved in the "Downloads" folder as "사고분석-지역별 (number).xlsx".
"""

# Walk every province -> city/district -> year window on the search form,
# run the search, and download the result list through get_files().

# <option> elements of the start-year dropdown, taken from the current `soup`.
years = soup.select("#ptsRafYearStart > option")

type_accident = "#ptsRafCh1AccidentContent > li > input[type=checkbox]"
checkboxs = driver.find_elements(By.CSS_SELECTOR, type_accident)

# Reset: untick every accident-type checkbox that is currently ticked.
for checkbox in checkboxs:
    if checkbox.is_selected():
        checkbox.click()

# Tick the first four accident-type checkboxes (elements are re-located before each click).
for i in range(4):
    driver.find_elements(By.CSS_SELECTOR, type_accident)[i].click()


# Province names, read from the live page.
# The id selector needs the leading '#': without it BeautifulSoup looks for a
# <ptsRafSido> tag, finds nothing, and the loop below never runs.
soup = BeautifulSoup(driver.page_source, 'html.parser')
provinces = soup.select("#ptsRafSido > option")
provinces = [x.text for x in provinces]

if not provinces:
    # Fail loudly instead of silently scraping nothing.
    raise RuntimeError("No options found under #ptsRafSido; the page may not have finished loading.")

# Start-year dropdown indexes to query, stepping by 3 (original note: 2014 ~ 2024).
# Loop-invariant, so computed once.
year_start_indexes = range(len(years[7:])-1, -1, -3)

for province in provinces:
    select_province = Select(driver.find_element(By.ID, 'ptsRafSido'))
    select_province.select_by_visible_text(province)
    time.sleep(1)

    # City/district names are re-read after each province selection.
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    cities = soup.select('#ptsRafSigungu > option')
    cities = [x.text for x in cities]

    # The first option is skipped (presumably a placeholder / "all" entry -- confirm).
    for city in cities[1:]:
        select_city = Select(driver.find_element(By.ID, 'ptsRafSigungu'))
        select_city.select_by_visible_text(city)
        time.sleep(1)

        for idx in year_start_indexes:
            # Start year
            driver.find_element(By.ID, 'ptsRafYearStart').click()
            start_ = Select(driver.find_element(By.ID, 'ptsRafYearStart'))
            start_.select_by_index(idx)

            # End year: always the first option. Per the original author's note this
            # yields a three-year window.
            # NOTE(review): confirm how the end-year options depend on the start year.
            driver.find_element(By.ID, 'ptsRafYearEnd').click()
            end_ = Select(driver.find_element(By.ID, 'ptsRafYearEnd'))
            end_.select_by_index(0)

            # Run the search
            search_button = "#regionAccidentFind > div.condition-wrap > p > a"
            driver.find_elements(By.CSS_SELECTOR, search_button)[0].click()

            time.sleep(5)

            # Flag: 1 while the search is assumed to have produced results.
            if_result = 1

            # An alert after the search is treated as "no result": accept it and skip the download.
            try:
                alert = driver.switch_to.alert
                alert.accept()
                if_result = 0
            except NoAlertPresentException:
                pass

            if if_result:
                # Open the list view, then download from the window it opens.
                show_list = "#regionAccidentFind > div.searc-total > div.btn > p > a"
                driver.find_elements(By.CSS_SELECTOR, show_list)[0].click()

                get_files()


In [None]:
# Folder the browser saved the TAAS exports into (machine-specific; edit as needed).
fpath = "C:/Users/USER/Downloads"

# Keep only Excel workbooks whose name contains "사고분석-지역별".
# Unfinished downloads (e.g. "*.crdownload") and Excel lock files ("~$...") also
# contain that text but cannot be read by read_excel, so they are skipped.
# sorted() makes the load order deterministic (os.listdir order is arbitrary).
taas_files = sorted(
    f"{fpath}/{x}"
    for x in os.listdir(fpath)
    if '사고분석-지역별' in x
    and x.lower().endswith(('.xlsx', '.xls'))
    and not x.startswith('~$')
)

if not taas_files:
    # Clear message instead of an IndexError on taas_files[0].
    raise FileNotFoundError(f"No '사고분석-지역별' Excel files found in {fpath}")

# Read each workbook once and concatenate in a single call; ignore_index=True
# gives the same fresh 0..n-1 index the old reset_index(drop=True) produced.
df = pd.concat([pd.read_excel(path) for path in taas_files], ignore_index=True)

In [None]:
# Re-order the rows by the "구분번호" column and renumber the index from 0.
df = df.sort_values(by="구분번호", ignore_index=True)

In [None]:
# Write the merged table to "taas_df.csv" inside the download folder, without the index column.
csv_path = f"{fpath}/taas_df.csv"
df.to_csv(csv_path, index=False)