In [None]:
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select

import pandas as pd
import time


# Popup (alert) error handling: exception used later to detect that no alert is open
from selenium.common.exceptions import NoAlertPresentException
import warnings
# Silence every Python warning for the rest of the session
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt

## Option that keeps the browser window from closing
options = Options()
options.add_experimental_option("detach", True)

url = "https://taas.koroad.or.kr/gis/mcm/mcl/initMap.do?menuId=GIS_GMP_STS_RSN"   # Traffic Accident Analysis System (TAAS) GIS url

# Launch Chrome with the options above and open the GIS page
driver = webdriver.Chrome(options=options)
driver.get(url)

# Snapshot of the page right after loading, parsed with BeautifulSoup.
# NOTE(review): `soup` is re-created from a fresh page_source before each later use in this file,
# and `html` is not referenced again in the visible cells.
html = driver.page_source
soup = BeautifulSoup(html, 'html.parser')

In [None]:
### Move to the "convergence analysis" menu ###
# Clicks the link inside the element with id "menuRoadNoSearch".
# NOTE(review): the id suggests a road-number search panel rather than the menu named above — confirm.
driver.find_element(By.CSS_SELECTOR, "#menuRoadNoSearch > a").click()
# Short pause after the click (presumably to let the panel render — TODO confirm it is long enough)
time.sleep(0.3)

In [None]:
# Empty result table that the scraping loop fills in.
# Columns, in order: '구분번호' (ID number), '시군구' (city/county/district),
# '도로종류' (road type), '도로명' (road name).
region_df = pd.DataFrame(columns=['구분번호', '시군구', '도로종류', '도로명'])
# region_df.head()

In [None]:
def get_data(max_tries=700, scroll_step=200, scroll_pause=0.3):
  """Scrape the accident list shown in the most recently opened browser window.

  Switches the notebook-level ``driver`` to its last window handle, then
  repeatedly parses the rendered grid rows and scrolls the grid viewport so
  that further rows get rendered. Afterwards the list window is closed and
  the driver is switched back to the first window, even if scraping fails.

  Args:
    max_tries: Upper bound on parse/scroll rounds (guards against an endless loop).
    scroll_step: Pixels added to the viewport's ``scrollTop`` after each round.
    scroll_pause: Seconds to wait after each scroll before parsing again.

  Returns:
    A ``(numbers, regions)`` tuple of two equally long lists of strings in
    first-seen order: the stripped text of each row's ``.slick-cell.l0.r0``
    cell and of its ``.slick-cell.l3.r3`` cell. Rows whose first-cell text
    has already been collected are skipped.

  Raises:
    Selenium exceptions (for example when the ``slick-viewport`` element is
    missing) propagate to the caller after the window state is restored.
  """
  numbers = []
  regions = []
  seen_ids = set()

  handles = driver.window_handles
  main_handle = handles[0]
  # Only a separately opened window may be closed afterwards; closing the sole
  # window would end the whole browsing session.
  popup_opened = len(handles) > 1

  driver.switch_to.window(handles[-1])

  try:
    time.sleep(1)        # adjustable: lets the list window finish loading

    viewport = driver.find_element(By.CLASS_NAME, "slick-viewport")

    previous_count = 0

    for _ in range(max_tries):
      # Extract the rows that are currently rendered in the page source
      soup = BeautifulSoup(driver.page_source, 'html.parser')
      items = soup.select("#accidentInfoListView > div.slick-viewport > div > div")

      for item in items:
        number_cell = item.select_one(".slick-cell.l0.r0")
        region_cell = item.select_one(".slick-cell.l3.r3")

        # Skip elements that do not carry both cells (not a data row)
        if number_cell is None or region_cell is None:
          continue

        number = number_cell.text.strip()
        if number in seen_ids:
          continue

        seen_ids.add(number)
        numbers.append(number)
        regions.append(region_cell.text.strip())

      # Stop once a round yields no new rows
      if len(seen_ids) == previous_count:
        break
      previous_count = len(seen_ids)

      # Scroll down so the next rows get rendered
      driver.execute_script("arguments[0].scrollTop += arguments[1]", viewport, scroll_step)
      time.sleep(scroll_pause)

  finally:
    # Always restore the window state so the caller can keep using the main page
    if popup_opened:
      driver.close()
    driver.switch_to.window(main_handle)

  return numbers, regions

In [None]:
# CSS selectors used below; they never change, so they are defined once up front.
type_accident = "#ptsNafCh1AccidentContent > li > input[type=checkbox]"
search_button = "#roadNumberAccidentFind > div:nth-child(4) > p > a"
show_list = "#roadNumberAccidentFind > div.searc-total > div.btn > p > a"

checkboxs = driver.find_elements(By.CSS_SELECTOR, type_accident)

# Reset the checkbox selection
for checkbox in checkboxs:
  if checkbox.is_selected():
    checkbox.click()

# Select all accident types: clicks the first four checkboxes, looking the
# list up again before every click.
for i in range(4):
  driver.find_elements(By.CSS_SELECTOR, type_accident)[i].click()


time.sleep(0.1)

soup = BeautifulSoup(driver.page_source, 'html.parser')
years = soup.select("#ptsNafYearStart > option")

# Scraped rows are buffered in a plain list and appended to region_df once,
# because enlarging a DataFrame one row at a time copies it on every insert.
scraped_rows = []

try:
  # idx counts down in steps of 3, starting at the last index of years[7:].
  # Original note: 2014 ~ 2024.
  # NOTE(review): which years these indexes hit depends on the option order on the page — confirm.
  for idx in range(len(years[7:])-1, -1, -3):

    # Choose the start year
    driver.find_element(By.ID, 'ptsNafYearStart').click()
    start_ = Select(driver.find_element(By.ID, 'ptsNafYearStart'))
    start_.select_by_index(idx)

    # Choose the end year
    driver.find_element(By.ID, 'ptsNafYearEnd').click()
    end_ = Select(driver.find_element(By.ID, 'ptsNafYearEnd'))
    end_.select_by_index(0)   # first option -> per the original note, queries three years of data

    ### Road type ###
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    rranks = [x.text for x in soup.select('#ptsNafRoadRank > option')]

    for rank in rranks:
      Select(driver.find_element(By.ID, 'ptsNafRoadRank')).select_by_visible_text(rank)
      time.sleep(0.1)

      ### Road name ###
      soup = BeautifulSoup(driver.page_source, 'html.parser')
      rnames = [x.text for x in soup.select('#ptsNafCh1RoadName > option')]

      for name in rnames:

        Select(driver.find_element(By.ID, 'ptsNafCh1RoadName')).select_by_visible_text(name)
        time.sleep(1)

        ### Search over all regions: province / district are left at their default ###

        ## Click the search button
        driver.find_elements(By.CSS_SELECTOR, search_button)[0].click()

        time.sleep(5)

        ### Error popup handling ###
        # An alert after searching is treated as "no result" for this road.
        try:
          alert = driver.switch_to.alert
          alert.accept()
          if_result = False
        except NoAlertPresentException:
          if_result = True

        if not if_result:
          continue

        ## Open the list view
        driver.find_elements(By.CSS_SELECTOR, show_list)[0].click()
        time.sleep(0.3)

        numbers, regions = get_data()
        scraped_rows.extend(
          [number, region, rank, name] for number, region in zip(numbers, regions)
        )
finally:
  # Runs on failure or interruption too, so rows collected so far are kept.
  if scraped_rows:
    region_df = pd.concat(
      [region_df, pd.DataFrame(scraped_rows, columns=region_df.columns)],
      ignore_index=True,
    )

In [None]:
# Re-sort by the "구분번호" (ID number) column, in place, and renumber the index from 0.
# NOTE(review): the values were scraped as text, so the order is presumably lexicographic
# rather than numeric — confirm this is intended.
region_df.sort_values("구분번호", inplace=True, ignore_index=True)

In [None]:
# Save as "region_df.csv" under the current user's "Downloads" folder.
from pathlib import Path

# Resolved from the user's home directory instead of a hard-coded
# "C:/Users/USER/..." path, so the cell also works under other accounts.
# Kept as a forward-slash string, the same form the previous literal had.
fpath = (Path.home() / "Downloads").as_posix()
Path(fpath).mkdir(parents=True, exist_ok=True)   # make sure the target folder exists
region_df.to_csv(f"{fpath}/region_df.csv", index=False)