# In [None]:  (notebook cell prompt left over from export)
# -*- coding: utf-8 -*-
import csv
import os
import random
import re
import sys
import time

import pymysql
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Page that main() loads first; it then waits for #table_seoul on it.
START_URL = "https://sugang.hongik.ac.kr/cn50000.jsp"

# NOTE(review): the non-ASCII literals below look like mis-decoded (mojibake)
# Korean text -- presumably weekday characters and placeholder words. Confirm
# the file's encoding before relying on what these patterns match.

# Character class used to detect a weekday marker anywhere in a cell.
DAY_PATTERN = re.compile(r"[ÏõîÌôîÏàòÎ™©Í∏àÌÜ†Ïùº]")
# Matches text made up of digits only (used to recognise numeric cells).
INT_PATTERN = re.compile(r"^\d+$")
# A character from the weekday class, optional whitespace, then digits.
TIME_PATTERN = re.compile(r"[ÏõîÌôîÏàòÎ™©Í∏àÌÜ†Ïùº]\s*\d+")  # e.g. a weekday marker directly or loosely followed by period numbers

# Cell texts that parse_table_rows never accepts as a professor-name candidate.
INVALID_PROF = {"Ï†ÑÏ≤¥", "ÏóÜÏùå", "ÎØ∏Ï†ï", "-", ""}

# ---------------- DB connection ----------------
def db_connect():
    """Open a pymysql connection to the course database.

    Every connection setting can be overridden through an environment
    variable so credentials do not have to live in the source file. When a
    variable is not set, the previous built-in value is used, so existing
    setups keep working unchanged.

    Environment variables:
        COURSE_DB_HOST, COURSE_DB_USER, COURSE_DB_PASSWORD, COURSE_DB_NAME

    Returns:
        The connection returned by ``pymysql.connect`` (utf8mb4 charset,
        autocommit enabled).
    """
    env = os.environ
    return pymysql.connect(
        host=env.get("COURSE_DB_HOST", "localhost"),
        user=env.get("COURSE_DB_USER", "root"),
        password=env.get("COURSE_DB_PASSWORD", "4205"),
        database=env.get("COURSE_DB_NAME", "qnet_crawling2"),
        charset="utf8mb4",
        autocommit=True,
    )

def save_course(row, conn):
    """Insert one parsed course row into ``course``, updating on duplicate key.

    Args:
        row: Mapping of parsed fields; keys that are missing are sent as
            ``None`` and a missing/empty enrolment value is stored as 0.
        conn: Connection object whose ``cursor()`` result is usable as a
            context manager and exposes ``execute(sql, params)``.
    """
    upsert_stmt = """
        INSERT INTO course (opened_grade, department, course_name, enrolled, professor, schedule)
        VALUES (%s, %s, %s, %s, %s, %s)
        ON DUPLICATE KEY UPDATE
            enrolled = VALUES(enrolled),
            professor = VALUES(professor),
            schedule = VALUES(schedule);
    """
    with conn.cursor() as cursor:
        grade = row.get("Í∞úÏÑ§ÌïôÎÖÑ")
        department = row.get("Ï£ºÍ¥ÄÌïôÍ≥º")
        title = row.get("Í≥ºÎ™©Î™Ö")
        # Falsy values (None, "", 0) all collapse to 0 before conversion.
        enrolled_raw = row.get("ÏàòÍ∞ïÏù∏Ïõê")
        enrolled = int(enrolled_raw) if enrolled_raw else int(0)
        professor = row.get("ÍµêÏàòÎ™Ö")
        schedule = row.get("ÏöîÏùºÎ∞èÏãúÍ∞Ñ")

        params = (grade, department, title, enrolled, professor, schedule)
        cursor.execute(upsert_stmt, params)

# ---------------- Utilities ----------------
def build_driver():
    """Create a Chrome WebDriver with --start-maximized and --disable-gpu set."""
    chrome_options = webdriver.ChromeOptions()
    for flag in ("--start-maximized", "--disable-gpu"):
        chrome_options.add_argument(flag)
    browser = webdriver.Chrome(options=chrome_options)
    return browser

def norm(s: str) -> str:
    """Collapse whitespace runs (including non-breaking spaces) to single spaces.

    ``None`` or any other falsy value is treated as the empty string; leading
    and trailing whitespace is removed.
    """
    text = s if s else ""
    text = text.replace("\xa0", " ")
    words = text.split()
    return " ".join(words)

def wait_css(driver, css, timeout=15):
    """Wait until an element matching ``css`` is present and return it.

    Args:
        driver: WebDriver to poll.
        css: CSS selector of the element to wait for.
        timeout: Maximum number of seconds handed to ``WebDriverWait``.
    """
    locator = (By.CSS_SELECTOR, css)
    waiter = WebDriverWait(driver, timeout)
    return waiter.until(EC.presence_of_element_located(locator))

# ---------------- Table parsing ----------------
def parse_table_rows(table):
    """Extract course records from the rows of a course-listing table.

    The first ``<tr>`` is skipped. A row is used only if it has ``<td>`` cells
    and one of them carries the ``list_subject`` class; that cell anchors all
    other lookups:

    * course name  - text of the ``list_subject`` cell
    * opened grade - text of the first cell
    * department   - text of the cell four positions left of the subject
      cell, or ``""`` when the row has no such cell
    * schedule     - first cell right of the subject that looks like a
      day/time value
    * professor    - last textual candidate right of the subject and left of
      the schedule cell
    * enrolled     - right-most all-digit cell between the subject cell and
      the schedule cell (or row end when no schedule cell was found)

    Args:
        table: Element exposing ``find_elements``; its rows/cells must expose
            ``find_elements`` and ``get_attribute``.

    Returns:
        A list of dicts, one per accepted row, with the six field keys used
        by the CSV writer and ``save_course``. Rows whose fields are all
        empty are dropped.
    """
    name_chars = re.compile(r"[Í∞Ä-Ìû£A-Za-z]")
    out = []

    for r in table.find_elements(By.CSS_SELECTOR, "tr")[1:]:
        tds = r.find_elements(By.CSS_SELECTOR, "td")
        if not tds:
            continue

        # Locate the subject cell via its "list_subject" class.
        subj_idx = next(
            (
                i
                for i, td in enumerate(tds)
                if "list_subject" in (td.get_attribute("class") or "")
            ),
            -1,
        )
        if subj_idx < 0:
            continue

        # Read each cell's text exactly once; every access below reuses it
        # instead of asking the browser again.
        texts = [norm(td.get_attribute("innerText")) for td in tds]

        course_name = texts[subj_idx]
        opened_grade = texts[0]

        # Department: four cells to the left of the subject cell. The bound is
        # checked explicitly because a negative index would silently wrap to
        # the end of the row instead of signalling "no such cell".
        dept_idx = subj_idx - 4
        dept_text = texts[dept_idx] if dept_idx >= 0 else ""

        # ---- schedule + professor: pattern-based scan right of the subject ----
        time_idx, time_text = None, ""
        prof_candidates = []
        # NOTE(review): set for ANY entry of INVALID_PROF, not only the
        # placeholder that is written back below -- confirm this is intended.
        saw_placeholder = False

        for j in range(subj_idx + 1, len(tds)):
            txt = texts[j]
            if not txt:
                continue

            matches_time = bool(TIME_PATTERN.search(txt))

            # Only the first schedule-looking cell is kept.
            if time_idx is None and (
                matches_time
                or (DAY_PATTERN.search(txt) and any(ch.isdigit() for ch in txt))
            ):
                time_idx, time_text = j, txt

            # Professor candidates exclude pure numbers, schedule-pattern
            # cells and the known placeholder values.
            if INT_PATTERN.match(txt) or matches_time:
                continue
            if txt in INVALID_PROF:
                saw_placeholder = True
                continue

            # Remaining text that contains a character from the name class
            # is treated as a possible professor name.
            if name_chars.search(txt):
                prof_candidates.append((j, txt))

        # Professor: the last candidate located before the schedule cell.
        if time_idx is not None:
            prof_candidates = [c for c in prof_candidates if c[0] < time_idx]
        prof_text = prof_candidates[-1][1] if prof_candidates else ""
        # With no usable candidate but a placeholder seen, record the
        # placeholder literal rather than leaving the field empty.
        if not prof_text and saw_placeholder:
            prof_text = "Ï†ÑÏ≤¥"

        # ---- enrolled count: scan leftwards for the nearest all-digit cell ----
        scan_end = time_idx - 1 if time_idx is not None else len(tds) - 1
        enrolled_text = next(
            (
                texts[j]
                for j in range(scan_end, subj_idx, -1)
                if INT_PATTERN.match(texts[j])
            ),
            "",
        )

        item = {
            "Í∞úÏÑ§ÌïôÎÖÑ": opened_grade,
            "Ï£ºÍ¥ÄÌïôÍ≥º": dept_text,
            "Í≥ºÎ™©Î™Ö": course_name,
            "ÏàòÍ∞ïÏù∏Ïõê": enrolled_text,
            "ÍµêÏàòÎ™Ö": prof_text,
            "ÏöîÏùºÎ∞èÏãúÍ∞Ñ": time_text,
        }

        if any(item.values()):
            out.append(item)

    return out

# ---------------- Popup parsing ----------------
def parse_second_page(driver, timeout=15):
    """Poll the current window for the ``#select_list`` table and parse it.

    The table is looked up in the top-level document first and then inside
    each ``<iframe>``. The lookup is repeated every 0.4 seconds until
    ``timeout`` seconds have passed.

    Args:
        driver: WebDriver already switched to the window to inspect.
        timeout: Maximum number of seconds to keep polling.

    Returns:
        The rows produced by ``parse_table_rows`` for the first table found,
        or an empty list if none appeared before the deadline.
    """
    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            driver.switch_to.default_content()
            tables = driver.find_elements(By.CSS_SELECTOR, "#select_list")
            if tables:
                return parse_table_rows(tables[0])

            for frame in driver.find_elements(By.TAG_NAME, "iframe"):
                try:
                    driver.switch_to.frame(frame)
                    tables = driver.find_elements(By.CSS_SELECTOR, "#select_list")
                    if tables:
                        # Parsed while still inside the frame; the finally
                        # clause restores the top-level context afterwards.
                        return parse_table_rows(tables[0])
                finally:
                    driver.switch_to.default_content()

        except WebDriverException:
            # Browser-side errors while the page is still loading (stale
            # elements, vanished frames) are expected: retry until the
            # deadline. Other exceptions indicate real bugs and propagate.
            pass
        time.sleep(0.4)
    return []

# ---------------- main ----------------
def _store_rows(rows, writer, conn):
    """Echo each parsed row, append it to the CSV and upsert it into the DB."""
    for row in rows:
        print("   ", row)
        writer.writerow(row)
        save_course(row, conn)


def _crawl_links(driver, conn, writer):
    """Click every gocn4001 link under #table_seoul and store the parsed rows."""
    link_css = '#table_seoul a[href^="javascript:gocn4001"]'
    anchors = driver.find_elements(By.CSS_SELECTOR, link_css)
    total = len(anchors)
    original = driver.current_window_handle

    # Iterate by position rather than over the element list itself: after an
    # in-place navigation the list is re-fetched, and the loop must pick the
    # fresh element instead of a stale one from the first lookup.
    for idx in range(1, total + 1):
        if idx > len(anchors):
            # The re-fetched page exposes fewer links than the first lookup.
            break
        a = anchors[idx - 1]
        print(f"[INFO] ({idx}/{total}) ÎßÅÌÅ¨ ÌÅ¥Î¶≠‚Ä¶")

        before = set(driver.window_handles)
        driver.execute_script("arguments[0].scrollIntoView({block:'center'})", a)
        time.sleep(0.2)
        a.click()
        time.sleep(0.6)
        opened = set(driver.window_handles) - before

        try:
            if opened:
                # The link opened a new window: parse it there.
                driver.switch_to.window(next(iter(opened)))
                try:
                    _store_rows(parse_second_page(driver), writer, conn)
                finally:
                    # Always close the popup and return to the listing, even
                    # when parsing or saving fails.
                    driver.close()
                    driver.switch_to.window(original)
            else:
                # The link navigated in place: parse, go back, and re-fetch
                # the links because the old elements no longer belong to the
                # reloaded page.
                _store_rows(parse_second_page(driver), writer, conn)
                driver.back()
                wait_css(driver, "#table_seoul")
                anchors = driver.find_elements(By.CSS_SELECTOR, link_css)

        except TimeoutException:
            print("  [TIMEOUT] Í±¥ÎÑàÎúÄ")

        # Random pause between links.
        time.sleep(random.uniform(0.6, 1.2))


def main():
    """Crawl START_URL and store every parsed course row in CSV and the DB.

    Rows are appended to ``courses.csv`` (a header is written when the file
    is missing or empty) and upserted through ``save_course``. The database
    connection, the browser and the CSV file are released even when the
    crawl aborts with an exception.
    """
    out_path = "courses.csv"
    fieldnames = ["Í∞úÏÑ§ÌïôÎÖÑ", "Ï£ºÍ¥ÄÌïôÍ≥º", "Í≥ºÎ™©Î™Ö", "ÏàòÍ∞ïÏù∏Ïõê", "ÍµêÏàòÎ™Ö", "ÏöîÏùºÎ∞èÏãúÍ∞Ñ"]

    conn = db_connect()
    try:
        driver = build_driver()
        try:
            driver.get(START_URL)
            wait_css(driver, "#table_seoul")

            # An existing but empty file still needs its header row.
            need_header = (
                not os.path.exists(out_path) or os.path.getsize(out_path) == 0
            )
            with open(out_path, "a", newline="", encoding="utf-8-sig") as f:
                writer = csv.DictWriter(f, fieldnames=fieldnames)
                if need_header:
                    writer.writeheader()
                _crawl_links(driver, conn, writer)
        finally:
            driver.quit()
    finally:
        conn.close()

    print("[DONE] CSV + DB Ï†ÄÏû• ÏôÑÎ£å!")

# Start the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
