In [100]:
# 작업자 : 권은이

import os, tempfile, time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

USER = "kwoneunei"
PW   = "kwoneunei0511@@"

USER_DATA_DIR = os.path.join(tempfile.gettempdir(), "selenium_profile_everytime")
os.makedirs(USER_DATA_DIR, exist_ok=True)

opts = Options()
# 맥에서는 Linux용 우회 플래그 빼는 게 안정적
opts.add_argument("--start-maximized")
opts.add_argument(f"--user-data-dir={USER_DATA_DIR}")
opts.add_argument("--profile-directory=Default")
opts.add_argument("--no-first-run")
opts.add_argument("--no-default-browser-check")
# 필요시 번역UI만 끄기
opts.add_argument("--disable-features=TranslateUI")

# Chrome 바이너리 지정이 꼭 필요하지는 않음(기본 경로 감지됨).
# opts.binary_location = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"

driver = webdriver.Chrome(options=opts)  # Selenium Manager가 드라이버 자동 준비
wait = WebDriverWait(driver, 15)

driver.get("https://everytime.kr/login")
wait.until(EC.visibility_of_element_located((By.NAME, "id"))).send_keys(USER)
driver.find_element(By.NAME, "password").send_keys(PW)

btns = driver.find_elements(By.XPATH, '//input[@type="submit" and contains(@value,"로그인")]')
(btns[0] if btns else driver).click() if btns else driver.execute_script(
    'document.querySelector(\'form[action="/user/login"]\').submit();'
)

try:
    WebDriverWait(driver, 15).until(EC.any_of(
        EC.url_contains("everytime.kr"),
        EC.presence_of_element_located((By.XPATH, '//a[contains(.,"게시판")]'))
    ))
    print("로그인 성공(추정):", driver.title)
except Exception as e:
    print("로그인 실패/차단:", repr(e))
    driver.save_screenshot("everytime_after_login.png")
    with open("everytime_after_login.html","w",encoding="utf-8") as f:
        f.write(driver.page_source)

로그인 성공(추정): 로그인 - 에브리타임


In [101]:
# -*- coding: utf-8 -*-
# 현재 열린 #subjects 표에서 별점 0이 아닌 강의만 들어가 강의평 수집 → CSV
# 전제: Selenium WebDriver 인스턴스 'driver'가 로그인 상태이며 /timetable에서
#       "수업 목록에서 검색"을 눌러 #subjects 표가 떠 있는 상태.

import csv, os, re, time, json, sys
from typing import List, Dict, Any, Optional
from urllib.parse import urljoin
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, JavascriptException

OUT_CSV   = "et_reviews_nonzero3.csv"
STATE_PATH = OUT_CSV + ".state.json"   # ← 진행 상태 파일
WAIT_SEC  = 60
PAUSE_ART = 60    # 강의평 스크롤 간 대기
MAX_ART   = 100000  # 강의평 스크롤 횟수
PRINT_PFX = "[ET-REV]"
BASE_ORIGIN = "https://everytime.kr"

def log(msg):
    print(f"{PRINT_PFX} {msg}"); sys.stdout.flush()

def abs_url(href: str) -> str:
    return href if href.startswith("http") else urljoin(BASE_ORIGIN, href or "/")

def safe_text(el) -> str:
    try:
        return el.text.strip()
    except Exception:
        return ""

def to_star_from_style(style_text: str) -> Optional[float]:
    if not style_text: return None
    m = re.search(r"width:\s*([\d.]+)%", style_text)
    if not m: return None
    return round(float(m.group(1)) / 20.0, 2)

def parse_rating_on_star_container(star_container) -> Optional[float]:
    try:
        on = star_container.find_element(By.CSS_SELECTOR, "span.on")
        return to_star_from_style(on.get_attribute("style"))
    except Exception:
        return None

# ===== CSV 이어쓰기 보장: 파일 없거나 '빈 파일'일 때만 헤더 생성 =====
def ensure_csv():
    if (not os.path.exists(OUT_CSV)) or (os.path.getsize(OUT_CSV) == 0):
        with open(OUT_CSV, "w", encoding="utf-8", newline="") as f:
            w = csv.writer(f)
            w.writerow([
                "lecture_id","course_code","course_name","professor","schedule","credit","time_hour",
                "division","year_target","note","capacity","enrolled",
                "avg_rating","rating_count","assignment","groupwork","grade_policy","attendance","exam_type",
                "review_star","review_semester","review_text"
            ])

def write_rows(rows: List[Dict[str, Any]]):
    with open(OUT_CSV, "a", encoding="utf-8", newline="") as f:
        w = csv.DictWriter(f, fieldnames=[
            "lecture_id","course_code","course_name","professor","schedule","credit","time_hour",
            "division","year_target","note","capacity","enrolled",
            "avg_rating","rating_count","assignment","groupwork","grade_policy","attendance","exam_type",
            "review_star","review_semester","review_text"
        ])
        for r in rows:
            w.writerow(r)

# ===== 이어하기를 위한 상태 로드/저장 =====
def load_state() -> Dict[str, Any]:
    if os.path.exists(STATE_PATH):
        try:
            with open(STATE_PATH, "r", encoding="utf-8") as f:
                return json.load(f)
        except Exception:
            pass
    return {"done_ids": []}

def save_state(state: Dict[str, Any]):
    try:
        with open(STATE_PATH, "w", encoding="utf-8") as f:
            json.dump(state, f, ensure_ascii=False, indent=2)
    except Exception as e:
        log(f"STATE 저장 실패: {e}")

def existing_done_ids_from_csv() -> set:
    done = set()
    if os.path.exists(OUT_CSV):
        try:
            with open(OUT_CSV, "r", encoding="utf-8", newline="") as f:
                r = csv.DictReader(f)
                for row in r:
                    lid = str(row.get("lecture_id") or "").strip()
                    if lid:
                        done.add(lid)
        except Exception:
            pass
    return done

# ===== 표 접근/파싱 =====
def find_subjects_table():
    # 이미 레이어가 떠있다면 #subjects 존재
    sec = WebDriverWait(driver, WAIT_SEC).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "#subjects"))
    )
    lst = sec.find_element(By.CSS_SELECTOR, ".list")
    table = lst.find_element(By.CSS_SELECTOR, "table")
    return lst, table

def scroll_table_all(lst, table):
    prev, stagnant = 0, 0
    for _ in range(700):
        try:
            driver.execute_script("arguments[0].scrollTop = arguments[0].scrollHeight;", lst)
        except JavascriptException:
            try:
                driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            except JavascriptException:
                pass
        time.sleep(0.6)
        cur = len(table.find_elements(By.CSS_SELECTOR, "tbody tr"))
        if cur > prev:
            prev, stagnant = cur, 0
        else:
            stagnant += 1
        if stagnant >= 6:
            break

def parse_rows_nonzero(table) -> List[Dict[str, Any]]:
    metas = []
    trs = table.find_elements(By.CSS_SELECTOR, "tbody tr")
    for tr in trs:
        tds = tr.find_elements(By.CSS_SELECTOR, "td")
        if len(tds) < 14: 
            continue
        # 별 링크
        try:
            a = tds[8].find_element(By.CSS_SELECTOR, 'a.star')
        except Exception:
            continue
        title = (a.get_attribute("title") or "0").strip()
        if title == "0":
            continue  # ★ 별점 0 건 제외

        href = a.get_attribute("href") or a.get_attribute("data-href") or ""
        review_url = abs_url(href)
        m = re.search(r"/lecture/view/(\d+)", review_url)
        if not m:
            continue
        lecture_id = m.group(1)

        metas.append({
            "lecture_id": lecture_id,
            "course_code": safe_text(tds[1]),
            "course_name": safe_text(tds[2]),
            "professor":   safe_text(tds[3]),
            "schedule":    safe_text(tds[4]),
            "credit":      safe_text(tds[5]),
            "time_hour":   safe_text(tds[6]),
            "division":    safe_text(tds[0]),
            "year_target": safe_text(tds[11]),
            "note":        safe_text(tds[13]),
            "capacity":    safe_text(tds[10]),
            "enrolled":    safe_text(tds[9]),
            "review_url":  review_url,
        })
    # lecture_id 중복 제거
    dedup = {}
    for m in metas:
        dedup[m["lecture_id"]] = m
    return list(dedup.values())

# ===== 강의평 페이지 =====
def wait_review_visible(timeout=WAIT_SEC):
    WebDriverWait(driver, timeout).until(
        lambda d: (
            len(d.find_elements(By.CSS_SELECTOR, "section.review")) > 0 or
            len(d.find_elements(By.CSS_SELECTOR, ".article_tab .articles")) > 0
        )
    )

def open_article_tab(url: str):
    url2 = url if "?tab=article" in url else (url + ("&" if "?" in url else "?") + "tab=article")
    driver.get(url2)
    wait_review_visible()
    time.sleep(0.6)

def extract_summary() -> Dict[str, Optional[str]]:
    data = {"avg_rating":None,"rating_count":None,"assignment":None,"groupwork":None,
            "grade_policy":None,"attendance":None,"exam_type":None}
    sec = None
    panes = None
    try:
        s = driver.find_elements(By.CSS_SELECTOR, "section.review")
        if s: sec = s[0]
    except Exception: pass
    try:
        panes = driver.find_elements(By.CSS_SELECTOR, ".article_tab")
    except Exception: pass

    if sec:
        try:
            data["avg_rating"] = safe_text(sec.find_element(By.CSS_SELECTOR, ".rating .title .average")) or None
        except Exception: pass
        try:
            t = safe_text(sec.find_element(By.CSS_SELECTOR, ".rating .title .count"))
            m = re.search(r"(\d+)\s*개", t)
            data["rating_count"] = m.group(1) if m else None
        except Exception: pass
        try:
            for subj in sec.find_elements(By.CSS_SELECTOR, ".assessment .summary .subjective"):
                h3 = safe_text(subj.find_element(By.CSS_SELECTOR, "h3"))
                val = safe_text(subj.find_element(By.CSS_SELECTOR, "span"))
                if "과제" in h3: data["assignment"] = val
                elif "조모임" in h3: data["groupwork"] = val
                elif "성적" in h3: data["grade_policy"] = val
        except Exception: pass
        try:
            for det in sec.find_elements(By.CSS_SELECTOR, ".assessment .detail"):
                title = safe_text(det.find_element(By.CSS_SELECTOR, "h3"))
                opts  = [safe_text(x) for x in det.find_elements(By.CSS_SELECTOR, ".options span") if safe_text(x)]
                if "출결" in title: data["attendance"] = ", ".join(opts) if opts else None
                elif "시험" in title: data["exam_type"] = ", ".join(opts) if opts else None
        except Exception: pass

    if (not data["avg_rating"]) and panes:
        try:
            data["avg_rating"] = safe_text(panes[0].find_element(By.CSS_SELECTOR, ".header .average .title")) or None
        except Exception: pass
    return data

def find_articles():
    arts = driver.find_elements(By.CSS_SELECTOR, "section.review .articles .article")
    if arts: return arts
    return driver.find_elements(By.CSS_SELECTOR, ".article_tab .articles .article")

def scroll_collect_articles():
    seen, stagn = 0, 0
    for _ in range(MAX_ART):
        curr = find_articles()
        if len(curr) > seen:
            seen, stagn = len(curr), 0
        else:
            stagn += 1
        if stagn >= 4: break
        try:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        except JavascriptException:
            pass
        time.sleep(PAUSE_ART)
    return find_articles()

def extract_article_row(article_el) -> Dict[str, Optional[str]]:
    row = {"review_star": None, "review_semester": None, "review_text": None}
    # 별점
    star_container = None
    for sel in [".rate .star", ".title .rate .star", ".star"]:
        if star_container: break
        try:
            star_container = article_el.find_element(By.CSS_SELECTOR, sel)
        except Exception: pass
    if star_container:
        row["review_star"] = parse_rating_on_star_container(star_container)
    # 수강학기
    for sel in [".info .semester", ".semester"]:
        if row["review_semester"]: break
        try:
            row["review_semester"] = safe_text(article_el.find_element(By.CSS_SELECTOR, sel))
        except Exception: pass
    # 본문
    for sel in [".text", ".content", ".body"]:
        if row["review_text"]: break
        try:
            row["review_text"] = safe_text(article_el.find_element(By.CSS_SELECTOR, sel))
        except Exception: pass
    return row

def crawl_one(meta: Dict[str, Any]) -> List[Dict[str, Any]]:
    open_article_tab(meta["review_url"])
    summary = extract_summary()
    arts = scroll_collect_articles()
    if not arts:
        return [{
            **{k: meta[k] for k in ["lecture_id","course_code","course_name","professor","schedule","credit","time_hour",
                                    "division","year_target","note","capacity","enrolled"]},
            **summary,
            "review_star": None, "review_semester": None, "review_text": None
        }]
    out = []
    for a in arts:
        ar = extract_article_row(a)
        out.append({
            **{k: meta[k] for k in ["lecture_id","course_code","course_name","professor","schedule","credit","time_hour",
                                    "division","year_target","note","capacity","enrolled"]},
            **summary,
            "review_star": ar["review_star"],
            "review_semester": ar["review_semester"],
            "review_text": ar["review_text"],
        })
    return out

def main():
    ensure_csv()

    # ==== 이어하기 준비: STATE + CSV ====
    state = load_state()
    done_ids = set(str(x) for x in state.get("done_ids", []))
    done_ids |= existing_done_ids_from_csv()
    if done_ids:
        log(f"이어하기: 이미 처리된 lecture_id {len(done_ids)}개 건너뜀")

    # 표 확보/파싱
    log("표 컨텍스트(#subjects) 확보 중…")
    lst, table = find_subjects_table()

    log("표 끝까지 스크롤 중…")
    scroll_table_all(lst, table)

    log("별점 0 제외하고 행 파싱…")
    metas_all = parse_rows_nonzero(table)
    # 아직 안 한 것만 대상으로
    metas = [m for m in metas_all if str(m["lecture_id"]) not in done_ids]
    log(f"대상 강의 수: {len(metas)} (전체 {len(metas_all)} 중 미처리만)")

    done = 0
    for i, meta in enumerate(metas, 1):
        lid = str(meta['lecture_id'])
        log(f"[{i}/{len(metas)}] lecture_id={lid} {meta['course_name']} / {meta['professor']}")
        try:
            rows = crawl_one(meta)
            write_rows(rows)
            done += 1

            # ─ 상태 즉시 반영(중단 내성)
            done_ids.add(lid)
            state["done_ids"] = sorted(list(done_ids))
            save_state(state)

            log(f"  └ 저장 완료. 누적 처리 {done}개")
        except TimeoutException:
            log("  └ 타임아웃 → 건너뜀")
        except Exception as e:
            log(f"  └ 오류: {e} → 건너뜀")
        time.sleep(0.6)

    log(f"완료. 처리 강의: {done}개")
    log(f"CSV: {OUT_CSV}")
    log(f"STATE: {STATE_PATH}")

# 실행
if __name__ == "__main__":
    main()

[ET-REV] 이어하기: 이미 처리된 lecture_id 885개 건너뜀
[ET-REV] 표 컨텍스트(#subjects) 확보 중…
[ET-REV] 표 끝까지 스크롤 중…
[ET-REV] 별점 0 제외하고 행 파싱…
[ET-REV] 대상 강의 수: 59 (전체 944 중 미처리만)
[ET-REV] [1/59] lecture_id=2941504 [SW와AI]AI와데이터기초 / 이미향
[ET-REV]   └ 저장 완료. 누적 처리 1개
[ET-REV] [2/59] lecture_id=1696452 생물통계실습 / 이채영
[ET-REV]   └ 저장 완료. 누적 처리 2개
[ET-REV] [3/59] lecture_id=2322078 생화학1 / 심가용
[ET-REV]   └ 저장 완료. 누적 처리 3개
[ET-REV] [4/59] lecture_id=2322082 세포생물학 / 박상연
[ET-REV]   └ 저장 완료. 누적 처리 4개
[ET-REV] [5/59] lecture_id=2941795 오믹스 / 이채영
[ET-REV]   └ 저장 완료. 누적 처리 5개
[ET-REV] [6/59] lecture_id=2609226 응용미생물학 / 서정아
[ET-REV]   └ 저장 완료. 누적 처리 6개
[ET-REV] [7/59] lecture_id=408294 일반생물2및실험 / 김미연
[ET-REV]   └ 저장 완료. 누적 처리 7개
[ET-REV] [8/59] lecture_id=310698 통계수학 / 권혁성
[ET-REV]   └ 저장 완료. 누적 처리 8개
[ET-REV] [9/59] lecture_id=2321357 보험원론 / 홍지민
[ET-REV]   └ 저장 완료. 누적 처리 9개
[ET-REV] [10/59] lecture_id=314696 의사결정론 / 김성철
[ET-REV]   └ 저장 완료. 누적 처리 10개
[ET-REV] [11/59] lecture_id=2609234 미적분학2 / 김미미
[ET-REV]   └ 저장 완료. 누적 처