In [1]:
import re
import time
from dataclasses import dataclass
from datetime import datetime
from urllib.parse import urljoin
from html import escape

import requests
import pandas as pd
from bs4 import BeautifulSoup
from dateutil import parser as dtparser


# ======================
# 설정
# ======================
# Listing page URL; crawl() fills the {page} placeholder with the page number.
LIST_URL = "https://portal.koreatech.ac.kr/ctt/bb/bulletin?b=14&ls=20&ln={page}&dm=l"
# Detail page URL; parse_list() fills the {post_id} placeholder for each post.
DETAIL_URL = "https://portal.koreatech.ac.kr/ctt/bb/bulletin?b=14&ls=20&ln=1&dm=r&p={post_id}"

# NOTE(review): not referenced by any code visible in this file; the list
# parser filters on the substring "근로" instead — confirm whether it is still needed.
TARGET_CATEGORY = "근로장학생 모집"

# Request headers that fetch() sends with every GET.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36",
    "Accept-Language": "ko-KR,ko;q=0.9,en;q=0.8",
}

REQUEST_DELAY_SEC = 0.4  # pause before each detail request so the site is not scraped too fast


# ======================
# 필터 기준
# ======================
@dataclass
class Criteria:
    """Filter settings that passes() applies to every crawled post."""

    # Drop posts whose notice clearly excludes a group (over-semester students,
    # students on leave, ...). Wording that says the group is allowed does not
    # exclude the post; ambiguous wording is reported as UNKNOWN instead.
    exclude_if_disqualified: bool = True

    # Registration-date filter
    posted_at_after: datetime | None = None  # inclusive lower bound
    posted_at_before: datetime | None = None # exclusive upper bound (needed to keep a single year such as 2025)

    # Deadline filter (optional)
    deadline_after: datetime | None = None   # e.g. datetime.now()


# Example: keep only posts registered during 2025
# (lower bound inclusive, upper bound exclusive; no deadline filter).
CRITERIA = Criteria(
    exclude_if_disqualified=True,
    posted_at_after=datetime(2025, 1, 1),
    posted_at_before=datetime(2026, 1, 1),
    deadline_after=None
)


# ======================
# 텍스트 정규화
# ======================
def norm(s: str) -> str:
    """Return *s* lower-cased with every whitespace run collapsed to one space.

    Ideographic spaces (U+3000) are turned into plain spaces first, and
    leading/trailing whitespace is removed.
    """
    single_spaced = re.sub(r"\s+", " ", s.replace("\u3000", " "))
    return single_spaced.strip().lower()


# ======================
# (핵심) 오탐 방지용: 가능/불가 문맥 판정
# ======================
# Context words looked for near a keyword hit to decide whether the notice
# excludes (RE_NEG) or admits (RE_POS) the group the keyword names.
#
# Two phrases used to match BOTH patterns and therefore always ended up
# "ambiguous":
#   * "제한 없음" (no restriction): "제한" alone is negative, so the negative
#     pattern skips "제한" when it is followed by "없".
#   * "불가능" (impossible): it contains "가능", so the positive pattern skips
#     "가능" when it is preceded by "불".
RE_NEG = re.compile(r"(불가|제외|금지|제한(?!\s*없)|신청\s*불가|지원\s*불가|대상\s*아님|해당\s*없음|중복\s*불가)")
RE_POS = re.compile(r"((?<!불)가능|허용|무관|상관\s*없|제한\s*없|포함|신청\s*가능|지원\s*가능)")

# Groups of applicants a notice may exclude.
KW_OVERS = re.compile(r"(초과\s*학기(생)?|초과등록|초과수학)")
KW_LEAVE = re.compile(r"(휴학\s*생|휴학생)")
KW_GRAD = re.compile(r"(졸업\s*생|졸업생|수료생)")

# Other restriction phrases that show up often (extend as needed).
KW_NATIONAL = re.compile(r"(국가\s*근로)")
KW_SEMESTER_WORK = re.compile(r"(학기\s*근로(생)?)")
KW_NIGHT_PATROL = re.compile(r"(야간\s*순찰)")
KW_MAX_40H = re.compile(r"(40\s*h|40시간|주\s*40\s*시간|최대\s*시간).{0,10}(초과)")
KW_TIMETABLE_CONFLICT = re.compile(r"(수강\s*시간표|수업\s*시간|시간표).{0,30}(중복|겹|충돌)|중복\s*불가.*\(온라인포함\)")

# Reason label -> keyword pattern; the labels are what gets reported as the
# reasons of a BLOCK / UNKNOWN verdict.
DISQUALIFY_KEYWORDS = {
    "초과학기": KW_OVERS,
    "휴학생": KW_LEAVE,
    "졸업/수료": KW_GRAD,
    "국가근로": KW_NATIONAL,
    "학기근로": KW_SEMESTER_WORK,
    "야간순찰": KW_NIGHT_PATROL,
    "최대시간40h": KW_MAX_40H,
    "시간표중복": KW_TIMETABLE_CONFLICT,
}

# Matches from the first "notes / eligibility / restrictions" heading through
# the end of the text (DOTALL lets ".+" run across line breaks).
RE_NOTICE_BLOCK = re.compile(
    r"(※\s*유의사항|유의사항|지원대상|신청\s*자격|제한사항|불가\s*대상)\s*[:：]?\s*(.+)",
    re.IGNORECASE | re.DOTALL
)

def extract_notice_block(text: str) -> str:
    """Return the notice/eligibility section of *text*, capped at 2500 characters.

    When no section heading is found, the whole text is returned so the
    caller still checks everything (the conservative choice).
    """
    found = RE_NOTICE_BLOCK.search(text)
    return found.group(0)[:2500] if found else text

def window_around(text: str, match: re.Match, radius: int = 45) -> str:
    """Return the matched span plus up to *radius* characters on either side.

    The window is clamped to the bounds of *text*.
    """
    left = max(match.start() - radius, 0)
    right = min(match.end() + radius, len(text))
    return text[left:right]

def classify_keyword(text: str, kw_pat: re.Pattern) -> bool | None:
    """Judge from nearby wording whether a keyword is excluded or allowed.

    The text is normalized, then a 55-character window around every hit of
    *kw_pat* is checked for negative and positive context words.

    Returns:
      True  - only negative (excluded / not allowed) context was found
      False - only positive (allowed) context was found
      None  - the keyword is absent, or the context is missing or conflicting
    """
    normalized = norm(text)
    windows = [
        window_around(normalized, hit, radius=55)
        for hit in kw_pat.finditer(normalized)
    ]
    if not windows:
        return None

    negative = any(RE_NEG.search(w) for w in windows)
    positive = any(RE_POS.search(w) for w in windows)

    # Both kinds of context, or neither: cannot decide.
    if negative == positive:
        return None
    # Exactly one flag is set here, so it is the verdict:
    # True means negative-only, False means positive-only.
    return negative

def disqualify_status(body_text: str) -> tuple[str, list[str]]:
    """Classify a post body by the eligibility restrictions it mentions.

    NOTE: this file defines ``disqualify_status`` a second time further down,
    and that later definition is the one bound when the crawler runs. This
    one follows the same contract so the result does not depend on which
    definition is in effect.

    Returns:
      status:  "BLOCK"        - at least one keyword sits in clearly negative context
               "UNKNOWN"      - a keyword appears but its context is unclear
               "NO_CONDITION" - none of the keywords appears at all
               "PASS"         - keywords appear, all in clearly permissive context
      reasons: labels of the keywords behind a BLOCK / UNKNOWN status
               (empty otherwise)
    """
    block = extract_notice_block(body_text)
    normalized = norm(block)

    reasons_block = []
    reasons_unknown = []
    any_keyword_seen = False

    for name, pat in DISQUALIFY_KEYWORDS.items():
        # classify_keyword() also returns None when the keyword is absent, so
        # absent keywords must be skipped here; otherwise every keyword that
        # never appears would be reported as an UNKNOWN reason.
        if not pat.search(normalized):
            continue
        any_keyword_seen = True

        res = classify_keyword(block, pat)
        if res is True:
            reasons_block.append(name)
        elif res is None:
            # Keyword present, but the surrounding wording is unclear.
            reasons_unknown.append(name)

    if reasons_block:
        return "BLOCK", reasons_block

    if reasons_unknown:
        return "UNKNOWN", reasons_unknown

    if not any_keyword_seen:
        return "NO_CONDITION", []

    return "PASS", []


# ======================
# 마감일 추출(정확도 위해 폴백 제거)
# ======================
# A date that directly follows a deadline/application keyword.
RE_DEADLINE = re.compile(
    # keyword, an optional qualifier, an optional ":" or "-" separator
    r"(마감|접수|신청|모집)\s*(기간|일시|까지)?\s*[:\-]?\s*"
    r"(?P<date>"
    # year-month-day, optional one-character weekday, optional HH:MM
    r"\d{4}[-./]\d{1,2}[-./]\d{1,2}(?:\s*\(?[월화수목금토일]\)?\s*)?(?:\s*\d{1,2}:\d{2})?"
    r"|"
    # month-day only, with the same optional weekday and time
    r"\d{1,2}[-./]\d{1,2}(?:\s*\(?[월화수목금토일]\)?\s*)?(?:\s*\d{1,2}:\d{2})?"
    r")"
)

def extract_deadline(text: str, default_year: int | None = None) -> datetime | None:
    """Return the first date that follows a deadline keyword in *text*.

    Args:
      text: post body to search.
      default_year: year prepended when the matched date has no year.

    Returns:
      The parsed datetime, or None when nothing matches or the matched text
      is not a valid date.
    """
    m = RE_DEADLINE.search(text)
    if not m:
        return None

    # Normalize "." and "/" separators to "-" before parsing.
    raw = m.group("date").strip().replace(".", "-").replace("/", "-")
    if default_year and re.match(r"^\d{1,2}-\d{1,2}", raw) and not re.match(r"^\d{4}-", raw):
        raw = f"{default_year}-{raw}"
    try:
        # fuzzy=True lets the parser skip tokens it does not understand,
        # such as a weekday marker.
        return dtparser.parse(raw, fuzzy=True)
    except (ValueError, OverflowError):
        # Only parse failures mean "no deadline"; a bare except would also
        # swallow KeyboardInterrupt and SystemExit.
        return None


# ======================
# 크롤링: 목록 파싱
# ======================
def fetch(session: requests.Session, url: str) -> str:
    """GET *url* through *session* and return the response body as text.

    The shared HEADERS are sent and the request times out after 20 seconds;
    an HTTP error status raises via ``raise_for_status()``.
    """
    response = session.get(url, timeout=20, headers=HEADERS)
    response.raise_for_status()
    return response.text


def _list_item(post_id: str, category: str, title: str, posted_at: str, writer: str) -> dict:
    """Build one listing record, including the detail-page link for the post."""
    return {
        "post_id": post_id,
        "category": category,
        "title": title,
        "posted_at": posted_at,
        "writer": writer,
        "link": DETAIL_URL.format(post_id=post_id),
    }


def parse_list(html: str) -> list[dict]:
    """Extract work-study posts from one listing page.

    Table rows are tried first. If they yield nothing, the page's plain text
    is scanned with a regex, and those results are de-duplicated by post id.
    Each record has the keys post_id, category, title, posted_at, writer, link.
    """
    soup = BeautifulSoup(html, "lxml")

    # 1) Table-based parsing
    from_table = []
    for row in soup.select("table tbody tr"):
        cells = row.find_all("td")
        if len(cells) < 6:
            continue

        post_id = cells[0].get_text(strip=True)
        category = cells[1].get_text(strip=True)
        # Skip rows without a numeric id, and rows whose category is not
        # a work-study one.
        if not post_id.isdigit() or "근로" not in category:
            continue

        from_table.append(_list_item(
            post_id,
            category,
            cells[2].get_text(" ", strip=True),
            cells[3].get_text(strip=True),
            cells[4].get_text(strip=True),
        ))

    if from_table:
        return from_table

    # 2) Fallback: parse the flattened page text
    page_text = soup.get_text("\n", strip=True)
    entry_re = re.compile(
        r"(?m)^(?P<id>\d{5,})\s+(?P<cat>[^\n]+?)\s*\n(?P<title>[^\n]+?)\s*\n(?P<date>\d{4}-\d{2}-\d{2})\s+(?P<writer>[^\s]+)"
    )

    # Keyed by post id: a repeated id keeps its first position in the result
    # and the fields of its last occurrence.
    by_id = {}
    for hit in entry_re.finditer(page_text):
        category = hit.group("cat").strip()
        title = hit.group("title").strip()
        if not ("근로" in category or "근로" in title):
            continue

        post_id = hit.group("id")
        by_id[post_id] = _list_item(
            post_id,
            category,
            title,
            hit.group("date").strip(),
            hit.group("writer").strip(),
        )

    return list(by_id.values())


# ======================
# 상세 본문 텍스트 얻기
# ======================
def parse_detail_text(html: str) -> str:
    """Return the text of a detail page, one line per text node.

    The first candidate container that has non-empty text is used;
    if none qualifies, the text of the whole document is returned.
    """
    soup = BeautifulSoup(html, "lxml")

    for selector in (
        ".bbs_view", ".board-view", ".view", ".content", "#content", ".article", ".cont"
    ):
        node = soup.select_one(selector)
        if not node:
            continue
        if node.get_text(strip=True):
            return node.get_text("\n", strip=True)

    return soup.get_text("\n", strip=True)


# ======================
# 필터 적용
# ======================
def passes(item: dict, body_text: str) -> tuple[bool, dict]:
    """Decide whether a post survives the filters in CRITERIA.

    Args:
      item: listing record; its "posted_at" value is parsed as the
        registration date.
      body_text: plain text of the post's detail page.

    Returns:
      (keep, info). *info* always has "dq_status" and "dq_reasons";
      "posted_dt" and "deadline" are added once the corresponding step has
      run, so they may be missing when the post is rejected earlier.
    """
    info = {}

    # (1) Disqualification verdict
    status, reasons = disqualify_status(body_text)
    info["dq_status"] = status
    info["dq_reasons"] = reasons

    if CRITERIA.exclude_if_disqualified and status == "BLOCK":
        return False, info

    # (2) Registration-date filter. A missing or unparseable date leaves
    # posted_dt as None, which skips the date bounds instead of rejecting.
    try:
        posted_dt = dtparser.parse(item["posted_at"])
    except (KeyError, TypeError, ValueError, OverflowError):
        posted_dt = None
    info["posted_dt"] = posted_dt

    if posted_dt is not None:
        if CRITERIA.posted_at_after and posted_dt < CRITERIA.posted_at_after:
            return False, info
        if CRITERIA.posted_at_before and posted_dt >= CRITERIA.posted_at_before:
            return False, info

    # (3) Deadline filter (optional). A deadline written without a year is
    # resolved against the year the post was registered; the crawl year is
    # only a fallback, because an older post crawled later would otherwise
    # get the wrong year.
    default_year = posted_dt.year if posted_dt is not None else datetime.now().year
    dl = extract_deadline(body_text, default_year=default_year)
    info["deadline"] = dl
    if CRITERIA.deadline_after and dl and dl < CRITERIA.deadline_after:
        return False, info

    return True, info

def disqualify_status(body_text: str) -> tuple[str, list[str]]:
    """Classify a post body by the eligibility restrictions it mentions.

    This definition follows an earlier one of the same name in this file,
    so it is the one bound when the crawler runs.

    Returns:
      status:  "BLOCK"        - at least one keyword sits in clearly negative context
               "UNKNOWN"      - a keyword appears but its context is unclear or conflicting
               "NO_CONDITION" - none of the keywords appears at all
               "PASS"         - keywords appear, all in clearly permissive context
      reasons: labels of the keywords behind a BLOCK / UNKNOWN status
               (empty otherwise)
    """
    block = extract_notice_block(body_text)
    # Normalize once; it does not depend on the keyword being checked.
    normalized = norm(block)

    reasons_block = []
    reasons_unknown = []
    any_keyword_seen = False

    for name, pat in DISQUALIFY_KEYWORDS.items():
        # classify_keyword() returns None for an absent keyword as well, so
        # presence is checked separately to tell "absent" from "ambiguous".
        if not pat.search(normalized):
            continue
        any_keyword_seen = True

        res = classify_keyword(block, pat)
        if res is True:
            reasons_block.append(name)
        elif res is None:
            reasons_unknown.append(name)

    if reasons_block:
        return "BLOCK", reasons_block

    if reasons_unknown:
        return "UNKNOWN", reasons_unknown

    # No condition-related keyword at all: the post states no conditions.
    if not any_keyword_seen:
        return "NO_CONDITION", []

    return "PASS", []


In [2]:
from html import escape
from datetime import datetime

def make_board_html(df: pd.DataFrame, out_path: str = "koreatech_workstudy_board.html"):
    """Render *df* as a standalone bulletin-board style HTML page and save it.

    For each row the columns "글번호", "제목", "등록일", "작성자", "링크", "판정" and
    "사유" are read (a missing one falls back to a default). Every value is
    HTML-escaped before insertion except "판정", which only selects the badge.
    The page's embedded script provides text search, a registration-date
    range filter, 30-row paging, and opens each post in a popup window.

    Args:
      df: one row per post.
      out_path: destination file, written as UTF-8.

    Prints a confirmation line with *out_path* once the file is written.
    """
    def badge(status: str) -> str:
        """Return the badge markup for *status* ("" when no badge applies)."""
        if status == "UNKNOWN":
            return '<span class="badge badge-warn">검토필요</span>'
        if status == "NO_CONDITION":
            return '<span class="badge badge-ok">조건 미기재</span>'
        return ""

    rows_html = []
    for _, r in df.iterrows():
        num = escape(str(r.get("글번호", "")))
        title = escape(str(r.get("제목", "")))
        date = escape(str(r.get("등록일", "")))   # expected to be "YYYY-MM-DD"
        writer = escape(str(r.get("작성자", "")))
        link = escape(str(r.get("링크", "")).strip())
        status = str(r.get("판정", "PASS"))
        reasons = escape(str(r.get("사유", "")))

        reason_html = f'<div class="reason">{reasons}</div>' if reasons else ""

        # The link is opened by the page script via window.open (a small popup, not a new tab)
        rows_html.append(f"""
        <tr data-date="{date}">
          <td class="num">{num}</td>
          <td class="title">
            {badge(status)}
            <a href="#" class="postlink" data-url="{link}">{title}</a>
            {reason_html}
          </td>
          <td class="date">{date}</td>
          <td class="writer">{writer}</td>
        </tr>
        """)

    generated_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    html = f"""<!doctype html>
<html lang="ko">
<head>
  <meta charset="utf-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1"/>
  <title>근로장학생 모집 필터 결과</title>
  <style>
    body {{
      font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Noto Sans KR", Arial, sans-serif;
      background: #f6f8fb;
      margin: 0;
      padding: 20px;
      color: #1f2937;
    }}
    .wrap {{ max-width: 1100px; margin: 0 auto; }}
    .header {{ display:flex; justify-content: space-between; align-items: flex-end; gap: 12px; margin-bottom: 12px; }}
    h1 {{ font-size: 20px; margin: 0; }}
    .meta {{ font-size: 12px; color: #6b7280; white-space: nowrap; }}
    .card {{
      background: white;
      border: 1px solid #e5e7eb;
      border-radius: 12px;
      overflow: hidden;
      box-shadow: 0 1px 2px rgba(0,0,0,0.04);
    }}

    .toolbar {{
      display:flex;
      flex-wrap: wrap;
      gap: 10px;
      padding: 12px;
      border-bottom: 1px solid #eef2f7;
      background: #fbfdff;
      align-items: center;
    }}
    .toolbar .grow {{ flex: 1 1 260px; }}
    .toolbar input[type="text"] {{
      width: 100%;
      padding: 10px 12px;
      border: 1px solid #e5e7eb;
      border-radius: 10px;
      outline: none;
      font-size: 14px;
    }}
    .toolbar .datebox {{
      display:flex;
      gap: 8px;
      align-items:center;
      flex: 0 0 auto;
    }}
    .toolbar input[type="date"] {{
      padding: 8px 10px;
      border: 1px solid #e5e7eb;
      border-radius: 10px;
      outline: none;
      font-size: 13px;
      background: white;
    }}
    .toolbar button {{
      padding: 9px 10px;
      border: 1px solid #e5e7eb;
      background: white;
      border-radius: 10px;
      cursor: pointer;
      font-size: 13px;
    }}
    .toolbar button:hover {{
      background: #f3f4f6;
    }}

    table {{ width: 100%; border-collapse: collapse; }}
    thead th {{
      text-align: left;
      font-size: 13px;
      padding: 12px;
      background: #f3f4f6;
      border-bottom: 1px solid #e5e7eb;
      color: #374151;
    }}
    tbody td {{
      padding: 12px;
      border-bottom: 1px solid #eef2f7;
      font-size: 14px;
      vertical-align: top;
    }}
    tbody tr:hover {{ background: #f9fafb; }}
    td.num {{ width: 90px; color:#6b7280; }}
    td.date {{ width: 120px; color:#6b7280; }}
    td.writer {{ width: 160px; color:#6b7280; }}

    /* 제목: 더 두껍게 + 2px 정도 키움 */
    td.title a {{
      color: #111827;
      text-decoration: none;
      font-weight: 700;
      font-size: 16px;
    }}
    td.title a:hover {{ text-decoration: underline; }}

    .badge {{
      display:inline-block;
      font-size: 12px;
      padding: 3px 8px;
      border-radius: 999px;
      margin-right: 8px;
      vertical-align: middle;
      border: 1px solid transparent;
    }}
    .badge-warn {{
      background: #fef3c7;
      color: #92400e;
      border-color: #fde68a;
    }}
    .badge-ok {{
      background: #dcfce7;
      color: #166534;
      border-color: #86efac;
    }}

    .reason {{
      margin-top: 6px;
      font-size: 12px;
      color: #6b7280;
    }}

    .pager {{
      display:flex;
      justify-content: space-between;
      align-items: center;
      gap: 10px;
      padding: 12px;
      background: #fbfdff;
      border-top: 1px solid #eef2f7;
    }}
    .pager .info {{
      font-size: 12px;
      color: #6b7280;
    }}
    .pager .btns {{
      display:flex;
      gap: 8px;
    }}
    .pager button {{
      padding: 8px 10px;
      border: 1px solid #e5e7eb;
      background: white;
      border-radius: 10px;
      cursor: pointer;
      font-size: 13px;
    }}
    .pager button:disabled {{
      cursor: not-allowed;
      opacity: 0.5;
    }}

    .empty {{ padding: 18px; color: #6b7280; }}
  </style>
</head>
<body>
  <div class="wrap">
    <div class="header">
      <h1>근로장학생 모집 필터 결과</h1>
      <div class="meta">생성: {generated_at} · 총 <span id="totalCnt">{len(df)}</span>개</div>
    </div>

    <div class="card">
      <div class="toolbar">
        <div class="grow">
          <input id="q" type="text" placeholder="검색: 제목/작성자/번호 (날짜는 기간필터로)" />
        </div>

        <div class="datebox">
          <span style="font-size:12px;color:#6b7280;">기간</span>
          <input id="from" type="date" />
          <span style="font-size:12px;color:#6b7280;">~</span>
          <input id="to" type="date" />
          <button onclick="clearDates()">초기화</button>
        </div>
      </div>

      <table>
        <thead>
          <tr>
            <th>글번호</th>
            <th>제목</th>
            <th>등록일</th>
            <th>작성자</th>
          </tr>
        </thead>
        <tbody id="tbody">
          {''.join(rows_html) if rows_html else '<tr><td class="empty" colspan="4">결과가 없습니다.</td></tr>'}
        </tbody>
      </table>

      <div class="pager">
        <div class="info">
          표시: <span id="shownCnt">0</span>개 / 필터 결과: <span id="filteredCnt">0</span>개
        </div>
        <div class="btns">
          <button id="prevBtn" onclick="prevPage()">이전</button>
          <button id="nextBtn" onclick="nextPage()">다음</button>
        </div>
      </div>
    </div>
  </div>

  <script>
    const PAGE_SIZE = 30;
    let page = 1;
    let filteredRows = [];

    function ymdToNum(s) {{
      // "YYYY-MM-DD" -> 20250201
      if (!s || s.length < 10) return null;
      return parseInt(s.slice(0,4) + s.slice(5,7) + s.slice(8,10));
    }}

    function getFilters() {{
      const q = document.getElementById('q').value.toLowerCase().trim();
      const from = document.getElementById('from').value;
      const to = document.getElementById('to').value;
      return {{
        q,
        fromNum: from ? ymdToNum(from) : null,
        toNum: to ? ymdToNum(to) : null
      }};
    }}

    function applyFilters() {{
      const tbody = document.getElementById('tbody');
      const allRows = Array.from(tbody.querySelectorAll('tr'));

      // 빈 결과 row 처리(초기 HTML에 있을 수도)
      const realRows = allRows.filter(r => r.querySelector('.postlink'));

      const {{ q, fromNum, toNum }} = getFilters();

      filteredRows = realRows.filter(tr => {{
        // 텍스트 검색(날짜 제외: 제목/작성자/번호 위주)
        const num = (tr.querySelector('.num')?.innerText || '').toLowerCase();
        const title = (tr.querySelector('.title')?.innerText || '').toLowerCase();
        const writer = (tr.querySelector('.writer')?.innerText || '').toLowerCase();
        const textOK = (!q) || (num.includes(q) || title.includes(q) || writer.includes(q));

        // 기간 필터(등록일 기반)
        const d = tr.getAttribute('data-date');
        const dNum = ymdToNum(d);
        let dateOK = true;
        if (fromNum && dNum) dateOK = dateOK && (dNum >= fromNum);
        if (toNum && dNum) dateOK = dateOK && (dNum <= toNum);

        return textOK && dateOK;
      }});

      page = 1;
      renderPage();
    }}

    function renderPage() {{
      const tbody = document.getElementById('tbody');
      const allRows = Array.from(tbody.querySelectorAll('tr'));
      const realRows = allRows.filter(r => r.querySelector('.postlink'));

      // 일단 다 숨김
      realRows.forEach(tr => tr.style.display = 'none');

      const totalFiltered = filteredRows.length;
      const start = (page - 1) * PAGE_SIZE;
      const end = Math.min(start + PAGE_SIZE, totalFiltered);

      for (let i = start; i < end; i++) {{
        filteredRows[i].style.display = '';
      }}

      document.getElementById('filteredCnt').innerText = totalFiltered.toString();
      document.getElementById('shownCnt').innerText = (end - start).toString();

      document.getElementById('prevBtn').disabled = (page <= 1);
      document.getElementById('nextBtn').disabled = (end >= totalFiltered);
    }}

    function nextPage() {{
      page += 1;
      renderPage();
    }}

    function prevPage() {{
      if (page > 1) page -= 1;
      renderPage();
    }}

    function clearDates() {{
      document.getElementById('from').value = '';
      document.getElementById('to').value = '';
      applyFilters();
    }}

    // 팝업창으로 열기(새탭 X)
    function openPopup(url) {{
      const w = 1000;
      const h = 800;
      const left = Math.max(0, Math.floor((screen.width - w) / 2));
      const top = Math.max(0, Math.floor((screen.height - h) / 2));
      window.open(url, 'workstudy_post',
        `width=${{w}},height=${{h}},left=${{left}},top=${{top}},resizable=yes,scrollbars=yes`);
    }}

    // 이벤트 바인딩
    document.addEventListener('click', (e) => {{
      const a = e.target.closest('.postlink');
      if (!a) return;
      e.preventDefault();
      const url = a.getAttribute('data-url');
      if (url) openPopup(url);
    }});

    document.getElementById('q').addEventListener('input', applyFilters);
    document.getElementById('from').addEventListener('change', applyFilters);
    document.getElementById('to').addEventListener('change', applyFilters);

    // 초기 렌더
    applyFilters();
  </script>
</body>
</html>
"""

    with open(out_path, "w", encoding="utf-8") as f:
        f.write(html)

    print("HTML 저장 완료:", out_path)


In [3]:

# ======================
# 메인
# ======================
def crawl(max_pages: int = 10, max_items: int = 300) -> pd.DataFrame:
    """Crawl listing pages, fetch every post's detail page and filter it.

    Args:
      max_pages: highest listing page number to request (pages start at 1).
      max_items: crawling stops once this many posts have been kept.

    Returns:
      A DataFrame with the columns 글번호, 제목, 등록일, 작성자, 링크, 판정, 사유
      and 마감일추정, one row per post that passed the filters.

    Errors raised by a request propagate to the caller. The verdict for
    every examined post is printed as it is processed.
    """
    out = []
    seen = set()

    # Used as a context manager so the session is closed even when a
    # request raises part-way through the crawl.
    with requests.Session() as session:
        for page in range(1, max_pages + 1):
            html = fetch(session, LIST_URL.format(page=page))
            items = parse_list(html)

            # A page without any parsed item ends the crawl.
            if not items:
                break

            for it in items:
                if it["post_id"] in seen:
                    continue
                seen.add(it["post_id"])

                # Pause before each detail request.
                time.sleep(REQUEST_DELAY_SEC)
                dhtml = fetch(session, it["link"])
                body = parse_detail_text(dhtml)

                ok, info = passes(it, body)

                print("제목:", it["title"])
                print("등록일:", it["posted_at"])
                print("판정:", info.get("dq_status"), "사유:", info.get("dq_reasons"))
                print("--------------------")

                if not ok:
                    continue

                out.append({
                    "글번호": it["post_id"],
                    "제목": it["title"],
                    "등록일": it["posted_at"],
                    "작성자": it["writer"],
                    "링크": it["link"],
                    "판정": info.get("dq_status", "PASS"),
                    "사유": ",".join(info.get("dq_reasons", [])),
                    "마감일추정": info.get("deadline"),
                })

                if len(out) >= max_items:
                    break

            if len(out) >= max_items:
                break

    return pd.DataFrame(out, columns=["글번호","제목","등록일","작성자","링크","판정","사유","마감일추정"])



In [4]:

if __name__ == "__main__":
    df = crawl(max_pages=50)

    # Sort newest registration date first. The parsed-date column is only a
    # temporary sort key and is dropped again afterwards.
    if not df.empty:
        sort_key = pd.to_datetime(df["등록일"], errors="coerce")
        df = (
            df.assign(**{"등록일_dt": sort_key})
            .sort_values("등록일_dt", ascending=False)
            .drop(columns=["등록일_dt"])
        )

    print(df.head(30).to_string(index=False))

제목: [근로장학생 모집]{마감] [전문대학원교학팀] 대학원 학위수여식(2/19, 2/20) 근로장학생 모집
등록일: 2026-02-04
판정: BLOCK 사유: ['초과학기', '휴학생', '졸업/수료', '국가근로']
--------------------
제목: ▶학생식당 단기알바 근로장학생 모집 ◀
등록일: 2026-02-04
판정: PASS 사유: []
--------------------
제목: [마감] [학사팀] 전공설명회 및 오리엔테이션 행사 지원 근로장학생 모집
등록일: 2026-02-03
판정: UNKNOWN 사유: ['국가근로']
--------------------
제목: [대학원교학팀] 단기 근로 장학생을 모집합니다. _모집 완료 마감
등록일: 2026-02-03
판정: UNKNOWN 사유: ['초과학기']
--------------------
제목: [Edutech센터][마감] [단기] 미래교육혁신처 Edutech센터 2026년도 2월 중 근로장학생(K-LXP서포터즈) 모집
등록일: 2026-01-28
판정: BLOCK 사유: ['초과학기', '휴학생', '졸업/수료']
--------------------
제목: [IPP센터]단기 근로장학생을 모집합니다.
등록일: 2026-01-28
판정: BLOCK 사유: ['국가근로']
--------------------
제목: [온라인평생교육원] 플랫폼운영팀 근로장학생 모집(~1. 30(금))_마감
등록일: 2026-01-28
판정: BLOCK 사유: ['휴학생', '졸업/수료']
--------------------
제목: 2캠퍼스 HRD교육팀 근로장학생 모집(2.1까지)
등록일: 2026-01-26
판정: UNKNOWN 사유: ['초과학기', '졸업/수료']
--------------------
제목: [정보화운영팀] 단기근로장학생 (디자인) 모집
등록일: 2026-01-26
판정: NO_CONDITION 사유: []
--------------------
제목: (마감) [학사팀] 2026학

In [6]:

    # Save as CSV ("utf-8-sig" writes a BOM at the start of the file)
    df.to_csv("koreatech_workstudy_filtered3.csv", index=False, encoding="utf-8-sig")
    print("Saved: koreatech_workstudy_filtered3.csv")

    # Save as HTML (board-style page; UNKNOWN rows get a yellow badge)
    make_board_html(df, "C:\\Users\\dsino\\Desktop\\koreatech_workstudy_board3.html")

Saved: koreatech_workstudy_filtered3.csv
HTML 저장 완료: C:\Users\dsino\Desktop\koreatech_workstudy_board3.html
