In [2]:
import requests
from bs4 import BeautifulSoup
import re

# MICCAI 2024 메인 페이지
base_url = "https://papers.miccai.org/miccai-2024/"
response = requests.get(base_url)
response.raise_for_status()  # 요청 실패 시 예외 발생

soup = BeautifulSoup(response.text, 'html.parser')

# 정규표현식으로 '001-PaperXXXX.html' 형태의 링크를 추출
pattern = re.compile(r"^/miccai-2024/\d{3}-Paper\d{4}\.html$")

# 링크 필터링
paper_links = []
for a_tag in soup.find_all("a", href=True):
    href = a_tag["href"]
    if pattern.match(href):
        full_url = base_url + href
        paper_links.append(full_url)

# 출력
for link in paper_links:
    print(link)

print(f"\n총 {len(paper_links)}개의 논문 리뷰 페이지를 찾았습니다.")


https://papers.miccai.org/miccai-2024//miccai-2024/001-Paper1861.html
https://papers.miccai.org/miccai-2024//miccai-2024/002-Paper1908.html
https://papers.miccai.org/miccai-2024//miccai-2024/003-Paper1001.html
https://papers.miccai.org/miccai-2024//miccai-2024/004-Paper0132.html
https://papers.miccai.org/miccai-2024//miccai-2024/005-Paper1015.html
https://papers.miccai.org/miccai-2024//miccai-2024/006-Paper2442.html
https://papers.miccai.org/miccai-2024//miccai-2024/007-Paper2090.html
https://papers.miccai.org/miccai-2024//miccai-2024/008-Paper3648.html
https://papers.miccai.org/miccai-2024//miccai-2024/009-Paper0219.html
https://papers.miccai.org/miccai-2024//miccai-2024/010-Paper2774.html
https://papers.miccai.org/miccai-2024//miccai-2024/011-Paper2364.html
https://papers.miccai.org/miccai-2024//miccai-2024/012-Paper2514.html
https://papers.miccai.org/miccai-2024//miccai-2024/013-Paper1668.html
https://papers.miccai.org/miccai-2024//miccai-2024/014-Paper1802.html
https://papers.micca

In [3]:
paper_links

['https://papers.miccai.org/miccai-2024//miccai-2024/001-Paper1861.html',
 'https://papers.miccai.org/miccai-2024//miccai-2024/002-Paper1908.html',
 'https://papers.miccai.org/miccai-2024//miccai-2024/003-Paper1001.html',
 'https://papers.miccai.org/miccai-2024//miccai-2024/004-Paper0132.html',
 'https://papers.miccai.org/miccai-2024//miccai-2024/005-Paper1015.html',
 'https://papers.miccai.org/miccai-2024//miccai-2024/006-Paper2442.html',
 'https://papers.miccai.org/miccai-2024//miccai-2024/007-Paper2090.html',
 'https://papers.miccai.org/miccai-2024//miccai-2024/008-Paper3648.html',
 'https://papers.miccai.org/miccai-2024//miccai-2024/009-Paper0219.html',
 'https://papers.miccai.org/miccai-2024//miccai-2024/010-Paper2774.html',
 'https://papers.miccai.org/miccai-2024//miccai-2024/011-Paper2364.html',
 'https://papers.miccai.org/miccai-2024//miccai-2024/012-Paper2514.html',
 'https://papers.miccai.org/miccai-2024//miccai-2024/013-Paper1668.html',
 'https://papers.miccai.org/miccai-202

In [22]:
import requests
from bs4 import BeautifulSoup
import csv

def extract_reviews(paper_url):
    response = requests.get(paper_url)
    soup = BeautifulSoup(response.text, 'html.parser')

    li_tags = soup.find_all("li")

    current_reviewer = 0
    score = None
    post_rebuttal = None
    reviewers_data = []

    for li in li_tags:
        question_tag = li.find("strong")
        answer_tag = li.find("blockquote")

        if not question_tag or not answer_tag:
            continue

        question = question_tag.get_text(strip=True)
        answer = answer_tag.get_text(strip=True)

        if question.startswith("Please describe the contribution"):
            if current_reviewer != 0:
                reviewers_data.append([
                    paper_url,
                    f"Reviewer {current_reviewer}",
                    score if score else "N/A",
                    post_rebuttal if post_rebuttal else "N/A"
                ])
                score = None
                post_rebuttal = None
            current_reviewer += 1

        if "Rate the paper on a scale of 1-6" in question:
            score = answer

        if "[Post rebuttal] After reading the author’s rebuttal" in question:
            post_rebuttal = answer

    if current_reviewer > 0:
        reviewers_data.append([
            paper_url,
            f"Reviewer {current_reviewer}",
            score if score else "N/A",
            post_rebuttal if post_rebuttal else "N/A"
        ])

    return reviewers_data

all_reviews = []
for link in paper_links:
    link = link.replace("/miccai-2024//miccai-2024", "/miccai-2024")
    try:
        reviews = extract_reviews(link)
        all_reviews.extend(reviews)
    except Exception as e:
        print(f"❌ 크롤링 실패: {link} — {e}")
    break  # 테스트 시 한 개만


# CSV 저장
with open("miccai2024_reviews.csv", "w", newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["Paper URL", "Reviewer", "Score", "Post-Rebuttal Opinion"])
    writer.writerows(all_reviews)

print("✅ CSV 파일 저장 완료: miccai2024_reviews.csv")


✅ CSV 파일 저장 완료: miccai2024_reviews.csv


In [21]:
import requests

# 예시 논문 리뷰 URL
url = paper_links[0]
url = url.replace("/miccai-2024//miccai-2024", "/miccai-2024")

# 요청 및 HTML 저장
response = requests.get(url)
html_content = response.text

# 1. 화면에 출력 (너무 길면 생략됨)
print(html_content)

# 2. 파일로 저장해서 확인
with open("miccai_sample_review.html", "w", encoding="utf-8") as f:
    f.write(html_content)

print("✅ HTML 저장 완료: miccai_sample_review.html")


<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<!-- Begin Jekyll SEO tag v2.7.1 -->
<title>3D Spine Shape Estimation from Single 2D DXA | MICCAI 2024 - Open Access</title>
<meta name="generator" content="Jekyll v3.9.0" />
<meta property="og:title" content="3D Spine Shape Estimation from Single 2D DXA" />
<meta name="author" content="Bourigault, Emmanuelle and Jamaludin, Amir and Zisserman, Andrew" />
<meta property="og:locale" content="en_US" />
<meta name="description" content="Abstract Scoliosis is currently assessed solely on 2D lateral deviations, but recent studies have also revealed the importance of other imaging planes in understanding the deformation of the spine. Consequently, extracting the spinal geometry in 3D would help quantify these spinal deformations and aid diagnosis. In this study, we propose an automated general fram

In [20]:
paper_links[0]

'https://papers.miccai.org/miccai-2024//miccai-2024/001-Paper1861.html'

In [24]:
import requests
from bs4 import BeautifulSoup
import csv
import re

def extract_reviews(paper_url):
    response = requests.get(paper_url)
    soup = BeautifulSoup(response.text, 'html.parser')
    review_sections = soup.find_all("h3", id=lambda x: x and x.startswith("review-"))

    reviewers_data = []

    for idx, section in enumerate(review_sections, 1):
        ul = section.find_next("ul")
        if not ul:
            continue

        pre_score = "N/A"
        post_score = "N/A"

        for li in ul.find_all("li"):
            question_tag = li.find("strong")
            answer_tag = li.find("blockquote")
            if not question_tag or not answer_tag:
                continue

            question = question_tag.get_text(strip=True)
            answer = answer_tag.get_text(strip=True)

            # 사전 점수
            if "Rate the paper on a scale of 1-6" in question:
                match = re.search(r"\((\d)\)", answer)
                if match:
                    pre_score = match.group(1)

            # 리부탈 이후 점수
            elif "[Post rebuttal]" in question and "state your overall opinion" in question:
                match = re.search(r"\((\d)\)", answer)
                if match:
                    post_score = match.group(1)

        reviewers_data.append([
            paper_url,
            f"Reviewer {idx}",
            pre_score,
            post_score
        ])

    return reviewers_data

# 테스트용 링크
test_url = paper_links[0].replace("/miccai-2024//miccai-2024", "/miccai-2024")

# 실행
all_reviews = []
try:
    reviews = extract_reviews(test_url)
    all_reviews.extend(reviews)
except Exception as e:
    print(f"❌ 크롤링 실패: {test_url} — {e}")

# CSV 저장
with open("miccai2024_reviews.csv", "w", newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["Paper URL", "Reviewer", "Pre-rebuttal Score", "Post-rebuttal Score"])
    writer.writerows(all_reviews)

print("✅ CSV 파일 저장 완료: miccai2024_reviews.csv")


✅ CSV 파일 저장 완료: miccai2024_reviews.csv


In [26]:
import requests
from bs4 import BeautifulSoup
import csv
import re

def extract_reviews(paper_url):
    response = requests.get(paper_url)
    soup = BeautifulSoup(response.text, 'html.parser')
    review_sections = soup.find_all("h3", id=lambda x: x and x.startswith("review-"))

    reviewers_data = []

    for idx, section in enumerate(review_sections, 1):
        ul = section.find_next("ul")
        if not ul:
            continue

        pre_score = "N/A"
        post_score = "N/A"

        for li in ul.find_all("li"):
            question_tag = li.find("strong")
            answer_tag = li.find("blockquote")
            if not question_tag or not answer_tag:
                continue

            question = question_tag.get_text(strip=True)
            answer = answer_tag.get_text(strip=True)

            # 사전 점수
            if "Rate the paper on a scale of 1-6" in question:
                match = re.search(r"\((\d)\)", answer)
                if match:
                    pre_score = match.group(1)

            # 리부탈 이후 점수
            elif "[Post rebuttal]" in question and "state your overall opinion" in question:
                match = re.search(r"\((\d)\)", answer)
                if match:
                    post_score = match.group(1)

        reviewers_data.append([
            paper_url,
            f"Reviewer {idx}",
            pre_score,
            post_score
        ])

    return reviewers_data

# 🔁 전체 논문 링크 순회
all_reviews = []
for paper_url in paper_links:
    fixed_url = paper_url.replace("/miccai-2024//miccai-2024", "/miccai-2024")
    try:
        reviews = extract_reviews(fixed_url)
        all_reviews.extend(reviews)
        print(f"✅ 완료: {fixed_url}")
    except Exception as e:
        print(f"❌ 실패: {fixed_url} — {e}")

# 📄 CSV 저장
with open("miccai2024_reviews.csv", "w", newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["Paper URL", "Reviewer", "Pre-rebuttal Score", "Post-rebuttal Score"])
    writer.writerows(all_reviews)

print("📁 전체 저장 완료: miccai2024_reviews.csv")


✅ 완료: https://papers.miccai.org/miccai-2024/001-Paper1861.html
✅ 완료: https://papers.miccai.org/miccai-2024/002-Paper1908.html
✅ 완료: https://papers.miccai.org/miccai-2024/003-Paper1001.html
✅ 완료: https://papers.miccai.org/miccai-2024/004-Paper0132.html
✅ 완료: https://papers.miccai.org/miccai-2024/005-Paper1015.html
✅ 완료: https://papers.miccai.org/miccai-2024/006-Paper2442.html
✅ 완료: https://papers.miccai.org/miccai-2024/007-Paper2090.html
✅ 완료: https://papers.miccai.org/miccai-2024/008-Paper3648.html
✅ 완료: https://papers.miccai.org/miccai-2024/009-Paper0219.html
✅ 완료: https://papers.miccai.org/miccai-2024/010-Paper2774.html
✅ 완료: https://papers.miccai.org/miccai-2024/011-Paper2364.html
✅ 완료: https://papers.miccai.org/miccai-2024/012-Paper2514.html
✅ 완료: https://papers.miccai.org/miccai-2024/013-Paper1668.html
✅ 완료: https://papers.miccai.org/miccai-2024/014-Paper1802.html
✅ 완료: https://papers.miccai.org/miccai-2024/015-Paper0143.html
✅ 완료: https://papers.miccai.org/miccai-2024/016-Paper15

In [None]:
import requests
from bs4 import BeautifulSoup
import csv

def extract_author_feedback(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")
    
    # "Author Feedback" 섹션의 h1 태그 찾기
    feedback_header = soup.find("h1", id="authorFeedback-id")
    if feedback_header:
        blockquote = feedback_header.find_next("blockquote")
        if blockquote:
            return blockquote.get_text(strip=True)
    return ""

feedback_results = []

for url in paper_links:
    try:
        feedback_text = extract_author_feedback(url)
        feedback_results.append([url, feedback_text])
        print(f"✅ 피드백 추출 완료: {url}")
    except Exception as e:
        print(f"❌ 오류 발생: {url} — {e}")

# CSV 저장
with open("miccai2024_author_feedback.csv", "w", newline='', encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["Paper URL", "Authors Feedback"])
    writer.writerows(feedback_results)

print("📄 Authors Feedback 저장 완료: miccai2024_author_feedback.csv")