In [5]:
import requests

In [6]:
url ="http://localhost:8000/soup_try.html" # 로컬파일 위치치
response = requests.get(url)
print(response.text)

<!DOCTYPE HTML>
<html lang="en">
    <head>
        <meta charset="utf-8">
        <title>Error response</title>
    </head>
    <body>
        <h1>Error response</h1>
        <p>Error code: 404</p>
        <p>Message: File not found.</p>
        <p>Error code explanation: 404 - Nothing matches the given URI.</p>
    </body>
</html>



In [11]:
# Save the response content to a local file

# Read the file using with statement
with open("soup_try.html", "r", encoding="utf-8") as file:
    content = file.read()

print(content)

<html lang="en">
<head>
    <title>My Enhanced Website</title>
</head>
<body>
    <header id="main-header">
        <h1 class="title">Welcome to My Website</h1>
        <nav>
            <ul class="nav-links">
                <li><a href="/home" class="nav-item">Home</a></li>
                <li><a href="/about" class="nav-item">About</a></li>
                <li><a href="/contact" class="nav-item">Contact</a></li>
            </ul>
        </nav>
    </header>

    <section id="intro">
        <h2 class="section-title">Introduction</h2>
        <p class="text">This website contains various elements for web scraping practice.</p>
        <p class="text">Use <strong>BeautifulSoup</strong> to extract information!</p>
    </section>

    <section id="content">
        <article class="post" id="post-1">
            <h2 class="post-title">First Blog Post</h2>
            <p class="post-meta">Published on <span class="date">March 20, 2025</span></p>
            <p class="post-content">This i

In [12]:
from bs4 import BeautifulSoup

# BeautifulSoup 객체 생성
soup = BeautifulSoup(content, "html.parser")

# HTML 전체 출력
print(soup.prettify())

<html lang="en">
 <head>
  <title>
   My Enhanced Website
  </title>
 </head>
 <body>
  <header id="main-header">
   <h1 class="title">
    Welcome to My Website
   </h1>
   <nav>
    <ul class="nav-links">
     <li>
      <a class="nav-item" href="/home">
       Home
      </a>
     </li>
     <li>
      <a class="nav-item" href="/about">
       About
      </a>
     </li>
     <li>
      <a class="nav-item" href="/contact">
       Contact
      </a>
     </li>
    </ul>
   </nav>
  </header>
  <section id="intro">
   <h2 class="section-title">
    Introduction
   </h2>
   <p class="text">
    This website contains various elements for web scraping practice.
   </p>
   <p class="text">
    Use
    <strong>
     BeautifulSoup
    </strong>
    to extract information!
   </p>
  </section>
  <section id="content">
   <article class="post" id="post-1">
    <h2 class="post-title">
     First Blog Post
    </h2>
    <p class="post-meta">
     Published on
     <span class="date">
      Marc

In [14]:
# 첫 번째 h1 태그 가져오기
title = soup.find("h1").text
print("Title:", title)

# 모든 h2 태그 가져오기
h2_tags = soup.find_all("h2")
print(h2_tags)
for h2 in h2_tags:
    print("H2:", h2.text)


Title: Welcome to My Website
[<h2 class="section-title">Introduction</h2>, <h2 class="post-title">First Blog Post</h2>, <h2 class="post-title">Second Blog Post</h2>, <h2 class="section-title">Image Gallery</h2>, <h2 class="section-title">User Data</h2>]
H2: Introduction
H2: First Blog Post
H2: Second Blog Post
H2: Image Gallery
H2: User Data


### 1. 특정 태그 추출
### soup.find 혹은 soup.select_one 방식

In [9]:
# Extract the text from the <h1> tag
h1_text = soup.find('h1', class_='title').get_text()

print(h1_text)

Welcome to My Website


In [10]:
# Extract the text using the CSS selector
h1_text = soup.select_one('#main-header > h1').get_text()

print(h1_text)

Welcome to My Website


### 2. 특정 클래스나 ID로 데이터 찾기

In [11]:
# 특정 ID 요소 찾기
intro_section = soup.find(id="intro")
print("Intro section:", intro_section.text.strip())

# 특정 class 요소 찾기
posts = soup.find_all(class_="post-title")
for post in posts:
    print("Post Title:", post.text)


Intro section: Introduction
This website contains various elements for web scraping practice.
Use BeautifulSoup to extract information!
Post Title: First Blog Post
Post Title: Second Blog Post


In [12]:
# 특정 ID 요소 찾기 (CSS 선택자 이용)
intro_section = soup.select_one("#intro")
print("Intro section:", intro_section.text.strip())

# 특정 class 요소 찾기 (CSS 선택자 이용)
posts = soup.select(".post-title")
for post in posts:
    print("Post Title:", post.text)

Intro section: Introduction
This website contains various elements for web scraping practice.
Use BeautifulSoup to extract information!
Post Title: First Blog Post
Post Title: Second Blog Post


### 3. 테이블 데이터 추출

In [13]:
# 테이블 데이터 추출
rows = soup.select(".user-table tbody tr")
for row in rows:
    cols = row.find_all("td")
    user_id = cols[0].text.strip()
    name = cols[1].text.strip()
    email = cols[2].text.strip()
    print(f"User ID: {user_id}, Name: {name}, Email: {email}")


User ID: 1, Name: Alice, Email: alice@example.com
User ID: 2, Name: Bob, Email: bob@example.com


In [14]:
import pandas as pd

# Extract data and store it in a list
data = []
for row in rows:
    cols = row.find_all("td")
    user_id = cols[0].text.strip()
    name = cols[1].text.strip()
    email = cols[2].text.strip()
    data.append({"User ID": user_id, "Name": name, "Email": email})

# Convert the list of dictionaries into a DataFrame
df = pd.DataFrame(data)
print(df)

  User ID   Name              Email
0       1  Alice  alice@example.com
1       2    Bob    bob@example.com
