# Understanding Web Scraping

## Importing BeautifulSoup4

In [1]:
from bs4 import BeautifulSoup

## Reading the HTML file

In [2]:
# Load the page markup from disk. The document declares charset utf-8, so the
# encoding is passed explicitly instead of relying on the platform default.
with open("home.html", "r", encoding="utf-8") as html_file:
    content = html_file.read()

# Show the raw markup; the file handle is already closed at this point.
print(content)

<!DOCTYPE html>
<html lang="en">
    <head>
        <!--Required meta tags-->
        <meta charset="utf-8">
        <meta name="viewport" content="width=device-width">
        <!--Bootstrap CSS-->
        <link rel="stylesheet">
        <title>My Courses</title>
    </head>
    <body>
        <h1>Hello, Start Learning!</h1>
        <div class="card" id="card-python-for-bs4">
            <div class="card-holder">
                Python
            </div>
            <div class="card-body">
                <h5 class="card-title">Python for begineers</h5>
                <p class="card-text">If you are new to python, this is the course that you should buy!</p>
                <a href="#" class="btn btn-primary">Start for 20$</a>
            </div>
        </div>
        <div class="card" id="card-python-for-bs4">
            <div class="card-holder">
                Python
            </div>
            <div class="card-body">
                <h5 class="card-title">Python Web Development

## Creating BeautifulSoup object

In [3]:
# Parse the raw markup into a navigable tree using the lxml parser.
soup = BeautifulSoup(content, features="lxml")

# Render the parsed tree with one tag per line and print it.
formatted_markup = soup.prettify()
print(formatted_markup)

<!DOCTYPE html>
<html lang="en">
 <head>
  <!--Required meta tags-->
  <meta charset="utf-8"/>
  <meta content="width=device-width" name="viewport"/>
  <!--Bootstrap CSS-->
  <link rel="stylesheet"/>
  <title>
   My Courses
  </title>
 </head>
 <body>
  <h1>
   Hello, Start Learning!
  </h1>
  <div class="card" id="card-python-for-bs4">
   <div class="card-holder">
    Python
   </div>
   <div class="card-body">
    <h5 class="card-title">
     Python for begineers
    </h5>
    <p class="card-text">
     If you are new to python, this is the course that you should buy!
    </p>
    <a class="btn btn-primary" href="#">
     Start for 20$
    </a>
   </div>
  </div>
  <div class="card" id="card-python-for-bs4">
   <div class="card-holder">
    Python
   </div>
   <div class="card-body">
    <h5 class="card-title">
     Python Web Development
    </h5>
    <p class="card-text">
     If you feel enough confident with python, you are ready to learn how to create your own website
    </p>
 

## Finding a single tag

In [4]:
# find() returns only the first <h5> element in the document.
tags = soup.find(name="h5")
print(tags)

<h5 class="card-title">Python for begineers</h5>


## Finding all matching tags

In [5]:
# find_all() collects every <h5> element in the document, in document order.
tags_all = soup.find_all(name="h5")
print(tags_all)

[<h5 class="card-title">Python for begineers</h5>, <h5 class="card-title">Python Web Development</h5>, <h5 class="card-title">Python Machine Learning</h5>]


## Extracting text from the tags

In [6]:
# Print only the text content of each heading, dropping the surrounding markup.
for course in tags_all:
    heading_text = course.get_text()
    print(heading_text)

Python for begineers
Python Web Development
Python Machine Learning


In [10]:
# Collect every <div> whose class attribute includes "card"; each one wraps a
# single course entry (holder, title, description and price link).
course_cards = soup.find_all(name="div", attrs={"class": "card"})
print(course_cards)

[<div class="card" id="card-python-for-bs4">
<div class="card-holder">
                Python
            </div>
<div class="card-body">
<h5 class="card-title">Python for begineers</h5>
<p class="card-text">If you are new to python, this is the course that you should buy!</p>
<a class="btn btn-primary" href="#">Start for 20$</a>
</div>
</div>, <div class="card" id="card-python-for-bs4">
<div class="card-holder">
                Python
            </div>
<div class="card-body">
<h5 class="card-title">Python Web Development</h5>
<p class="card-text">If you feel enough confident with python, you are ready to learn how to create your own website</p>
<a class="btn btn-primary" href="#">Start for 50$</a>
</div>
</div>, <div class="card" id="card-python-for-bs4">
<div class="card-holder">
                Python
            </div>
<div class="card-body">
<h5 class="card-title">Python Machine Learning</h5>
<p class="card-text">Become a Python Machine Learning master!</p>
<a class="btn btn-prima

## Accessing child tags as attributes

In [13]:
# For each card, look up its first <h5> descendant and print the whole tag.
for course in course_cards:
    title_tag = course.find("h5")
    print(title_tag)

<h5 class="card-title">Python for begineers</h5>
<h5 class="card-title">Python Web Development</h5>
<h5 class="card-title">Python Machine Learning</h5>


In [16]:
# Print each course title followed by the text of its price link.
for course in course_cards:
    course_name = course.h5.text
    course_price = course.a.text
    # The spaces around the newline are intentional: they reproduce the output
    # of print(name, "\n", price) with its default " " separator.
    print(f"{course_name} \n {course_price}")

Python for begineers 
 Start for 20$
Python Web Development 
 Start for 50$
Python Machine Learning 
 Start for 100$


In [18]:
# Report each course together with its price.
for course in course_cards:
    course_name = course.h5.text
    link_text = course.a.text
    # The price is the last whitespace-separated token of the link text
    # (e.g. "Start for 20$" -> "20$").
    course_price = link_text.rsplit(maxsplit=1)[-1]
    print(f"{course_name} costs {course_price}")
    

Python for begineers costs 20$
Python Web Development costs 50$
Python Machine Learning costs 100$
