# Scraping demo with BeautifulSoup4 
by [Real-Python](https://realpython.com/beautiful-soup-web-scraper-python/)

We are going to scrape a fake Python jobs web page.

In [None]:
import requests
from bs4 import BeautifulSoup

In [None]:
# Address of the fake job-listings page that the following cells download and parse.
base_url = "https://realpython.github.io/fake-jobs/"

In [None]:
# Download the page. Without a timeout, requests may wait forever on an
# unresponsive server, so bound the wait explicitly.
page = requests.get(base_url, timeout=10)
# Fail loudly on 4xx/5xx responses instead of parsing an error page below.
page.raise_for_status()

1. Inspect the website in your browser
2. Open the dev tools and take a look at all the HTML tags and CSS attributes

In [None]:
# Parse the raw bytes of the downloaded response into a navigable tree,
# using the "html.parser" backend.
soup = BeautifulSoup(page.content, "html.parser")

### Find Elements by ID

In [None]:
# Look up the element whose id attribute is "ResultsContainer"; the later
# cells search only inside this element.
results = soup.find(id="ResultsContainer")

In [None]:
# Print the container's markup in indented form to inspect its structure.
print(results.prettify())

### Find Elements by HTML Class Name

In [None]:
# Collect every <div> with the CSS class "card-content" inside the results
# container. `class_` has a trailing underscore because `class` is a Python keyword.
job_elements = results.find_all("div", class_="card-content")

In [None]:
# Dump the raw markup of every job card, followed by one blank line each.
for job_element in job_elements:
    print(job_element)
    print()

In [None]:
# Show the title, company and location tags of each job card as raw HTML.
for job_element in job_elements:
    title_element = job_element.find("h2", class_="title")
    company_element = job_element.find("h3", class_="company")
    location_element = job_element.find("p", class_="location")
    # One call: the three tags on separate lines, then a blank separator line.
    print(
        title_element,
        company_element,
        location_element,
        sep="\n",
        end="\n\n",
    )

#### Extract Text From HTML Elements

In [None]:
# Print only the visible text of the title, company and location of each job
# card, with surrounding whitespace removed.
for job_element in job_elements:
    title_element = job_element.find("h2", class_="title")
    company_element = job_element.find("h3", class_="company")
    location_element = job_element.find("p", class_="location")
    # All three lookups happen first; the fields are then printed in order.
    for field_element in (title_element, company_element, location_element):
        print(field_element.text.strip())
    print()

### Find Elements by Class Name and Text Content


In [None]:
# Search for <h2> elements by their text. NOTE(review): a plain str passed as
# `string=` appears to require the element's text to be exactly "Python"
# (case-sensitive, no extra characters), so this is expected to match few or no
# headings. Confirm against the Beautiful Soup docs; the next cell uses a
# function for a substring match.
python_jobs = results.find_all("h2", string="Python")

### Pass a Function to a Beautiful Soup Method

In [None]:
# Case-insensitive substring match on the heading text. The function is given
# the tag's `.string`, which is None for a heading that has more than one
# child, so guard against None before calling `.lower()`.
python_jobs = results.find_all(
    "h2", string=lambda text: text is not None and "python" in text.lower()
)

In [None]:
# Show the <h2> elements currently bound to `python_jobs`.
print(python_jobs)

### Access Parent Elements

In [None]:
# For each matching heading, climb three levels up the tree.
# NOTE(review): presumably this reaches the element wrapping the whole job
# card; confirm the nesting depth in the browser dev tools.
python_job_elements = [
    heading.parent.parent.parent
    for heading in python_jobs
]

### Navigate through links

In [None]:
# urljoin is needed below to resolve hrefs against the base URL; import it in
# this cell so the cell runs on its own.
from urllib.parse import urljoin

base_url = 'http://examplewebsite.com'
with requests.Session() as session:  # maintaining a web-scraping session
    # Bound every request with a timeout so an unresponsive server cannot
    # hang the notebook.
    soup = BeautifulSoup(
        session.get(base_url, timeout=10).content, "html.parser"
    )

    # Visit every anchor that has an href inside a <div class="container">.
    for link in soup.select("div.container a[href]"):
        # Combine the href with the base URL to get the address to fetch.
        full_link = urljoin(base_url, link["href"])
        new_page = session.get(full_link, timeout=10).content