In [1]:
!pip install beautifulsoup4
!pip install requests

Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m
Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m


# Web Scraping and JSON Data Storage

## Introduction

This notebook demonstrates web scraping with the BeautifulSoup library and storing the scraped data in structured JSON files. It is a practical example of how to scrape job listings and quotes from two practice websites and save each result set in JSON format.

## Dependencies

Before running this notebook, ensure the following Python libraries are available:

- `json`: For handling JSON data (part of the Python standard library, so no installation is needed).
- `BeautifulSoup` (bs4): For parsing HTML content.
- `requests`: For sending HTTP GET requests.

You can install the third-party libraries using pip:

```bash
pip install beautifulsoup4
pip install requests
```


In [10]:
from bs4 import BeautifulSoup
import requests
import json

## Web scraping site: "https://realpython.github.io/fake-jobs/"

In [11]:
# Define the URL
url = "https://realpython.github.io/fake-jobs/"

# Send an HTTP GET request to the URL. The timeout keeps the cell from hanging
# indefinitely on a stalled connection, and raise_for_status() aborts on a
# 4xx/5xx response instead of parsing an error page as if it were the listing.
response = requests.get(url, timeout=10)
response.raise_for_status()

# Parse the HTML content of the page
soup = BeautifulSoup(response.text, 'html.parser')

# Collect every <div class="card"> element; each one is treated as a single
# job posting below.
job_listings = soup.find_all('div', class_='card')

# Build one dictionary per job listing
job_listings_data = []
for job in job_listings:
    job_title = job.h2.text.strip()
    job_company = job.h3.text.strip()
    job_location = job.find('p', class_='location').text.strip()

    # The salary element is optional: fall back to a placeholder string when
    # the card does not contain a <p class="salary"> element.
    job_salary_element = job.find('p', class_='salary')
    job_salary = job_salary_element.text.strip() if job_salary_element else "Salary information not available"

    # Create a dictionary for the current job listing and add it to the list
    job_data = {
        "Title": job_title,
        "Company": job_company,
        "Location": job_location,
        "Salary": job_salary,
    }
    job_listings_data.append(job_data)

In [12]:
# Serialize the scraped job listings and write them to disk as indented JSON
with open('job_listings.json', 'w') as out_file:
    out_file.write(json.dumps(job_listings_data, indent=4))

# Read the file back and decode it, replacing the in-memory list with the
# round-tripped copy
with open('job_listings.json', 'r') as in_file:
    job_listings_data = json.loads(in_file.read())

# Print every stored field of each listing, followed by a separator line
for job_data in job_listings_data:
    for field in ("Title", "Company", "Location", "Salary"):
        print(f"{field}:", job_data[field])
    print("\n------------------------\n")

Title: Senior Python Developer
Company: Payne, Roberts and Davis
Location: Stewartbury, AA
Salary: Salary information not available

------------------------

Title: Energy engineer
Company: Vasquez-Davidson
Location: Christopherville, AA
Salary: Salary information not available

------------------------

Title: Legal executive
Company: Jackson, Chambers and Levy
Location: Port Ericaburgh, AA
Salary: Salary information not available

------------------------

Title: Fitness centre manager
Company: Savage-Bradley
Location: East Seanview, AP
Salary: Salary information not available

------------------------

Title: Product manager
Company: Ramirez Inc
Location: North Jamieview, AP
Salary: Salary information not available

------------------------

Title: Medical technical officer
Company: Rogers-Yates
Location: Davidville, AP
Salary: Salary information not available

------------------------

Title: Physiological scientist
Company: Kramer-Klein
Location: South Christopher, AE
Salary: Sal

## Web Scraping site: "https://quotes.toscrape.com/"

In [19]:
# Define the URL
url = "https://quotes.toscrape.com/"

# Send an HTTP GET request to the URL. The timeout keeps the cell from hanging
# indefinitely on a stalled connection, and raise_for_status() aborts on a
# 4xx/5xx response instead of parsing an error page.
response = requests.get(url, timeout=10)
response.raise_for_status()

# Parse the HTML content of the page
soup = BeautifulSoup(response.text, 'html.parser')

quote_listings_data = []

# Walk the page one <div class="quote"> container at a time so that the text,
# author and tags are always read from the same quote. Collecting three
# page-wide lists and pairing them by index would silently mispair records
# (or raise IndexError) if any quote were missing one of its elements.
for quote_block in soup.find_all('div', class_='quote'):
    text_element = quote_block.find('span', class_='text')
    author_element = quote_block.find('small', class_='author')
    if text_element is None or author_element is None:
        # Incomplete quote block: skip it rather than store a partial record
        continue

    quote = text_element.text
    author = author_element.text

    # Tags are optional; a quote without a tags container gets an empty list
    tags_container = quote_block.find('div', class_='tags')
    if tags_container is None:
        tags = []
    else:
        tags = [tag.text for tag in tags_container.find_all('a', class_='tag')]

    # Create a dictionary for the current quote
    quote_data = {
        "Quote": quote,
        "Author": author,
        "Tag": tags,
    }

    # Append the quote dictionary to the list
    quote_listings_data.append(quote_data)

    print(f"Quote: {quote}")
    print(f"Author: {author}")
    print(f"Tags: {', '.join(tags)}")
    print("\n------------------------\n")

Quote: “The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”
Author: Albert Einstein
Tags: change, deep-thoughts, thinking, world

------------------------

Quote: “It is our choices, Harry, that show what we truly are, far more than our abilities.”
Author: J.K. Rowling
Tags: abilities, choices

------------------------

Quote: “There are only two ways to live your life. One is as though nothing is a miracle. The other is as though everything is a miracle.”
Author: Albert Einstein
Tags: inspirational, life, live, miracle, miracles

------------------------

Quote: “The person, be it gentleman or lady, who has not pleasure in a good novel, must be intolerably stupid.”
Author: Jane Austen
Tags: aliteracy, books, classic, humor

------------------------

Quote: “Imperfection is beauty, madness is genius and it's better to be absolutely ridiculous than absolutely boring.”
Author: Marilyn Monroe
Tags: be-yourself, inspirational



In [21]:
# Bare last expression: the notebook displays the list of quote dictionaries
# (keys "Quote", "Author", "Tag") built by the scraping cell.
quote_listings_data

[{'Quote': '“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”',
  'Author': 'Albert Einstein',
  'Tag': ['change', 'deep-thoughts', 'thinking', 'world']},
 {'Quote': '“It is our choices, Harry, that show what we truly are, far more than our abilities.”',
  'Author': 'J.K. Rowling',
  'Tag': ['abilities', 'choices']},
 {'Quote': '“There are only two ways to live your life. One is as though nothing is a miracle. The other is as though everything is a miracle.”',
  'Author': 'Albert Einstein',
  'Tag': ['inspirational', 'life', 'live', 'miracle', 'miracles']},
 {'Quote': '“The person, be it gentleman or lady, who has not pleasure in a good novel, must be intolerably stupid.”',
  'Author': 'Jane Austen',
  'Tag': ['aliteracy', 'books', 'classic', 'humor']},
 {'Quote': "“Imperfection is beauty, madness is genius and it's better to be absolutely ridiculous than absolutely boring.”",
  'Author': 'Marilyn Monroe',
  'Tag': ['be-

In [22]:
# Serialize the scraped quotes and write them to disk as indented JSON
with open('quote_listings.json', 'w') as out_file:
    out_file.write(json.dumps(quote_listings_data, indent=4))

# Read the file back and decode it, replacing the in-memory list with the
# round-tripped copy
with open('quote_listings.json', 'r') as in_file:
    quote_listings_data = json.loads(in_file.read())

# Print every stored field of each quote, followed by a separator line
for quote_data in quote_listings_data:
    for field in ("Quote", "Author", "Tag"):
        print(f"{field}:", quote_data[field])
    print("\n------------------------\n")

Quote: “The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”
Author: Albert Einstein
Tag: ['change', 'deep-thoughts', 'thinking', 'world']

------------------------

Quote: “It is our choices, Harry, that show what we truly are, far more than our abilities.”
Author: J.K. Rowling
Tag: ['abilities', 'choices']

------------------------

Quote: “There are only two ways to live your life. One is as though nothing is a miracle. The other is as though everything is a miracle.”
Author: Albert Einstein
Tag: ['inspirational', 'life', 'live', 'miracle', 'miracles']

------------------------

Quote: “The person, be it gentleman or lady, who has not pleasure in a good novel, must be intolerably stupid.”
Author: Jane Austen
Tag: ['aliteracy', 'books', 'classic', 'humor']

------------------------

Quote: “Imperfection is beauty, madness is genius and it's better to be absolutely ridiculous than absolutely boring.”
Author: Marilyn Monroe
