In [5]:
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import pytz

# URL of the page to scrape
url = "https://ballotpedia.org/Financial_regulation_in_Massachusetts"
ist = pytz.timezone('Asia/Kolkata')

# Send a GET request to fetch the page content
response = requests.get(url)

json_data = []
# Check if the request was successful
if response.status_code == 200:
    # Parse the page content with BeautifulSoup
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find the div with id "mw-content-text" and class "mw-parser-output"
    content_div = soup.find('div', id='mw-content-text').find('div', class_='mw-parser-output')

    if content_div:
        # Extract text from all <p> tags within the specified div
        paragraphs = content_div.find_all('p')
        texts = []
        for para in paragraphs:
            texts.append(para.get_text())
        texts = '.'.join(texts)

        now_utc = datetime.now(pytz.utc)
        now_ist = now_utc.astimezone(ist)
        formatted_date_time_ist = now_ist.strftime("%m%d%y %H:%M:%S")

        # Create the JSON object
        text_object_json = {
                                "text": texts,
                                "metadata": {
                                "date_downloaded": formatted_date_time_ist,
                                    "site_url": url,
                                    "extra_data": {
                                        "heading": "Financial regulation in Massachusetts(ballotpedia.org)",
                                    }
                                },
                                "volunteer_id": "2121",
                                "location": "Pune, Maharashtra, India"
                            }

        # Append the JSON object to json_data list
        json_data.append(text_object_json)

else:
    print(f"Failed to retrieve the page. Status code: {response.status_code}")

In [6]:
json_data

[{'text': '\n.\n.\n. This article does not contain the most recently published data on this subject. If you would like to help our coverage grow, consider donating to Ballotpedia.\n.\n.The United States financial system is a network that facilitates exchanges between lenders and borrowers. The system, which includes banks and investment firms, is the base for all economic activity in the nation. According to the Federal Reserve, financial regulation has two main intended purposes: to ensure the safety and soundness of the financial system and to provide and enforce rules that aim to protect consumers. The regulatory framework varies across industries, with different regulations applying to different financial services.[1]\n.Individual federal and state entities have different and sometimes overlapping responsibilities within the regulatory system. For example, individual states and three federal agencies—the Federal Reserve, the Office of Comptroller of the Currency (OCC), and the Fede

In [7]:
# Save json_data to a JSON file
import json
output_file = "Financial regulation in Massachusetts(ballotpedia_org).json"
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(json_data, f, ensure_ascii=False, indent=4)

In [8]:
with open('Financial regulation in Massachusetts(ballotpedia_org).json', "r", encoding="utf-8") as f:
    new_data = json.load(f)

In [9]:
new_data

[{'text': '\n.\n.\n. This article does not contain the most recently published data on this subject. If you would like to help our coverage grow, consider donating to Ballotpedia.\n.\n.The United States financial system is a network that facilitates exchanges between lenders and borrowers. The system, which includes banks and investment firms, is the base for all economic activity in the nation. According to the Federal Reserve, financial regulation has two main intended purposes: to ensure the safety and soundness of the financial system and to provide and enforce rules that aim to protect consumers. The regulatory framework varies across industries, with different regulations applying to different financial services.[1]\n.Individual federal and state entities have different and sometimes overlapping responsibilities within the regulatory system. For example, individual states and three federal agencies—the Federal Reserve, the Office of Comptroller of the Currency (OCC), and the Fede