TEMP   

In [59]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
import re
import json

# Prefix of every lookup URL; the word being looked up is appended to it.
BASE_URL: str = "https://www.merriam-webster.com/dictionary/"
# Words to look up, in the order they are fetched.
words = ["Sell", "onion", "the", "set"]

# Accumulates one {'word': ..., 'meanings': [...]} dict per successfully
# fetched page; find_meanings() appends to it and the whole list is written
# to a JSON file at the end of the script.
results = []

def find_meanings(word: str, html_content: str) -> None:
    """Extract the headword and its definitions from a dictionary page.

    Parses ``html_content``, takes the text of the first
    ``<h1 class="hword">`` element as the word and the stripped text of every
    element matching ``.sense-content .dt .dtText`` as a meaning, then appends
    ``{'word': ..., 'meanings': [...]}`` to the module-level ``results`` list.

    Args:
        word: The word that was looked up. It is not used here; the recorded
            word is read from the page's headword element instead.
        html_content: HTML source of the page to parse.

    Returns:
        None. The outcome is recorded by appending to ``results``.
    """
    soup = BeautifulSoup(html_content, 'html.parser')

    # The first <h1 class="hword"> supplies the recorded word; a placeholder
    # is stored when the page has no such element.
    headword_tag = soup.find('h1', class_='hword')
    headword = headword_tag.text if headword_tag is not None else "No word found"

    # Each definition body lives in a .dtText element nested inside .dt,
    # inside .sense-content.
    definitions = [
        tag.text.strip()
        for tag in soup.select(".sense-content .dt .dtText")
    ]
    if not definitions:
        definitions = ["No meaning found"]

    # Remove every colon that is followed by a whitespace character. This
    # drops the marker at the start of a definition and also any such colon
    # that occurs in the middle of the text.
    cleaned_meanings = [re.sub(r":\s", "", text) for text in definitions]

    # Collect results to write to the JSON file later
    results.append({
        'word': headword,
        'meanings': cleaned_meanings,
    })

# Upper bound, in seconds, passed to every HTTP request. Without it
# requests waits indefinitely, so a single unresponsive server would stall
# the whole run.
REQUEST_TIMEOUT_SECONDS = 10

# Fetch the dictionary page for each word and hand the HTML to
# find_meanings(), which records the parsed entry in `results`.
for word in words:
    url: str = BASE_URL + word
    try:
        # Make a GET request to the URL. A timeout raises requests.Timeout,
        # which is a RequestException and is reported by the handler below.
        response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)

        # Check if the request was successful
        if response.status_code == 200:
            html_content = response.text
            print(f"HTML content fetched successfully for {word}")
            find_meanings(word, html_content)
            print("-" * 60)
        else:
            # Non-200 pages are skipped; nothing is recorded for this word.
            print(f"Failed to fetch page for {word}. Status code: {response.status_code}")
            print("-" * 60)
    except requests.RequestException as e:
        # Network-level failure (connection error, timeout, ...): report it
        # and continue with the next word.
        print(f"An error occurred: {e}")

# Write the results to a JSON file
with open("meanings.json", "w") as json_file:
    json.dump(results, json_file, indent=4)

print("Meanings have been saved to 'meanings.json'")


HTML content fetched successfully for Sell
------------------------------------------------------------
HTML content fetched successfully for onion
------------------------------------------------------------
HTML content fetched successfully for the
------------------------------------------------------------
HTML content fetched successfully for set
------------------------------------------------------------
Meanings have been saved to 'meanings.json'
