In [None]:
from bs4 import BeautifulSoup
import requests
import re

# Scrape one ExamTopics discussion page and print the question title, the
# question body, the answer choices and every comment flagged "Highly Voted".
url = "https://www.examtopics.com/discussions/microsoft/view/153079-exam-dp-700-topic-1-question-6-discussion/"

with requests.Session() as session:
    # Retry failed connection attempts a few times before giving up.
    retrying_adapter = requests.adapters.HTTPAdapter(max_retries=3)
    session.mount("https://", retrying_adapter)
    session.mount("http://", retrying_adapter)
    session.headers.update({'User-Agent': 'Mozilla/5.0'})
    # Without a timeout a stalled server would hang the cell indefinitely.
    response = session.get(url, timeout=30)
    response.raise_for_status()

# The body is fully downloaded at this point, so parsing does not need the
# session to stay open.
soup = BeautifulSoup(response.content, 'html.parser')

# The discussion title is stored in the data-title attribute of the
# "new-comment-box" div; fall back to an empty title when it is absent so the
# f-string below never hits an undefined name.
div = soup.find("div", class_="new-comment-box")
data_title = div["data-title"] if div is not None and "data-title" in div.attrs else ""

# Question body: keep plain text, turn <br/> into new lines and replace each
# <img> with its source URL. Any other child tag is skipped.
p_tag = soup.find('p', class_='card-text')
text_parts = []
if p_tag is not None:
    for element in p_tag.contents:
        if isinstance(element, str):
            text_parts.append(element)
        elif element.name == 'br':
            text_parts.append('\n')
        elif element.name == 'img':
            # An <img> without a src attribute contributes nothing instead of
            # raising KeyError.
            text_parts.append(element.get('src', ''))
question_str = ''.join(text_parts)

# Answer choices, one per line, with inner line breaks removed.
answer_choices = [
    li.text.strip().replace('\n', '').replace('\r', '')
    for li in soup.find_all('li', class_='multi-choice-item')
]
answer_choices_str = '\n'.join(answer_choices)

# Concatenate title, question and answers, then collapse runs of whitespace
# inside each line and drop empty lines.
final_str = f"{data_title}\n{question_str}\n{answer_choices_str}"
formatted_lines = []
for line in final_str.splitlines():
    stripped = " ".join(line.split())
    if stripped:
        formatted_lines.append(stripped)
formatted_text = "\n".join(formatted_lines)

# Collect the text of every comment whose badge reads "Highly Voted".
highly_voted_badges = soup.find_all("span", class_="badge badge-primary")
comment_blocks = []
for badge in highly_voted_badges:
    if "Highly Voted" not in badge.text:
        continue
    comment_container = badge.find_parent("div", class_="comment-container")
    if comment_container is None:
        continue
    comment_content = comment_container.find("div", class_="comment-content")
    if comment_content is None:
        continue
    comment_blocks.append(
        "Highly Voted comment found!" + "\n"
        + comment_content.text.strip() + "\n"
        + "******************************" + "\n"
    )
comment_str = "".join(comment_blocks)

# Combine the formatted text and comment string
final_output = f"{formatted_text}\n{comment_str}"
print(final_output)




Exam DP-700 topic 1 question 6 discussion
HOTSPOT -
You have a Fabric workspace that contains a warehouse named DW1. DW1 contains the following tables and columns.
https://img.examtopics.com/dp-700/image1.png
You need to create an output that presents the summarized values of all the order quantities by year and product. The results must include a summary of the order quantities at the year level for all the products.
How should you complete the code? To answer, select the appropriate options in the answer area.
NOTE: Each correct selection is worth one point.
https://img.examtopics.com/dp-700/image2.png
Highly Voted comment found!
SELECT YEAR
ROLLUP(YEAR(SO.ModifiedDATE), P.Name)
************************
Highly Voted comment found!
SELECT YEAR


Grouping Sets: Best answer. No extra stuff. Just what we want
Cube: Lots of extra combinations. Has a row with the total for all years and all products, and rows with the total for each product for all years. 
Rollup: Has the unnecessary total

In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
import re

# Seconds to wait for the server before giving up on a single request.
REQUEST_TIMEOUT_SECONDS = 30


def _extract_question_text(soup):
    """Return the question body from the 'card-text' paragraph.

    Plain text is kept, <br/> becomes a new line and each <img> is replaced by
    its src URL; other child tags are skipped. Returns "" when the paragraph
    is missing.
    """
    p_tag = soup.find('p', class_='card-text')
    if p_tag is None:
        return ""
    text_parts = []
    for element in p_tag.contents:
        if isinstance(element, str):
            text_parts.append(element)
        elif element.name == 'br':
            text_parts.append('\n')
        elif element.name == 'img':
            # A missing src contributes nothing instead of raising KeyError
            # and discarding the whole page.
            text_parts.append(element.get('src', ''))
    return ''.join(text_parts)


def _extract_highly_voted_comments(soup):
    """Return the text of every comment badged "Highly Voted", each followed by a separator line."""
    comment_blocks = []
    for badge in soup.find_all("span", class_="badge badge-primary"):
        if "Highly Voted" not in badge.text:
            continue
        comment_container = badge.find_parent("div", class_="comment-container")
        if comment_container is None:
            continue
        comment_content = comment_container.find("div", class_="comment-content")
        if comment_content is None:
            continue
        comment_blocks.append(
            "Highly Voted comment found!\n"
            + comment_content.text.strip()
            + "\n******************************\n"
        )
    return "".join(comment_blocks)


def _format_discussion(soup):
    """Build the text for one parsed discussion page: title, question, answers, then highly voted comments."""
    div = soup.find("div", class_="new-comment-box")
    data_title = div["data-title"] if div is not None and "data-title" in div.attrs else ""

    question_str = _extract_question_text(soup)

    answer_choices = [
        li.text.strip().replace('\n', '').replace('\r', '')
        for li in soup.find_all('li', class_='multi-choice-item')
    ]
    answer_choices_str = '\n'.join(answer_choices)

    # Collapse runs of whitespace inside each line and drop empty lines.
    final_str = f"{data_title}\n{question_str}\n{answer_choices_str}"
    formatted_lines = [" ".join(line.split()) for line in final_str.splitlines() if line.strip()]
    formatted_text = "\n".join(formatted_lines)

    comment_str = _extract_highly_voted_comments(soup)
    return f"{formatted_text}\n{comment_str}"


# Read the Excel file and get the column with URLs
excel_file = r"azure_examtopics_20250430.xlsx"  # Replace with your Excel file name
url_column = "link"  # Replace with the column name containing URLs
df = pd.read_excel(excel_file)
# Keep only the DP-700 rows.
# NOTE(review): the literal ends with a space, presumably to match the values
# stored in the spreadsheet - confirm against the data.
df = df[df['exam'] == 'Exam DP-700 ']

# Write every scraped discussion to one text file. A single session is shared
# by all requests so connections are reused instead of reopened per URL.
output_file = "output.txt"
with open(output_file, "w", encoding="utf-8") as file, requests.Session() as session:
    # Retry failed connection attempts a few times before giving up.
    retrying_adapter = requests.adapters.HTTPAdapter(max_retries=3)
    session.mount("https://", retrying_adapter)
    session.mount("http://", retrying_adapter)
    session.headers.update({'User-Agent': 'Mozilla/5.0'})

    for url in df[url_column]:
        try:
            response = session.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
            file.write(_format_discussion(soup) + "\n\n")
        except Exception as e:
            # Best effort: report the failing URL and continue with the rest.
            print(f"Error processing URL {url}: {e}")