In [2]:
from selenium import webdriver
from selenium.webdriver.edge.service import Service as EdgeService
from selenium.webdriver.edge.options import Options as EdgeOptions
from webdriver_manager.microsoft import EdgeChromiumDriverManager

from selenium.webdriver.common.by import By

from selenium.common.exceptions import NoSuchElementException
from bs4 import BeautifulSoup

import os
import json

import time
import timeit

In [3]:
# Shared Edge configuration for a visible (non-headless) browser window.
edge_options = EdgeOptions()
edge_options.use_chromium = True  # ask for the Chromium-based Edge engine

# Command-line switches, applied in order:
#   'start-maximized'    -> open the browser window maximized
#   '--disable-infobars' -> disable info bars
# For headless runs, '--headless' would be added here as well.
for _switch in ('start-maximized', '--disable-infobars'):
    edge_options.add_argument(_switch)

In [4]:
def start(params=edge_options):
    """Launch a new Edge browser session and return its WebDriver.

    Args:
        params: Edge options object handed to the browser. Defaults to the
            notebook-level ``edge_options``.

    Returns:
        The newly created ``webdriver.Edge`` instance. The caller owns it and
        is responsible for closing it (``driver.quit()``).
    """
    # EdgeChromiumDriverManager().install() supplies the driver executable
    # path that the Edge service is started with.
    service = EdgeService(EdgeChromiumDriverManager().install())
    # Use the options the caller passed in; the global `edge_options` was
    # previously hard-wired here, which made the `params` argument a no-op.
    return webdriver.Edge(service=service, options=params)

In [5]:
def _paragraph_text(section):
    """Return the text of ``section``'s direct <p> children as one string."""
    paragraphs = section.find_elements(By.XPATH, './p')
    # Line breaks inside a paragraph become spaces.
    # NOTE(review): paragraphs are joined with no separator, so the last word
    # of one paragraph runs into the first word of the next. Kept as-is so the
    # saved file format does not change; confirm whether ' ' was intended.
    return ''.join(p.text.replace('\n', ' ') for p in paragraphs)


def _titled_section(section):
    """Return the section's first child <div> text, a newline, then its <p> text."""
    title = section.find_element(By.XPATH, './div').text
    return title + "\n" + _paragraph_text(section)


def _output_path(contest_id, file_name):
    """Create the per-contest folder if needed and return the path of ``file_name`` in it."""
    folder_name = f"{contest_id}"
    # exist_ok avoids the separate exists() check and the race it implies.
    os.makedirs(folder_name, exist_ok=True)
    return os.path.join(folder_name, file_name)


def _split_sample_cases(input_lines, output_text):
    """Split sample input/output into per-test-case dicts.

    Treats the first input line as the number of test cases, divides the
    remaining lines evenly between them, and pairs each group with one
    whitespace-separated token of the output.

    Returns:
        A list of ``{"input": [lines], "output": token}`` dicts, or ``None``
        when the input does not fit that layout (no lines, a non-integer or
        non-positive first line, or fewer remaining lines than test cases).
    """
    if not input_lines:
        return None
    try:
        case_count = int(input_lines[0])
    except ValueError:
        return None
    if case_count <= 0:
        return None
    lines_per_case = (len(input_lines) - 1) // case_count
    if lines_per_case == 0:
        return None
    grouped_inputs = [
        input_lines[i:i + lines_per_case]
        for i in range(1, len(input_lines), lines_per_case)
    ]
    return [
        {"input": case_input, "output": case_output}
        for case_input, case_output in zip(grouped_inputs, output_text.split())
    ]


def getPS(contest_id, problem_index):
    """Scrape one Codeforces problem page and save it to disk.

    Uses the notebook-level ``driver`` (a Selenium WebDriver) to open
    ``https://codeforces.com/contest/<contest_id>/problem/<problem_index>``
    and writes two files into a folder named after ``contest_id``:

    * ``metadata_<problem_index>.json`` - tags, contest id, problem index,
      problem name and the label/value rows of the statement header.
    * ``problem_statement_<problem_index>.txt`` - statement, footnote, input,
      output, example and note sections separated by blank lines.

    Args:
        contest_id: Contest identifier; interpolated into the URL and used as
            the output folder name.
        problem_index: Problem letter/index as a string (e.g. ``'D'``).

    Returns:
        The ``href`` of the parent of the second ``resource-locale`` element
        (used as the tutorial link), or ``None`` when the page has fewer than
        two such elements.

    Raises:
        IndexError: If the page has fewer than five child <div>s under
            ``div.problem-statement`` (e.g. the page did not load as a problem).

    Note:
        Only the first input/output pair of the example section is parsed.
    """
    # <---Going to the link--->

    url = f'https://codeforces.com/contest/{contest_id}/problem/{problem_index}'
    driver.get(url)

    # Direct children of the statement container. They are used positionally:
    # 0 header, 1 statement body, 2 input, 3 output, 4 examples, 5 note.
    elements = driver.find_elements(By.XPATH, '//div[@class="problem-statement"]/div')

    # <---Getting the link to the tutorial of the problem--->

    # Assumes the second 'resource-locale' element sits inside the tutorial
    # anchor - TODO confirm against pages with a different materials list.
    locale_markers = driver.find_elements(By.CLASS_NAME, 'resource-locale')
    if len(locale_markers) > 1:
        tut_link = locale_markers[1].find_element(By.XPATH, '..').get_attribute('href')
    else:
        # No second entry: report "no link" instead of aborting the scrape.
        tut_link = None

    # <---Extracting Metadata and saving into a .json file--->

    metadata = {
        'tags': [tag.text for tag in driver.find_elements(By.CLASS_NAME, 'tag-box')],
        'contest_id': contest_id,
        'problem_index': problem_index,
    }
    for item in elements[0].find_elements(By.XPATH, './div'):
        full_text = item.get_property('innerText')
        try:
            # Rows with a nested <div> are "label + value": the nested text is
            # the key and whatever remains of the row text is the value.
            label = item.find_element(By.XPATH, './div').get_property('innerText')
            metadata[label] = full_text.replace(label, '', 1)
        except NoSuchElementException:
            # A row without a nested <div> is taken to be the title line,
            # e.g. "D. Name"; strip the "<index>. " prefix.
            metadata['problem_name'] = item.text.replace(problem_index + '. ', '')

    metadata_path = _output_path(contest_id, f"metadata_{problem_index}.json")
    with open(metadata_path, "w", encoding="utf-8") as json_file:
        json.dump(metadata, json_file, indent=4)

    print("json file saved")

    # <---MAIN PS SECTION--->

    problem_statement = "Problem Statement" + "\n" + _paragraph_text(elements[1])

    # <---FOOTNOTE SECTION--->

    # The footnote is optional: it is the first <div> nested in the body.
    try:
        footnote_box = elements[1].find_element(By.XPATH, './div')
    except NoSuchElementException:
        footnote = "No Footnote present"
    else:
        footnote = "Footnote" + "\n" + _paragraph_text(footnote_box)

    # <---INPUT SECTION--->

    input_ = _titled_section(elements[2])

    # <---OUTPUT SECTION--->

    output = _titled_section(elements[3])

    # <---EXAMPLE SECTION--->

    io_blocks = elements[4].find_elements(By.XPATH, './div/div')
    input_pre = io_blocks[0].find_element(By.TAG_NAME, 'pre')
    output_pre = io_blocks[1].find_element(By.TAG_NAME, 'pre')

    # Per-line <div>s inside the input <pre> are what the case splitter needs.
    input_lines = [line.text for line in input_pre.find_elements(By.XPATH, './div')]
    test_cases = _split_sample_cases(input_lines, output_pre.text)
    if test_cases is None:
        # Input is not "count line + equal-sized cases" (or has no per-line
        # <div>s): keep the whole sample as one unsplit test case rather than
        # failing with IndexError / ValueError / ZeroDivisionError.
        test_cases = [{"input": input_pre.text.splitlines(), "output": output_pre.text}]

    example = elements[4].find_element(By.XPATH, './div').text + "\n" + str(test_cases)

    # <---NOTE SECTION--->

    # The note is optional, like the footnote; a page without a sixth section
    # used to raise IndexError here.
    if len(elements) > 5:
        note = _titled_section(elements[5])
    else:
        note = "No Note present"

    # <---Compiling different parts of PS into one--->

    final_full_problem_statement = "\n\n".join(
        [problem_statement, footnote, input_, output, example, note]
    )

    # <---Saving the .txt file--->

    statement_path = _output_path(contest_id, f"problem_statement_{problem_index}.txt")
    with open(statement_path, "w", encoding="utf-8") as file:
        file.write(final_full_problem_statement)

    print("txt file saved")

    return tut_link
    

In [488]:
# Problem to scrape: (contest number, problem letter within that contest).
contest_id, problem_index = 2049, 'D'

In [490]:
# getPS() drives the notebook-level `driver`; open a browser session first if
# no earlier cell has created one, so this cell also works on a fresh kernel.
if 'driver' not in globals():
    driver = start()

# timeit needs a callable (or source string) to run. Passing getPS(...) itself
# would call it once up front, untimed, and hand its return value to timeit as
# the statement. The lambda defers the call so the single run is what is timed.
execution_time = timeit.timeit(lambda: getPS(contest_id, problem_index), number=1)
print(f"function executed in {execution_time:.4f} seconds")