# Codeforces Problem and Editorial Scraper

This notebook contains the code for scraping problems and editorials from Codeforces using Selenium and BeautifulSoup.

In [None]:

import os
import json
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager

# Configuration: output directory layout. Both output directories are created
# as soon as this code runs, so the scraping functions can write immediately.
BASE_DIR = "codeforces_scraper"

# Problem statements and their metadata go here.
PROBLEMS_DIR = os.path.join(BASE_DIR, "problems")
os.makedirs(PROBLEMS_DIR, exist_ok=True)

# Editorial pages go here.
EDITORIALS_DIR = os.path.join(BASE_DIR, "editorials")
os.makedirs(EDITORIALS_DIR, exist_ok=True)

# Helper Functions
def save_to_file(directory, filename, content):
    """Write *content* as UTF-8 text to ``directory/filename``.

    An existing file with the same name is overwritten. The directory must
    already exist.

    Args:
        directory: Folder that receives the file.
        filename: Name of the file inside *directory*.
        content: Text to write.
    """
    with open(os.path.join(directory, filename), mode="w", encoding="utf-8") as out:
        out.write(content)

def save_metadata(directory, metadata):
    """Serialize *metadata* as JSON into ``directory/metadata.json``.

    The file is written as UTF-8 with a 4-space indent and replaces any
    existing ``metadata.json`` in that directory.

    Args:
        directory: Folder that receives ``metadata.json``.
        metadata: JSON-serializable object to store.
    """
    target = os.path.join(directory, "metadata.json")
    with open(target, "w", encoding="utf-8") as out:
        json.dump(metadata, out, indent=4)

# Default Chrome location on Windows. It is applied only when the file exists,
# so on other machines the driver is left to locate Chrome itself.
_WINDOWS_CHROME_BINARY = r"C:\Program Files\Google\Chrome\Application\chrome.exe"

# Replacement table for building file names from page titles. Spaces and
# slashes keep their original substitutions; the remaining entries cover
# characters that are rejected in Windows file names, plus control characters.
_FILENAME_TRANSLATION = str.maketrans({
    " ": "_",
    "/": "-",
    **{char: "_" for char in '\\:*?"<>|'},
    **{chr(code): "_" for code in range(32)},
})


def _safe_filename(title):
    """Return *title* turned into a file-name stem ("untitled" if it is empty)."""
    return title.translate(_FILENAME_TRANSLATION) or "untitled"


def _create_driver():
    """Start and return a headless Chrome WebDriver."""
    options = Options()
    options.add_argument("--headless")
    options.add_argument("--disable-gpu")
    # Pointing at a binary that does not exist makes Chrome start-up fail, so
    # only override the browser location when the default path is present.
    if os.path.isfile(_WINDOWS_CHROME_BINARY):
        options.binary_location = _WINDOWS_CHROME_BINARY
    service = Service(ChromeDriverManager().install())
    return webdriver.Chrome(service=service, options=options)


def _load_soup(driver, url, wait_class):
    """Open *url*, wait up to 15 s for an element with class *wait_class*, and parse the page."""
    driver.get(url)
    WebDriverWait(driver, 15).until(
        EC.presence_of_element_located((By.CLASS_NAME, wait_class))
    )
    return BeautifulSoup(driver.page_source, "html.parser")


def scrape_problem(url):
    """Scrape one problem page and save its statement and metadata.

    The prettified statement HTML is written to
    ``PROBLEMS_DIR/<safe title>.txt``. The metadata (title, tags, time limit,
    memory limit) is written to ``PROBLEMS_DIR/<safe title>.json`` and, as
    before, to ``PROBLEMS_DIR/metadata.json``, which therefore always holds
    the most recently scraped problem.

    Errors raised while loading or parsing the page are caught and reported
    on stdout; errors raised while starting the browser propagate.

    Args:
        url: Address of the problem page.
    """
    driver = _create_driver()
    try:
        soup = _load_soup(driver, url, "problem-statement")
        title = soup.find("div", class_="title")
        statement = soup.find("div", class_="problem-statement")
        tags = soup.find_all("span", class_="tag-box")
        time_limit = soup.find("div", class_="time-limit")
        memory_limit = soup.find("div", class_="memory-limit")

        title_text = title.text.strip() if title else "Untitled"
        statement_text = statement.prettify() if statement else "No statement found"
        tags_text = [tag.text.strip() for tag in tags]
        # A separator keeps any nested label text from running straight into
        # the value; for a single text node the result equals .text.strip().
        time_limit_text = (
            time_limit.get_text(" ", strip=True) if time_limit else "Not specified"
        )
        memory_limit_text = (
            memory_limit.get_text(" ", strip=True) if memory_limit else "Not specified"
        )

        safe_title = _safe_filename(title_text)
        save_to_file(PROBLEMS_DIR, f"{safe_title}.txt", statement_text)
        metadata = {
            "title": title_text,
            "tags": tags_text,
            "time_limit": time_limit_text,
            "memory_limit": memory_limit_text,
        }
        # Per-problem copy, so scraping several problems does not lose the
        # metadata of earlier ones when metadata.json is overwritten.
        save_to_file(
            PROBLEMS_DIR, f"{safe_title}.json", json.dumps(metadata, indent=4)
        )
        save_metadata(PROBLEMS_DIR, metadata)
        print(f"Problem '{title_text}' scraped successfully.")
    except Exception as e:
        # Best-effort scraper: report the failure and let the caller continue.
        print(f"Failed to scrape problem at {url}: {e}")
    finally:
        driver.quit()


def scrape_editorial(url):
    """Scrape one editorial (blog) page and save its content.

    The prettified HTML of the first ``div.content`` element is written to
    ``EDITORIALS_DIR/<safe title>_editorial.txt``.

    Errors raised while loading or parsing the page are caught and reported
    on stdout; errors raised while starting the browser propagate.

    Args:
        url: Address of the editorial page.
    """
    driver = _create_driver()
    try:
        soup = _load_soup(driver, url, "content")
        title = soup.find("div", class_="title")
        editorial = soup.find("div", class_="content")

        title_text = title.text.strip() if title else "Untitled Editorial"
        editorial_text = editorial.prettify() if editorial else "No editorial content found"

        safe_title = _safe_filename(title_text)
        save_to_file(EDITORIALS_DIR, f"{safe_title}_editorial.txt", editorial_text)
        print(f"Editorial for '{title_text}' scraped successfully.")
    except Exception as e:
        # Best-effort scraper: report the failure and let the caller continue.
        print(f"Failed to scrape editorial at {url}: {e}")
    finally:
        driver.quit()

# Example usage: when run as a script, scrape one sample problem page and one
# sample blog entry.
if __name__ == "__main__":
    scrape_problem("https://codeforces.com/problemset/problem/1/A")
    scrape_editorial("https://codeforces.com/blog/entry/1")
