In [17]:
from dataclasses import dataclass, asdict, field
import json


@dataclass
class Solution:
    """One part/repair-text pair collected from a repair-guide page."""
    part: str  # text of the part's section heading, as read by scrape_data
    solution: str  # text of the description column that follows that heading

@dataclass
class Repair:
    """Record scraped from a single repair-guide page.

    Every field has an empty default, so an instance can be created first and
    filled in step by step; it is serialised with ``dataclasses.asdict``.
    """
    appliance: str = ""  # scrape_data sets "Dishwasher" or "Refrigerator" from the URL; otherwise stays ""
    category: str = ""  # text of the page's main title (e.g. "How To Fix A Noisy Dishwasher")
    # default_factory gives each instance its own list instead of a shared mutable default
    solutions: list[Solution] = field(default_factory=list)
    issue_description_url: str = ""  # URL of the page the record was scraped from
    video_url: str = ""  # YouTube watch URL built from the page's video thumbnail

In [18]:
from selenium import webdriver
from selenium.webdriver.common.by import By


def _youtube_watch_url(thumbnail_src):
    """Return the YouTube watch URL for a thumbnail URL, or "" if none can be derived.

    Thumbnail URLs look like
    ``https://img.youtube.com/vi/<video_id>/maxresdefault.jpg``.
    ``thumbnail_src`` may be ``None`` (attribute absent) or an unrelated string.
    """
    marker = "youtube.com/vi/"
    if not thumbnail_src or marker not in thumbnail_src:
        return ""
    video_id = thumbnail_src.split(marker)[1].split("/")[0]
    if not video_id:
        return ""
    return f"https://www.youtube.com/watch?v={video_id}"


def scrape_data(url: str, output_file: str) -> None:
    """Scrape one repair-guide page with Chrome and save the result as JSON.

    Collects the page title, every part heading with its description text,
    and (when present) the YouTube video linked through the page's video
    thumbnail, then writes the resulting ``Repair`` record to ``output_file``.

    Args:
        url: Address of the repair-guide page. The appliance name is derived
            from it ("dishwasher" / "refrigerator", case-insensitive).
        output_file: Path of the JSON file to write (UTF-8, overwritten).

    Raises:
        selenium.common.exceptions.NoSuchElementException: if the title, the
            ``div.symptom-list`` container, or a part's description block is
            missing. The browser is still closed and no file is written.
    """
    data = Repair()
    # Record the source URL up front so it is saved even when no parts are found.
    data.issue_description_url = url

    url_lower = url.lower()
    if "dishwasher" in url_lower:
        data.appliance = "Dishwasher"
    elif "refrigerator" in url_lower:
        data.appliance = "Refrigerator"

    driver = webdriver.Chrome()
    try:
        driver.get(url)

        # e.g. <h1 class="title-main mt-3 mb-3 ...">How To Fix A Noisy Dishwasher</h1>
        title_el = driver.find_element(By.CSS_SELECTOR, 'h1.title-main')
        data.category = title_el.text
        print('category:', data.category)

        # All part sections live inside the "symptom-list" container.
        container = driver.find_element(By.CSS_SELECTOR, 'div.symptom-list')
        part_headers = container.find_elements(By.CSS_SELECTOR, 'h2.section-title.bold.col.mt-3.mb-3')
        for h2 in part_headers:
            part = h2.text.strip()

            # The first following sibling with class "symptom-list__desc"
            # belongs to this heading.
            desc_block = h2.find_element(
                By.XPATH,
                'following-sibling::div[contains(@class, "symptom-list__desc")][1]'
            )

            # Inside that block, the first col-lg-6 column holds the solution text.
            solution_div = desc_block.find_element(By.CSS_SELECTOR, 'div.col-lg-6')
            solution = solution_div.text.strip()

            data.solutions.append(Solution(part=part, solution=solution))
            print('part:', part)
            print('solution:', solution[:120], '...')

        # Video thumbnail, e.g.
        # <img src="https://img.youtube.com/vi/XgZq_VdXKiQ/maxresdefault.jpg" class="yt-video__thumb b-lazy ...">
        # Match on the stable class only: the "b-loaded loaded" classes are
        # present only after the image has been lazy-loaded. find_elements
        # returns [] instead of raising, so a page without a video is fine.
        for thumb in driver.find_elements(By.CSS_SELECTOR, 'img.yt-video__thumb'):
            # NOTE(review): presumably the lazy loader keeps the real URL in
            # "data-src" until it swaps it into "src" - confirm against the page.
            # get_attribute returns None when the attribute is absent.
            watch_url = (
                _youtube_watch_url(thumb.get_attribute("src"))
                or _youtube_watch_url(thumb.get_attribute("data-src"))
            )
            if watch_url:
                data.video_url = watch_url
                print('video_url:', data.video_url)
                break
    finally:
        # Always close the browser, even when an element lookup fails.
        driver.quit()

    json_data = asdict(data)

    # Save to file
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(json_data, f, ensure_ascii=False, indent=2)


In [19]:
# Dishwasher repair-guide pages; this list is commented out, so it is not scraped
# when the cell runs.
# NOTE(review): "Not-Cleaning-Properly" is listed twice below.
# urls = [
#     "https://www.partselect.com/Repair/Dishwasher/Noisy/",
#     "https://www.partselect.com/Repair/Dishwasher/Leaking/",
#     "https://www.partselect.com/Repair/Dishwasher/Will-Not-Start/",
#     "https://www.partselect.com/Repair/Dishwasher/Door-Latch-Failure/",
#     "https://www.partselect.com/Repair/Dishwasher/Not-Cleaning-Properly/",
#     "https://www.partselect.com/Repair/Dishwasher/Not-Cleaning-Properly/",
#     "https://www.partselect.com/Repair/Dishwasher/Not-Draining/",
#     "https://www.partselect.com/Repair/Dishwasher/Will-Not-Fill-Water/",
#     "https://www.partselect.com/Repair/Dishwasher/Will-Not-Dispense-Detergent/",
#     "https://www.partselect.com/Repair/Dishwasher/Not-Drying-Properly/",
# ]

# Refrigerator repair-guide pages scraped by this cell, one JSON file per URL.
urls = [
    "https://www.partselect.com/Repair/Refrigerator/Noisy/",
    "https://www.partselect.com/Repair/Refrigerator/Leaking/",
    "https://www.partselect.com/Repair/Refrigerator/Will-Not-Start/",
    "https://www.partselect.com/Repair/Refrigerator/Not-Making-Ice/",
    "https://www.partselect.com/Repair/Refrigerator/Refrigerator-Too-Warm/",
    "https://www.partselect.com/Repair/Refrigerator/Not-Dispensing-Water/",
    "https://www.partselect.com/Repair/Refrigerator/Refrigerator-Freezer-Too-Warm/",
    "https://www.partselect.com/Repair/Refrigerator/Door-Sweating/",
    "https://www.partselect.com/Repair/Refrigerator/Light-Not-Working/",
    "https://www.partselect.com/Repair/Refrigerator/Refrigerator-Too-Cold/",
    "https://www.partselect.com/Repair/Refrigerator/Running-Too-Long/",
    "https://www.partselect.com/Repair/Refrigerator/Freezer-Too-Cold/"
]

# Output files are numbered from 11: part_11.json, part_12.json, ...
# Presumably part_1..part_10 came from the ten dishwasher URLs above - TODO confirm.
# Each call opens its own Chrome session and writes its file before the next starts.
for i, url in enumerate(urls, start=11):
    output_file = f'part_{i}.json'
    scrape_data(url, output_file)

category: How To Fix A Noisy Refrigerator
part: Condenser Fan Motor
solution: Most modern frost-free refrigerators will have a fan cooled condenser coil. The condenser fan circulates air through the ...
part: Evaporator Fan Motor
solution: The evaporator fan motor is responsible for pulling air over the evaporator coils when the compressor is running. If you ...
part: Evaporator Fan Motor Grommet
solution: The evaporator fan motor grommet is used to isolate the motor from the mounting bracket and reduce vibration noise. Regu ...
video_url: https://www.youtube.com/watch?v=nxHx8mhOnvY
category: How To Fix Leaking Refrigerator
part: Door Gaskets Or Seals
solution: Door gaskets or seals are found along the outside of the refrigerator’s doors. They are normally made from a vinyl mater ...
part: Water Inlet Valve
solution: The water inlet valve is a solenoid-operated device that connects your household water supply line to your refrigerator  ...
part: Ice Maker Assembly
solution: If your fri