In [1]:
import requests

def check_robots_txt(domain, timeout=10):
    """Fetch the robots.txt of ``domain`` over HTTPS and print it.

    Args:
        domain: Host name without a scheme, e.g. ``"www.example.com"``.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``; without it requests would wait
            indefinitely on an unresponsive host.

    Returns:
        None. Either the file content or a failure message is printed.
    """
    robots_url = f"https://{domain}/robots.txt"
    try:
        # Only the network calls live inside the try so that the handler
        # covers exactly what it reports on.
        response = requests.get(robots_url, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        # Covers connection errors, timeouts and non-2xx statuses alike.
        print(f"Could not retrieve robots.txt for {domain}: {e}")
    else:
        print(f"Content of {robots_url}:\n")
        print(response.text)

# Print the robots.txt served by www.equity.co.id.
check_robots_txt("www.equity.co.id")

Content of https://www.equity.co.id/robots.txt:

User-agent: *
Disallow: /wp-admin/
Allow: /wp-admin/admin-ajax.php

Sitemap: https://www.equity.co.id/wp-sitemap.xml



In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape the hospital table from the equity.co.id AdMedika corporate page
# and store it as a CSV file.
url = "https://www.equity.co.id/daftar-rumah-sakit-admedika-corporate/"
# requests applies no timeout by default; bound the wait so a stalled server
# cannot hang the kernel.
response = requests.get(url, timeout=30)
response.raise_for_status()

soup = BeautifulSoup(response.text, "html.parser")

# One list of stripped cell texts per <tr> whose class attribute starts with
# "ninja_table_row"; rows that contain no <td> are skipped.
data = []
for row in soup.select("tr[class^='ninja_table_row']"):
    cols = [col.get_text(strip=True) for col in row.find_all("td")]
    if cols:
        data.append(cols)

# Fail before touching the CSV: an empty result means the selector matched
# nothing, and writing would replace any earlier export with a header-only file.
if not data:
    raise RuntimeError(f"No table rows found at {url}; CSV was not written.")

# NOTE(review): assumes each scraped row carries these six cells in this
# order — confirm against the live table.
df = pd.DataFrame(data, columns=["No", "Nama RS", "Provinsi", "Kota", "Alamat", "Telepon"])
df.to_csv("daftar_rumah_sakit_admedika_corporate.csv", index=False)

print("[DONE] Data saved to daftar_rumah_sakit_admedika_corporate.csv")


[DONE] Data saved to daftar_rumah_sakit_admedika_corporate.csv


In [14]:
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    StaleElementReferenceException,
    TimeoutException,
)
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

# Set up the browser
options = Options()
# To run without a visible browser window, uncomment the next line:
# options.add_argument("--headless")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)


def _text_or_empty(parent, by, value):
    """Return the stripped text of the first matching child of ``parent``, or "" if it is unavailable."""
    try:
        return parent.find_element(by, value).text.strip()
    except (NoSuchElementException, StaleElementReferenceException):
        # A missing field is expected for some entries; anything else
        # (including KeyboardInterrupt) is allowed to propagate.
        return ""


def _tag_texts(parent):
    """Return the stripped texts of all ``div.box-tag span`` children of ``parent``."""
    try:
        return [t.text.strip() for t in parent.find_elements(By.CSS_SELECTOR, "div.box-tag span")]
    except StaleElementReferenceException:
        return []


def _page_signature(drv):
    """Return the visible text of every ``div.box-detail`` on the current page as a tuple."""
    return tuple(box.text for box in drv.find_elements(By.CSS_SELECTOR, "div.box-detail"))


all_data = []
page_num = 1

try:
    # Start URL
    url = "https://axa-mandiri.co.id/rekanan-rumah-sakit-kantor-cabang"
    driver.get(url)
    # Elements may be replaced while a condition is being polled; treat a
    # stale reference as "not ready yet" instead of aborting the wait.
    wait = WebDriverWait(driver, 10, ignored_exceptions=(StaleElementReferenceException,))

    time.sleep(3)  # wait for the initial load

    while True:
        print(f"Scraping halaman {page_num}...")

        # Wait for the hospital entries to appear
        wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "div.box-detail")))

        hospitals = driver.find_elements(By.CSS_SELECTOR, "div.box-detail")

        for hosp in hospitals:
            all_data.append({
                "Nama": _text_or_empty(hosp, By.TAG_NAME, "h4"),
                "Alamat": _text_or_empty(hosp, By.CLASS_NAME, "address"),
                "Telepon": _text_or_empty(hosp, By.CLASS_NAME, "tel"),
                "Tags": ", ".join(_tag_texts(hosp)),
            })

        # Try to locate the "next" button; not finding a clickable one within
        # the wait timeout is treated as the end of the listing.
        try:
            next_button = wait.until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, "li.page-next a.page-link.next"))
            )
        except TimeoutException:
            print("Tidak menemukan tombol next, selesai scraping.")
            break

        signature_before = _page_signature(driver)

        driver.execute_script("arguments[0].scrollIntoView(true);", next_button)
        time.sleep(0.5)
        driver.execute_script("arguments[0].click();", next_button)

        # Wait for the listing to actually change rather than sleeping a fixed
        # time: a slow load would otherwise be scraped twice, and a next link
        # that stays clickable on the last page would loop forever.
        # NOTE(review): how this site behaves on its last page is not verified.
        try:
            wait.until(lambda d: _page_signature(d) != signature_before)
        except TimeoutException:
            print("Halaman tidak berubah setelah klik next, selesai scraping.")
            break

        page_num += 1
finally:
    # Always release the browser, including on errors or a manual interrupt.
    driver.quit()

# Save to CSV
df = pd.DataFrame(all_data)
df.to_csv("daftar_rumah_sakit_axa_mandiri.csv", index=False, encoding="utf-8-sig")

print(f"✅ Selesai! Total {len(df)} data tersimpan di daftar_rumah_sakit_axa_mandiri.csv")


KeyboardInterrupt: 