In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

def get_links_from_page(url, timeout=30):
    """Fetch one topic-listing page and return the topic links found on it.

    Args:
        url: Address of the listing page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so a stalled connection
            would hang the whole scrape.

    Returns:
        A list of absolute URLs, one for every ``card-topic`` element inside
        the element with id ``topic-list`` that has a non-empty ``href``, in
        document order. Returns an empty list when the page has no
        ``topic-list`` element.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: For connection failures and timeouts.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import urljoin

    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # Raise an error for bad status codes
    soup = BeautifulSoup(response.content, 'html.parser')

    topic_list = soup.find(id='topic-list')
    if topic_list is None:
        return []

    hrefs = (card.get('href') for card in topic_list.find_all('card-topic'))
    # urljoin gives the same result as plain concatenation for root-relative
    # hrefs ("/komunitas/..."), but also copes with hrefs that are already
    # absolute or that lack the leading slash.
    return [urljoin('https://www.alodokter.com', href) for href in hrefs if href]

def scrape_all_pages(base_url, start_page=1, delay=5):
    """Walk consecutive listing pages and gather every link they contain.

    Pages are requested as ``base_url`` followed by the page number, starting
    at ``start_page`` and increasing by one. The walk ends at the first page
    that yields no links, or at the first error (which is printed rather
    than raised). Links gathered before that point are still returned.

    Args:
        base_url: URL prefix to which the page number is appended.
        start_page: Number of the first page to request.
        delay: Seconds to pause after each page that yielded links.

    Returns:
        A list of all links collected, in the order they were encountered.
    """
    collected = []
    current = start_page
    keep_going = True
    while keep_going:
        try:
            page_url = f"{base_url}{current}"
            print(f"Scraping page: {current}")
            page_links = get_links_from_page(page_url)
            if page_links:
                collected += page_links
                current += 1
                time.sleep(delay)  # be polite: pause between requests
            else:
                # An empty page marks the end of the listing.
                print(f"No more links found. Stopping at page {current - 1}.")
                keep_going = False
        except requests.HTTPError as http_err:
            keep_going = False
            if http_err.response.status_code == 404:
                print(f"Page {current} not found. Stopping.")
            else:
                print(f"HTTP error occurred: {http_err}")
        except Exception as err:
            keep_going = False
            print(f"An error occurred: {err}")
    return collected

def save_links_to_csv(links, filename='terapi-perilaku-kognitif.csv'):
    """Write the links to a CSV file with a single ``Link`` column.

    Args:
        links: Sequence of link strings, one per output row.
        filename: Destination path of the CSV file.
    """
    # No index column: the file contains only the header and the links.
    pd.DataFrame(data=links, columns=['Link']).to_csv(
        path_or_buf=filename,
        index=False,
    )
    print(f"Data saved to {filename}")

# Listing pages for this topic tag are addressed as <base_url><page number>.
base_url = "https://www.alodokter.com/komunitas/topic-tag/terapi-perilaku-kognitif/page/"

# Collect the links from every listing page, then write them to the default
# CSV file.
all_links = scrape_all_pages(base_url=base_url)
save_links_to_csv(links=all_links)

Scraping page: 1
Scraping page: 2
Scraping page: 3
No more links found. Stopping at page 2.
Data saved to terapi-perilaku-kognitif.csv
