<a href="https://colab.research.google.com/github/diimprasetyos/belajar-web-scraping-python/blob/main/belajar_scrap_part2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import requests
from bs4 import BeautifulSoup
import json
import csv

In [10]:
# Endpoint queried by fetch_data_from_page for every page request.
base_url = 'https://satusehat.kemkes.go.id/platform/api/verification/summary/data-verified'

# Shared accumulator: fetch_data_from_page appends one dict per listed item.
companies = list()

# Headers sent with every API request; only a desktop-browser User-Agent is set.
headers = dict()
headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'

# fetch data 1 page
def fetch_data_from_page(page_number):
    """Fetch one page of the 'fasyankes' listing and append its rows to ``companies``.

    Sends a GET request to ``base_url`` (10 items per page, empty search term),
    then converts every entry of the response's ``data`` list into a flat dict
    and appends it to the module-level ``companies`` list.

    Args:
        page_number: Value sent as the ``page`` query parameter.

    Returns:
        None. Results are accumulated in the module-level ``companies`` list.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within the timeout.
        KeyError: If the JSON payload has no ``data`` key.
    """
    params = {
        'search': '',
        'page': page_number,
        'perPage': 10,
        'category': 'fasyankes'
    }

    # A timeout keeps the notebook from hanging forever on a stalled connection;
    # raise_for_status() stops us from trying to parse an HTTP error page as data.
    response = requests.get(base_url, headers=headers, params=params, timeout=30)
    response.raise_for_status()
    data = response.json()

    # ``or []`` tolerates an explicit JSON null for an empty page.
    for item in data['data'] or []:
        # dict.get(key, default) only applies the default when the key is
        # absent; ``or 'N/A'`` also covers keys present with null/empty values.
        name = item.get('nameFacility') or 'N/A'
        location = item.get('districtDescription') or 'N/A'
        phone = item.get('phone') or 'N/A'
        email = item.get('email') or 'N/A'

        facility_types = item.get('typeFacility') or ['N/A']
        if isinstance(facility_types, str):
            # A bare string would otherwise be joined character by character.
            facility_types = [facility_types]
        category = ', '.join(str(facility_type) for facility_type in facility_types)

        companies.append({
            'Nama Mitra': name,
            'Lokasi Mitra': location,
            'Kategori Mitra': category,
            'Nomor Handphone': phone,
            'Email': email
        })

# Number of pages to request; the loop below asks for pages 1..TOTAL_PAGES.
TOTAL_PAGES = 10

# Fetch every page; each call appends its rows to the shared `companies` list.
for page_number in range(1, TOTAL_PAGES + 1):
    fetch_data_from_page(page_number)

# Save the collected rows to CSV.
csv_file = 'data_mitra_kemkes.csv'
csv_columns = ['Nama Mitra','Lokasi Mitra', 'Kategori Mitra', 'Nomor Handphone', 'Email']

try:
    # newline='' lets the csv module control line endings itself.
    with open(csv_file, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
        writer.writeheader()
        writer.writerows(companies)
    print(f"Data successfully written to {csv_file}")
except OSError as err:
    # IOError is an alias of OSError in Python 3; include the reason so a
    # failure (permissions, missing directory, full disk, ...) is diagnosable.
    print(f"I/O error occurred: {err}")

Data successfully written to data_mitra_kemkes.csv


# Base code: scraping the HTML page with BeautifulSoup

In [None]:
#base code
url = 'https://satusehat.kemkes.go.id/platform/system-rme-list'

# A timeout avoids hanging on a stalled connection, and raise_for_status()
# keeps an HTTP error page from being parsed as if it were the listing.
response = requests.get(url, timeout=30)
response.raise_for_status()
content = response.content

soup = BeautifulSoup(content, 'html.parser')

#company list
companies = []


def _text_or_na(element):
    """Return the stripped text of a parsed element, or 'N/A' when it is None."""
    return element.text.strip() if element is not None else 'N/A'


# Each <li> whose class attribute is exactly "relative w-full" is one entry.
company_elements = soup.find_all('li', class_='relative w-full')

for company in company_elements:
    # Look every element up once and let the helper handle the missing case.
    name = _text_or_na(company.find('p', class_='manrope-14-600'))
    category = _text_or_na(company.find('p', class_='manrope-12-400'))
    # Contact details are taken from the text of tel: / mailto: links.
    phone = _text_or_na(company.find('a', href=lambda href: href and "tel:" in href))
    email = _text_or_na(company.find('a', href=lambda href: href and "mailto:" in href))

    companies.append({
        'Nama Perusahaan': name,
        'Kategori Perusahaan': category,
        'Nomor Handphone': phone,
        'Email': email
    })

#result
for company in companies:
    print(f"Nama Perusahaan: {company['Nama Perusahaan']}")
    print(f"Kategori Perusahaan: {company['Kategori Perusahaan']}")
    print(f"Nomor Handphone: {company['Nomor Handphone']}")
    print(f"Email: {company['Email']}")
    print('---')


Nama Perusahaan: BONA MEDIKA
Kategori Perusahaan: Klinik
Nomor Handphone: +628128941444
Email: admin@bonamedika.com
---
Nama Perusahaan: Doctor PHC
Kategori Perusahaan: Tempat Praktik Mandiri Tenaga Kesehatan
Nomor Handphone: +62818438043
Email: aplikasi.asb@gmail.com
---
Nama Perusahaan: praktech
Kategori Perusahaan: Tempat Praktik Mandiri Tenaga Kesehatan
Nomor Handphone: +6285725038849
Email: nurbektiaw@gmail.com
---
Nama Perusahaan: Pandora
Kategori Perusahaan: Rumah Sakit
Nomor Handphone: +6281387884881
Email: ptanaknegerimedikanusantara@gmail.com
---
Nama Perusahaan: LIS
Kategori Perusahaan: Tempat Praktik Mandiri Tenaga Kesehatan
Nomor Handphone: +6285855555322
Email: goteknol@gmail.com
---
Nama Perusahaan: rmc.muezzadev.com
Kategori Perusahaan: Klinik
Nomor Handphone: +62811666956
Email: admin@muezzadev.com
---
Nama Perusahaan: CLINFOS
Kategori Perusahaan: Klinik
Nomor Handphone: +627333286255
Email: ssecl.linggau@gmail.com
---
Nama Perusahaan: SIMRS Pandeka
Kategori Perusahaan