In [27]:
from bs4 import BeautifulSoup
from time import sleep
import requests
import json
import re
import logging

# Configure logging: timestamped INFO-level messages.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)

# User-Agent header that mimics a desktop Chrome browser. The token must read
# "KHTML, like Gecko" verbatim; a translated variant does not look like a
# browser-issued User-Agent string.
user_agent = (
    'Mozilla/5.0 (X11; Linux x86_64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/106.0.0.0 Safari/537.36'
)

# Headers attached to the scraper's HTTP sessions.
default_headers = {'User-Agent': user_agent}

def get_max_page() -> int:
    """Return the number of result pages for the Jakarta cafe search.

    Fetches the first results page and reads the text of the ``<strong>``
    inside ``<h2 id="top-total-search-view">``. That text is split on the
    word ``dari`` and both parts are parsed as integers.
    NOTE(review): presumably "<results on this page> dari <total results>" -
    confirm against the live markup.

    Returns:
        The second number divided by the first, rounded up so that a final,
        partially filled page is counted. If the total is an exact multiple
        the result equals the plain quotient.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
        AttributeError: If the heading or its ``<strong>`` is missing.
        ValueError: If either part of the summary is not an integer.
        IndexError: If the summary does not contain the word ``dari``.
    """
    # Send the same browser-like headers as the rest of the crawler and
    # release the connection pool once the page has been downloaded.
    with requests.Session() as session:
        session.headers.update(default_headers)
        response = session.get(
            'https://pergikuliner.com/restaurants?utf8=%E2%9C%93&search_place=&default_search=Jakarta&search_name_cuisine=kafe&commit=',
            timeout=30,
        )
    page = BeautifulSoup(response.text, 'html.parser')
    heading = page.find('h2', {'id': 'top-total-search-view'})
    counts = [int(part.strip()) for part in heading.find('strong').text.split('dari')]
    per_page, total = counts[0], counts[1]
    # Ceiling division: flooring here would silently skip the last page
    # whenever the total is not a multiple of the page size.
    return -(-total // per_page)

def scrape_page(response):
    """Parse one search-results page into a list of restaurant records.

    Args:
        response: Object whose ``text`` attribute holds the page HTML
            (``crawl`` passes the ``requests`` response).

    Returns:
        list[dict]: One dict per ``div.restaurant-result-wrapper`` card with
        the keys ``title``, ``rate``, ``cuisine``, ``location``, ``address``,
        ``street``, ``price_from``, ``price_till`` and ``url``. ``cuisine`` is
        a list of strings or ``None``; the address, street and price fields
        are ``None`` when the card has no matching paragraph.

    Raises:
        AttributeError, TypeError, KeyError: If a card lacks the title, link,
            description or rating markup this parser relies on.
    """
    page = BeautifulSoup(response.text, 'html.parser')
    data = []
    for item in page.find_all('div', class_='restaurant-result-wrapper'):
        # Title and link.
        title = item.find('h3').text.strip()
        link = 'https://pergikuliner.com' + item.find('a')['href']

        # Description text has the form "<location> | <cuisine>, <cuisine>".
        # Location is always stored as a string, also when no "|" is present.
        parts = item.find('div', class_='item-group').find('div').text.strip().split('|')
        location = parts[0].strip()
        cuisine = [c.strip() for c in parts[1].split(',')] if len(parts) > 1 else None

        # Rating: the box text minus the "/<max>" suffix held in <small>.
        rating_box = item.find('div', class_='item-rating-result')
        full_rate = rating_box.find('small').text.strip()
        rate = rating_box.text.replace(full_rate, '').strip()

        # Reset per card so a missing paragraph yields None instead of a
        # NameError (first card) or a value left over from the previous card.
        address = street = price_from = price_till = None

        # Address and price paragraphs, told apart by their icon class.
        for p in item.find_all('p', class_='clearfix'):
            icon = p.find('i')
            icon_classes = icon.get('class', []) if icon is not None else []
            if 'icon-map' in icon_classes:
                place = p.find_all('span', class_='truncate')
                if place:
                    address = place[0].text.strip()
                if len(place) > 1:
                    street = place[1].text.strip()
            elif 'icon-price' in icon_classes:
                price_text = p.find('span').text.strip()
                if '-' in price_text:
                    # "<from> - <till>/orang"
                    bounds = price_text.split('-')
                    price_from = bounds[0].strip()
                    price_till = bounds[1].replace('/orang', '').strip()
                elif 'Di atas' in price_text:
                    # "Di atas <from>/orang": open-ended upper bound.
                    price_from = price_text.replace('Di atas', '').replace('/orang', '').strip()
                    price_till = None
                elif 'Di bawah' in price_text:
                    # "Di bawah <till>/orang": lower bound fixed at zero.
                    price_from = 'Rp. 0'
                    price_till = price_text.replace('Di bawah', '').replace('/orang', '').strip()
                else:
                    logging.info("Kondisi lain pada harga")
                    price_from = price_text
                    price_till = price_text
            else:
                logging.info("Bagian lain di lokasi dan harga")

        data.append({
            'title': title,
            'rate': rate,
            'cuisine': cuisine,
            'location': location,
            'address': address,
            'street': street,
            'price_from': price_from,
            'price_till': price_till,
            'url': link,
        })
    return data

def scrape_restaurant_details(link):
    """Fetch a restaurant's detail page and extract additional fields.

    Args:
        link: URL of the restaurant page.

    Returns:
        dict: On a 200 response, a dict with ``description`` and
        ``opening_hours`` (stripped text or ``None``), one boolean
        ``<facility>_available`` entry per facility checkbox, and
        ``review_11``, ``review_12``, ``review_13`` (stripped text, or the
        string ``'None'`` when absent). An empty dict is returned for any
        other status code or when any exception is raised; both cases are
        logged rather than propagated.
    """
    # Checkbox names looked up on the page, in output order.
    facilities = (
        'wifi',
        'smoking_area',
        'outdoor_seat',
        'full_time',
        'vip_room',
        'reservation',
        'parking_area',
    )
    try:
        # Close the session once the page is downloaded so connections do not
        # accumulate across the many detail requests of a crawl.
        with requests.Session() as session:
            session.headers.update(default_headers)
            response = session.get(link, timeout=30)

        if response.status_code != 200:
            logging.warning(f"Gagal mengambil detail untuk {link} (Status Code: {response.status_code})")
            return {}

        page = BeautifulSoup(response.text, 'html.parser')
        additional_info = {}

        # Restaurant description.
        description = page.find('div', class_='restaurant-description')
        additional_info['description'] = description.text.strip() if description is not None else None

        # Opening hours.
        opening_hours = page.find('time', itemprop='openingHours')
        additional_info['opening_hours'] = opening_hours.text.strip() if opening_hours is not None else None

        # A facility counts as available only when its checkbox exists and
        # carries a ``checked`` attribute.
        for facility in facilities:
            checkbox = page.find('input', {'type': 'checkbox', 'name': facility})
            additional_info[f'{facility}_available'] = (
                checkbox is not None and checkbox.get('checked') is not None
            )

        # Review bodies; a missing review is stored as the string 'None'.
        for review_id in ['review_11', 'review_12', 'review_13']:
            review_body = page.find('div', {'id': review_id, 'itemprop': 'reviewBody'})
            additional_info[review_id] = review_body.text.strip() if review_body is not None else 'None'

        # When the first review is absent, the later two are blanked as well.
        if additional_info['review_11'] == 'None':
            additional_info['review_12'] = 'None'
            additional_info['review_13'] = 'None'

        return additional_info
    except Exception as e:
        # Best effort: one broken detail page must not abort the crawl.
        logging.error(f"Kesalahan mengambil detail untuk {link}: {e}")
        return {}

def crawl(npage=None):
    """Scrape the Jakarta cafe search results page by page.

    For every result page the cards are parsed with ``scrape_page`` and each
    record is enriched in place with ``scrape_restaurant_details``.

    Args:
        npage: Exclusive upper bound of the page numbers to fetch, i.e. pages
            ``1 .. npage - 1`` are requested. When ``None`` it is set to
            ``get_max_page() + 1``.

    Returns:
        list[dict]: The records of every page that was processed without an
        exception. A page that raises is logged and skipped entirely.
    """
    if npage is None:
        npage = get_max_page() + 1

    data = []

    # One session for all listing requests, closed when the crawl ends.
    with requests.Session() as session:
        session.headers.update(default_headers)
        for n in range(1, npage):
            try:
                response = session.get(
                    'https://pergikuliner.com/restaurants?utf8=%E2%9C%93&search_place=&default_search=Jakarta&search_name_cuisine=kafe&commit=',
                    params={'page': n},
                    timeout=30,
                )
                logging.info(f"({response.status_code}) GET halaman {n}")
                page_data = scrape_page(response)
                for restaurant in page_data:
                    # Merge the detail-page fields into the listing record.
                    restaurant.update(scrape_restaurant_details(restaurant['url']))
                data += page_data
                # Pause between listing pages to limit load on the site.
                sleep(1)
            except Exception as e:
                # Best effort: log the failing page and continue with the next.
                logging.error(f"Kesalahan pada {n}: {e}")
    return data

def save_data(data, filename):
    """Serialise ``data`` as JSON into ``filename``, replacing any existing file."""
    with open(filename, mode='w') as sink:
        json.dump(data, fp=sink)

if __name__ == '__main__':
    # Crawl every result page (npage left at its default) and write the
    # collected records to Jakarta.json.
    data = crawl()
    save_data(data, "Jakarta.json")


2024-10-19 09:15:51 [INFO] (200) GET page 1
2024-10-19 09:16:28 [INFO] (200) GET page 2
2024-10-19 09:17:03 [INFO] (200) GET page 3
2024-10-19 09:17:38 [INFO] (200) GET page 4
2024-10-19 09:18:13 [INFO] (200) GET page 5
2024-10-19 09:18:50 [INFO] (200) GET page 6
2024-10-19 09:19:24 [INFO] (200) GET page 7
2024-10-19 09:19:51 [INFO] (200) GET page 8
2024-10-19 09:20:22 [INFO] (200) GET page 9
2024-10-19 09:20:59 [INFO] (200) GET page 10
2024-10-19 09:21:27 [INFO] (200) GET page 11
2024-10-19 09:21:53 [INFO] (200) GET page 12
2024-10-19 09:22:27 [INFO] (200) GET page 13
2024-10-19 09:23:00 [INFO] (200) GET page 14
2024-10-19 09:23:31 [INFO] (200) GET page 15
2024-10-19 09:23:57 [INFO] (200) GET page 16
2024-10-19 09:24:34 [INFO] (200) GET page 17
2024-10-19 09:25:00 [INFO] (200) GET page 18
2024-10-19 09:25:27 [INFO] (200) GET page 19
2024-10-19 09:25:53 [INFO] (200) GET page 20
2024-10-19 09:26:19 [INFO] (200) GET page 21
2024-10-19 09:26:45 [INFO] (200) GET page 22
2024-10-19 09:27:17

In [28]:
import pandas as pd

# Load the JSON file written by the Jakarta scraper cell into a DataFrame.
df = pd.read_json('Jakarta.json')

# Write the DataFrame to CSV; index=False leaves out the row-index column.
df.to_csv('Jakarta.csv', index=False)

In [29]:
from bs4 import BeautifulSoup
from time import sleep
import requests
import json
import re
import logging

# Configure logging: timestamped INFO-level messages.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)

# User-Agent header that mimics a desktop Chrome browser. The token must read
# "KHTML, like Gecko" verbatim; a translated variant does not look like a
# browser-issued User-Agent string.
user_agent = (
    'Mozilla/5.0 (X11; Linux x86_64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/106.0.0.0 Safari/537.36'
)

# Headers attached to the scraper's HTTP sessions.
default_headers = {'User-Agent': user_agent}

def get_max_page() -> int:
    """Return the number of result pages for the Surabaya cafe search.

    Fetches the first results page and reads the text of the ``<strong>``
    inside ``<h2 id="top-total-search-view">``. That text is split on the
    word ``dari`` and both parts are parsed as integers.
    NOTE(review): presumably "<results on this page> dari <total results>" -
    confirm against the live markup.

    Returns:
        The second number divided by the first, rounded up so that a final,
        partially filled page is counted. If the total is an exact multiple
        the result equals the plain quotient.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
        AttributeError: If the heading or its ``<strong>`` is missing.
        ValueError: If either part of the summary is not an integer.
        IndexError: If the summary does not contain the word ``dari``.
    """
    # Send the same browser-like headers as the rest of the crawler and
    # release the connection pool once the page has been downloaded.
    with requests.Session() as session:
        session.headers.update(default_headers)
        response = session.get(
            'https://pergikuliner.com/restaurants?utf8=%E2%9C%93&search_place=&default_search=Surabaya&search_name_cuisine=kafe&commit=',
            timeout=30,
        )
    page = BeautifulSoup(response.text, 'html.parser')
    heading = page.find('h2', {'id': 'top-total-search-view'})
    counts = [int(part.strip()) for part in heading.find('strong').text.split('dari')]
    per_page, total = counts[0], counts[1]
    # Ceiling division: flooring here would silently skip the last page
    # whenever the total is not a multiple of the page size.
    return -(-total // per_page)

def scrape_page(response):
    """Parse one search-results page into a list of restaurant records.

    Args:
        response: Object whose ``text`` attribute holds the page HTML
            (``crawl`` passes the ``requests`` response).

    Returns:
        list[dict]: One dict per ``div.restaurant-result-wrapper`` card with
        the keys ``title``, ``rate``, ``cuisine``, ``location``, ``address``,
        ``street``, ``price_from``, ``price_till`` and ``url``. ``cuisine`` is
        a list of strings or ``None``; the address, street and price fields
        are ``None`` when the card has no matching paragraph.

    Raises:
        AttributeError, TypeError, KeyError: If a card lacks the title, link,
            description or rating markup this parser relies on.
    """
    page = BeautifulSoup(response.text, 'html.parser')
    data = []
    for item in page.find_all('div', class_='restaurant-result-wrapper'):
        # Title and link.
        title = item.find('h3').text.strip()
        link = 'https://pergikuliner.com' + item.find('a')['href']

        # Description text has the form "<location> | <cuisine>, <cuisine>".
        # Location is always stored as a string, also when no "|" is present.
        parts = item.find('div', class_='item-group').find('div').text.strip().split('|')
        location = parts[0].strip()
        cuisine = [c.strip() for c in parts[1].split(',')] if len(parts) > 1 else None

        # Rating: the box text minus the "/<max>" suffix held in <small>.
        rating_box = item.find('div', class_='item-rating-result')
        full_rate = rating_box.find('small').text.strip()
        rate = rating_box.text.replace(full_rate, '').strip()

        # Reset per card so a missing paragraph yields None instead of a
        # NameError (first card) or a value left over from the previous card.
        address = street = price_from = price_till = None

        # Address and price paragraphs, told apart by their icon class.
        for p in item.find_all('p', class_='clearfix'):
            icon = p.find('i')
            icon_classes = icon.get('class', []) if icon is not None else []
            if 'icon-map' in icon_classes:
                place = p.find_all('span', class_='truncate')
                if place:
                    address = place[0].text.strip()
                if len(place) > 1:
                    street = place[1].text.strip()
            elif 'icon-price' in icon_classes:
                price_text = p.find('span').text.strip()
                if '-' in price_text:
                    # "<from> - <till>/orang"
                    bounds = price_text.split('-')
                    price_from = bounds[0].strip()
                    price_till = bounds[1].replace('/orang', '').strip()
                elif 'Di atas' in price_text:
                    # "Di atas <from>/orang": open-ended upper bound.
                    price_from = price_text.replace('Di atas', '').replace('/orang', '').strip()
                    price_till = None
                elif 'Di bawah' in price_text:
                    # "Di bawah <till>/orang": lower bound fixed at zero.
                    price_from = 'Rp. 0'
                    price_till = price_text.replace('Di bawah', '').replace('/orang', '').strip()
                else:
                    logging.info("Kondisi lain pada harga")
                    price_from = price_text
                    price_till = price_text
            else:
                logging.info("Bagian lain di lokasi dan harga")

        data.append({
            'title': title,
            'rate': rate,
            'cuisine': cuisine,
            'location': location,
            'address': address,
            'street': street,
            'price_from': price_from,
            'price_till': price_till,
            'url': link,
        })
    return data

def scrape_restaurant_details(link):
    """Fetch a restaurant's detail page and extract additional fields.

    Args:
        link: URL of the restaurant page.

    Returns:
        dict: On a 200 response, a dict with ``description`` and
        ``opening_hours`` (stripped text or ``None``), one boolean
        ``<facility>_available`` entry per facility checkbox, and
        ``review_11``, ``review_12``, ``review_13`` (stripped text, or the
        string ``'None'`` when absent). An empty dict is returned for any
        other status code or when any exception is raised; both cases are
        logged rather than propagated.
    """
    # Checkbox names looked up on the page, in output order.
    facilities = (
        'wifi',
        'smoking_area',
        'outdoor_seat',
        'full_time',
        'vip_room',
        'reservation',
        'parking_area',
    )
    try:
        # Close the session once the page is downloaded so connections do not
        # accumulate across the many detail requests of a crawl.
        with requests.Session() as session:
            session.headers.update(default_headers)
            response = session.get(link, timeout=30)

        if response.status_code != 200:
            logging.warning(f"Gagal mengambil detail untuk {link} (Status Code: {response.status_code})")
            return {}

        page = BeautifulSoup(response.text, 'html.parser')
        additional_info = {}

        # Restaurant description.
        description = page.find('div', class_='restaurant-description')
        additional_info['description'] = description.text.strip() if description is not None else None

        # Opening hours.
        opening_hours = page.find('time', itemprop='openingHours')
        additional_info['opening_hours'] = opening_hours.text.strip() if opening_hours is not None else None

        # A facility counts as available only when its checkbox exists and
        # carries a ``checked`` attribute.
        for facility in facilities:
            checkbox = page.find('input', {'type': 'checkbox', 'name': facility})
            additional_info[f'{facility}_available'] = (
                checkbox is not None and checkbox.get('checked') is not None
            )

        # Review bodies; a missing review is stored as the string 'None'.
        for review_id in ['review_11', 'review_12', 'review_13']:
            review_body = page.find('div', {'id': review_id, 'itemprop': 'reviewBody'})
            additional_info[review_id] = review_body.text.strip() if review_body is not None else 'None'

        # When the first review is absent, the later two are blanked as well.
        if additional_info['review_11'] == 'None':
            additional_info['review_12'] = 'None'
            additional_info['review_13'] = 'None'

        return additional_info
    except Exception as e:
        # Best effort: one broken detail page must not abort the crawl.
        logging.error(f"Kesalahan mengambil detail untuk {link}: {e}")
        return {}

def crawl(npage=None):
    """Scrape the Surabaya cafe search results page by page.

    For every result page the cards are parsed with ``scrape_page`` and each
    record is enriched in place with ``scrape_restaurant_details``.

    Args:
        npage: Exclusive upper bound of the page numbers to fetch, i.e. pages
            ``1 .. npage - 1`` are requested. When ``None`` it is set to
            ``get_max_page() + 1``.

    Returns:
        list[dict]: The records of every page that was processed without an
        exception. A page that raises is logged and skipped entirely.
    """
    if npage is None:
        npage = get_max_page() + 1

    data = []

    # One session for all listing requests, closed when the crawl ends.
    with requests.Session() as session:
        session.headers.update(default_headers)
        for n in range(1, npage):
            try:
                response = session.get(
                    'https://pergikuliner.com/restaurants?utf8=%E2%9C%93&search_place=&default_search=Surabaya&search_name_cuisine=kafe&commit=',
                    params={'page': n},
                    timeout=30,
                )
                logging.info(f"({response.status_code}) GET halaman {n}")
                page_data = scrape_page(response)
                for restaurant in page_data:
                    # Merge the detail-page fields into the listing record.
                    restaurant.update(scrape_restaurant_details(restaurant['url']))
                data += page_data
                # Pause between listing pages to limit load on the site.
                sleep(1)
            except Exception as e:
                # Best effort: log the failing page and continue with the next.
                logging.error(f"Kesalahan pada {n}: {e}")
    return data

def save_data(data, filename):
    """Serialise ``data`` as JSON into ``filename``, replacing any existing file."""
    with open(filename, mode='w') as sink:
        json.dump(data, fp=sink)

if __name__ == '__main__':
    # Crawl every result page (npage left at its default) and write the
    # collected records to Surabaya.json.
    data = crawl()
    save_data(data, "Surabaya.json")



2024-10-19 10:15:16 [INFO] (200) GET page 1
2024-10-19 10:15:34 [INFO] (200) GET page 2
2024-10-19 10:15:55 [INFO] (200) GET page 3
2024-10-19 10:16:17 [INFO] (200) GET page 4
2024-10-19 10:16:35 [INFO] (200) GET page 5
2024-10-19 10:16:51 [INFO] (200) GET page 6
2024-10-19 10:17:11 [INFO] (200) GET page 7
2024-10-19 10:17:29 [INFO] (200) GET page 8
2024-10-19 10:17:45 [INFO] (200) GET page 9
2024-10-19 10:18:06 [INFO] (200) GET page 10
2024-10-19 10:18:18 [INFO] (200) GET page 11
2024-10-19 10:18:35 [INFO] (200) GET page 12
2024-10-19 10:18:51 [INFO] (200) GET page 13
2024-10-19 10:19:05 [INFO] (200) GET page 14
2024-10-19 10:19:23 [INFO] (200) GET page 15
2024-10-19 10:19:37 [INFO] (200) GET page 16
2024-10-19 10:19:53 [INFO] (200) GET page 17
2024-10-19 10:20:12 [INFO] (200) GET page 18
2024-10-19 10:20:28 [INFO] (200) GET page 19
2024-10-19 10:20:39 [INFO] (200) GET page 20
2024-10-19 10:20:52 [INFO] (200) GET page 21
2024-10-19 10:21:06 [INFO] (200) GET page 22
2024-10-19 10:21:23

In [30]:
import pandas as pd

# Load the JSON file written by the Surabaya scraper cell into a DataFrame.
# The name must match the scraper's output ("Surabaya.json") exactly: on a
# case-sensitive filesystem a lowercase name does not resolve to that file.
df = pd.read_json('Surabaya.json')

# Write the DataFrame to CSV; index=False leaves out the row-index column.
# The output name is kept unchanged for anything that already reads it.
df.to_csv('surabaya.csv', index=False)

In [31]:
from bs4 import BeautifulSoup
from time import sleep
import requests
import json
import re
import logging

# Configure logging: timestamped INFO-level messages.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)

# User-Agent header that mimics a desktop Chrome browser. The token must read
# "KHTML, like Gecko" verbatim; a translated variant does not look like a
# browser-issued User-Agent string.
user_agent = (
    'Mozilla/5.0 (X11; Linux x86_64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/106.0.0.0 Safari/537.36'
)

# Headers attached to the scraper's HTTP sessions.
default_headers = {'User-Agent': user_agent}

def get_max_page() -> int:
    """Return the number of result pages for the Bandung cafe search.

    Fetches the first results page and reads the text of the ``<strong>``
    inside ``<h2 id="top-total-search-view">``. That text is split on the
    word ``dari`` and both parts are parsed as integers.
    NOTE(review): presumably "<results on this page> dari <total results>" -
    confirm against the live markup.

    Returns:
        The second number divided by the first, rounded up so that a final,
        partially filled page is counted. If the total is an exact multiple
        the result equals the plain quotient.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
        AttributeError: If the heading or its ``<strong>`` is missing.
        ValueError: If either part of the summary is not an integer.
        IndexError: If the summary does not contain the word ``dari``.
    """
    # Send the same browser-like headers as the rest of the crawler and
    # release the connection pool once the page has been downloaded.
    with requests.Session() as session:
        session.headers.update(default_headers)
        response = session.get(
            'https://pergikuliner.com/restaurants?utf8=%E2%9C%93&search_place=&default_search=Bandung&search_name_cuisine=kafe&commit=',
            timeout=30,
        )
    page = BeautifulSoup(response.text, 'html.parser')
    heading = page.find('h2', {'id': 'top-total-search-view'})
    counts = [int(part.strip()) for part in heading.find('strong').text.split('dari')]
    per_page, total = counts[0], counts[1]
    # Ceiling division: flooring here would silently skip the last page
    # whenever the total is not a multiple of the page size.
    return -(-total // per_page)

def scrape_page(response):
    """Parse one search-results page into a list of restaurant records.

    Args:
        response: Object whose ``text`` attribute holds the page HTML
            (``crawl`` passes the ``requests`` response).

    Returns:
        list[dict]: One dict per ``div.restaurant-result-wrapper`` card with
        the keys ``title``, ``rate``, ``cuisine``, ``location``, ``address``,
        ``street``, ``price_from``, ``price_till`` and ``url``. ``cuisine`` is
        a list of strings or ``None``; the address, street and price fields
        are ``None`` when the card has no matching paragraph.

    Raises:
        AttributeError, TypeError, KeyError: If a card lacks the title, link,
            description or rating markup this parser relies on.
    """
    page = BeautifulSoup(response.text, 'html.parser')
    data = []
    for item in page.find_all('div', class_='restaurant-result-wrapper'):
        # Title and link.
        title = item.find('h3').text.strip()
        link = 'https://pergikuliner.com' + item.find('a')['href']

        # Description text has the form "<location> | <cuisine>, <cuisine>".
        # Location is always stored as a string, also when no "|" is present.
        parts = item.find('div', class_='item-group').find('div').text.strip().split('|')
        location = parts[0].strip()
        cuisine = [c.strip() for c in parts[1].split(',')] if len(parts) > 1 else None

        # Rating: the box text minus the "/<max>" suffix held in <small>.
        rating_box = item.find('div', class_='item-rating-result')
        full_rate = rating_box.find('small').text.strip()
        rate = rating_box.text.replace(full_rate, '').strip()

        # Reset per card so a missing paragraph yields None instead of a
        # NameError (first card) or a value left over from the previous card.
        address = street = price_from = price_till = None

        # Address and price paragraphs, told apart by their icon class.
        for p in item.find_all('p', class_='clearfix'):
            icon = p.find('i')
            icon_classes = icon.get('class', []) if icon is not None else []
            if 'icon-map' in icon_classes:
                place = p.find_all('span', class_='truncate')
                if place:
                    address = place[0].text.strip()
                if len(place) > 1:
                    street = place[1].text.strip()
            elif 'icon-price' in icon_classes:
                price_text = p.find('span').text.strip()
                if '-' in price_text:
                    # "<from> - <till>/orang"
                    bounds = price_text.split('-')
                    price_from = bounds[0].strip()
                    price_till = bounds[1].replace('/orang', '').strip()
                elif 'Di atas' in price_text:
                    # "Di atas <from>/orang": open-ended upper bound.
                    price_from = price_text.replace('Di atas', '').replace('/orang', '').strip()
                    price_till = None
                elif 'Di bawah' in price_text:
                    # "Di bawah <till>/orang": lower bound fixed at zero.
                    price_from = 'Rp. 0'
                    price_till = price_text.replace('Di bawah', '').replace('/orang', '').strip()
                else:
                    logging.info("Kondisi lain pada harga")
                    price_from = price_text
                    price_till = price_text
            else:
                logging.info("Bagian lain di lokasi dan harga")

        data.append({
            'title': title,
            'rate': rate,
            'cuisine': cuisine,
            'location': location,
            'address': address,
            'street': street,
            'price_from': price_from,
            'price_till': price_till,
            'url': link,
        })
    return data

def scrape_restaurant_details(link):
    """Fetch a restaurant's detail page and extract additional fields.

    Args:
        link: URL of the restaurant page.

    Returns:
        dict: On a 200 response, a dict with ``description`` and
        ``opening_hours`` (stripped text or ``None``), one boolean
        ``<facility>_available`` entry per facility checkbox, and
        ``review_11``, ``review_12``, ``review_13`` (stripped text, or the
        string ``'None'`` when absent). An empty dict is returned for any
        other status code or when any exception is raised; both cases are
        logged rather than propagated.
    """
    # Checkbox names looked up on the page, in output order.
    facilities = (
        'wifi',
        'smoking_area',
        'outdoor_seat',
        'full_time',
        'vip_room',
        'reservation',
        'parking_area',
    )
    try:
        # Close the session once the page is downloaded so connections do not
        # accumulate across the many detail requests of a crawl.
        with requests.Session() as session:
            session.headers.update(default_headers)
            response = session.get(link, timeout=30)

        if response.status_code != 200:
            logging.warning(f"Gagal mengambil detail untuk {link} (Status Code: {response.status_code})")
            return {}

        page = BeautifulSoup(response.text, 'html.parser')
        additional_info = {}

        # Restaurant description.
        description = page.find('div', class_='restaurant-description')
        additional_info['description'] = description.text.strip() if description is not None else None

        # Opening hours.
        opening_hours = page.find('time', itemprop='openingHours')
        additional_info['opening_hours'] = opening_hours.text.strip() if opening_hours is not None else None

        # A facility counts as available only when its checkbox exists and
        # carries a ``checked`` attribute.
        for facility in facilities:
            checkbox = page.find('input', {'type': 'checkbox', 'name': facility})
            additional_info[f'{facility}_available'] = (
                checkbox is not None and checkbox.get('checked') is not None
            )

        # Review bodies; a missing review is stored as the string 'None'.
        for review_id in ['review_11', 'review_12', 'review_13']:
            review_body = page.find('div', {'id': review_id, 'itemprop': 'reviewBody'})
            additional_info[review_id] = review_body.text.strip() if review_body is not None else 'None'

        # When the first review is absent, the later two are blanked as well.
        if additional_info['review_11'] == 'None':
            additional_info['review_12'] = 'None'
            additional_info['review_13'] = 'None'

        return additional_info
    except Exception as e:
        # Best effort: one broken detail page must not abort the crawl.
        logging.error(f"Kesalahan mengambil detail untuk {link}: {e}")
        return {}

def crawl(npage=None):
    """Scrape the Bandung cafe search results page by page.

    For every result page the cards are parsed with ``scrape_page`` and each
    record is enriched in place with ``scrape_restaurant_details``.

    Args:
        npage: Exclusive upper bound of the page numbers to fetch, i.e. pages
            ``1 .. npage - 1`` are requested. When ``None`` it is set to
            ``get_max_page() + 1``.

    Returns:
        list[dict]: The records of every page that was processed without an
        exception. A page that raises is logged and skipped entirely.
    """
    if npage is None:
        npage = get_max_page() + 1

    data = []

    # One session for all listing requests, closed when the crawl ends.
    with requests.Session() as session:
        session.headers.update(default_headers)
        for n in range(1, npage):
            try:
                response = session.get(
                    'https://pergikuliner.com/restaurants?utf8=%E2%9C%93&search_place=&default_search=Bandung&search_name_cuisine=kafe&commit=',
                    params={'page': n},
                    timeout=30,
                )
                logging.info(f"({response.status_code}) GET halaman {n}")
                page_data = scrape_page(response)
                for restaurant in page_data:
                    # Merge the detail-page fields into the listing record.
                    restaurant.update(scrape_restaurant_details(restaurant['url']))
                data += page_data
                # Pause between listing pages to limit load on the site.
                sleep(1)
            except Exception as e:
                # Best effort: log the failing page and continue with the next.
                logging.error(f"Kesalahan pada {n}: {e}")
    return data

def save_data(data, filename):
    """Serialise ``data`` as JSON into ``filename``, replacing any existing file."""
    with open(filename, mode='w') as sink:
        json.dump(data, fp=sink)

if __name__ == '__main__':
    # Crawl every result page (npage left at its default) and write the
    # collected records to Bandung.json.
    data = crawl()
    save_data(data, "Bandung.json")



2024-10-19 10:35:14 [INFO] (200) GET page 1
2024-10-19 10:35:44 [INFO] (200) GET page 2
2024-10-19 10:36:30 [INFO] (200) GET page 3
2024-10-19 10:36:58 [INFO] (200) GET page 4
2024-10-19 10:37:25 [INFO] (200) GET page 5
2024-10-19 10:37:50 [INFO] (200) GET page 6
2024-10-19 10:38:35 [INFO] (200) GET page 7
2024-10-19 10:39:01 [INFO] (200) GET page 8
2024-10-19 10:39:41 [INFO] (200) GET page 9
2024-10-19 10:40:07 [INFO] (200) GET page 10
2024-10-19 10:40:29 [INFO] (200) GET page 11
2024-10-19 10:40:52 [INFO] (200) GET page 12
2024-10-19 10:41:12 [INFO] (200) GET page 13
2024-10-19 10:41:35 [INFO] (200) GET page 14
2024-10-19 10:41:58 [INFO] (200) GET page 15
2024-10-19 10:42:23 [INFO] (200) GET page 16
2024-10-19 10:42:45 [INFO] (200) GET page 17
2024-10-19 10:43:05 [INFO] (200) GET page 18
2024-10-19 10:43:36 [INFO] (200) GET page 19
2024-10-19 10:44:08 [INFO] (200) GET page 20
2024-10-19 10:44:30 [INFO] (200) GET page 21
2024-10-19 10:44:55 [INFO] (200) GET page 22
2024-10-19 10:45:21

In [32]:
import pandas as pd

# Load the JSON file written by the Bandung scraper cell into a DataFrame.
df = pd.read_json('Bandung.json')

# Write the DataFrame to CSV; index=False leaves out the row-index column.
df.to_csv('Bandung.csv', index=False)