In [None]:
# Run this only once. After installing these libraries, you have to restart the kernel.
#!pip install beautifulsoup4 serpapi requests dotenv google-search-results

In [4]:
import os
import requests
import time
from serpapi import GoogleSearch # Import the new library
from dotenv import load_dotenv
import json

In [5]:
# --- Configuration ---
# Search terms, one entry per dish to scrape.
TARGET_DISHES = [
    "nasi goreng spesial",
    "nasi padang rendang",
    "nasi lalapan ayam goreng",
]
IMAGES_TO_DOWNLOAD = 100  # upper bound on images saved per dish
SAVE_DIRECTORY = "../data/raw/"

# IMPORTANT: a free API key is available from https://serpapi.com/
# Variables from ../.env are loaded first, then the key is read from the
# process environment (None when it is not set).
load_dotenv("../.env")
SERPAPI_API_KEY = os.environ.get("SERPAPI_API_KEY")


In [6]:
# --- Main Scraper Logic ---
def _fetch_image_bytes(img_url, timeout=10):
    """Download one image and return its raw bytes.

    Args:
        img_url: Direct URL of the image file.
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        The non-empty response body as ``bytes``.

    Raises:
        requests.RequestException: The request failed or the server answered
            with a 4xx/5xx status.
        ValueError: The body is empty, or the server declared a ``text/*``
            content type (i.e. the payload is not an image).
    """
    response = requests.get(img_url, timeout=timeout)
    # Without this check a 403/404 error page would be written to disk as a ".jpg".
    response.raise_for_status()
    content_type = response.headers.get("Content-Type", "")
    if content_type.lower().startswith("text/"):
        raise ValueError(f"unexpected content type {content_type!r}")
    if not response.content:
        raise ValueError("empty response body")
    return response.content


def scrape_food_images(dish_name, num_images, api_key):
    """Search Google Images through SerpApi and save the results for one dish.

    Images are written to ``<SAVE_DIRECTORY>/<dish_name with underscores>/`` as
    ``<dish>_<n>.jpg``, numbered from 1 in download order. The ``.jpg`` suffix
    is applied regardless of the actual image format. A failure on a single
    image is reported and skipped so the remaining results are still tried.

    Args:
        dish_name: Dish to search for; also used to build the folder and file names.
        num_images: Maximum number of images to save. Values <= 0 return
            immediately without calling the API.
        api_key: SerpApi API key. A falsy value aborts with an error message.

    Returns:
        None. Progress and errors are reported with ``print``.
    """
    if not api_key:
        print("ERROR: SerpApi API key not found. Please set the SERPAPI_API_KEY environment variable.")
        return
    if num_images <= 0:
        # Nothing could be saved, so do not spend an API search on the request.
        print(f"Nothing to download for {dish_name}: num_images must be positive.")
        return

    print(f"--- Starting scrape for: {dish_name} ---")
    dish_slug = dish_name.replace(' ', '_')
    dish_folder = os.path.join(SAVE_DIRECTORY, dish_slug)
    os.makedirs(dish_folder, exist_ok=True)
    print(f"Saving images to: {dish_folder}")

    params = {
        "engine": "google_images",
        "q": f"{dish_name} food photography",
        "api_key": api_key,
        "num": num_images
    }

    try:
        results = GoogleSearch(params).get_dict()
    except Exception as e:
        print(f"Error fetching results from SerpApi: {e}")
        return

    image_results = results.get("images_results", [])
    if not image_results:
        # Surface an "error" field from the response instead of hiding it
        # behind the generic "no results" message.
        api_error = results.get("error")
        if api_error:
            print(f"Error fetching results from SerpApi: {api_error}")
        else:
            print(f"No image results found for {dish_name}.")
        return

    downloaded_count = 0
    for image_info in image_results:
        if downloaded_count >= num_images:
            break
        img_url = image_info.get('original')
        if not img_url:
            continue
        try:
            img_data = _fetch_image_bytes(img_url)
            file_name = f"{dish_slug}_{downloaded_count + 1}.jpg"
            file_path = os.path.join(dish_folder, file_name)
            with open(file_path, 'wb') as f:
                f.write(img_data)
        except Exception as e:
            # Best effort: one bad host must not abort the whole scrape.
            # Report the URL that actually failed, not the page hosting it.
            print(f"Could not download an image from {img_url}. Error: {e}")
            continue
        downloaded_count += 1
        print(f"Downloaded {file_name}")
        time.sleep(0.1)  # brief pause between successful downloads
    print(f"--- Finished scrape for {dish_name}. Downloaded {downloaded_count} images. ---\n")


In [7]:
# --- Execution ---
# Scrape every configured dish in turn. The key check is repeated here so a
# missing key is reported once instead of once per dish.
if not SERPAPI_API_KEY:
    print("FATAL ERROR: SERPAPI_API_KEY environment variable is not set.")
    # The key is read from ../.env (or the process environment), never from a
    # notebook cell, so point the user there rather than at hardcoding it.
    print("Please get a free key from https://serpapi.com/, add SERPAPI_API_KEY=<your key> to ../.env, and re-run the configuration cell.")
else:
    os.makedirs(SAVE_DIRECTORY, exist_ok=True)
    for dish in TARGET_DISHES:
        scrape_food_images(dish, IMAGES_TO_DOWNLOAD, SERPAPI_API_KEY)
        time.sleep(1)  # pause between consecutive searches
    print("All scraping tasks completed! 🎉")

--- Starting scrape for: nasi goreng spesial ---
Saving images to: ../data/raw/nasi_goreng_spesial
Downloaded nasi_goreng_spesial_1.jpg
Downloaded nasi_goreng_spesial_2.jpg
Downloaded nasi_goreng_spesial_3.jpg
Downloaded nasi_goreng_spesial_4.jpg
Downloaded nasi_goreng_spesial_5.jpg
Downloaded nasi_goreng_spesial_6.jpg
Downloaded nasi_goreng_spesial_7.jpg
Downloaded nasi_goreng_spesial_8.jpg
Downloaded nasi_goreng_spesial_9.jpg
Downloaded nasi_goreng_spesial_10.jpg
Downloaded nasi_goreng_spesial_11.jpg
Downloaded nasi_goreng_spesial_12.jpg
Downloaded nasi_goreng_spesial_13.jpg
Downloaded nasi_goreng_spesial_14.jpg
Downloaded nasi_goreng_spesial_15.jpg
Downloaded nasi_goreng_spesial_16.jpg
Downloaded nasi_goreng_spesial_17.jpg
Downloaded nasi_goreng_spesial_18.jpg
Downloaded nasi_goreng_spesial_19.jpg
Downloaded nasi_goreng_spesial_20.jpg
Downloaded nasi_goreng_spesial_21.jpg
Downloaded nasi_goreng_spesial_22.jpg
Downloaded nasi_goreng_spesial_23.jpg
Downloaded nasi_goreng_spesial_24.jp