In [10]:
import pandas as pd
from selenium import webdriver
from selenium.common import ElementClickInterceptedException
from selenium.common import NoSuchElementException
from selenium.common import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Author name and affiliation used to build the Google Scholar search query.
first_name, last_name = "Kusman", "Sadik"
affilitate = "IPB University"
# Long-form affiliation; not referenced anywhere in the visible code.
affilitate_full = "Institut Pertanian Bogor"

# Accumulator: the scraper appends one dict per publication row.
results = []

# Browser setup: a Chrome session created with default ChromeOptions.
options = webdriver.ChromeOptions()
driver = webdriver.Chrome(options=options)

def _clean_text(raw):
    """Return *raw* with every ';' replaced by ','."""
    # NOTE(review): presumably keeps fields safe for a ';'-delimited
    # export -- confirm against downstream consumers.
    return raw.replace(';', ',')


def _load_all_publications():
    """Click the "show more" button (ID ``gsc_bpf_more``) until it is disabled.

    Stops quietly (best effort) when a wait times out, the button cannot be
    found, or the click is intercepted; the rows loaded so far stay on the
    page for extraction.
    """
    while True:
        try:
            load_more_button = WebDriverWait(driver, 5).until(
                EC.presence_of_element_located((By.ID, "gsc_bpf_more"))
            )

            # A disabled button ends the loop: nothing more to load.
            if not load_more_button.is_enabled():
                break

            # Make sure the button can actually receive the click.
            WebDriverWait(driver, 5).until(
                EC.element_to_be_clickable((By.ID, "gsc_bpf_more"))
            )
            load_more_button.click()

            # Short pause so the next iteration does not collide with the
            # page while it is still loading.
            WebDriverWait(driver, 2).until(
                EC.invisibility_of_element_located((By.CLASS_NAME, "gs_md_d"))
            )
        except (
            # WebDriverWait.until() reports failure with TimeoutException,
            # so it must be listed for the best-effort break to happen.
            TimeoutException,
            NoSuchElementException,
            ElementClickInterceptedException,
        ):
            break


def scrape_google_scholar():
    """Scrape the author's publication list from Google Scholar into ``results``.

    Uses the module-level ``driver`` to search Google Scholar for
    ``"{first_name} {last_name} {affilitate}"``, opens the first profile link
    found in the search results, expands the publication list, and appends
    one dict per publication row to the module-level ``results`` list.

    Returns:
        None. Output is delivered by mutating ``results``.

    Raises:
        TimeoutException: if no element with class ``gs_rt2`` becomes
            visible within 5 seconds of submitting the search.
        NoSuchElementException: if the search box, search button, profile
            link, or an expected cell inside a publication row is missing.
    """
    driver.get("https://scholar.google.com")

    # Type the query into the search box and submit it.
    driver.find_element(By.ID, "gs_hdr_tsi").send_keys(f"{first_name} {last_name} {affilitate}")
    driver.find_element(By.ID, "gs_hdr_tsb").click()

    # Open the author profile: first visible "gs_rt2" element, first link in it.
    user_element = WebDriverWait(driver, 5).until(
        EC.visibility_of_any_elements_located((By.CLASS_NAME, "gs_rt2"))
    )[0]
    driver.get(user_element.find_element(By.TAG_NAME, "a").get_attribute("href"))

    # Load every publication before reading the table.
    _load_all_publications()

    # Extract the publication data, one table row at a time.
    publications = driver.find_elements(By.CLASS_NAME, "gsc_a_tr")
    for publication in publications:
        title_element = publication.find_element(By.CLASS_NAME, "gsc_a_t")
        title = _clean_text(title_element.find_element(By.CLASS_NAME, "gsc_a_at").text)
        cited_count = _clean_text(publication.find_element(By.CLASS_NAME, "gsc_a_ac").text)
        year = _clean_text(publication.find_element(By.CLASS_NAME, "gsc_a_h").text)

        # The <div>s inside the title cell are read positionally: the first
        # is treated as the author list, the second as the publication venue.
        # Either may be absent, in which case the field is left empty.
        descriptions = title_element.find_elements(By.TAG_NAME, "div")
        author = _clean_text(descriptions[0].text) if len(descriptions) > 0 else ""
        journal_name = _clean_text(descriptions[1].text) if len(descriptions) > 1 else ""

        results.append({
            "source": "Google Scholar",
            "type": "Article",
            "title": title,
            "author": author,
            "publisher": journal_name,
            "year": year,
            "quartile": "N/A",
            "cited": cited_count,
            "is_success": "N/A",
            "info": "N/A"
        })

# Run the scrape. The browser is closed in `finally` so that a failure inside
# scrape_google_scholar() does not leave a Chrome process running; the
# exception still propagates and nothing is written in that case.
try:
    scrape_google_scholar()
finally:
    driver.quit()

# Save the collected rows to an Excel workbook.
df = pd.DataFrame(results)
output_path = "schoolar.xlsx"
df.to_excel(output_path, index=False)

print(f"✅ Data berhasil disimpan di: {output_path}")

✅ Data berhasil disimpan di: schoolar.xlsx
