In [8]:
import json
import time
import requests
import pandas as pd
from datetime import datetime

class GoogleSearcher:
    """Search journal websites for PDF articles via SerpAPI's Google engine.

    Instance fields:
        api_key: SerpAPI key sent with every request.
        timeout: Seconds to wait for the HTTP response before giving up.
    """

    # SerpAPI endpoint used for every search request.
    SEARCH_URL = "https://serpapi.com/search"

    def __init__(self, api_key, timeout=30):
        """Store the SerpAPI key and the per-request timeout (in seconds)."""
        self.api_key = api_key
        self.timeout = timeout

    @staticmethod
    def _is_pdf_link(link):
        """Return True when the URL path ends in ``.pdf`` (case-insensitive).

        The ``?query`` and ``#fragment`` parts are ignored so that links such
        as ``http://host/paper.pdf?download=1`` are still recognised.
        """
        path = link.partition('#')[0].partition('?')[0]
        return path.lower().endswith('.pdf')

    def search_google(self, query, journal_info):
        """Run a Google search restricted to one journal site and to PDF files.

        Args:
            query: Free-text topic or title keywords.
            journal_info: Mapping with at least the keys ``'link'`` and
                ``'nama_jurnal'``; ``'sinta_rank'`` is optional.

        Returns:
            A list of dicts with the keys ``judul``, ``link``, ``snippet``,
            ``jurnal``, ``peringkat_sinta`` and ``website_jurnal``. The list
            is empty when nothing matched or when the request failed (the
            failure is printed, not raised).
        """
        search_query = f"{query} site:{journal_info['link']} filetype:pdf"

        params = {
            "engine": "google",
            "q": search_query,
            "hl": "id",
            "num": 20,
            "api_key": self.api_key,
        }

        # Only the network call and JSON decoding are guarded: those are the
        # failures expected at runtime. Without a timeout a stalled connection
        # would block the whole program indefinitely.
        try:
            response = requests.get(
                self.SEARCH_URL, params=params, timeout=self.timeout
            )
            response.raise_for_status()
            results = response.json()
        except (requests.RequestException, ValueError) as e:
            print(f"Error saat mencari di {journal_info['nama_jurnal']}: {e}")
            return []

        organic = results.get("organic_results") if isinstance(results, dict) else None

        articles = []
        for result in organic or []:
            # `or ''` also covers an explicit null link in the response.
            link = result.get('link') or ''
            # Keep only direct PDF links (optional filter; may be removed).
            if not self._is_pdf_link(link):
                continue

            articles.append({
                'judul': result.get('title', ''),
                'link': link,
                'snippet': result.get('snippet', ''),
                'jurnal': journal_info['nama_jurnal'],
                'peringkat_sinta': journal_info.get('sinta_rank', ''),
                'website_jurnal': journal_info['link'],
            })

        return articles

    def format_results(self, results_df):
        """Render the search results as a numbered, human-readable text block.

        Args:
            results_df: DataFrame with the columns ``judul``, ``jurnal``,
                ``peringkat_sinta``, ``link`` and ``snippet``.

        Returns:
            The formatted text, or a "no results" message for an empty frame.
        """
        if len(results_df) == 0:
            return "Tidak ditemukan hasil yang sesuai."

        lines = ["=== HASIL PENCARIAN ===\n"]
        # Number rows by position, not by index label, so filtered or
        # re-indexed frames are still numbered 1..n.
        for number, (_, row) in enumerate(results_df.iterrows(), start=1):
            lines.append(f"{number}. {row['judul']}")
            lines.append(f"   Jurnal: {row['jurnal']} ({row['peringkat_sinta']})")
            lines.append(f"   Link: {row['link']}")
            if row['snippet']:
                lines.append(f"   Ringkasan: {row['snippet']}")
            lines.append("-" * 80)

        return "\n".join(lines) + "\n"

def main():
    """Run the interactive console loop for searching journal PDFs.

    Reads the journal list from ``list_journal.json``, asks for a topic and an
    optional SINTA rank, queries each matching journal through
    ``GoogleSearcher``, saves the hits to a timestamped CSV file and prints a
    formatted summary. The loop ends when the user types ``keluar`` or answers
    anything other than ``ya`` to the follow-up question.

    The SerpAPI key is read from the ``SERPAPI_API_KEY`` environment variable;
    if it is not set, the user is prompted for it without echoing.
    """
    # Imported locally so this function does not depend on edits to the
    # file-level import block.
    import os
    import re
    from getpass import getpass

    # Credentials must not live in source control.
    # NOTE(review): the key previously embedded here should be treated as
    # leaked and rotated.
    api_key = os.environ.get("SERPAPI_API_KEY", "").strip()
    if not api_key:
        api_key = getpass("Masukkan SerpAPI API key: ").strip()
    if not api_key:
        print("API key SerpAPI diperlukan untuk menjalankan pencarian.")
        return
    searcher = GoogleSearcher(api_key)

    with open('list_journal.json', 'r', encoding='utf-8') as f:
        journals_data = json.load(f)

    while True:
        print("\n=== PENCARIAN FILE PDF JURNAL INDONESIA (Google Search) ===")
        print("Ketik 'keluar' untuk mengakhiri program")

        topic = input("\nMasukkan judul atau tema kata kunci: ").strip()
        if topic.lower() == 'keluar':
            break
        if not topic:
            # An empty query would still spend one API call per journal.
            print("Kata kunci tidak boleh kosong.")
            continue

        sinta_rank = input("Peringkat SINTA (1/2/3, kosongkan untuk semua): ").strip()

        all_results = []
        processed_journals = 0

        for rank, journals in journals_data.items():
            # Only the "SINTA_<n>" groups hold journal lists.
            if not rank.startswith("SINTA_"):
                continue
            if sinta_rank and rank != f"SINTA_{sinta_rank}":
                continue

            for journal in journals:
                processed_journals += 1
                print(f"\nMencari di {journal['nama_jurnal']} ({rank})...")

                # Pass a copy so the loaded journal data is not mutated.
                journal_info = {**journal, 'sinta_rank': rank}
                all_results.extend(searcher.search_google(topic, journal_info))

                # Pause between requests to stay gentle on the API.
                time.sleep(2)

        results_df = pd.DataFrame(all_results)

        if len(results_df) > 0:
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            # Collapse anything that is not a word character or '-' so the
            # topic cannot inject path separators or invalid filename chars.
            safe_topic = re.sub(r'[^\w-]+', '_', topic).strip('_') or 'pencarian'
            filename = f"hasil_pdf_google_{safe_topic}_{timestamp}.csv"
            results_df.to_csv(filename, index=False)

            print(f"\nDitemukan {len(results_df)} file PDF dari {processed_journals} jurnal!")
            print(f"Hasil telah disimpan ke: {filename}")

            print("\n" + searcher.format_results(results_df))
        else:
            print("\nTidak ditemukan file PDF yang sesuai dengan kriteria pencarian.")

        if input("\nIngin mencari topik lain? (ya/tidak): ").strip().lower() != 'ya':
            break

# Start the interactive search only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()



=== PENCARIAN FILE PDF JURNAL INDONESIA (Google Search) ===
Ketik 'keluar' untuk mengakhiri program

Mencari di Indonesian Journal of Electrical Engineering and Computer Science (IJEECS) (SINTA_1)...

Mencari di TELKOMNIKA (Telecommunication Computing Electronics and Control) (SINTA_1)...

Mencari di International Journal on Electrical Engineering and Informatics (IJEEI) (SINTA_1)...

Mencari di Journal of ICT Research and Applications (Institut Teknologi Bandung) (SINTA_1)...

Mencari di Indonesian Journal of Science & Technology (IJoST) (Universitas Pendidikan Indonesia) (SINTA_1)...

Ditemukan 10 file PDF dari 5 jurnal!
Hasil telah disimpan ke: hasil_pdf_google_machine_learning_20250501_173928.csv

=== HASIL PENCARIAN ===

1. A Novel Deep Learning Approach of Convolutional Neural ...
   Jurnal: International Journal on Electrical Engineering and Informatics (IJEEI) (SINTA_1)
   Link: http://ijeei.org/docs-2082378127617b973c95841.pdf
   Ringkasan: Abstract: Deep learning became more