<a href="https://colab.research.google.com/github/abdfajar/republika_sentiner/blob/main/Republika_allsearch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import requests
from bs4 import BeautifulSoup
import gradio as gr
import re
import pandas as pd
import json
from datetime import datetime
from urllib.parse import urljoin, quote_plus
import hashlib

def generate_search_id(keyword, startdate, enddate, page):
    """Return a 16-character hex id derived from the search inputs.

    The four inputs are joined with underscores, hashed with MD5 and the
    first 16 hex digits of the digest are returned, so identical inputs
    always map to the same id.
    """
    fingerprint = "_".join(str(part) for part in (keyword, startdate, enddate, page))
    digest = hashlib.md5(fingerprint.encode())
    return digest.hexdigest()[:16]

def scrape_republika_search(keyword, startdate, enddate, page=1):
    """
    Scrape one page of search results from Republika.co.id.

    Args:
        keyword: Search phrase; URL-encoded with ``quote_plus``.
        startdate: Start of the date filter, placed in the URL as given.
        enddate: End of the date filter, placed in the URL as given.
        page: Result page number placed in the URL path.

    Returns:
        Tuple ``(results, status)``. ``results`` is a list of dicts with the
        keys ``title``, ``date`` and ``url`` (at most 20 entries); ``status``
        is a human-readable message. Failures never raise: any exception is
        reported as ``([], "<error message>")``.
    """
    try:
        # Build search URL
        q = quote_plus(keyword)
        url = f"https://republika.co.id/search/v3/all/{page}/?q={q}&latest_date=custom&startdate={startdate}&enddate={enddate}"

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        }

        print(f"üîç Scraping: {url}")
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')

        # Exact path to the results container; looser selectors are tried
        # below when the page layout does not match it.
        selector = "#search > div.main-wrapper > main > div.main-content > div.container > div.results-section"
        results_section = soup.select_one(selector)

        if not results_section:
            fallback_selectors = [
                'div.results-section',
                '.results-section',
                'main div.container div[class*="result"]',
                '.search-results'
            ]
            for sel in fallback_selectors:
                results_section = soup.select_one(sel)
                if results_section:
                    print(f"‚úÖ Found results with fallback: {sel}")
                    break

        if not results_section:
            return [], "‚ùå Results section not found. Structure may have changed."

        results_list = []

        # Candidate selectors for a single result; the first one that
        # matches anything wins.
        item_selectors = [
            'div[class*="card"] a',
            'article a',
            '.search-item',
            '.result-item',
            'div.max-card'
        ]

        items = []
        for sel in item_selectors:
            items = results_section.select(sel)
            if items:
                print(f"‚úÖ Found {len(items)} items with selector: {sel}")
                break
        else:
            # No selector matched: fall back to every link that looks like
            # an article URL.
            items = results_section.find_all('a', href=re.compile(r'/berita/|/reads/'))

        for item in items[:20]:  # Limit to top 20 per page
            # Title: first heading inside the item, else the item's own text.
            title_elem = item.find(['h1', 'h2', 'h3', 'h4']) or item
            title = title_elem.get_text(strip=True)
            if not title or len(title) < 10:
                continue

            # Date in the form "DD Month YYYY, HH:MM".
            date_elem = item.find(class_=re.compile(r'date|time')) or item.find('span')
            date_text = date_elem.get_text(strip=True) if date_elem else ""
            date_match = re.search(r'(\d{1,2}\s+\w+\s+\d{4},\s+\d{1,2}:\d{2})', date_text)
            date = date_match.group(1) if date_match else "Date not found"

            # Some selectors above match wrapper elements rather than <a>
            # tags; those carry no href of their own, so take the link
            # from the first anchor nested inside them.
            link = item if item.name == 'a' else item.find('a', href=True)
            href = link.get('href', '') if link is not None else ''
            if href.startswith('/'):
                full_url = urljoin("https://republika.co.id", href)
            else:
                full_url = href

            results_list.append({
                'title': title[:200],  # Truncate long titles
                'date': date,
                'url': full_url
            })

        status = f"‚úÖ Found {len(results_list)} results on page {page}"
        return results_list, status

    except Exception as e:
        # Deliberate catch-all: the caller expects a status message rather
        # than an exception.
        return [], f"‚ùå Error: {str(e)}"

def process_republika_search(keyword, startdate, enddate, page):
    """
    Main processing function for Gradio: run one search and shape its output.

    Args:
        keyword: Search phrase; blank or missing keywords are rejected.
        startdate: Start date as an object with ``strftime`` or as a
            ``YYYY-MM-DD`` string; falsy values fall back to '2025-10-01'.
        enddate: End date, same forms as ``startdate``; falsy values fall
            back to '2025-10-31'.
        page: Result page number; coerced to ``int``.

    Returns:
        Tuple ``(message, df_results, df_keyword_search)``: a Markdown status
        message, the per-article results table, and a one-row
        KeywordSearchResult table. Both frames are empty when the keyword is
        blank or the scrape returned nothing.
    """
    if not keyword or not keyword.strip():
        return "‚ùå Masukkan keyword pencarian!", pd.DataFrame(), pd.DataFrame()

    # gr.Number delivers floats by default; without this the URL path and
    # the hashed search id would contain "1.0" instead of "1".
    try:
        page = int(page)
    except (TypeError, ValueError, OverflowError):
        page = 1

    def _to_date_str(value, default):
        """Format a date-like value as YYYY-MM-DD, or return the default."""
        if not value:
            return default
        return value if isinstance(value, str) else value.strftime('%Y-%m-%d')

    startdate_str = _to_date_str(startdate, '2025-10-01')
    enddate_str = _to_date_str(enddate, '2025-10-31')

    results_list, status = scrape_republika_search(keyword, startdate_str, enddate_str, page)

    if not results_list:
        return status, pd.DataFrame(), pd.DataFrame()

    # Create KeywordSearchResult schema
    search_id = generate_search_id(keyword, startdate_str, enddate_str, page)
    timestamp_search = datetime.now().isoformat()
    num_results = len(results_list)
    results_json = json.dumps(results_list, ensure_ascii=False)

    df_keyword_search = pd.DataFrame([{
        'search_id': search_id,
        'keyword': keyword,
        'source_type': 'Republika Search',
        'num_results': num_results,
        'results': results_json[:1000] + "..." if len(results_json) > 1000 else results_json,  # Truncate for display
        'timestamp_search': timestamp_search
    }])

    # Results table
    df_results = pd.DataFrame(results_list)

    output_msg = f"""üîç **HASIL PENCARIAN REPUBLIKA.CO.ID**
üìù **Keyword:** {keyword}
üìÖ **Periode:** {startdate_str} s.d. {enddate_str}
üìÑ **Halaman:** {page}
{status}

**üíæ Search ID:** `{search_id}`

**üì§ Export CSV:**
- Klik tombol di bawah untuk download `keyword_search.csv` dan `search_results.csv`"""

    return output_msg, df_results, df_keyword_search

# Gradio Interface
def create_search_interface():
    """Build the Gradio Blocks UI for the single-page search scraper.

    The layout has keyword, date-range and page inputs, a status area, the
    results table, the KeywordSearchResult table and two CSV download
    widgets. One click handler validates the dates, runs
    ``process_republika_search`` and writes the CSV files.

    Returns:
        The configured ``gr.Blocks`` app (not yet launched).
    """
    # Local imports: only the CSV export below needs these modules.
    import os
    import tempfile

    with gr.Blocks(
        title="üîç Republika.co.id Search Scraper - KeywordSearchResult",
        theme=gr.themes.Soft(),
        css="""
        .search-box { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); }
        """
    ) as demo:
        gr.Markdown("""
        # üîç **Republika.co.id Search Scraper**
        **Ekstrak hasil pencarian berdasarkan keyword ke dalam skema KeywordSearchResult**

        **Fitur:**
        - ‚úÖ Scraping multi-halaman
        - ‚úÖ Ekstraksi otomatis title, date, URL
        - ‚úÖ Generate Search ID unik
        - ‚úÖ Export ke CSV sesuai skema
        """)

        with gr.Row():
            with gr.Column(scale=1):
                keyword_input = gr.Textbox(
                    label="üîë Keyword Pencarian",
                    placeholder="e.g., MBG",
                    value="MBG"
                )
                startdate_input = gr.Textbox(
                    label="üìÖ Tanggal Mulai (YYYY-MM-DD)",
                    placeholder="2025-10-01",
                    value="2025-10-01"
                )
                enddate_input = gr.Textbox(
                    label="üìÖ Tanggal Selesai (YYYY-MM-DD)",
                    placeholder="2025-10-31",
                    value="2025-10-31"
                )
                page_input = gr.Number(
                    label="üìÑ Nomor Halaman",
                    value=1,
                    minimum=1,
                    maximum=50,
                    # Deliver an int so the page number is not rendered
                    # as "1.0" in the search URL.
                    precision=0
                )
                search_btn = gr.Button("üöÄ Cari & Scrap", variant="primary", size="lg")

        with gr.Row():
            output_msg = gr.Markdown(label="üìä Status & Info")

        with gr.Row():
            with gr.Column(scale=2):
                gr.Markdown("### üìã Tabel Hasil Pencarian")
                results_table = gr.Dataframe(
                    label="Results",
                    headers=["title", "date", "url"],
                    wrap=True,
                    interactive=False
                )
            with gr.Column(scale=1):
                gr.Markdown("### üíæ Skema KeywordSearchResult")
                schema_table = gr.Dataframe(
                    label="Keyword Search Data",
                    interactive=False
                )

        # Download widgets; hidden until a search has produced files.
        with gr.Row():
            export_search = gr.File(
                label="üíæ Download keyword_search.csv",
                visible=False
            )
            export_results = gr.File(
                label="üíæ Download search_results.csv",
                visible=False
            )

        # Events
        def parse_dates(startdate_str, enddate_str):
            """Parse both YYYY-MM-DD strings; show a Gradio error if invalid."""
            try:
                startdate = datetime.strptime(startdate_str, '%Y-%m-%d') if startdate_str else None
                enddate = datetime.strptime(enddate_str, '%Y-%m-%d') if enddate_str else None
                return startdate, enddate
            except ValueError:
                raise gr.Error("Format tanggal harus YYYY-MM-DD!")

        def export_csv(df_keyword, df_results):
            """Write both tables to CSV files and return File widget updates.

            gr.File outputs take file paths, not raw bytes, so the tables
            are written into a fresh temporary directory and the widgets
            are made visible. With no results both widgets stay hidden.
            """
            if df_results is None or df_results.empty:
                hidden = gr.update(value=None, visible=False)
                return hidden, hidden
            export_dir = tempfile.mkdtemp(prefix="republika_search_")
            search_path = os.path.join(export_dir, "keyword_search.csv")
            results_path = os.path.join(export_dir, "search_results.csv")
            df_keyword.to_csv(search_path, index=False)
            df_results.to_csv(results_path, index=False)
            return (
                gr.update(value=search_path, visible=True),
                gr.update(value=results_path, visible=True),
            )

        def run_search(keyword, startdate_str, enddate_str, page):
            """Click handler: validate dates once, search, then export."""
            if startdate_str and enddate_str:
                startdate, enddate = parse_dates(startdate_str, enddate_str)
            else:
                # Either date missing: let the processing defaults apply.
                startdate = enddate = None
            message, df_results, df_keyword = process_republika_search(
                keyword, startdate, enddate, page
            )
            search_file, results_file = export_csv(df_keyword, df_results)
            return message, df_results, df_keyword, search_file, results_file

        search_btn.click(
            fn=run_search,
            inputs=[keyword_input, startdate_input, enddate_input, page_input],
            outputs=[output_msg, results_table, schema_table, export_search, export_results]
        )

        gr.Examples(
            examples=[
                ["MBG", "2025-10-01", "2025-10-31", 1],
                ["Prabowo", "2025-09-01", "2025-09-30", 2],
            ],
            inputs=[keyword_input, startdate_input, enddate_input, page_input]
        )

    return demo

if __name__ == "__main__":
    # Entry point: announce start-up, build the UI, then serve it on all
    # network interfaces at port 7861 without a public share link.
    print("üöÄ Starting Republika Search Scraper...")
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7861,
        "share": False,
        "inbrowser": True,
    }
    demo = create_search_interface()
    demo.launch(**launch_options)

üöÄ Starting Republika Search Scraper...


  with gr.Blocks(
  with gr.Blocks(


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Note: opening Chrome Inspector may crash demo inside Colab notebooks.
* To create a public link, set `share=True` in `launch()`.


<IPython.core.display.Javascript object>

In [2]:
import requests
from bs4 import BeautifulSoup
import gradio as gr
import re
import pandas as pd
import json
from datetime import datetime
from urllib.parse import urljoin, quote_plus
import hashlib
import time
import os

# ==================== FUNGSI SCRAPING ARTIKEL ====================
def clean_text(text):
    """Clean text by dropping unwanted characters and collapsing whitespace.

    Word characters, whitespace and the punctuation ``. , ! ? ; : ( ) -``
    are kept; every other character is removed. Runs of whitespace become a
    single space and leading/trailing whitespace is trimmed.

    Returns an empty string for ``None`` or empty input.
    """
    if not text:
        return ""
    # Strip symbols first: collapsing whitespace beforehand would leave a
    # double space wherever a removed symbol stood between two spaces.
    text = re.sub(r'[^\w\s.,!?;:()\-]', '', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

def extract_text_from_element(element):
    """Return the cleaned text content of an element.

    The element is re-parsed from its string form so the caller's tree is
    left untouched, non-content tags are removed from that copy, and the
    remaining text is passed through ``clean_text``. Falsy input yields "".
    """
    if not element:
        return ""
    excluded_tags = [
        'script', 'style', 'nav', 'header', 'footer',
        'aside', 'figure', 'img', 'video', 'blockquote',
    ]
    detached = BeautifulSoup(str(element), 'html.parser')
    for tag in detached.find_all(excluded_tags):
        tag.decompose()
    raw_text = detached.get_text(separator='\n', strip=True)
    return clean_text(raw_text)

def extract_republika_article(url):
    """
    Scrape a single Republika.co.id article page.

    Returns ``(metadata, None)`` on success, where ``metadata`` is a dict
    with the keys ``judul``, ``waktu_terbit``, ``editor``, ``konten``,
    ``url`` and ``panjang_konten``. Returns ``(None, message)`` when the
    page layout is not recognised or any exception occurs.
    """
    try:
        print(f"üîç Mengakses URL: {url}")
        page = requests.get(
            url,
            headers={
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            },
            timeout=15,
        )
        page.raise_for_status()
        document = BeautifulSoup(page.content, 'html.parser')

        main_content = document.find('div', class_='main-content__left')
        if main_content is None:
            return None, "Struktur halaman tidak dikenali. Tidak ditemukan div.main-content__left"

        # --- Title: <h1> inside the title card, else the first <h1>. ---
        title_box = main_content.find('div', class_='max-card__title')
        heading_scope = title_box if title_box is not None else main_content
        heading = heading_scope.find('h1')
        if heading is not None:
            title = clean_text(heading.get_text())
        else:
            title = "Judul tidak ditemukan"

        # --- Publication time: first matching pattern wins. ---
        date_box = main_content.find('div', class_='date date-item__headline')
        if date_box is None:
            published = "Waktu tidak ditemukan"
        else:
            date_text = clean_text(date_box.get_text())
            date_patterns = [
                r'(\d{1,2}\s+\w+\s+\d{4})\s+(\d{1,2}:\d{2})',
                r'(\d{1,2}/\d{1,2}/\d{4})\s+(\d{1,2}:\d{2})',
                r'(\d{1,2}\s+\w+\s+\d{4})',
            ]
            hit = next(
                filter(None, (re.search(pattern, date_text) for pattern in date_patterns)),
                None,
            )
            if hit is None:
                # No pattern matched: keep the cleaned text as-is.
                published = date_text
            elif len(hit.groups()) == 2:
                published = f"{hit.group(1)} {hit.group(2)} WIB"
            else:
                published = hit.group(1)

        # --- Editor: first <div> without a class, then a page-wide scan. ---
        editor = ''
        classless_div = main_content.find(
            'div', class_=lambda value: value is None or value == ''
        )
        if classless_div is not None:
            byline = clean_text(classless_div.get_text())
            editor_patterns = [
                r'Red\s*:\s*([^<]+)',
                r'Editor\s*:\s*([^<]+)',
                r'Reporter\s*:\s*([^<]+)'
            ]
            labelled = next(
                filter(None, (re.search(pattern, byline) for pattern in editor_patterns)),
                None,
            )
            if labelled is not None:
                editor = clean_text(labelled.group(1))
            if not editor:
                byline_link = classless_div.find('a')
                if byline_link is not None:
                    editor = clean_text(byline_link.get_text())
        if not editor:
            page_wide = re.search(r'Red\s*:\s*([^\n<]+)', main_content.get_text())
            if page_wide:
                editor = clean_text(page_wide.group(1))
            else:
                editor = "Editor tidak ditemukan"

        # --- Body: dedicated container, fallback selectors, whole column. ---
        body = main_content.find('div', class_='article-content')
        if body is not None:
            body_text = extract_text_from_element(body)
        else:
            fallback_selectors = [
                '.article-content',
                '.article-body',
                '.content',
                '.post-content',
                '[itemprop="articleBody"]',
                '.detail-text'
            ]
            candidate = next(
                (
                    found
                    for found in (main_content.select_one(sel) for sel in fallback_selectors)
                    if found is not None
                ),
                None,
            )
            body_text = extract_text_from_element(candidate) if candidate is not None else ""
            if not body_text:
                body_text = extract_text_from_element(main_content)

        metadata = {
            'judul': title,
            'waktu_terbit': published,
            'editor': editor,
            'konten': body_text,
            'url': url,
            'panjang_konten': len(body_text)
        }
        return metadata, None
    except Exception as exc:
        return None, f"Error: {str(exc)}"

# ==================== FUNGSI SCRAPING PENCARIAN ====================
def generate_search_id(keyword, startdate, enddate):
    """Return a 16-character hex id for a keyword and date range.

    The page number is deliberately not part of the hashed text, so one id
    covers every page of the same search.
    """
    fingerprint = "_".join(str(part) for part in (keyword, startdate, enddate))
    digest = hashlib.md5(fingerprint.encode())
    return digest.hexdigest()[:16]

def _parse_search_item(item):
    """Convert one matched result element into a title/date/url dict.

    Returns ``None`` when the element has no title of at least 10
    characters.
    """
    title_elem = item.find(['h1', 'h2', 'h3', 'h4']) or item
    title = title_elem.get_text(strip=True)
    if not title or len(title) < 10:
        return None

    # Date in the form "DD Month YYYY, HH:MM".
    date_elem = item.find(class_=re.compile(r'date|time')) or item.find('span')
    date_text = date_elem.get_text(strip=True) if date_elem else ""
    date_match = re.search(r'(\d{1,2}\s+\w+\s+\d{4},\s+\d{1,2}:\d{2})', date_text)
    date = date_match.group(1) if date_match else "Date not found"

    # Some item selectors match wrapper elements rather than <a> tags;
    # those carry no href of their own, so use the first nested anchor.
    link = item if item.name == 'a' else item.find('a', href=True)
    href = link.get('href', '') if link is not None else ''
    if href.startswith('/'):
        full_url = urljoin("https://republika.co.id", href)
    else:
        full_url = href

    return {
        'title': title[:200],
        'date': date,
        'url': full_url
    }


def scrape_republika_search(keyword, startdate, enddate):
    """
    Scrape all pages from Republika.co.id search until no more results.

    Pages are fetched in order starting at 1. The loop stops when the
    results container is missing, a page has no items, no "next" link is
    found, an error occurs, or 50 pages have been read. A 2-second pause
    separates page requests.

    Args:
        keyword: Search phrase; URL-encoded with ``quote_plus``.
        startdate: Start of the date filter, placed in the URL as given.
        enddate: End of the date filter, placed in the URL as given.

    Returns:
        Tuple ``(results, status)``: a list of dicts with the keys
        ``title``, ``date`` and ``url`` gathered from every page read, and
        the per-page status messages joined by newlines. Errors are reported
        in the status text instead of being raised.
    """
    all_results = []
    page = 1
    max_pages = 50  # Safety limit to prevent infinite loop
    status_msgs = []

    # Request headers are identical for every page, so build them once.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    }

    while page <= max_pages:
        try:
            q = quote_plus(keyword)
            url = f"https://republika.co.id/search/v3/all/{page}/?q={q}&latest_date=custom&startdate={startdate}&enddate={enddate}"

            print(f"üîç Scraping page {page}: {url}")
            response = requests.get(url, headers=headers, timeout=15)
            response.raise_for_status()

            soup = BeautifulSoup(response.content, 'html.parser')

            # Exact path to the results container, then looser fallbacks.
            selector = "#search > div.main-wrapper > main > div.main-content > div.container > div.results-section"
            results_section = soup.select_one(selector)

            if not results_section:
                fallback_selectors = [
                    'div.results-section',
                    '.results-section',
                    'main div.container div[class*="result"]',
                    '.search-results'
                ]
                for sel in fallback_selectors:
                    results_section = soup.select_one(sel)
                    if results_section:
                        print(f"‚úÖ Found results with fallback: {sel}")
                        break

            if not results_section:
                status_msgs.append(f"‚ùå Results section not found on page {page}. Stopping.")
                break

            items = []
            item_selectors = [
                'div[class*="card"] a',
                'article a',
                '.search-item',
                '.result-item',
                'div.max-card'
            ]

            for sel in item_selectors:
                items = results_section.select(sel)
                if items:
                    print(f"‚úÖ Found {len(items)} items on page {page} with selector: {sel}")
                    break
            else:
                # No selector matched: fall back to article-looking links.
                items = results_section.find_all('a', href=re.compile(r'/berita/|/reads/'))

            if not items:
                status_msgs.append(f"‚úÖ No more results on page {page}. Stopping.")
                break

            page_results = [
                parsed
                for parsed in (_parse_search_item(item) for item in items)
                if parsed is not None
            ]

            all_results.extend(page_results)
            status_msgs.append(f"‚úÖ Found {len(page_results)} results on page {page}")

            # Check for next page (look for pagination). `string=` is the
            # current bs4 name for the deprecated `text=` argument.
            next_page = soup.find('a', class_='next') or soup.find('a', string=re.compile(r'Next|Selanjutnya'))
            if not next_page:
                status_msgs.append("‚úÖ No next page found. Stopping.")
                break

            page += 1
            time.sleep(2)  # Delay to avoid rate limiting

        except Exception as e:
            # Deliberate catch-all: report the failure and keep what was
            # collected from earlier pages.
            status_msgs.append(f"‚ùå Error on page {page}: {str(e)}. Stopping.")
            break

    return all_results, "\n".join(status_msgs)

# ==================== PROSES UTAMA ====================
def process_republika_search(keyword, startdate_str, enddate_str):
    """Run a full search, scrape every hit and save the three CSV files.

    Args:
        keyword: Search phrase; a blank keyword is rejected.
        startdate_str: Start date string; falsy values fall back to
            '2025-10-01'.
        enddate_str: End date string; falsy values fall back to
            '2025-10-31'.

    Returns:
        Tuple ``(message, df_results, df_keyword_search, df_metadata)``.
        All three frames are empty when the keyword is blank or the search
        returned nothing.
    """
    if len(keyword.strip()) == 0:
        return "‚ùå Masukkan keyword pencarian!", pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

    startdate = startdate_str if startdate_str else '2025-10-01'
    enddate = enddate_str if enddate_str else '2025-10-31'

    results_list, status = scrape_republika_search(keyword, startdate, enddate)
    if not results_list:
        return status, pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

    # One-row KeywordSearchResult record describing this search.
    search_id = generate_search_id(keyword, startdate, enddate)
    search_record = {
        'search_id': search_id,
        'keyword': keyword,
        'source_type': 'Republika Search',
        'num_results': len(results_list),
        'results': json.dumps(results_list, ensure_ascii=False),
        'timestamp_search': datetime.now().isoformat()
    }
    df_keyword_search = pd.DataFrame([search_record])
    df_results = pd.DataFrame(results_list)

    # Fetch article metadata for every hit; failures are logged and skipped.
    scraped_articles = []
    for entry in results_list:
        article_url = entry['url']
        article, failure = extract_republika_article(article_url)
        if not article:
            print(f"‚ö†Ô∏è Failed to scrape {article_url}: {failure}")
            continue
        article['search_id'] = search_id
        article['article_id'] = hashlib.md5(article_url.encode()).hexdigest()[:16]
        article['timestamp_ekstraksi'] = datetime.now().isoformat()
        scraped_articles.append(article)
        time.sleep(2)  # Pause after each successful article request

    df_metadata = pd.DataFrame(scraped_articles)

    # Save all three tables to CSV, named after the search id.
    output_dir = "scraping_results"
    os.makedirs(output_dir, exist_ok=True)
    keyword_csv = os.path.join(output_dir, f"keyword_search_{search_id}.csv")
    results_csv = os.path.join(output_dir, f"search_results_{search_id}.csv")
    metadata_csv = os.path.join(output_dir, f"article_metadata_{search_id}.csv")
    for frame, destination in (
        (df_keyword_search, keyword_csv),
        (df_results, results_csv),
        (df_metadata, metadata_csv),
    ):
        frame.to_csv(destination, index=False)

    output_msg = f"""üîç **HASIL PENCARIAN REPUBLIKA.CO.ID**
üìù **Keyword:** {keyword}
üìÖ **Periode:** {startdate} s.d. {enddate}
{status}

**üíæ Search ID:** `{search_id}`
**üìö Total Hasil:** {len(results_list)}
**üìë Metadata Artikel Diekstrak:** {len(scraped_articles)}

**üì§ File CSV Tersimpan:**
- {keyword_csv}
- {results_csv}
- {metadata_csv}

**üì§ Export CSV di Interface:**
- Klik tombol untuk download"""

    return output_msg, df_results, df_keyword_search, df_metadata

# ==================== GRADIO INTERFACE ====================
def create_search_interface():
    """Build the Gradio Blocks UI for the all-pages search scraper.

    The layout has keyword and date-range inputs, a status area, the
    results table, the KeywordSearchResult table, the article-metadata table
    and three CSV download widgets. One click handler validates the dates,
    runs ``process_republika_search`` and writes the CSV files.

    Returns:
        The configured ``gr.Blocks`` app (not yet launched).
    """
    # Local import: only the CSV export below needs it.
    import tempfile

    with gr.Blocks(
        title="üîç Republika.co.id Search Scraper - KeywordSearchResult",
        theme=gr.themes.Soft(),
        css="""
        .search-box { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); }
        """
    ) as demo:
        gr.Markdown("""
        # üîç **Republika.co.id Search Scraper**
        **Ekstrak hasil pencarian berdasarkan keyword ke dalam skema KeywordSearchResult & ArticleMetadata**

        **Fitur:**
        - ‚úÖ Scraping semua halaman otomatis
        - ‚úÖ Ekstraksi otomatis title, date, URL
        - ‚úÖ Scraping metadata artikel untuk setiap hasil
        - ‚úÖ Generate Search ID unik
        - ‚úÖ Simpan ke CSV otomatis
        """)

        with gr.Row():
            with gr.Column(scale=1):
                keyword_input = gr.Textbox(
                    label="üîë Keyword Pencarian",
                    placeholder="e.g., MBG",
                    value="MBG"
                )
                startdate_input = gr.Textbox(
                    label="üìÖ Tanggal Mulai (YYYY-MM-DD)",
                    placeholder="2025-10-01",
                    value="2025-10-01"
                )
                enddate_input = gr.Textbox(
                    label="üìÖ Tanggal Selesai (YYYY-MM-DD)",
                    placeholder="2025-10-31",
                    value="2025-10-31"
                )
                search_btn = gr.Button("üöÄ Cari & Scrap Semua Halaman", variant="primary", size="lg")

        with gr.Row():
            output_msg = gr.Markdown(label="üìä Status & Info")

        with gr.Row():
            with gr.Column(scale=2):
                gr.Markdown("### üìã Tabel Hasil Pencarian")
                results_table = gr.Dataframe(
                    label="Results",
                    headers=["title", "date", "url"],
                    wrap=True,
                    interactive=False
                )
            with gr.Column(scale=1):
                gr.Markdown("### üíæ Skema KeywordSearchResult")
                schema_table = gr.Dataframe(
                    label="Keyword Search Data",
                    interactive=False
                )

        with gr.Row():
            gr.Markdown("### üìö Tabel Metadata Artikel")
            metadata_table = gr.Dataframe(
                label="Article Metadata",
                wrap=True,
                interactive=False
            )

        # Download widgets; hidden until a search has produced files.
        with gr.Row():
            export_search = gr.File(
                label="üíæ Download keyword_search.csv",
                visible=False
            )
            export_results = gr.File(
                label="üíæ Download search_results.csv",
                visible=False
            )
            export_metadata = gr.File(
                label="üíæ Download article_metadata.csv",
                visible=False
            )

        # Events
        def parse_dates(startdate_str, enddate_str):
            """Validate both YYYY-MM-DD strings and return them unchanged."""
            try:
                if startdate_str:
                    datetime.strptime(startdate_str, '%Y-%m-%d')
                if enddate_str:
                    datetime.strptime(enddate_str, '%Y-%m-%d')
                return startdate_str, enddate_str
            except ValueError:
                raise gr.Error("Format tanggal harus YYYY-MM-DD!")

        def export_csv(df_keyword, df_results, df_metadata):
            """Write the three tables to CSV files and return File updates.

            gr.File outputs take file paths, not raw bytes, so the tables
            are written into a fresh temporary directory and the widgets
            are made visible. With no results all widgets stay hidden.
            """
            if df_results is None or df_results.empty:
                hidden = gr.update(value=None, visible=False)
                return hidden, hidden, hidden
            export_dir = tempfile.mkdtemp(prefix="republika_search_")
            updates = []
            for frame, filename in (
                (df_keyword, "keyword_search.csv"),
                (df_results, "search_results.csv"),
                (df_metadata, "article_metadata.csv"),
            ):
                csv_path = os.path.join(export_dir, filename)
                frame.to_csv(csv_path, index=False)
                updates.append(gr.update(value=csv_path, visible=True))
            return tuple(updates)

        def run_search(keyword, startdate_str, enddate_str):
            """Click handler: validate dates once, search, then export."""
            startdate, enddate = parse_dates(startdate_str, enddate_str)
            message, df_results, df_keyword, df_metadata = process_republika_search(
                keyword, startdate, enddate
            )
            search_file, results_file, metadata_file = export_csv(
                df_keyword, df_results, df_metadata
            )
            return (
                message, df_results, df_keyword, df_metadata,
                search_file, results_file, metadata_file,
            )

        search_btn.click(
            fn=run_search,
            inputs=[keyword_input, startdate_input, enddate_input],
            outputs=[
                output_msg, results_table, schema_table, metadata_table,
                export_search, export_results, export_metadata,
            ]
        )

        gr.Examples(
            examples=[
                ["MBG", "2025-10-01", "2025-10-31"],
                ["Prabowo", "2025-09-01", "2025-09-30"],
            ],
            inputs=[keyword_input, startdate_input, enddate_input]
        )

    return demo

if __name__ == "__main__":
    # Entry point: print a start-up banner, build the UI and launch it with
    # no explicit launch arguments.
    print("üöÄ Starting Republika Search Scraper...")
    demo = create_search_interface()
    demo.launch()

üöÄ Starting Republika Search Scraper...


  with gr.Blocks(
  with gr.Blocks(


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://585a4ca0815013465c.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
