In [8]:
from typing import List

from google.api_core.client_options import ClientOptions
from google.cloud import discoveryengine_v1 as discoveryengine

# TODO(developer): Uncomment these variables before running the sample.
project_id = "pedulipasal"
location = "global"          # Values: "global", "us", "eu"
engine_id = "agentpedulipasal_1733849041444"
search_query = "narkotika"

In [None]:
def search_sample(
    project_id: str,
    location: str,
    engine_id: str,
    search_query: str,
) -> List[discoveryengine.SearchResponse]:
    #  For more information, refer to:
    # https://cloud.google.com/generative-ai-app-builder/docs/locations#specify_a_multi-region_for_your_data_store
    client_options = (
        ClientOptions(api_endpoint=f"{location}-discoveryengine.googleapis.com")
        if location != "global"
        else None
    )

    # Create a client
    client = discoveryengine.SearchServiceClient(client_options=client_options)

    # The full resource name of the search app serving config
    serving_config = f"projects/{project_id}/locations/{location}/collections/default_collection/engines/{engine_id}/servingConfigs/default_config"

    # Optional - only supported for unstructured data: Configuration options for search.
    # Refer to the `ContentSearchSpec` reference for all supported fields:
    # https://cloud.google.com/python/docs/reference/discoveryengine/latest/google.cloud.discoveryengine_v1.types.SearchRequest.ContentSearchSpec
    content_search_spec = discoveryengine.SearchRequest.ContentSearchSpec(
        # For information about snippets, refer to:
        # https://cloud.google.com/generative-ai-app-builder/docs/snippets
        snippet_spec=discoveryengine.SearchRequest.ContentSearchSpec.SnippetSpec(
            return_snippet=True
        ),
        # For information about search summaries, refer to:
        # https://cloud.google.com/generative-ai-app-builder/docs/get-search-summaries
        summary_spec=discoveryengine.SearchRequest.ContentSearchSpec.SummarySpec(
            summary_result_count=5,
            include_citations=True,
            ignore_adversarial_query=True,
            ignore_non_summary_seeking_query=True,
            model_prompt_spec=discoveryengine.SearchRequest.ContentSearchSpec.SummarySpec.ModelPromptSpec(
                preamble="\"Jawablah pertanyaan berikut: '{input}'. Jawab dengan to the point, tidak memberikan catatan, tambahan, atau saran lain. Hanya jawab pertanyaan yang diminta.\"\nSub-klasifikasi: pemalsuan, pencurian, kejahatan-terhadap-keamanan-negara, penghinaan, penadahan, penipuan, tidak-diketahui, kejahatan-terhadap-ketertiban-umum, penggelapan, penganiayaan, lalu-lintas, perjudian, perusakan, kejahatan-terhadap-kesusilaan, pembunuhan, kealfaan-mengakibatkan-kematian-luka, pemerasan-dan-pengancaman, kejahatan-terhadap-asal-usul-perkawinan, senjata-api, mata-uang, sumpah-palsu-dan-keterangan-palsu, kejahatan-terhadap-kemerdekaan-orang-lain, kehutanan, pra-peradilan.\n\nTolong klasifikasikan deskripsi kasus berikut: '{input}' berdasarkan sub-klasifikasi di atas. Output dapat berupa satu atau lebih sub-klasifikasi saja, tanpa tambahan kata atau kalimat lain.\n\nBerikan pengantar bahwa kasus tersebut termasuk sub-klasifikasi dari {sub_klassifikasi}.\nTampilkan juga daftar pasal-pasal pidana Indonesia terbaru yang terkait dengan sub-klasifikasi tersebut, beserta ancaman pidana berupa tahun penjara dan denda. Jangan memberikan catatan, tambahan, atau paragraf lain. Format pasal sebagai berikut:\n\nPasal X: [Deskripsi singkat], Ancaman: [Tahun Penjara], Denda: [Jumlah Denda].\nPasal Y: [Deskripsi singkat], Ancaman: [Tahun Penjara], Denda: [Jumlah Denda]."
            ),
            model_spec=discoveryengine.SearchRequest.ContentSearchSpec.SummarySpec.ModelSpec(
                version="stable",
            ),
        ),
    )

    # Refer to the `SearchRequest` reference for all supported fields:
    # https://cloud.google.com/python/docs/reference/discoveryengine/latest/google.cloud.discoveryengine_v1.types.SearchRequest
    request = discoveryengine.SearchRequest(
        serving_config=serving_config,
        query=search_query,
        page_size=10,
        content_search_spec=content_search_spec,
        query_expansion_spec=discoveryengine.SearchRequest.QueryExpansionSpec(
            condition=discoveryengine.SearchRequest.QueryExpansionSpec.Condition.AUTO,
        ),
        spell_correction_spec=discoveryengine.SearchRequest.SpellCorrectionSpec(
            mode=discoveryengine.SearchRequest.SpellCorrectionSpec.Mode.AUTO
        ),
    )

    response = client.search(request)

    return response

In [63]:
sample = search_sample(project_id, location, engine_id, search_query)

In [163]:
print(type(sample))

print(len(sample.results))

print(sample)

<class 'google.cloud.discoveryengine_v1.services.search_service.pagers.SearchPager'>
5
SearchPager<results {
  id: "24b56fcac305817be54f2384f26da165"
  document {
    name: "projects/808921460677/locations/global/collections/default_collection/dataStores/datasetstore_1733850697778/branches/0/documents/24b56fcac305817be54f2384f26da165"
    id: "24b56fcac305817be54f2384f26da165"
    derived_struct_data {
      fields {
        key: "title"
        value {
          string_value: "UU35-2009Narkotika"
        }
      }
      fields {
        key: "snippets"
        value {
          list_value {
            values {
              struct_value {
                fields {
                  key: "snippet"
                  value {
                    string_value: "<b>Narkotika</b> adalah zat atau obat yang berasal dari tanaman atau bukan tanaman, baik sintetis maupun semisintetis, yang dapat menyebabkan penurunan atau perubahan&nbsp;..."
                  }
                }
                f

In [182]:
dictionaries_pasal = []

for result in sample:
    document = result.document
    derived_data = document.derived_struct_data

    for key, value in derived_data.items():
        if key == 'snippets':
            for key, value in value[0].items():
                if value != "NO_SNIPPET_AVAILABLE" and value != "SUCCESS":
                    dictionaries_pasal.append({
                        'value': value
                    })
                    # print(f"snippets Key: {key}, value: {value}")

print(len(dictionaries_pasal))

5


In [None]:
from google.protobuf.json_format import MessageToDict  # Import alat konversi Protobuf ke dict



# Fungsi untuk mengonversi SearchPager ke dictionary
def search_pager_to_dict(search_pager):
    result_list = []
    for result in search_pager:
        document = result.document
        derived_data = MessageToDict(document.derived_struct_data)  # Konversi ke dict
        
        # Ekstrak data dari fields
        title = derived_data.get('fields', {}).get('title', {}).get('stringValue', 'No title available')
        link = derived_data.get('fields', {}).get('link', {}).get('stringValue', 'No link available')
        
        # Ekstrak snippet
        snippets = derived_data.get('fields', {}).get('snippets', {}).get('listValue', {}).get('values', [])
        snippet = (
            snippets[0].get('structValue', {}).get('fields', {}).get('snippet', {}).get('stringValue', 'No snippet available')
            if snippets else 'No snippet available'
        )
        
        # Tambahkan ke hasil sebagai dictionary
        result_list.append({
            "id": document.id,
            "title": title,
            "snippet": snippet,
            "link": link,
        })
    
    return result_list

# Contoh penggunaan
response_dict = search_pager_to_dict(sample)
print(response_dict)


AttributeError: Unknown field for SearchResponse: DESCRIPTOR