In [None]:
import os
from dotenv import load_dotenv, find_dotenv
import pandas as pd
from langchain_openai import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings

# Impor yang diperlukan untuk Google Maps API
import requests
import urllib.parse

In [None]:
# 0. Configuration
# -----------------------------------------------------------------------------
# Load the .env file located by find_dotenv() so its variables can be read below.
_ = load_dotenv(find_dotenv())

# Required credentials; a missing variable raises KeyError right here.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
Maps_API_KEY = os.environ["Maps_API_KEY"]

# Chat model used to pick a restaurant name out of the Google Maps results.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, openai_api_key=OPENAI_API_KEY)
print("Model ChatOpenAI (LLM) berhasil dimuat.")

# Embedding model.
# NOTE(review): OpenAIEmbeddings is imported from langchain.embeddings.openai
# while ChatOpenAI comes from langchain_openai, and the recorded output echoes
# this line the way a deprecation warning would — consider importing
# OpenAIEmbeddings from langchain_openai as well.
embedder = OpenAIEmbeddings(
    model="text-embedding-3-small",
    api_key=OPENAI_API_KEY,
)
print("Model OpenAIEmbeddings berhasil dimuat.")

Model ChatOpenAI (LLM) berhasil dimuat.


  embedder = OpenAIEmbeddings(model="text-embedding-3-small", api_key=OPENAI_API_KEY)


Model OpenAIEmbeddings berhasil dimuat.


In [None]:
# 1. Define Google Maps and LLM Tools
# -----------------------------------------------------------------------------
# 1) Mendefinisikan Tool Google Maps
def Maps_search(query: str, max_results: int = 3, timeout: float = 10.0) -> str:
    """
    Run a Google Maps Places Text Search and return the top hits as text.

    Args:
        query: Free-text search query.
        max_results: Maximum number of places to include in the answer.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        One line per place in the form "Nama: <name>, Alamat: <address>",
        joined with newlines. When the search yields no places the text
        "Tidak ada hasil yang ditemukan." is returned. On failure the text
        starts with "Error saat memanggil Google Maps API:" (transport error
        or an API-level status other than OK / ZERO_RESULTS) or with
        "Terjadi kesalahan tak terduga:" (anything else). Errors are reported
        through the return value; the function does not raise.
    """
    api_key = Maps_API_KEY
    base_url = "https://maps.googleapis.com/maps/api/place/textsearch/json"

    def _redact(message: object) -> str:
        # HTTP error messages embed the request URL, which carries the API key;
        # mask it so the key never ends up in printed notebook output.
        text = str(message)
        if api_key:
            text = text.replace(api_key, "***")
            text = text.replace(urllib.parse.quote_plus(api_key), "***")
        return text

    try:
        # Let requests encode the query string; the timeout prevents a stalled
        # connection from blocking the caller indefinitely.
        response = requests.get(
            base_url,
            params={"query": query, "key": api_key},
            timeout=timeout,
        )
        response.raise_for_status()
        data = response.json()

        # The API signals denied keys, exhausted quota, etc. through the
        # "status" field of an HTTP 200 response, so it must be checked
        # explicitly instead of being reported as "no results".
        status = data.get("status")
        if status not in (None, "OK", "ZERO_RESULTS"):
            detail = data.get("error_message") or ""
            message = f"Error saat memanggil Google Maps API: {status} {detail}"
            return _redact(message.rstrip())

        results = data.get("results", [])
        if not results:
            return "Tidak ada hasil yang ditemukan."

        # Only the first few hits are kept to give the LLM a compact context.
        formatted_results = [
            f"Nama: {place.get('name')}, Alamat: {place.get('formatted_address')}"
            for place in results[:max_results]
        ]
        return "\n".join(formatted_results)

    except requests.exceptions.RequestException as e:
        return f"Error saat memanggil Google Maps API: {_redact(e)}"
    except Exception as e:
        return f"Terjadi kesalahan tak terduga: {_redact(e)}"

# 2) Mendefinisikan Tool LLM
def get_similar_restaurants(restaurant_name: str, location: str) -> list[str]:
    """
    Look up similarly named restaurants on Google Maps and let the LLM pick
    the closest match.

    Args:
        restaurant_name: Name of the reference restaurant.
        location: City (or area) appended to the search query.

    Returns:
        The non-empty, stripped lines of the LLM answer (the prompt asks for a
        single name). When the Maps lookup finds nothing or fails, a
        one-element list holding the message returned by ``Maps_search``; the
        LLM is not called in that case.
    """
    # Names read from the CSV may carry stray surrounding whitespace, which
    # would otherwise leak into both the search query and the prompt.
    name = str(restaurant_name).strip()
    place = str(location).strip()

    # Step 1: call the Google Maps tool directly. The logged text is the exact
    # query that is sent.
    search_query = f"restoran dengan nama mirip {name} di {place}"
    print(f"🔍 Mencari data di Google Maps untuk: '{search_query}'")
    maps_data = Maps_search(search_query)
    print(f"📝 Data dari Maps:\n---\n{maps_data}\n---")

    # Recognize failures by the message prefixes Maps_search produces instead
    # of looking for "Error" anywhere in the text: a place whose name contains
    # "Error" is real data, and the "unexpected error" message must not be
    # forwarded to the LLM as if it were search results.
    failure_prefixes = (
        "Tidak ada hasil",
        "Error saat memanggil Google Maps API",
        "Terjadi kesalahan tak terduga",
    )
    if maps_data.startswith(failure_prefixes):
        return [maps_data]

    # Step 2: build the LLM prompt around the Maps data.
    prompt_template = f"""
    Berdasarkan data pencarian Google Maps berikut:
    \"\"\"
    {maps_data}
    \"\"\"

    Sebutkan HANYA NAMA dari restoran yang paling mirip dengan nama '{name}'.
    Tuliskan SATU NAMA restoran saja. Jangan sertakan alamat atau kata-kata tambahan.
    """

    # Step 3: ask the LLM to analyse the data.
    print("\n🧠 Meminta LLM untuk menganalisis data...")
    response = llm.invoke(prompt_template)
    final_result = response.content

    print(f"✅ Jawaban dari LLM: {final_result}")

    # Step 4: turn the answer into a list of non-empty lines.
    return [line.strip() for line in final_result.splitlines() if line.strip()]

In [4]:
# 4. Read menu.csv
# -----------------------------------------------------------------------------
# Keep the columns at positions 0, 1, 3 and 4 and name the second one "menu".
# The rename goes through DataFrame.rename rather than writing into
# `df.columns.values`: an Index is meant to be immutable, and mutating its
# backing array in place is not a supported way to rename a column.
df_m = pd.read_csv('menu.csv').iloc[:, [0, 1, 3, 4]]
df_m = df_m.rename(columns={df_m.columns[1]: "menu"})
df_m

Unnamed: 0,id,menu,restaurantId,halalCertificateId
0,clwy72wl90003tb6ldyfn6flh,Biji Kopi,clwy72weo0000tb6l95t6xe6m,clwy72wep0001tb6l1jws0ic7
1,clwy72wmi0005tb6loalt6v9c,Bubuk Kopi Kofind,clwy72weo0000tb6l95t6xe6m,clwy72wep0001tb6l1jws0ic7
2,clwy72wnl0007tb6l2ebr1f8r,Kopi Susu Kofind,clwy72weo0000tb6l95t6xe6m,clwy72wep0001tb6l1jws0ic7
3,clwy72wta000dtb6lakswvmbi,Es cao,clwy72wo80008tb6lkg91djpf,clwy72wo80009tb6l7vpgfii3
4,clwy72wtx000ftb6l6ikider9,Kroket Kentang Goreng,clwy72wo80008tb6lkg91djpf,clwy72wo8000atb6lp1ww7g7o
...,...,...,...,...
11662,cm3cmfcbl0041ect7xk408yyb,JUS MANGGA,cm3cmexqt001rect7ec01uco5,cm3cmexvz001vect7cvqqrpf4
11663,cm3cmfcdf0043ect7fzhfn84c,MACCA,cm3cmexqt001rect7ec01uco5,cm3cmexvz001vect7cvqqrpf4
11664,cm3cmfcfa0045ect7e5wthhac,MILK TEA,cm3cmexqt001rect7ec01uco5,cm3cmexvz001vect7cvqqrpf4
11665,cm3cmfch70047ect7bqhbbk6d,TARO,cm3cmexqt001rect7ec01uco5,cm3cmexvz001vect7cvqqrpf4


In [5]:
# 5. Read restaurant.csv
# -----------------------------------------------------------------------------
# Keep the columns at positions 0, 1, 2 and 24 and give the first two the names
# used by the later joins. The rename goes through DataFrame.rename rather than
# writing into `df.columns.values`: an Index is meant to be immutable, and
# mutating its backing array in place is not a supported way to rename columns.
df_r = pd.read_csv('restaurant.csv').iloc[:, [0, 1, 2, 24]]
df_r = df_r.rename(
    columns={
        df_r.columns[0]: "restaurantId",
        df_r.columns[1]: "restaurant",
    }
)
df_r

Unnamed: 0,restaurantId,restaurant,address,city
0,clwy72y970027tb6l8zm62xlf,Tue-Kue Jajan,"Pasar Sawojajar Bedak C-3, Jl. Danau Bratan Ti...",Malang
1,clwy72yxr0035tb6lo0y0ujsn,Ratna Catering,"Jl. Bendul Merisi Selatan I No.109, Bendul Mer...",Surabaya
2,clwy7316p0065tb6lxd8qado4,Cemal-Cemil,"Jl. Juwingan No.136, Kertajaya, Kec. Gubeng, S...",Surabaya
3,clwy733ro009ltb6lst1pp7u9,Dapoer Mekar,"Jl. Simpang Sulfat Utara No.9, Pandanwangi, Ke...",Malang
4,clwy73dps00n4tb6lvn88szcw,Dapur Mak Brintik,"Jl. MT. Haryono No.193, Dinoyo, Kec. Lowokwaru...",Malang
...,...,...,...,...
2945,cm2v1gkoi0050e2ow2mvq53ys,Purbarasa,"Nglanggeran Wetan, RT 16 RW 04, Kel. Nglangger...",Kab. Gunungkidul
2946,cm2v1gl6e0055e2owxv5hd7qp,BIOTA LAUT,"Jl. Berangas KM. 19 RT 001 RW 001, Desa Teluk ...",Kab. Kota Baru
2947,cm33qx6qf003dmcyrubwqbhly,Boemboe Mandeh,"Jalan Datuak Bandaro Kuniang, Perumahan Kharis...",Kabupaten Tanah Datar
2948,cm33qx8v90051mcyrwerr1ym1,Pusako Bundo,Jl Ade Irma Suryani no 20 A Payakumbuh Utara,Kota Payakumbuh


In [6]:
# 6. Count the non-null values in the 'city' column
# -----------------------------------------------------------------------------
df_r["city"].notna().sum()

2756

In [7]:
# 7. Split the 'address' column of df_r into its comma-separated parts
# -----------------------------------------------------------------------------
# Only string values are split. Values that are already lists (the cell was
# run before) or missing are left untouched, so re-running the cell does not
# wipe the column the way a second `.str.split` on lists would.
df_r['address'] = df_r['address'].apply(
    lambda addr: addr.split(", ") if isinstance(addr, str) else addr
)
df_r

Unnamed: 0,restaurantId,restaurant,address,city
0,clwy72y970027tb6l8zm62xlf,Tue-Kue Jajan,"[Pasar Sawojajar Bedak C-3, Jl. Danau Bratan T...",Malang
1,clwy72yxr0035tb6lo0y0ujsn,Ratna Catering,"[Jl. Bendul Merisi Selatan I No.109, Bendul Me...",Surabaya
2,clwy7316p0065tb6lxd8qado4,Cemal-Cemil,"[Jl. Juwingan No.136, Kertajaya, Kec. Gubeng, ...",Surabaya
3,clwy733ro009ltb6lst1pp7u9,Dapoer Mekar,"[Jl. Simpang Sulfat Utara No.9, Pandanwangi, K...",Malang
4,clwy73dps00n4tb6lvn88szcw,Dapur Mak Brintik,"[Jl. MT. Haryono No.193, Dinoyo, Kec. Lowokwar...",Malang
...,...,...,...,...
2945,cm2v1gkoi0050e2ow2mvq53ys,Purbarasa,"[Nglanggeran Wetan, RT 16 RW 04, Kel. Nglangge...",Kab. Gunungkidul
2946,cm2v1gl6e0055e2owxv5hd7qp,BIOTA LAUT,"[Jl. Berangas KM. 19 RT 001 RW 001, Desa Teluk...",Kab. Kota Baru
2947,cm33qx6qf003dmcyrubwqbhly,Boemboe Mandeh,"[Jalan Datuak Bandaro Kuniang, Perumahan Khari...",Kabupaten Tanah Datar
2948,cm33qx8v90051mcyrwerr1ym1,Pusako Bundo,[Jl Ade Irma Suryani no 20 A Payakumbuh Utara],Kota Payakumbuh


In [8]:
# 8. Fill the missing values of the 'city' column from the address
# -----------------------------------------------------------------------------
def _guess_city(address):
    """Return the third-from-last address part, or the first part for short addresses.

    Accepts the list produced by the address split (or a raw string, which is
    split the same way). Returns None when there is no usable address so the
    city simply stays missing instead of the cell failing on `len()`.
    """
    if isinstance(address, str):
        address = address.split(", ")
    if not isinstance(address, list) or not address:
        return None
    # NOTE(review): whether the third-from-last part really is the city depends
    # on the address format — confirm against the data.
    return address[-3] if len(address) > 2 else address[0]


mask = df_r['city'].isna()
df_r.loc[mask, 'city'] = df_r.loc[mask, 'address'].apply(_guess_city)
df_r

Unnamed: 0,restaurantId,restaurant,address,city
0,clwy72y970027tb6l8zm62xlf,Tue-Kue Jajan,"[Pasar Sawojajar Bedak C-3, Jl. Danau Bratan T...",Malang
1,clwy72yxr0035tb6lo0y0ujsn,Ratna Catering,"[Jl. Bendul Merisi Selatan I No.109, Bendul Me...",Surabaya
2,clwy7316p0065tb6lxd8qado4,Cemal-Cemil,"[Jl. Juwingan No.136, Kertajaya, Kec. Gubeng, ...",Surabaya
3,clwy733ro009ltb6lst1pp7u9,Dapoer Mekar,"[Jl. Simpang Sulfat Utara No.9, Pandanwangi, K...",Malang
4,clwy73dps00n4tb6lvn88szcw,Dapur Mak Brintik,"[Jl. MT. Haryono No.193, Dinoyo, Kec. Lowokwar...",Malang
...,...,...,...,...
2945,cm2v1gkoi0050e2ow2mvq53ys,Purbarasa,"[Nglanggeran Wetan, RT 16 RW 04, Kel. Nglangge...",Kab. Gunungkidul
2946,cm2v1gl6e0055e2owxv5hd7qp,BIOTA LAUT,"[Jl. Berangas KM. 19 RT 001 RW 001, Desa Teluk...",Kab. Kota Baru
2947,cm33qx6qf003dmcyrubwqbhly,Boemboe Mandeh,"[Jalan Datuak Bandaro Kuniang, Perumahan Khari...",Kabupaten Tanah Datar
2948,cm33qx8v90051mcyrwerr1ym1,Pusako Bundo,[Jl Ade Irma Suryani no 20 A Payakumbuh Utara],Kota Payakumbuh


In [9]:
# 9. Count the non-null values in the 'city' column again (after the fill)
# -----------------------------------------------------------------------------
df_r["city"].notna().sum()

2950

In [10]:
# 10. Join the df_m and df_r tables
# -----------------------------------------------------------------------------
# Inner join on restaurantId (the merge default), then drop the identifier
# columns that are no longer needed.
wave = (
    df_m
    .merge(df_r, on="restaurantId")
    .drop(columns=["id", "restaurantId", "halalCertificateId"])
)
wave

Unnamed: 0,menu,restaurant,address,city
0,Biji Kopi,KOFIND,"[Jl. Kebonsari Tengah No.103, Kebonsari, Kec. ...",Surabaya
1,Bubuk Kopi Kofind,KOFIND,"[Jl. Kebonsari Tengah No.103, Kebonsari, Kec. ...",Surabaya
2,Kopi Susu Kofind,KOFIND,"[Jl. Kebonsari Tengah No.103, Kebonsari, Kec. ...",Surabaya
3,Es cao,Dapur F2,"[Jl. Kejawan Gebang No.7, Gebang Putih, Kec. S...",Surabaya
4,Kroket Kentang Goreng,Dapur F2,"[Jl. Kejawan Gebang No.7, Gebang Putih, Kec. S...",Surabaya
...,...,...,...,...
11662,JUS MANGGA,Putri Jus,"[KARAH V-D/87-B3, RT/RW -, KARAH, JAMBANGAN]",KOTA SURABAYA
11663,MACCA,Putri Jus,"[KARAH V-D/87-B3, RT/RW -, KARAH, JAMBANGAN]",KOTA SURABAYA
11664,MILK TEA,Putri Jus,"[KARAH V-D/87-B3, RT/RW -, KARAH, JAMBANGAN]",KOTA SURABAYA
11665,TARO,Putri Jus,"[KARAH V-D/87-B3, RT/RW -, KARAH, JAMBANGAN]",KOTA SURABAYA


In [11]:
# 11. Group the menu items by restaurant
# -----------------------------------------------------------------------------
# One row per restaurant name, with all of its menu items collected in a list.
# NOTE(review): the grouping key is the restaurant *name*, so different
# restaurants that share a name are pooled into a single row.
wave_group = (
    wave
    .groupby(["restaurant"])["menu"]
    .agg(list)
    .reset_index()
)
wave_group

Unnamed: 0,restaurant,menu
0,BP Kerupuk Sayur,"[Kerupuk Labu BP Kerupuk Sayur, Kerupuk Wortel..."
1,Bumbu Rawon,"[Bumbu Rawon UKG (Usaha Kerta Gemilang), Bumbu..."
2,JAMU BU SOLIKAH,"[JAMU KUNIR SURUH, JAMU KUNIR, JAMU BERAS KENC..."
3,RANIS KDS,[KRUPUK BAWANG RANIS KDS ]
4,Winarti Snack,[Kacang Goreng Winarti Snack]
...,...,...
2827,virinda food,"[Rolade, Sempol]"
2828,wouwcake,"[risoles wouwcake, kue tar wouwcake, kroket ke..."
2829,yuk tri,"[kue nastar, kue kastengel, kue putri salju]"
2830,zara,[stik keju zara]


In [12]:
# 12. Attach the city from df_r to wave_group
# -----------------------------------------------------------------------------
# Only the columns that are needed take part in the join, on both sides. This
# keeps the cell re-runnable: a wave_group that already carries a 'city'
# column no longer produces 'city_x' / 'city_y' on a second run.
# Restaurant names are not unique in df_r (the recorded output grows from 2831
# to 2918 rows), so a name can pick up several rows here.
wave_group = wave_group[["restaurant", "menu"]].merge(
    df_r[["restaurant", "city"]],
    on="restaurant",
    how="left",
)
wave_group

Unnamed: 0,restaurant,menu,city
0,BP Kerupuk Sayur,"[Kerupuk Labu BP Kerupuk Sayur, Kerupuk Wortel...",Surabaya
1,Bumbu Rawon,"[Bumbu Rawon UKG (Usaha Kerta Gemilang), Bumbu...",Kabupaten Gresik
2,JAMU BU SOLIKAH,"[JAMU KUNIR SURUH, JAMU KUNIR, JAMU BERAS KENC...",Malang
3,RANIS KDS,[KRUPUK BAWANG RANIS KDS ],Malang
4,Winarti Snack,[Kacang Goreng Winarti Snack],Surabaya
...,...,...,...
2914,virinda food,"[Rolade, Sempol]",Malang
2915,wouwcake,"[risoles wouwcake, kue tar wouwcake, kroket ke...",Surabaya
2916,yuk tri,"[kue nastar, kue kastengel, kue putri salju]",Malang
2917,zara,[stik keju zara],Malang


In [13]:
# 13. Drop duplicate restaurants from wave_group
# -----------------------------------------------------------------------------
# Descending sort on 'city': missing cities sort last, so a row with a city is
# preferred whenever one exists for a restaurant name.
# NOTE(review): among several non-null cities this keeps the alphabetically
# last one — confirm that choice is intended.
wave_group_sorted = wave_group.sort_values(by="city", ascending=False)

# Keep the first row per restaurant name, restore the original row order and
# renumber the rows from 0 without keeping the old index as a column.
wave_group = (
    wave_group_sorted
    .drop_duplicates(subset="restaurant", keep="first")
    .sort_index()
    .reset_index(drop=True)
)

wave_group

Unnamed: 0,restaurant,menu,city
0,BP Kerupuk Sayur,"[Kerupuk Labu BP Kerupuk Sayur, Kerupuk Wortel...",Surabaya
1,Bumbu Rawon,"[Bumbu Rawon UKG (Usaha Kerta Gemilang), Bumbu...",Kabupaten Gresik
2,JAMU BU SOLIKAH,"[JAMU KUNIR SURUH, JAMU KUNIR, JAMU BERAS KENC...",Malang
3,RANIS KDS,[KRUPUK BAWANG RANIS KDS ],Malang
4,Winarti Snack,[Kacang Goreng Winarti Snack],Surabaya
...,...,...,...
2827,virinda food,"[Rolade, Sempol]",Malang
2828,wouwcake,"[risoles wouwcake, kue tar wouwcake, kroket ke...",Surabaya
2829,yuk tri,"[kue nastar, kue kastengel, kue putri salju]",Malang
2830,zara,[stik keju zara],Malang


In [14]:
# Look up an alternative restaurant name for every row of wave_group and embed
# it. Each iteration calls get_similar_restaurants and then the embedder, so
# the loop performs remote requests for every restaurant.
hasil = []
for row in wave_group.itertuples(index=False):
    alts = get_similar_restaurants(row.restaurant, row.city)
    emb = embedder.embed_documents(alts)

    hasil.append(
        dict(
            restaurant=row.restaurant,
            menu=row.menu,
            city=row.city,
            alt_resto=alts,
            embedding=emb,
        )
    )

    print(f"Jumlah embedding saat ini: {len(hasil)}")

# Build the result table once from the collected records.
df_hasil = pd.DataFrame(hasil)
df_hasil

üîç Mencari data di Google Maps untuk: 'restoran mirip  BP Kerupuk Sayur di Surabaya'
üìù Data dari Maps:
---
Nama: Dapur Fizzul (Surabaya) - Rumah Makan Khas Maluku No.1 di Dunia, Alamat: depan Politeknik Kesehatan Kemeskes Surabaya Samping Hotel Kita, Jl. Karang Menjangan No.15, Mojo, Kec. Gubeng, Surabaya, Jawa Timur 60285, Indonesia
Nama: Warung Kuning, Alamat: Jl. Kalibokor Selatan, Baratajaya, Kec. Gubeng, Surabaya, Jawa Timur 60284, Indonesia
Nama: Arunaya Resto, Alamat: Jl. Bengawan No.5-7, Darmo, Kec. Wonokromo, Surabaya, Jawa Timur 60241, Indonesia
---

üß† Meminta LLM untuk menganalisis data...
‚úÖ Jawaban dari LLM: Dapur Fizzul
Jumlah embedding saat ini: 1
üîç Mencari data di Google Maps untuk: 'restoran mirip  Bumbu Rawon di Kabupaten Gresik'
üìù Data dari Maps:
---
Nama: Rawon Balungan H. Mufid, Alamat: GKB, Jl. Kalimantan No.77-79 Kav.5, Wonorejo, Yosowilangun, Kec. Manyar, Kabupaten Gresik, Jawa Timur 61151, Indonesia
Nama: Rawon Tulang Pak Nawi, Alamat: Jl. Gubern

Unnamed: 0,restaurant,menu,city,alt_resto,embedding
0,BP Kerupuk Sayur,"[Kerupuk Labu BP Kerupuk Sayur, Kerupuk Wortel...",Surabaya,[Dapur Fizzul],"[[-0.019313940289107857, 0.026720912450162143,..."
1,Bumbu Rawon,"[Bumbu Rawon UKG (Usaha Kerta Gemilang), Bumbu...",Kabupaten Gresik,[Rawon Balungan H. Mufid],"[[-0.0012182795813163079, 0.038871749931996775..."
2,JAMU BU SOLIKAH,"[JAMU KUNIR SURUH, JAMU KUNIR, JAMU BERAS KENC...",Malang,[Sambat Luwe],"[[0.01761782175153306, -0.006151651518221604, ..."
3,RANIS KDS,[KRUPUK BAWANG RANIS KDS ],Malang,[KDS Cantonese Restaurant],"[[-0.004616941175124867, -0.033262963255641514..."
4,Winarti Snack,[Kacang Goreng Winarti Snack],Surabaya,[Nathania Snack House],"[[-0.023377033971692975, -0.013525635448501444..."
...,...,...,...,...,...
2827,virinda food,"[Rolade, Sempol]",Malang,[Javanine Resto],"[[-0.04503288453259202, -0.04508640205255267, ..."
2828,wouwcake,"[risoles wouwcake, kue tar wouwcake, kroket ke...",Surabaya,[Warung wong lue (WWL)],"[[-0.04631182359458489, -0.01782587469255922, ..."
2829,yuk tri,"[kue nastar, kue kastengel, kue putri salju]",Malang,[Trimurti Resto],"[[0.006737446523112387, 0.007663352942509597, ..."
2830,zara,[stik keju zara],Malang,[Lafayette Coffee & Eatery],"[[-0.05076225249692837, -0.05180014440515231, ..."


In [49]:
# Install into the kernel's own environment via the %pip magic (a bare
# `pip install` only works while automagic is enabled), pinned to the version
# shown in this cell's recorded output.
# NOTE(review): no visible cell imports or uses openpyxl (results are saved
# with to_csv) — consider removing this cell.
%pip install openpyxl==3.1.5

Collecting openpyxl
  Downloading openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl)
  Downloading et_xmlfile-2.0.0-py3-none-any.whl.metadata (2.7 kB)
Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
Downloading et_xmlfile-2.0.0-py3-none-any.whl (18 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-2.0.0 openpyxl-3.1.5
Note: you may need to restart the kernel to use updated packages.


In [15]:
# Save the results as CSV. The DataFrame index is written as the first
# (unnamed) column, which is the pandas default made explicit here.
# NOTE(review): the list-valued columns are presumably stored as their text
# representation and need parsing when the file is read back — confirm.
df_hasil.to_csv("wave_emb_small.csv", index=True)