In [1]:
!pip install langchain_community
!pip install replicate
!pip install langchain_experimental



In [46]:
import pandas as pd
import numpy as np
import os
import folium
from folium.plugins import MarkerCluster
from google.colab import userdata

from langchain_community.llms import Replicate
from langchain_core.prompts import PromptTemplate
from langchain.chains import LLMChain

from sklearn.preprocessing import MinMaxScaler

In [3]:
# Read the Replicate API token from the Colab secrets store and expose it
# through the environment variable the Replicate client looks up.
api_token = userdata.get("api_token")
os.environ["REPLICATE_API_TOKEN"] = api_token

# Upper bound on the number of tokens the model may generate per call.
# The insight text produced later in this notebook was cut off mid-list when
# no limit was passed, which suggests the model default is too small for a
# multi-section answer. Raise or lower this value as needed.
LLM_MAX_TOKENS = 2048

# Granite instruct model served by Replicate; `model_kwargs` is forwarded as
# the model's input parameters on every call.
llm = Replicate(
    model="ibm-granite/granite-3.3-8b-instruct",
    model_kwargs={"max_tokens": LLM_MAX_TOKENS},
)

In [14]:
DATA_PATH = "archivebalidataset.zip"

# Columns that every later aggregation depends on; a row missing any of them
# cannot be attributed to a regency, a category, or a rating.
REQUIRED_COLUMNS = ['kabupaten_kota', 'kategori', 'rating']

# Load the destination table and discard incomplete rows in a single chain.
df = pd.read_csv(DATA_PATH).dropna(subset=REQUIRED_COLUMNS)

# Preview the first rows
df.head()

Unnamed: 0,nama,kategori,kabupaten_kota,rating,preferensi,link_lokasi,latitude,longitude,link_gambar
0,Taman Mumbul Sangeh,Alam,Kabupaten Badung,4.6,Wisata Alam,https://www.google.com/maps/place/Taman+Mumbul...,-8.483959,115.212288,https://lh3.googleusercontent.com/p/AF1QipPniB...
1,Sangeh Monkey Forest,Alam,Kabupaten Badung,4.6,Wisata Alam,https://www.google.com/maps/place/Sangeh+Monke...,-8.481774,115.206739,https://lh3.googleusercontent.com/p/AF1QipN2_T...
2,Objek Wisata Sangeh,Umum,Kabupaten Badung,4.6,Wisata Umum,https://www.google.com/maps/place/Objek+Wisata...,-8.481566,115.206546,https://lh3.googleusercontent.com/p/AF1QipO1t7...
3,Satria Gatotkaca Park,Umum,Kabupaten Badung,4.6,Wisata Umum,https://www.google.com/maps/place/Satria+Gatot...,-8.744388,115.178867,https://lh3.googleusercontent.com/p/AF1QipP3kd...
4,Desa Wisata Penarungan,Umum,Kabupaten Badung,5.0,Wisata Umum,https://www.google.com/maps/place/Desa+Wisata+...,-8.539637,115.198893,https://lh3.googleusercontent.com/p/AF1QipO-JZ...


In [10]:
# All per-regency metrics share the same grouping, so build it once.
by_regency = df.groupby('kabupaten_kota')

# Number of destinations per regency
wisata_count = by_regency.size().rename('jumlah_wisata').reset_index()

# Mean rating per regency
rating_avg = by_regency['rating'].mean().rename('rata_rata_rating').reset_index()

# Combine both metrics into one table keyed by regency
gap_df = wisata_count.merge(rating_avg, on='kabupaten_kota')
gap_df.head()

Unnamed: 0,kabupaten_kota,jumlah_wisata,rata_rata_rating
0,Kabupaten Badung,68,4.56804
1,Kabupaten Bangli,45,4.586667
2,Kabupaten Buleleng,120,4.502945
3,Kabupaten Gianyar,119,4.518487
4,Kabupaten Jembrana,65,4.475385


In [54]:
# Weights of the three normalized metrics in the final investment score.
# They sum to 1.0, so the score stays within [0, 1].
RATING_WEIGHT = 0.4
COUNT_WEIGHT = 0.3
VARIETY_WEIGHT = 0.3

# Category variety: number of distinct destination categories per regency
variasi_kategori = (
    df.groupby('kabupaten_kota')['kategori']
    .nunique()
    .reset_index(name='variasi_kategori')
)

# Join every metric into one table (rebuilt from gap_df on each run, so the
# cell can be re-executed safely)
investment_df = pd.merge(gap_df, variasi_kategori, on='kabupaten_kota')

# Min-max normalize each metric to [0, 1] so the weights are comparable.
# MinMaxScaler is imported in the notebook's import cell.
# Note: a metric that has the same value for every regency is mapped to 0.0
# and therefore contributes nothing to the ranking.
scaler = MinMaxScaler()
investment_df[['jumlah_norm', 'rating_norm', 'variasi_norm']] = scaler.fit_transform(
    investment_df[['jumlah_wisata', 'rata_rata_rating', 'variasi_kategori']]
)

# Final score: weighted sum of the normalized metrics
investment_df['skor_investasi'] = (
    RATING_WEIGHT * investment_df['rating_norm'] +
    COUNT_WEIGHT * investment_df['jumlah_norm'] +
    VARIETY_WEIGHT * investment_df['variasi_norm']
)

# Highest score first
investment_df = investment_df.sort_values(by='skor_investasi', ascending=False)
investment_df.head()

Unnamed: 0,kabupaten_kota,jumlah_wisata,rata_rata_rating,variasi_kategori,jumlah_norm,rating_norm,variasi_norm,skor_investasi
5,Kabupaten Karangasem,120,4.571501,4,1.0,0.873618,0.0,0.649447
7,Kabupaten Tabanan,119,4.53888,4,0.986667,0.601776,0.0,0.536711
3,Kabupaten Gianyar,119,4.518487,4,0.986667,0.431839,0.0,0.468736
0,Kabupaten Badung,68,4.56804,4,0.306667,0.844776,0.0,0.42991
2,Kabupaten Buleleng,120,4.502945,4,1.0,0.30232,0.0,0.420928


In [61]:
# Prompt that hands the per-regency score table to the LLM and asks for
# development priorities and strategic recommendations.
investment_summary_prompt = PromptTemplate(
    input_variables=["data"],
    template="""
    Berikut data analisis skor investasi wisata di berbagai kabupaten di Bali:

    {data}

    Berikan insight mendalam: kabupaten mana yang perlu prioritas pengembangan, mana yang sudah optimal, dan rekomendasi strategis berbasis data.
    """
)

# Compose prompt and model with the runnable pipe operator. This replaces the
# deprecated LLMChain / Chain.run API; invoking the pipeline on an LLM returns
# the generated text as a plain string.
investment_chain = investment_summary_prompt | llm

# Plain-text rendering of the columns the model should reason about
data_text = investment_df[
    ['kabupaten_kota', 'jumlah_wisata', 'rata_rata_rating', 'variasi_kategori', 'skor_investasi']
].to_string(index=False)

investment_insight = investment_chain.invoke({"data": data_text})
print("\n\n🔍 INVESTMENT INSIGHT:\n")
print(investment_insight)



🔍 INVESTMENT INSIGHT:

Based on the provided data, here are the insights:

1. Prioritized Development Areas:
   - Kabupaten Karangasem: With the highest number of tourists (120) and a relatively high investment score (0.649447), this area might benefit from targeted development to maintain its popularity and capitalize on its current success.
   - Kabupaten Tabanan: Although it ranks second in tourist numbers, its investment score (0.536711) is lower than Karangasem. Improving infrastructure or services could enhance its competitiveness.
   - Kabupaten Gianyar: Similar to Tabanan, it has a high number of tourists but a relatively lower investment score (0.468736). It could also benefit from strategic development.

2. Optimal Areas:
   - Kabupaten Jembrana: Despite having a lower number of tourists (65) compared to others, it has the lowest investment score (0.109060), indicating it might already be optimally managed or less developed, focusing on quality over quantity.
   - Kabupaten

In [56]:
# FOLIUM VISUALIZATION

# Coordinate columns used for every marker placed on the map.
COORD_COLS = ['latitude', 'longitude']


def strip_admin_label(names):
    """Remove the 'Kabupaten ' / 'Kota ' labels from a Series of regency names.

    Every occurrence of either label is removed (not only a leading one) and
    surrounding whitespace is stripped.

    Parameters
    ----------
    names : pandas.Series
        String Series of regency / city names.

    Returns
    -------
    pandas.Series
        Cleaned names, aligned with the index of `names`.
    """
    return (
        names.str.replace('Kabupaten ', '', regex=False)
        .str.replace('Kota ', '', regex=False)
        .str.strip()
    )


def add_rating_layer(fmap, places, layer_name, color, icon):
    """Add one toggleable marker cluster for a group of destinations.

    Parameters
    ----------
    fmap : folium.Map
        Map that receives the cluster.
    places : pandas.DataFrame
        Destinations to plot; must provide the columns 'nama', 'rating',
        'kategori', 'latitude' and 'longitude'.
    layer_name : str
        Name shown for this cluster in the layer control.
    color : str
        Marker colour passed to folium.Icon.
    icon : str
        Icon name passed to folium.Icon.

    Returns
    -------
    folium.plugins.MarkerCluster
        The cluster that was added to `fmap`.
    """
    cluster = MarkerCluster(name=layer_name).add_to(fmap)
    # folium refuses NaN locations, so rows without coordinates are skipped
    # instead of aborting the whole map.
    for _, row in places.dropna(subset=COORD_COLS).iterrows():
        folium.Marker(
            location=[row['latitude'], row['longitude']],
            popup=f"{row['nama']}<br>Rating: {row['rating']}<br>Kategori: {row['kategori']}",
            icon=folium.Icon(color=color, icon=icon)
        ).add_to(cluster)
    return cluster


# Shared join key: regency name without its administrative label
df['kabupaten_kota_clean'] = strip_admin_label(df['kabupaten_kota'])
investment_df['kabupaten_kota_clean'] = strip_admin_label(investment_df['kabupaten_kota'])

# Mean coordinate of the destinations in each regency
koordinat_df = df.groupby('kabupaten_kota_clean')[COORD_COLS].mean().reset_index()

# Drop coordinates left over from a previous run so that re-executing this
# cell does not produce suffixed duplicate columns in the merge below
investment_df = investment_df.drop(columns=COORD_COLS, errors='ignore')

# Attach the coordinates to the score table
investment_df = pd.merge(investment_df, koordinat_df, on='kabupaten_kota_clean', how='left')

# Base map
m = folium.Map(location=[-8.4, 115.1], zoom_start=9)

# One circle per regency, with a radius proportional to the investment score
for _, row in investment_df.dropna(subset=COORD_COLS).iterrows():
    folium.CircleMarker(
        location=[row['latitude'], row['longitude']],
        radius=row['skor_investasi'] * 20,
        color='green',
        fill=True,
        fill_opacity=0.7,
        popup=(f"{row['kabupaten_kota']}<br>Skor Investasi: {row['skor_investasi']:.2f}")
    ).add_to(m)

# Top destinations (rating > 4.5)
unggulan = df[df['rating'] > 4.5]
cluster_unggulan = add_rating_layer(m, unggulan, 'Unggulan (Rating > 4.5)', 'blue', 'star')

# Adequate destinations (4.0 - 4.5 inclusive), drawn in green
cukup = df[(df['rating'] >= 4.0) & (df['rating'] <= 4.5)]
cluster_cukup = add_rating_layer(m, cukup, 'Cukup (Rating 4.0–4.5)', 'green', 'info-sign')

# Low-rated destinations (< 4.0)
rendah = df[df['rating'] < 4.0]
cluster_rendah = add_rating_layer(m, rendah, 'Perlu Evaluasi (Rating < 4.0)', 'red', 'exclamation-sign')

# Layer control so each rating group can be toggled
folium.LayerControl().add_to(m)


# Display the map
m