In [25]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import scipy
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [26]:
# Load the mountain dataset from a CSV file (path is relative to the working
# directory) and print each column's dtype and non-null count.
gunung = pd.read_csv("gunung_indonesia.csv")
gunung.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 207 entries, 0 to 206
Data columns (total 15 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Nama                207 non-null    object 
 1   Provinsi            207 non-null    object 
 2   Kabupaten           207 non-null    object 
 3   Kecamatan           207 non-null    object 
 4   Koordinat           207 non-null    object 
 5   Ketinggian (dpl)    207 non-null    int64  
 6   Jenis Gunung        207 non-null    object 
 7   Status              207 non-null    object 
 8   Akses               207 non-null    object 
 9   Jarak (km)          207 non-null    float64
 10  Jarak (m)           207 non-null    int64  
 11  Elevation gain (m)  207 non-null    int64  
 12  Estimated Time      207 non-null    object 
 13  Latitude            207 non-null    float64
 14  Longitude           207 non-null    float64
dtypes: float64(3), int64(3), object(9)
memory usage: 24.4+ KB

In [27]:
# Summary statistics (count, mean, std, quartiles) for the numeric columns.
gunung.describe()

Unnamed: 0,Ketinggian (dpl),Jarak (km),Jarak (m),Elevation gain (m),Latitude,Longitude
count,207.0,207.0,207.0,207.0,207.0,207.0
mean,2223.536232,17.5343,17577.777778,1319.21256,4.312702,114.163074
std,789.473886,13.690023,13677.072761,636.128775,3.010902,10.698226
min,422.0,1.9,1900.0,195.0,0.054199,95.63325
25%,1709.5,9.0,9000.0,863.0,1.273095,106.8485
50%,2145.0,13.0,13000.0,1250.0,3.950813,114.046
75%,2705.0,21.0,21000.0,1713.0,7.310167,122.13735
max,4884.0,90.0,90000.0,3500.0,10.11571,140.5857


In [28]:
# Count missing values per column.
print("Missing values:\n", gunung.isnull().sum())

Missing values:
 Nama                  0
Provinsi              0
Kabupaten             0
Kecamatan             0
Koordinat             0
Ketinggian (dpl)      0
Jenis Gunung          0
Status                0
Akses                 0
Jarak (km)            0
Jarak (m)             0
Elevation gain (m)    0
Estimated Time        0
Latitude              0
Longitude             0
dtype: int64


In [29]:
# Combine sub-district (Kecamatan), regency (Kabupaten) and province (Provinsi)
# into one space-separated location string; this adds a 'lokasi' column to
# `gunung` in place.
gunung['lokasi'] = gunung['Kecamatan'] + ' ' + gunung['Kabupaten'] + ' ' + gunung['Provinsi']

In [30]:
# Keep only the two columns used as recommendation features:
# the combined location text and the elevation.
recom_features = gunung[['lokasi', 'Ketinggian (dpl)']]

In [31]:
# Preview the selected feature columns.
recom_features

Unnamed: 0,lokasi,Ketinggian (dpl)
0,Ketambe Gayo Lues Nanggroe Aceh Darussalam,3030
1,Puteri Betung Gayo Lues Nanggroe Aceh Darussalam,3315
2,Serba Jadi Aceh Timur Nanggroe Aceh Darussalam,3085
3,Serba Jadi Aceh Timur Nanggroe Aceh Darussalam,2055
4,Timang Gajah Bener Meriah Nanggroe Aceh Daruss...,2624
...,...,...
202,Sentani Jayapura Papua,2034
203,Bomela Yahukimo Papua,4595
204,Manokwari Manokwari Papua,2582
205,Kwok I Arfak Papua,2939


In [32]:
# Scale the elevation column with MinMaxScaler (default feature_range is [0, 1])
# so it can be placed next to the TF-IDF columns.
scaler = MinMaxScaler()
numerical_features = scaler.fit_transform(recom_features[['Ketinggian (dpl)']])

# Vectorize the location text with TF-IDF over 1- to 3-grams.
vectorizer = TfidfVectorizer(ngram_range=(1, 3))
location_features = vectorizer.fit_transform(recom_features['lokasi'])

In [33]:
# Stack the TF-IDF location columns and the scaled elevation column side by side
# (location columns first). Query vectors must be stacked in this same order.
combined_recom_features = scipy.sparse.hstack([location_features, numerical_features])

In [34]:
def rekomendasikan_gunung(input_lokasi, input_ketinggian, top_n=5, similarity_threshold=0.3):
    """Recommend open mountains similar to a location text and a target elevation.

    The query is encoded with the notebook-level ``vectorizer`` (location text)
    and ``scaler`` (elevation), stacked in the same column order as
    ``combined_recom_features`` (location first, elevation last), and compared
    against every row of that matrix with cosine similarity. Only rows whose
    ``Akses`` value is ``'Buka'`` and whose score is at least
    ``similarity_threshold`` are kept.

    Args:
        input_lokasi: Non-blank location text (e.g. a province name).
        input_ketinggian: Target elevation as an int or float (NumPy numeric
            scalars are accepted; ``bool`` is rejected).
        top_n: Maximum number of recommendations to return.
        similarity_threshold: Minimum cosine similarity a row needs to qualify.

    Returns:
        A DataFrame with columns ``Nama``, ``Provinsi``, ``Ketinggian (dpl)``
        and ``Akses``, ordered by descending similarity, or ``None`` when the
        input is invalid, nothing qualifies, or an error occurs. In every
        ``None`` case a message is printed.
    """
    # Reject missing input
    if not input_lokasi or input_ketinggian is None:
        print("❌ Error: Harus mengisi semua kolom yang ada (lokasi dan ketinggian)!")
        return None

    # Validate types
    if not isinstance(input_lokasi, str):
        print("❌ Error: Input lokasi harus berupa teks!")
        return None

    # A whitespace-only string carries no location information; treat it as empty.
    if not input_lokasi.strip():
        print("❌ Error: Harus mengisi semua kolom yang ada (lokasi dan ketinggian)!")
        return None

    # bool is a subclass of int, so it must be excluded explicitly; NumPy scalar
    # numbers (e.g. values read back from the DataFrame) are accepted.
    if isinstance(input_ketinggian, bool) or not isinstance(
        input_ketinggian, (int, float, np.integer, np.floating)
    ):
        print("❌ Error: Input ketinggian harus berupa angka!")
        return None

    try:
        # Encode the query exactly like the training features.
        input_lokasi_vec = vectorizer.transform([input_lokasi])

        # If the scaler recorded column names at fit time, pass a frame with
        # the same names so scikit-learn does not warn about a missing header.
        feature_names = getattr(scaler, 'feature_names_in_', None)
        if feature_names is not None:
            height_input = pd.DataFrame([[input_ketinggian]], columns=list(feature_names))
        else:
            height_input = [[input_ketinggian]]
        input_numerik = scaler.transform(height_input)

        input_combined = scipy.sparse.hstack([input_lokasi_vec, input_numerik])

        similarity_scores = cosine_similarity(input_combined, combined_recom_features).flatten()

        # Positional mask: rows are selected with .iloc below, so the filter
        # must not depend on the DataFrame's index labels.
        is_open = (gunung['Akses'] == 'Buka').to_numpy()
        qualified_indices = np.flatnonzero((similarity_scores >= similarity_threshold) & is_open)

        if qualified_indices.size == 0:
            print("⚠ Tidak ditemukan rekomendasi yang memenuhi kriteria.")
            return None

        # Sort by descending score; the stable sort keeps tied rows in their
        # original order.
        ranking = np.argsort(-similarity_scores[qualified_indices], kind='stable')
        final_indices = qualified_indices[ranking][:top_n]

        # Report when fewer rows than requested qualified
        if len(final_indices) < top_n:
            print(f"ℹ Hanya ditemukan {len(final_indices)} rekomendasi yang memenuhi kriteria (gunung buka).")

        return gunung.iloc[final_indices][['Nama', 'Provinsi', 'Ketinggian (dpl)', 'Akses']]

    except Exception as e:
        # Deliberate best-effort: report the problem instead of raising in the notebook.
        print(f"❌ Terjadi error: {str(e)}")
        return None

In [35]:
# Example query: location text 'lampung' with a target elevation of 1800,
# using the default top_n and similarity_threshold.
rekomendasikan_gunung(
    input_lokasi='lampung',
    input_ketinggian=1800,
)




Unnamed: 0,Nama,Provinsi,Ketinggian (dpl),Akses
46,Gunung Rajabasa,Lampung,1281,Buka
43,Gunung Krakatau,Lampung,813,Buka
47,Gunung Seminung,Lampung,1881,Buka
48,Gunung Tanggamus,Lampung,2100,Buka
44,Gunung Pesagi,Lampung,3221,Buka


# Evaluation

In [36]:
def precision_at_k(input_lokasi, input_ketinggian, top_k=5):
    """Compute Precision@k for one query of ``rekomendasikan_gunung``.

    A recommendation counts as relevant when ``input_lokasi`` occurs
    (case-insensitively) inside the mountain's ``lokasi`` string. The
    denominator is always ``top_k``, even if fewer rows were returned.

    Args:
        input_lokasi: Location text passed to the recommender.
        input_ketinggian: Target elevation passed to the recommender.
        top_k: Number of recommendations requested and the precision denominator.

    Returns:
        The precision as a float in [0, 1]; ``0.0`` when the recommender
        returned nothing (invalid input or no qualifying mountain).
    """
    rekomendasi = rekomendasikan_gunung(input_lokasi, input_ketinggian, top_n=top_k)

    # The recommender signals "no result" with None; without this guard the
    # `.index` access below raises AttributeError.
    if rekomendasi is None:
        print(f"Precision@{top_k}: 0.00 (Input: '{input_lokasi}' tidak menghasilkan rekomendasi)")
        return 0.0

    recommended_mountains = gunung.loc[rekomendasi.index]
    relevan = (
        recommended_mountains['lokasi']
        .str.lower()
        .str.contains(input_lokasi.lower(), regex=False, na=False)
        .sum()
    )

    precision = relevan / top_k
    print(f"Precision@{top_k}: {precision:.2f} (Input: '{input_lokasi}' cocok dengan {relevan} dari {top_k} rekomendasi)")

    # Show the recommended rows for debugging
    print("\nDetail Rekomendasi:")
    print(recommended_mountains[['Nama', 'lokasi']])

    return precision

In [37]:
# Example evaluation: Precision@k for the query 'bali' at elevation 2000,
# using the default top_k.
precision_at_k(
    input_lokasi='bali',
    input_ketinggian=2000,
)


Precision@5: 1.00 (Input: 'bali' cocok dengan 5 dari 5 rekomendasi)

Detail Rekomendasi:
                 Nama                    lokasi
160   Gunung Batukaru      Penebel Tabanan Bali
158      Gunung Agung  Bebandem Karangasem Bali
164      Gunung Tapak     Baturiti Tabanan Bali
163  Gunung Sanghyang      Penebel Tabanan Bali
165      Gunung Adeng     Baturiti Tabanan Bali




np.float64(1.0)

# Save and load model

In [38]:
import joblib

def save_recommendation_models(vectorizer, scaler, combined_features, gunung_df, folder_path='model'):
    """Persist the recommender artifacts to ``folder_path`` with joblib.

    Writes four files: ``vectorizer.pkl``, ``scaler.pkl``,
    ``combined_features.pkl`` and ``gunung_data.pkl``.

    Args:
        vectorizer: Fitted text vectorizer.
        scaler: Fitted numeric scaler.
        combined_features: Feature matrix the recommender compares queries against.
        gunung_df: Mountain DataFrame whose rows align with ``combined_features``.
        folder_path: Target directory; created (including parents) if missing.
    """
    import os

    # Create the directory that is actually written to, not a hard-coded 'model'.
    os.makedirs(folder_path, exist_ok=True)

    joblib.dump(vectorizer, os.path.join(folder_path, 'vectorizer.pkl'))
    joblib.dump(scaler, os.path.join(folder_path, 'scaler.pkl'))
    joblib.dump(combined_features, os.path.join(folder_path, 'combined_features.pkl'))
    # Save the DataFrame passed in, not the notebook-level `gunung` global.
    joblib.dump(gunung_df, os.path.join(folder_path, 'gunung_data.pkl'))

    print("✅ Semua model dan data berhasil disimpan.")


In [39]:
# Persist the fitted vectorizer, scaler, feature matrix and DataFrame
# using the function's default folder ('model').
save_recommendation_models(vectorizer, scaler, combined_recom_features, gunung)

✅ Semua model dan data berhasil disimpan.


In [40]:
def load_recommendation_models(folder_path='model'):
    """Load the four recommender artifacts from ``folder_path`` with joblib.

    Files are read in this order: ``vectorizer.pkl``, ``scaler.pkl``,
    ``combined_features.pkl``, ``gunung_data.pkl``.

    Note: ``joblib.load`` unpickles data, so only load files from a trusted source.

    Returns:
        A 4-tuple ``(vectorizer, scaler, combined_features, gunung)``.
    """
    artifact_files = (
        'vectorizer.pkl',
        'scaler.pkl',
        'combined_features.pkl',
        'gunung_data.pkl',
    )
    artifacts = tuple(
        joblib.load(f'{folder_path}/{file_name}') for file_name in artifact_files
    )

    print("✅ Semua model dan data berhasil dimuat.")
    return artifacts


In [41]:
# Reload the artifacts from disk, rebinding the notebook-level names that
# rekomendasikan_gunung reads (vectorizer, scaler, combined_recom_features, gunung).
vectorizer, scaler, combined_recom_features, gunung = load_recommendation_models()


✅ Semua model dan data berhasil dimuat.


In [42]:
import sys
import pkgutil
from importlib import metadata

# Names of every importable top-level module on the path (not only the ones
# already imported). Not used below; kept for interactive inspection.
imported = {name for _, name, _ in pkgutil.iter_modules()}

# Distribution (pip) names of the main packages used by this notebook
used_libs = ['joblib', 'matplotlib', 'numpy', 'pandas', 'scipy', 'seaborn', 'scikit-learn']


def pinned_requirement(dist_name):
    """Return ``name==version`` for an installed distribution, else the bare name.

    The version is looked up by distribution name via ``importlib.metadata``,
    so packages whose import name differs from their pip name
    (``scikit-learn`` is imported as ``sklearn``) are pinned correctly, and
    no invalid ``==latest`` specifier is ever produced.
    """
    try:
        return f"{dist_name}=={metadata.version(dist_name)}"
    except metadata.PackageNotFoundError:
        return dist_name


# Write one requirement per line, pinned to the installed version when known
with open("requirements.txt", "w", encoding="utf-8") as f:
    for lib in used_libs:
        f.write(pinned_requirement(lib) + "\n")
