# Import Library

In [1]:
!pip install Sastrawi

import pandas as pd  # Pandas untuk manipulasi dan analisis data
pd.options.mode.chained_assignment = None  # Menonaktifkan peringatan chaining
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory  # Stemming (penghilangan imbuhan kata) dalam bahasa Indonesia
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory  # Menghapus kata-kata berhenti dalam bahasa Indonesia
import scipy.sparse as sp 



In [2]:
dataset= pd.read_csv("https://drive.usercontent.google.com/download?id=1nhEVUP5RotxRkXKcdh7mBMYbZuEPf7VY&export=download&authuser=3&confirm=t&uuid=4a95c9f5-6f20-4be1-bea1-07a6f9f7e8a7&at=AEz70l4lEbei1hLUIq3K5bV2nu4W:1742999252488")
dataset.drop(['place_id'], axis=1, inplace=True)
dataset.head()

Unnamed: 0,place_name,place_description,category,city,price,rating,description_location,place_img,gallery_photo_img1,gallery_photo_img2,gallery_photo_img3,place_map
0,Taman Nasional Gunung Leuser,Taman Nasional Gunung Leuser adalah salah satu...,"Budaya,Taman Nasional",Aceh,"Rp25,000",4.5,"Barisan mountain range, Aceh 24653",https://storage.googleapis.com/travelee-capsto...,https://storage.googleapis.com/travelee-capsto...,https://storage.googleapis.com/travelee-capsto...,https://storage.googleapis.com/travelee-capsto...,https://www.google.com/maps/search/Taman+Nasio...
1,Desa Wisata Munduk,Desa Wisata Munduk adalah sebuah desa di pegun...,Desa Wisata,Bali,"Rp10,000",4.5,"Munduk, Banjar, Kabupaten Buleleng, Bali",https://storage.googleapis.com/travelee-capsto...,https://storage.googleapis.com/travelee-capsto...,https://storage.googleapis.com/travelee-capsto...,https://storage.googleapis.com/travelee-capsto...,https://goo.gl/maps/LyeJ2mAeFGysTE9v9
2,Desa Wisata Penglipuran,Desa Wisata Penglipuran adalah sebuah desa wis...,"Budaya,Desa Wisata",Bali,"Rp25,000",4.8,"Jl. Penglipuran, Kubu, Kec. Bangli, Kabupaten ...",https://storage.googleapis.com/travelee-capsto...,https://storage.googleapis.com/travelee-capsto...,https://storage.googleapis.com/travelee-capsto...,https://storage.googleapis.com/travelee-capsto...,https://www.google.com/maps/search/Desa+Wisata...
3,Taman Nasional Bali Barat,Taman Nasional Bali Barat adalah kawasan konse...,"Taman Nasional,Cagar Alam",Bali,"Rp15,000",4.5,"Sumber Klampok, Bali",https://storage.googleapis.com/travelee-capsto...,https://storage.googleapis.com/travelee-capsto...,https://storage.googleapis.com/travelee-capsto...,https://storage.googleapis.com/travelee-capsto...,https://www.google.com/maps/search/Taman+Nasio...
4,Bukit Jamur,Bukit Jamur Ciwidey adalah satu dari sekian ba...,Cagar Alam,Bandung,"Rp12,000",4.2,"Sugihmukti, Kec. Pasirjambu, Kabupaten Bandung...",https://storage.googleapis.com/travelee-capsto...,https://storage.googleapis.com/travelee-capsto...,https://storage.googleapis.com/travelee-capsto...,https://storage.googleapis.com/travelee-capsto...,https://www.google.com/maps/search/Bukit+Jamur


In [3]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205 entries, 0 to 204
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   place_name            205 non-null    object 
 1   place_description     205 non-null    object 
 2   category              205 non-null    object 
 3   city                  205 non-null    object 
 4   price                 205 non-null    object 
 5   rating                205 non-null    float64
 6   description_location  205 non-null    object 
 7   place_img             205 non-null    object 
 8   gallery_photo_img1    205 non-null    object 
 9   gallery_photo_img2    202 non-null    object 
 10  gallery_photo_img3    118 non-null    object 
 11  place_map             205 non-null    object 
dtypes: float64(1), object(11)
memory usage: 19.3+ KB


In [4]:
df = dataset[['place_name','place_description','category','city','rating','description_location']]

# Cleaning Data + Preprocessing

In [5]:

# CLEANING DATA
text_columns = ['place_description', 'description_location']
for col in text_columns:
    df[col] = df[col].fillna('')

categorical_columns = ['category', 'city']
for col in categorical_columns:
    df[col] = df[col].fillna(df[col].mode()[0])

numeric_columns = ['rating']
for col in numeric_columns:
    df[col] = df[col].fillna(df[col].median())

df['combined_text'] = df['place_description'] + " " + df['description_location']

indonesian_stopwords = [
    'yang', 'dan', 'di', 'ke', 'dari', 'ini', 'itu', 'untuk', 'pada', 'dengan',
    'adalah', 'atau', 'jika', 'saya', 'kita', 'akan', 'tidak', 'tersebut'
]

from sklearn.feature_extraction.text import TfidfVectorizer
vectorizer = TfidfVectorizer(stop_words=indonesian_stopwords, max_features=1000)
tfidf_matrix = vectorizer.fit_transform(df['combined_text'])
print("TF-IDF matrix shape:", tfidf_matrix.shape)

from sklearn.preprocessing import LabelEncoder
le_category = LabelEncoder()
df['category_encoded'] = le_category.fit_transform(df['category'])

le_city = LabelEncoder()
df['city_encoded'] = le_city.fit_transform(df['city'])

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
df[['rating_scaled']] = scaler.fit_transform(df[[ 'rating']])

other_features = df[['category_encoded', 'city_encoded', 'rating_scaled']]
other_features_sparse = sp.csr_matrix(other_features.values)
final_feature_matrix = sp.hstack([tfidf_matrix, other_features_sparse])
print("Final feature matrix shape:", final_feature_matrix.shape)


TF-IDF matrix shape: (205, 1000)
Final feature matrix shape: (205, 1003)


# Model

In [6]:
from sklearn.metrics.pairwise import cosine_similarity

# Hitung cosine similarity antara item
similarity_matrix = cosine_similarity(final_feature_matrix, dense_output=False)
print("Similarity matrix shape:", similarity_matrix.shape)

def get_recommendations(place_index, similarity_matrix, df, top_n=5):

    similarity_scores = list(enumerate(similarity_matrix[place_index].toarray().flatten()))

    similarity_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)[1:top_n+1]

    recommended_indices = [i[0] for i in similarity_scores]
    return recommended_indices

recommended_indices = get_recommendations(4, similarity_matrix, dataset, top_n=5)
recommended_places = dataset.iloc[recommended_indices]
print("Recommended indices:", recommended_indices)
print(recommended_places[[ 'place_name', 'category', 'city', 'rating','place_img', 'gallery_photo_img1', 'gallery_photo_img2', 'gallery_photo_img3']])

Similarity matrix shape: (205, 205)
Recommended indices: [18, 34, 22, 10, 8]
                place_name    category     city  rating  \
18       Happyfarm Ciwidey  Cagar Alam  Bandung     4.3   
34   Taman Bunga Cihideung  Cagar Alam  Bandung     4.2   
22  Kebun Binatang Bandung  Cagar Alam  Bandung     4.1   
10   Curug Luhur Waterfall  Cagar Alam  Bandung     4.2   
8             Curug Cimahi  Cagar Alam  Bandung     4.1   

                                            place_img  \
18  https://storage.googleapis.com/travelee-capsto...   
34  https://storage.googleapis.com/travelee-capsto...   
22  https://storage.googleapis.com/travelee-capsto...   
10  https://storage.googleapis.com/travelee-capsto...   
8   https://storage.googleapis.com/travelee-capsto...   

                                   gallery_photo_img1  \
18  https://storage.googleapis.com/travelee-capsto...   
34  https://storage.googleapis.com/travelee-capsto...   
22  https://storage.googleapis.com/travelee-capsto... 

# Recommend

In [7]:
recommended_indices = get_recommendations(4, similarity_matrix, dataset, top_n=5)
recommended_places = dataset.iloc[recommended_indices]
print("Recommended indices:", recommended_indices)
print(recommended_places[[ 'place_name', 'category', 'city', 'rating','place_img', 'gallery_photo_img1', 'gallery_photo_img2', 'gallery_photo_img3']])

Recommended indices: [18, 34, 22, 10, 8]
                place_name    category     city  rating  \
18       Happyfarm Ciwidey  Cagar Alam  Bandung     4.3   
34   Taman Bunga Cihideung  Cagar Alam  Bandung     4.2   
22  Kebun Binatang Bandung  Cagar Alam  Bandung     4.1   
10   Curug Luhur Waterfall  Cagar Alam  Bandung     4.2   
8             Curug Cimahi  Cagar Alam  Bandung     4.1   

                                            place_img  \
18  https://storage.googleapis.com/travelee-capsto...   
34  https://storage.googleapis.com/travelee-capsto...   
22  https://storage.googleapis.com/travelee-capsto...   
10  https://storage.googleapis.com/travelee-capsto...   
8   https://storage.googleapis.com/travelee-capsto...   

                                   gallery_photo_img1  \
18  https://storage.googleapis.com/travelee-capsto...   
34  https://storage.googleapis.com/travelee-capsto...   
22  https://storage.googleapis.com/travelee-capsto...   
10  https://storage.googleapis.co