In [18]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder, OneHotEncoder

In [19]:
data = pd.read_excel('Data_v.3.xlsx')
data = data.copy()

In [20]:
data

Unnamed: 0,Place_Id,Place_Name,Description,Category,City,Price,Rating,Lat,Long
0,1,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Budaya,Jakarta,20000,4.6,-6.175392,106.827153
1,2,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Budaya,Jakarta,0,4.6,-6.137645,106.817125
2,3,Dunia Fantasi,Dunia Fantasi atau disebut juga Dufan adalah t...,Hiburan,Jakarta,270000,4.6,-6.125312,106.833538
3,4,Taman Mini Indonesia Indah (TMII),Taman Mini Indonesia Indah merupakan suatu kaw...,Hiburan,Jakarta,10000,4.5,-6.302446,106.895156
4,5,Atlantis Water Adventure,Atlantis Water Adventure atau dikenal dengan A...,Hiburan,Jakarta,94000,4.5,-6.124190,106.839134
...,...,...,...,...,...,...,...,...,...
679,680,Teluk Kiluan,Teluk terkenal dengan atraksi lumba-lumba di L...,Alam,Tangamus,0,4.6,-5.779489,105.093217
680,681,Pantai Tanjung Setia,Pantai dengan ombak besar untuk berselancar di...,Alam,Pesisir Selatan,0,4.5,-5.302336,103.999850
681,682,Pantai Dewi Mandapa,Pantai dengan pemandangan indah dan fasilitas ...,Alam,Pesarawan,0,3.9,-5.572197,105.243705
682,683,Bendungan Margotirto,Bendungan dengan pemandangan Alam yang indah d...,Alam,Tangamus,0,4.3,-5.421126,104.725779


In [21]:
# Step 1: Data Preprocessing
le_category = LabelEncoder()
le_city = LabelEncoder()

# Fit the encoders with all unique categories and cities
le_category.fit(data['Category'].unique())
le_city.fit(data['City'].unique())

# Transform the actual data
data['Category'] = data['Category'].apply(lambda x: le_category.transform([x])[0] if x in le_category.classes_ else -1)
data['City'] = data['City'].apply(lambda x: le_city.transform([x])[0] if x in le_city.classes_ else -1)

# Handle unknown categories and cities
if -1 in data['Category'].values:
    max_category = data['Category'].max()
    data.loc[data['Category'] == -1, 'Category'] = max_category + 1
    le_category.classes_ = np.append(le_category.classes_, 'Unknown')

if -1 in data['City'].values:
    max_city = data['City'].max()
    data.loc[data['City'] == -1, 'City'] = max_city + 1
    le_city.classes_ = np.append(le_city.classes_, 'Unknown')

X = data[['Category', 'City', 'Rating', 'Price','Lat','Long']].values

scaler = MinMaxScaler()
X[:, 2:] = scaler.fit_transform(X[:, 2:])

In [22]:
# Step 2: Feature Encoding
num_categories = len(le_category.classes_)
num_cities = len(le_city.classes_)
X_encoded = np.zeros((X.shape[0], num_categories + num_cities + 4))
X_encoded[:, :num_categories] = tf.keras.utils.to_categorical(X[:, 0], num_classes=num_categories)
X_encoded[:, num_categories:num_categories+num_cities] = tf.keras.utils.to_categorical(X[:, 1], num_classes=num_cities)
X_encoded[:, -4:] = X[:, 2:]  # Rating and Price

# Step 3-4: Model Architecture and Training (same as before)
input_dim = X_encoded.shape[1]
hidden_dim = 64
model = tf.keras.Sequential([
    tf.keras.layers.Dense(hidden_dim, activation='relu', input_shape=(input_dim,)),
    tf.keras.layers.Dense(hidden_dim, activation='relu'),
    tf.keras.layers.Dense(input_dim, activation='sigmoid')
])
model.compile(optimizer='adam', loss='mse')

from sklearn.model_selection import train_test_split
X_train, X_val = train_test_split(X_encoded, test_size=0.2, random_state=42)
model.fit(X_train, X_train, epochs=50, batch_size=32, validation_data=(X_val, X_val), verbose=1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x2983b56ffa0>

In [23]:
# Step 5: Inference
def get_feature_vector(attraction_name):
    attraction = data[data['Place_Name'] == attraction_name].iloc[0]
    features = [
        attraction['Category'],
        attraction['City'],
        attraction['Rating'],
        attraction['Price'],
        attraction['Lat'],
        attraction['Long']
    ]
    
    encoded = np.zeros((1, input_dim))
    encoded[0, :num_categories] = tf.keras.utils.to_categorical(features[0], num_classes=num_categories)
    encoded[0, num_categories:num_categories+num_cities] = tf.keras.utils.to_categorical(features[1], num_classes=num_cities)
    encoded[0, -4:] = scaler.transform([features[2:]])
    
    return encoded


In [24]:
def get_recommendations(attraction_name, top_k=10):
    query_vector = get_feature_vector(attraction_name)
    query_embedding = model.predict(query_vector)
    
    all_embeddings = model.predict(X_encoded)
    similarities = np.dot(all_embeddings, query_embedding.T).flatten()
    
    top_indices = similarities.argsort()[::-1][1:top_k+1]  # Exclude the query itself
    top_attractions = data.iloc[top_indices]['Place_Name'].tolist()
    top_similarities = similarities[top_indices].tolist()
    
    recommendations = []
    for idx, (attraction, similarity) in enumerate(zip(top_attractions, top_similarities)):
        attraction_data = data[data['Place_Name'] == attraction].iloc[0]
        
        try:
            category = le_category.inverse_transform([attraction_data['Category']])[0]
        except ValueError:
            category = 'Unknown'
        
        try:
            city = le_city.inverse_transform([attraction_data['City']])[0]
        except ValueError:
            city = 'Unknown'
        
        recommendations.append({
            'rank': idx + 1,
            'name': attraction,
            'similarity': similarity,
            'category': category,
            'city': city,
            'rating': attraction_data['Rating'],
            'price': attraction_data['Price'],
            'lat': attraction_data['Lat'],
            'long': attraction_data['Long']
        })
    
    return recommendations

In [26]:
query_attraction = "Museum Rumah Cut Nyak Dien"  # Replace with an actual attraction from your dataset
recommendations = get_recommendations(query_attraction, top_k=10)
print(f"\nTop 10 recommendations similar to {query_attraction}:")
for rec in recommendations:
    print(f"{rec['rank']}. {rec['name']} (Similarity: {rec['similarity']:.3f})")
    print(f"   Category: {rec['category']}")
    print(f"   City: {rec['city']}")
    print(f"   Rating: {rec['rating']}")
    print(f"   Price: {rec['price']}")
    print(f"   Latitude: {rec['lat']}")
    print(f"   Longitude: {rec['long']}")
    print()


Top 10 recommendations similar to Museum Rumah Cut Nyak Dien:
1. Museum Aceh (Similarity: 2.399)
   Category: Budaya
   City: Banda Aceh
   Rating: 4.6
   Price: 0
   Latitude: 5.54863494996462
   Longitude: 95.3209554

2. Tugu Nol Kilometer (Similarity: 2.313)
   Category: Budaya
   City: Sabang
   Rating: 4.6
   Price: 0
   Latitude: 5.91423754846702
   Longitude: 95.2181735802234

3. Museum Rumah Cut Nyak Dien (Similarity: 2.241)
   Category: Budaya
   City: Aceh Besar
   Rating: 4.6
   Price: 0
   Latitude: 5.51692007053272
   Longitude: 95.2716937288352

4. Barelang Bridge Batam (Similarity: 2.091)
   Category: Budaya
   City: Batam
   Rating: 4.5
   Price: 0
   Latitude: 0.98135051690167
   Longitude: 104.049599940111

5. Istana Siak Sri Indrapura (Similarity: 2.031)
   Category: Budaya
   City: Siak
   Rating: 4.6
   Price: 0
   Latitude: 0.794994672656606
   Longitude: 102.048911397986

6. Candi Muara Takus (Similarity: 1.980)
   Category: Budaya
   City: Kampar
   Rating: 4.4