In [2]:
import os
for dirname, _, filenames in os.walk('/datasets'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

In [29]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Reshape, Dot
from tensorflow.keras.optimizers import Adam

from tqdm.auto import tqdm

In [5]:
data_tourism_with_id = pd.read_csv('./datasets/indonesia-tourism/tourism_with_id.csv')
df_tourism = pd.read_csv('./datasets/clean_dataset/df_tourism.csv')

# Run pertama

In [14]:
# Vectorize the 'Description' column using TF-IDF
vectorizer = TfidfVectorizer()
description_tfidf = vectorizer.fit_transform(df_tourism['Description']).toarray()

# Encode the 'Place_Name' as categorical labels
label_encoder = LabelEncoder()
place_labels = label_encoder.fit_transform(df_tourism['Place_Name'])
place_labels = to_categorical(place_labels)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(description_tfidf, place_labels, test_size=0.2, random_state=42)

# Define the Keras model
model = Sequential()
model.add(Dense(512, input_shape=(X_train.shape[1],), activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(place_labels.shape[1], activation='softmax'))

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=10, batch_size=64, validation_data=(X_test, y_test), verbose=1)

# Evaluate the model
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=1)

print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

# Summarize the model
model.summary()



Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss: 10.3738
Test Accuracy: 0.00%
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 512)               3347968   
                                                                 
 dense_1 (Dense)             (None, 256)               131328    
                                                                 
 dense_2 (Dense)             (None, 440)               113080    
                                                                 
Total params: 3592376 (13.70 MB)
Trainable params: 3592376 (13.70 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


# Run kedua

In [19]:
# Load the user interests data
user_interests = pd.DataFrame({
    'User_Id': [1],
    'Place_Name': ['Candi Borobudur'],
    'Category': [''],
    'City': ['']
})

print(user_interests['Place_Name'][0])

# Find the description for Kidzania
description = df_tourism[df_tourism['Place_Name'] == user_interests['Place_Name'][0]]['Description'].iloc[0]

# Vectorize the description
vectorizer = TfidfVectorizer()
vectorizer.fit(df_tourism['Description'])
user_vec = vectorizer.transform([description]).toarray()

# Load the trained model
model = Sequential()
model.add(Dense(512, input_shape=(user_vec.shape[1],), activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(df_tourism['Place_Name'].nunique(), activation='softmax'))

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Predict the recommendations
recommendations = model.predict(user_vec)

# Get the top 5 recommended place indices
recommended_place_indices = recommendations[0].argsort()[-5:][::-1]

# Map indices to place names
recommended_places = label_encoder.inverse_transform(recommended_place_indices)

# Create a DataFrame for the recommendations
recommendations_df = pd.DataFrame(recommended_places, columns=['Recommended Places'])

# Display the recommendations
print(recommendations_df.head())

Candi Borobudur
               Recommended Places
0      Nol Kilometer Jl.Malioboro
1                Taman Barunawati
2                    Pulau Tidung
3  Monumen Bambu Runcing Surabaya
4           Observatorium Bosscha


In [24]:
# Hypothetical user choices
user_choices = [1 ,2]

# Create a DataFrame for user interests
user_id = 1
user_interests = pd.DataFrame({'User_Id': [user_id]*len(user_choices), 'Place_Id': user_choices})

# Save the user interests to a CSV file
user_interests_file_path = 'user_interests.csv'
user_interests.to_csv(user_interests_file_path, index=False)

# Display the head of the user interests DataFrame
user_interests.head()

Unnamed: 0,User_Id,Place_Id
0,1,1
1,1,2


In [31]:
# Preprocess the Category and City using Label Encoding
category_encoder = LabelEncoder()
city_encoder = LabelEncoder()
df_tourism['Category_encoded'] = category_encoder.fit_transform(df_tourism['Category'])
df_tourism['City_encoded'] = city_encoder.fit_transform(df_tourism['City'])

# Preprocess the Description using TF-IDF Vectorization
tfidf_vectorizer = TfidfVectorizer()
description_tfidf = tfidf_vectorizer.fit_transform(df_tourism['Description'])

# print(description_tfidf)
# Combine the encoded category and city with the tf-idf vectors
features = np.hstack((to_categorical(df_tourism['Category_encoded']),
                        description_tfidf.toarray(),
                        to_categorical(df_tourism['City_encoded'])))

# For simplicity, we'll use a placeholder target variable as we don't have user ratings or similar data
# In a real scenario, this would be based on user interactions or ratings
target = np.random.randint(0, 2, size=(len(df_tourism), 1))

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Define the Keras model
model = Sequential()
model.add(Dense(128, input_dim=features.shape[1], activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=5, batch_size=64, validation_data=(X_test, y_test), verbose=1)

# Save the model
model.save('tourism_recommendation_model.h5')

# Output the training history
history.history

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


  saving_api.save_model(


{'loss': [0.6934603452682495,
  0.6723602414131165,
  0.6472170948982239,
  0.6066151857376099,
  0.5432822108268738],
 'accuracy': [0.5382436513900757,
  0.5609065294265747,
  0.6402266025543213,
  0.7478753328323364,
  0.8611897826194763],
 'val_loss': [0.705738365650177,
  0.7105624675750732,
  0.718838632106781,
  0.7335019111633301,
  0.7443929314613342],
 'val_accuracy': [0.37078651785850525,
  0.37078651785850525,
  0.37078651785850525,
  0.3820224702358246,
  0.40449437499046326]}

In [32]:
def recommend_places(user_interests, df_tourism, model, tfidf_vectorizer, category_encoder, city_encoder):
    # Filter the tourism DataFrame based on user interests
    # df_tourism.to_csv('df_tourism.csv', index=False)
    df_user_interests = df_tourism[df_tourism['Place_Id'].isin(user_interests['Place_Id'])]
    print(df_user_interests)
    
    # Get the indices of the user's interests to avoid recommending the same places
    indices_to_exclude = df_user_interests.index
    # print(indices_to_exclude)
    # Prepare the rest of the places for prediction
    df_other_places = df_tourism.drop(indices_to_exclude)
    # print(df_other_places)
    # print(df_other_places)
    other_places_features = np.hstack((
        to_categorical(category_encoder.transform(df_other_places['Category'])),
        to_categorical(city_encoder.transform(df_other_places['City'])),
        tfidf_vectorizer.transform(df_other_places['Description']).toarray()
    ))
    
    # Predict the likelihood of the user being interested in these other places
    predictions = model.predict(other_places_features)
    # print(predictions)
    # Get the top 5 recommendations
    top_indices = predictions.flatten().argsort()[-5:][::-1]
    # [406 296 367 198 323 332 309  97 429 141]
    # 406 164 392 190 371 296  84 345 363 390
    # [406 392 371  84 363 367 180 362 403 318 323 114 374]
    print(top_indices)
    recommendations = df_other_places.iloc[top_indices]
    
    return recommendations[['Place_Name', 'Description', 'Category', 'City']]

recomendations = recommend_places(user_interests, df_tourism, model, tfidf_vectorizer, category_encoder, city_encoder)
print(recomendations)

   Place_Id       Place_Ratings        Place_Name  \
0       1.0  3.7222222222222223  Monumen Nasional   
1       2.0                2.84          Kota Tua   

                                         Description Category     City  \
0  Monumen Nasional atau yang populer disingkat d...   Budaya  Jakarta   
1  Kota tua di Jakarta, yang juga bernama Kota Tu...   Budaya  Jakarta   

                    Location  Category_encoded  City_encoded  
0  (-6.1753924, 106.8271528)                 5             4  
1  (-6.1376448, 106.8171245)                 5             4  
[366 380  80 358 427]
                Place_Name                                        Description  \
368     Tirto Argo Siwarak  Kolam Renang Tirto Argo Siwarak, merupakan kol...   
382   Masjid Agung Ungaran  Masjid Agung Al-Mabrur berlokasi di Jln. Ahmad...   
82      Alive Museum Ancol  Museum kini tidak hanya menawarkan benda – ben...   
360  Masjid Kapal Semarang  Masjid Safinatun Najah atau pengunjung biasa m...   
4

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=738037d9-6841-4167-9a3b-dc0ac9a5875c' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>