In [1]:
import pandas as pd

df = pd.read_csv('tourism_with_id.csv')  # replace 'yourfile.csv' with your uploaded file name

In [2]:
# Import necessary libraries
import pandas as pd
import numpy as np

# Load data
df = pd.read_csv('tourism_with_id.csv')

# Preprocessing the descriptions
df = df['Description'].apply(lambda x: x.lower().split())

# model

In [3]:
import io
import re
import string
import tqdm

import numpy as np

import tensorflow as tf
from tensorflow.keras import layers

In [4]:
SEED = 42
AUTOTUNE = tf.data.AUTOTUNE

In [5]:
sentence = "The wide road shimmered in the hot sun"
tokens = list(sentence.lower().split())
print(len(tokens))

8


In [6]:
vocab, index = {}, 1  # start indexing from 1
vocab['<pad>'] = 0  # add a padding token
for token in tokens:
  if token not in vocab:
    vocab[token] = index
    index += 1
vocab_size = len(vocab)
print(vocab)

{'<pad>': 0, 'the': 1, 'wide': 2, 'road': 3, 'shimmered': 4, 'in': 5, 'hot': 6, 'sun': 7}


In [7]:
inverse_vocab = {index: token for token, index in vocab.items()}
print(inverse_vocab)

{0: '<pad>', 1: 'the', 2: 'wide', 3: 'road', 4: 'shimmered', 5: 'in', 6: 'hot', 7: 'sun'}


In [8]:
example_sequence = [vocab[word] for word in tokens]
print(example_sequence)

[1, 2, 3, 4, 5, 1, 6, 7]


In [9]:
window_size = 2
positive_skip_grams, _ = tf.keras.preprocessing.sequence.skipgrams(
      example_sequence,
      vocabulary_size=vocab_size,
      window_size=window_size,
      negative_samples=0)
print(len(positive_skip_grams))

26


In [10]:
for target, context in positive_skip_grams[:5]:
  print(f"({target}, {context}): ({inverse_vocab[target]}, {inverse_vocab[context]})")

(1, 5): (the, in)
(3, 5): (road, in)
(6, 7): (hot, sun)
(4, 3): (shimmered, road)
(1, 3): (the, road)


In [11]:
# Get target and context words for one positive skip-gram.
target_word, context_word = positive_skip_grams[0]

# Set the number of negative samples per positive context.
num_ns = 4

context_class = tf.reshape(tf.constant(context_word, dtype="int64"), (1, 1))
negative_sampling_candidates, _, _ = tf.random.log_uniform_candidate_sampler(
    true_classes=context_class,  # class that should be sampled as 'positive'
    num_true=1,  # each positive skip-gram has 1 positive context class
    num_sampled=num_ns,  # number of negative context words to sample
    unique=True,  # all the negative samples should be unique
    range_max=vocab_size,  # pick index of the samples from [0, vocab_size]
    seed=SEED,  # seed for reproducibility
    name="negative_sampling"  # name of this operation
)
print(negative_sampling_candidates)
print([inverse_vocab[index.numpy()] for index in negative_sampling_candidates])

tf.Tensor([2 1 4 3], shape=(4,), dtype=int64)
['wide', 'the', 'shimmered', 'road']


In [12]:
# Reduce a dimension so you can use concatenation (in the next step).
squeezed_context_class = tf.squeeze(context_class, 1)

# Concatenate a positive context word with negative sampled words.
context = tf.concat([squeezed_context_class, negative_sampling_candidates], 0)

# Label the first context word as `1` (positive) followed by `num_ns` `0`s (negative).
label = tf.constant([1] + [0]*num_ns, dtype="int64")
target = target_word


In [13]:
sampling_table = tf.keras.preprocessing.sequence.make_sampling_table(size=60)
print(sampling_table)

[0.00315225 0.00315225 0.00547597 0.00741556 0.00912817 0.01068435
 0.01212381 0.01347162 0.01474487 0.0159558  0.0171136  0.01822533
 0.01929662 0.02033198 0.02133515 0.02230924 0.02325687 0.02418031
 0.02508148 0.02596208 0.02682359 0.02766731 0.02849441 0.02930593
 0.03010279 0.03088585 0.03165585 0.0324135  0.03315943 0.0338942
 0.03461837 0.03533241 0.03603678 0.0367319  0.03741815 0.03809591
 0.0387655  0.03942724 0.04008143 0.04072834 0.04136824 0.04200136
 0.04262794 0.0432482  0.04386234 0.04447055 0.04507302 0.04566992
 0.04626142 0.04684768 0.04742884 0.04800505 0.04857644 0.04914315
 0.04970529 0.05026299 0.05081636 0.0513655  0.05191052 0.05245153]


In [14]:
# Generates skip-gram pairs with negative sampling for a list of sequences
# (int-encoded sentences) based on window size, number of negative samples
# and vocabulary size.
def generate_training_data(sequences, window_size, num_ns, vocab_size, seed):
  # Elements of each training example are appended to these lists.
  targets, contexts, labels = [], [], []

  # Build the sampling table for `vocab_size` tokens.
  sampling_table = tf.keras.preprocessing.sequence.make_sampling_table(vocab_size)

  # Iterate over all sequences (sentences) in the dataset.
  for sequence in tqdm.tqdm(sequences):

    # Generate positive skip-gram pairs for a sequence (sentence).
    positive_skip_grams, _ = tf.keras.preprocessing.sequence.skipgrams(
          sequence,
          vocabulary_size=vocab_size,
          sampling_table=sampling_table,
          window_size=window_size,
          negative_samples=0)

    # Iterate over each positive skip-gram pair to produce training examples
    # with a positive context word and negative samples.
    for target_word, context_word in positive_skip_grams:
      context_class = tf.expand_dims(
          tf.constant([context_word], dtype="int64"), 1)
      negative_sampling_candidates, _, _ = tf.random.log_uniform_candidate_sampler(
          true_classes=context_class,
          num_true=1,
          num_sampled=num_ns,
          unique=True,
          range_max=vocab_size,
          seed=seed,
          name="negative_sampling")

      # Build context and label vectors (for one target word)
      context = tf.concat([tf.squeeze(context_class,1), negative_sampling_candidates], 0)
      label = tf.constant([1] + [0]*num_ns, dtype="int64")

      # Append each element from the training example to global lists.
      targets.append(target_word)
      contexts.append(context)
      labels.append(label)

  return targets, contexts, labels

In [15]:
import pandas as pd

def column_to_txt(csv_file, column_name, txt_file):
    df = pd.read_csv(csv_file)
    column = df[column_name]
    column.to_csv(txt_file, index=False, header=False)


# Usage
csv_file = '/content/tourism_with_id.csv'  # Replace with the path to your CSV file
column_index = 'Description'  # Replace with the index of the column you want to extract (0-based index)
txt_file = '/content/tourism_with_id.txt'  # Replace with the desired path for the TXT file

column_to_txt(csv_file, column_index, txt_file)

In [16]:
with open('/content/tourism_with_id.txt') as f:
  lines = f.read().splitlines()
for line in lines[:20]:
  print(line)

"Monumen Nasional atau yang populer disingkat dengan Monas atau Tugu Monas adalah monumen peringatan setinggi 132 meter (433 kaki) yang didirikan untuk mengenang perlawanan dan perjuangan rakyat Indonesia untuk merebut kemerdekaan dari pemerintahan kolonial Hindia Belanda. Pembangunan monumen ini dimulai pada tanggal 17 Agustus 1961 di bawah perintah presiden Soekarno dan dibuka untuk umum pada tanggal 12 Juli 1975. Tugu ini dimahkotai lidah api yang dilapisi lembaran emas yang melambangkan semangat perjuangan yang menyala-nyala. Monumen Nasional terletak tepat di tengah Lapangan Medan Merdeka, Jakarta Pusat."
"Kota tua di Jakarta, yang juga bernama Kota Tua, berpusat di Alun-Alun Fatahillah, yaitu alun-alun yang ramai dengan pertunjukan rutin tarian tradisional. Museum Sejarah Jakarta adalah bangunan era Belanda dengan lukisan dan barang antik, sedangkan Museum Wayang memamerkan boneka kayu khas Jawa. Desa Glodok, atau Chinatown, terkenal dengan makanan kaki lima, seperti pangsit dan 

In [17]:
text_ds = tf.data.TextLineDataset('/content/tourism_with_id.txt').filter(lambda x: tf.cast(tf.strings.length(x), bool))

In [19]:
# Now, create a custom standardization function to lowercase the text and
# remove punctuation.
def custom_standardization(input_data):
  lowercase = tf.strings.lower(input_data)
  return tf.strings.regex_replace(lowercase,
                                  '[%s]' % re.escape(string.punctuation), '')


# Define the vocabulary size and the number of words in a sequence.
vocab_size = 10000
sequence_length = 100

# Use the `TextVectorization` layer to normalize, split, and map strings to
# integers. Set the `output_sequence_length` length to pad all samples to the
# same length.
vectorize_layer = layers.TextVectorization(
    standardize=custom_standardization,
    max_tokens=vocab_size,
    output_mode='int',
    output_sequence_length=sequence_length)

In [20]:
vectorize_layer.adapt(text_ds.batch(1024))

In [21]:
# Save the created vocabulary for reference.
inverse_vocab = vectorize_layer.get_vocabulary()
print(inverse_vocab[:20])

['', '[UNK]', 'yang', 'di', 'dan', 'ini', 'dengan', 'dari', 'wisata', 'untuk', 'taman', 'pantai', 'adalah', 'pada', 'kota', 'tempat', 'merupakan', 'oleh', 'museum', 'bandung']


In [22]:
# Vectorize the data in text_ds.
text_vector_ds = text_ds.batch(1024).prefetch(AUTOTUNE).map(vectorize_layer).unbatch()

In [23]:
sequences = list(text_vector_ds.as_numpy_iterator())
print(len(sequences))

437


In [24]:
for seq in sequences[:5]:
  print(f"{seq} => {[inverse_vocab[i] for i in seq]}")

[ 102  121   27    2  288 1039    6 1836   27  267 1836   12  102  977
  492 6821   79 6686  508    2  195    9 1176 4160    4  501  578   24
    9 4599  646    7  382  547  604  123  307  102    5 1045   13  105
  832  521 3145    3  212 1781  360  752    4  178    9  140   13  105
  736  600 1629  267    5 2919 2659  320    2 2014 4968  900    2 1189
 2306  501    2 4628  102  121   33  269    3   78  312 1445  537   40
   60    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0] => ['monumen', 'nasional', 'atau', 'yang', 'populer', 'disingkat', 'dengan', 'monas', 'atau', 'tugu', 'monas', 'adalah', 'monumen', 'peringatan', 'setinggi', '132', 'meter', '433', 'kaki', 'yang', 'didirikan', 'untuk', 'mengenang', 'perlawanan', 'dan', 'perjuangan', 'rakyat', 'indonesia', 'untuk', 'merebut', 'kemerdekaan', 'dari', 'pemerintahan', 

In [25]:
targets, contexts, labels = generate_training_data(
    sequences=sequences,
    window_size=2,
    num_ns=4,
    vocab_size=vocab_size,
    seed=SEED)

targets = np.array(targets)
contexts = np.array(contexts)
labels = np.array(labels)

print('\n')
print(f"targets.shape: {targets.shape}")
print(f"contexts.shape: {contexts.shape}")
print(f"labels.shape: {labels.shape}")


100%|██████████| 437/437 [00:12<00:00, 36.20it/s]




targets.shape: (25763,)
contexts.shape: (25763, 5)
labels.shape: (25763, 5)


In [26]:
BATCH_SIZE = 1024
BUFFER_SIZE = 10000
dataset = tf.data.Dataset.from_tensor_slices(((targets, contexts), labels))
dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)
print(dataset)

<_BatchDataset element_spec=((TensorSpec(shape=(1024,), dtype=tf.int64, name=None), TensorSpec(shape=(1024, 5), dtype=tf.int64, name=None)), TensorSpec(shape=(1024, 5), dtype=tf.int64, name=None))>


In [27]:
dataset = dataset.cache().prefetch(buffer_size=AUTOTUNE)
print(dataset)

<_PrefetchDataset element_spec=((TensorSpec(shape=(1024,), dtype=tf.int64, name=None), TensorSpec(shape=(1024, 5), dtype=tf.int64, name=None)), TensorSpec(shape=(1024, 5), dtype=tf.int64, name=None))>


In [28]:
class Word2Vec(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim):
    super(Word2Vec, self).__init__()
    self.target_embedding = layers.Embedding(vocab_size,
                                      embedding_dim,
                                      input_length=1,
                                      name="w2v_embedding")
    self.context_embedding = layers.Embedding(vocab_size,
                                       embedding_dim,
                                       input_length=num_ns+1)

  def call(self, pair):
    target, context = pair
    # target: (batch, dummy?)  # The dummy axis doesn't exist in TF2.7+
    # context: (batch, context)
    if len(target.shape) == 2:
      target = tf.squeeze(target, axis=1)
    # target: (batch,)
    word_emb = self.target_embedding(target)
    # word_emb: (batch, embed)
    context_emb = self.context_embedding(context)
    # context_emb: (batch, context, embed)
    dots = tf.einsum('be,bce->bc', word_emb, context_emb)
    # dots: (batch, context)
    return dots

In [29]:
embedding_dim = 200
word2vec = Word2Vec(vocab_size, embedding_dim)
word2vec.compile(optimizer='adam',
                 loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
                 metrics=['accuracy'])

In [30]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir="logs")

In [31]:
word2vec.fit(dataset, epochs=500, callbacks=[tensorboard_callback])

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

<keras.callbacks.History at 0x7f91944ffcd0>

In [32]:
weights = word2vec.get_layer('w2v_embedding').get_weights()[0]
vocab = vectorize_layer.get_vocabulary()

In [33]:
out_v = io.open('vectors.tsv', 'w', encoding='utf-8')
out_m = io.open('metadata.tsv', 'w', encoding='utf-8')

for index, word in enumerate(vocab):
  if index == 0:
    continue  # skip 0, it's padding.
  vec = weights[index]
  out_v.write('\t'.join([str(x) for x in vec]) + "\n")
  out_m.write(word + "\n")
out_v.close()
out_m.close()

In [34]:
try:
  from google.colab import files
  files.download('vectors.tsv')
  files.download('metadata.tsv')
except Exception:
  pass

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

#Infer 1

In [35]:
vectors = pd.read_csv('/content/vectors.tsv', sep='\t',header=None)

vectors

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,190,191,192,193,194,195,196,197,198,199
0,0.012185,-0.007413,-0.007445,-0.010108,-0.037646,-0.008132,-0.031018,0.040280,-0.034450,-0.035600,...,-0.010139,0.004775,-0.029332,-0.040077,-0.048427,-0.011647,0.047132,-0.042753,-0.008390,-0.000906
1,0.374855,0.150414,0.168149,0.227495,-0.154822,-0.486873,-0.467131,0.011872,0.412152,0.425536,...,-0.174313,0.226812,-0.482615,-0.623973,0.673608,-0.274283,0.499735,0.441734,0.438083,0.685143
2,-0.050210,0.727791,0.537909,-0.012950,0.100592,1.107585,0.366915,-0.177306,-0.535385,-0.270581,...,-0.496187,0.243968,-0.194211,-0.935017,0.333405,0.023197,0.329336,-0.891354,-0.065612,0.413283
3,-0.464998,-0.107493,-0.184454,0.510075,-0.550720,-0.450847,0.254194,-0.202023,-0.502048,0.593456,...,-0.402751,-0.407494,0.031665,0.727383,0.192734,0.248471,0.037811,0.083275,-0.268975,-0.156787
4,-0.235418,0.418085,-1.012643,0.090193,0.755287,-0.034515,0.251581,-0.763079,0.890076,0.393950,...,0.349482,0.249063,0.192692,0.263981,-0.043281,-0.004011,0.363282,-0.434360,-0.556742,-0.133567
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6845,0.058644,0.095096,-0.137615,-0.272455,0.307687,-0.393347,0.430051,-0.447797,-0.280422,0.220414,...,0.097135,0.305960,-0.321467,0.215769,0.297547,0.233251,-0.118416,-0.351965,0.192822,0.034216
6846,0.352876,-0.412503,-0.167254,-0.012964,0.032107,-0.333195,0.247926,-0.025925,-0.000899,0.185917,...,0.307351,0.013376,-0.157420,0.390807,-0.277255,-0.341318,0.085387,0.060694,-0.420623,0.292423
6847,0.412247,-0.128431,0.332800,-0.030894,-0.245293,-0.208442,-0.141770,-0.349242,-0.305067,0.277546,...,-0.137207,0.358577,0.218383,0.314519,-0.012844,-0.292406,0.342496,-0.090694,-0.266073,0.070375
6848,0.379328,0.138793,-0.417788,0.408432,-0.174740,-0.448646,0.383583,0.206432,-0.110327,-0.196900,...,-0.227372,0.114846,-0.053447,0.498333,-0.053963,-0.294249,0.270412,0.247259,-0.041193,-0.417402


In [36]:
vectors_list = vectors.values.tolist()
vectors_array = np.array(vectors_list)

In [37]:
metadata = pd.read_csv('/content/metadata.tsv', sep='\t', header=None)

metadata

Unnamed: 0,0
0,[UNK]
1,yang
2,di
3,dan
4,ini
...,...
6845,0500
6846,0222301990
6847,02
6848,01rw


In [57]:
metadata.values[:6639]

array([['[UNK]'],
       ['yang'],
       ['di'],
       ...,
       ['927'],
       ['92'],
       ['9001000']], dtype=object)

In [58]:
vectors_dict = {}
for i,row in enumerate(metadata.values):
  vectors_dict[row[0]] = vectors_array[i]

bersehin df

In [68]:
df = pd.read_csv('tourism_with_id.csv')
df_clean = df[['Place_Name',
               'Description',
               'Category',
               'Price',
               'City',
               'Rating']]
df_clean

Unnamed: 0,Place_Name,Description,Category,Price,City,Rating
0,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Budaya,20000,Jakarta,4.6
1,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Budaya,0,Jakarta,4.6
2,Dunia Fantasi,Dunia Fantasi atau disebut juga Dufan adalah t...,Taman Hiburan,270000,Jakarta,4.6
3,Taman Mini Indonesia Indah (TMII),Taman Mini Indonesia Indah merupakan suatu kaw...,Taman Hiburan,10000,Jakarta,4.5
4,Atlantis Water Adventure,Atlantis Water Adventure atau dikenal dengan A...,Taman Hiburan,94000,Jakarta,4.5
...,...,...,...,...,...,...
432,Museum Mpu Tantular,Museum Negeri Mpu Tantular adalah sebuah museu...,Budaya,2000,Surabaya,4.4
433,Taman Bungkul,Taman Bungkul adalah taman wisata kota yang te...,Taman Hiburan,0,Surabaya,4.6
434,Taman Air Mancur Menari Kenjeran,Air mancur menari atau dancing fountain juga a...,Taman Hiburan,0,Surabaya,4.4
435,Taman Flora Bratang Surabaya,Taman Flora adalah salah satu taman kota di Su...,Taman Hiburan,0,Surabaya,4.6


In [69]:
desc_vs = []
df['Description'] = df['Description'].apply(lambda x: x.lower().split())
for desc in df['Description']:
  desc_v = [vectors_dict[token] for token in desc if token in vectors_dict.keys()]
  desc_vs.append(np.array(desc_v).mean(axis=0))

In [70]:
df_clean['Description_vecs'] = desc_vs
#df_clean

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_clean['Description_vecs'] = desc_vs


In [None]:
import pickle

# Save the dictionary to a pickle file
with open('./vector.pickle', 'wb') as pickle_file:
    pickle.dump(vectors_dict, pickle_file)

In [None]:
import pickle

# Save the dictionary to a pickle file
with open('./df_clean.pickle', 'wb') as pickle_file:
    pickle.dump(df_clean, pickle_file)

In [None]:
import pickle

# Save the dictionary to a pickle file
with open('./vector.pickle', 'wb') as pickle_file:
    pickle.dump(vectors_dict, pickle_file)

# FIX INFER

In [10]:
import pickle

# Assuming you have a pickle file called 'data.pickle' that you want to load
file_path = '/content/vector.pickle'

# Load the dictionary from the pickle file
with open(file_path, 'rb') as pickle_file:
    vectors_dict = pickle.load(pickle_file)

In [11]:
import pickle

# Assuming you have a pickle file called 'data.pickle' that you want to load
file_path = '/content/df_clean.pickle'

# Load the dictionary from the pickle file
with open(file_path, 'rb') as pickle_file:
    df_clean = pickle.load(pickle_file)

In [23]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np

def vectorize(sentences):
    word_v = [vectors_dict[token] for token in sentences if token in vectors_dict.keys()]
    return np.array(word_v).mean(axis=0)

def recommend_destinations(user_input, city=None):
    # Filter destinations based on city if city is not None
    if city is not None:
        city_df = df_clean[df_clean['City'] == city]
    else:
        city_df = df

    # Transform the user input into the same vector space
    user_input_vector = vectorize(user_input.lower().split())

    # Calculate the cosine similarity between the user input vector and the description_vectors
    city_description_vectors = np.stack(city_df['Description_vecs'].values,axis=1).T
    cosine_sim = cosine_similarity([user_input_vector], city_description_vectors)

    # Get the indices of the destinations sorted by their similarity to the user input
    sim_scores = list(enumerate(cosine_sim[0]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the indices of the top 3 most similar destinations
    destination_indices = [i[0] for i in sim_scores[:10]]

    locations = [] 
    for i in destination_indices:
      locations.append({'id': i,
                       'place_name':city_df['Place_Name'].iloc[i],
                       'description':city_df['Description'].iloc[i],
                       'rating':city_df['Rating'].iloc[i]})

    # Return the top 3 most similar destinations
    return locations
# Test the function with a user input
print(recommend_destinations("tempat bermain", city='Jakarta'))

[{'id': 42, 'place_name': 'The Escape Hunt', 'description': 'Escape Hunt adalah salah satu tempat rekreasi yang lokasinya terletak di Jakarta Selatan. Di tempat ini, kamu akan berperan sebagai seorang detektif yang berada di sebuah ruangan terkunci dan hanya diberikan waktu 1 jam untuk memecahkan teka-teki yang ada. Tidak perlu khawatir atau takut sendirian, karena di sini kamu bisa mengajak hingga 4 temanmu dalam satu ruangan yang sama dan memecahkan kasus bersama-sama. Menarik sekali, bukan? Pertama kali datang di tempat ini, kamu akan dituntun untuk mengisi data di formulir yang telah disediakan dan diberikan briefing oleh pemandu game mengenai jalan cerita tema/kasus yang sudah kamu tentukan.', 'rating': 4.4}, {'id': 83, 'place_name': 'Kawasan Kuliner BSM', 'description': 'Tidak seperti Pecenongan, Kawasan Kuliner BSM buka dari pukul 09:00 hingga 18:00. Berlokasi di Jalan M. H. Thamrin, Jakarta Pusat, Kawasan Kuliner BSM merupakan tempat makan murah favorit warga sekitar serta pela

In [24]:
recommend_destinations("tempat bermain", city='Jakarta')

[{'id': 42,
  'place_name': 'The Escape Hunt',
  'description': 'Escape Hunt adalah salah satu tempat rekreasi yang lokasinya terletak di Jakarta Selatan. Di tempat ini, kamu akan berperan sebagai seorang detektif yang berada di sebuah ruangan terkunci dan hanya diberikan waktu 1 jam untuk memecahkan teka-teki yang ada. Tidak perlu khawatir atau takut sendirian, karena di sini kamu bisa mengajak hingga 4 temanmu dalam satu ruangan yang sama dan memecahkan kasus bersama-sama. Menarik sekali, bukan? Pertama kali datang di tempat ini, kamu akan dituntun untuk mengisi data di formulir yang telah disediakan dan diberikan briefing oleh pemandu game mengenai jalan cerita tema/kasus yang sudah kamu tentukan.',
  'rating': 4.4},
 {'id': 83,
  'place_name': 'Kawasan Kuliner BSM',
  'description': 'Tidak seperti Pecenongan, Kawasan Kuliner BSM buka dari pukul 09:00 hingga 18:00. Berlokasi di Jalan M. H. Thamrin, Jakarta Pusat, Kawasan Kuliner BSM merupakan tempat makan murah favorit warga sekitar