In [1]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import TextVectorization
from flask import Flask, request, jsonify
from sklearn.metrics.pairwise import cosine_similarity

# Memuat data
data = pd.read_excel('data.xlsx')
data = data[['Place_Id', 'Place_Name', 'Description', 'Category', 'City', 'Price', 'Rating']]

In [2]:
# Fungsi preprocessing teks
def preprocess_text(text):
    text = tf.strings.regex_replace(text, "[^a-zA-Z0-9 ]", "")
    text = tf.strings.lower(text)
    return text

In [3]:
# Vektorisasi fitur teks
description_vectorizer = TextVectorization(output_mode='tf_idf', ngrams=2)
category_vectorizer = TextVectorization(output_mode='int', output_sequence_length=1)
city_vectorizer = TextVectorization(output_mode='int', output_sequence_length=1)

description_ds = tf.data.Dataset.from_tensor_slices(data['Description'].astype(str).values).map(preprocess_text)
category_ds = tf.data.Dataset.from_tensor_slices(data['Category'].astype(str).values)
city_ds = tf.data.Dataset.from_tensor_slices(data['City'].astype(str).values)

description_vectorizer.adapt(description_ds)
category_vectorizer.adapt(category_ds)
city_vectorizer.adapt(city_ds)

description_vectorized = tf.cast(description_vectorizer(tf.constant(data['Description'].astype(str).values)), tf.float32)
category_vectorized = tf.cast(category_vectorizer(tf.constant(data['Category'].astype(str).values)), tf.float32)
city_vectorized = tf.cast(city_vectorizer(tf.constant(data['City'].astype(str).values)), tf.float32)

price = tf.expand_dims(tf.constant(data['Price'].values, dtype=tf.float32), -1)
rating = tf.expand_dims(tf.constant(data['Rating'].values, dtype=tf.float32), -1)

features = tf.concat([description_vectorized, category_vectorized, city_vectorized, price, rating], axis=1)


In [4]:
input_dim = features.shape[1]
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(input_dim,)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(16, activation='relu', name='embedding')
])

model.compile(optimizer='adam', loss='mse')

In [5]:
embeddings = model.predict(features)

similarity_matrix = cosine_similarity(embeddings)


def recommend(place_id, top_k=5):
    place_index = data[data['Place_Id'] == place_id].index[0]
    similarity_scores = list(enumerate(similarity_matrix[place_index]))
    similarity_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)
    similar_indices = [i[0] for i in similarity_scores[1:top_k+1]]
    similar_places = data.iloc[similar_indices]['Place_Name'].tolist()
    return similar_places

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step


In [6]:
recommend(1)

['Candi Muara Takus',
 'Rahmat Zoo and Park',
 'Graha Maria Annai Velangkanni',
 'Ulu Kasok',
 'Barelang Bridge Batam']