In [None]:
!pip install keras-tuner



In [None]:
import tensorflow as tf
from tensorflow.keras import layers
import keras_tuner as kt
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoTokenizer, TFAutoModel

In [None]:
# 1. Load Dataset
# Read the cleaned restaurant table and lowercase the free-text descriptions
# so every description is fed to the tokenizer in the same casing.
df = pd.read_csv("/content/cleaned_data_restoran.csv")
df['Description'] = df['Description'].str.lower()
print("Dataset info:")
# DataFrame.info() writes its summary to stdout itself and returns None;
# wrapping it in print() only appended a stray "None" line to the output.
df.info()

Dataset info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 67 entries, 0 to 66
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Name         67 non-null     object
 1   Description  67 non-null     object
 2   Categories   67 non-null     object
 3   Lokasi       67 non-null     object
dtypes: object(4)
memory usage: 2.2+ KB
None


In [None]:
# 2. Prepare tokenizer and pretrained IndoBERT model (TensorFlow)
# Both objects are loaded from the same checkpoint name so the token ids
# produced by the tokenizer match the vocabulary of the model.
model_name = "indobenchmark/indobert-base-p2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Used as a module-level global by get_bert_embeddings below.
bert_model = TFAutoModel.from_pretrained(model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Some layers from the model checkpoint at indobenchmark/indobert-base-p2 were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassifica

In [None]:
# 3. Generate embeddings from text - returns one embedding row per input text
def get_bert_embeddings(texts):
    """Embed one or more texts with the module-level IndoBERT model.

    Args:
        texts: A single string or a list of strings. A bare string is
            wrapped into a one-element list before tokenization.

    Returns:
        A NumPy array with one row per input text, holding the last hidden
        state at sequence position 0 (the CLS token) for each text.
    """
    batch = [texts] if isinstance(texts, str) else texts
    # Pad to the longest text in the batch and truncate anything beyond
    # 512 tokens.
    encoded = tokenizer(
        batch,
        return_tensors='tf',
        padding=True,
        truncation=True,
        max_length=512,
    )
    hidden_states = bert_model(encoded).last_hidden_state
    cls_vectors = hidden_states[:, 0, :]  # position 0 of every sequence
    return cls_vectors.numpy()

In [None]:
# 4. Prepare Training Data for Keras model
# Generate embeddings for all restaurant descriptions (one BERT pass only).
hotel_desc_embeddings = get_bert_embeddings(df['Description'].tolist())

# The dataset is not modified between the two places that previously
# recomputed these embeddings, so reuse the array instead of running the
# whole corpus through BERT a second time. Both names are kept because
# later cells reference each of them.
restoran_desc_embeddings = hotel_desc_embeddings

# Encode restaurant names as class labels for the classification task.
# NOTE(review): if every restaurant name is unique, each class has exactly one
# sample, so classes in the validation split are never seen during training —
# confirm that a per-name classification objective is really intended.
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(df['Name'])

# Split into train and validation set
X_train, X_val, y_train, y_val = train_test_split(hotel_desc_embeddings, labels, test_size=0.3, random_state=42)

In [None]:
# 5. Build tunable keras model function for hyperparameter tuning
def build_model(hp):
    """Construct and compile a dense classifier from tuner hyperparameters.

    Args:
        hp: KerasTuner hyperparameter container. It is queried for
            ``num_layers`` (1-5), per-layer ``units_{i}`` (64-512, step 64),
            per-layer ``dropout_{i}`` (0.1-0.5, step 0.1) and
            ``learning_rate`` (one of 1e-2, 1e-3, 1e-4).

    Returns:
        A compiled ``tf.keras.Model`` whose input width equals the embedding
        dimension of ``restoran_desc_embeddings`` and whose softmax output has
        one unit per class known to ``label_encoder``.
    """
    n_features = restoran_desc_embeddings.shape[1]
    n_classes = len(label_encoder.classes_)

    model_input = tf.keras.Input(shape=(n_features,))
    hidden = model_input

    # Stack of Dense -> Dropout pairs; every pair gets its own tunable
    # width and dropout rate.
    depth = hp.Int('num_layers', 1, 5)
    for layer_idx in range(depth):
        width = hp.Int(f'units_{layer_idx}', min_value=64, max_value=512, step=64)
        hidden = tf.keras.layers.Dense(width, activation='relu')(hidden)
        drop = hp.Float(f'dropout_{layer_idx}', 0.1, 0.5, step=0.1)
        hidden = tf.keras.layers.Dropout(drop)(hidden)

    class_probs = tf.keras.layers.Dense(n_classes, activation='softmax')(hidden)
    network = tf.keras.Model(inputs=model_input, outputs=class_probs)

    step_size = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    # Labels are integer class ids, hence the sparse cross-entropy loss.
    network.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=step_size),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

In [None]:
# 6. Hyperparameter search with Hyperband
# overwrite=True starts a fresh search on every run. Without it, KerasTuner
# silently reloads any earlier project stored under the same
# directory/project_name and reports those stale trials, even if the data or
# build_model changed in the meantime.
tuner = kt.Hyperband(
    build_model,
    objective='val_accuracy',
    max_epochs=20,
    factor=3,
    directory='kt_tuner_dir',
    project_name='restoran_nlp_recommendation',
    overwrite=True,
)
# Early stopping: abort a trial once val_loss has not improved for 5 epochs.
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

print("Starting hyperparameter search...")
tuner.search(X_train, y_train, epochs=20, validation_data=(X_val, y_val), callbacks=[stop_early])

# Get the best hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

print("Best hyperparameters found:")
print(f"Number of layers: {best_hps.get('num_layers')}")
# Only the first num_layers units/dropout entries are used by build_model.
for i in range(best_hps.get('num_layers')):
    print(f"Layer {i} units: {best_hps.get(f'units_{i}')}, dropout: {best_hps.get(f'dropout_{i}')}")

print(f"Learning rate: {best_hps.get('learning_rate')}")

Reloading Tuner from kt_tuner_dir/restoran_nlp_recommendation/tuner0.json
Starting hyperparameter search...
Best hyperparameters found:
Number of layers: 3
Layer 0 units: 128, dropout: 0.30000000000000004
Layer 1 units: 256, dropout: 0.2
Layer 2 units: 128, dropout: 0.4
Learning rate: 0.0001


In [None]:
# 7. Build the best model and train fully
# Instantiate a fresh network from the winning hyperparameters, then fit it
# on the training split while monitoring the validation split.
model = tuner.hypermodel.build(best_hps)

history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=10,
    callbacks=[stop_early],  # same early-stopping callback as the search
)

Epoch 1/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 225ms/step - accuracy: 0.0394 - loss: 4.8001 - val_accuracy: 0.0000e+00 - val_loss: 4.2905
Epoch 2/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step - accuracy: 0.0000e+00 - loss: 4.6552 - val_accuracy: 0.0000e+00 - val_loss: 4.2836
Epoch 3/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 107ms/step - accuracy: 0.0145 - loss: 4.6053 - val_accuracy: 0.0000e+00 - val_loss: 4.2791
Epoch 4/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step - accuracy: 0.0145 - loss: 4.4710 - val_accuracy: 0.0476 - val_loss: 4.2729
Epoch 5/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 121ms/step - accuracy: 0.0000e+00 - loss: 4.5469 - val_accuracy: 0.0476 - val_loss: 4.2676
Epoch 6/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step - accuracy: 0.0249 - loss: 4.5453 - val_accuracy: 0.0476 - val_loss: 4.2651
Epoch 7/10
[1m2/2[0m 

In [None]:
# 8. Helper function to recommend restaurants based on similarity of embeddings refined by trained Keras model
def recommend_hotels(user_prompt, location, top_n=5):
    """Rank the restaurants of one location by similarity to a user prompt.

    The prompt and the candidate descriptions are embedded with BERT, passed
    through the trained Keras model up to the layer just before its output
    layer, and compared with cosine similarity.

    Args:
        user_prompt: Free-text query describing what the user is looking for.
        location: Value matched case-insensitively against the ``Lokasi``
            column of ``df``.
        top_n: Maximum number of recommendations to return.

    Returns:
        A list of ``(name, similarity)`` tuples sorted by descending
        similarity. The list is empty when no row matches ``location``.
    """
    # Select candidates by *position* so the lookup into the embedding matrix
    # stays correct even if df does not carry a default 0..n-1 index.
    location_mask = (df['Lokasi'].str.lower() == location.lower()).to_numpy()
    candidate_positions = np.flatnonzero(location_mask)
    if candidate_positions.size == 0:
        # cosine_similarity cannot handle an empty candidate matrix.
        return []

    intermediate_layer_model = tf.keras.Model(
        inputs=model.input,
        outputs=model.layers[-2].output
    )

    user_refined_embedding = intermediate_layer_model(
        get_bert_embeddings(user_prompt)
    ).numpy()

    # One batched forward pass over all candidates instead of one per row.
    candidate_embeddings = intermediate_layer_model(
        hotel_desc_embeddings[candidate_positions]
    ).numpy()
    candidate_names = df['Name'].to_numpy()[candidate_positions]

    sims = cosine_similarity(user_refined_embedding, candidate_embeddings)[0]
    top_indices = sims.argsort()[::-1][:top_n]  # highest similarity first
    return [(candidate_names[i], sims[i]) for i in top_indices]

In [None]:
# 9. Example usage: Getting recommendations after tuning and training
if __name__ == "__main__":
    # Sample query (Indonesian) and the location used to filter candidates.
    user_prompt = "makanan dengan harga terjangkau"
    location = "Kota Malang"

    header = f"Rekomendasi makanan di {location} untuk prompt '{user_prompt}':"
    print(header)

    recs = recommend_hotels(user_prompt, location)
    # Each recommendation is a (name, similarity score) pair.
    for rec_name, rec_score in recs:
        print(f"- {rec_name}: {rec_score:.4f}")

Rekomendasi makanan di Kota Malang untuk prompt 'makanan dengan harga terjangkau':
- Depot Gang Djangkrik Kawi Atas Malang: 0.8672
- Ayam Goreng Tenes: 0.8073
- Rumah Makan Kertanegara: 0.7753
- Gang Djangkrik Restaurant: 0.7441
- signora pasta malang: 0.7438


In [None]:
# Save the model in SavedModel format.
# The export directory name is the input path given to tensorflowjs_converter
# in a later cell.
model.export("restoran_model_tfjs")

Saved artifact at 'restoran_model_tfjs'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 768), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 67), dtype=tf.float32, name=None)
Captures:
  137028470830480: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137028470832016: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137028470831824: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137028461216400: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137028461217744: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137028461218896: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137028461218512: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137028461220048: TensorSpec(shape=(), dtype=tf.resource, name=None)


In [None]:
!pip install tensorflowjs



In [None]:
# Convert the SavedModel directory restoran_model_tfjs into a TensorFlow.js
# graph model written to restoran_model_tfjs_web.
!tensorflowjs_converter --input_format=tf_saved_model \
    --output_format=tfjs_graph_model \
    restoran_model_tfjs restoran_model_tfjs_web

2025-06-12 13:38:40.579542: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1749735520.629629   22226 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1749735520.637406   22226 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[32m🌲 Try [0m[34mhttps://ydf.readthedocs.io[0m[32m, the successor of TensorFlow Decision Forests with more features and faster training![0m
2025-06-12 13:38:46.759910: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)
I0000 00:00:1749735526.881408   22226 devices.cc:67] Number of eligible GPUs (core count >= 8, compute capabi

In [None]:
# Archive the exported model directory into a single zip file.
# NOTE(review): this zips the SavedModel directory (restoran_model_tfjs), not
# the converter output directory restoran_model_tfjs_web — confirm which one
# is meant to be packaged.
!zip -r /content/restoran_model_tfjs.zip /content/restoran_model_tfjs

  adding: content/restoran_model_tfjs/ (stored 0%)
  adding: content/restoran_model_tfjs/fingerprint.pb (stored 0%)
  adding: content/restoran_model_tfjs/variables/ (stored 0%)
  adding: content/restoran_model_tfjs/variables/variables.index (deflated 62%)
  adding: content/restoran_model_tfjs/variables/variables.data-00000-of-00001 (deflated 8%)
  adding: content/restoran_model_tfjs/saved_model.pb (deflated 85%)
  adding: content/restoran_model_tfjs/assets/ (stored 0%)
