<a href="https://colab.research.google.com/github/andrewputrahartanto/Capstone_Data/blob/main/Machine%20Learning/Notebook/model_CBFfix.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load the Bali tourism dataset (CSV).
data = pd.read_csv('https://raw.githubusercontent.com/PenditWiguna/Capstone/main/Machine%20Learning/Dataset/Dataset%20-%20tourismBali.csv')

# Keep only the columns used by the model. Take an explicit copy so that adding
# 'Category_Encoded' below writes to an independent frame instead of a slice of
# `data` (assigning to the slice is what raised pandas' SettingWithCopyWarning).
df = data[['Place_Id', 'Description', 'Category']].copy()

# Encode the category labels as integer ids.
category_encoder = LabelEncoder()
df['Category_Encoded'] = category_encoder.fit_transform(df['Category'])

# Tokenize the descriptions and right-pad them to a common length.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['Description'])
sequences = tokenizer.texts_to_sequences(df['Description'])
padded_sequences = pad_sequences(sequences, padding='post')

# Length of every padded sequence (second dimension of the padded matrix).
max_sequence_length = padded_sequences.shape[1]

# Model inputs.
X_category = df['Category_Encoded'].values
X_description = padded_sequences


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Category_Encoded'] = category_encoder.fit_transform(df['Category'])


In [None]:
# Show the distinct category labels present in the dataset.
set(df['Category'].unique())

{'Agrowisata',
 'Alam',
 'Belanja',
 'Budaya',
 'Cagar Alam',
 'Pantai',
 'Rekreasi',
 'Religius'}

In [None]:
# Seed Python, NumPy and TensorFlow RNGs so the randomly initialised layer
# weights (and the embeddings / recommendations derived from them) are the same
# on every fresh run of the notebook.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Embedding sizes.
embedding_dim = 50
vocab_size = len(tokenizer.word_index) + 1  # +1 leaves room for id 0, the padding value
category_count = df['Category_Encoded'].nunique()

# Category branch: one integer id per place -> embedding vector.
category_input = tf.keras.layers.Input(shape=(1,), name='category_input')
category_embedding = tf.keras.layers.Embedding(input_dim=category_count, output_dim=embedding_dim, name='category_embedding')(category_input)
category_flatten = tf.keras.layers.Flatten()(category_embedding)

# Description branch: padded token ids -> per-token embeddings -> mean over tokens.
# (The variable is called `description_flatten`, but the layer is an average pooling.)
description_input = tf.keras.layers.Input(shape=(max_sequence_length,), name='description_input')
description_embedding = tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=embedding_dim, name='description_embedding')(description_input)
description_flatten = tf.keras.layers.GlobalAveragePooling1D()(description_embedding)

# Merge both branches and project to the final `embedding_dim`-sized vector.
concatenated = tf.keras.layers.Concatenate()([category_flatten, description_flatten])
output = tf.keras.layers.Dense(embedding_dim, activation='relu')(concatenated)

# Two-input model whose output is used as the place embedding.
model = tf.keras.Model(inputs=[category_input, description_input], outputs=output)
model.compile(optimizer='adam', loss='mse')

model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 category_input (InputLayer  [(None, 1)]                  0         []                            
 )                                                                                                
                                                                                                  
 description_input (InputLa  [(None, 65)]                 0         []                            
 yer)                                                                                             
                                                                                                  
 category_embedding (Embedd  (None, 1, 50)                400       ['category_input[0][0]']      
 ing)                                                                                         

In [None]:
# Train the model using its own output on the inputs as the target
# (labelled "autoencoder" by the original author).
# NOTE(review): the regression target is the model's current prediction for the
# very same inputs, so the MSE loss is ~0 from the first batch and this fit
# leaves the weights essentially unchanged. Confirm whether a different
# training target was intended.
training_inputs = [X_category, X_description]
self_predicted_targets = model.predict(training_inputs)
model.fit(training_inputs, self_predicted_targets, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7ab93190ac50>

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

# Run every place through the model; each output row is that place's embedding.
embeddings = model.predict(x=[X_category, X_description])

# Recommendation function
def recommend(place_id, embeddings, top_k=5):
    """Return the Place_Ids of the places most similar to ``place_id``.

    Similarity is the cosine similarity between rows of ``embeddings``; row
    ``i`` of ``embeddings`` must correspond to row ``i`` (by position) of the
    module-level ``df``.

    Args:
        place_id: Value to look up in ``df['Place_Id']``.
        embeddings: 2-D array with one embedding row per row of ``df``.
        top_k: Maximum number of recommendations to return.

    Returns:
        Array of ``Place_Id`` values ordered from most to least similar,
        never containing the query place itself.

    Raises:
        IndexError: If ``place_id`` does not occur in ``df['Place_Id']``.
    """
    # Work with row *positions*, not index labels: `embeddings` and `df.iloc`
    # are both positional, so this stays correct for any DataFrame index.
    matches = np.flatnonzero((df['Place_Id'] == place_id).to_numpy())
    if matches.size == 0:
        raise IndexError(f"Place_Id {place_id!r} not found in df['Place_Id']")
    place_idx = int(matches[0])

    similarities = cosine_similarity([embeddings[place_idx]], embeddings)[0]

    # Rank most-similar first, then remove the query row explicitly. Dropping
    # "the first hit" instead would return the query itself whenever another
    # place ties with it (e.g. identical embeddings).
    ranked = np.argsort(similarities)[::-1]
    ranked = ranked[ranked != place_idx][:max(top_k, 0)]
    return df.iloc[ranked]['Place_Id'].values



In [None]:
# Load the CSV again to build the Place_Id -> Place_Name lookup used to
# present predictions.
data = pd.read_csv('https://raw.githubusercontent.com/PenditWiguna/Capstone/main/Machine%20Learning/Dataset/Dataset%20-%20tourismBali.csv')
df_convert = data[['Place_Id', 'Place_Name']]

# Dictionary keyed by Place_Id whose values are the place names.
place_names_by_id = df_convert.set_index('Place_Id')['Place_Name']
df2 = place_names_by_id.to_dict()

In [None]:
# Example: recommendations for one place, identified by its Place_Id.
place_id_predict = 23 # Goa Gajah (Religius)
recommendations = recommend(place_id_predict, embeddings)
# Translate the recommended ids into place names.
place_record = [df2[recommended_id] for recommended_id in recommendations]

print(f"Rekomendasi untuk {df2[place_id_predict]}: {place_record}")

Rekomendasi untuk Goa Gajah: ['Pura Puseh Batuan', 'Pura Saraswati', 'Pura Kehen', 'Tirta Gangga', 'Pura Taman Ayun']


# Model Saving

In [None]:
# Save the model to an .h5 file.
model.save(filepath='cbf_model.h5')

  saving_api.save_model(


In [None]:
# Convert the Keras model to TensorFlow Lite format.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Write the converted model to a .tflite file.
with open('recommender_model.tflite', mode='wb') as tflite_file:
    tflite_file.write(tflite_model)

In [None]:
# Write the model architecture to a JSON file.
model_json = model.to_json()
with open('model_architecture.json', mode='w') as architecture_file:
    architecture_file.write(model_json)

# Write the model weights to an HDF5 file.
model.save_weights(filepath='model_weights.h5')

In [None]:
!pip install tensorflowjs

Collecting tensorflowjs
  Downloading tensorflowjs-4.20.0-py3-none-any.whl (89 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/89.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.1/89.1 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
Collecting tensorflow-decision-forests>=1.5.0 (from tensorflowjs)
  Downloading tensorflow_decision_forests-1.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.5/15.5 MB[0m [31m65.1 MB/s[0m eta [36m0:00:00[0m
Collecting packaging~=23.1 (from tensorflowjs)
  Downloading packaging-23.2-py3-none-any.whl (53 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.0/53.0 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
Collecting tensorflow<3,>=2.13.0 (from tensorflowjs)
  Downloading tensorflow-2.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (589.8 MB)
[2

In [None]:
import tensorflowjs as tfjs

# Export the Keras model in SavedModel format.
saved_model_dir = 'saved_model/my_model'
model.export(saved_model_dir)

# Convert that SavedModel to TensorFlow.js format.
tfjs.converters.convert_tf_saved_model(saved_model_dir, 'tfjs_model')

Saved artifact at 'saved_model/my_model'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): List[TensorSpec(shape=(None, 1), dtype=tf.float32, name='category_input'), TensorSpec(shape=(None, 65), dtype=tf.float32, name='description_input')]
Output Type:
  TensorSpec(shape=(None, 50), dtype=tf.float32, name=None)
Captures:
  132464496292640: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132464496288768: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132464496592672: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132464496593200: TensorSpec(shape=(), dtype=tf.resource, name=None)


In [None]:
import shutil
import os

# Folder holding the converted TensorFlow.js model.
tfjs_model_dir = 'tfjs_model'

# Name of the zip file to produce.
output_filename = 'tfjs_model.zip'

# make_archive appends the '.zip' extension itself, so hand it the name
# without the extension.
archive_base_name = os.path.splitext(output_filename)[0]
shutil.make_archive(archive_base_name, 'zip', tfjs_model_dir)

print(f'Folder {tfjs_model_dir} telah berhasil disimpan sebagai {output_filename}')

Folder tfjs_model telah berhasil disimpan sebagai tfjs_model.zip


# Variable Saving

In [None]:
# Model inputs: encoded category ids and padded description token ids.
X_category = df['Category_Encoded'].to_numpy()
X_description = padded_sequences

# Combine both inputs into one matrix: the category id is the first column,
# followed by the description token ids.
X_data = np.column_stack((X_category, X_description))

# Write the combined matrix to CSV as integers.
np.savetxt('X_data.csv', X_data, delimiter=',', fmt='%d')

In [None]:
import json

# Bundle both model inputs into a single dictionary of plain Python lists.
data = dict(
    X_category=X_category.tolist(),
    X_description=X_description.tolist(),
)

# Serialize the dictionary and write it to a JSON file.
with open('data.json', 'w') as f:
    f.write(json.dumps(data))

print("Data telah disimpan ke data.json")

Data telah disimpan ke data.json


# Variable Testing

In [None]:
# Display the encoded category ids (one integer per place) used as model input.
X_category

array([0, 2, 1, 1, 1, 1, 0, 4, 3, 6, 3, 0, 1, 0, 1, 1, 1, 3, 3, 3, 2, 3,
       7, 1, 1, 7, 7, 0, 5, 2, 1, 2, 4, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 7, 5, 5, 5, 2, 7, 7, 7, 7, 7, 7, 7, 5, 7, 7,
       2, 1, 3, 3, 0, 7, 6, 0, 0])

In [None]:
# Display the padded description token-id matrix used as model input.
X_description

array([[375, 376,  60, ...,   0,   0,   0],
       [380,  30,  75, ...,   0,   0,   0],
       [219,  12, 220, ...,   0,   0,   0],
       ...,
       [966,   9,  13, ...,   0,   0,   0],
       [373, 374,   4, ...,   0,   0,   0],
       [ 49,  73,  11, ...,   0,   0,   0]], dtype=int32)