In [8]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import joblib

# Load the dataset of Bali tourist destinations.
df = pd.read_csv("dataset_tempat_wisata_bali.csv")

# Drop rows with a missing 'rating' (the numeric feature) or a missing
# 'preferensi' (the target). A row without a label cannot be used for
# supervised training; depending on the scikit-learn version a missing label
# would either be rejected by LabelEncoder or be encoded as a spurious extra
# class.
df_clean = df.dropna(subset=['rating', 'preferensi'])

# Features: two categorical columns plus the numeric rating.
X = df_clean[['kategori', 'kabupaten_kota', 'rating']]
# Target: the travel-preference label.
y = df_clean['preferensi']

# Map the target labels to integer class ids (0 .. n_classes - 1).
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Column groups handled by the preprocessing pipeline.
categorical_features = ['kategori', 'kabupaten_kota']
numeric_features = ['rating']

# One-hot encode the categorical columns and standardize the numeric one.
# handle_unknown='ignore' makes categories unseen during fit encode as an
# all-zero vector instead of raising at transform time.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
        ('num', StandardScaler(), numeric_features)
    ]
)

# Hold out 20% of the rows as a test set (fixed seed for a repeatable split).
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Fit the preprocessor on the training split only, then apply it to both
# splits so no statistics leak from the test set.
X_train_preprocessed = preprocessor.fit_transform(X_train)
X_test_preprocessed = preprocessor.transform(X_test)

# ColumnTransformer returns a scipy sparse matrix when the stacked output's
# density is below its sparse_threshold, which happens once the one-hot
# columns become numerous. Some Keras versions reject sparse input when
# validation_split is used, so hand the model plain dense arrays. The
# preprocessor object itself is left untouched.
if hasattr(X_train_preprocessed, 'toarray'):
    X_train_preprocessed = X_train_preprocessed.toarray()
if hasattr(X_test_preprocessed, 'toarray'):
    X_test_preprocessed = X_test_preprocessed.toarray()

# Build the deep-learning classifier with TensorFlow/Keras.

# Size the output layer from the fitted label encoder instead of hard-coding
# the class count, so the network stays valid if the dataset gains or loses
# a preference class.
num_classes = len(label_encoder.classes_)

model = models.Sequential([
    # Input width equals the number of columns produced by the preprocessor.
    # layers.Input(shape=...) replaces the deprecated
    # InputLayer(input_shape=...) spelling.
    layers.Input(shape=(X_train_preprocessed.shape[1],)),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(64, activation='relu'),
    # One softmax unit per preference class.
    layers.Dense(num_classes, activation='softmax'),
])

# Compile the model. The sparse variant of categorical cross-entropy is used
# because the targets in y_encoded are integer class ids, not one-hot vectors.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train on the training split; Keras holds out 20% of it for validation.
history = model.fit(
    X_train_preprocessed,
    y_train,
    validation_split=0.2,
    batch_size=32,
    epochs=10,
)

# Measure loss and accuracy on the held-out test split.
test_loss, test_acc = model.evaluate(X_test_preprocessed, y_test)
print(f'Test accuracy: {test_acc}')

# Persist everything needed at inference time: the trained network, the
# label encoder (to map predicted class ids back to preference labels) and
# the fitted preprocessor (to transform raw input the same way as in training).
model.save('model_preferensi_tensorflow.h5')
joblib.dump(label_encoder, 'label_encoder.pkl')
joblib.dump(preprocessor, 'preprocessor.pkl')




Epoch 1/10
15/15 ━━━━━━━━━━━━━━━━━━━━ 3s 28ms/step - accuracy: 0.4475 - loss: 1.3252 - val_accuracy: 0.7167 - val_loss: 1.1356
Epoch 2/10
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 13ms/step - accuracy: 0.7368 - loss: 1.0499 - val_accuracy: 0.8083 - val_loss: 0.8540
Epoch 3/10
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 11ms/step - accuracy: 0.8500 - loss: 0.7380 - val_accuracy: 0.9167 - val_loss: 0.5497
Epoch 4/10
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 11ms/step - accuracy: 0.9457 - loss: 0.4422 - val_accuracy: 0.9750 - val_loss: 0.2975
Epoch 5/10
15/15 ━━━━━━━━━━━━━━━━━━━━ 0s 22ms/step - accuracy: 0.9887 - loss: 0.2477 - val_accuracy: 1.0000 - val_loss: 0.1416
Epoch 6/10
15/15 ━━━━━━━━━━━━━━━━━━━━ 1s 22ms/step - accuracy: 1.0000 - loss: 0.1144 - val_accuracy: 1.0000 - val_loss: 0.0651
Epoch 7/10
[1m15/15[0m [32m━━━━



Test accuracy: 1.0


['preprocessor.pkl']