In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.feature_extraction.text import TfidfVectorizer
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.optimizers import Adam

In [None]:
# Load the dataset from an Excel workbook into a DataFrame.
# NOTE(review): the path is absolute and looks like a Colab upload location;
# adjust it when running the notebook anywhere else.
file_path = '/content/wisata eng.xlsx'
data = pd.read_excel(file_path)

In [None]:
# Preview the top of the table (head() shows five rows by default).
data.head()

Unnamed: 0,Place_Id,Place_Name,Description,Categories,City,Price,Ratings,Time_Minutes,Coordinate,Lat,Long,Column1,_1,Rating_Count
0,1,Monumen Nasional,The National Monument or what is popularly kno...,Culture,Jakarta,20000,46,15.0,"{'lat': -6.1753924, 'lng': 106.8271528}",-61753924,1068271528,,1,18
1,2,Kota Tua,"The old city in Jakarta, which is also called ...",Culture,Jakarta,0,46,90.0,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-61376448,1068171245,,2,25
2,3,Dunia Fantasi,Dunia Fantasi or also called Dufan is an enter...,amusement parks,Jakarta,270000,46,360.0,"{'lat': -6.125312399999999, 'lng': 106.8335377}",-61253124,1068335377,,3,19
3,4,Taman Mini Indonesia Indah (TMII),Taman Mini Indonesia Indah is a tourist park a...,amusement parks,Jakarta,10000,45,,"{'lat': -6.302445899999999, 'lng': 106.8951559}",-63024459,1068951559,,4,21
4,5,Atlantis Water Adventure,Atlantis Water Adventure or known as Atlantis ...,amusement parks,Jakarta,94000,45,60.0,"{'lat': -6.12419, 'lng': 106.839134}",-612419,106839134,,5,24


In [None]:
# Column dtypes and non-null counts, used to spot columns with missing values.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 437 entries, 0 to 436
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Place_Id      437 non-null    int64  
 1   Place_Name    437 non-null    object 
 2   Description   437 non-null    object 
 3   Categories    437 non-null    object 
 4   City          437 non-null    object 
 5   Price         437 non-null    int64  
 6   Ratings       437 non-null    int64  
 7   Time_Minutes  205 non-null    float64
 8   Coordinate    437 non-null    object 
 9   Lat           437 non-null    int64  
 10  Long          437 non-null    int64  
 11  Column1       0 non-null      float64
 12  _1            437 non-null    int64  
 13  Rating_Count  437 non-null    int64  
dtypes: float64(2), int64(7), object(5)
memory usage: 47.9+ KB


In [None]:
# Number of missing values in each column.
data.isna().sum()

Place_Id          0
Place_Name        0
Description       0
Categories        0
City              0
Price             0
Ratings           0
Time_Minutes    232
Coordinate        0
Lat               0
Long              0
Column1         437
_1                0
Rating_Count      0
dtype: int64

In [None]:
# Drop the rows that have no Time_Minutes value (modifies `data` in place).
data.dropna(subset=['Time_Minutes'], inplace=True)

In [None]:
# Summary statistics of the numeric columns for the rows that remain.
data.describe()

Unnamed: 0,Place_Id,Price,Ratings,Time_Minutes,Lat,Long,Column1,_1,Rating_Count
count,205.0,205.0,205.0,205.0,205.0,205.0,0.0,205.0,205.0
mean,225.55122,22192.682927,42.946341,82.609756,-61605700.0,913714900.0,,225.55122,22.068293
std,131.261203,51002.049124,7.549967,52.872339,24073390.0,388613900.0,,131.261203,4.475995
min,1.0,0.0,4.0,10.0,-81759270.0,107617.0,,1.0,11.0
25%,117.0,0.0,43.0,45.0,-76078740.0,1068190000.0,,117.0,19.0
50%,225.0,5000.0,44.0,60.0,-69888810.0,1076875000.0,,225.0,22.0
75%,350.0,15000.0,46.0,120.0,-61951800.0,1104247000.0,,350.0,25.0
max,435.0,375000.0,49.0,360.0,-68963.0,1128217000.0,,435.0,37.0


In [None]:
# Drop Column1, which data.info() reports as having no non-null values.
data.drop('Column1', axis=1, inplace=True)

In [None]:
# Drop the Place_Id column (it is not used by any of the cells below).
data.drop('Place_Id', axis=1, inplace=True)

In [None]:
# Preview the table again to confirm the two columns were removed.
data.head()

Unnamed: 0,Place_Name,Description,Categories,City,Price,Ratings,Time_Minutes,Coordinate,Lat,Long,_1,Rating_Count
0,Monumen Nasional,The National Monument or what is popularly kno...,Culture,Jakarta,20000,46,15.0,"{'lat': -6.1753924, 'lng': 106.8271528}",-61753924,1068271528,1,18
1,Kota Tua,"The old city in Jakarta, which is also called ...",Culture,Jakarta,0,46,90.0,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-61376448,1068171245,2,25
2,Dunia Fantasi,Dunia Fantasi or also called Dufan is an enter...,amusement parks,Jakarta,270000,46,360.0,"{'lat': -6.125312399999999, 'lng': 106.8335377}",-61253124,1068335377,3,19
4,Atlantis Water Adventure,Atlantis Water Adventure or known as Atlantis ...,amusement parks,Jakarta,94000,45,60.0,"{'lat': -6.12419, 'lng': 106.839134}",-612419,106839134,5,24
5,Taman Impian Jaya Ancol,Taman Impian Jaya Ancol is a tourist attractio...,amusement parks,Jakarta,25000,45,10.0,"{'lat': -6.117333200000001, 'lng': 106.8579951}",-61173332,1068579951,6,24


In [None]:
# Drop the rows whose Categories value is empty.
# .copy() makes the filtered result an independent DataFrame, so the column
# that is added to `data` further down is written to this frame itself and
# does not raise pandas' chained-assignment (SettingWithCopy) warning.
data = data[data['Categories'].apply(lambda x: len(x) > 0)].copy()

In [None]:
# Add a label column based on the category.
# Lower-case category name -> integer class label. Insertion order is the
# lookup priority when a value mentions more than one category.
_CATEGORY_LABELS = {
    'culture': 0,
    'amusement parks': 1,
    'nature preserve': 2,
    'nautical': 3,
    'shopping center': 4,
    'worship place': 5,
}


def label_Categories(Categories_list):
    """Return the integer class label for a category value.

    Matching ignores letter case. The dataset stores some categories in lower
    case (e.g. 'amusement parks'), and a case-sensitive comparison left those
    rows without a label, so they were silently dropped later on.

    Parameters
    ----------
    Categories_list : str or iterable
        Either the category text (matched by substring) or a collection of
        category names (matched by membership).

    Returns
    -------
    int or None
        The label of the first known category found, or None when nothing
        matches or the value is neither a string nor iterable (e.g. NaN).
    """
    if isinstance(Categories_list, str):
        haystack = Categories_list.casefold()
    else:
        try:
            haystack = [str(entry).strip().casefold() for entry in Categories_list]
        except TypeError:
            # Not a string and not iterable (for example a NaN cell).
            return None

    for category_name, label in _CATEGORY_LABELS.items():
        if category_name in haystack:
            return label
    return None

# Compute the label for every row; values that match no known category become NaN.
data['Categories_Label'] = data['Categories'].apply(label_Categories)

# Show each category next to the label it was given.
print(data[['Categories', 'Categories_Label']])

          Categories  Categories_Label
0            Culture               0.0
1            Culture               0.0
2    amusement parks               NaN
4    amusement parks               NaN
5    amusement parks               NaN
..               ...               ...
428          Culture               0.0
429  amusement parks               NaN
431  amusement parks               NaN
432          Culture               0.0
434  amusement parks               NaN

[205 rows x 2 columns]


In [None]:
# Re-check missing values; a NaN in Categories_Label marks a row whose
# category matched none of the names known to label_Categories.
data.isna().sum()

Place_Name           0
Description          0
Categories           0
City                 0
Price                0
Ratings              0
Time_Minutes         0
Coordinate           0
Lat                  0
Long                 0
_1                   0
Rating_Count         0
Categories_Label    72
dtype: int64

In [None]:
# Remove the rows that did not receive a label (modifies `data` in place).
data.dropna(subset=['Categories_Label'], inplace=True)

In [None]:
# Encode labels as consecutive integer class ids; le.classes_ is later used
# to size the output layer of the network.
le = LabelEncoder()
data['Categories_Label'] = le.fit_transform(data['Categories_Label'])
y = data['Categories_Label'].values

# Decide the train/test partition first, by row position. Same test_size and
# random_state as before, so the rows land in the same split as when the
# feature matrix itself was passed to train_test_split.
row_positions = np.arange(len(data))
train_pos, test_pos = train_test_split(row_positions, test_size=0.2, random_state=42)

# Text vectorization (TF-IDF). The vocabulary and IDF weights are learned from
# the training rows only; fitting on every row would leak information about
# the test descriptions into the features and inflate the test score.
tfidf = TfidfVectorizer(max_features=5000)
tfidf.fit(data['Description'].iloc[train_pos])
X_description = tfidf.transform(data['Description']).toarray()

# Combine features: TF-IDF columns followed by the two numeric columns.
X = np.hstack((X_description, data[['Ratings', 'Rating_Count']].values))

# Split data using the partition chosen above.
X_train, X_test = X[train_pos], X[test_pos]
y_train, y_test = y[train_pos], y[test_pos]

In [None]:
# Feed-forward classifier over the combined feature vector.
# The input width is declared with an explicit Input object instead of the
# `input_dim` keyword on the first Dense layer, which newer Keras releases
# warn about for Sequential models.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    # One output unit per encoded class; softmax yields class probabilities.
    Dense(len(le.classes_), activation='softmax')
])

In [None]:
# Integer class ids are used as targets, hence the sparse variant of the
# categorical cross-entropy loss.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the network; a fifth of the training rows is held out for the
# per-epoch validation metrics stored in `history`.
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
# Score the trained model on the held-out test split and report the
# accuracy as a percentage.
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy*100:.2f}%")

Test Accuracy: 85.19%


In [None]:
# Write the trained model to 'my_model.h5' (relative to the working directory).
# NOTE(review): the cell output shows a warning raised from saving_api.save_model;
# presumably the notice about the legacy HDF5 format. Confirm, and consider the
# native '.keras' format if nothing else depends on this file name.
model.save('my_model.h5')

  saving_api.save_model(
