# Setup and Initialization

## Import Library

In [None]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
from collections import defaultdict
import numpy as np
import numpy.ma as ma
import pandas as pd
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Dense, Concatenate
from tensorflow.keras.models import Model
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import plotly.graph_objects as go
from numpy import genfromtxt
from matplotlib.colors import LinearSegmentedColormap
from matplotlib import colors as mcolors
from scipy.stats import linregress
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from yellowbrick.cluster import KElbowVisualizer, SilhouetteVisualizer
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score
from sklearn.cluster import KMeans
from tabulate import tabulate
from collections import Counter

%matplotlib inline
pd.set_option("display.precision", 1)

## Loading Dataset

In [None]:
url = "https://raw.githubusercontent.com/davanoraffi/CapstoneBangkit/main/Dataset/datasetfix.csv"
df = pd.read_csv(url,encoding="ISO-8859-1")

In [None]:
df.head()

Unnamed: 0,id,nama,age,gender,harga_beli,kind_of_food,restaurant,friends,jumlah_klik_produk_tertentu,review,lokasi_resto
0,1,User1,39,Male,66.0,Beverage,JCo,313,1,"Excellent taste, worth every penny!",Jogja
1,2,User2,40,Male,44.2,Healthy,Burger King,483,2,"Excellent taste, worth every penny!",Jakarta
2,3,User3,24,Female,8.9,Dessert,Burger King,172,17,The food was bland and tasteless.,Malang
3,4,User4,37,Male,86.2,Beverage,Starbucks,17,17,The food was bland and tasteless.,Bandung
4,5,User5,61,Male,18.6,Beverage,McDonald's,431,5,Very tasty and well-prepared.,Malang


In [None]:
dataset = df.dropna()

# Step 1: Define positive and negative keywords
positive_keywords = ['excellent', 'tasty', 'delicious', 'worth', 'well-prepared', 'fresh', 'great', 'amazing', 'highly']
negative_keywords = ['bland', 'tasteless', 'poor', 'terrible', 'awful', 'disappointing', 'unimpressed', 'not']

# Step 2: Define a function to categorize the review
def categorize_review(review):
    review = review.lower()
    positive_count = sum([review.count(word) for word in positive_keywords])
    negative_count = sum([review.count(word) for word in negative_keywords])
    if positive_count > negative_count:
        return 'Positive'
    elif negative_count > positive_count:
        return 'Negative'
    else:
        return 'Neutral'

# Step 3: Apply the function to the dataset
dataset['review_category'] = dataset['review'].apply(categorize_review)

# Display the first few rows with the new category column
dataset[['review', 'review_category']].head(10)

Unnamed: 0,review,review_category
0,"Excellent taste, worth every penny!",Positive
1,"Excellent taste, worth every penny!",Positive
2,The food was bland and tasteless.,Negative
3,The food was bland and tasteless.,Negative
4,Very tasty and well-prepared.,Positive
5,"Poor quality, not worth the money.",Negative
6,"Excellent taste, worth every penny!",Positive
7,The food was delicious and fresh!,Positive
8,"Terrible experience, food was awful.",Negative
9,"Unimpressed with the taste, won't return.",Negative


In [None]:
# Drop specified columns in place
dataset.drop(columns=['review'], inplace=True)
dataset.drop(columns=['friends'], inplace=True)

# Display the columns after dropping
print("Columns after dropping:", dataset.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 10 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   id                           10000 non-null  int64  
 1   nama                         10000 non-null  object 
 2   age                          10000 non-null  int64  
 3   gender                       10000 non-null  object 
 4   harga_beli                   10000 non-null  float64
 5   kind_of_food                 10000 non-null  object 
 6   restaurant                   10000 non-null  object 
 7   jumlah_klik_produk_tertentu  10000 non-null  int64  
 8   lokasi_resto                 10000 non-null  object 
 9   review_category              10000 non-null  object 
dtypes: float64(1), int64(3), object(6)
memory usage: 781.4+ KB
Columns after dropping: None


# Modeling Data

### Versi 2 (GPT)

In [None]:
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split

# Encode categorical variables
label_encoders = {}
for column in ['gender', 'restaurant', 'kind_of_food',
               'review_category', 'lokasi_resto']:
    le = LabelEncoder()
    dataset[column + '_encoded'] = le.fit_transform(dataset[column])
    label_encoders[column] = le

# Standardize numerical variables
num_label_encoders = {}
for column in ['harga_beli', 'age', 'jumlah_klik_produk_tertentu']:
    scaler = StandardScaler()
    dataset[column + '_num_encoded'] = scaler.fit_transform(dataset[[column]])
    num_label_encoders[column] = scaler

# Features and target
user_features = ['age_num_encoded', 'gender_encoded']
item_features = ['harga_beli_num_encoded', 'kind_of_food_encoded', 'restaurant_encoded', 'jumlah_klik_produk_tertentu_num_encoded', 'lokasi_resto_encoded', 'review_category_encoded']

X_user = dataset[user_features].values
X_item = dataset[item_features].values
y = dataset['jumlah_klik_produk_tertentu_num_encoded'].values

# Save original indices
dataset['index'] = dataset.index

# Split the data, including indices
X_user_train, X_user_test, X_item_train, X_item_test, y_train, y_test, index_train, index_test = train_test_split(
    X_user, X_item, y, dataset['index'], test_size=0.2, random_state=42)

In [None]:
dataset

Unnamed: 0,id,nama,age,gender,harga_beli,kind_of_food,restaurant,jumlah_klik_produk_tertentu,lokasi_resto,review_category,gender_encoded,jumlah_klik_produk_tertentu_encoded,restaurant_encoded,kind_of_food_encoded,review_category_encoded,lokasi_resto_encoded,harga_beli_num_encoded,age_num_encoded,jumlah_klik_produk_tertentu_num_encoded,index
0,1,User1,39,Male,66.0,Beverage,JCo,1,Jogja,Positive,1,1,1,0,1,2,0.5,-0.3,-1.5e+00,0
1,2,User2,40,Male,44.2,Healthy,Burger King,2,Jakarta,Positive,1,2,0,3,1,1,-0.3,-0.2,-1.3e+00,1
2,3,User3,24,Female,8.9,Dessert,Burger King,17,Malang,Negative,0,17,0,1,0,3,-1.6,-1.3,1.3e+00,2
3,4,User4,37,Male,86.2,Beverage,Starbucks,17,Bandung,Negative,1,17,5,0,0,0,1.2,-0.4,1.3e+00,3
4,5,User5,61,Male,18.6,Beverage,McDonald's,5,Malang,Positive,1,5,3,0,1,3,-1.2,1.2,-7.8e-01,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,User9996,34,Female,41.8,Fast Food,JCo,9,Bandung,Positive,0,9,1,2,1,0,-0.4,-0.6,-9.3e-02,9995
9996,9997,User9997,56,Male,31.9,Fast Food,KFC,18,Surabaya,Positive,1,18,2,2,1,4,-0.7,0.8,1.5e+00,9996
9997,9998,User9998,58,Male,22.4,Dessert,KFC,17,Jakarta,Positive,1,17,2,1,1,1,-1.1,1.0,1.3e+00,9997
9998,9999,User9999,64,Female,93.8,Fast Food,Starbucks,2,Jakarta,Positive,0,2,5,2,1,1,1.5,1.4,-1.3e+00,9998


In [None]:
# Normalize the features
scaler_user = StandardScaler()
scaler_item = StandardScaler()
scaler_target = StandardScaler()  # Untuk menormalisasi target

X_user_train = scaler_user.fit_transform(X_user_train)
X_item_train = scaler_item.fit_transform(X_item_train)
X_user_test = scaler_user.transform(X_user_test)
X_item_test = scaler_item.transform(X_item_test)
y_train = scaler_target.fit_transform(y_train.reshape(-1, 1))
y_test = scaler_target.transform(y_test.reshape(-1, 1))

# Set configuration variables
num_user_features = X_user_train.shape[1]
num_item_features = X_item_train.shape[1]

In [None]:
num_outputs = 32
tf.random.set_seed(1)

user_NN = tf.keras.models.Sequential([
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(num_outputs, activation='linear'),
])

item_NN = tf.keras.models.Sequential([
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(num_outputs, activation='linear'),
])

# create the user input and point to the base network
input_user = tf.keras.layers.Input(shape=(num_user_features,))
vu = user_NN(input_user)
vu = tf.keras.layers.LayerNormalization(axis=1)(vu)

# create the item input and point to the base network
input_item = tf.keras.layers.Input(shape=(num_item_features,))
vm = item_NN(input_item)
vm = tf.keras.layers.LayerNormalization(axis=1)(vm)

# compute the dot product of the two vectors vu and vm
output = tf.keras.layers.Dot(axes=1)([vu, vm])

# specify the inputs and output of the model
model = Model([input_user, input_item], output)

model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 2)]                  0         []                            
                                                                                                  
 input_2 (InputLayer)        [(None, 6)]                  0         []                            
                                                                                                  
 sequential (Sequential)     (None, 32)                   37792     ['input_1[0][0]']             
                                                                                                  
 sequential_1 (Sequential)   (None, 32)                   38816     ['input_2[0][0]']             
                                                                                              

In [None]:
tf.random.set_seed(1)
cost_fn = tf.keras.losses.MeanSquaredError()
opt = keras.optimizers.Adam(learning_rate=0.01)
model.compile(optimizer=opt,
              loss=cost_fn)

In [None]:
history = model.fit([X_user_train, X_item_train], y_train, epochs=10, batch_size=32, validation_data=([X_user_test, X_item_test], y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
import numpy as np
import pandas as pd

# Evaluate the model
results = model.evaluate([X_user_test, X_item_test], y_test)
print(f'Evaluation Results: {results}')

# Recommendation function with inverse transform
def recommend(user_features, item_features, model, scaler_target, top_n=10):
    predictions = model.predict([user_features, item_features])
    predictions_unscaled = scaler_target.inverse_transform(predictions.reshape(-1, 1)).flatten()
    top_indices = predictions_unscaled.argsort()[-top_n:][::-1]
    return top_indices

# Example usage for recommendations:
user_example = X_user_test[1].reshape(1, -1)
user_example_repeated = np.repeat(user_example, X_item_test.shape[0], axis=0)

recommended_indices = recommend(user_example_repeated, X_item_test, model, scaler_target)

# Specify columns to display
columns_to_display = ["harga_beli", "kind_of_food", "restaurant", "jumlah_klik_produk_tertentu"]

# Get the recommended items
recommended_items = dataset.iloc[recommended_indices][columns_to_display]
print("Recommended Items:")
print(recommended_items)

Evaluation Results: 0.0010878327302634716
Recommended Items:
      harga_beli kind_of_food   restaurant  jumlah_klik_produk_tertentu
1877        28.4      Healthy   McDonald's                           17
1583        87.1    Fast Food          KFC                           15
621         92.2    Fast Food  Burger King                           14
1816        81.7    Fast Food  Burger King                            3
508         49.6      Dessert    Starbucks                           17
1853        59.3      Dessert  Burger King                            4
747         94.5      Dessert          KFC                            1
507         77.8      Healthy   McDonald's                           17
313         61.0      Dessert  Burger King                           16
1453        45.8    Fast Food          JCo                           14


In [None]:
# Make predictions for the entire test set
y_p = model.predict([X_user_test, X_item_test])

# Unscale y prediction
y_pu = scaler_target.inverse_transform(y_p.reshape(-1, 1))

# Add predictions to the dataset as a new column using the original indices
dataset.loc[index_test, 'predicted_jumlah_klik_produk_tertentu'] = y_pu.flatten()

# Sort the dataset by the new predictions column in descending order
sorted_dataset = dataset.sort_values(by='predicted_jumlah_klik_produk_tertentu', ascending=False)

# Display the sorted dataset with the new predictions column
sorted_dataset.head(10)



Unnamed: 0,id,nama,age,gender,harga_beli,kind_of_food,restaurant,jumlah_klik_produk_tertentu,lokasi_resto,review_category,...,jumlah_klik_produk_tertentu_encoded,restaurant_encoded,kind_of_food_encoded,review_category_encoded,lokasi_resto_encoded,harga_beli_num_encoded,age_num_encoded,jumlah_klik_produk_tertentu_num_encoded,index,predicted_jumlah_klik_produk_tertentu
6595,6596,User6596,26,Male,46.6,Fast Food,KFC,19,Malang,Positive,...,19,2,2,1,3,-0.2,-1.2,1.6,6595,1.7
9966,9967,User9967,18,Male,10.1,Dessert,KFC,19,Malang,Positive,...,19,2,1,1,3,-1.5,-1.7,1.6,9966,1.7
4011,4012,User4012,33,Male,64.5,Dessert,Burger King,19,Jakarta,Positive,...,19,0,1,1,1,0.4,-0.7,1.6,4011,1.7
1937,1938,User1938,26,Male,49.3,Fast Food,KFC,19,Jakarta,Positive,...,19,2,2,1,1,-0.1,-1.2,1.6,1937,1.7
3846,3847,User3847,35,Male,23.9,Dessert,JCo,19,Malang,Positive,...,19,1,1,1,3,-1.0,-0.6,1.6,3846,1.7
4822,4823,User4823,28,Female,75.0,Fast Food,JCo,19,Jakarta,Negative,...,19,1,2,0,1,0.8,-1.0,1.6,4822,1.7
2183,2184,User2184,60,Male,43.8,Fast Food,JCo,19,Jakarta,Positive,...,19,1,2,1,1,-0.3,1.1,1.6,2183,1.7
135,136,User136,61,Male,44.9,Healthy,KFC,19,Jakarta,Positive,...,19,2,3,1,1,-0.3,1.2,1.6,135,1.7
7146,7147,User7147,35,Male,30.0,Dessert,McDonald's,19,Bandung,Negative,...,19,3,1,0,0,-0.8,-0.6,1.6,7146,1.7
2991,2992,User2992,60,Male,59.3,Fast Food,Burger King,19,Jakarta,Positive,...,19,0,2,1,1,0.3,1.1,1.6,2991,1.7


In [None]:
import numpy as np

# Step 1: Definisikan data pengguna baru dengan semua variabel
new_user_data = {
    'age': 30,  # Umur
    'gender': 'Male',  # Gender
    'harga_beli': 50.0,  # Harga beli
    'kind_of_food': 'Beverage',  # Jenis makanan
    'restaurant': 'Starbucks',  # Restoran
    'jumlah_klik_produk_tertentu': 10,  # Jumlah klik produk tertentu
    'review': 'Excellent taste, worth every penny!',  # Review
    'lokasi_resto': 'Jakarta'  # Lokasi restoran
}
# new_user_data.rename(columns={'jumlah_klik_produk_tertentu': 'jumlah_klik_produk_tertentu'}, inplace=True)
new_user_data['review_category'] = categorize_review(new_user_data['review'])

# Step 2: Normalisasi dan encode fitur-fitur pengguna baru
# Normalisasi umur dan harga beli menggunakan scaler yang sudah dilatih
new_user_age_normalized = num_label_encoders['age'].transform([[new_user_data['age']]])
new_user_harga_beli_normalized = num_label_encoders['harga_beli'].transform([[new_user_data['harga_beli']]])
new_user_jumlah_klik_normalized = num_label_encoders['jumlah_klik_produk_tertentu'].transform([[new_user_data['jumlah_klik_produk_tertentu']]])

# Encode gender, jenis makanan, restoran, review, dan lokasi restoran menggunakan label encoder yang sudah dilatih
new_user_gender_encoded = label_encoders['gender'].transform([new_user_data['gender']])
new_user_kind_of_food_encoded = label_encoders['kind_of_food'].transform([new_user_data['kind_of_food']])
new_user_restaurant_encoded = label_encoders['restaurant'].transform([new_user_data['restaurant']])
new_user_review_encoded = label_encoders['review_category'].transform([new_user_data['review_category']])
new_user_lokasi_resto_encoded = label_encoders['lokasi_resto'].transform([new_user_data['lokasi_resto']])

# Gabungkan fitur-fitur pengguna baru menjadi satu array untuk pengguna dan item
new_user_features = np.array([new_user_age_normalized[0][0], new_user_gender_encoded[0]]).reshape(1, -1)
new_item_features = np.array([new_user_harga_beli_normalized[0][0], new_user_kind_of_food_encoded[0], new_user_restaurant_encoded[0], new_user_jumlah_klik_normalized[0][0], new_user_lokasi_resto_encoded[0]]).reshape(1, -1)

# Step 3: Ulangi data pengguna baru untuk setiap item dalam data uji
new_user_repeated = np.repeat(new_user_features, X_item_test.shape[0], axis=0)
new_item_repeated = np.tile(new_item_features, (X_user_test.shape[0], 1))

# Step 4: Gunakan fungsi recommend untuk mendapatkan rekomendasi
recommended_indices = recommend(new_user_repeated, X_item_test, model, scaler_target)

# Step 5: Tampilkan item yang direkomendasikan
columns_to_display = ["harga_beli", "kind_of_food", "restaurant", "jumlah_klik_produk_tertentu"]
recommended_items = dataset.iloc[recommended_indices][columns_to_display]

print("Recommended Items:")
print(recommended_items)

Recommended Items:
      harga_beli kind_of_food   restaurant  jumlah_klik_produk_tertentu
747         94.5      Dessert          KFC                            1
1853        59.3      Dessert  Burger King                            4
313         61.0      Dessert  Burger King                           16
507         77.8      Healthy   McDonald's                           17
1453        45.8    Fast Food          JCo                           14
1583        87.1    Fast Food          KFC                           15
1877        28.4      Healthy   McDonald's                           17
1303        86.6      Healthy    Mi Gacoan                           19
666         88.5    Fast Food    Starbucks                           14
309         44.9    Fast Food  Burger King                           17


# Save The Model

In [None]:
! pip show keras

Name: keras
Version: 2.15.0
Summary: Deep learning for humans.
Home-page: https://keras.io/
Author: Keras team
Author-email: keras-users@googlegroups.com
License: Apache 2.0
Location: /usr/local/lib/python3.10/dist-packages
Requires: 
Required-by: tensorflow


In [None]:
model.save('ML_Model.h5')

In [None]:
!pip install tensorflowjs

Collecting tensorflowjs
  Downloading tensorflowjs-4.20.0-py3-none-any.whl (89 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/89.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.1/89.1 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
Collecting tensorflow-decision-forests>=1.5.0 (from tensorflowjs)
  Downloading tensorflow_decision_forests-1.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.5/15.5 MB[0m [31m44.7 MB/s[0m eta [36m0:00:00[0m
Collecting packaging~=23.1 (from tensorflowjs)
  Downloading packaging-23.2-py3-none-any.whl (53 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.0/53.0 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
Collecting tensorflow<3,>=2.13.0 (from tensorflowjs)
  Downloading tensorflow-2.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (589.8 MB)
[2

In [None]:
!tensorflowjs_converter --input_format=keras /content/ML_Model.h5 /content/my_tfjs_model

