<a href="https://colab.research.google.com/github/imammarzuki/Eksperimen/blob/main/HybridNCF_CBF_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate, Dropout
from tensorflow.keras.optimizers import Adam


In [2]:
data = {
    'user_id': [1, 1, 2, 2, 3, 3, 4, 4],
    'item_id': [101, 102, 101, 103, 102, 104, 101, 104],
    'description': [
        'nature beach sunset',
        'historic culture museum',
        'nature mountains hiking',
        'urban shopping nightlife',
        'historic culture museum',
        'nature beach sunset',
        'urban shopping nightlife',
        'nature mountains hiking'
    ],
    'rating1': [3, 2, 4, 3, 1, 5, 5, 4],
    'rating2': [2, 4, 3, 5, 3, 4, 4, 2]
}

df = pd.DataFrame(data)
# Menggabungkan rating dengan rata-rata
df['rating'] = df[['rating1', 'rating2']].mean(axis=1)

# TF-IDF Vectorizer
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(df['description']).toarray()

# Encoding user_id and item_id
user_ids = df['user_id'].unique().tolist()
user_id_mapping = {x: i for i, x in enumerate(user_ids)}
item_ids = df['item_id'].unique().tolist()
item_id_mapping = {x: i for i, x in enumerate(item_ids)}

df['user_id'] = df['user_id'].map(user_id_mapping)
df['item_id'] = df['item_id'].map(item_id_mapping)

# Train test split
X = df[['user_id', 'item_id']]
y = df['rating']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)
X_train_tfidf = tfidf_matrix[X_train.index]
X_test_tfidf = tfidf_matrix[X_test.index]


In [3]:
# Input layers
user_input = Input(shape=(1,))
item_input = Input(shape=(1,))
tfidf_input = Input(shape=(tfidf_matrix.shape[1],))

# Embedding layers
user_embedding = Embedding(input_dim=len(user_id_mapping), output_dim=8)(user_input)
item_embedding = Embedding(input_dim=len(item_id_mapping), output_dim=8)(item_input)

# Flatten the embeddings
user_vec = Flatten()(user_embedding)
item_vec = Flatten()(item_embedding)

# Concatenate features
concat = Concatenate()([user_vec, item_vec, tfidf_input])

# Neural network for deep features
dense = Dense(128, activation='relu')(concat)
dropout = Dropout(0.5)(dense)
output = Dense(1)(dropout)

model = Model(inputs=[user_input, item_input, tfidf_input], outputs=output)
model.compile(optimizer=Adam(0.01), loss='mean_squared_error')

# Model summary
model.summary()


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 1)]                  0         []                            
                                                                                                  
 input_2 (InputLayer)        [(None, 1)]                  0         []                            
                                                                                                  
 embedding (Embedding)       (None, 1, 8)                 32        ['input_1[0][0]']             
                                                                                                  
 embedding_1 (Embedding)     (None, 1, 8)                 32        ['input_2[0][0]']             
                                                                                              

In [4]:
model.fit([X_train.user_id, X_train.item_id, X_train_tfidf], y_train, epochs=20, batch_size=2, verbose=1)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x7ef03ae8d720>

In [5]:
# Predictions
predictions = model.predict([X_test.user_id, X_test.item_id, X_test_tfidf]).flatten()

# Metrics
mae = mean_absolute_error(y_test, predictions)
rmse = np.sqrt(mean_squared_error(y_test, predictions))
predicted_classes = np.round(predictions)
cm = confusion_matrix(y_test.astype('int'), predicted_classes.astype('int'), labels=[1, 2, 3, 4, 5])

print(f"MAE: {mae}")
print(f"RMSE: {rmse}")
print("Confusion Matrix:\n", cm)

MAE: 1.8516911268234253
RMSE: 1.9680307761431184
Confusion Matrix:
 [[0 0 0 0 0]
 [0 0 0 0 0]
 [0 1 0 0 0]
 [0 1 0 0 0]
 [0 0 0 0 0]]
