<a href="https://colab.research.google.com/github/imammarzuki/Eksperimen/blob/main/HybridCF_CBF_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import mean_absolute_error, mean_squared_error, confusion_matrix
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dot, Dense, Concatenate, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2

In [12]:
data = {
    'user_id': [1, 1, 2, 2, 3, 3, 4, 4],
    'item_id': [101, 102, 101, 103, 102, 104, 101, 104],
    'description': [
        'nature beach sunset',
        'historic culture museum',
        'nature mountains hiking',
        'urban shopping nightlife',
        'historic culture museum',
        'nature beach sunset',
        'urban shopping nightlife',
        'nature mountains hiking'
    ],
    'rating1': [3, 2, 4, 3, 1, 5, 5, 4],
    'rating2': [2, 4, 3, 5, 3, 4, 4, 2]
}

df = pd.DataFrame(data)
# Menggabungkan beberapa rating menjadi satu dengan rata-rata (sederhana)
df['rating'] = df[['rating1', 'rating2']].mean(axis=1)


In [13]:
# Menghitung TF-IDF dari deskripsi
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['description']).toarray()

# Membuat ID yang unik untuk user dan item
user_ids = df['user_id'].unique().tolist()
user_id_mapping = {x: i for i, x in enumerate(user_ids)}
item_ids = df['item_id'].unique().tolist()
item_id_mapping = {x: i for i, x in enumerate(item_ids)}

# Membuat kolom baru untuk user dan item ID yang di-mapped
df['user_id'] = df['user_id'].map(user_id_mapping)
df['item_id'] = df['item_id'].map(item_id_mapping)

# Splitting the data
X = df[['user_id', 'item_id']]
y = df['rating']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
X_train_tfidf = tfidf_matrix[X_train.index]
X_test_tfidf = tfidf_matrix[X_test.index]


In [14]:
# Input layers
user_input = Input(shape=(1,))
item_input = Input(shape=(1,))
tfidf_input = Input(shape=(tfidf_matrix.shape[1],))

# Embeddings layers
user_embedding = Embedding(output_dim=5, input_dim=len(user_id_mapping), input_length=1)(user_input)
item_embedding = Embedding(output_dim=5, input_dim=len(item_id_mapping), input_length=1)(item_input)
user_vec = Flatten()(user_embedding)
item_vec = Flatten()(item_embedding)

# Concatenate all features
concat = Concatenate()([user_vec, item_vec, tfidf_input])

# Neural network
dense = Dense(128, activation='relu')(concat)
dropout = Dropout(0.5)(dense)
output = Dense(1)(dropout)

model = Model(inputs=[user_input, item_input, tfidf_input], outputs=output)
model.compile(optimizer=Adam(0.001), loss='mean_squared_error')

# Model summary
model.summary()


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 1)]                  0         []                            
                                                                                                  
 input_2 (InputLayer)        [(None, 1)]                  0         []                            
                                                                                                  
 embedding (Embedding)       (None, 1, 5)                 20        ['input_1[0][0]']             
                                                                                                  
 embedding_1 (Embedding)     (None, 1, 5)                 20        ['input_2[0][0]']             
                                                                                              

In [15]:
model.fit([X_train.user_id, X_train.item_id, X_train_tfidf], y_train,
          epochs=10, batch_size=4, verbose=1,
          validation_data=([X_test.user_id, X_test.item_id, X_test_tfidf], y_test))

# Predictions
predictions = model.predict([X_test.user_id, X_test.item_id, X_test_tfidf]).flatten()

# Evaluation metrics
mae = mean_absolute_error(y_test, predictions)
rmse = np.sqrt(mean_squared_error(y_test, predictions))
cm = confusion_matrix(y_test.astype('int'), predictions.round().astype('int'))

print(f"MAE: {mae}")
print(f"RMSE: {rmse}")
print("Confusion Matrix:\n", cm)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
MAE: 3.325044631958008
RMSE: 3.3988572642044126
Confusion Matrix:
 [[0 0 0]
 [1 0 0]
 [1 0 0]]
