In [18]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate
from tensorflow.keras.optimizers import Adam

# Load datasets
courses_df = pd.read_excel('dr01_courses_cleaned.xlsx')
ratings_df = pd.read_csv('ratings.csv')
ratings_df = ratings_df.rename(columns={'respondent_identifier': 'userId'})

In [19]:
# Ubah id jadi string
ratings_df['course_id'] = ratings_df['course_id'].astype(str)

# Preprocessing
# Encode
user_encoder = LabelEncoder()
course_encoder = LabelEncoder()

ratings_df['userId'] = user_encoder.fit_transform(ratings_df['userId'])
ratings_df['course_id'] = course_encoder.fit_transform(ratings_df['course_id'])

num_users = ratings_df['userId'].nunique()
num_courses = ratings_df['course_id'].nunique()

In [20]:
# Split data
X = ratings_df[['userId', 'course_id']]
y = ratings_df['rating']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build model
def build_model(num_users, num_courses, embedding_size=50):
    user_input = Input(shape=(1,), name='user_input')
    user_embedding = Embedding(input_dim=num_users, output_dim=embedding_size, name='user_embedding')(user_input)
    user_vec = Flatten(name='flatten_users')(user_embedding)

    course_input = Input(shape=(1,), name='course_input')
    course_embedding = Embedding(input_dim=num_courses, output_dim=embedding_size, name='course_embedding')(course_input)
    course_vec = Flatten(name='flatten_courses')(course_embedding)

    concat = Concatenate()([user_vec, course_vec])

    dense = Dense(128, activation='relu')(concat)
    dense = Dense(64, activation='relu')(dense)
    output = Dense(1)(dense)

    model = Model([user_input, course_input], output)
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

    return model

model = build_model(num_users, num_courses)
model.summary()

Model: "model_13"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 user_input (InputLayer)     [(None, 1)]                  0         []                            
                                                                                                  
 course_input (InputLayer)   [(None, 1)]                  0         []                            
                                                                                                  
 user_embedding (Embedding)  (None, 1, 50)                6814250   ['user_input[0][0]']          
                                                                                                  
 course_embedding (Embeddin  (None, 1, 50)                3400      ['course_input[0][0]']        
 g)                                                                                        

In [21]:
# Train model
history = model.fit([X_train['userId'], X_train['course_id']], y_train, epochs=1, batch_size=32, validation_split=0.2)

# Evaluate
loss = model.evaluate([X_test['userId'], X_test['course_id']], y_test)
print(f'Test Loss: {loss}')

Test Loss: 0.1332286149263382


In [22]:
# Prediksi rating
def predict_rating(user_id, course_id):
    if user_id not in user_encoder.classes_:
        raise ValueError(f"User ID {user_id} is not recognized.")
    if course_id not in course_encoder.classes_:
        raise ValueError(f"Course ID {course_id} is not recognized.")

    user_encoded = user_encoder.transform([user_id])[0]
    course_encoded = course_encoder.transform([course_id])[0]
    rating = model.predict([np.array([user_encoded]), np.array([course_encoded])])
    return rating[0][0]

# Handle user baru
def handle_cold_start():
    # Ensure ratings_df contains numeric ratings
    ratings_df['rating'] = pd.to_numeric(ratings_df['rating'], errors='coerce')

    # Group by course_id and calculate mean rating
    average_ratings = ratings_df[['course_id', 'rating']].groupby('course_id').mean()

    # Get top 10 courses by average rating
    top_courses = average_ratings.sort_values(by='rating', ascending=False).head(10).index.tolist()
    top_course_names = courses_df[courses_df['id'].astype(str).isin(course_encoder.inverse_transform(top_courses))]['name']

    return top_course_names

# Rekomen course untuk user
def recommend_courses(user_id):

    user_encoded = user_encoder.transform([user_id])[0]
    all_course_ids = np.arange(num_courses)

    user_course_pairs = np.array([[user_encoded, course_id] for course_id in all_course_ids])
    predictions = model.predict([user_course_pairs[:, 0], user_course_pairs[:, 1]])
    recommended_course_ids = [course_id for _, course_id in sorted(zip(predictions, all_course_ids), reverse=True)[:10]]

    return courses_df[courses_df['id'].astype(str).isin(course_encoder.inverse_transform(recommended_course_ids))]['name']

In [23]:
# Run
user_id = '1455298'  # Ganti untuk coba user lain
recommended_courses = recommend_courses(user_id)
print(recommended_courses)

25       Belajar Membuat Aplikasi Back-End untuk Pemula
27                Belajar Fundamental Aplikasi Back-End
29             Meniti Karier sebagai Software Developer
30    Pengenalan ke Logika Pemrograman (Programming ...
33                             Machine Learning Terapan
34                       Menjadi Google Cloud Architect
38                           Belajar Dasar-Dasar DevOps
41        Belajar Fundamental Aplikasi Web dengan React
Name: name, dtype: object
