In [1]:
!pip install -q tensorflow-recommenders


[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/96.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m96.2/96.2 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25h

## Import Library and Load Dataset

In [2]:
import pandas as pd
import tensorflow as tf
import tensorflow_recommenders as tfrs
from typing import Dict, Text
import numpy as np
df_sma = pd.read_csv('/content/dataset_sma.csv')
df_sd = pd.read_csv('/content/dataset_sd.csv')
df_smp = pd.read_csv('/content/dataset_smp.csv')




## Preprocessing Data

In [3]:
df_merged = pd.concat([df_sma, df_sd, df_smp])

df_merged['gender_tutor'], gender_unique = pd.factorize(df_merged['gender_tutor'])
df_merged['pelajaran'], pelajaran_unique = pd.factorize(df_merged['pelajaran'])
df_merged['daerah_tutor'], daerah_unique = pd.factorize(df_merged['daerah_tutor'])
df_merged.to_csv('/content/merged_dataset.csv', index=False)
print(df_merged.info())

<class 'pandas.core.frame.DataFrame'>
Index: 3000 entries, 0 to 999
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype
---  ------         --------------  -----
 0   id_user        3000 non-null   int64
 1   id_tutor       3000 non-null   int64
 2   jenjang_tutor  3000 non-null   int64
 3   gender_tutor   3000 non-null   int64
 4   daerah_tutor   3000 non-null   int64
 5   pelajaran      3000 non-null   int64
dtypes: int64(6)
memory usage: 164.1 KB
None


In [4]:
lookup_daerah = tf.keras.layers.IntegerLookup()
lookup_pelajaran = tf.keras.layers.IntegerLookup()
lookup_jenjang = tf.keras.layers.IntegerLookup()
lookup_gender = tf.keras.layers.IntegerLookup()
lookup_tutor = tf.keras.layers.IntegerLookup()
lookup_user = tf.keras.layers.IntegerLookup()

daerah_ds = tf.data.Dataset.from_tensor_slices(df_merged['daerah_tutor'])
pelajaran_ds = tf.data.Dataset.from_tensor_slices(df_merged['pelajaran'])
jenjang_ds = tf.data.Dataset.from_tensor_slices(df_merged['jenjang_tutor'])
gender_ds = tf.data.Dataset.from_tensor_slices(df_merged['gender_tutor'])
tutor_ds = tf.data.Dataset.from_tensor_slices(df_merged['id_tutor'])
user_ds = tf.data.Dataset.from_tensor_slices(df_merged['id_user'])

lookup_daerah.adapt(daerah_ds)
lookup_pelajaran.adapt(pelajaran_ds)
lookup_jenjang.adapt(jenjang_ds)
lookup_gender.adapt(gender_ds)
lookup_tutor.adapt(tutor_ds)
lookup_user.adapt(user_ds)


## Build Model and Train

In [5]:
class UserModel(tf.keras.Model):
    def __init__(self, user_vocab_size, embedding_dim):
        super().__init__()
        self.user_embedding = tf.keras.layers.Embedding(user_vocab_size, embedding_dim)
        self.dense = tf.keras.layers.Dense(embedding_dim)

    def call(self, inputs):
        x = self.user_embedding(inputs)
        return self.dense(x)

class TutorModel(tf.keras.Model):
    def __init__(self, tutor_vocab_size, daerah_vocab_size, gender_vocab_size, jenjang_vocab_size, pelajaran_vocab_size, embedding_dim):
        super().__init__()
        self.daerah_embedding = tf.keras.layers.Embedding(daerah_vocab_size, embedding_dim)
        self.gender_embedding = tf.keras.layers.Embedding(gender_vocab_size, embedding_dim)
        self.jenjang_embedding = tf.keras.layers.Embedding(jenjang_vocab_size, embedding_dim)
        self.pelajaran_embedding = tf.keras.layers.Embedding(pelajaran_vocab_size, embedding_dim)
        self.tutor_embedding = tf.keras.layers.Embedding(tutor_vocab_size, embedding_dim)
        self.dense = tf.keras.layers.Dense(embedding_dim)

    def call(self, inputs):
        daerah_input, gender_input, jenjang_input, pelajaran_input, tutor_input = inputs
        daerah_emb = self.daerah_embedding(daerah_input)
        gender_emb = self.gender_embedding(gender_input)
        jenjang_emb = self.jenjang_embedding(jenjang_input)
        pelajaran_emb = self.pelajaran_embedding(pelajaran_input)
        tutor_emb = self.tutor_embedding(tutor_input)
        combined_emb = tf.concat([daerah_emb, gender_emb, jenjang_emb, pelajaran_emb, tutor_emb], axis=1)
        return self.dense(combined_emb)

class Model(tfrs.Model):
    def __init__(self, user_model: tf.keras.Model, tutor_model: tf.keras.Model, task: tfrs.tasks.Retrieval):
        super().__init__()
        self.user_model = user_model
        self.tutor_model = tutor_model
        self.task = task

    def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
        user_embeddings = self.user_model(features['id_user'])
        tutor_embeddings = self.tutor_model((features["daerah_tutor"], features["gender_tutor"], features["jenjang_tutor"], features["pelajaran"], features['id_tutor']))
        return self.task(user_embeddings, tutor_embeddings)


In [6]:
embedding_dim = 32

daerah_vocab_size = len(lookup_daerah.get_vocabulary())
pelajaran_vocab_size = len(lookup_pelajaran.get_vocabulary())
gender_vocab_size = len(lookup_gender.get_vocabulary())
jenjang_vocab_size = len(lookup_jenjang.get_vocabulary())
tutor_vocab_size = len(lookup_tutor.get_vocabulary())
user_vocab_size = len(lookup_user.get_vocabulary())

print(daerah_vocab_size)
print(pelajaran_vocab_size)
print(gender_vocab_size)
print(jenjang_vocab_size)
print(tutor_vocab_size)
print(user_vocab_size)
user_model = UserModel(user_vocab_size, embedding_dim)
tutor_model = TutorModel(tutor_vocab_size, daerah_vocab_size, gender_vocab_size, jenjang_vocab_size, pelajaran_vocab_size, embedding_dim)

data = {
    "id_user": lookup_user(tf.convert_to_tensor(df_merged['id_user'])),
    "id_tutor": lookup_tutor(tf.convert_to_tensor(df_merged['id_tutor'])),
    "daerah_tutor": lookup_daerah(tf.convert_to_tensor(df_merged['daerah_tutor'])),
    "gender_tutor": lookup_gender(tf.convert_to_tensor(df_merged['gender_tutor'])),
    "jenjang_tutor": lookup_jenjang(tf.convert_to_tensor(df_merged['jenjang_tutor'])),
    "pelajaran": lookup_pelajaran(tf.convert_to_tensor(df_merged['pelajaran']))
}

batched_ds = tf.data.Dataset.from_tensor_slices(data).batch(128)

task = tfrs.tasks.Retrieval(metrics=tfrs.metrics.FactorizedTopK(
    candidates=batched_ds.map(lambda x: tutor_model((x["daerah_tutor"], x["gender_tutor"], x["jenjang_tutor"], x["pelajaran"], x["id_tutor"])))
))

model = Model(user_model, tutor_model, task)
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))

model.fit(batched_ds, epochs=5)


6
13
3
13
151
399
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x7e564057d6f0>

## Get Recommendation Tutor for User


In [8]:
def get_top_k_recommendations(model, user_id, k=10):
    user_input = tf.convert_to_tensor([user_id], dtype=tf.int64)
    user_input_transformed = lookup_user(user_input)

    user_embedding = model.user_model(user_input_transformed)


    tutor_ids = df_merged['id_tutor'].unique()
    daerah_tutors = df_merged[['id_tutor', 'daerah_tutor']].drop_duplicates(subset=['id_tutor']).set_index('id_tutor').loc[tutor_ids].values.flatten()
    gender_tutors = df_merged[['id_tutor', 'gender_tutor']].drop_duplicates(subset=['id_tutor']).set_index('id_tutor').loc[tutor_ids].values.flatten()
    jenjang_tutors = df_merged[['id_tutor', 'jenjang_tutor']].drop_duplicates(subset=['id_tutor']).set_index('id_tutor').loc[tutor_ids].values.flatten()
    pelajaran_tutors = df_merged[['id_tutor', 'pelajaran']].drop_duplicates(subset=['id_tutor']).set_index('id_tutor').loc[tutor_ids].values.flatten()

    daerah_tutors_transformed = lookup_daerah(tf.convert_to_tensor(daerah_tutors, dtype=tf.int64))
    gender_tutors_transformed = lookup_gender(tf.convert_to_tensor(gender_tutors, dtype=tf.int64))
    jenjang_tutors_transformed = lookup_jenjang(tf.convert_to_tensor(jenjang_tutors, dtype=tf.int64))
    pelajaran_tutors_transformed = lookup_pelajaran(tf.convert_to_tensor(pelajaran_tutors, dtype=tf.int64))
    tutor_ids_transformed = lookup_tutor(tf.convert_to_tensor(tutor_ids, dtype=tf.int64))


    tutor_embeddings = model.tutor_model((daerah_tutors_transformed, gender_tutors_transformed, jenjang_tutors_transformed, pelajaran_tutors_transformed, tutor_ids_transformed))

    scores = tf.linalg.matmul(user_embedding, tutor_embeddings, transpose_b=True)

    top_k_scores, top_k_indices = tf.nn.top_k(scores, k=k)

    top_k_tutors = tf.gather(tutor_ids, top_k_indices[0])
    top_k_scores = top_k_scores.numpy()[0]

    return top_k_tutors.numpy(), top_k_scores


user_id = 1
top_k_tutors, top_k_scores = get_top_k_recommendations(model, user_id, k=5)

print("Top K Tutors:", top_k_tutors)
print("Top K Scores:", top_k_scores)


Top K Tutors: [108  98 106  88  73]
Top K Scores: [3.720426  3.5097637 3.1405838 2.5417233 2.5118084]


## Save Model

In [None]:
# Save the weights in .h5 format
user_model.save_weights('user_model_weights', save_format='keras')
tutor_model.save_weights('tutor_model_weights', save_format='keras')

# Create a new model instance and load the saved weights into it
new_user_model = UserModel(user_vocab_size, embedding_dim)
new_tutor_model = TutorModel(tutor_vocab_size, daerah_vocab_size, gender_vocab_size, jenjang_vocab_size, pelajaran_vocab_size, embedding_dim)
new_task = tfrs.tasks.Retrieval(...)
new_model = Model(new_user_model, new_tutor_model, new_task)


model.fit(batched_ds, epochs=5)



Epoch 1/5
Epoch 2/5
Epoch 3/5

In [11]:
tf.saved_model.save(new_model, './ml/1')

