In [82]:
import re
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt
import tensorflow_recommenders as tfrs

from typing import Dict, Text
from datetime import datetime
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler

In [83]:
users_df = pd.read_csv('../data/users.csv')
users_df.head()

Unnamed: 0,user_id,user_name,user_gender,user_birth_date,user_age,user_location,user_tags,user_preferences
0,ba01ce05-a24e-4fc6-8c53-21b1faa624a9,Manuella Beatrice,P,5/27/2002,21,"Banten, Serang",Bernyanyi|Independen|Membaca|Disiplin|Menulis,Ingin memiliki teman yang bisa diajak kulinera...
1,ebeac7d2-642f-42a6-9175-6eb86dbcf84d,Achmad Irvan Hakim,L,10/21/2003,20,"Kalimantan Timur, Bontang",Pecinta alam|Membaca|Kritis|Memasak|Ngopi,"Teman yang enak di ajak ngapa ngaapin (mabar,j..."
2,25fe48f2-a312-421d-8751-984b5e4a448f,Lubna Mawaddah,P,1/12/2003,20,"Jawa Tengah, Semarang",Puzzle|Bernyanyi|Penerjemah|Kecerdasan buatan|...,"Saya ingin teman yang bisa diajak kulineran, k..."
3,f5b01cb5-d06c-41fc-880a-694d9c74e96c,Fajar Ramadhan,L,10/27/2005,18,"Jawa Barat, Tasikmalaya",Olahraga|Analis keamanan|Memanjat|Binaraga|Han...,"Temen yang bisa diajak belajar, diskusi terkai..."
4,b8307662-eb4f-4222-845e-771736274160,Gede Putra Nugraha,L,6/19/2002,21,"Jawa Barat, Cirebon",Dokter|Analitis|Kuliner|Musisi|Sosmed,Saya ingin teman yang bisa diajak berkembang b...


In [84]:
talents_df = pd.read_csv('../data/talents.csv')
talents_df.head()

Unnamed: 0,talent_id,talent_name,talent_gender,talent_birth_date,talent_age,talent_location,talent_tags,talent_description
0,fc0f6e4b-c397-40a4-a24c-d08b92aedc45,Pribadi Anwar Wicaksono,L,24/4/1992,31,"Banten, Cilegon",Badminton|Berjiwa petualang|Kpopers|Hiking|Ola...,"Saya sangat menyukai Kpop, girlband favorit sa..."
1,674c14d8-ed13-471a-aabf-ceef4e5bc44b,Perdana Anshari,L,10/3/2005,18,"Kalimantan Utara, Tarakan",Desain|Inovatif|Optimis|Seniman|Kreatif,Saya adalah seorang seniman visual yang mencin...
2,f2427a83-eb72-47dd-aeaf-cf131b5cefa2,Dwi Nur Septasya Nahda,P,10/3/2002,21,"Sumatera Selatan, Pagaralam",Petualangan|Bersepeda|Kutu buku|Traveling|Kuliner,Pecinta kuliner dan petualangan! Saya suka men...
3,ac56a04d-aa98-4591-8e8c-2b188040c114,Asmara Tamara Liranda,P,31/3/1992,31,"Sumatera Barat, Lima Puluh Kota",Analitis|Coding|Inovatif|Teknologi|Programmer,Seorang pengembang perangkat lunak yang bersem...
4,d7a31753-7f7a-4d3a-80ec-bca26e578735,Adisyafitri Oky Septasya,P,12/7/1990,33,"Kalimantan Timur, Samarinda",Ambisius|Membaca|Cerdas|Pengajar|Visioner,Pengajar bahasa Inggris yang juga pencinta lit...


In [85]:
reviews_df = pd.read_csv('../data/reviews.csv')
reviews_df.head()

Unnamed: 0,user_id,talent_id,rating,review
0,5de6b455-0a80-4047-90e7-8ddee7ee2bb8,6742ab46-c8ca-4acc-99b2-9ba1c5bfccb6,4,"Talent ini sangat kreatif dalam fotografi, has..."
1,5de6b455-0a80-4047-90e7-8ddee7ee2bb8,d9b90682-f5b4-45f7-b1ed-8b8044c945e1,3,"Kemampuan olahraga talent ini memuaskan, tetap..."
2,bc92f639-6659-4732-ad42-cc62b377f811,1b18b89b-5434-422d-b2d9-294e4c64657e,5,Sangat puas dengan pengalaman kuliner bersama ...
3,9a8427dd-ef41-46d9-86fe-063129e8190e,931c2814-1f84-404f-851a-7fb075835d95,4,Talent ini memberikan konsultasi bisnis yang s...
4,0e9ced1a-35a0-4ad5-a204-3d5c843de78f,ddc39acb-d29b-43f0-abcd-f54fe7be2904,2,Kemampuan gym dan jogging talent ini tidak ses...


### Merging reviews with users and talents

In [86]:
ratings_df = pd.merge(pd.merge(reviews_df, talents_df, on='talent_id'), users_df, on='user_id')

In [87]:
ratings_df.head()

Unnamed: 0,user_id,talent_id,rating,review,talent_name,talent_gender,talent_birth_date,talent_age,talent_location,talent_tags,talent_description,user_name,user_gender,user_birth_date,user_age,user_location,user_tags,user_preferences
0,5de6b455-0a80-4047-90e7-8ddee7ee2bb8,6742ab46-c8ca-4acc-99b2-9ba1c5bfccb6,4,"Talent ini sangat kreatif dalam fotografi, has...",Muhammad Hamzah,L,21/12/1991,32,"Sulawesi Utara, Kotamobagu",Ambisius|Editor|Kreatif|Fotografer|Kuliner,Fotografer yang mengkhususkan diri dalam fotog...,Irgi,L,7/3/2002,21,"Jawa Timur, Pasuruan",Astrografi|Menggambar|Ambisius|Ahli nutrisi|Tr...,"Yang bisa diajak tukeran gambar, ngobrolin hal..."
1,5de6b455-0a80-4047-90e7-8ddee7ee2bb8,d9b90682-f5b4-45f7-b1ed-8b8044c945e1,3,"Kemampuan olahraga talent ini memuaskan, tetap...",Tedi Febrianto Arya,L,7/10/1979,44,"Sumatera Utara, Pematangsiantar",Badminton|Cerdas|Pengajar|Ramah|Olahraga,Seorang mantan pemain bulu tangkis yang berali...,Irgi,L,7/3/2002,21,"Jawa Timur, Pasuruan",Astrografi|Menggambar|Ambisius|Ahli nutrisi|Tr...,"Yang bisa diajak tukeran gambar, ngobrolin hal..."
2,600687d6-bf67-406d-a50e-768e3d2dd53d,6742ab46-c8ca-4acc-99b2-9ba1c5bfccb6,3,Fotografer talent ini memiliki kemampuan yang ...,Muhammad Hamzah,L,21/12/1991,32,"Sulawesi Utara, Kotamobagu",Ambisius|Editor|Kreatif|Fotografer|Kuliner,Fotografer yang mengkhususkan diri dalam fotog...,Irhamulloh Angga Taufik,L,18/2/1997,26,"Banten, Cilegon",Cerdas|Seni|Berkebun|Grafity|Pelukis,"Seorang pelukis yang suka seni, grafity, berke..."
3,6a58a9cc-b016-4ff3-83d4-c41e735bf4df,6742ab46-c8ca-4acc-99b2-9ba1c5bfccb6,5,Talent ini memiliki keterampilan mendengarkan ...,Muhammad Hamzah,L,21/12/1991,32,"Sulawesi Utara, Kotamobagu",Ambisius|Editor|Kreatif|Fotografer|Kuliner,Fotografer yang mengkhususkan diri dalam fotog...,Kevin Setiawan,L,9/13/2000,23,"Sumatera Utara, Binjai",Konsultan|Dokter|Sepak bola|Pet lovers|Memancing,"Biasa diajak seneng bareng, maen game bareng, ..."
4,6a58a9cc-b016-4ff3-83d4-c41e735bf4df,167c23a2-5f3d-48f1-87a5-e7e8b4ce8e44,3,Secara umum ok. Kemampuan bersepeda dan berkeb...,Sayid Bryan Karim Restiantoro,L,26/8/1978,45,"Jawa Timur, Mojokerto",Penyanyi|Musisi|Kreatif|Psikolog|Ramah,Pecinta musik yang memiliki kecintaan pada ber...,Kevin Setiawan,L,9/13/2000,23,"Sumatera Utara, Binjai",Konsultan|Dokter|Sepak bola|Pet lovers|Memancing,"Biasa diajak seneng bareng, maen game bareng, ..."


In [88]:
ratings_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1026 entries, 0 to 1025
Data columns (total 18 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   user_id             1026 non-null   object
 1   talent_id           1026 non-null   object
 2   rating              1026 non-null   int64 
 3   review              1026 non-null   object
 4   talent_name         1026 non-null   object
 5   talent_gender       1026 non-null   object
 6   talent_birth_date   1026 non-null   object
 7   talent_age          1026 non-null   int64 
 8   talent_location     1026 non-null   object
 9   talent_tags         1026 non-null   object
 10  talent_description  1026 non-null   object
 11  user_name           1026 non-null   object
 12  user_gender         1026 non-null   object
 13  user_birth_date     1026 non-null   object
 14  user_age            1026 non-null   int64 
 15  user_location       1026 non-null   object
 16  user_tags           1026

### Preprocessing data

In [89]:
genders_encoder = LabelEncoder()
genders = ratings_df["user_gender"].unique()

In [90]:
genders_encoder.fit(genders)

LabelEncoder()

In [91]:
ratings_df.loc[:, "user_gender"] = genders_encoder.transform(ratings_df["user_gender"])
ratings_df.loc[:, "talent_gender"] = genders_encoder.transform(ratings_df["talent_gender"])

In [92]:
ratings_df.head()

Unnamed: 0,user_id,talent_id,rating,review,talent_name,talent_gender,talent_birth_date,talent_age,talent_location,talent_tags,talent_description,user_name,user_gender,user_birth_date,user_age,user_location,user_tags,user_preferences
0,5de6b455-0a80-4047-90e7-8ddee7ee2bb8,6742ab46-c8ca-4acc-99b2-9ba1c5bfccb6,4,"Talent ini sangat kreatif dalam fotografi, has...",Muhammad Hamzah,0,21/12/1991,32,"Sulawesi Utara, Kotamobagu",Ambisius|Editor|Kreatif|Fotografer|Kuliner,Fotografer yang mengkhususkan diri dalam fotog...,Irgi,0,7/3/2002,21,"Jawa Timur, Pasuruan",Astrografi|Menggambar|Ambisius|Ahli nutrisi|Tr...,"Yang bisa diajak tukeran gambar, ngobrolin hal..."
1,5de6b455-0a80-4047-90e7-8ddee7ee2bb8,d9b90682-f5b4-45f7-b1ed-8b8044c945e1,3,"Kemampuan olahraga talent ini memuaskan, tetap...",Tedi Febrianto Arya,0,7/10/1979,44,"Sumatera Utara, Pematangsiantar",Badminton|Cerdas|Pengajar|Ramah|Olahraga,Seorang mantan pemain bulu tangkis yang berali...,Irgi,0,7/3/2002,21,"Jawa Timur, Pasuruan",Astrografi|Menggambar|Ambisius|Ahli nutrisi|Tr...,"Yang bisa diajak tukeran gambar, ngobrolin hal..."
2,600687d6-bf67-406d-a50e-768e3d2dd53d,6742ab46-c8ca-4acc-99b2-9ba1c5bfccb6,3,Fotografer talent ini memiliki kemampuan yang ...,Muhammad Hamzah,0,21/12/1991,32,"Sulawesi Utara, Kotamobagu",Ambisius|Editor|Kreatif|Fotografer|Kuliner,Fotografer yang mengkhususkan diri dalam fotog...,Irhamulloh Angga Taufik,0,18/2/1997,26,"Banten, Cilegon",Cerdas|Seni|Berkebun|Grafity|Pelukis,"Seorang pelukis yang suka seni, grafity, berke..."
3,6a58a9cc-b016-4ff3-83d4-c41e735bf4df,6742ab46-c8ca-4acc-99b2-9ba1c5bfccb6,5,Talent ini memiliki keterampilan mendengarkan ...,Muhammad Hamzah,0,21/12/1991,32,"Sulawesi Utara, Kotamobagu",Ambisius|Editor|Kreatif|Fotografer|Kuliner,Fotografer yang mengkhususkan diri dalam fotog...,Kevin Setiawan,0,9/13/2000,23,"Sumatera Utara, Binjai",Konsultan|Dokter|Sepak bola|Pet lovers|Memancing,"Biasa diajak seneng bareng, maen game bareng, ..."
4,6a58a9cc-b016-4ff3-83d4-c41e735bf4df,167c23a2-5f3d-48f1-87a5-e7e8b4ce8e44,3,Secara umum ok. Kemampuan bersepeda dan berkeb...,Sayid Bryan Karim Restiantoro,0,26/8/1978,45,"Jawa Timur, Mojokerto",Penyanyi|Musisi|Kreatif|Psikolog|Ramah,Pecinta musik yang memiliki kecintaan pada ber...,Kevin Setiawan,0,9/13/2000,23,"Sumatera Utara, Binjai",Konsultan|Dokter|Sepak bola|Pet lovers|Memancing,"Biasa diajak seneng bareng, maen game bareng, ..."


In [93]:
numerical_columns = ["rating" , "user_gender", "talent_gender", "user_age", "talent_age"]

for column in ratings_df.columns:
    if column in numerical_columns:
        ratings_df[column] = ratings_df[column].astype(np.int32)
    else:
        ratings_df[column] = ratings_df[column].astype(np.str_)

In [94]:
ratings_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1026 entries, 0 to 1025
Data columns (total 18 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   user_id             1026 non-null   object
 1   talent_id           1026 non-null   object
 2   rating              1026 non-null   int32 
 3   review              1026 non-null   object
 4   talent_name         1026 non-null   object
 5   talent_gender       1026 non-null   int32 
 6   talent_birth_date   1026 non-null   object
 7   talent_age          1026 non-null   int32 
 8   talent_location     1026 non-null   object
 9   talent_tags         1026 non-null   object
 10  talent_description  1026 non-null   object
 11  user_name           1026 non-null   object
 12  user_gender         1026 non-null   int32 
 13  user_birth_date     1026 non-null   object
 14  user_age            1026 non-null   int32 
 15  user_location       1026 non-null   object
 16  user_tags           1026

### Building data pipeline

In [95]:
ratings = tf.data.Dataset.from_tensor_slices(
    dict(
        ratings_df[[
                'user_id', 'talent_id', 'rating', 'review', 'talent_gender', 'talent_age', 'talent_location', 'talent_tags', 'talent_description', 'user_gender', 'user_age', 'user_location', 'user_tags', 'user_preferences'
            ]]
        )
)
talents = tf.data.Dataset.from_tensor_slices(
    dict(
        talents_df[[
                'talent_id', 
                # 'talent_gender', 'talent_age', 'talent_location', 'talent_tags', 'talent_description'
            ]]
        )
)

In [96]:
ratings = ratings.map(
    lambda x: {
        "user_id": tf.cast(x["user_id"], tf.string),
        "talent_id": tf.cast(x["talent_id"], tf.string),
        "rating": tf.cast(x["rating"], tf.int32),
        "review": tf.cast(x["review"], tf.string),
        "talent_gender": tf.cast(x["talent_gender"], tf.int32),
        "talent_age": tf.cast(x["talent_age"], tf.int32),
        "talent_location": tf.cast(x["talent_location"], tf.string),
        "talent_tags": tf.cast(x["talent_tags"], tf.string),
        "talent_description": tf.cast(x["talent_description"], tf.string),
        "user_gender": tf.cast(x["user_gender"], tf.int32),
        "user_age": tf.cast(x["user_age"], tf.int32),
        "user_location": tf.cast(x["user_location"], tf.string),
        "user_tags": tf.cast(x["user_tags"], tf.string),
        "user_preferences": tf.cast(x["user_preferences"], tf.string),
    }
)

talents = talents.map(
    lambda x: {
        "talent_id": tf.cast(x["talent_id"], tf.string),
        # "talent_gender": tf.cast(x["talent_gender"], tf.int32),
        # "talent_age": tf.cast(x["talent_age"], tf.int32),
        # "talent_location": tf.cast(x["talent_location"], tf.string),
        # "talent_tags": tf.cast(x["talent_tags"], tf.string),
        # "talent_description": tf.cast(x["talent_description"], tf.string),
    }
)

In [97]:
feature_names = [
    "user_id",
    "talent_id",
    "rating",
    "review",
    "talent_gender",
    "talent_age",
    "talent_location",
    "talent_tags",
    "talent_description",
    "user_gender",
    "user_age",
    "user_location",
    "user_tags",
    "user_preferences",
]

vocabularies = {}

for feature_name in feature_names:
    vocab = ratings.batch(1_000).map(lambda x : x[feature_name])
    vocabularies[feature_name] = np.unique(np.concatenate(list(vocab)))

In [98]:
train_size = int(0.8 * len(ratings_df))
test_size = len(ratings_df) - train_size

assert train_size + test_size == len(ratings_df)

train_size, test_size

(820, 206)

In [99]:
tf.random.set_seed(42)
shuffled = ratings.shuffle(len(ratings_df), seed=42, reshuffle_each_iteration=False)

train = shuffled.take(train_size)
test = shuffled.skip(train_size).take(test_size)

cached_train = train.shuffle(len(ratings_df)).batch(1024).cache()
cached_test = test.batch(512).cache()

### Bulding model architecture

In [100]:
class UserModel(tf.keras.Model):

    def __init__(self):
        super().__init__()
        self.embedding_dimension = 32

        str_features = ["user_id", "user_location", "user_tags"]
        int_features = ["user_gender", "user_age", "rating"]
        text_features = ["user_preferences", "review", "user_tags", "user_location"]

        self._all_features = str_features + int_features + text_features
        self._embeddings = {}

        # Compute embeddings for string features.
        for feature_name in str_features:
            vocabulary = vocabularies[feature_name]
            self._embeddings[feature_name] = tf.keras.Sequential(
                [
                    tf.keras.layers.StringLookup(
                        vocabulary=vocabulary, mask_token=None
                    ),
                    tf.keras.layers.Embedding(
                        len(vocabulary) + 1, self.embedding_dimension
                    ),
                ]
            )

        # Compute embeddings for int features.
        for feature_name in int_features:
            vocabulary = vocabularies[feature_name]
            self._embeddings[feature_name] = tf.keras.Sequential(
                [
                    tf.keras.layers.IntegerLookup(
                        vocabulary=vocabulary, mask_token=None
                    ),
                    tf.keras.layers.Embedding(
                        len(vocabulary) + 1, self.embedding_dimension
                    ),
                ]
            )

        # Compute embeddings for text features.
        for feature_name in text_features:
            vocabulary = vocabularies[feature_name]
            vectorization_layer = tf.keras.layers.TextVectorization()
            vectorization_layer.adapt(cached_train.map(lambda x: x[feature_name]))
            self._embeddings[feature_name] = tf.keras.Sequential(
                [
                    vectorization_layer,
                    tf.keras.layers.Embedding(
                        len(vectorization_layer.get_vocabulary()),
                        self.embedding_dimension,
                        mask_zero=True,
                    ),
                    tf.keras.layers.GlobalAveragePooling1D(),
                ]
            )

    def call(self, inputs: Dict[Text, tf.Tensor]) -> tf.Tensor:
        # Concatenate embeddings
        embeddings = []
        for feature_name in self._all_features:
            embedding_fn = self._embeddings[feature_name]
            embeddings.append(embedding_fn(inputs[feature_name]))

        return tf.concat(embeddings, axis=1)

In [101]:
class QueryModel(tf.keras.Model):
    """Model for encoding user queries."""

    def __init__(self, layer_sizes):
        """Model for encoding user queries.

        Args:
          layer_sizes:
            A list of integers where the i-th entry represents the number of units
            the i-th layer contains.
        """
        super().__init__()
        
        # We first use the user model for generating embeddings.
        self.embedding_model = UserModel()

        # Then construct the layers.
        self.dense_layers = tf.keras.Sequential()

        # Use the ReLU activation for all but the last layer.
        for layer_size in layer_sizes[:-1]:
            self.dense_layers.add(tf.keras.layers.Dense(layer_size, activation="relu"))

        # No activation for the last layer.
        for layer_size in layer_sizes[-1:]:
            self.dense_layers.add(tf.keras.layers.Dense(layer_size))


    def call(self, inputs: Dict[Text, tf.Tensor]) -> tf.Tensor:
        feature_embedding = self.embedding_model(inputs)
        return self.dense_layers(feature_embedding)

In [102]:
class TalentModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.embedding_dimension = 32

        str_features = ["talent_id"]
        int_features = []
        text_features = ["talent_id"]

        self._all_features = str_features + int_features + text_features
        self._embeddings = {}

        # Compute embeddings for string features.
        for feature_name in str_features:
            vocabulary = vocabularies[feature_name]
            self._embeddings[feature_name] = tf.keras.Sequential(
                [
                    tf.keras.layers.StringLookup(
                        vocabulary=vocabulary, mask_token=None
                    ),
                    tf.keras.layers.Embedding(
                        len(vocabulary) + 1, self.embedding_dimension
                    ),
                ]
            )

        # Compute embeddings for int features.
        for feature_name in int_features:
            vocabulary = vocabularies[feature_name]
            self._embeddings[feature_name] = tf.keras.Sequential(
                [
                    tf.keras.layers.IntegerLookup(
                        vocabulary=vocabulary, mask_token=None
                    ),
                    tf.keras.layers.Embedding(
                        len(vocabulary) + 1, self.embedding_dimension
                    ),
                ]
            )

        # Compute embeddings for text features.
        for feature_name in text_features:
            vocabulary = vocabularies[feature_name]
            vectorization_layer = tf.keras.layers.TextVectorization()
            vectorization_layer.adapt(cached_train.map(lambda x: x[feature_name]))
            self._embeddings[feature_name] = tf.keras.Sequential(
                [
                    vectorization_layer,
                    tf.keras.layers.Embedding(
                        len(vectorization_layer.get_vocabulary()),
                        self.embedding_dimension,
                        mask_zero=True,
                    ),
                    tf.keras.layers.GlobalAveragePooling1D(),
                ]
            )

    def call(self, inputs: Dict[Text, tf.Tensor]) -> tf.Tensor:
        # Concatenate embeddings
        embeddings = []
        for feature_name in self._all_features:
            embedding_fn = self._embeddings[feature_name]
            embeddings.append(embedding_fn(inputs[feature_name]))

        return tf.concat(embeddings, axis=1)

In [103]:
class CandidateModel(tf.keras.Model):
    """Model for encoding talents."""

    def __init__(self, layer_sizes):
        """Model for encoding talents.

        Args:
          layer_sizes:
            A list of integers where the i-th entry represents the number of units
            the i-th layer contains.
        """
        super().__init__()
        self.embedding_model = TalentModel()

        # Then construct the layers.
        self.dense_layers = tf.keras.Sequential()

        # Use the ReLU activation for all but the last layer.
        for layer_size in layer_sizes[:-1]:
            self.dense_layers.add(tf.keras.layers.Dense(layer_size, activation="relu"))

        # No activation for the last layer.
        for layer_size in layer_sizes[-1:]:
            self.dense_layers.add(tf.keras.layers.Dense(layer_size))


    def call(self, inputs: Dict[Text, tf.Tensor]) -> tf.Tensor:
        feature_embedding = self.embedding_model(inputs)
        return self.dense_layers(feature_embedding)

In [104]:
class FriendEaseModel(tfrs.models.Model):
    def __init__(self, layer_sizes, rating_weight: float, retrieval_weight: float):
        super().__init__()

        # User and talent models.
        self.query_model = QueryModel(layer_sizes)
        self.candidate_model = CandidateModel(layer_sizes)

        # A small model to take in user and movie embeddings and predict ratings.
        # We can make this as complicated as we want as long as we output a scalar
        # as our prediction.
        self.rating_model = tf.keras.Sequential(
            [
                tf.keras.layers.Dense(256, activation="relu"),
                tf.keras.layers.Dense(128, activation="relu"),
                tf.keras.layers.Dense(1),
            ]
        )

        # The tasks.
        self.rating_task: tf.keras.layers.Layer = tfrs.tasks.Ranking(
            loss=tf.keras.losses.MeanSquaredError(),
            metrics=[tf.keras.metrics.RootMeanSquaredError()],
        )
        self.retrieval_task = tfrs.tasks.Retrieval(
            metrics=tfrs.metrics.FactorizedTopK(
                candidates=talents.batch(128).map(self.candidate_model),
            ),
        )

        # The loss weights.
        self.rating_weight = rating_weight
        self.retrieval_weight = retrieval_weight

    def call(self, features: Dict[Text, tf.Tensor]) -> tf.Tensor:
        # We pick out the user features and pass them into the user model.
        user_embeddings = self.query_model(
            {
                "user_id": features["user_id"], 
                "user_gender": features["user_gender"], 
                "user_age": features["user_age"], 
                "user_location": features["user_location"], 
                "user_tags": features["user_tags"], 
                "user_preferences": features["user_preferences"],
                "rating": features["rating"],
                "review": features["review"],
            }
        )
        # And pick out the movie features and pass them into the movie model.
        talent_embeddings = self.candidate_model({"talent_id": features["talent_id"]})

        return (
            user_embeddings,
            talent_embeddings,
            # We apply the multi-layered rating model to a concatentation of
            # user and movie embeddings.
            self.rating_model(tf.concat([user_embeddings, talent_embeddings], axis=1)),
        )

    def compute_loss(
        self, features: Dict[Text, tf.Tensor], training=False
    ) -> tf.Tensor:
        rate = features["rating"]

        user_embeddings, talent_embeddings, rating_predictions = self(features)

        # We compute the loss for each task.

        rating_loss = self.rating_task(
            labels=rate,
            predictions=rating_predictions,
        )
        retrieval_loss = self.retrieval_task(user_embeddings, talent_embeddings)

        # And combine them using the loss weights.
        return self.rating_weight * rating_loss + self.retrieval_weight * retrieval_loss

In [105]:
num_epochs = 100

model = FriendEaseModel([64], rating_weight=1.0, retrieval_weight=1.0)
model.compile(optimizer=tf.keras.optimizers.Adagrad(1e-1))

one_layer_history = model.fit(
    cached_train,
    validation_data=cached_test,
    epochs=num_epochs,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [106]:
accuracy = one_layer_history.history[
    "val_factorized_top_k/top_100_categorical_accuracy"
][-1]
print(f"Top-100 accuracy: {accuracy:.2f}.")

Top-100 accuracy: 0.25.


In [107]:
def predict_movie(inputs, top_n=3):
    # Create a model that takes in raw query features, and
    index = tfrs.layers.factorized_top_k.BruteForce(model.query_model)
    # recommends movies out of the entire movies dataset.
    index.index_from_dataset(
        talents.batch(128).map(
            lambda x: (
                x["talent_id"],
                model.candidate_model({"talent_id": x["talent_id"]}),
            )
        )
    )

    # Get recommendations.
    _, titles = index(
        {
            "user_id": np.array([inputs["user_id"]]),
            "user_gender": np.array([inputs["user_gender"]]),
            "user_age": np.array([inputs["user_age"]]),
            "user_location": np.array([inputs["user_location"]]),
            "user_tags": np.array([inputs["user_tags"]]),
            "user_preferences": np.array([inputs["user_preferences"]]),
            "rating": np.array([inputs["rating"]]),
            "review": np.array([inputs["review"]]),
        }
    )

    print("Top {} recommendations for user {}:\n".format(top_n, inputs["user_id"]))
    for i, title in enumerate(titles[0, :top_n].numpy()):
        print("{}. {}".format(i + 1, title.decode("utf-8")))
        
    return titles

In [108]:
titles = predict_movie(
    {
        "user_id": "935fae71-86a1-4632-b979-b01b35d3ba1d",
        "user_gender": 0,
        "user_age": 25,
        "user_location": "Bali, Denpasar",
        "user_tags": "Pendaki|Bersepeda|Memancing|Fotografi|Berjiwa petualang",
        "user_preferences": "Pendaki yang suka petualangan dan fotografi alam. Hobi bersepeda dan memancing di waktu luang. Ingin bertemu teman yang memiliki semangat petualang.",
        "rating": 0,
        "review": "",
    },
    5,
)

Top 5 recommendations for user 935fae71-86a1-4632-b979-b01b35d3ba1d:

1. fc0f6e4b-c397-40a4-a24c-d08b92aedc45
2. 674c14d8-ed13-471a-aabf-ceef4e5bc44b
3. f2427a83-eb72-47dd-aeaf-cf131b5cefa2
4. ac56a04d-aa98-4591-8e8c-2b188040c114
5. d7a31753-7f7a-4d3a-80ec-bca26e578735


In [109]:
talents_df.loc[talents_df["talent_id"].isin(titles[0].numpy().astype(np.str_)), :].head()

Unnamed: 0,talent_id,talent_name,talent_gender,talent_birth_date,talent_age,talent_location,talent_tags,talent_description
0,fc0f6e4b-c397-40a4-a24c-d08b92aedc45,Pribadi Anwar Wicaksono,L,24/4/1992,31,"Banten, Cilegon",Badminton|Berjiwa petualang|Kpopers|Hiking|Ola...,"Saya sangat menyukai Kpop, girlband favorit sa..."
1,674c14d8-ed13-471a-aabf-ceef4e5bc44b,Perdana Anshari,L,10/3/2005,18,"Kalimantan Utara, Tarakan",Desain|Inovatif|Optimis|Seniman|Kreatif,Saya adalah seorang seniman visual yang mencin...
2,f2427a83-eb72-47dd-aeaf-cf131b5cefa2,Dwi Nur Septasya Nahda,P,10/3/2002,21,"Sumatera Selatan, Pagaralam",Petualangan|Bersepeda|Kutu buku|Traveling|Kuliner,Pecinta kuliner dan petualangan! Saya suka men...
3,ac56a04d-aa98-4591-8e8c-2b188040c114,Asmara Tamara Liranda,P,31/3/1992,31,"Sumatera Barat, Lima Puluh Kota",Analitis|Coding|Inovatif|Teknologi|Programmer,Seorang pengembang perangkat lunak yang bersem...
4,d7a31753-7f7a-4d3a-80ec-bca26e578735,Adisyafitri Oky Septasya,P,12/7/1990,33,"Kalimantan Timur, Samarinda",Ambisius|Membaca|Cerdas|Pengajar|Visioner,Pengajar bahasa Inggris yang juga pencinta lit...


In [110]:
def predict_rating(inputs):
    trained_movie_embeddings, trained_user_embeddings, predicted_rating = model(
        {
            "user_id": np.array([inputs["user_id"]]),
            "user_gender": np.array([inputs["user_gender"]]),
            "user_age": np.array([inputs["user_age"]]),
            "user_location": np.array([inputs["user_location"]]),
            "user_tags": np.array([inputs["user_tags"]]),
            "user_preferences": np.array([inputs["user_preferences"]]),
            "rating": np.array([inputs["rating"]]),
            "review": np.array([inputs["review"]]),
            "talent_id": np.array([inputs["talent_id"]]),
        }
    )
    print("Predicted rating for {}: {}".format(inputs['talent_id'], predicted_rating.numpy()[0][0]))

In [111]:
predict_rating(ratings_df.loc[5])

Predicted rating for ebeec989-30a1-49fb-a40b-02a4bfbb6633: 3.365267038345337


In [112]:
ratings_df.loc[5:5, :]

Unnamed: 0,user_id,talent_id,rating,review,talent_name,talent_gender,talent_birth_date,talent_age,talent_location,talent_tags,talent_description,user_name,user_gender,user_birth_date,user_age,user_location,user_tags,user_preferences
5,6a58a9cc-b016-4ff3-83d4-c41e735bf4df,ebeec989-30a1-49fb-a40b-02a4bfbb6633,4,Layak dicoba. Kemampuan bermain badminton dan ...,Allysa Finesh Anggraeni,1,16/5/1981,42,"Sumatera Utara, Medan",Seni|Kreatif|Fotografi|Ramah|Berjiwa petualang,Pecinta seni yang memiliki ketertarikan pada a...,Kevin Setiawan,0,9/13/2000,23,"Sumatera Utara, Binjai",Konsultan|Dokter|Sepak bola|Pet lovers|Memancing,"Biasa diajak seneng bareng, maen game bareng, ..."
