In [None]:
!pip install tensorflow_recommenders

Installing collected packages: tensorflow_recommenders
Successfully installed tensorflow_recommenders-0.7.3


In [None]:
import os
import pprint
import tempfile

from typing import Dict, Text

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs

In [None]:
# Load the CSV that supplies the user/product columns selected in the following cells.
# NOTE(review): the frame is bound to the name `test` although it is read from
# 'train_data.csv', and `test` is later rebound to a tf.data split -- consider renaming.
test = pd.read_csv('train_data.csv')

In [None]:
# Cast both identifier columns to strings in one pass; the lookup layers built
# later are given string vocabularies.
test = test.astype({'user_id': 'str', 'product_id': 'str'})

In [None]:
# Column subsets of the loaded frame: `user` keeps the interaction-level
# fields, `item` keeps the product-level fields.
user = test.loc[:, ['user_id', 'product_id', 'click_cnt', 'purchase_cnt',
                    'age_range', 'gender', 'price', 'category', 'cate1',
                    'std_preference']]
item = test.loc[:, ['product_id', 'price', 'category', 'cate1']]

In [None]:
# One dataset element per DataFrame row, keyed by column name.
prefer = tf.data.Dataset.from_tensor_slices(dict(user))
product = tf.data.Dataset.from_tensor_slices(dict(item))


In [None]:
# Pretty-print the first element to check the feature names and value types.
first_element = prefer.take(1)
for record in first_element.as_numpy_iterator():
  pprint.pprint(record)

{'age_range': b'40-44',
 'cate1': b'\xe5\x8c\x96\xe7\xb2\xa7\xe5\x93\x81',
 'category': b'\xe5\x8c\x96\xe7\xb2\xa7\xe5\x93\x81,\xe7\xbe\x8e\xe5\xae\xb9'
             b',\xe3\x83\x98\xe3\x82\xa2\xe3\x82\xb1\xe3\x82\xa2 \xe3\x82'
             b'\xb9\xe3\x82\xad\xe3\x83\xb3\xe3\x82\xb1\xe3\x82\xa2',
 'click_cnt': 2.0,
 'gender': b'f',
 'price': 2821.0,
 'product_id': b'635',
 'purchase_cnt': 1.0,
 'std_preference': 0.1344093209580474,
 'user_id': b'306'}


In [None]:
# Keep only the two id features consumed by the retrieval model.
prefer = prefer.map(
    lambda row: {key: row[key] for key in ("product_id", "user_id")}
)
# Reduce each product element to its bare id tensor.
product = product.map(lambda row: row["product_id"])

In [None]:
tf.random.set_seed(42)

# `prefer` is built with from_tensor_slices + map, so its cardinality is known.
num_interactions = int(prefer.cardinality().numpy())

# A buffer covering the whole dataset gives a uniform shuffle; the previous
# buffer of 10 only permuted neighbouring rows. `max(..., 1)` keeps the call
# valid for an empty dataset (the buffer size must be positive).
shuffled = prefer.shuffle(
    max(num_interactions, 1), seed=42, reshuffle_each_iteration=False)

# 80/20 split over every interaction. The previous take(10)/skip(10).take(10)
# trained and evaluated on 10 rows each, regardless of dataset size.
num_train = int(num_interactions * 0.8)
train = shuffled.take(num_train)
# NOTE: this rebinds `test`, which held the source DataFrame until this cell.
test = shuffled.skip(num_train)

In [None]:
# Stream the ids in batches and reduce them to sorted, distinct vocabularies
# for the lookup layers.
product_id = product.batch(10)
user_ids = prefer.batch(10).map(lambda batch: batch["user_id"])

product_id_batches = list(product_id.as_numpy_iterator())
user_id_batches = list(user_ids.as_numpy_iterator())
unique_product_id = np.unique(np.concatenate(product_id_batches))
unique_user_ids = np.unique(np.concatenate(user_id_batches))

# Preview: first ten product ids plus both vocabulary sizes.
list(unique_product_id)[:10], len(unique_product_id), len(unique_user_ids)

([b'10299',
  b'10449',
  b'10551',
  b'10567',
  b'10682',
  b'10978',
  b'10990',
  b'11076',
  b'112',
  b'11273'],
 430,
 616)

In [None]:
embedding_dimension = 45  # embedding width; set to the number of categories

# Query tower: raw user-id string -> vocabulary index -> embedding vector.
user_model = tf.keras.Sequential([
  tf.keras.layers.StringLookup(
      vocabulary=unique_user_ids, mask_token=None),
  # We add an additional embedding to account for unknown tokens.
  tf.keras.layers.Embedding(len(unique_user_ids) + 1, embedding_dimension)
])

# Candidate tower: raw product-id string -> vocabulary index -> embedding vector.
product_model = tf.keras.Sequential([
  tf.keras.layers.StringLookup(
      vocabulary=unique_product_id, mask_token=None),
  # Same +1 as above for the out-of-vocabulary slot.
  tf.keras.layers.Embedding(len(unique_product_id) + 1, embedding_dimension)
])

# Score against each distinct product exactly once. `product` holds one row
# per interaction, so using it directly fed duplicate candidates into the
# top-K metrics (and embedded far more rows than there are products).
metric_candidate_ids = tf.data.Dataset.from_tensor_slices(unique_product_id)
metrics = tfrs.metrics.FactorizedTopK(
  candidates=metric_candidate_ids.batch(128).map(product_model)
)

# The task wraps the loss and the metric computation.
task = tfrs.tasks.Retrieval(
  metrics=metrics
)

In [None]:
class ItemModel(tfrs.Model):
  """Retrieval model that embeds users and products with separate sub-models."""

  def __init__(self, user_model, product_model, retrieval_task=None):
    """Stores the two embedding models and the task used to compute the loss.

    Args:
      user_model: Model applied to `features["user_id"]`.
      product_model: Model applied to `features["product_id"]`.
      retrieval_task: Optional task layer called with the user and product
        embeddings. When omitted, the notebook-level `task` is used, so
        `ItemModel(user_model, product_model)` keeps working unchanged.
    """
    super().__init__()
    self.product_model: tf.keras.Model = product_model
    self.user_model: tf.keras.Model = user_model
    # Prefer an explicitly injected task over the hidden global dependency.
    self.task: tf.keras.layers.Layer = (
        task if retrieval_task is None else retrieval_task)

  def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
    """Embeds the user/product ids of a batch and returns the task's result.

    Args:
      features: Mapping with "user_id" and "product_id" entries.
      training: Accepted for the Keras interface; not used here.

    Returns:
      Whatever `self.task` returns for the two embedding batches.
    """
    user_embeddings = self.user_model(features["user_id"])
    positive_product_embeddings = self.product_model(features["product_id"])
    return self.task(user_embeddings, positive_product_embeddings)

In [None]:
# Instantiate the retrieval model and attach the optimizer used for training.
model = ItemModel(user_model=user_model, product_model=product_model)
model.compile(
    optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1),
)

In [None]:
# Batch both splits and cache the batched pipelines for reuse across passes.
cached_test = test.batch(10).cache()
cached_train = (
    train
    .shuffle(10)
    .batch(10)
    .cache()
)

In [None]:
# Train for three passes over the cached training batches; the returned
# History object is shown as the cell output.
model.fit(cached_train, epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7fa2220c41c0>

In [None]:
# Evaluate on the held-out batches; return_dict=True yields the metrics as a
# name -> value mapping.
model.evaluate(cached_test, return_dict=True)



{'factorized_top_k/top_1_categorical_accuracy': 0.0,
 'factorized_top_k/top_5_categorical_accuracy': 0.0,
 'factorized_top_k/top_10_categorical_accuracy': 0.0,
 'factorized_top_k/top_50_categorical_accuracy': 0.10000000149011612,
 'factorized_top_k/top_100_categorical_accuracy': 0.10000000149011612,
 'loss': 23.0472412109375,
 'regularization_loss': 0,
 'total_loss': 23.0472412109375}

In [None]:
# Brute-force top-k layer that uses the trained user model as its query model.
# NOTE(review): no candidates are indexed here, and a later cell rebuilds
# `index` from scratch -- this cell looks redundant.
index = tfrs.layers.factorized_top_k.BruteForce(model.user_model)

In [None]:
# Bare expression: shows the repr of `index` as the cell output.
index

<tensorflow_recommenders.layers.factorized_top_k.BruteForce at 0x7fa2258d3ac0>

In [None]:
# Create a model that takes in raw query features and recommends products
# out of the entire product catalogue.
index = tfrs.layers.factorized_top_k.BruteForce(model.user_model)
# Index each distinct product once. `product` has one row per interaction, so
# indexing it directly stored duplicate candidates, which would repeat the
# same product in any k > 1 result.
indexed_product_ids = tf.data.Dataset.from_tensor_slices(unique_product_id).batch(100)
index.index_from_dataset(
  tf.data.Dataset.zip(
      (indexed_product_ids, indexed_product_ids.map(model.product_model)))
)

<tensorflow_recommenders.layers.factorized_top_k.BruteForce at 0x7fa225ff1d50>

In [None]:
# Ask the index for the single best-scoring product for user '335'.
scores, titles = index(tf.constant(['335']), k=1)
print("Recommendations for user 335")

# Row 0 is the only query; tabulate its recommended ids next to their scores.
result = pd.DataFrame({"titles": titles[0], "scores": scores[0]})
result

Recommendations for user 335


Unnamed: 0,titles,scores
0,b'376',0.112251


In [None]:
# Load the users to generate recommendations for; later cells read its
# `user_id` column.
new_user = pd.read_csv('_20230401_users.csv')

In [None]:
# Work on a copy so the vocabulary array itself stays untouched.
user_ids = np.copy(unique_user_ids)

In [None]:
# Convert every id to int so it can be compared with `new_user.user_id`.
user_ids = list(map(int, user_ids))

In [None]:
# Object dtype lets the column hold '' (no recommendation) as well as int ids.
new_user['recommand_title'] = pd.Series('', index=new_user.index, dtype=object)

# Set membership is O(1); the list lookup was O(n) per user.
known_user_ids = set(user_ids)

# Iterate (label, value) pairs so the write below targets the real row label
# instead of assuming a 0..n-1 index.
for row_label, raw_user_id in new_user['user_id'].items():
  if raw_user_id in known_user_ids:
    scores, titles = index(tf.constant([str(raw_user_id)]), k=1)
    # `.at` writes into `new_user` itself. The previous chained form
    # `new_user.recommand_title[u] = ...` raised SettingWithCopyWarning and
    # is not guaranteed to update the frame.
    new_user.at[row_label, 'recommand_title'] = int(np.array(titles[0])[0])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_user.recommand_title[u] = int(np.array(titles[0])[0])


In [None]:
# Bare expression: renders `new_user`, including the filled `recommand_title` column.
new_user

Unnamed: 0,user_id,age_range,gender,recommand_title
0,50242,45-49,f,27079
1,49918,40-44,f,27079
2,49728,60-64,f,27079
3,49394,50-54,f,27079
4,50698,25-29,f,27079
...,...,...,...,...
2216,50833,40-44,f,27079
2217,49368,50-54,m,27079
2218,49844,35-39,f,27079
2219,45670,45-49,f,27079


In [None]:
# Bare expression: shows whatever `titles` was last assigned (here, by the
# final index query of the loop above).
titles

<tf.Tensor: shape=(1, 1), dtype=string, numpy=array([[b'27079']], dtype=object)>

In [None]:
# Write every row whose recommendation is not 27079 to 'result_model.csv'
# (rows still holding '' also pass this filter).
# NOTE(review): 27079 is a hard-coded product id taken from one particular run;
# confirm the intended filter and derive the value instead of hard-coding it.
new_user.loc[new_user.recommand_title != 27079].to_csv('result_model.csv')