# Recommender Systems for Steam Video Games

data source: https://www.kaggle.com/datasets/tamber/steam-video-games/data
 
a blog: https://andbloch.github.io/An-Overview-of-Collaborative-Filtering-Algorithms/

In [65]:
import pyspark
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, count, when, isnan, countDistinct, sum, round, max, min, explode, udf
from pyspark.sql.types import IntegerType
from pyspark.ml.feature import StringIndexer
from pyspark.ml.recommendation import ALS
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder
import tensorflow as tf
import tensorflow_recommenders as tfrs
from tensorflow.keras import layers, Model
import numpy as np

In [2]:
spark = SparkSession.builder.appName('Steam Recommender System').getOrCreate()
spark

24/10/20 14:49:37 WARN Utils: Your hostname, Asenas-MacBook-Air.local resolves to a loopback address: 127.0.0.1; using 192.168.1.101 instead (on interface en0)
24/10/20 14:49:37 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
24/10/20 14:49:37 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


In [3]:
df = spark.read.csv('steam-200k.csv', inferSchema = True)
df

                                                                                

DataFrame[_c0: int, _c1: string, _c2: string, _c3: double, _c4: int]

In [4]:
df.show(5)

+---------+--------------------+--------+-----+---+
|      _c0|                 _c1|     _c2|  _c3|_c4|
+---------+--------------------+--------+-----+---+
|151603712|The Elder Scrolls...|purchase|  1.0|  0|
|151603712|The Elder Scrolls...|    play|273.0|  0|
|151603712|           Fallout 4|purchase|  1.0|  0|
|151603712|           Fallout 4|    play| 87.0|  0|
|151603712|               Spore|purchase|  1.0|  0|
+---------+--------------------+--------+-----+---+
only showing top 5 rows



In [5]:
df.printSchema()

root
 |-- _c0: integer (nullable = true)
 |-- _c1: string (nullable = true)
 |-- _c2: string (nullable = true)
 |-- _c3: double (nullable = true)
 |-- _c4: integer (nullable = true)



In [6]:
df.columns

['_c0', '_c1', '_c2', '_c3', '_c4']

In [7]:
df = df.withColumnRenamed('_c0', 'user_id') \
       .withColumnRenamed('_c1', 'name') \
       .withColumnRenamed('_c2', 'action') \
       .withColumnRenamed('_c3', 'hours') \
       .withColumnRenamed('_c4', 'zero')

In [8]:
df.printSchema()

root
 |-- user_id: integer (nullable = true)
 |-- name: string (nullable = true)
 |-- action: string (nullable = true)
 |-- hours: double (nullable = true)
 |-- zero: integer (nullable = true)



shape

In [9]:
df.count(), len(df.columns) ## shape

(200000, 5)

null or na values

In [10]:
df.select([count(when(col(c).isNull(), c)).alias(c) for c in df.columns]).show()

+-------+----+------+-----+----+
|user_id|name|action|hours|zero|
+-------+----+------+-----+----+
|      0|   0|     0|    0|   0|
+-------+----+------+-----+----+



In [11]:
columns = ['user_id', 'name', 'action', 'hours', 'zero']

df.select([count(when(isnan(col(c)), c)).alias(c) for c in columns]).show()



+-------+----+------+-----+----+
|user_id|name|action|hours|zero|
+-------+----+------+-----+----+
|      0|   0|     0|    0|   0|
+-------+----+------+-----+----+



                                                                                

*Duplicate rows*

In [12]:
df.groupBy(columns).count().filter('count > 1').show()



+---------+--------------------+--------+-----+----+-----+
|  user_id|                name|  action|hours|zero|count|
+---------+--------------------+--------+-----+----+-----+
| 86338111|Grand Theft Auto ...|purchase|  1.0|   0|    2|
|189858084|Grand Theft Auto ...|purchase|  1.0|   0|    2|
|150882304|Sid Meier's Civil...|purchase|  1.0|   0|    2|
|189858084|Grand Theft Auto ...|purchase|  1.0|   0|    2|
|116617462|Grand Theft Auto ...|purchase|  1.0|   0|    2|
| 37422528|Sid Meier's Civil...|purchase|  1.0|   0|    2|
|147859903|Sid Meier's Civil...|purchase|  1.0|   0|    2|
|138941587|Sid Meier's Civil...|purchase|  1.0|   0|    2|
|145825155|Grand Theft Auto ...|purchase|  1.0|   0|    2|
|105782521|Sid Meier's Civil...|purchase|  1.0|   0|    2|
| 46301758|Sid Meier's Civil...|purchase|  1.0|   0|    2|
|179936723|Grand Theft Auto ...|purchase|  1.0|   0|    2|
| 64455019|Sid Meier's Civil...|purchase|  1.0|   0|    2|
|142650116|Grand Theft Auto ...|purchase|  1.0|   0|    

                                                                                

In [13]:
df_new = df.dropDuplicates()

In [14]:
df_new.count()

                                                                                

199293

*Unique Values*

In [15]:
df_new.select([countDistinct(c).alias(c) for c in columns]).show()



+-------+----+------+-----+----+
|user_id|name|action|hours|zero|
+-------+----+------+-----+----+
|  12393|5155|     2| 1593|   1|
+-------+----+------+-----+----+



                                                                                

In [16]:
# remove the column zero
df_new = df_new.drop('zero')

*Analysis*

In [17]:
df.groupBy('name')\
    .agg(count('user_id').alias('users_count'))\
    .orderBy('users_count', ascending = False)\
    .limit(20)\
    .show()

+--------------------+-----------+
|                name|users_count|
+--------------------+-----------+
|              Dota 2|       9682|
|     Team Fortress 2|       4646|
|Counter-Strike Gl...|       2789|
|            Unturned|       2632|
|       Left 4 Dead 2|       1752|
|Counter-Strike So...|       1693|
|      Counter-Strike|       1424|
|         Garry's Mod|       1397|
|The Elder Scrolls...|       1394|
|            Warframe|       1271|
|Half-Life 2 Lost ...|       1158|
|Sid Meier's Civil...|       1150|
|           Robocraft|       1096|
|Half-Life 2 Death...|       1021|
|              Portal|       1005|
|            Portal 2|        997|
|         Half-Life 2|        995|
|   Heroes & Generals|        993|
|            Terraria|        956|
|Counter-Strike Co...|        904|
+--------------------+-----------+



In [18]:
df.filter(col('action') == 'purchase') \
  .groupBy('name') \
  .agg(count('user_id').alias('count_user_purchase')) \
  .orderBy('count_user_purchase', ascending=False) \
  .limit(20) \
  .show()

+--------------------+-------------------+
|                name|count_user_purchase|
+--------------------+-------------------+
|              Dota 2|               4841|
|     Team Fortress 2|               2323|
|            Unturned|               1563|
|Counter-Strike Gl...|               1412|
|Half-Life 2 Lost ...|                981|
|Counter-Strike So...|                978|
|       Left 4 Dead 2|                951|
|      Counter-Strike|                856|
|            Warframe|                847|
|Half-Life 2 Death...|                823|
|         Garry's Mod|                731|
|The Elder Scrolls...|                717|
|           Robocraft|                689|
|Counter-Strike Co...|                679|
|Counter-Strike Co...|                679|
|   Heroes & Generals|                658|
|         Half-Life 2|                639|
|Sid Meier's Civil...|                596|
|         War Thunder|                590|
|              Portal|                588|
+----------

                                                                                

In [19]:
df.filter(col('action') == 'play') \
  .groupBy('name') \
  .agg(count('user_id').alias('count_user_play')) \
  .orderBy('count_user_play', ascending=False) \
  .limit(20) \
  .show()

+--------------------+---------------+
|                name|count_user_play|
+--------------------+---------------+
|              Dota 2|           4841|
|     Team Fortress 2|           2323|
|Counter-Strike Gl...|           1377|
|            Unturned|           1069|
|       Left 4 Dead 2|            801|
|Counter-Strike So...|            715|
|The Elder Scrolls...|            677|
|         Garry's Mod|            666|
|      Counter-Strike|            568|
|Sid Meier's Civil...|            554|
|            Terraria|            460|
|            Portal 2|            453|
|            Warframe|            424|
|              Portal|            417|
|           Robocraft|            407|
|            PAYDAY 2|            390|
|       Borderlands 2|            386|
|         Half-Life 2|            356|
|   Heroes & Generals|            335|
|         War Thunder|            303|
+--------------------+---------------+



In [20]:
df.filter(col('action') == 'play') \
  .groupBy('name') \
  .agg(round(sum('hours'), 2).alias('total_play')) \
  .orderBy('total_play', ascending = False) \
  .limit(50) \
  .show()



+--------------------+----------+
|                name|total_play|
+--------------------+----------+
|              Dota 2|  981684.6|
|Counter-Strike Gl...|  322771.6|
|     Team Fortress 2|  173673.3|
|      Counter-Strike|  134261.1|
|Sid Meier's Civil...|   99821.3|
|Counter-Strike So...|   96075.5|
|The Elder Scrolls...|   70889.3|
|         Garry's Mod|   49725.3|
|Call of Duty Mode...|   42009.9|
|       Left 4 Dead 2|   33596.7|
|Football Manager ...|   32308.6|
|Football Manager ...|   30845.8|
|Football Manager ...|   30574.8|
|            Terraria|   29951.8|
|            Warframe|   27074.6|
|Football Manager ...|   24283.1|
|              Arma 3|   24055.7|
|  Grand Theft Auto V|   22956.7|
|       Borderlands 2|   22667.9|
|    Empire Total War|   21030.3|
+--------------------+----------+
only showing top 20 rows



                                                                                

### Data prep for recommedation systems

In [21]:
df_purchase = df_new.filter(col('action') == 'purchase')
df_purchase.count()

128804

In [22]:
df_purchase.select(countDistinct('user_id')).show()
df_purchase = df_purchase.withColumnRenamed('action', 'action_pur') \
                         .withColumnRenamed('hours', 'purchase')



+-----------------------+
|count(DISTINCT user_id)|
+-----------------------+
|                  12393|
+-----------------------+



                                                                                

In [23]:
df_play = df_new.filter(col('action') == 'play')
df_play.count()

70489

In [24]:
df_play.select(countDistinct('user_id')).show()

+-----------------------+
|count(DISTINCT user_id)|
+-----------------------+
|                  11350|
+-----------------------+



In [25]:
full_df = df_purchase.join(df_play, ["user_id", "name"], "outer")

In [26]:
full_df.count()

                                                                                

128816

In [27]:
new_columns = full_df.columns

In [28]:
full_df_unique = full_df.select([countDistinct(c).alias(c) for c in new_columns]).show()

+-------+----+----------+--------+------+-----+
|user_id|name|action_pur|purchase|action|hours|
+-------+----+----------+--------+------+-----+
|  12393|5155|         1|       1|     1| 1593|
+-------+----+----------+--------+------+-----+



In [29]:
full_df.select([count(when(col(c).isNull(), c)).alias(c) for c in new_columns]).show()

+-------+----+----------+--------+------+-----+
|user_id|name|action_pur|purchase|action|hours|
+-------+----+----------+--------+------+-----+
|      0|   0|         0|       0| 58327|58327|
+-------+----+----------+--------+------+-----+



In [30]:
data = full_df.fillna(0, subset=['hours'])

In [31]:
data = data.drop("action_pur", "purchase", "action")

In [32]:
data.describe("hours").show()

+-------+------------------+
|summary|             hours|
+-------+------------------+
|  count|            128816|
|   mean|26.746411936405334|
| stddev|171.38236775962963|
|    min|               0.0|
|    max|           11754.0|
+-------+------------------+



In [33]:
min_hours = data.select(min("hours")).collect()[0][0]
max_hours = data.select(max("hours")).collect()[0][0]

data = data.withColumn("normalized_hours", (col("hours") - min_hours) / (max_hours - min_hours))

                                                                                

## Recommender Systems

### 1) ALS - Pyspark

In [34]:
indexer = StringIndexer(inputCol="name", outputCol="game_id")
data_indexed = indexer.fit(data).transform(data)

                                                                                

*Train - Test Split*

In [35]:
(train_set, test_set) = data_indexed.randomSplit([0.8, 0.2])

*Model Selection - Hyperparameter Tunning*

In [36]:
# the als model 
als = ALS(userCol="user_id", itemCol="game_id", ratingCol="normalized_hours", implicitPrefs=True, coldStartStrategy='drop') #Note we set cold start strategy to 'drop' 
#to ensure we don't get NaN evaluation metrics

# the parameter grid
paramGrid = ParamGridBuilder() \
  .addGrid(als.maxIter, [5, 10, 15]) \
  .addGrid(als.regParam, [0.01, 0.05, 0.1]) \
  .build()

# cross-validator
crossval = CrossValidator(estimator=als, estimatorParamMaps=paramGrid, evaluator=RegressionEvaluator(metricName="rmse", labelCol="normalized_hours", predictionCol="prediction"), numFolds=5, seed=42)

# Fit the cross-validator to the training data
cvModel = crossval.fit(train_set)

24/10/20 14:50:31 WARN InstanceBuilder: Failed to load implementation from:dev.ludovic.netlib.blas.JNIBLAS
24/10/20 14:50:31 WARN InstanceBuilder: Failed to load implementation from:dev.ludovic.netlib.lapack.JNILAPACK
                                                                                ]]

In [37]:
# best model
bestModel = cvModel.bestModel

# Parameter map
paramMap = bestModel.extractParamMap()

# best params
best_maxIter = bestModel._java_obj.parent().getMaxIter()
best_regParam = bestModel._java_obj.parent().getRegParam()

print("Best maxIter:", best_maxIter)
print("Best regParam:", best_regParam)


Best maxIter: 5
Best regParam: 0.1


*Model Training*

In [38]:
als_best = ALS(maxIter=best_maxIter, regParam=best_regParam, userCol="user_id", itemCol="game_id",
                ratingCol="normalized_hours", implicitPrefs=True, coldStartStrategy="drop")
als_best = als_best.fit(train_set)

*Making predictions*

In [39]:
predictions_als = als_best.transform(test_set)

*Model Evaluation*

In [40]:
evaluator_als = RegressionEvaluator(metricName="rmse", labelCol="normalized_hours", predictionCol="prediction")
rmse_als = evaluator_als.evaluate(predictions_als)
print('RMSE of ALS:', rmse_als)

RMSE of ALS: 0.10058599364198913


*Model Training by using all data to get recommendations*

In [41]:
als_best = ALS(maxIter=best_maxIter, regParam=best_regParam, userCol="user_id", itemCol="game_id",
                ratingCol="normalized_hours", implicitPrefs=True, coldStartStrategy="drop")
als_full_model = als_best.fit(data_indexed)

*Recommender System*

In [42]:
user_recommendations = als_full_model.recommendForAllUsers(5)
user_recommendations.show()



+--------+--------------------+
| user_id|     recommendations|
+--------+--------------------+
|   76767|[{44, 0.8302596},...|
|  144736|[{7, 0.64719903},...|
|  229911|[{7, 0.809055}, {...|
|  835015|[{49, 0.0}, {48, ...|
|  948368|[{5, 0.7349485}, ...|
|  975449|[{20, 0.70236766}...|
| 1268792|[{7, 0.64720684},...|
| 2531540|[{5, 0.5835752}, ...|
| 2753525|[{3, 0.614613}, {...|
| 3450426|[{7, 0.7381111}, ...|
| 7923954|[{7, 0.6472029}, ...|
| 7987640|[{49, 0.0}, {48, ...|
| 8259307|[{11, 0.46094027}...|
| 8567888|[{5, 0.6576412}, ...|
| 8585433|[{11, 0.60877556}...|
| 8784496|[{1, 0.4540149}, ...|
| 8795607|[{5, 0.6497613}, ...|
|10144413|[{49, 0.0}, {48, ...|
|10595342|[{5, 0.9260915}, ...|
|10599862|[{11, 0.9111481},...|
+--------+--------------------+
only showing top 20 rows



                                                                                

In [43]:
user_recommendations.printSchema()

root
 |-- user_id: integer (nullable = false)
 |-- recommendations: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- game_id: integer (nullable = true)
 |    |    |-- rating: float (nullable = true)



According to the schema of the data, the recommendations column contains an array, and each element is a struct. Each struct includes two fields: game_id (an integer) and rating (a float).

In [44]:
recommendations_exploded = user_recommendations.withColumn("recommendation", explode("recommendations"))

In [45]:
recommendations_als = recommendations_exploded.select("user_id", "recommendation.game_id", "recommendation.rating")

In [46]:
game_id_name = data_indexed[['name', 'game_id']]

In [47]:
game_id_name.count()

128816

In [48]:
game_id_name = game_id_name.drop_duplicates()

In [49]:
game_id_name.count()

5155

In [50]:
recommendations_als = recommendations_als.join(game_id_name, on='game_id', how='left')

In [51]:
recommendations_als = recommendations_als.orderBy(['user_id', 'rating'], ascending =[True, False])

In [52]:
recommendations_als.show(10)

                                                                                

+-------+-------+----------+--------------------+
|game_id|user_id|    rating|                name|
+-------+-------+----------+--------------------+
|      1|   5250|  0.726252|     Team Fortress 2|
|      0|   5250| 0.6869544|              Dota 2|
|     19|   5250|0.14574155|              Portal|
|      6|   5250|0.13253117|       Left 4 Dead 2|
|     16|   5250|0.12955722|         Half-Life 2|
|     44|  76767| 0.8302596|Call of Duty Mode...|
|     45|  76767| 0.8122936|Call of Duty Mode...|
|     69|  76767| 0.5927692|Call of Duty Blac...|
|     70|  76767|0.55496377|Call of Duty Blac...|
|      7|  76767| 0.5214544|      Counter-Strike|
+-------+-------+----------+--------------------+
only showing top 10 rows



A function to find recommended games for a specific user

In [53]:
def print_recommendation_als (user_id):
    """
    A function to find and print the recommended games for a specific user.

    Parameters:
    - user_id (int): The ID of the user for whom the game recommendations will be retrieved.

    The function filters the DataFrame based on the provided user_id and retrieves the game names associated with the recommendations for that user. It then prints the names of the recommended games in a numbered list.
    """
    recommend_filtered = recommendations_als.filter(col('user_id') == user_id).select('name')
    print(f"User ID: {user_id}")
    recommended_games = recommend_filtered.collect()
    print('The list of all recommended games:')
    for i, row in enumerate(recommended_games, 1):
        print(f"{i}. {row['name']}")

    

In [54]:
print_recommendation_als(user_id = 1423371) ## example

User ID: 1423371


                                                                                

The list of all recommended games:
1. Counter-Strike Global Offensive
2. Left 4 Dead 2
3. Garry's Mod
4. PAYDAY 2
5. Unturned


### 2) Neural Collaborative Filtering (NCF) - Tensorflow --- NeuMF

*Data Prep*

In [55]:
# Collecting Data and Mapping User/Game IDs to Indexes
ncf_data = data_indexed.select('user_id', 'name', 'normalized_hours').collect() 
user_ids = data_indexed.select('user_id').distinct().rdd.flatMap(lambda x:x).collect()
game_names = data_indexed.select('name').distinct().rdd.flatMap(lambda x:x).collect()

In [62]:
# Mapping User and Game IDs to Indexes
userid_to_index = {user_id: index for index, user_id in enumerate(user_ids)}
game_to_index = {name: index for index, name in enumerate(game_names)}

In [64]:
# Functions to Map Users and Games to Indexes
def get_user_index(user_id):
    return userid_to_index.get(user_id, -1)

def get_game_index(game_name):
    return game_to_index.get(game_name, -1)
  
# Creating UDFs (User Defined Functions) to Use in PySpark
user_index_udf = udf(get_user_index, IntegerType())
game_index_udf = udf(get_game_index, IntegerType())

# Applying the UDFs to the DataFrame
indexed_user_data = data_indexed.withColumn("user_index", user_index_udf(data_indexed["user_id"]))
indexed_user_data = indexed_user_data.withColumn("game_index", game_index_udf(indexed_user_data["name"]))

indexed_user_data.show()

                                                                                

+-------+--------------------+-----+--------------------+-------+----------+----------+
|user_id|                name|hours|    normalized_hours|game_id|user_index|game_index|
+-------+--------------------+-----+--------------------+-------+----------+----------+
|   5250|     Cities Skylines|144.0| 0.01225114854517611|  198.0|      5039|      2500|
|   5250|      Counter-Strike|  0.0|                 0.0|    7.0|      5039|      2653|
|   5250|Counter-Strike So...|  0.0|                 0.0|    5.0|      5039|        30|
|   5250|       Day of Defeat|  0.0|                 0.0|   21.0|      5039|      1449|
|   5250|              Dota 2|  0.2|1.701548409052237...|    0.0|      5039|         0|
|   5250|Half-Life 2 Episo...|  0.0|                 0.0|   30.0|      5039|      2137|
|   5250|Half-Life Blue Shift|  0.0|                 0.0|   40.0|      5039|       321|
|   5250|Half-Life Opposin...|  0.0|                 0.0|   36.0|      5039|      1094|
|  76767|Age of Empires II...| 1

In [66]:
# Converting Data to NumPy Arrays
X = np.array(indexed_user_data.select("user_index", "game_index").collect())
y = np.array(indexed_user_data.select("normalized_hours").collect())

                                                                                

*Model*

In [67]:
class NCFModel(Model):
    def __init__(self, num_users, num_games, embedding_dim=8):
        super(NCFModel, self).__init__()
        
        # embedding layers for users and games
        self.user_embedding = layers.Embedding(num_users, embedding_dim)
        self.game_embedding = layers.Embedding(num_games, embedding_dim)

        # MLP (Multi-Layer Perceptron) layers
        self.fc1 = layers.Dense(64, activation='relu')
        self.fc2 = layers.Dense(32, activation='relu')
        self.fc3 = layers.Dense(1, activation='sigmoid')

    def call(self, inputs):
        user_vector = self.user_embedding(inputs[:, 0])
        game_vector = self.game_embedding(inputs[:, 1])

        # merge embeddings
        interaction = layers.concatenate([user_vector, game_vector])

        # Fully Connected layers
        x = self.fc1(interaction)
        x = self.fc2(x)
        return self.fc3(x)


In [72]:
#model
embedding_dim = 32
num_users = len(user_ids)
num_games = len(game_names)

model_ncf = NCFModel(num_users, num_games, embedding_dim)

In [73]:
model_ncf.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
model_ncf.fit(X, y, epochs=10, batch_size=32)

Epoch 1/10
[1m4026/4026[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - loss: 0.0144 - mae: 0.0435
Epoch 2/10
[1m4026/4026[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - loss: 1.8845e-04 - mae: 0.0032
Epoch 3/10
[1m4026/4026[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - loss: 1.9613e-04 - mae: 0.0034
Epoch 4/10
[1m4026/4026[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - loss: 2.0625e-04 - mae: 0.0032
Epoch 5/10
[1m4026/4026[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - loss: 2.0204e-04 - mae: 0.0032
Epoch 6/10
[1m4026/4026[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - loss: 1.8206e-04 - mae: 0.0033
Epoch 7/10
[1m4026/4026[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1ms/step - loss: 1.8975e-04 - mae: 0.0032
Epoch 8/10
[1m4026/4026[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - loss: 1.9784e-04 - mae: 0.0032
Epoch 9/10
[1m4026/4026[0m [32m━━

<keras.src.callbacks.history.History at 0x30c5f42c0>

*Recommender System*

In [74]:
def recommend_games(user_id, model, game_to_index, top_n=5):
    user_index = userid_to_index[user_id]
    user_vector = np.array([user_index])

    # game index
    game_indices = np.array(range(len(game_to_index)))

    # merge user index and game index
    user_game_pairs = np.array(np.meshgrid(user_vector, game_indices)).T.reshape(-1, 2)

    predictions = model.predict(user_game_pairs).flatten()

    # top games
    top_games_indices = np.argsort(predictions)[-top_n:][::-1]
    recommended_games = [game_names[i] for i in top_games_indices]

    return recommended_games


recommended_games = recommend_games(1423371, model_ncf, game_to_index)
print("Recommended games for user 1423371:", recommended_games)

[1m162/162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 380us/step
Recommended games for user 1423371: ['Reckless Ruckus', 'Arcadia', "Tom Clancy's Ghost Recon Phantoms - EU Looks and Power (Recon)", 'UFO Afterlight - Old Version', 'Overcast - Walden and the Werewolf - Soundtrack']
