### Libraries

In [13]:
import ast
import os

import numpy as np
import pandas as pd
import requests
from scipy.sparse import csr_matrix
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Let text columns show up to 200 characters so the nested game records stay readable
pd.options.display.max_colwidth = 200

### Load files and transform data

In [3]:
# User-level tables, read from the shared data folder two levels up
df_users_profile = pd.read_csv("../../data/users_profile.csv")
df_users_friendlist = pd.read_csv("../../data/users_friendlist.csv")
df_users_owned_games = pd.read_csv("../../data/users_owned_games.csv")
df_users_recently_played_games = pd.read_csv("../../data/users_recently_played_games.csv")

# The reviews dump is the only gzip-compressed input
df_top_100_game_reviews = pd.read_csv(
    "../../data/top_100_game_reviews.gz",
    compression="gzip",
)

  df_top_100_game_reviews = pd.read_csv('../../data/top_100_game_reviews.gz', compression='gzip')


In [4]:
# Add our steam ids for tests
# The Steam Web API key is a secret: read it from the environment instead of
# committing it to the notebook. The key that used to be hard-coded here has
# been exposed and should be revoked.
key = os.environ.get("STEAM_API_KEY")
if not key:
    raise RuntimeError("Set the STEAM_API_KEY environment variable before running this cell.")

# Steam ids of the team accounts whose libraries are added to the dataset
list_top_100_game_reviews = [76561198080989870, 76561199062172023, 76561198164574454]
owned_games_url = "https://api.steampowered.com/IPlayerService/GetOwnedGames/v0001/"

users_owned_games = []
for steamid in list_top_100_game_reviews:
    query = {
        "key": key,
        "steamid": steamid,
        "include_appinfo": "true",
        "include_extended_appinfo": "true",
        "format": "json",
    }
    try:
        response = requests.get(owned_games_url, params=query, timeout=30)
    except requests.RequestException as err:
        # Only the exception type is printed: the message can contain the full URL, key included.
        print(f"Skipping {steamid}: request failed ({type(err).__name__})")
        continue
    if not response.ok:
        print(f"Skipping {steamid}: HTTP {response.status_code}")
        continue
    try:
        games = response.json()['response']['games']
    except (KeyError, ValueError):
        # No 'games' entry in the payload (in case of no owned games) or a body that is not JSON
        print(f"Skipping {steamid}: no owned games in the response")
        continue
    users_owned_games.append({'user_steamid': steamid, 'user_owned_games': games})

# Concat in users df
# Explicit columns keep the frame well-formed even when nothing could be fetched.
df_users_owned_games_team = pd.DataFrame(
    users_owned_games, columns=['user_steamid', 'user_owned_games']
)
# Drop rows already present for these accounts so re-running the cell does not
# append the same users again, and rebuild a unique index.
already_present = df_users_owned_games['user_steamid'].isin(df_users_owned_games_team['user_steamid'])
df_users_owned_games = pd.concat(
    [df_users_owned_games[~already_present], df_users_owned_games_team],
    ignore_index=True,
)

In [5]:
# Inspect the rows built from the API responses for the team accounts
df_users_owned_games_team

Unnamed: 0,user_steamid,user_owned_games
0,76561198080989870,"[{'appid': 72850, 'name': 'The Elder Scrolls V: Skyrim', 'playtime_forever': 3889, 'img_icon_url': 'b9aca8a189abd8d6aaf09047dbb0f57582683e1c', 'has_community_visible_stats': True, 'playtime_window..."
1,76561199062172023,"[{'appid': 322500, 'name': 'SUPERHOT', 'playtime_forever': 408, 'img_icon_url': '81839f0d50cb3e54c9aa7c69c04916f1e53d8c35', 'has_community_visible_stats': True, 'capsule_filename': 'library_600x90..."
2,76561198164574454,"[{'appid': 32470, 'name': 'STAR WARS™ Empire at War: Gold Pack', 'playtime_forever': 320, 'img_icon_url': '3f65d9be3af3083c07f1053dbf0b0653af7323b8', 'capsule_filename': '82795235c7d4481a68914f066..."


In [6]:
# Peek at a single raw game record of the first fetched account to see which fields it carries
users_owned_games[0]['user_owned_games'][7]

{'appid': 1174180,
 'name': 'Red Dead Redemption 2',
 'playtime_forever': 5576,
 'img_icon_url': '5106abd9c1187a97f23295a0ba9470c94804ec6c',
 'has_community_visible_stats': True,
 'playtime_windows_forever': 5576,
 'playtime_mac_forever': 0,
 'playtime_linux_forever': 0,
 'playtime_deck_forever': 0,
 'rtime_last_played': 1741484617,
 'capsule_filename': 'library_600x900.jpg',
 'has_workshop': False,
 'has_market': False,
 'has_dlc': True,
 'content_descriptorids': [5],
 'playtime_disconnected': 0}

In [7]:
# Base table for users and games played (items).
# The last rows should be the team accounts added by the concat above.
df_users_owned_games.tail(3)


Unnamed: 0,user_steamid,user_owned_games
0,76561198080989870,"[{'appid': 72850, 'name': 'The Elder Scrolls V: Skyrim', 'playtime_forever': 3889, 'img_icon_url': 'b9aca8a189abd8d6aaf09047dbb0f57582683e1c', 'has_community_visible_stats': True, 'playtime_window..."
1,76561199062172023,"[{'appid': 322500, 'name': 'SUPERHOT', 'playtime_forever': 408, 'img_icon_url': '81839f0d50cb3e54c9aa7c69c04916f1e53d8c35', 'has_community_visible_stats': True, 'capsule_filename': 'library_600x90..."
2,76561198164574454,"[{'appid': 32470, 'name': 'STAR WARS™ Empire at War: Gold Pack', 'playtime_forever': 320, 'img_icon_url': '3f65d9be3af3083c07f1053dbf0b0653af7323b8', 'capsule_filename': '82795235c7d4481a68914f066..."


In [8]:
# Transform the user_owned_games JSON-like column
#df_users_owned_games["parsed_owned_games"] = df_users_owned_games["user_owned_games"].apply(ast.literal_eval)

# To avoid errors with team data later updated
def safe_parse_owned_games(val):
    """Turn a stringified list of game records back into Python objects.

    Strings are evaluated with ``ast.literal_eval``; a string that cannot be
    evaluated yields ``None``. Any non-string value (for example a list that
    is already parsed) is handed back unchanged.
    """
    if not isinstance(val, str):
        return val  # already a Python object, nothing to parse
    try:
        parsed = ast.literal_eval(val)
    except Exception:
        parsed = None
    return parsed

# Parse every row; rows whose string cannot be evaluated end up as None in the new column
df_users_owned_games["parsed_owned_games"] = df_users_owned_games["user_owned_games"].apply(safe_parse_owned_games)


In [9]:
# Flatten the data into rows of (user, appid, playtime)
# Rows whose game list is missing or could not be parsed (None / NaN) are skipped
# instead of raising a TypeError when iterated.
rows = []
for user_id, owned_games in zip(
    df_users_owned_games["user_steamid"], df_users_owned_games["parsed_owned_games"]
):
    if not isinstance(owned_games, (list, tuple)):
        continue
    for game in owned_games:
        if not isinstance(game, dict):
            continue
        appid = game.get("appid")
        if appid is not None:
            # A record without a playtime counts as never played
            rows.append((user_id, appid, game.get("playtime_forever", 0)))

In [10]:
# Assemble the flattened (user, game, playtime) tuples into a long-format table
interaction_columns = ["user_steamid", "appid", "playtime_forever"]
interaction_df = pd.DataFrame(data=rows, columns=interaction_columns)
interaction_df

Unnamed: 0,user_steamid,appid,playtime_forever
0,76561198974520522,10,1
1,76561198974520522,80,0
2,76561198974520522,100,434
3,76561198974520522,300,1
4,76561198974520522,20,82
...,...,...,...
2443530,76561198164574454,1716740,61
2443531,76561198164574454,1086940,10499
2443532,76561198164574454,2767030,0
2443533,76561198164574454,2322010,0


In [11]:
# Keep only interactions with the games listed in the top-100 details file,
# so the user x item matrix built next has one column per listed game at most
df_top_100_game_details = pd.read_csv("../../data/top_100_game_details.csv")
top_100_games = list(df_top_100_game_details["appid"])
is_top_100_game = interaction_df["appid"].isin(top_100_games)
interaction_df = interaction_df[is_top_100_game]

In [14]:
# Wide user x game table holding playtime (instead of a binary owned flag);
# combinations with no value are filled with 0
user_item_matrix = interaction_df.pivot_table(
    values="playtime_forever",
    index="user_steamid",
    columns="appid",
    fill_value=0,
)

# Truncated SVD compresses the matrix into a small number of latent factors
n_components = 50  # number of latent factors (still to be tuned)
svd = TruncatedSVD(n_components=n_components, random_state=42)

# Rows of user_latent_matrix are users; rows of item_latent_matrix are games
user_latent_matrix = svd.fit_transform(user_item_matrix)
item_latent_matrix = svd.components_.transpose()

In [15]:
# User x game playtime matrix that is fed to the SVD
user_item_matrix

appid,10,70,80,220,240,320,400,550,730,4000,...,1665460,1811260,1938090,1966720,2186680,2246340,2358720,2694490,3164500,3241660
user_steamid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
76561197960281451,2136.0,131.0,22.0,2306.0,1109.0,46.0,47.0,1984.0,1533.0,193.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
76561197960351723,1409.0,773.0,0.0,14903.0,0.0,1409.0,10240.0,0.0,85072.0,0.0,...,0.0,0.0,51484.0,49960.0,0.0,0.0,0.0,0.0,3542.0,26763.0
76561197960420790,0.0,862.0,0.0,815.0,29.0,0.0,7.0,230.0,25.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
76561197960432447,6075.0,0.0,5997.0,0.0,6075.0,0.0,0.0,1183.0,77103.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
76561197960441967,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,5514.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76561199824167814,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,42.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
76561199834510455,11615.0,5143.0,5518.0,3875.0,18289.0,15933.0,2847.0,19404.0,27856.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
76561199848746905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,285.0,430.0,0.0,...,0.0,0.0,192.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
76561199861665926,0.0,0.0,0.0,0.0,0.0,0.0,526.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
# Output of svd.fit_transform: one row per user, one column per latent factor
user_latent_matrix

array([[ 3.24826622e+03,  1.83058079e+03,  6.76920992e+03, ...,
        -5.79370850e+01,  1.12766029e+02,  1.56112469e+02],
       [ 6.22314177e+04,  1.01099036e+05,  1.02399489e+03, ...,
        -7.70256259e+02,  5.33411672e+03, -1.73000549e+04],
       [ 1.01440931e+03,  9.79303583e+02,  5.27520212e+02, ...,
        -1.57478362e+03,  2.46503374e+03, -9.86755653e+02],
       ...,
       [ 3.11673202e+02,  3.30669513e+02, -7.65954958e+01, ...,
        -8.34588193e+00,  1.33700428e+01, -4.47899473e+00],
       [ 1.70717390e+02,  8.14929220e+01,  1.16534442e+00, ...,
         2.79335438e+01, -4.89576286e+01, -4.64073370e-01],
       [ 5.66560524e+00,  1.20008268e+01,  1.27050754e+01, ...,
        -2.55778773e+00,  2.77099243e+01, -5.88065547e+00]])

In [17]:
# Transposed svd.components_: one row per game, one column per latent factor
item_latent_matrix

array([[ 2.97860442e-01, -1.16644166e-01, -1.56122934e-03, ...,
        -6.68061178e-02,  8.65948479e-02, -1.56586847e-02],
       [ 2.71892035e-01, -1.63865015e-01,  1.67718669e-02, ...,
        -4.58782424e-03, -4.54525438e-03,  6.26383259e-02],
       [ 2.78272245e-01, -1.63634673e-01,  1.37499236e-02, ...,
         5.89645640e-02, -4.09775270e-02, -3.24498026e-02],
       ...,
       [ 1.40955995e-03,  6.52266950e-03,  3.56945414e-03, ...,
         9.50674750e-03,  3.03488391e-02,  1.88278666e-03],
       [ 5.31797229e-04,  2.31056517e-03,  7.83373980e-04, ...,
         4.02186006e-03,  6.68371929e-03,  1.00724244e-02],
       [ 3.77861568e-04,  1.77145793e-03,  7.81697844e-04, ...,
        -2.83780136e-03,  3.76393922e-03, -2.42494464e-04]])

In [21]:
# Save .csv
# NOTE(review): index=False leaves the user_steamid index out of the file, so the
# saved rows cannot be mapped back to users -- confirm the consumer does not need them.
user_item_matrix.to_csv('../../data/matrix_based_user_item_matrix.csv', index=False)

In [22]:
# Rebuild a dense user x game score matrix from the two latent factor matrices
predicted_ratings = np.matmul(user_latent_matrix, item_latent_matrix.T)

In [23]:
# Raw reconstructed scores (users in rows, games in columns, same order as user_item_matrix)
predicted_ratings

array([[ 2.11739018e+03,  6.68169128e+02,  6.55573146e+02, ...,
         3.60440027e+01,  9.73602945e+00,  1.01120735e+01],
       [ 2.10654715e+02,  7.67698051e+03,  3.21717486e+03, ...,
         1.17167303e+03,  1.05425966e+03,  1.81603064e+03],
       [-1.58090470e+02,  4.23143650e+02,  6.01517326e+01, ...,
         6.50527137e-01,  6.06300907e+01,  5.16646736e+01],
       ...,
       [ 4.42928894e+00,  7.09886754e+00,  1.95593710e+00, ...,
        -2.28243735e+00,  1.06100255e+00,  4.20211958e-01],
       [-6.34975736e+00,  1.22505999e+02,  1.21632654e+02, ...,
        -2.51574636e+00,  2.13806718e+00,  4.95112429e+00],
       [ 7.71168044e+00, -4.55851715e+00, -5.96342514e+00, ...,
         8.89002158e-01,  5.50714128e-01,  8.70888598e-01]])

In [24]:
# Label the reconstructed scores with the user ids and appids of the input matrix
# so a score can be looked up by (user_steamid, appid)
predicted_df = pd.DataFrame(
    data=predicted_ratings,
    columns=user_item_matrix.columns,
    index=user_item_matrix.index,
)

In [25]:
# Reconstructed scores as a labelled table
predicted_df

appid,10,70,80,220,240,320,400,550,730,4000,...,1665460,1811260,1938090,1966720,2186680,2246340,2358720,2694490,3164500,3241660
user_steamid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
76561197960281451,2117.390177,668.169128,655.573146,835.990974,1137.603872,52.903654,488.478278,1954.769488,1524.212538,199.880740,...,1.647635,-62.282864,195.663109,72.973691,-6.921247,69.505754,2.524684,36.044003,9.736029,10.112074
76561197960351723,210.654715,7676.980515,3217.174865,11035.199204,-539.325561,1257.956101,4160.552158,1077.066803,85205.565157,676.844304,...,1903.480681,4069.299352,57049.356629,4112.679527,985.756948,4702.152725,1016.379331,1171.673025,1054.259659,1816.030637
76561197960420790,-158.090470,423.143650,60.151733,1366.015906,-33.201871,-24.227126,-53.705084,305.734298,-10.178959,13.839066,...,63.537629,-45.919204,-141.313427,129.097953,52.122454,-123.684956,8.959050,0.650527,60.630091,51.664674
76561197960432447,6197.780659,1249.802151,2014.683061,1244.736491,6096.159985,-7.591029,1446.756181,1178.877692,77099.221368,-20.928740,...,225.366500,306.646064,-42.194261,294.938896,-29.911335,-14.825756,64.422303,291.382564,-29.727659,-16.592598
76561197960441967,-212.612392,812.398538,-118.907417,202.425899,-122.142248,15.328882,-296.047980,66.213095,17.819218,-50.568423,...,155.446956,-173.914599,-102.022727,-94.767314,-24.854257,5.167159,48.423419,-30.802115,-5.369299,21.919614
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76561199824167814,10.496159,348.938461,-218.877032,-99.831278,13.646021,-1.616066,-215.558416,37.880657,52.704088,0.753775,...,288.839374,-166.787489,-397.717084,-71.142561,-47.462918,-40.574329,20.576638,-35.532319,13.541084,-15.641498
76561199834510455,11623.122624,4566.427675,4719.209857,4378.432179,18259.943025,15931.010618,3900.881891,19354.130804,27835.990289,-13.428114,...,319.377901,584.698475,-124.548089,259.894378,-36.046028,18.858685,-13.779538,81.303891,-29.308469,19.909334
76561199848746905,4.429289,7.098868,1.955937,2.071491,-6.219230,0.633630,-5.831251,290.447053,428.991462,-0.769945,...,3.110327,2.190579,183.840241,6.535870,3.642387,-1.685969,1.976457,-2.282437,1.061003,0.420212
76561199861665926,-6.349757,122.505999,121.632654,125.757491,-3.298891,-0.079951,125.033709,14.076503,2.499663,-0.161899,...,-3.568412,11.286795,-4.140191,1.746277,0.199121,1.190423,1.206199,-2.515746,2.138067,4.951124


In [26]:
def recommend_games_from_matrix(user_id, predicted_df, interaction_df, df_game_details, top_n=5):
    """Return the ``top_n`` highest-scored games the user does not already have.

    Parameters
    ----------
    user_id
        Label of the user's row in ``predicted_df``.
    predicted_df : pandas.DataFrame
        Predicted scores with users as the index and one column per appid.
    interaction_df : pandas.DataFrame
        Observed interactions with ``user_steamid`` and ``appid`` columns.
        Every appid listed for the user is excluded from the result.
    df_game_details : pandas.DataFrame
        Game metadata with at least ``appid`` and ``name`` columns.
    top_n : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame or list
        Columns ``appid``, ``name`` and ``similarity_score`` (the predicted
        score), ordered from highest to lowest score. A game without metadata
        is kept with a missing ``name``. An empty list is returned, after
        printing a message, when ``user_id`` is not in ``predicted_df``.
    """
    if user_id not in predicted_df.index:
        print(f"User {user_id} not found.")
        return []

    # Predicted scores for this user
    user_scores = predicted_df.loc[user_id]

    # Remove games the user already has an interaction with
    known_games = interaction_df.loc[interaction_df["user_steamid"] == user_id, "appid"].unique()
    candidate_scores = user_scores.drop(labels=known_games, errors='ignore')

    # Get top N predictions
    best = candidate_scores.sort_values(ascending=False).head(top_n)
    top_recommendations = pd.DataFrame({
        'appid': best.index,
        'similarity_score': best.to_numpy(),
    })

    # Attach game names. One metadata row per appid plus a left join keeps
    # exactly one row per recommendation, in score order, even when metadata
    # is missing or duplicated.
    game_names = df_game_details[['appid', 'name']].drop_duplicates(subset='appid')
    top_recommendations = pd.merge(top_recommendations, game_names, on='appid', how='left')

    return top_recommendations[['appid', 'name', 'similarity_score']]


In [29]:
# Team accounts available for testing: 76561198080989870, 76561198164574454, 76561199062172023
example_user_id = 76561199062172023
top_n_recommendations = 15
n_most_played = 5
# Same threshold as before; presumably minutes, i.e. about 20 hours -- TODO confirm the unit
min_playtime = 1200

# Get recommendations
recommendations = recommend_games_from_matrix(
    example_user_id,
    predicted_df,
    interaction_df,
    df_top_100_game_details,
    top_n=top_n_recommendations,
)
print(f'---\nUser: {example_user_id}\n')

# Get most played games; fall back to an empty list when the user has no usable library
owned = df_users_owned_games.loc[
    df_users_owned_games['user_steamid'] == example_user_id, 'parsed_owned_games'
]
games = owned.iloc[0] if len(owned) else []
if not isinstance(games, list):
    games = []
games_sorted = sorted(games, key=lambda game: game.get('playtime_forever', 0), reverse=True)
print('---\nUser most played games:')
for game in games_sorted[:n_most_played]:
    # Only list games with a substantial amount of playtime
    if game.get('playtime_forever', 0) >= min_playtime:
        print(f"- {game.get('name', game.get('appid'))}")
# The heading follows the number of recommendations actually requested
print(f"\n---\nTop {top_n_recommendations} Recommended Games:")
print(recommendations)

---
User: 76561199062172023

---
User most played games:
- Total War: THREE KINGDOMS
- A Total War Saga: TROY
- Valheim
- XCOM: Chimera Squad
- Half-Life: Alyx

---
Top 5 Recommended Games:
      appid                   name  similarity_score
0    526870           Satisfactory        587.724270
1    413150         Stardew Valley        249.178994
2    255710       Cities: Skylines        174.225221
3   1623730               Palworld        173.215309
4   1174180  Red Dead Redemption 2        151.323514
5    444090              Paladins®        127.688905
6   1086940        Baldur's Gate 3        121.658163
7       400                 Portal        121.385106
8    242760             The Forest        119.240700
9    553850          HELLDIVERS™ 2         99.461321
10   239140            Dying Light         99.354186
11   648800                   Raft         90.427457
12  1938090          Call of Duty®         88.205266
13  1326470     Sons Of The Forest         85.972920
14  2694490    