### Libs

In [1]:
import pandas as pd
import numpy as np
import ast
import requests
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix

In [2]:
pd.set_option('display.max_colwidth', 200)

### Upload files and transform data

In [3]:
df_top_100_game_reviews = pd.read_csv('../../data/top_100_game_reviews.gz', compression='gzip')
df_users_profile = pd.read_csv('../../data/users_profile.csv')
df_users_friendlist = pd.read_csv('../../data/users_friendlist.csv')
df_users_owned_games = pd.read_csv('../../data/users_owned_games.csv')
df_users_recently_played_games = pd.read_csv('../../data/users_recently_played_games.csv')

  df_top_100_game_reviews = pd.read_csv('../../data/top_100_game_reviews.gz', compression='gzip')


In [4]:
# Add our steam ids for tests
key = "06273B6FCBD23D875865DFC7E0EAFC06"
list_top_100_game_reviews = [76561198080989870, 76561199062172023, 76561198164574454]
#list_top_100_game_reviews = [76561199062172023]
users_owned_games = []
for steamid in list_top_100_game_reviews:
    dict_users_owned_games = {}
    url = f"http://api.steampowered.com/IPlayerService/GetOwnedGames/v0001/?key={key}&steamid={steamid}&include_appinfo=true&include_extended_appinfo=true&format=json"
    response = requests.get(url)
    #print(response)
    #print(response.json())
    dict_users_owned_games['user_steamid'] =  steamid
    try: # in case of no owned games
        dict_users_owned_games['user_owned_games'] =  response.json()['response']['games']
        users_owned_games.append(dict_users_owned_games)
    except:
        pass

# Concat in users df
users_owned_games
df_users_owned_games_team = pd.DataFrame(users_owned_games)
df_users_owned_games = pd.concat([df_users_owned_games,df_users_owned_games_team])

In [5]:
df_users_owned_games_team

Unnamed: 0,user_steamid,user_owned_games
0,76561198080989870,"[{'appid': 72850, 'name': 'The Elder Scrolls V: Skyrim', 'playtime_forever': 3889, 'img_icon_url': 'b9aca8a189abd8d6aaf09047dbb0f57582683e1c', 'has_community_visible_stats': True, 'playtime_window..."
1,76561199062172023,"[{'appid': 322500, 'name': 'SUPERHOT', 'playtime_forever': 408, 'img_icon_url': '81839f0d50cb3e54c9aa7c69c04916f1e53d8c35', 'has_community_visible_stats': True, 'capsule_filename': 'library_600x90..."
2,76561198164574454,"[{'appid': 32470, 'name': 'STAR WARS™ Empire at War: Gold Pack', 'playtime_forever': 320, 'img_icon_url': '3f65d9be3af3083c07f1053dbf0b0653af7323b8', 'capsule_filename': '82795235c7d4481a68914f066..."


In [6]:
users_owned_games[0]['user_owned_games'][7]

{'appid': 1174180,
 'name': 'Red Dead Redemption 2',
 'playtime_forever': 5576,
 'img_icon_url': '5106abd9c1187a97f23295a0ba9470c94804ec6c',
 'has_community_visible_stats': True,
 'playtime_windows_forever': 5576,
 'playtime_mac_forever': 0,
 'playtime_linux_forever': 0,
 'playtime_deck_forever': 0,
 'rtime_last_played': 1741484617,
 'capsule_filename': 'library_600x900.jpg',
 'has_workshop': False,
 'has_market': False,
 'has_dlc': True,
 'content_descriptorids': [5],
 'playtime_disconnected': 0}

In [7]:
# Base table for users and games played (items)
df_users_owned_games.tail(3)


Unnamed: 0,user_steamid,user_owned_games
0,76561198080989870,"[{'appid': 72850, 'name': 'The Elder Scrolls V: Skyrim', 'playtime_forever': 3889, 'img_icon_url': 'b9aca8a189abd8d6aaf09047dbb0f57582683e1c', 'has_community_visible_stats': True, 'playtime_window..."
1,76561199062172023,"[{'appid': 322500, 'name': 'SUPERHOT', 'playtime_forever': 408, 'img_icon_url': '81839f0d50cb3e54c9aa7c69c04916f1e53d8c35', 'has_community_visible_stats': True, 'capsule_filename': 'library_600x90..."
2,76561198164574454,"[{'appid': 32470, 'name': 'STAR WARS™ Empire at War: Gold Pack', 'playtime_forever': 320, 'img_icon_url': '3f65d9be3af3083c07f1053dbf0b0653af7323b8', 'capsule_filename': '82795235c7d4481a68914f066..."


In [8]:
# Transform the user_owned_games JSON-like column
#df_users_owned_games["parsed_owned_games"] = df_users_owned_games["user_owned_games"].apply(ast.literal_eval)

# To avoid errors with team data later updated
def safe_parse_owned_games(val):
    if isinstance(val, str):
        try:
            return ast.literal_eval(val)
        except Exception as e:
            return None
    return val  # If it's already a list/dict, just return as is

df_users_owned_games["parsed_owned_games"] = df_users_owned_games["user_owned_games"].apply(safe_parse_owned_games)


In [9]:
# Flatten the data into rows of (user, appid, playtime)
rows = []
for _, row in df_users_owned_games.iterrows():
    user_id = row["user_steamid"]
    for game in row["parsed_owned_games"]:
        appid = game.get("appid")
        playtime = game.get("playtime_forever", 0)
        if appid is not None:
            rows.append((user_id, appid, playtime))

In [10]:
# Create a DataFrame from the flattened rows
interaction_df = pd.DataFrame(rows, columns=["user_steamid", "appid", "playtime_forever"])
interaction_df

Unnamed: 0,user_steamid,appid,playtime_forever
0,76561198974520522,10,1
1,76561198974520522,80,0
2,76561198974520522,100,434
3,76561198974520522,300,1
4,76561198974520522,20,82
...,...,...,...
2443530,76561198164574454,1716740,61
2443531,76561198164574454,1086940,10499
2443532,76561198164574454,2767030,0
2443533,76561198164574454,2322010,0


In [11]:
# Filter only top 100 played games before sparse matrix
df_top_100_game_details = pd.read_csv('../../data/top_100_game_details.csv')
top_100_games = list(df_top_100_game_details['appid'])
interaction_df = interaction_df[interaction_df['appid'].isin(top_100_games)]

In [12]:
# Binarize playtime as implicit feedback (1 if played > x minutes)
interaction_df["interaction"] = (interaction_df["playtime_forever"] > 1200).astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  interaction_df["interaction"] = (interaction_df["playtime_forever"] > 1200).astype(int)


In [13]:
# Pivot to a user-item matrix
user_item_matrix = interaction_df.pivot_table(
    index="user_steamid", columns="appid", values="interaction", fill_value=0
)

In [14]:
user_item_matrix.head()

appid,10,70,80,220,240,320,400,550,730,4000,...,1665460,1811260,1938090,1966720,2186680,2246340,2358720,2694490,3164500,3241660
user_steamid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
76561197960281451,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
76561197960351723,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,...,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0
76561197960420790,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
76561197960432447,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
76561197960441967,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [22]:
# Save .csv
user_item_matrix.to_csv('../../data/memory_based_user_item_matrix.csv', index=False)

In [16]:
# Compute item-item similarity using cosine similarity
item_user_matrix = user_item_matrix.T  # Transpose to get items x users
item_similarity = cosine_similarity(item_user_matrix)

In [17]:
# Build item similarity DataFrame for exploration
item_similarity_df = pd.DataFrame(
    item_similarity, index=item_user_matrix.index, columns=item_user_matrix.index
)

In [18]:
item_similarity_df

appid,10,70,80,220,240,320,400,550,730,4000,...,1665460,1811260,1938090,1966720,2186680,2246340,2358720,2694490,3164500,3241660
appid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10,1.000000,0.215457,0.485875,0.178637,0.362624,0.200488,0.178914,0.246522,0.286560,0.183561,...,0.053504,0.058768,0.125873,0.082821,0.007432,0.022631,0.016511,0.059543,0.086033,0.053714
70,0.215457,1.000000,0.190717,0.459617,0.244357,0.240023,0.291543,0.242072,0.222847,0.234106,...,0.032027,0.014658,0.090089,0.123941,0.007415,0.048919,0.021963,0.033415,0.066324,0.065955
80,0.485875,0.190717,1.000000,0.132926,0.264436,0.163269,0.151771,0.151490,0.169537,0.109160,...,0.054464,0.024926,0.092850,0.056205,0.000000,0.000000,0.028012,0.025255,0.046442,0.028040
220,0.178637,0.459617,0.132926,1.000000,0.322753,0.246254,0.322466,0.357632,0.306063,0.348653,...,0.028573,0.019615,0.115078,0.169544,0.059533,0.073018,0.058781,0.072042,0.093977,0.099294
240,0.362624,0.244357,0.264436,0.322753,1.000000,0.244507,0.193952,0.370878,0.393286,0.342335,...,0.043501,0.039817,0.196523,0.138415,0.040283,0.079222,0.059662,0.143720,0.119230,0.086783
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2246340,0.022631,0.048919,0.000000,0.073018,0.079222,0.038657,0.028748,0.196496,0.203068,0.206767,...,0.008597,0.017705,0.169278,0.188526,0.107475,1.000000,0.163598,0.227225,0.113104,0.132781
2358720,0.016511,0.021963,0.028012,0.058781,0.059662,0.009401,0.034956,0.097846,0.160209,0.086516,...,0.037633,0.068891,0.137933,0.058253,0.069697,0.163598,1.000000,0.113425,0.087100,0.067810
2694490,0.059543,0.033415,0.025255,0.072042,0.143720,0.000000,0.018909,0.183100,0.243454,0.154503,...,0.000000,0.017469,0.160509,0.091909,0.088365,0.227225,0.113425,1.000000,0.120891,0.088430
3164500,0.086033,0.066324,0.046442,0.093977,0.119230,0.040079,0.054643,0.160069,0.237467,0.220677,...,0.008913,0.018357,0.196018,0.174765,0.043333,0.113104,0.087100,0.120891,1.000000,0.192731


In [20]:
def recommend_games_for_user(user_id, user_item_matrix, item_similarity_df, top_n=5):
    # Get the user’s interaction vector
    if user_id not in user_item_matrix.index:
        print(f"User {user_id} not found.")
        return []

    #user_vector = user_item_matrix.loc[user_id]
    #interacted_games = user_vector[user_vector > 0].index.tolist() # Only recommended? So more than X hours? Better to filter all played?
    # Instead of using binarized matrix, use original interaction_df:
    played_games = interaction_df[interaction_df["user_steamid"] == user_id]["appid"].unique()

    # Store recommendation scores
    scores = pd.Series(dtype=float)

    for game in played_games:
        similar_scores = item_similarity_df[game]
        scores = scores.add(similar_scores, fill_value=0)

    # Remove games the user has already interacted with
    #scores = scores.drop(labels=interacted_games, errors="ignore")
    # Remove all games the user ever played, not just liked ones
    scores = scores.drop(labels=played_games, errors="ignore")


    # Get top N recommendations
    top_recommendations = scores.sort_values(ascending=False).head(top_n)
    top_recommendations = pd.DataFrame(top_recommendations).reset_index().rename(columns={0: 'similarity_score'})
    #print(top_recommendations)
    top_recommendations = pd.merge(top_recommendations, df_top_100_game_details, on='appid')
    #print(top_recommendations)
    top_recommendations = top_recommendations[['appid','name','similarity_score']]
    return top_recommendations


In [21]:
#example_user_id = user_item_matrix.index[3]
#example_user_id = 76561198080989870
#example_user_id = 76561198164574454
example_user_id = 76561199062172023

# Get recommendations
recommendations = recommend_games_for_user(example_user_id, user_item_matrix, item_similarity_df, top_n=15)
print(f'---\nUser: {example_user_id}\n')
# Get most played games
games = list(df_users_owned_games[df_users_owned_games['user_steamid']==example_user_id]['parsed_owned_games'])[0]
games_sorted = sorted(games, key=lambda x: x['playtime_forever'], reverse=True)
print('---\nUser most played games:')
for game in games_sorted[:5]:
    if game['playtime_forever']>=1200 > 0:
        print(f"- {game['name']}")
    else:
        pass
print("\n---\nTop 5 Recommended Games:")
print(recommendations)

---
User: 76561199062172023

---
User most played games:
- Total War: THREE KINGDOMS
- A Total War Saga: TROY
- Valheim
- XCOM: Chimera Squad
- Half-Life: Alyx

---
Top 5 Recommended Games:
      appid                               name  similarity_score
0    553850                      HELLDIVERS™ 2          0.780044
1    105600                           Terraria          0.769772
2   1245620                         ELDEN RING          0.729449
3       730                   Counter-Strike 2          0.718809
4    413150                     Stardew Valley          0.704047
5    251570                      7 Days to Die          0.702924
6      4000                        Garry's Mod          0.696884
7   1086940                    Baldur's Gate 3          0.693066
8   1091500                     Cyberpunk 2077          0.675286
9    242760                         The Forest          0.667308
10  1623730                           Palworld          0.666795
11   377160                   