In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
df = pd.read_csv('steam-200k.csv', usecols=[0, 1, 2, 3],
                 names=['USER ID', 'GAME TITLE', 'BEHAVIOUR', 'VALUE'])

In [3]:
# import pandas_profiling
# df.profile_report()

In [4]:
df['HOURS'] = df['VALUE']
df.loc[(df['BEHAVIOUR'] == 'purchase'), 'HOURS'] = 0
df = df.sort_values(['USER ID', 'GAME TITLE', 'HOURS'])
df = df.drop_duplicates(['USER ID', 'GAME TITLE'], keep='last').drop(columns=['BEHAVIOUR', 'VALUE'])

In [5]:
# n1 = len(df['USER ID'].unique())
# n2 = len(df['GAME TITLE'].unique())

In [6]:
# df_train, df_test = train_test_split(df, random_state=42)

In [7]:
user2idx = {user: i for i, user in enumerate(df['USER ID'].unique())}
idx2user = {i: user for user, i in user2idx.items()}

game2idx = {game: i for i, game in enumerate(df['GAME TITLE'].unique())}
idx2game = {i: game for game, i in game2idx.items()}

In [8]:
matrix = df.pivot(index='USER ID', columns='GAME TITLE', values='HOURS')

In [9]:
matrix += 1

In [10]:
conf_matrix = matrix.fillna(0)

In [11]:
# train_conf_matrix, test_conf_matrix = train_test_split(conf_matrix, random_state=42, test_size=0.1)

In [12]:
pref_matrix = conf_matrix.astype(bool).astype(int)

In [13]:
# train_pref_matrix = train_conf_matrix.astype(bool).astype(int)
# test_pref_matrix = test_conf_matrix.astype(bool).astype(int)

In [14]:
# def next_batch(matrix, batch_size=256):
#     for i in range(0, matrix.shape[0], batch_size):
#         yield matrix[i: i + batch_size]

In [15]:
# def train(matrix, identity, learning_rate=0.01, max_iter=10, rank=10):
#     n1 = matrix.shape[0]
#     n2 = matrix.shape[1]
# #     loss_history = []
#     u = np.random.normal(size=(n1, rank))
#     v = np.random.normal(size=(rank, n2))
#     for k in range(max_iter):
#         for i in range(n1):
#             print(np.dot(u[i], v).shape)
#             print((matrix[i] - np.dot(u[i], v)).shape)
#             print(np.dot(identity[i], matrix[i] - np.dot(u[i], v)).shape)
#             print(np.dot(np.dot(identity[i], matrix[i] - np.dot(u[i], v)), v).shape)
#             u[i] += - learning_rate * (np.dot(np.dot(identity[i], matrix[i] - np.dot(u[i], v)), v) - u[i])
#         for j in range(n2):
#             v[j] += - learning_rate * (np.dot(np.dot(identity.T[j], matrix.T[j] - np.dot(u, v[j])), u) - v[j])
#     return u, v

In [16]:
def train(matrix, identity, learning_rate=0.01, max_iter=3, rank=2):
    n1 = matrix.shape[0]
    n2 = matrix.shape[1]
    u = np.random.normal(size=(n1, rank))
    v = np.random.normal(size=(n2, rank))
    for k in range(max_iter):
#         print(f'epoch {k}')
        for i in range(n1):
            for j in range(n2):
                s = identity[i][j] * (matrix[i][j] - np.dot(u[i], v[j])) * v[j]
            u[i] += - learning_rate * (s + u[i])
        for j in range(n2):
            for i in range(n1):
                s = identity[i][j] * (matrix[i][j] - np.dot(u[i], v[j])) * u[i]
            v[j] += - learning_rate * (s + v[j])
    return u, v

In [17]:
# conf_matrix = conf_matrix.iloc[:5000, :2000]
# pref_matrix = pref_matrix.iloc[:5000, :2000]

In [18]:
# conf_matrix.shape, pref_matrix.shape

((5000, 2000), (5000, 2000))

In [19]:
u, v = train(conf_matrix.values, pref_matrix.values)

epoch 0
epoch 1
epoch 2


In [23]:
# user2idx = {user: i for i, user in enumerate(df['USER ID'].unique()[:5000])}
# idx2user = {i: user for user, i in user2idx.items()}

# game2idx = {game: i for i, game in enumerate(df['GAME TITLE'].unique()[:2000])}
# idx2game = {i: game for game, i in game2idx.items()}

In [31]:
for user_idx in np.random.choice(list(idx2user.keys()), size=4, replace=False):
    user = idx2user[user_idx]
    games = {}
    for game_idx, game in idx2game.items():
        games[game] = np.dot(u[user_idx], v[game_idx])
    games = sorted(games.items(), key= lambda item: item[1], reverse=True)
    top10 = [games[i][0] for i in range(10)]
    print(f'\nTop 10 Recommended Games for user #{user}:\n')
    print(*top10, sep='\n')


Top 10 Recommended Games for user #126843636:

Trine 2
Pirates of Black Cove
Pre-Civilization Marble Age
Magic The Gathering - Duels of the Planeswalkers 2013
Divinity Dragon Commander
Mirror's Edge
GRID 2 GTR Racing Pack
Resident Evil 5 / Biohazard 5
Toy Soldiers
Insaniquarium! Deluxe

Top 10 Recommended Games for user #67713900:

Democracy 3
Natural Selection 2
Shadows on the Vatican - Act I Greed
Solar Flux
Bridge Constructor
Hacker Evolution Duality
Ghost Master
AdVenture Capitalist
Mount & Blade Warband - Viking Conquest Reforged Edition
Hector Ep 2

Top 10 Recommended Games for user #137977132:

Democracy 3
Fallen Enchantress Legendary Heroes
NyxQuest
AdVenture Capitalist
Hacker Evolution Duality
Pinball FX2 - Paranormal Table
Ghost Master
Spore Galactic Adventures
Natural Selection 2
Solar Flux

Top 10 Recommended Games for user #31944667:

Trine 2
Section 8 Prejudice
TERA
GRID 2 GTR Racing Pack
Geometry Wars Retro Evolved
Divinity Dragon Commander
Dungeon Siege 2
Magic The Gat