In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from scipy.spatial.distance import cosine, correlation
from IPython.display import display
import random

# Read user data

In [2]:
# Load every (user, game) ownership row. Rows with a missing value in ANY
# column of the CSV are discarded before narrowing to the two id columns.
all_user_games = (
    pd.read_csv('../data/processed/all_user_games.csv')
    .dropna()
    .loc[:, ['steamid', 'appid']]
)
all_user_games.head()

Unnamed: 0,steamid,appid
0,76561198013196620,111800
1,76561198011737710,72850
2,76561198013247670,12210
3,76561198013442400,203770
4,76561198012059990,107410


In [3]:
# Preview the first five ownership rows (same view as the previous cell).
all_user_games.head(n=5)

Unnamed: 0,steamid,appid
0,76561198013196620,111800
1,76561198011737710,72850
2,76561198013247670,12210
3,76561198013442400,203770
4,76561198012059990,107410


In [4]:
# Number of (user, game) rows left after dropping incomplete records.
print(all_user_games.shape[0])

1252208


In [5]:
# Per-user records; later cells read the steamid, appid and playtime columns.
user_info_csv = '../data/processed/user_info.csv'
user_data = pd.read_csv(user_info_csv)

# Preview: user_data.head()

# Read game data

In [6]:
# Per-game metadata, including the one-hot 'Genre_*' columns used further down.
games_info_csv = '../data/processed/games_info_v3.csv'
game_data = pd.read_csv(games_info_csv)
# Spot check a single title: game_data[game_data.appid == 570].head()

# Read game genres as text

In [7]:
# Genre labels as text, keyed by appid; merged into the display tables later.
genres_text_csv = '../data/processed/genres_text.csv'
game_genres_text = pd.read_csv(genres_text_csv)
# Preview: game_genres_text.head()

# Get OHE game genres

In [8]:
# Names of the one-hot genre columns: any column whose name contains 'Genre_'.
genre_cols = list(filter(lambda name: 'Genre_' in name, game_data.columns))

# Game id and release year next to the genre flags.
game_genres = game_data[['appid', 'Release_Date', *genre_cols]]
# Preview: game_genres.head()

# Get the most played game per user over a 2-week period as a starting point
This game's genres are used as the starting point for the recommender system

In [9]:
# Order rows by recent playtime (highest first); keeping the first row seen
# for each steamid then leaves that user's most played game of the last 2 weeks.
by_recent_playtime = user_data.sort_values('playtime_2weeks', ascending=False)
users_data_top_game = by_recent_playtime.drop_duplicates(subset='steamid', keep='first')
# Preview: users_data_top_game.head()

# Create dataframes to use with similarity function
Each user's time with their most played game, plus that game's genres, in one dataframe

Each game's playtime and its genres in one dataframe

#### User dataframe for similarity calc

In [10]:
# One row per user: their top game joined with that game's release year and
# genre flags. The per-game playtime columns are dropped and the user's
# 2-week playtime becomes the 'time' feature. Everything is cast to int64.
user_genres_sim = (
    users_data_top_game
    .merge(game_genres, how='inner', on='appid', suffixes=('', '_y'))
    .drop(columns=['playtime_2weeks', 'playtime_forever'])
    .rename(columns={'user_2weeks_playtime': 'time'})
    .astype('int64')
)
# Preview: user_genres_sim.head()

#### Games dataframe for similarity calc

In [11]:
# Game-side feature table: average playtime (exposed as 'time'), release year
# and the genre flags. rename() returns a new frame, so game_data is untouched.
game_feature_cols = ['average_forever', 'Release_Date'] + genre_cols
game_genres_sim = game_data[game_feature_cols].rename(columns={'average_forever': 'time'})
# Preview: game_genres_sim.head()

# Compute the weighted cosine similarity between the user and every game

In [24]:
# Weights applied to each feature inside the cosine distance.
GENRE_MATCH_WEIGHT = 1.5   # genre that the user's top game has
RELEASE_DATE_WEIGHT = 0.5  # release year counts for less
DEFAULT_WEIGHT = 1.0       # playtime and genres the user's game lacks

# Gather user data. The draw is unseeded, so each run profiles a different user.
rand_val = random.randint(0, len(user_genres_sim.index) - 1)
user_row = user_genres_sim.iloc[rand_val]
x_user = user_row[['steamid', 'appid']]

# Gather game data
y_data = game_genres_sim

# scipy's cosine() compares the two vectors position by position and ignores
# labels, so take the user's features in exactly the game-feature column order.
# A feature missing on the user side raises KeyError here instead of being
# silently compared against the wrong column.
x_data = user_row[y_data.columns].copy()

# Generate weights. The common genres are more heavily weighted.
weights = []
for feature in y_data.columns:
    if 'Genre_' in feature and x_data[feature]:
        weights.append(GENRE_MATCH_WEIGHT)
    elif 'Release_Date' in feature:
        weights.append(RELEASE_DATE_WEIGHT)
    else:
        weights.append(DEFAULT_WEIGHT)
print(weights)

# NOTE(review): 'time' and 'Release_Date' are used unscaled next to 0/1 genre
# flags, so they appear to dominate the cosine (all top scores are > 0.998).
# Consider scaling the features before comparing.

# Create cosine similarity using weights. Iterating over a plain float matrix
# avoids building one pandas Series per game as iterrows() does.
user_vector = x_data.to_numpy()
game_matrix = y_data.to_numpy(dtype=float)
sims = pd.DataFrame(
    {'similarity': [1 - cosine(user_vector, game_row, weights) for game_row in game_matrix]},
    index=y_data.index,
)

# Create dataframe and assign appid to the respective cosine value.
# game_genres and y_data are both column subsets of game_data, so their rows
# correspond one-to-one; attach appid by position rather than by index label.
cosine_values = sims.assign(appid=game_genres['appid'].to_numpy())
cosine_values = cosine_values.sort_values(by='similarity', ascending=False)

[1.0, 0.5, 1.0, 1.5, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.5, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.5, 1.0, 1.0, 1.0]


# Merge top 10 similarity values with their respective titles and info

In [25]:
# Columns shown to the reader for each game.
cols = ['appid', 'Title', 'Release_Date', 'negative', 'positive', 'developer', 'publisher', 'Rating']

# cosine_values is already sorted best-first, so the head is the top ten.
top_ten = cosine_values.head(10)

# Genre flags of the recommended games (used later for the genre-overlap score).
rec_genres = top_ten.merge(
    game_data[['appid'] + genre_cols], how='inner', on='appid', suffixes=('', '_y')
)

# Display table: similarity + game info + genre text, best match first.
recommendations = (
    top_ten
    .merge(game_data[cols], how='inner', on='appid', suffixes=('', '_y'))
    .merge(game_genres_text, how='inner', on='appid', suffixes=('', '_y'))
    .sort_values(by='similarity', ascending=False)
    .reset_index(drop=True)
)
# Preview: recommendations.head(10)

# Info about the user's game

In [26]:
# Row of the sampled user in the "top game per user" table.
user_info = users_data_top_game[users_data_top_game['steamid'] == x_user['steamid']]

# Pull the appid out as a scalar explicitly: int() on a one-element Series is
# deprecated in pandas and is slated to raise TypeError.
user_appid = int(user_info['appid'].iloc[0])

# Display info for the user's game. Selecting `cols` already leaves the genre
# flag columns out, so no separate drop is needed.
user_game_info = (
    game_data.loc[game_data['appid'] == user_appid, cols]
    .merge(game_genres_text, on='appid', how='inner')
)
# Preview: user_game_info.head()

# Print user game and recommendations

In [27]:
# Extract scalars before formatting: '%i' applied to a one-element Series relies
# on the deprecated implicit int() conversion of a Series.
user_playtime = int(user_info['user_2weeks_playtime'].iloc[0])
print('Based on a playtime of %i minutes by user: %i and their recently most played game:' % (user_playtime, x_user['steamid']))
display(user_game_info)

print('\n')

print("Recommended games with similar playtime and genre, sorted by recommended:")
display(recommendations)


# Score based on the number of genres that are the same
x_genres = pd.DataFrame(x_data[genre_cols]).T
y_genres = rec_genres[genre_cols]

# Boolean masks: one row for the user's game, one row per recommended game.
# Both are selected with genre_cols, so the columns line up by position.
user_flags = x_genres.to_numpy().astype(bool)
rec_flags = y_genres.to_numpy().astype(bool)

# matches: genre flags set on a recommended game AND on the user's game.
# other_count: every genre flag set across the recommended games.
matches = int((rec_flags & user_flags).sum())
other_count = int(rec_flags.sum())

# Guard against recommendations that carry no genre flags at all.
genre_score = matches / other_count if other_count else 0.0

# Score is how many times any of the user's genres appear in any of the recommended games' genres
print("%i out of %i genres match" % (matches, other_count))

Based on a playtime of 3813 minutes by user: 76561198028684278 and their recently most played game:


Unnamed: 0,appid,Title,Release_Date,negative,positive,developer,publisher,Rating,Genre
0,570,Dota 2,2013,142513,864822,Valve,Valve,90,Action|Free to Play|Strategy




Recommended games with similar playtime and genre, sorted by recommended:


Unnamed: 0,similarity,appid,Title,Release_Date,negative,positive,developer,publisher,Rating,Genre
0,0.999997,9940,Blade Kitten,2014,149,747,Krome Studios,Krome Studios,52,Action|Adventure
1,0.999984,8500,EVE Online,2010,2852,8334,CCP,CCP,88,Massively Multiplayer|RPG
2,0.999958,238750,Might & Magic X - Legacy,2014,780,1576,Ubisoft,Ubisoft,70,RPG
3,0.99984,46790,Armada 2526,2011,12,14,Ntronium Games,Iceberg Interactive,66,Strategy
4,0.999764,218620,PAYDAY 2,2013,56508,308894,OVERKILL - a Starbreeze Studio.,Starbreeze Publishing AB,79,Action|RPG
5,0.999429,500,Left 4 Dead,2008,952,18010,Valve,Valve,89,Action
6,0.999243,22380,Fallout: New Vegas,2010,3155,66864,Obsidian Entertainment,Bethesda Softworks,84,Action|RPG
7,0.999234,220240,Far Cry 3,2012,5622,46356,"Ubisoft Montreal, Massive Entertainment, and U...",Ubisoft,88,Action|Adventure
8,0.998874,2810,X3: Reunion,2006,114,282,Egosoft,Egosoft,71,Strategy
9,0.998227,214950,Total War™: ROME II - Emperor Edition,2013,13431,31912,CREATIVE ASSEMBLY,SEGA,76,Strategy


8 out of 15 genres match
