In [46]:
import json
import requests
import numpy as np
import pandas as pd

In [25]:
#Dataframe of all steam games
steam_games = pd.read_csv('steam_games.csv')

def cleanJsonResponse(steamId, jsonResponse):
  new_json = []
  for j in range(jsonResponse['game_count']):
    appid = jsonResponse['games'][j]['appid']
    total_playtime = jsonResponse['games'][j]['playtime_forever']
    new_json_line = {'steamid': steamId, 'appid': appid, 'total_playtime': total_playtime}
    new_json.append(new_json_line)
  return new_json

### GET GAME INFO
def getGameInfo(steamId):
  ploads = {'steamid': steamId,'format':'json', 'key': '92B91BC3A28D928DF3AE65CB3B75943D'}
  r = requests.get('http://api.steampowered.com/IPlayerService/GetOwnedGames/v0001',params=ploads)
  if (
      r.status_code != 204 and
      r.headers["content-type"].strip().startswith("application/json")
  ):
    return cleanJsonResponse(steamId, r.json()['response'])

def getGames(steamId):
    #Create a unique dictionnary with games and playtimes
    json = getGameInfo(steamId)
    player = {}
    for d in json:
        #Convert appid to game name
        appid = d['appid']
        try:
            game_name = steam_games.loc[steam_games['appid']==appid]['name'].values[0]
        except: #Game is not in in the dataset
            None 
        player[game_name]=d['total_playtime']
    return player

In [69]:
#knn model
from sklearn.neighbors import NearestNeighbors

class SteamPredictionModel():
    
    def __init__(self, k_neighbors = 5):
        self.neigh = NearestNeighbors(n_neighbors=k_neighbors, metric='euclidean')
        self.games_list = []
        self.avg_dict = {}
        self.hours_min = 0
        self.hours_max = 0
        self.likeness = None
        
    def dict_to_likeness(self, dicti):
        d = dicti.copy()
        for game in d.keys():
            if d[game] >= self.avg_dict[game]:
                d[game]= d[game] - self.avg_dict[game]
            else:
                d[game]=0
        return d
    
    #We fit the model on a dataset containing ids and dictionnaries of games associated with time played
    def fit(self, data):
        #Encode the hours played for each game in a dataset
        hours_encoded = data.apply(pd.Series)
        #Replace NaN values by 0 : a game not in the dict has never been played
        hours_encoded = hours_encoded.fillna(0)
        hours_encoded = hours_encoded.reindex(sorted(hours_encoded.columns),axis=1)
        
        self.neigh.fit(hours_encoded.values)
        
        #Get the list of games
        games_list = list(hours_encoded.columns)
        games_list.sort()
        self.games_list = games_list
        
        #Get average time played for each game
        avg = played_games.groupby('game')['hours_played'].apply(np.average).reset_index(name='avg_hours_played')
        #Dictionnary of average time played
        self.avg_dict = avg.set_index('game').to_dict()['avg_hours_played']

        
        #For each player, we compute the list of game he likes with the time he has played aboved average time played
        likeness_games = data.map(self.dict_to_likeness)
        #And encode them
        likeness_games_encoded = likeness_games.apply(pd.Series)
        #Replace NaN values by 0 : a game not in the dict has never been played
        likeness_games_encoded = likeness_games_encoded.fillna(0)
        likeness_games_encoded = likeness_games_encoded.reindex(sorted(likeness_games_encoded.columns),axis=1)
        self.likeness = likeness_games_encoded
    
    #We predict a certain number of games (maximum) using a dictionnary of games associated with time played
    def predict(self, X_init, recommendations_number_max):
        #One-hot-encode X
        X = score = pd.Series(X_init,index=self.games_list).fillna(0)
        
        #Create a vector with all games and a null score
        score = pd.Series({self.games_list[0]:0.0},index=self.games_list).fillna(0)

        #Get the list of games played by X
        already_owned = [self.games_list[index] for index in np.asarray(X).nonzero()[0]]

        #Get the neighbors of X
        kneighbors = self.neigh.kneighbors([X])
        kneighbors_distances = kneighbors[0][0]
        kneighbors_indices = kneighbors[1][0]

        for i in range(len(kneighbors_indices)):
            neighbor = kneighbors_indices[i]
            #get the list of liked games
            liked = [self.games_list[index] for index in np.asarray(self.likeness.iloc[neighbor]).nonzero()[0]]
            #Add to each game score (1/d)*l with d the distance between X and the neighbor
            #and l the amount of time played above the average
            for liked_game in liked:
                if liked_game not in already_owned:
                    score[liked_game] = score[liked_game] + 1/kneighbors_distances[i] + self.likeness.iloc[neighbor][liked_game]


        score = score.sort_values(ascending=False)
        return score.iloc[:recommendations_number_max]

In [70]:
#Fit the model on the dataset keeping only players that played at least 3 games

#read data
data = pd.read_csv('steam-200k.csv')
#clean data
data.columns = ['id','game','state','hours_played','0']
data = data.drop('0',axis=1)
played_games = data.loc[data['state']=='play']
played_games['hours_played'] = (played_games['hours_played']-played_games['hours_played'].min())/(played_games['hours_played'].max()-played_games['hours_played'].min())
#Get a dict of games and hours played for each id
played_dict = played_games.groupby('id').apply(lambda g : dict(zip(g['game'], g['hours_played'])))
#Select players that played at least 3 games
played_dict_3 = played_dict.loc[played_dict.map(len)>=3]

SPM = SteamPredictionModel(5)
SPM.fit(played_dict_3)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  played_games['hours_played'] = (played_games['hours_played']-played_games['hours_played'].min())/(played_games['hours_played'].max()-played_games['hours_played'].min())


In [73]:
def recommend(steamId, number_of_recommendations):
    player = getGames(steamId)
    return SPM.predict(player,number_of_recommendations)

In [74]:
recommend('76561198120113085',5)

Dota 2                                      0.088606
7 Days to Die                               0.049231
Rocket League                               0.027031
Total War SHOGUN 2                          0.022307
Rising Storm/Red Orchestra 2 Multiplayer    0.013252
dtype: float64