# Recommendation algorithm

## Objective

The aim of this notebook is to enable you to check our algorithm, and to test it with your own steam ID.  
To do so, please check the last part of the report.

In [1]:
import json
import requests
import numpy as np
import pandas as pd

# How to test our algorithm ?

## First, you need to retrieve your steamId

You can find it directly on Steam application, or on their website going into your account details.  
Then, you need to make your games info public so that we can request the API to retrieve your info. To do so, you need to go to your confidentiality settings (https://steamcommunity.com/my/edit/settings?snr=) and set your games info to public.

## Then, download the two datasets

The dataset we used to fit the algorithm can be found there :
https://kaggle.com/datasets/tamber/steam-video-games
and you need to put it in the folder with the notebooks.

The other ones, containing the info on appids and names can be found there:
https://www.kaggle.com/datasets/nikdavis/steam-store-games
and put the steam.csv file with the notebooks.

That's it, pass your steamId as a string in the getGames function to test if everything works well, and then get your recommendation with the recommendation function !

## Setup recommendation model

In [8]:
#knn model
from sklearn.neighbors import NearestNeighbors

class SteamPredictionModel():
    
    def __init__(self, k_neighbors = 5):
        self.neigh = NearestNeighbors(n_neighbors=k_neighbors, metric='euclidean')
        self.games_list = []
        self.likeness = None
        self.average_played = {}

    def dict_to_likeness(self, dicti):
        d = dicti.copy()
        for game in d.keys():
            if d[game] <= self.average_played[game]:
                d[game]=0
        return d
    
    #We fit the model on a dataset containing ids and dictionnaries of games associated with time played
    def fit(self, data):

        #Firstly we encode the hours played
        hours_encoded = data.apply(pd.Series)
        #Replace NaN values by 0 : a game not in the dict has never been played
        hours_encoded = hours_encoded.fillna(0)
        hours_encoded = hours_encoded.reindex(sorted(hours_encoded.columns),axis=1)
        

        #For each player, we compute the list of game he likes with the time he has played aboved average time played
        non_zero_dict = hours_encoded.replace(0, np.NaN)
        self.average_played = non_zero_dict.mean(axis=0)
        likeness_games = data.map(self.dict_to_likeness)
        #And encode them
        likeness_games_encoded = likeness_games.apply(pd.Series)
        #Replace NaN values by 0 : a game not in the dict has never been played
        likeness_games_encoded = likeness_games_encoded.fillna(0)
        likeness_games_encoded = likeness_games_encoded.reindex(sorted(likeness_games_encoded.columns),axis=1)

        #standardization
        #We standardize each column separately

        def standardize(c):
            m = c.mean()
            if c.std() > 0:
                std = c.std()
            else:
                std = 1e-8
            return (c-m)/std

        hours_encoded = hours_encoded.apply(lambda column : standardize(column),axis=0)

        #we also standardize the likeness
        likeness_games_encoded = likeness_games_encoded.apply(lambda column : standardize(column),axis=0)
        self.likeness = likeness_games_encoded



        self.neigh.fit(hours_encoded.values)
        
        #Get the list of games
        games_list = list(hours_encoded.columns)
        games_list.sort()
        self.games_list = games_list
        
        
    
    #We predict a certain number of games (maximum) using a dictionnary of games associated with time played
    def predict(self, X_init, recommendations_number_max):
        #One-hot-encode X
        X = pd.Series(X_init,index=self.games_list).fillna(0)
        
        #Create a vector with all games and a null score
        score = pd.Series({self.games_list[0]:0.0},index=self.games_list).fillna(0)

        #Get the list of games played by X
        already_owned = [self.games_list[index] for index in np.asarray(X).nonzero()[0]]

        
        #Get the neighbors of X
        kneighbors = self.neigh.kneighbors([X])
        kneighbors_distances = kneighbors[0][0]
        kneighbors_indices = kneighbors[1][0]

        for i in range(len(kneighbors_indices)):
            neighbor = kneighbors_indices[i]
            #get the list of liked games
            liked = [self.games_list[index] for index in np.asarray(self.likeness.iloc[neighbor]).nonzero()[0]]
            #Add to each game score (1/d)*l with d the distance between X and the neighbor
            #and l the amount of time played above the average
            for liked_game in liked:
                if liked_game not in already_owned:
                    score[liked_game] = score[liked_game] + 1/kneighbors_distances[i]*self.likeness.iloc[neighbor][liked_game]


        score = score.sort_values(ascending=False)
        return score.iloc[:recommendations_number_max]

In [9]:
#Fit the model on the dataset keeping only players that played at least 3 games

#read data
data = pd.read_csv('steam-200k.csv')
#clean data
data.columns = ['id','game','state','hours_played','0']
data = data.drop('0',axis=1)
played_games = data.loc[data['state']=='play']

#Get a dict of games and hours played for each id
played_dict = played_games.groupby('id').apply(lambda g : dict(zip(g['game'], g['hours_played'])))
#Select players that played at least 3 games
played_dict_3 = played_dict.loc[played_dict.map(len)>=3]

SPM = SteamPredictionModel(20)
SPM.fit(played_dict_3)

## Setup Steam API requests

In [10]:
#Dataframe of all steam games
steam_games = pd.read_csv('steam.csv')

def cleanJsonResponse(steamId, jsonResponse):
  new_json = []
  for j in range(jsonResponse['game_count']):
    appid = jsonResponse['games'][j]['appid']
    total_playtime = jsonResponse['games'][j]['playtime_forever']
    new_json_line = {'steamid': steamId, 'appid': appid, 'total_playtime': total_playtime}
    new_json.append(new_json_line)
  return new_json

### GET GAME INFO
def getGameInfo(steamId):
  ploads = {'steamid': steamId,'format':'json', 'key': '92B91BC3A28D928DF3AE65CB3B75943D'}
  r = requests.get('http://api.steampowered.com/IPlayerService/GetOwnedGames/v0001',params=ploads)
  if (
      r.status_code != 204 and
      r.headers["content-type"].strip().startswith("application/json")
  ):
    return cleanJsonResponse(steamId, r.json()['response'])

def getGames(steamId):
    #Create a unique dictionnary with games and playtimes
    json = getGameInfo(steamId)
    player = {}
    for d in json:
        #Convert appid to game name
        appid = d['appid']
        try:
            game_name = steam_games.loc[steam_games['appid']==appid]['name'].values[0]
        except: #Game is not in in the dataset
            None 
        player[game_name]=d['total_playtime']
    return player

## Test our algorithm !

In [11]:
def recommend(steamId, number_of_recommendations):
    player = getGames(steamId)
    return SPM.predict(player,number_of_recommendations)

In [12]:
getGames('76561198120113085')

{'Portal': 162,
 "Oddworld: Abe's Oddysee®": 12,
 "Oddworld: Abe's Exoddus®": 0,
 "Oddworld: Munch's Oddysee": 462,
 "Oddworld: Stranger's Wrath HD": 184,
 'Portal 2': 742,
 'Psychonauts': 0,
 'The Elder Scrolls V: Skyrim': 7496,
 'Counter-Strike: Global Offensive': 4787,
 'RPG Maker VX Ace': 2303,
 'Worms Revolution': 0,
 'Realm of the Mad God': 21,
 'Star Conflict': 0,
 'PlanetSide 2': 0,
 'Toribash': 2853,
 'Brutal Legend': 0,
 'The Forest': 9336,
 'Space Engineers': 233,
 'Brothers - A Tale of Two Sons': 4,
 'Castlevania: Lords of Shadow – Ultimate Edition': 3849,
 'PAYDAY 2': 644,
 'Rust': 0,
 'Rocket League®': 38808,
 'Dishonored': 335,
 'Borderlands 2': 475,
 'NARUTO SHIPPUDEN: Ultimate Ninja STORM 3 Full Burst HD': 915,
 'Darkest Dungeon®': 183,
 'FINAL FANTASY XIV Online': 6966,
 'One Finger Death Punch': 602,
 'DayZ': 194,
 'Talisman: Digital Edition': 0,
 'Talisman: Prologue': 8,
 'METAL GEAR RISING: REVENGEANCE': 688,
 'Left 4 Dead 2': 79,
 'Broken Age': 0,
 'A Story About 

In [13]:
recommend('76561198120113085',10)

7 Days to Die                           66.593270
Survival Postapocalypse Now             59.741868
Stronghold Crusader HD                  59.414397
Fading Hearts                           58.534964
Amnesia Memories                        58.534964
Crystals of Time                        58.534964
Stronghold Legends                      58.534964
Anna's Quest                            58.534964
Dream Of Mirror Online                  58.534964
Real Horror Stories Ultimate Edition    58.534964
dtype: float64