### Game Recommendation System

##### Content Page

In [13]:
import mysql.connector as mysql #pip install mysql.connector 
import numpy as np
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt # we only need pyplot
import requests
import json
import warnings
warnings.filterwarnings('ignore')

from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import plot_tree
from imblearn.over_sampling import SMOTE , ADASYN # for imbalanced data
from imblearn.combine import SMOTEENN 
from imblearn.pipeline import make_pipeline
import time

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from decimal import Decimal

Content-based recommendation

Recommendations are ranked by a similarity score computed from each game's plot/description, developers, related genres, and platforms.

In [14]:
# Load the cleaned Steam game catalogue used by the content-based recommender
GAME_DATA_CSV = "../SteamUserData/steamdata_cleanWithHIT.csv"
gamedata = pd.read_csv(GAME_DATA_CSV)

def clean_data(x):
    """Normalise a tag value to lower-case text with every space removed.

    A string is normalised directly, a list is normalised element by element
    (each element is expected to be a string), and any other value -- for
    example a missing value -- is replaced by an empty string.
    """
    def _squash(text):
        # Remove the spaces first, then lower-case what is left.
        return str.lower(text.replace(" ", ""))

    if isinstance(x, str):
        return _squash(x)
    if isinstance(x, list):
        return list(map(_squash, x))
    return ''

# Lower-case the additional tags and strip their spaces (non-text values become '')
raw_tags = gamedata['additional_tags']
gamedata['additional_tags'] = raw_tags.apply(clean_data)

def join_tgt(x):
    """Build the space-separated feature text for one game row.

    The text is made of, in order: the lower-cased genres, the lower-cased
    developer, the platforms exactly as stored, and the additional tags
    (a list of tags is concatenated without a separator).
    """
    parts = (
        str.lower(x['genres']),
        str.lower(x['developer']),
        x['platforms'],
        ''.join(x['additional_tags']),
    )
    return ' '.join(parts)

# Combine the text features of every row into a single string column for vectorising
joined_text = gamedata.apply(join_tgt, axis=1)
gamedata['joined'] = joined_text


We will use cosine similarity to score how similar the games are to one another.


In [15]:
# Bag-of-words over the joined feature text; English stop words such as 'the' and 'a' are removed
count = CountVectorizer(stop_words='english')

# Learn the vocabulary and build the term-count matrix (one row per game)
count_matrix = count.fit_transform(gamedata['joined'])

# Pairwise cosine similarity between every pair of games
cosine_sim2 = cosine_similarity(count_matrix, count_matrix)

metadata = gamedata.reset_index()
# Reverse map from game name to row position in `metadata` / `cosine_sim2`.
indices = pd.Series(metadata.index, index=metadata['name'])
# A name that appears more than once would make `indices[name]` return a Series
# instead of a single position, which breaks the row lookup in the recommender.
# Keep the first occurrence of each name so every lookup yields one position.
indices = indices[~indices.index.duplicated(keep='first')]

In [31]:
def get_recommendations(game, cosine_sim, howMany):
    """Print the games most similar to ``game``, best match first.

    Parameters
    ----------
    game : str
        Name of the game to look up in the notebook-level ``indices`` Series.
    cosine_sim : 2-D array-like
        Pairwise similarity matrix; row ``i`` holds the scores of game ``i``
        against every game, in the same row order as ``gamedata``.
    howMany : number
        How many recommendations to print. Fractional values are truncated
        (e.g. ``10 / 3`` prints 3 games); values below 1 print nothing.

    Returns
    -------
    None
        The recommendations are printed as ``"<name> | Score : <score>"``.
        If ``game`` is unknown a notice is printed instead of raising.
    """
    try:
        idx = indices[game]
    except KeyError:
        print("'" + str(game) + "' was not found in the game list, so no recommendations can be made for it.")
        return None

    # A duplicated name makes the lookup return a Series of positions; use the first one.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    top_n = max(int(howMany), 0)

    # Exclude the queried game by position rather than assuming it sorts first:
    # another game with identical features ties at the top score and, because
    # the sort is stable, would otherwise push the queried game into the results.
    candidates = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]
    best_matches = sorted(candidates, key=lambda pair: pair[1], reverse=True)[:top_n]

    names = gamedata['name']
    for position, score in best_matches:
        print(names.iloc[position] + " | Score : " + str(score))

    return None

# Example of a direct call: get_recommendations('Counter-Strike', cosine_sim2, 10)

def _parse_rated_games(tokens):
    """Pair each game name with the rating that follows it.

    ``tokens`` is the comma-split user input, e.g. ``['Counter-Strike', '9.5']``.
    Surrounding whitespace is ignored and empty tokens are skipped. A token is
    read as a rating only when it directly follows a name and parses as a
    finite number; otherwise it is read as a (new) game name. Names that
    contain dots or consist only of digits are therefore handled correctly as
    long as they are in the name position.

    Returns a dict mapping game name -> Decimal rating.
    """
    from decimal import InvalidOperation  # the notebook's import cell only brings in Decimal

    rated = {}
    pending_name = None
    for token in (raw_token.strip() for raw_token in tokens):
        if not token:
            continue
        rating = None
        if pending_name is not None:
            try:
                rating = Decimal(token)
            except InvalidOperation:
                rating = None
        # 'NaN' / 'Infinity' parse as Decimals but cannot be ranked, so treat them as names.
        if rating is not None and rating.is_finite():
            rated[pending_name] = rating
            pending_name = None
        else:
            pending_name = token
    return rated


Fav_Games = input("Type in the games you play and the rating (0-10)(e.g Counter-Strike,9.5): ")
Fav_Games = Fav_Games.split(",")
dictGames = _parse_rated_games(Fav_Games)

if not dictGames:
    print("No valid game,rating pairs were found in the input.")
else:
    if len(dictGames) < 3:
        # Split roughly ten recommendations across the one or two games given.
        per_game = 10 // len(dictGames)
    else:
        # Keep only the three highest-rated games. The name is rebound to the
        # list of game names, as the original cell did.
        dictGames = sorted(dictGames, key=dictGames.get, reverse=True)[:3]
        per_game = 3

    print("Games that you might like (Recommendation) : ")
    for game_name in dictGames:
        get_recommendations(game_name, cosine_sim2, per_game)

Games that you might like (Recommendation) : 
Counter-Strike: Source | Score : 0.7391304347826088
Counter-Strike: Condition Zero | Score : 0.6956521739130436
Day of Defeat | Score : 0.6384423980690613
Prime World | Score : 0.7862158588810493
Strife® | Score : 0.7171087882915775
Ball 3D: Soccer Online | Score : 0.6666666666666669
Half-Life: Source | Score : 0.8749999999999998
Half-Life: Opposing Force | Score : 0.7916666666666665
Half-Life 2 | Score : 0.7916666666666665


Now let's build a prediction model based on collaborative filtering. Collaborative filtering is another way to build a recommendation model.

We have the players' data, including the list of games each player owns and their ratings. We will use that to build our model.

We will use TruncatedSVD to make our model

In [46]:
# Load the cleaned per-player game ratings
PLAYER_GAMES_CSV = "../SteamUserData/PlayerGames_Cleaned.csv"
userData = pd.read_csv(PLAYER_GAMES_CSV)

# One row per steamid and one column per gamesid, holding the 'rating' values
# (combined with pivot_table's default aggregation); player/game pairs with
# no rating are filled with 0.
rating_crosstab = userData.pivot_table(
    index='steamid',
    columns='gamesid',
    values='rating',
    fill_value=0,
)
rating_crosstab

gamesid,10.0,20.0,30.0,40.0,50.0,60.0,70.0,80.0,100.0,130.0,...,433850.0,434570.0,439190.0,443080.0,446620.0,448280.0,450540.0,451520.0,457320.0,457960.0
steamid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
76561197960269742,1,5,3,5,2,2,1,2,5,1,...,0,0,0,0,2,2,2,4,1,1
76561197960270817,4,3,3,3,2,1,4,1,4,2,...,0,0,3,0,0,0,5,0,0,4
76561197960270881,3,4,1,2,3,2,5,4,2,4,...,2,0,0,2,0,0,0,0,0,0
76561197960271173,4,2,1,2,2,5,3,0,0,1,...,2,4,0,0,0,0,0,0,0,0
76561197960271217,4,3,1,2,4,5,4,1,4,1,...,3,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76561197960410700,5,4,2,5,1,1,4,0,0,4,...,0,0,0,0,0,0,0,0,0,0
76561197960412986,2,1,1,2,3,3,4,0,0,4,...,0,0,3,0,0,0,0,0,0,0
76561197960413532,1,5,2,5,2,5,2,5,1,4,...,0,0,0,0,0,0,0,0,0,0
76561197960417000,2,2,4,1,4,3,3,0,0,1,...,0,0,2,0,0,0,0,0,0,0


We will first transpose the matrix and then use SVD to decompose it into synthetic representations of the user ratings.

In [47]:
# Transpose so that each row is a game (gamesid) and each column is a player (steamid)
X = rating_crosstab.transpose()


Decomposing the matrix - Using truncatedSVD

In [49]:
from sklearn.decomposition import TruncatedSVD

# Reduce every row of X to 12 components; random_state is fixed so that
# repeated runs use the same seed.
svd_params = {"n_components": 12, "random_state": 17}
SVD = TruncatedSVD(**svd_params)
resultant_matrix = SVD.fit_transform(X)

Now we want to find out how similar each game is to the other games, based on user "tastes". To do that, we will use Pearson’s R correlation coefficient.

For each pair of games, we will calculate how strongly they correlate from the users' perspective.

In [56]:
# Pearson correlation between every pair of rows of resultant_matrix
# (rowvar=True, the default, treats each row as one variable)
corr_mat = np.corrcoef(resultant_matrix, rowvar=True)
