In [1]:
# This imports pandas
import pandas as pd

# Location of the App Store games dataset (a path relative to the working directory).
DATASET_PATH = 'appstore_games.csv'

# low_memory=False tells pandas to parse the file in one go rather than in chunks,
# which avoids a column ending up with mixed inferred dtypes.
metadata = pd.read_csv(DATASET_PATH, low_memory=False)






In [2]:
# This imports the TfIdfVectorizer from scikit-learn
from sklearn.feature_extraction.text import TfidfVectorizer

# Games without a description get an empty string, so every row handed to the
# vectorizer is a string.
metadata['Description'] = metadata['Description'].fillna('')

# TF-IDF vectorizer that ignores scikit-learn's built-in English stop words
# (very common words such as "a" and "the").
tfidf = TfidfVectorizer(stop_words='english')

# Learn the vocabulary from the descriptions and encode them in one step.
# The result has one row per game and one column per vocabulary term.
tfidf_matrix = tfidf.fit_transform(metadata['Description'])

# Display (number of games, number of distinct terms).
tfidf_matrix.shape

(17007, 72114)

In [3]:
# Show the vocabulary terms assigned to TF-IDF feature (column) indices 3500-3549.
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2 in favour
# of get_feature_names_out(), which returns a NumPy array. Use the new method when it
# exists, fall back to the old one on older releases, and normalise to a plain list
# so the displayed output looks the same either way.
if hasattr(tfidf, 'get_feature_names_out'):
    feature_names = tfidf.get_feature_names_out().tolist()
else:
    feature_names = tfidf.get_feature_names()
feature_names[3500:3550]

['appchatter',
 'appchoppers',
 'appchronicles',
 'appcone',
 'appcraver',
 'appcroc',
 'appdesignuniv_',
 'appdesignuniverse',
 'appdev',
 'appdictions',
 'appeal',
 'appealing',
 'appeals',
 'appear',
 'appearance',
 'appearances',
 'appeared',
 'appearence',
 'appearing',
 'appears',
 'appease',
 'appeasement',
 'appeble',
 'appel',
 'appellata',
 'appelle',
 'appen',
 'appena',
 'appendix',
 'appera',
 'apperama',
 'appetite',
 'appetizing',
 'appgamer',
 'appgear',
 'appgefahren',
 'appgratis',
 'appjacks',
 'appjunkee',
 'applauded',
 'applause',
 'apple',
 'apple_tos',
 'appledevices',
 'appleiphoneschool',
 'apples',
 'appletell',
 'appletv',
 'appli',
 'appliance']

In [4]:
# Import the linear kernel from scikit-learn
from sklearn.metrics.pairwise import linear_kernel

# TfidfVectorizer L2-normalises every row by default, so the plain dot product
# computed by linear_kernel is already the cosine similarity. When no second
# matrix is given, the rows are compared against themselves.
# NOTE: the result is a dense (games x games) array, so it is memory-hungry.
cosine_sim = linear_kernel(tfidf_matrix)

In [5]:
# Shape of the similarity matrix: one row and one column per game, where
# cell (i, j) holds the similarity score between game i and game j.
cosine_sim.shape

(17007, 17007)

In [6]:
# Similarity scores of the game at row 1 (the second row, since numbering starts at 0)
# against every game in the dataset, including itself.
cosine_sim[1]

array([0.05610853, 1.        , 0.27024775, ..., 0.04490509, 0.0192072 ,
       0.01310685])

In [7]:
# Reverse lookup Series: game name -> row index in `metadata`.
# Series.drop_duplicates() compares the *values* (the row indices, which are already
# unique), so it never removes repeated game names. De-duplicate on the index instead,
# keeping the first row for each name, so that indices[name] is always a single integer.
indices = pd.Series(metadata.index, index=metadata['Name'])
indices = indices[~indices.index.duplicated(keep='first')]

In [8]:
# Preview the first 10 entries of the name -> row index lookup.
indices[:10]

Name
Sudoku                            0
Reversi                           1
Morocco                           2
Sudoku (Free)                     3
Senet Deluxe                      4
Sudoku - Classic number puzzle    5
Gravitation                       6
Colony                            7
Carte                             8
"Barrels O' Fun"                  9
dtype: int64

In [9]:
# Content-based recommender: look up a game by name and return its nearest neighbours.
def get_recommendations(Name, cosine_sim=cosine_sim, top_n=10):
    """Return the names of the games whose descriptions are most similar to ``Name``.

    Parameters
    ----------
    Name : str
        Game title exactly as it appears in ``metadata['Name']``.
    cosine_sim : 2-D array
        Pairwise similarity matrix; row ``i`` holds the scores of game ``i``
        against every game. Defaults to the matrix computed earlier in the notebook.
    top_n : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.Series
        Up to ``top_n`` game names, most similar first.

    Raises
    ------
    KeyError
        If no game is called ``Name``.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    # Row index of the requested game.
    try:
        idx = indices[Name]
    except KeyError:
        raise KeyError(f"No game named {Name!r} in the dataset") from None

    # Several games can share a title, in which case the lookup yields a Series
    # rather than a single value; use the first matching row.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    # Pair every other game's position with its score. The queried game is excluded
    # explicitly: skipping "the first sorted entry" is wrong when another game has an
    # identical description, because the tie at 1.0 can sort ahead of the game itself.
    sim_scores = [
        (pos, score) for pos, score in enumerate(cosine_sim[idx]) if pos != idx
    ]

    # Most similar first; the sort is stable, so ties keep dataset order.
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    # Row positions of the best matches.
    app_indices = [pos for pos, _ in sim_scores[:top_n]]

    return metadata['Name'].iloc[app_indices]

In [10]:
# Example call: the games whose descriptions are most similar to 'Sudoku'.
get_recommendations('Sudoku')

35                       Color Sudoku
30                      Expert Sudoku
3                       Sudoku (Free)
1035                        Sudoku HD
1046                 Sudoku HD [Free]
1431                 Conceptis Sudoku
1034                 Expert Sudoku HD
6054    Best Sudoku Classic Easy Hard
6055    Best Sudoku Classic Easy Hard
1033                  Color Sudoku HD
Name: Name, dtype: object