In [1]:
import pandas as pd

# Folder holding the processed CSV exports, relative to the notebook.
# Forward slashes are used because they resolve on Windows as well as
# macOS/Linux, whereas the previous backslash-separated paths only worked on Windows.
DATA_DIR = 'data/Processed Data'

game_reviews = pd.read_csv(DATA_DIR + '/game_reviews.csv')
games = pd.read_csv(DATA_DIR + '/games.csv')
genres = pd.read_csv(DATA_DIR + '/genres.csv')
game_genre_tags = pd.read_csv(DATA_DIR + '/game_genre_tags.csv')
platforms = pd.read_csv(DATA_DIR + '/platforms.csv')

To Do:
 - What data needs to be added to review table for the engine?
 - Preliminary Neural Network model
 - How can a user access the model and refine the recommendations with their own preferences (e.g. platform, genre)?

#### Prep Data

In [2]:
# Drop every review row whose text is missing.
# NOTE(review): `reviews` is not assigned in any cell shown above (only
# `game_reviews` is loaded) -- confirm where it is built, otherwise this cell
# fails on a fresh kernel.
reviews = reviews.loc[reviews['review_text'].notna()]

In [3]:
reviews.isna().sum()

game_ID             0
user_ID             0
score            3170
review_text         0
date           193598
title               0
dtype: int64

#### Build Model

In [4]:
import gensim
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from gensim.utils import simple_preprocess

In [20]:
#assert gensim.models.doc2vec.FAST_VERSION > -1

In [6]:
# MyDocs reading from a data frame
class MyDocs(object):
    '''Re-iterable corpus over the global `reviews` data frame.

    Each iteration yields one TaggedDocument per review row: the words are the
    tokens produced by `simple_preprocess` from the `review_text` column and the
    single tag is the row's `game_ID` rendered as a string.

    A class with `__iter__` (rather than a one-shot generator) is used because
    the same corpus object is passed to both `build_vocab` and `train`, so it
    has to be iterable more than once.
    '''
    def __iter__(self):
        # Columns are read by name and walked in lockstep; this avoids relying on
        # column positions and avoids a per-cell positional lookup on every row.
        for game_id, text in zip(reviews['game_ID'], reviews['review_text']):
            yield TaggedDocument(words=simple_preprocess(text), tags=['%s' % game_id])

In [8]:
%%time
import multiprocessing
import os
cores = multiprocessing.cpu_count()

try:
    doc2vec_model = Doc2Vec.load('models/doc2vec.model')

except:
    print("start traing doc2vec model...")
    documents = MyDocs()
    doc2vec_model = Doc2Vec(dm=1, dbow_words=1, vector_size=200, window=8, min_count=20, workers=cores)
    doc2vec_model.build_vocab(documents)
    doc2vec_model.train(documents, total_examples=doc2vec_model.corpus_count, epochs=doc2vec_model.epochs)
    if not os.path.exists('models'):
        os.makedirs('models')
        doc2vec_model.save('models/doc2vec.model')
    else:
        doc2vec_model.save('models/doc2vec.model')

start traing doc2vec model...
Wall time: 17min 25s


In [9]:
def search(keyword):
    '''Return the set of game titles in `reviews` that contain `keyword`.

    The match is a case-insensitive substring test: both the keyword and each
    title are lower-cased before comparing, so 'Overwatch' and 'overwatch'
    give the same result.

        Input:
        keyword = text to look for inside a title (string)

        Output:
        set of matching titles (empty set when nothing matches)
    '''
    needle = keyword.lower()
    # Titles repeat once per review, so only the distinct non-null titles are scanned.
    titles = reviews['title'].dropna().unique()
    return {name for name in titles if needle in str(name).lower()}

In [10]:
search('overwatch')

{'Overwatch'}

In [11]:
# Fetch the vector the model stores under the key 'cars', then list the 5
# document tags whose vectors are most similar to it.
# NOTE(review): document tags are built from game IDs in MyDocs, so the tags
# returned here are presumably game IDs -- confirm.
vec = doc2vec_model['cars']
doc2vec_model.docvecs.most_similar([vec], topn=5)

[('18772', 0.5355895161628723),
 ('24337', 0.5223432779312134),
 ('24977', 0.5219161510467529),
 ('25762', 0.5189286470413208),
 ('24370', 0.5155553221702576)]

In [13]:
#doc2vec_model.docvecs.most_similar('Terraria', topn=20)

In [46]:
class Recommender(object):
    '''Wraps a trained Doc2Vec model plus the games / platforms / genres tables
    to return game recommendations, optionally filtered by platform and genre.

    The model's document tags are treated as game IDs rendered as strings, so
    every lookup keyed by a tag uses the string form of the game ID.
    '''
    def __init__(self, model, games_df, platform_df, genre_df, game_genre_tags_df):
        '''Store the model and tables and index the genre tags by game.

                Input:
                model = trained model exposing `docvecs.most_similar`
                games_df = games table (game_ID, title, platform_ID, summary, url)
                platform_df = platforms table (platform_ID, platform)
                genre_df = genres table (genre_ID, genre_name)
                game_genre_tags_df = game/genre link table (game_ID, genre_ID)
        '''
        self.model = model
        self.games_df = games_df
        self.platform_df = platform_df
        self.genre_df = genre_df
        self.game_genre_tags_df = game_genre_tags_df
        
        # game_ID -> list of genre_IDs, built in a single pass over the link table
        # (in order of first appearance) instead of re-filtering the table per game.
        self.genre_dict = {}
        for game_id, genre_id in zip(self.game_genre_tags_df['game_ID'],
                                     self.game_genre_tags_df['genre_ID']):
            self.genre_dict.setdefault(game_id, []).append(genre_id)

        # Same mapping keyed by the string form of the game ID, because the
        # recommendations come back as string tags, not as the table's IDs.
        self._genres_by_tag = {str(game_id): set(genre_ids)
                               for game_id, genre_ids in self.genre_dict.items()}

    def create_lookup(self):
        '''Helper method that creates a lookup dictionary (from the games table) to easily filter by game ID.

                Output:
                dictionary keyed by game ID (as string); each value is a dictionary with the
                keys 'title', 'platform_ID', 'summary' and 'url'. Also stored on the instance
                as `lookup_dict` (and the intermediate frame as `lookup_df`).
        '''
        # .assign builds a new frame, so the caller's games table is not written
        # through a slice (which triggered pandas' SettingWithCopyWarning).
        self.lookup_df = self.games_df[['game_ID', 'title', 'platform_ID', 'summary', 'url']].assign(
            game_ID=lambda frame: frame['game_ID'].astype(str)
        )
        self.lookup_dict = self.lookup_df.set_index('game_ID').to_dict(orient='index')
        return self.lookup_dict
        
    def get_recommendations(self, keyword, n = 100):
        '''Helper method that calls the Doc2Vec model to get recommendations.
                Input:
                keyword = Game ID (to build recommendations from)
                n = # of recommendations to return (default = 100)

                Output:
                whatever `model.docvecs.most_similar` returns for that tag; the rest of
                this class treats it as a ranked sequence of (tag, similarity) pairs.
        '''
        return self.model.docvecs.most_similar(keyword, topn=n)

    def get_filtered_recommendations(self, keyword, platform_ID, genre_IDs, n):
        ''' Method that takes a keyword to build recommendations from, and filters recommendations by game 
        platform and genres, to the top 'n' recommendations.
                Input: 
                keyword = game ID (string)
                platform_ID = platform ID (integer), or None for no platform filter
                genre_IDs = genre ID (list of integers); empty or None for no genre filter
                n = number of recommendations to return (integer)
                
                Output:
                filtered_results = dictionary mapping game ID (string) to a
                (title, summary, url) tuple, in ranking order; None when nothing matches.

        Candidates come from get_recommendations with its default size, so at most
        that many games are considered before filtering. A game passes the genre
        filter when it shares at least one genre with genre_IDs. Recommended tags
        that are missing from the games table are skipped.
        '''
        lookup_dict = self.create_lookup()
        ranked_results = self.get_recommendations(keyword)

        # Normalise the genre filter once; None and empty both mean "no filter".
        if genre_IDs is not None and len(genre_IDs) > 0:
            wanted_genres = set(genre_IDs)
        else:
            wanted_genres = set()

        filtered_results = {}

        for game in ranked_results:
            # Stop as soon as enough results are collected.
            if len(filtered_results) >= n:
                break

            dict_id = game[0]
            details = lookup_dict.get(dict_id)
            if details is None:
                # Tag has no row in the games table: nothing to show for it.
                continue

            if platform_ID is not None and details['platform_ID'] != platform_ID:
                continue

            # Set overlap is tested with isdisjoint; comparing the intersection
            # to 0 with '>' is a TypeError in Python 3.
            if wanted_genres and wanted_genres.isdisjoint(self._genres_by_tag.get(dict_id, ())):
                continue

            filtered_results[dict_id] = (details['title'], details['summary'], details['url'])

        if len(filtered_results) > 0:
            return filtered_results
        else:
            return 
        
    def lookup_value(self, id_number, id_type):
        '''Method that looks up the value of the ID number for a game, platform, or genre.
                Input: 
                id_number = game, platform, or genre ID (integer)
                id_type = must equal 'game', 'platform', or 'genre'
                
                Output:
                Corresponding name for a genre or platform ID (as string). Returns the entire corresponding
                row for a game ID.

                Raises:
                ValueError if id_type is not one of the three accepted values.
                IndexError if no platform / genre row has the given ID.
        '''
        if id_type == 'game':
            return self.games_df[self.games_df['game_ID']==id_number]
            
        elif id_type == 'platform':
            return self.platform_df[self.platform_df['platform_ID']==id_number]['platform'].iloc[0]
        
        elif id_type == 'genre':
            return self.genre_df[self.genre_df['genre_ID']==id_number]['genre_name'].iloc[0]
        
        else:
            raise ValueError("id_type must equal 'game', 'platform', or 'genre'")
            
    def lookup_id(self, name, id_type):
        '''Method that looks up the ID of a game, platform, or genre.
                Input: 
                name = game, platform, or genre name (string)
                id_type = must equal 'game', 'platform', or 'genre'
                
                Output:
                Corresponding ID for a genre or platform name (as integer). Returns the entire corresponding
                row for a game.

                Raises:
                ValueError if id_type is not one of the three accepted values.
                IndexError if no platform / genre row has the given name.
        '''
        if id_type == 'game':
            return self.games_df[self.games_df['title']==name]
            
        elif id_type == 'platform':
            return self.platform_df[self.platform_df['platform']==name]['platform_ID'].iloc[0]
        
        elif id_type == 'genre':
            return self.genre_df[self.genre_df['genre_name']==name]['genre_ID'].iloc[0]
        
        else:
            # Adjacent literals are joined at compile time; the previous backslash
            # continuation embedded the next line's indentation in the message.
            raise ValueError("id_type must equal 'game', 'platform', or 'genre'. Error may be caused by "
                             "no corresponding ID for input 'name'.")
        