### Sentiment analyses, review-based playtime estimations, and playtime-based recommendations

In this notebook, we set up a pipeline that helps our user analyze reviews of a queried game. For this game, labelled as `A`, polarized words in its reviews are extracted. The importance of each of these words is weighted by our user. The weightings are then used to estimate our user's playtime on `A` using either `Random Forest` or `Document similarity` through word embeddings.

After analyzing `A`, games sharing similar tags with `A` are explored by web-scraping content on Steam. The weightings previously input by our user are mapped to the reviews of these games. For each of these games, a playtime estimation is made. Games with a long estimated playtime are recommended to our user.

Below, I perform a test run on this pipeline for the game, Port Royale 4 (ID: 1024650). Test runs on more recently released games on Steam have also been performed during code development.

In [2]:
import steamreviews

import json
import pandas as pd
import os
from io import StringIO
from collections import defaultdict
import numpy as np

import requests
from bs4 import BeautifulSoup as bs
import re

import spacy
from spacy.lang.en.stop_words import STOP_WORDS

import nltk

from gensim.models import Word2Vec

from tabulate import tabulate

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import GridSearchCV
from sklearn.base import TransformerMixin, BaseEstimator
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor

#### 1.1 Code for downloading reviews & extracting features of a game based on its Steam ID

In [3]:
class steam_game(object):
    """Reviews of one Steam app and the polarised nouns mined from them.

    Typical use is ``get_reviews()`` followed by ``get_words()``.  After
    ``get_reviews`` the instance exposes ``data`` (one row per review) and
    ``ready_for_ML``; after a successful ``get_words`` it also exposes
    ``vocab``, ``coeff_pos``, ``coeff_neg``, ``vectorizer_param``,
    ``recommend_classifier``, ``word_pos`` and ``word_neg``.
    """

    # Columns of ``self.data``, in the order they appear in the frame.
    _COLUMNS = (
        'post_id', 'language', 'review_text', 'recommended', 'play_time',
        'purchase', 'steam_id', 'num_games_owned', 'num_reviews',
        'play_time_last_2_weeks',
    )

    def __init__(self, gameID, model='en_core_web_sm'):
        """Load the spaCy ``model`` and prepare a lemmatising tokenizer.

        Args:
            gameID: Steam app id of the game.
            model: name of the spaCy pipeline passed to ``spacy.load``.
        """
        self.gameID = gameID
        nlp = spacy.load(model)

        def tokenize_lemma(text):
            return [w.lemma_ for w in nlp(text)]
        self.tokenizer = tokenize_lemma

        # Stop words are lemmatised with the same pipeline as the tokenizer
        # so that they are compared in the same form as the review tokens.
        stop_words = STOP_WORDS.union({'ll', 've', 'pron'})
        self.stop_words = set(w.lemma_ for w in nlp(' '.join(stop_words)))
        self.nlp = nlp
        # Nothing is downloaded yet; get_words() reports this instead of
        # failing with an AttributeError when called too early.
        self.ready_for_ML = False

    def get_reviews(self, language='english', min_num_reviews=5):
        """Download the reviews and load them into ``self.data``.

        The reviews are fetched with ``steamreviews`` and then read back
        from ``data/review_<gameID>.json``.

        Args:
            language: keep only reviews whose ``language`` field equals this
                value; ``None`` keeps every review.
            min_num_reviews: ``ready_for_ML`` is set only when strictly more
                than this many reviews remain after the language filter.
        """
        gameID = self.gameID
        steamreviews.download_reviews_for_app_id_batch([gameID])
        json_path = os.path.abspath('data/review_' + str(gameID) + '.json')
        with open(json_path, 'r') as f:
            data = json.load(f)

        columns = {name: [] for name in self._COLUMNS}
        reviews = data['reviews']
        if reviews:
            for post_id, review in reviews.items():
                author = review['author']
                columns['post_id'].append(post_id)
                columns['language'].append(review['language'])
                columns['review_text'].append(review['review'])
                columns['recommended'].append(review['voted_up'])
                columns['play_time'].append(author['playtime_forever'])
                columns['purchase'].append(review['steam_purchase'])
                columns['steam_id'].append(author['steamid'])
                columns['num_games_owned'].append(author['num_games_owned'])
                columns['num_reviews'].append(author['num_reviews'])
                columns['play_time_last_2_weeks'].append(
                    author['playtime_last_two_weeks'])
        else:
            print('Game/DILL {} has no review yet. An empty data set will be returned.'.format(gameID))

        df = pd.DataFrame.from_dict(columns)
        if reviews and language is not None:
            df = df[df['language'] == language]

        self.data = df
        num_usable = len(df.index)
        self.ready_for_ML = num_usable > min_num_reviews
        if not self.ready_for_ML:
            # The count is taken after the language filter, so it can be far
            # below the number of downloaded reviews; report the real number.
            print("Game/DILL {} has only {} usable review(s); more than {} are needed. "
                  "No further ML-based analyses will be made."
                  .format(gameID, num_usable, min_num_reviews))

    def _pick_nouns(self, feature_ids, num_words):
        """Return up to ``num_words`` vocabulary entries that are pure nouns.

        Only entries whose feature index is in ``feature_ids`` are
        considered, in vocabulary iteration order.
        """
        wanted = set(int(i) for i in feature_ids)
        picked = []
        for word, feature_id in self.vocab.items():
            if len(picked) >= num_words:
                break
            if feature_id not in wanted:
                continue
            # Every token of the (possibly two-word) n-gram must be a noun.
            if set(w.pos_ for w in self.nlp(word)) == {'NOUN'}:
                picked.append(word)
        return picked

    def get_words(self, num_words=10, search_range=100):
        """Fit a recommend/not-recommend classifier and extract polar nouns.

        A TF-IDF + multinomial naive Bayes pipeline is fitted on the review
        texts against the ``recommended`` flag.  Features are ranked by the
        difference of their per-class log probabilities; nouns among the
        ``search_range`` most positive / most negative features are stored
        in ``word_pos`` / ``word_neg``.

        Args:
            num_words: maximum number of words kept per polarity.
            search_range: number of top-ranked features inspected per
                polarity.
        """
        if not self.ready_for_ML:
            print('Game/DILL {} has no review for analyses.'.format(self.gameID))
            return

        est = Pipeline([
            ('vectorizer', TfidfVectorizer(
                # A list is accepted by every scikit-learn release; the
                # vectorizer treats it as a set internally.
                stop_words=sorted(self.stop_words),
                ngram_range=(1, 2),
                tokenizer=self.tokenizer
            )),
            ('classifier', MultinomialNB()),
        ])
        #The below para_grid is for later convenience.
        param_grid = {
            'vectorizer__max_df': [0.7],
            'vectorizer__min_df': [1],
            'vectorizer__max_features': [5000]
        }
        gs_est = GridSearchCV(est, param_grid, n_jobs=-1)
        gs_est.fit(self.data['review_text'], self.data['recommended'])

        best = gs_est.best_estimator_
        classifier = best.named_steps['classifier']
        self.vocab = best.named_steps['vectorizer'].vocabulary_
        self.vectorizer_param = gs_est.best_params_
        self.recommend_classifier = best

        # feature_log_prob_ has one row per class actually seen in training.
        # When every review carries the same vote there is no second row and
        # no polarity can be computed.
        classes = list(classifier.classes_)
        if True not in classes or False not in classes:
            self.coeff_pos = None
            self.coeff_neg = None
            self.word_pos = []
            self.word_neg = []
            print('Game/DILL {} has reviews of a single polarity only; '
                  'no polarised words can be extracted.'.format(self.gameID))
            return

        coeff_pos = classifier.feature_log_prob_[classes.index(True)]
        coeff_neg = classifier.feature_log_prob_[classes.index(False)]
        self.coeff_pos = coeff_pos
        self.coeff_neg = coeff_neg

        # Ascending order: most negative features first, most positive last.
        order = np.argsort(coeff_pos - coeff_neg)
        self.word_pos = self._pick_nouns(order[-search_range:], num_words)
        self.word_neg = self._pick_nouns(order[:search_range], num_words)

#### 1.2 Code for building an artificial review with an estimated vote

In [12]:
def get_feature_weights(words, min_weight=0, max_weight=5, input_fn=None):
    """Ask the user for an integer importance weight for every word.

    The question is repeated for a word until the answer is an integer
    within ``[min_weight, max_weight]``.

    Args:
        words: iterable of words/features to be weighted.
        min_weight: smallest accepted weight (inclusive).
        max_weight: largest accepted weight (inclusive).
        input_fn: callable taking the prompt and returning the answer as a
            string; defaults to the built-in ``input``.

    Returns:
        A list of integer weights, one per word, in the order of ``words``.
    """
    ask = input if input_fn is None else input_fn
    weights = []
    for word in words:
        prompt = (
            "Please weight the importance ({}-{}) of this property/feature: {} "
            .format(min_weight, max_weight, word)
        )
        while True:
            try:
                weight = int(ask(prompt))
            except ValueError:
                weight = None
            # Out-of-range numbers are rejected as well: the weight is later
            # used as a repetition count when the artificial review is built.
            if weight is not None and min_weight <= weight <= max_weight:
                break
            print("The weight should be an integer between {} and {}."
                  .format(min_weight, max_weight))
        weights.append(weight)
    return weights

In [5]:
class artificial_reviews(object):
    """A synthetic review built from weighted words, plus its predicted vote.

    Unless ``pre_set_review`` is given, the review text is every positive
    word followed by every negative word of ``game``, each repeated as many
    times as its weight, joined by single spaces.  ``recommend`` holds the
    output of ``game.recommend_classifier.predict`` for that text.

    Args:
        game: object exposing ``word_pos``, ``word_neg`` and
            ``recommend_classifier``.
        weight_pos: integer weights paired positionally with
            ``game.word_pos``; defaults to ten zeros.
        weight_neg: integer weights paired positionally with
            ``game.word_neg``; defaults to ten zeros.
        pre_set_review: ready-made review text; when given, the weights are
            stored but not used to build the text.
    """

    def __init__(self,
                 game,
                 weight_pos=None,
                 weight_neg=None,
                 pre_set_review=None):
        self.word_pos = game.word_pos
        self.word_neg = game.word_neg
        # Fresh lists per instance: the weights are stored on the object, so
        # a shared default list would leak edits from one instance to all.
        self.weight_pos = [0] * 10 if weight_pos is None else weight_pos
        self.weight_neg = [0] * 10 if weight_neg is None else weight_neg
        if pre_set_review is None:
            repeated = []
            # zip() stops at the shorter sequence, so surplus words or
            # surplus weights are ignored.
            for weight, word in zip(self.weight_pos, self.word_pos):
                repeated.extend([word] * weight)
            for weight, word in zip(self.weight_neg, self.word_neg):
                repeated.extend([word] * weight)
            self.art_review = ' '.join(repeated)
        else:
            self.art_review = pre_set_review
        self.recommend = \
            game.recommend_classifier.predict([self.art_review])

#### 1.3 Codes for estimating play time using ML

In [6]:
"""An iterators for sentences in a review."""
class nltk_sentences(object):
    """Re-iterable stream of tokenised sentences over several collections.

    Every positional argument is an iterable of documents.  Iterating the
    object yields one list of word tokens per sentence, walking through the
    collections, their documents and each document's sentences in order.
    """

    def __init__(self, *arrays):
        self.arrays = arrays

    def __iter__(self):
        every_document = (
            doc for collection in self.arrays for doc in collection
        )
        for doc in every_document:
            yield from map(nltk.word_tokenize, nltk.sent_tokenize(doc))

                    
class nltk_tokenizer:
    """Stateless transformer turning each document into an array of tokens.

    A document is split into sentences with ``nltk.sent_tokenize`` and every
    sentence into words with ``nltk.word_tokenize``; the tokens of all
    sentences of a document are concatenated.
    """

    def __init__(self):
        pass

    def fit(self, X, y=None):
        """Nothing is learned; return the transformer itself."""
        return self

    def transform(self, X):
        """Return an object array holding one token array per document."""
        per_document = []
        for text in X:
            tokens = [
                token
                for sentence in nltk.sent_tokenize(text)
                for token in nltk.word_tokenize(sentence)
            ]
            per_document.append(np.array(tokens))
        return np.asarray(per_document, dtype=object)

    def fit_transform(self, X, y=None):
        """Same as ``transform``; ``y`` is ignored."""
        return self.transform(X)
    
    
class mean_embedding_vector(object):
    """Embed a document as the mean of the vectors of its known tokens.

    ``word2vec`` must expose its vectors through ``.wv`` (membership test,
    item lookup and ``vector_size``).  A document none of whose tokens is
    known to the model is represented by a zero vector of that size.
    """

    def __init__(self, word2vec):
        self.word2vec = word2vec
        self.dim = word2vec.wv.vector_size

    def fit(self, X, y=None):
        """Nothing is learned; return the transformer itself."""
        return self

    def transform(self, X):
        """Tokenise the documents in ``X`` and return one mean vector each."""
        vectors = self.word2vec.wv
        averaged = []
        for tokens in nltk_tokenizer().fit_transform(X):
            known = [vectors[token] for token in tokens if token in vectors]
            if not known:
                # No token of this document is in the embedding vocabulary.
                known = [np.zeros(self.dim)]
            averaged.append(np.mean(known, axis=0))
        return np.array(averaged)

    def fit_transform(self, X, y=None):
        """Same as ``transform``; ``y`` is ignored."""
        return self.transform(X)
    
    
def get_similar_doc(mean_array,
                    ref_doc_index,
                    docs, labels,
                    num_output=5):
    """Return the documents closest to a reference document.

    Closeness is the squared Euclidean distance between the rows of
    ``mean_array``.

    Args:
        mean_array: 2-D array with one embedding row per document.
        ref_doc_index: row index of the reference document.
        docs: documents, in the same order as the rows of ``mean_array``.
        labels: one label per document *other than the reference*, in
            document order (a pandas Series or any array-like).
        num_output: maximum number of neighbours returned.

    Returns:
        A list of ``(document, label, squared_distance)`` tuples sorted by
        increasing distance.  The reference document is never included.
    """
    sq_dist = np.sum(
        (mean_array - mean_array[ref_doc_index, :]) ** 2, axis=1
    )
    label_values = np.asarray(labels)
    # Normalise negative indices so the comparisons below are positional.
    ref = range(len(sq_dist))[ref_doc_index]

    neighbours = []
    for ind, document in enumerate(docs):
        if ind == ref:
            # Dropped explicitly instead of relying on it sorting first.
            continue
        # ``labels`` has no entry for the reference, so every document after
        # it is shifted by one position.
        label = label_values[ind if ind < ref else ind - 1]
        neighbours.append((document, label, sq_dist[ind]))
    neighbours.sort(key=lambda item: item[2])
    return neighbours[:max(num_output, 0)]

In [7]:
class estimator(BaseEstimator, TransformerMixin):
    """Estimate a user's play time on a game from an artificial review.

    With fewer than ``min_num_data`` reviews the estimate is the mean play
    time of the reviewers whose texts are closest to the artificial review
    in a Word2Vec mean-embedding space; otherwise a grid-searched random
    forest is trained on the review texts and reviewer features.

    After ``get_play_time`` the estimate is available as ``prediction``
    (``None`` until then, or when the game is not ready for ML).

    NOTE(review): the printed messages call the value "hour", while the
    labels are the ``play_time`` column of ``game.data`` taken as is --
    confirm the unit of that column.
    """

    def __init__(self, game):
        # Stored under the argument's name because BaseEstimator reads every
        # __init__ argument back from the instance (get_params / repr).
        self.game = game
        self.gameID = game.gameID
        self.data = game.data
        self.ready_for_ML = game.ready_for_ML
        self.prediction = None
        if self.ready_for_ML:
            self.stop_words = game.stop_words
            self.tokenizer = game.tokenizer
            self.vectorizer_param = game.vectorizer_param
            self.nlp = game.nlp
            self.label_1 = game.data['play_time']
            self.label_2 = game.data['play_time_last_2_weeks']
            self.data_for_ml = game.data.drop(
                ['post_id', 'steam_id', 'play_time', 'play_time_last_2_weeks'],
                axis=1)
            # Number of whitespace-separated words of each review.
            self.data_for_ml['review_length'] = \
                self.data_for_ml['review_text'].apply(
                    lambda x: len(x.strip().split()))
        else:
            print("Game/DILL {} has no review for making any prerdiction.".format(game.gameID))

    @staticmethod
    def _make_one_hot_encoder():
        """Build a dense one-hot encoder on old and new scikit-learn alike."""
        try:
            return OneHotEncoder(sparse_output=False, handle_unknown='ignore')
        except TypeError:
            # Releases that predate the ``sparse_output`` keyword.
            return OneHotEncoder(sparse=False, handle_unknown='ignore')

    def _predict_by_similarity(self, X_user_df):
        """Mean play time of the reviewers most similar to the user."""
        # The user's review goes first so that it is document 0.
        reviews = [X_user_df['review_text'].values[0]]
        reviews.extend(self.data_for_ml['review_text'].values)
        X_in = pd.DataFrame.from_dict({'text': reviews})
        w2vec = Word2Vec(
            sentences=nltk_sentences(X_in['text'].values),
            vector_size=100, window=5, min_count=1, workers=4
        )
        embedded = mean_embedding_vector(w2vec).fit_transform(X_in['text'])
        similar_users_play_time = np.array([
            x[1] for x in get_similar_doc(embedded, 0, X_in['text'], self.label_1)
        ])
        return similar_users_play_time.mean()

    def _predict_by_random_forest(self, X_user_df):
        """Grid-search a random forest on all reviews and predict the user."""
        ng_tfidf = TfidfVectorizer(
            # A list is accepted by every scikit-learn release.
            stop_words=sorted(self.stop_words),
            ngram_range=(1, 2),
            tokenizer=self.tokenizer
        )
        data_preprocess = ColumnTransformer(
            [
                # Unknown categories are ignored so that a value missing
                # from a training fold (or from all reviews) cannot make
                # the encoder fail on the validation fold or on the user.
                ('Ohe', self._make_one_hot_encoder(),
                 ['recommended', 'purchase']),
                ('Ssr', StandardScaler(),
                 ['num_games_owned', 'num_reviews', 'review_length']),
                ('vectorizer', ng_tfidf, 'review_text')
            ],
            remainder='drop'
        )
        pipe = Pipeline(
            [
                ('preprocessor', data_preprocess),
                ('estimator', RandomForestRegressor(n_jobs=3, random_state=42))
            ]
        )
        param_grid = {
            'preprocessor__vectorizer__max_df': np.linspace(0.7, 1, num=4),
            # This key used to be a second ``max_df`` entry that silently
            # replaced the grid above.  As absolute counts, 0 and 1 select
            # the same terms, so a single value is kept.
            'preprocessor__vectorizer__min_df': [1],
            'preprocessor__vectorizer__max_features': np.linspace(5000, 8000, num=4, dtype=int),
            'estimator__n_estimators': np.linspace(500, 1500, num=11, dtype=int),
            'estimator__max_depth': np.linspace(5, 10, num=6, dtype=int),
            'estimator__ccp_alpha': np.linspace(0, 0.2, num=5, dtype=float)
        }
        search = GridSearchCV(pipe, param_grid, n_jobs=-1)
        search.fit(self.data_for_ml, self.label_1)
        self.ML_model = {
            'best_params': search.best_params_,
            'best_est': search.best_estimator_,
            'best_score': search.best_score_
        }
        # predict() returns a one-element array; unwrap it so both
        # strategies yield a plain number.
        return float(search.best_estimator_.predict(X_user_df)[0])

    def get_play_time(self, art_review,
                      min_num_data=3000, verbose=True):
        """Estimate the play time and store it in ``self.prediction``.

        Args:
            art_review: object exposing ``art_review`` (the review text) and
                ``recommend`` (the predicted vote).
            min_num_data: number of reviews from which the random forest is
                used instead of the similarity search.
            verbose: print the estimate when true.
        """
        if not self.ready_for_ML:
            print("Game/DILL {} has no review for making any prerdiction.".format(self.gameID))
            return

        # The user is described like a reviewer: own text and vote, a Steam
        # purchase, and the average reviewer's library/review counts.
        X_user_df = pd.DataFrame.from_dict({
            'review_text': [art_review.art_review],
            'recommended': art_review.recommend,
            'purchase': [True],
            'num_games_owned': [self.data_for_ml['num_games_owned'].mean()],
            'num_reviews': [self.data_for_ml['num_reviews'].mean()],
            'review_length': [len(art_review.art_review.strip().split())]
        })

        if self.data.shape[0] < min_num_data:
            self.prediction = self._predict_by_similarity(X_user_df)
            if verbose:
                print(
                    'Your expected play time as from users similar to you is: {} hour.'
                    .format(self.prediction)
                )
        else:
            self.prediction = self._predict_by_random_forest(X_user_df)
            if verbose:
                print(
                    "Your expected play time as from all users' reviews is: {} hour."
                    .format(self.prediction)
                )

#### 1.4 Code for searching games related to the one our user queries, followed by making recommendations based on play time

In [8]:
#Auxiliary functions

def get_game_tags(gameID, timeout=30):
    """Scrape the user-defined tags shown on a game's Steam store page.

    Args:
        gameID: Steam app id; appended to the store URL.
        timeout: seconds to wait for the store before giving up.

    Returns:
        The tag names in page order, without duplicates.  Multi-word tags
        are returned in full.
    """
    game_url = 'https://store.steampowered.com/app/' + str(gameID)
    game_page = requests.get(game_url, timeout=timeout)
    game_soup = bs(game_page.content, 'html.parser')
    game_tags = []
    for anchor in game_soup.find_all('a', class_="app_tag"):
        # The anchor text is padded with whitespace; take the whole trimmed
        # text instead of only its first word.
        name = anchor.get_text(strip=True)
        if name and name not in game_tags:
            game_tags.append(name)
    return game_tags

def get_steam_tags(timeout=30):
    """Scrape the tags listed on Steam's tag-browse page.

    Args:
        timeout: seconds to wait for the store before giving up.

    Returns:
        A list of ``[tag_name, tag_id]`` pairs in page order, with the id
        kept as the string found in the ``data-tagid`` attribute.
    """
    tag_url = 'https://store.steampowered.com/tag/browse/#global_492'
    tag_page = requests.get(tag_url, timeout=timeout)
    tag_soup = bs(tag_page.content, 'html.parser')
    tags = []
    for node in tag_soup.find_all('div', class_='tag_browse_tag'):
        # Read the parsed element rather than matching its serialised HTML,
        # so multi-word names and other attribute layouts are kept too.
        tag_id = node.get('data-tagid')
        name = node.get_text(strip=True)
        if tag_id and name:
            tags.append([name, tag_id])
    return tags

def get_tag_scores(steam_tags):
    """Score tags by their position in ``steam_tags``.

    The first of ``n`` pairs scores ``n``, the next ``n - 1`` and so on
    down to 1.

    Args:
        steam_tags: sequence of ``(tag_name, tag_id)`` pairs.

    Returns:
        A dict mapping each tag name to ``[score, tag_id]``.
    """
    top_score = len(steam_tags)
    return {
        name: [top_score - position, tag_id]
        for position, (name, tag_id) in enumerate(steam_tags)
    }

def sort_tags(steam_tags_dict, game_tags):
    """Rank a game's tags by their score in ``steam_tags_dict``.

    Tags missing from ``steam_tags_dict`` are left out and reported in a
    printed notice.

    Args:
        steam_tags_dict: mapping of tag name to ``[score, tag_id]``.
        game_tags: tag names of the game.

    Returns:
        A DataFrame with the columns ``'tag'`` and ``'(score, id)'``, sorted
        by decreasing score.
    """
    known = []
    unknown = []
    for name in game_tags:
        if name not in steam_tags_dict:
            unknown.append(name)
            continue
        known.append([name, steam_tags_dict[name]])

    ranked = sorted(known, key=lambda entry: entry[1][0], reverse=True)
    if unknown:
        print("The unpopular tag(s), '{}', is/are removed."\
              .format(', '.join(unknown)))

    return pd.DataFrame(ranked, columns = ['tag','(score, id)'])

In [9]:
class recommender(object):
    """Find games related to a given game and rank them by estimated play time.

    Intended call order: ``search_related_games()``,
    ``get_related_reviews()``, ``get_estimations(art_review)``.  The ranked
    result is stored in ``predictions_in_df``.
    """

    def __init__(self, gameID):
        """Scrape the game's tags and rank them by their Steam-wide score."""
        self.game_tags = get_game_tags(gameID)
        self.steam_tags = get_tag_scores(get_steam_tags())
        self.game_tags_sorted = \
            sort_tags(self.steam_tags, self.game_tags)

    def search_related_games(self,
        base_url=\
        'https://store.steampowered.com/search/?sort_by=Released_DESC&tags=',
                            verbose=True,
                            language='english',
                            timeout=30):
        """Search Steam for games sharing the game's top-ranked tags.

        Up to two tags are used.  The result is stored in ``related_games``,
        a DataFrame with the columns ``'name'`` and ``'id'``.

        Args:
            base_url: search URL to which the comma-separated tag ids are
                appended.
            verbose: print up to five of the games found.
            language: value of the ``supportedlang`` query parameter.
            timeout: seconds to wait for the store before giving up.
        """
        top_tags = self.game_tags_sorted.iloc[:2]
        if len(top_tags.index) == 0:
            # Without any known tag there is nothing to search for.
            self.related_games = pd.DataFrame([], columns=['name', 'id'])
            if verbose:
                print("No known tag is available for searching related games.")
            return
        tag_ids = [str(pair[1]) for pair in top_tags['(score, id)']]
        tag_names = [str(name) for name in top_tags['tag']]

        search_url = \
            base_url + '%2C'.join(tag_ids) + '&supportedlang=' + language

        page = requests.get(search_url, timeout=timeout)
        soup = bs(page.content, 'html.parser')
        id_pattern = re.compile(r'[.\n]*data-ds-appid="(\d+)')
        name_pattern = re.compile(
            r'.*https://store.steampowered.com/app/\d+/(.+)/')
        game_search_result = []
        for info in soup.find_all('a', class_='search_result_row'):
            row_html = str(info)
            match_id = id_pattern.search(row_html)
            match_name = name_pattern.search(row_html)
            if match_id and match_name:
                # Rows whose URL carries '_' in place of a name are skipped.
                if match_name.group(1) != '_':
                    game_search_result.append(
                        [match_name.group(1), match_id.group(1)])
        self.related_games = \
            pd.DataFrame(game_search_result,
                         columns=['name', 'id'])
        if verbose and game_search_result:
            print("Most recent {} games/DILLs (up to 5):"
                  .format(' and '.join(tag_names)))
            for name, ID in game_search_result[:5]:
                print("Name: {}; ID: {}"
                      .format(name, ID))

    def get_related_reviews(self):
        """Download and analyse the reviews of every related game.

        Games with enough reviews are stored in ``self.games`` keyed by id;
        the others are reported and skipped.
        """
        self.games = {}
        for gameID in self.related_games['id'].values:
            game = steam_game(gameID)
            game.get_reviews()
            if game.ready_for_ML:
                game.get_words()
                self.games[gameID] = game
            else:
                print("Game/DILL {} has not enough review. SKIPPED.".format(gameID))

    def get_estimations(self, art_review):
        """Estimate the user's play time on every analysed related game.

        The text of ``art_review`` is reused unchanged for each game.  The
        estimates are added to ``related_games`` as the column
        ``'estimated_playtime'`` (missing where a game was skipped) and the
        frame sorted by decreasing estimate is stored in
        ``predictions_in_df``.
        """
        self.predictions = {}
        for gameID, game in self.games.items():
            art_review_for_this_game = \
                artificial_reviews(game,
                                   pre_set_review=art_review.art_review)
            model = estimator(game)
            model.get_play_time(art_review_for_this_game,
                                verbose=False)
            self.predictions[gameID] = model.prediction

        #Merge predictions to search results with names
        df = self.related_games
        df['estimated_playtime'] = [
            self.predictions.get(gameID) for gameID in df['id']
        ]

        self.predictions_in_df = \
            df.sort_values(
                by=['estimated_playtime'],
                ascending=False).reset_index(drop=True)
        if df['estimated_playtime'].isnull().values.any():
            print(
                "NaN in 'estimated_playtime' is returned for cases with a lack of reviews for making predictions.")

#### 1.5 Codes for execution

In [23]:
if __name__ == "__main__":
    # Interactive run: analyse one game, estimate the user's play time on
    # it, then recommend related games ranked by estimated play time.

    #Get game ID
    while True:
        inp = \
        input(
            "For the game you are checking out, please enter its ID on Steam ('q' for exit):"
            ).strip()
        if inp == 'q':
            break
        try:
            game_id = int(inp)
            break
        except ValueError:
            print("Game ID should be an integer.")

    if inp != 'q':
        #Get reviews for analyses
        game = steam_game(game_id)
        game.get_reviews()
        if game.ready_for_ML:
            # Only mine words once the review count is known to suffice;
            # otherwise the same notice would be printed twice.
            game.get_words()
            print("Please enter 0 if a property/feature does not make sense to you.")
            weight_pos = get_feature_weights(game.word_pos)
            weight_neg = get_feature_weights(game.word_neg)
            art_review = artificial_reviews(
                game,
                weight_pos=weight_pos,
                weight_neg=weight_neg
            )
            user = estimator(game)
            user.get_play_time(art_review)
            game_recommend = recommender(game.gameID)
            game_recommend.search_related_games()
            game_recommend.get_related_reviews()
            game_recommend.get_estimations(art_review)
            print("Games/DILL recommendations based on your input related to game {}."\
                 .format(game.gameID))
            print(tabulate(
                game_recommend.predictions_in_df[['name','estimated_playtime']],
                headers='keys', tablefmt='psql',
                showindex="never"))

        else:
            print("Game/DILL {} does not have adequate reviews for analyses."
                  .format(game.gameID))
    else:
        print("Exit.")

For the game you are checking out, please enter its ID on Steam ('q' for exit):1024650
Loading idprocessed_on_20211017.txt
Skipping previously found appID = 1024650
Game records written: 0




Please enter 0 if a property/feature does not make sense to you.
Please weight the importance (0-5) of this property/feature: fun 5
Please weight the importance (0-5) of this property/feature: type 0
Please weight the importance (0-5) of this property/feature: strategy 5
Please weight the importance (0-5) of this property/feature: tutorial 5
Please weight the importance (0-5) of this property/feature: trading 5
Please weight the importance (0-5) of this property/feature: lot 0
Please weight the importance (0-5) of this property/feature: bit 0
Please weight the importance (0-5) of this property/feature: fun game 5
Please weight the importance (0-5) of this property/feature: century 0
Please weight the importance (0-5) of this property/feature: simulation 5
Please weight the importance (0-5) of this property/feature: garbage 0
Please weight the importance (0-5) of this property/feature: ruin 0
Please weight the importance (0-5) of this property/feature: favor 0
Please weight the importan



Loading idprocessed_on_20211017.txt
Downloading reviews for appID = 905220
[appID = 905220] expected #reviews = 23
[appID = 905220] num_reviews = 23 (expected: 23)
Game records written: 1




Loading idprocessed_on_20211017.txt
Downloading reviews for appID = 1774980
[appID = 1774980] expected #reviews = 12
[appID = 1774980] num_reviews = 12 (expected: 12)
Game records written: 1
Game/DILL 1774980 has less than 5 reviews. No further ML-based analyses will be made.
Game/DILL 1774980 has not enough review. SKIPPED.
Loading idprocessed_on_20211017.txt
Downloading reviews for appID = 1764670
[appID = 1764670] expected #reviews = 12
[appID = 1764670] num_reviews = 12 (expected: 12)
Game records written: 1




Loading idprocessed_on_20211017.txt
Downloading reviews for appID = 1733210
[appID = 1733210] num_reviews = 0 (expected: -1)
Game records written: 1
Game/DILL 1733210 has no review yet. An empty data set will be returned.
Game/DILL 1733210 has less than 5 reviews. No further ML-based analyses will be made.
Game/DILL 1733210 has not enough review. SKIPPED.
Loading idprocessed_on_20211017.txt
Downloading reviews for appID = 868800
[appID = 868800] num_reviews = 0 (expected: -1)
Game records written: 1
Game/DILL 868800 has no review yet. An empty data set will be returned.
Game/DILL 868800 has less than 5 reviews. No further ML-based analyses will be made.
Game/DILL 868800 has not enough review. SKIPPED.
Loading idprocessed_on_20211017.txt
Downloading reviews for appID = 1212180
[appID = 1212180] num_reviews = 0 (expected: -1)
Game records written: 1
Game/DILL 1212180 has no review yet. An empty data set will be returned.
Game/DILL 1212180 has less than 5 reviews. No further ML-based anal



Loading idprocessed_on_20211017.txt
Downloading reviews for appID = 1585530
[appID = 1585530] expected #reviews = 13
[appID = 1585530] num_reviews = 13 (expected: 13)
Game records written: 1




Loading idprocessed_on_20211017.txt
Downloading reviews for appID = 1775070
[appID = 1775070] expected #reviews = 2
[appID = 1775070] num_reviews = 2 (expected: 2)
Game records written: 1
Game/DILL 1775070 has less than 5 reviews. No further ML-based analyses will be made.
Game/DILL 1775070 has not enough review. SKIPPED.
Loading idprocessed_on_20211017.txt
Downloading reviews for appID = 1731520
[appID = 1731520] expected #reviews = 95
[appID = 1731520] num_reviews = 99 (expected: 95)
Game records written: 1
Game/DILL 1731520 has less than 5 reviews. No further ML-based analyses will be made.
Game/DILL 1731520 has not enough review. SKIPPED.
Loading idprocessed_on_20211017.txt
Downloading reviews for appID = 1674660
[appID = 1674660] expected #reviews = 3
[appID = 1674660] num_reviews = 3 (expected: 3)
Game records written: 1
Game/DILL 1674660 has less than 5 reviews. No further ML-based analyses will be made.
Game/DILL 1674660 has not enough review. SKIPPED.
Loading idprocessed_on_20



Loading idprocessed_on_20211017.txt
Downloading reviews for appID = 1480830
[appID = 1480830] expected #reviews = 29
[appID = 1480830] num_reviews = 29 (expected: 29)
Game records written: 1




Loading idprocessed_on_20211017.txt
Downloading reviews for appID = 1644490
[appID = 1644490] expected #reviews = 87
[appID = 1644490] num_reviews = 87 (expected: 87)
Game records written: 1




Loading idprocessed_on_20211017.txt
Downloading reviews for appID = 589940
[appID = 589940] expected #reviews = 67
[appID = 589940] num_reviews = 67 (expected: 67)
Game records written: 1




Loading idprocessed_on_20211017.txt
Downloading reviews for appID = 1669560
[appID = 1669560] expected #reviews = 3
[appID = 1669560] num_reviews = 3 (expected: 3)
Game records written: 1
Game/DILL 1669560 has less than 5 reviews. No further ML-based analyses will be made.
Game/DILL 1669560 has not enough review. SKIPPED.
Loading idprocessed_on_20211017.txt
Downloading reviews for appID = 1550760
[appID = 1550760] expected #reviews = 18
[appID = 1550760] num_reviews = 18 (expected: 18)
Game records written: 1




Loading idprocessed_on_20211017.txt
Downloading reviews for appID = 1778800
[appID = 1778800] expected #reviews = 128
[appID = 1778800] num_reviews = 128 (expected: 128)
Game records written: 1




Loading idprocessed_on_20211017.txt
Downloading reviews for appID = 780310
[appID = 780310] expected #reviews = 1971
[appID = 780310] num_reviews = 1753 (expected: 1971)
Game records written: 1




Loading idprocessed_on_20211017.txt
Downloading reviews for appID = 1456880
[appID = 1456880] expected #reviews = 93
[appID = 1456880] num_reviews = 93 (expected: 93)
Game records written: 1




Loading idprocessed_on_20211017.txt
Downloading reviews for appID = 1632140
[appID = 1632140] num_reviews = 0 (expected: -1)
Game records written: 1
Game/DILL 1632140 has no review yet. An empty data set will be returned.
Game/DILL 1632140 has less than 5 reviews. No further ML-based analyses will be made.
Game/DILL 1632140 has not enough review. SKIPPED.
Loading idprocessed_on_20211017.txt
Downloading reviews for appID = 1444920
[appID = 1444920] expected #reviews = 93
[appID = 1444920] num_reviews = 93 (expected: 93)
Game records written: 1




Loading idprocessed_on_20211017.txt
Downloading reviews for appID = 1652000
[appID = 1652000] num_reviews = 0 (expected: -1)
Game records written: 1
Game/DILL 1652000 has no review yet. An empty data set will be returned.
Game/DILL 1652000 has less than 5 reviews. No further ML-based analyses will be made.
Game/DILL 1652000 has not enough review. SKIPPED.
Loading idprocessed_on_20211017.txt
Downloading reviews for appID = 1575900
[appID = 1575900] expected #reviews = 1
[appID = 1575900] num_reviews = 1 (expected: 1)
Game records written: 1
Game/DILL 1575900 has less than 5 reviews. No further ML-based analyses will be made.
Game/DILL 1575900 has not enough review. SKIPPED.
Loading idprocessed_on_20211017.txt
Downloading reviews for appID = 1763990
[appID = 1763990] num_reviews = 0 (expected: -1)
Game records written: 1
Game/DILL 1763990 has no review yet. An empty data set will be returned.
Game/DILL 1763990 has less than 5 reviews. No further ML-based analyses will be made.
Game/DILL 