# 1. User Reviews via Steam API (https://partner.steamgames.com/doc/store/getreviews)

In [1]:
# import packages
import os
import sys
import time
import json
import numpy as np
import urllib.parse
import urllib.request
from tqdm import tqdm
import plotly.express as px
from datetime import datetime
from googletrans import Translator

import pandas as pd
from pandas import json_normalize

# list package ver. etc.
print("Python version")
print (sys.version)
print("Version info.")
print (sys.version_info)
print('---------------')

Python version
3.8.3 (default, May 19 2020, 18:47:26) 
[GCC 7.3.0]
Version info.
sys.version_info(major=3, minor=8, micro=3, releaselevel='final', serial=0)
---------------


---
### Data Dictionary:

- Response:
    - success - 1 if the query was successful
    - query_summary - Returned in the first request
        - recommendationid - The unique id of the recommendation
        - author
            - steamid - the user’s SteamID
            - um_games_owned - number of games owned by the user
            - num_reviews - number of reviews written by the user
            - playtime_forever - lifetime playtime tracked in this app
            - playtime_last_two_weeks - playtime tracked in the past two weeks for this app
            - playtime_at_review - playtime when the review was written
            - last_played - time for when the user last played
        - language - language the user indicated when authoring the review
        - review - text of written review
        - timestamp_created - date the review was created (unix timestamp)
        - timestamp_updated - date the review was last updated (unix timestamp)
        - voted_up - true means it was a positive recommendation
        - votes_up - the number of users that found this review helpful
        - votes_funny - the number of users that found this review funny
        - weighted_vote_score - helpfulness score
        - comment_count - number of comments posted on this review
        - steam_purchase - true if the user purchased the game on Steam
        - received_for_free - true if the user checked a box saying they got the app for free
        - written_during_early_access - true if the user posted this review while the game was in Early Access
        - developer_response - text of the developer response, if any
        - timestamp_dev_responded - Unix timestamp of when the developer responded, if applicable

---
Source: https://partner.steamgames.com/doc/store/getreviews

## 1.1 Import

In [2]:
# generate game review df

#steam 'chunks' their json files (the game reviews) in sets of 100
#ending with a signature, a 'cursor'. This cursor is then pasted
#onto the the same url, to 'grab' the next chunk and so on. 
#This sequence block with an 'end cursor' of 'AoJ4tey90tECcbOXSw=='

#set variables
url_base = 'https://store.steampowered.com/appreviews/393380?json=1&filter=updated&language=all&review_type=all&purchase_type=all&num_per_page=100&cursor='

#first pass
url = urllib.request.urlopen("https://store.steampowered.com/appreviews/393380?json=1&filter=updated&language=all&review_type=all&purchase_type=all&num_per_page=100&cursor=*")
data = json.loads(url.read().decode())
next_cursor = data['cursor']
next_cursor = next_cursor.replace('+', '%2B')
df1 = json_normalize(data['reviews'])
print(next_cursor)

#add results till stopcursor met, then send all results to csv
while True:
    time.sleep(0.5) # Sleep for one second
    url_temp = url_base + next_cursor
    url = urllib.request.urlopen(url_temp)
    data = json.loads(url.read().decode())
    next_cursor = data['cursor']
    next_cursor = next_cursor.replace('+', '%2B')
    df2 = json_normalize(data['reviews'])
    df1 = pd.concat([df1, df2])
    print(next_cursor)
    if next_cursor == 'AoJ44PCp0tECd4WXSw==' or next_cursor == '*':
        df_steam_reviews = df1
        df1 = None
        break
        
#the hash below is each 'cursor' I loop through until the 'end cursor'.
#this is just my way to monitor the download.

AoJwtMGZ6PICfJ%2BKjwI=
AoJwrcGT4fICf5OSiAI=
AoJ4v%2B6N2/ICcsuPjgI=
AoJwheLr1PICf9HSjQI=
AoJ416WgzvICc%2BaYjQI=
AoJ4quzJyPICecbmjAI=
AoJw0Mf4wvICdautjAI=
AoJw1ovEvvICe/iEjAI=
AoJ4qaXyufICfMXbiwI=
AoJw65nYtfICe6e1iwI=
AoJ4j53er/ICdsf6igI=
AoJwkLeKqvICd63EigI=
AoJwjdHhpvICeK2iigI=
AoJ44M%2BYovICd%2BbwiQI=
AoJ4t9nBnvICftXFiQI=
AoJ40N6RmvICe8%2BPiQI=
AoJwycW%2BlfICc9PeiAI=
AoJwpezBkfICfsqyiAI=
AoJw4fzBjfICeMaAiAI=
AoJ47faaivICcqXchwI=
AoJ43eSShvICcZCyhwI=
AoJw4Ke2g/ICcemVhwI=
AoJ4nPXrgPICevn7hgI=
AoJ4ltHU/vECe6/0igE=
AoJ4yIGA/PECeZ7IhgI=
AoJwxLKL%2BvECd8rK%2BQE=
AoJw%2BJiD%2BfECdpqphgI=
AoJ4kbjh%2BPECcIOl0QE=
AoJwk%2B%2BQ%2BPECcJKfhgI=
AoJwmNz99vECcrjExAE=
AoJ40Zfg9fECfoOFhgI=
AoJ4mPjM8/ECfb3thQI=
AoJwt%2Bmj8fECdvPRhQI=
AoJ4sOfb7/ECdf%2B%2BhQI=
AoJwo7bb7vECeey0hQI=
AoJwwqX87fECf8yshQI=
AoJwx7/X7fECeuqohQI=
AoJ41djX7PECc62chQI=
AoJ4m9fF6/ECeoyQhQI=
AoJ4yp3k6vECd5GFhQI=
AoJw0dP36fECeZv7hAI=
AoJ4nsb96PECf9vwhAI=
AoJ4zLCe6PECe/WR4QE=
AoJ477395vECf47XhAI=
AoJwmNeO5PECe/G1hAI=
AoJ4rMSr3vECffHygwI

In [3]:
# inspect columns
print(df_steam_reviews.info(verbose=True))

<class 'pandas.core.frame.DataFrame'>
Int64Index: 51321 entries, 0 to 20
Data columns (total 21 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   recommendationid                51321 non-null  object 
 1   language                        51321 non-null  object 
 2   review                          51321 non-null  object 
 3   timestamp_created               51321 non-null  int64  
 4   timestamp_updated               51321 non-null  int64  
 5   voted_up                        51321 non-null  bool   
 6   votes_up                        51321 non-null  int64  
 7   votes_funny                     51321 non-null  int64  
 8   weighted_vote_score             51321 non-null  object 
 9   comment_count                   51321 non-null  int64  
 10  steam_purchase                  51321 non-null  bool   
 11  received_for_free               51321 non-null  bool   
 12  written_during_early_access     513

In [4]:
# inspect shape
print(df_steam_reviews.shape)

(51321, 21)


In [5]:
# inspect df
df_steam_reviews

Unnamed: 0,recommendationid,language,review,timestamp_created,timestamp_updated,voted_up,votes_up,votes_funny,weighted_vote_score,comment_count,...,received_for_free,written_during_early_access,author.steamid,author.num_games_owned,author.num_reviews,author.playtime_forever,author.playtime_last_two_weeks,author.last_played,timestamp_dev_responded,developer_response
0,71182846,russian,"When I just joined the battle, my squad was at...",1592842434,1592842434,True,0,0,0,0,...,False,True,76561198271837310,77,1,364,364,1592848618,,
1,71182712,english,Game is fun and exciting when it works. Very s...,1592842261,1592842261,False,0,0,0,0,...,False,True,76561198055021933,150,17,316,52,1592843901,,
2,71181661,turkish,Valla efsane oyun beyler. Takım oyunu yapmama ...,1592840698,1592840698,True,1,0,0.523809552192687988,0,...,False,True,76561198380703693,56,8,6446,1323,1592837756,,
3,71180994,russian,TOP,1592839610,1592839610,True,0,0,0,0,...,False,True,76561198127609061,66,3,4879,1,1592842083,,
4,57766737,russian,"Очень хочу, чтобы в русской локализации игру н...",1574601186,1592837322,True,0,0,0,0,...,False,True,76561198105740539,549,14,57627,7599,1592848771,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16,19708009,english,Communication and teamplay is one of the best ...,1450166529,1450166529,True,44,3,0.566995859146118164,0,...,False,True,76561198014615390,251,2,13623,0,1589089421,,
17,19708002,english,If you're looking for a realistic mil-sim styl...,1450166484,1450166484,True,38,2,0.63659060001373291,0,...,False,True,76561198066675517,543,9,4512,0,1587823021,,
18,19707993,english,ITS THE BEST GAME EVERS \n\nPlay this game if ...,1450166426,1450166426,True,205,517,0.549310982227325439,13,...,False,True,76561198048218734,93,11,112520,0,1591500969,,
19,19707992,english,(Disclaimer: This game used to not be on Steam...,1450166424,1450166424,True,157,9,0.790276587009429932,0,...,False,True,76561198038225005,253,8,3969,0,1589425139,,


In [6]:
# save that sheet
df_steam_reviews.to_csv('squad_reviews.csv', index=False)

## 1.2 Clean

In [7]:
#search for presence of empty cells
df_steam_reviews.isnull().sum(axis = 0)

recommendationid                      0
language                              0
review                                0
timestamp_created                     0
timestamp_updated                     0
voted_up                              0
votes_up                              0
votes_funny                           0
weighted_vote_score                   0
comment_count                         0
steam_purchase                        0
received_for_free                     0
written_during_early_access           0
author.steamid                        0
author.num_games_owned                0
author.num_reviews                    0
author.playtime_forever               0
author.playtime_last_two_weeks        0
author.last_played                    0
timestamp_dev_responded           51320
developer_response                51320
dtype: int64

In [8]:
#drop empty cols 'timestamp_dev_responded' and 'developer_response'
df_steam_reviews = df_steam_reviews.drop(['timestamp_dev_responded', 'developer_response'], axis=1)

In [9]:
# convert unix timestamp columns to datetime format
def time_to_clean(x):
    return datetime.fromtimestamp(x)

df_steam_reviews['timestamp_created'] = df_steam_reviews['timestamp_created'].apply(time_to_clean)
df_steam_reviews['timestamp_updated'] = df_steam_reviews['timestamp_updated'].apply(time_to_clean)
df_steam_reviews['author.last_played'] = df_steam_reviews['author.last_played'].apply(time_to_clean)

In [11]:
# inspect
df_steam_reviews

Unnamed: 0,recommendationid,language,review,timestamp_created,timestamp_updated,voted_up,votes_up,votes_funny,weighted_vote_score,comment_count,steam_purchase,received_for_free,written_during_early_access,author.steamid,author.num_games_owned,author.num_reviews,author.playtime_forever,author.playtime_last_two_weeks,author.last_played
0,71182846,russian,"When I just joined the battle, my squad was at...",2020-06-22 11:13:54,2020-06-22 11:13:54,True,0,0,0,0,True,False,True,76561198271837310,77,1,364,364,2020-06-22 12:56:58
1,71182712,english,Game is fun and exciting when it works. Very s...,2020-06-22 11:11:01,2020-06-22 11:11:01,False,0,0,0,0,True,False,True,76561198055021933,150,17,316,52,2020-06-22 11:38:21
2,71181661,turkish,Valla efsane oyun beyler. Takım oyunu yapmama ...,2020-06-22 10:44:58,2020-06-22 10:44:58,True,1,0,0.523809552192687988,0,True,False,True,76561198380703693,56,8,6446,1323,2020-06-22 09:55:56
3,71180994,russian,TOP,2020-06-22 10:26:50,2020-06-22 10:26:50,True,0,0,0,0,True,False,True,76561198127609061,66,3,4879,1,2020-06-22 11:08:03
4,57766737,russian,"Очень хочу, чтобы в русской локализации игру н...",2019-11-24 07:13:06,2020-06-22 09:48:42,True,0,0,0,0,True,False,True,76561198105740539,549,14,57627,7599,2020-06-22 12:59:31
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16,19708009,english,Communication and teamplay is one of the best ...,2015-12-15 02:02:09,2015-12-15 02:02:09,True,44,3,0.566995859146118164,0,False,False,True,76561198014615390,251,2,13623,0,2020-05-10 00:43:41
17,19708002,english,If you're looking for a realistic mil-sim styl...,2015-12-15 02:01:24,2015-12-15 02:01:24,True,38,2,0.63659060001373291,0,False,False,True,76561198066675517,543,9,4512,0,2020-04-25 08:57:01
18,19707993,english,ITS THE BEST GAME EVERS \n\nPlay this game if ...,2015-12-15 02:00:26,2015-12-15 02:00:26,True,205,517,0.549310982227325439,13,False,False,True,76561198048218734,93,11,112520,0,2020-06-06 22:36:09
19,19707992,english,(Disclaimer: This game used to not be on Steam...,2015-12-15 02:00:24,2015-12-15 02:00:24,True,157,9,0.790276587009429932,0,False,False,True,76561198038225005,253,8,3969,0,2020-05-13 21:58:59


In [10]:
# save that sheet
df_steam_reviews.to_csv('game_reviews.csv', index=False)

# Misc

In [None]:
# list of free weekends:
Squad Free Weekend - Nov 2016
Squad Free Weekend - Apr 2017
Squad Free Weekend - Nov 2017
Squad Free Weekend - Jun 2018
Squad Free Weekend - Nov 2018
Squad Free Weekend - Jul 2019
Squad Free Weekend - Nov 2019

# list of major patch days:
v1 - July 1 2015
v2 - Oct 31 2015
v3 - Dec 15 2015
v4 - ?
v5 - Mar 30 2016
v6 - May 26 2016
v7 - Aug  7 2016
v8 - Nov  1 2016
v9 - Mar  9 2017
v10  Feb  5 2018
v11  Jun  6 2018
v12  Nov 29 2018
v13  May  ? 2019
v14  Jun 28 2019
v15  Jul 22 2019
v16  Oct 10 2019
v17  Nov 25 2019
v18  ?
v19  May  2 2020

![Rating-Formula](https://steamdb.info/static/img/blog/84/formula.png)

In [None]:
#v2 (fromhttps://cloud.google.com/translate/docs/simple-translate-call#translate_translate_text-python)
# translate/spellcheck via googletranslate pkg
from google.cloud import translate_v2 as translate

def time_to_translate(x):
    if x == None: # ignore the 'NaN' reviews
        return 'NaN'
    else:
        translate_client = translate.Client()
        if isinstance(x, six.binary_type):
            text = x.decode('utf-8')
            return text
        
#print(time_to_translate('hola'))

In [None]:
# scratch
df_steam_reviews = pd.read_csv('squad_reviews.csv', low_memory=False)

In [None]:
df_steam_reviews

In [None]:
# display reviews 
fig = px.histogram(df_steam_reviews, x="timestamp_created", color="voted_up", width=1000, height=500, title='Positive(True)/Negative(False) Reviews')
fig.show()

In [None]:
# translate/spellcheck t
t['review.translated'] = t['review'].progress_apply(time_to_translate)
t.to_csv('t.csv', index=False)