In [1]:
#import libraries
import pandas as pd
import numpy as np
import seaborn as sns
import requests
from bs4 import BeautifulSoup

In [2]:
def get_reviews(appid, params=None, timeout=30):
    """Fetch one page of reviews for a Steam app and return the decoded JSON.

    Parameters
    ----------
    appid : str or int
        Steam application id; it is appended to the ``appreviews`` endpoint URL.
    params : dict, optional
        Query-string parameters sent with the request. Defaults to
        ``{'json': 1}`` when omitted.
    timeout : float, optional
        Seconds to wait for the server before giving up, so a stalled
        connection cannot hang the whole scrape.

    Returns
    -------
    dict
        The parsed JSON body of the response.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    if params is None:
        # Built per call instead of sharing one mutable default dict.
        params = {'json': 1}

    url = 'https://store.steampowered.com/appreviews/'
    # str() lets callers pass the app id as an int as well as a string.
    response = requests.get(url=url + str(appid),
                            params=params,
                            timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to JSON-decode an error page.
    response.raise_for_status()
    return response.json()
    
def get_n_reviews(appid, n=1000):
    """Collect up to ``n`` reviews for ``appid`` by paging through ``get_reviews``.

    Pages of at most 100 reviews are requested one after another; the
    ``cursor`` value from each response is sent back with the next request.
    Paging stops once ``n`` reviews have been asked for, or as soon as a page
    comes back with fewer than 100 reviews.

    Returns the accumulated list of review entries (empty when ``n <= 0``).
    """
    page_size = 100
    query = {
        'json': 1,
        'filter': 'all',  # original author's note: change to 'recent' to extract all reviews
        'language': 'english',
        'day_range': None,
        'review_type': 'all',
        'purchase_type': 'all',
    }

    collected = []
    remaining = n
    next_cursor = '*'
    while remaining > 0:
        # The cursor is sent as bytes, exactly as received (first page uses '*').
        query['cursor'] = next_cursor.encode()
        query['num_per_page'] = min(page_size, remaining)
        remaining -= page_size

        payload = get_reviews(appid, query)
        next_cursor = payload['cursor']
        batch = payload['reviews']
        collected.extend(batch)

        # A page with fewer than 100 reviews ends the paging.
        if len(batch) < page_size:
            break
    return collected

def get_m_appid_titles(m=100, filter_by='all'):
    """Scrape the Steam store search pages for up to ``m`` ``[appid, title]`` pairs.

    Parameters
    ----------
    m : int
        Maximum number of entries to return.
    filter_by : str
        Value placed in the ``filter`` query parameter of the search URL.

    Returns
    -------
    list of list of str
        ``[appid, title]`` pairs in the order they appear in the search
        results, truncated to ``m`` entries. Fewer are returned when the
        search results run out first.

    Raises
    ------
    requests.HTTPError
        If a search page answers with a 4xx/5xx status.
    """
    appid_titles = []
    url = f'https://store.steampowered.com/search/?category1=998&filter={filter_by}&page='
    page = 0
    previous_hrefs = None

    # Keep going until enough entries are collected, rather than assuming
    # every page holds exactly 25 rows.
    while len(appid_titles) < m:
        page += 1
        response = requests.get(url=url + str(page), timeout=30)
        # An error page has no result rows; raising keeps that from being
        # mistaken for "no more results" and silently truncating the list.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        hrefs = [row.get('href', '') for row in soup.find_all(class_='search_result_row')]

        # Stop when the results are exhausted (empty page) or the site keeps
        # serving the same page again.
        if not hrefs or hrefs == previous_hrefs:
            break
        previous_hrefs = hrefs

        for href in hrefs:
            parts = href.split('/')
            # Expected shape: https://store.steampowered.com/app/<appid>/<title>/...
            # Skip anything else so a non-app id or a query string is never
            # returned as an appid/title pair.
            if len(parts) >= 6 and parts[3] == 'app':
                appid_titles.append(parts[4:6])
    return appid_titles[:m]

In [3]:
def append_reviews(title, review_info, df):
    """Return ``df`` extended with one row per review in ``review_info``.

    Parameters
    ----------
    title : str
        Game title stored in the ``title`` column of every new row.
    review_info : iterable of dict
        Review entries; each must provide the keys ``review``,
        ``timestamp_created``, ``voted_up``, ``votes_up``, ``votes_funny``,
        ``weighted_vote_score`` and ``comment_count``.
    df : pandas.DataFrame
        Frame to extend. It is not modified in place.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the old rows followed by the new ones, with a
        fresh 0..n-1 index. ``df`` itself is returned when ``review_info`` is
        empty.

    Raises
    ------
    KeyError
        If a review entry lacks one of the keys listed above.
    """
    # Collect plain records first and build the frame once: DataFrame.append
    # was removed in pandas 2.0, and appending row by row copies the whole
    # frame on every iteration.
    records = [
        {
            "title": title,
            "review_text": review["review"],
            "timestamp_created": review["timestamp_created"],
            "voted_up": review["voted_up"],
            "votes_up": review["votes_up"],
            "votes_funny": review["votes_funny"],
            "weighted_vote_score": review["weighted_vote_score"],
            "comment_count": review["comment_count"],
        }
        for review in review_info
    ]
    if not records:
        return df

    new_rows = pd.DataFrame(records)
    if len(df) == 0:
        # Nothing to concatenate with: keep df's column order (plus any extra
        # columns) without concatenating an empty frame.
        ordered = list(df.columns) + [c for c in new_rows.columns if c not in df.columns]
        return new_rows.reindex(columns=ordered)
    return pd.concat([df, new_rows], ignore_index=True)

In [5]:
#review_info = get_n_reviews('1145360',n=100)
def get_review_df(num_id=1000, num_review=100):
    """Build a DataFrame of reviews for the games listed by the store search.

    Parameters
    ----------
    num_id : int
        Maximum number of games to look up; passed to ``get_m_appid_titles``.
    num_review : int
        Maximum number of reviews to fetch per game; passed to
        ``get_n_reviews``.

    Returns
    -------
    pandas.DataFrame
        One row per review with the columns ``title``, ``review_text``,
        ``timestamp_created``, ``voted_up``, ``votes_up``, ``votes_funny``,
        ``weighted_vote_score`` and ``comment_count``.

    Notes
    -----
    Both arguments used to be ignored (100 games and 1000 reviews per game
    were hard-coded); they are now honoured. A ``game i/N`` progress line is
    printed after each game.
    """
    # Start from an empty frame that already carries the expected columns.
    df = pd.DataFrame(columns=["title", "review_text", "timestamp_created", "voted_up",
                               "votes_up", "votes_funny", "weighted_vote_score",
                               "comment_count"])
    games = get_m_appid_titles(m=num_id)
    num_games = len(games)
    # enumerate supplies the running counter the progress line needs.
    for count, (appid, title) in enumerate(games, start=1):
        review_info = get_n_reviews(appid, n=num_review)
        df = append_reviews(title, review_info, df)
        print('game {}/{}'.format(count, num_games))
    return df


In [6]:
# Slow cell: it scrapes the store search pages and then the reviews of every game found.
df = get_review_df(num_id=1000, num_review=100)

# Column-width option set to 0 -- presumably so the review text is displayed
# in full rather than truncated; confirm against the pandas version in use.
pd.options.display.max_colwidth = 0
df.head()

Unnamed: 0,title,review_text,timestamp_created,voted_up,votes_up,votes_funny,weighted_vote_score,comment_count
0,Destiny_2,free to play but not free to enjoy,1643914192,True,732,102,0.9349721670150756,0
1,Destiny_2,I want Mara Sov to stomp me,1643418367,True,678,230,0.8794969320297241,25
2,Destiny_2,It's not a game it's a job,1643432021,False,421,108,0.816015601158142,0
3,Destiny_2,Horrible Game,1644883764,True,200,183,0.7835745811462402,0
4,Destiny_2,"I have spent an ungodly amount of time on this game. Every time I load the starting screen ask myself the same question...""Why?"". I later come to the same conclusion after I rage quit from playing trials, iron banner, or the crucible. After being skull fucked into oblivion for the millionth time, by a team of shotgunning apes, that conclusion is simply put. It is because i hate myself. I like to consider myself a calm and collected human being but after i play a match of trials its as if I have the overwhelming feeling to know what a shotgun shell taste like. The only thing that makes the pvp so bad in this game is the developers have no concept of matchmaking and the communities incessant need to gate keep the tower. One match you will feel like the people you are playing against are about your skill level. The next match, and every match after that, you will be matched with people that have not seen the sunlight since their mom drove them home from the hospital from their birth. If there was an exotic quest for going outside I think they would be incapable of such a feat just due to the fact they cannot get out of their chair, have you seen the south park episode where they play WoW, that is them. The only redeeming quality this game does have is blind running a dungeon and or raid. Afterwards, you just feel this emptiness because the game has this really bad practice of having the player drain their life away to find an exotic weapon where they have the off chance of acquiring. Ever heard of the Vex Mythoclast? Me neither. Do I recommend this game? Yes, if you live in your parents basement and literally having to do with your life, then fuck yeah please go for it, you fuck. 
Otherwise, no, you would have better time going to college, getting a degree, finding a girlfriend, getting cheated on, then finding another, getting married, having kids, having a mortgage, taking loans, getting insane credit card debt, and then finally dying before stumbling across the Vex.",1644288799,True,91,65,0.7815302610397338,0


In [7]:
import os

# Create the output folder if it is missing, then write the reviews without the index column.
os.makedirs(name='datasets', exist_ok=True)
df.to_csv(path_or_buf='datasets/steam_reviews.csv', index=False)