### Let's see what people are saying about NMS (No Man's Sky) on Steam...

In [61]:
import time
import csv
import re

from selenium import webdriver as web
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

import numpy as np
import pandas as pd

In [30]:
# note that no driver path is passed here: chromedriver.exe has been placed directly
# in the folder where this script is located, for simplicity
chrome = web.Chrome()

In [31]:
# open the Steam store page for the game; 275850 is the app id of No Man's Sky
# change the numeric id at the end of the URL to target another game (or loop over several ids)
chrome.get('https://store.steampowered.com/app/275850/')
# explicit-wait helper bound to this browser; waits give up after 30 seconds
wait = WebDriverWait(chrome, timeout=30)

In [32]:
# locate the "view all reviews" button by its element id, wait until it can be
# clicked, then click it
view_all_locator = (By.ID, 'ViewAllReviewsall')
cross = wait.until(EC.element_to_be_clickable(view_all_locator))
cross.click()

In [33]:
# keep scrolling to the bottom of the page until its height stops growing
# the script scrolls to the current bottom and returns the page height afterwards
scroll_js = "window.scrollTo(0, document.body.scrollHeight);var lenOfPage=document.body.scrollHeight;return lenOfPage;"

lenOfPage = chrome.execute_script(scroll_js)
match = False
while not match:
    lastCount = lenOfPage
    # pause so that newly requested content has time to load before re-measuring
    time.sleep(3)
    lenOfPage = chrome.execute_script(scroll_js)
    # an unchanged height after the pause means nothing more was loaded
    match = (lastCount == lenOfPage)

In [34]:
# 'apphub_CardTextContent' is the CSS class of the review cards; collect the
# visible text of every card into a list
# find_elements(By.CLASS_NAME, ...) replaces find_elements_by_class_name, which is
# deprecated and no longer available in Selenium 4
review_elements = chrome.find_elements(By.CLASS_NAME, 'apphub_CardTextContent')
reviews = [element.text for element in review_elements]

In [35]:
# after some time, the auto-loading stops and a 'See More Content' button appears,
# so only the reviews loaded up to that point were collected
# how many reviews have been saved so far?
len(reviews)

3050

In [39]:
# the first review collected (presumably the most popular one, as it is listed
# first on the page -- TODO confirm the page's sort order)
reviews[0]

u'Posted: August 15\nA mile wide but an inch deep. I can not express it better.'

I have the raw text of many reviews, which include the date posted, but I don't have other info such as number of hours on record, recommended or not, etc.  This is something I'll add soon.

#### A little cleaning...

In [45]:
# type() shows us that the reviews are held in one giant list
type(reviews)

list

In [46]:
# the second review (indexing is zero-based, so index 0 is the first)
reviews[1]

u'Posted: August 17\nTried very hard to enjoy the game and see it through. A mile wide but and inch deep is the perfect analogy for this experiment. Not worth the $60 price tag.'

In [48]:
# and the fifth review (index 4, since indexing is zero-based)
reviews[4]

u'Posted: August 16\nI played this game for 23 hours, and I did not have fun.\n\nSome will say "You must have been having fun, otherwise you wouldn\'t have played it for 23 hours. Why didn\'t you stop after 5 hours?"\n\nThe answer to that is: Because I BELIEVED it would get better. I had faith. People kept saying things like this:\n\n"Planets get more interesting closer to the center."\n"You just haven\'t given the game enough time yet. It gets better once you get more inventory slots."\n"Keep playing, I\'ve had an amazing time."\n\nLets say someone came to me and said "Lets dig to the center of the Earth. Theres a pot of gold there and you will be rich. Heres a shovel", and I start digging. After 10 hours I say "This sucks. I\'m going back up the ladder."\n\nThe gentleman says "No keep digging. It will get better. It will be worth it."\n\nAfter 23 hours of digging I\'m still having a terrible time. Do I give up and say "that was no fun", or do I keep digging?\n\nHow about if I had pai

In [62]:
# the second review again, for reference before checking its type and splitting it
reviews[1]

u'Posted: August 17\nTried very hard to enjoy the game and see it through. A mile wide but and inch deep is the perfect analogy for this experiment. Not worth the $60 price tag.'

In [74]:
# each individual review is a unicode object (text), as type() shows here
type(reviews[1])

unicode

In [67]:
# splitting on newlines separates the 'Posted: ...' date line from the review text
# and gives a list for each review; note that a review containing its own line
# breaks is split into more than two pieces
reviews[1].split("\n")

[u'Posted: August 17',
 u'Tried very hard to enjoy the game and see it through. A mile wide but and inch deep is the perfect analogy for this experiment. Not worth the $60 price tag.']

In [77]:
# break every raw review into its newline-separated pieces (date line first),
# producing one inner list per review -- effectively a list of lists
split_rev = [raw_review.split("\n") for raw_review in reviews]

In [78]:
# peek at the first three entries of the list of lists; echoing the whole list
# would print every scraped review into the notebook output
split_rev[:3]

[[u'Posted: August 15',
  u'A mile wide but an inch deep. I can not express it better.'],
 [u'Posted: August 17',
  u'Tried very hard to enjoy the game and see it through. A mile wide but and inch deep is the perfect analogy for this experiment. Not worth the $60 price tag.'],
 [u'Posted: August 16',
  u'http://www.gamezone.com/news/no-man-s-sky-founder-backtracks-could-get-paid-dlc-3442515',
  u'',
  u"Sean Murray promised there would be no paid DLC, we would recieve free updates since we had already paid $60 for the game. That was last week. Looks like that's already changed as Sean has gone back on that and said that Paid DLC is possible.",
  u'',
  u'https://www.reddit.com/r/NoMansSkyTheGame/comments/4wxydf/im_about_to_meet_another_player_seriously/',
  u'',
  u'https://www.reddit.com/r/Games/comments/4y1h9i/wheres_the_no_mans_sky_we_were_sold_on_a_big_list/',
  u'',
  u'https://www.reddit.com/r/NoMansSkyTheGame/comments/4y046e/wheres_the_nms_we_were_sold_on_heres_a_big_list/',
  u

In [79]:
# a list of dates, a list of reviews
# the first piece of each split review is its 'Posted: ...' date line
dates = [item[0] for item in split_rev]
# everything after the date line is the review body: a review with its own line
# breaks was split into several pieces, so they are joined back together rather
# than keeping only the first line; a review with no text after the date becomes ''
revs = ["\n".join(item[1:]) for item in split_rev]

In [87]:
# build a two-column dataframe from the dates and the reviews
# note that it is important that each date and review still correspond to their
# original entry; both lists are built from split_rev in the same order
df = pd.DataFrame({"Date": dates, "Reviews": revs}, columns=["Date", "Reviews"])

In [88]:
# preview the first ten rows of the dataframe rather than echoing all of it
df.head(10)

Unnamed: 0,Date,Reviews
0,Posted: August 15,A mile wide but an inch deep. I can not expres...
1,Posted: August 17,Tried very hard to enjoy the game and see it t...
2,Posted: August 16,http://www.gamezone.com/news/no-man-s-sky-foun...
3,Posted: August 14,So I never knew space was so boring.
4,Posted: August 16,"I played this game for 23 hours, and I did not..."
5,Posted: August 16,"This game was falsely advertised, plain and si..."
6,Posted: August 15,I tried REALLY hard to like this game.
7,Posted: August 14,"""A mile wide but an inch deep"" was the title o..."
8,Posted: August 17,*UPDATES BELOW* READ THIS FIRST BEFORE BEING A...
9,Posted: August 12,This is a overhyped indie game made by 15 peop...


In [90]:
# write the dataframe to 'steam_reviews.csv' (relative path) as UTF-8 text
# NOTE(review): no index argument is passed, so by pandas' default the row index
# is written too, as an unnamed first column -- confirm this is wanted
df.to_csv('steam_reviews.csv', encoding='utf-8')