In [1]:
import os
import re
import time
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
import pymongo
from dotenv import load_dotenv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.service import Service

In [2]:
# Start page for the scrape.
url="https://steamdb.info/"
# Firefox session used by the following cells; geckodriver is given as a relative path.
driver = webdriver.Firefox(service=Service("../project_env/geckodriver"))

In [3]:
# Load the start page in the browser session.
driver.get(url)

In [4]:
# Follow the "/graph/" link found inside a table-title header cell
# (presumably the "most played" chart, going by the variable name).
most_played_button = driver.find_element(by=By.XPATH, value='//th[@class="table-title"]//a[@href="/graph/"]')
most_played_button.click()

In [5]:
# Click the <option> whose value is "-1" (presumably the "show all rows" choice of the
# page-size selector; verify against the live page).
show_all_button = driver.find_element(by=By.XPATH, value='//option[@value="-1"]')
show_all_button.click()

In [6]:
# Every <tr> of the first <tbody> on the page; each row is expected to describe one game.
matches = driver.find_element(by=By.TAG_NAME, value='tbody').find_elements(by=By.TAG_NAME, value='tr')

In [7]:
# Collect the href of the first link found in each table row.
games = [
    row.find_element(by=By.TAG_NAME, value='a').get_attribute('href')
    for row in matches
]

In [11]:
# Reduce every collected href to the text between "p/" and "/g", i.e. the numeric app id
# (the preview below shows values such as '730').
# The pattern is a raw string without the needless "\/" escapes: in a normal string
# literal "\/" is an invalid escape sequence that current Python versions warn about.
# NOTE: this overwrites `games`, so the cell is not re-runnable on its own; run the cell
# that collects the hrefs again before executing it a second time.
games = [re.search(r'(?<=p/)(.*)(?=/g)', href).group(0) for href in games]
games[:5]

['730', '570', '1599340', '578080', '1245620']

In [35]:
# Build the Steam store page address of every app id.
url = "https://store.steampowered.com/app/"
games_url = [url + app_id for app_id in games]

In [36]:
# Preview the first five store URLs.
games_url[:5]

['https://store.steampowered.com/app/730',
 'https://store.steampowered.com/app/570',
 'https://store.steampowered.com/app/1599340',
 'https://store.steampowered.com/app/578080',
 'https://store.steampowered.com/app/1245620']

In [37]:
# Load variables from a .env file, then read the MongoDB credentials from the environment.
load_dotenv()
mongo_username = os.getenv("mongo_username")
mongo_password = os.getenv("mongo_password")

# os.getenv returns None for an unset variable; fail here with a clear message instead of
# building a URI that contains the literal text "None".
if not mongo_username or not mongo_password:
    raise RuntimeError("mongo_username and mongo_password must be set in the environment or .env file")

# Store the raw (unescaped) credentials in .env: they are percent-escaped here because
# characters such as '@', ':' or '/' would otherwise corrupt the connection string.
client = pymongo.MongoClient(
    f"mongodb+srv://{quote_plus(mongo_username)}:{quote_plus(mongo_password)}"
    "@cluster0.d4ojg.mongodb.net/myFirstDatabase?retryWrites=true&w=majority"
)
client.list_database_names()

['SteamDB', 'admin', 'local']

In [38]:
# Select the SteamDB database and list the collections it contains.
db = client.SteamDB
db.list_collection_names()

['steam_user_games',
 'steamdb_games',
 'steam_user_reviews',
 'steam_users',
 'steam_games']

In [39]:
# Handle to the steamdb_games collection.
collection = db.steamdb_games

In [41]:
def _first_text(root, xpath, default=np.nan):
    """Return the text of the first element under ``root`` that matches ``xpath``.

    ``root`` is any object exposing ``find_elements`` (the driver or an element).
    ``default`` is returned when nothing matches, so a page section that is absent
    produces a missing value instead of raising ``NoSuchElementException``.
    """
    found = root.find_elements(By.XPATH, xpath)
    return found[0].text if found else default


def _labelled_value(details, label):
    """Return the rest of the line after ``"<label>: "`` in ``details``, or NaN if absent."""
    hit = re.search(rf'(?<={re.escape(label)}: )(.*)', details)
    return hit.group(0) if hit else np.nan


# Slice of games_url to scrape. stop=1 limits the run to the first game;
# use stop=None to process the whole list.
start, stop = 0, 1

for count, game in enumerate(games_url[start:stop], start=start + 1):
    driver.get(game)

    # First run of digits in the store URL, i.e. the app id.
    gameId = re.search(r'\d+', game).group(0)

    age_gate = driver.find_elements(By.XPATH, '//div[@id="app_agegate"]')
    game_page = driver.find_elements(By.XPATH, '//div[@class="game_page_background game"]')

    if not game_page:
        print('##### Game page not found! #####')
        continue

    if age_gate:
        # Pick a birth year and confirm to get past the age check. The one-second
        # pauses presumably give the page time to react to each click.
        driver.find_element(by=By.XPATH, value='//option[@value="1995"]').click()
        time.sleep(1)
        driver.find_element(by=By.XPATH, value='//a[@id="view_product_page_btn"]').click()
        time.sleep(1)

    # Details block: lines of the form "LABEL: value". A missing block is treated as
    # empty text, so every field below falls back to NaN.
    details = _first_text(driver, '//div[@id="genresAndManufacturer"]', default='')
    title, genre, developer, publisher, franchise = (
        _labelled_value(details, label)
        for label in ('TITLE', 'GENRE', 'DEVELOPER', 'PUBLISHER', 'FRANCHISE')
    )
    releaseDate = _labelled_value(details, 'RELEASE DATE')

    # Description
    description = _first_text(driver, '//div[@class="game_description_snippet"]')

    # User defined tags: the "add" button opens a dialog containing the popular tags.
    tags = []
    add_tag_button = driver.find_elements(By.XPATH, '//div[@class="app_tag add_button"]')
    if add_tag_button:
        add_tag_button[0].click()
        tags = [
            tag.text
            for tag in driver.find_elements(By.XPATH, '//div[@class="app_tag_control popular"]')
        ]
        driver.find_element(By.XPATH, '//div[@class="newmodal_close"]').click()

    # Features
    features = [
        feature.text
        for feature in driver.find_elements(By.XPATH, '//div[@class="game_area_features_list_ctn"]/a')
    ]

    # System Requirements (disabled). NOTE: as written, the `elif` can only run when the
    # list is empty, so recommendedRequirement would never be filled; use two
    # independent `if`s if this is re-enabled.
    # systemRequirements = driver.find_elements(By.XPATH, '//ul[@class="bb_ul"]')
    # minimumRequirement = ''
    # recommendedRequirement = ''
    # if systemRequirements:
    #     minimumRequirement = systemRequirements[0].text
    # elif len(systemRequirements) == 2:
    #     recommendedRequirement = systemRequirements[1].text

    # Price: regular price of the first purchase box, else the pre-discount price.
    price = np.nan
    purchase = driver.find_elements(By.XPATH, '//div[@class="game_area_purchase"]')
    if purchase:
        purchase_price = purchase[0].find_elements(By.XPATH, './/div[@class="game_purchase_price price"]')
        if purchase_price:
            price = purchase_price[0].text
        else:
            price = _first_text(driver, '//div[@class="discount_original_price"]')

    # Reviews: each row is told apart by the "RECENT" / "ALL" text it contains.
    recentReviews = np.nan
    allReviews = np.nan
    for review in driver.find_elements(by=By.XPATH, value='//div[@id="userReviews"]/div'):
        review_text = review.text
        if "RECENT" in review_text:
            recentReviews = review.find_element(By.XPATH, './div[2]').text
        if "ALL" in review_text:
            allReviews = review.find_element(By.XPATH, './div[2]').text

    # Critic score
    criticScore = _first_text(driver, '//div[@id="game_area_metascore"]/div')

    # Awards
    awards = _first_text(driver, '//div[@id="AwardsDefault"]')

    # Document for MongoDB (the insert itself is currently disabled).
    data = {'gameId': gameId, 'Title': title, 'Genre': genre, 'Developer': developer, 'Publisher': publisher,
            'Franchise': franchise, 'Release_Date': releaseDate, 'Description': description,
            'Tags': tags, 'Features': features,
            # 'Minimum_System_Requirements': minimumRequirement,
            # 'Recommended_System_Requirements': recommendedRequirement,
            'Price': price,
            'Recent_Reviews': recentReviews, 'All_Reviews': allReviews, 'Meta_Critic_Score': criticScore,
            'Steam_Awards': awards}

    # collection.insert_one(data)

    # Done
    print(f"{count}: {title}...Done✔")

1: Counter-Strike: Global Offensive...Done✔


In [5]:
# Close the browser and end the WebDriver session.
driver.quit()

In [139]:
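# Destructive maintenance step, deliberately left disabled: an empty filter matches
# every document, so uncommenting this line empties `collection`.
# collection.delete_many({})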

<pymongo.results.DeleteResult at 0x7effa32f9900>