In [105]:
from bs4 import BeautifulSoup
import pandas as pd
import requests
import json
import re
import time

DELAY = 1  # seconds
BASE_URL = 'https://store.playstation.com'
LIST_BASE_URL = f'{BASE_URL}/en-us/pages/browse'
API_HASH = '16b0b76dac848b6e33ed088a5a3aedb738e51c9481c6a6eb6d6c1c1991ea1f39'
headers={"X-Psn-Store-Locale-Override":"en-US"}


session = requests.Session()

In [107]:
def ps_digital_scrape(pages=None):
    page = 1

    game_list = []

    # Loop through pages
    while True:
        if (pages is not None) & (page > pages):
            break

        # Retrieve list page
        response = session.get(f'{LIST_BASE_URL}/{page}')
        soup = BeautifulSoup(response.text, 'html.parser')

        # There is no clear way to know when we've arrived at the end.
        # I've found that the "All Games" header does not appear when
        # there are no more results.
        if len(soup.find_all("h1", string="All Games")) == 0:
            break

        # Get games on page
        games = soup.select('ul.psw-grid-list li')

        for game in games:
            data = {}
            path = game.select('div a')[0]['href']

            id_search = re.search('(\d*)$', path, re.IGNORECASE)
            if id_search:
                id = id_search.group(1)
            else:
                id = None

            data['concept_id'] = id
            data['url'] = f'{BASE_URL}/{path}'
            data['img'] = game.select('img.psw-l-fit-cover')[0]['src']

            print(f'id: {id}')
            print(f'url: {data["url"]}\n')

            data = scrape_game_page(data)
            data = scrape_game_api(data)
            game_list.append(data)

            time.sleep(DELAY)

        page = page + 1

    return game_list

def scrape_game_page(data):
    response = session.get(data['url'], headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')

    name = soup.select('h1[data-qa="mfe-game-title#name"]')
    name = name[0].text if len(name) > 0 else None
    rating = soup.select('img[data-qa="mfe-content-rating#ratingImage#image-no-js"]')
    rating = rating[0]['alt'] if len(rating) > 0 else None

    notice0 = soup.select('span[data-qa="mfe-compatibility-notices#notices#notice0#compatText"]')
    notice0 = notice0[0].text if len(notice0) > 0 else None
    notice1 = soup.select('span[data-qa="mfe-compatibility-notices#notices#notice1#compatText"]')
    notice1 = notice1[0].text if len(notice1) > 0 else None
    notice2 = soup.select('span[data-qa="mfe-compatibility-notices#notices#notice2#compatText"]')
    notice2 = notice2[0].text if len(notice2) > 0 else None
    notice3 = soup.select('span[data-qa="mfe-compatibility-notices#notices#notice3#compatText"]')
    notice3 = notice3[0].text if len(notice3) > 0 else None
    notice4 = soup.select('span[data-qa="mfe-compatibility-notices#notices#notice4#compatText"]')
    notice4 = notice4[0].text if len(notice4) > 0 else None
    notice5 = soup.select('span[data-qa="mfe-compatibility-notices#notices#notice5#compatText"]')
    notice5 = notice5[0].text if len(notice5) > 0 else None

    description = soup.select('p[data-qa="mfe-game-overview#description"]')
    description = description[0].text if len(description) > 0 else None

    platform =  soup.select('dd[data-qa="gameInfo#releaseInformation#platform-value"]')
    platform = platform[0].text if len(platform) > 0 else None
    release = soup.select('dd[data-qa="gameInfo#releaseInformation#releaseDate-value"]')
    release = release[0].text if len(release) > 0 else None
    publisher = soup.select('dd[data-qa="gameInfo#releaseInformation#publisher-value"]')
    publisher = publisher[0].text if len(publisher) > 0 else None
    genre = soup.select('dd[data-qa="gameInfo#releaseInformation#genre-value"] span')
    genre = genre[0].text if len(genre) > 0 else None
    voice_lang = soup.select('dd[data-qa="gameInfo#releaseInformation#voice-value"]')
    voice_lang = voice_lang[0].text if len(voice_lang) > 0 else None
    screen_lang = soup.select('dd[data-qa="gameInfo#releaseInformation#subtitles-value"]')
    screen_lang = screen_lang[0].text if len(screen_lang) > 0 else None

    data['name'] = name
    data['rating'] = rating
    data['notice0'] = notice0
    data['notice1'] = notice1
    data['notice2'] = notice2
    data['notice3'] = notice3
    data['notice4'] = notice4
    data['notice5'] = notice5
    data['description'] = description
    data['platform'] = platform
    data['release'] = release
    data['publisher'] = publisher
    data['genre'] = genre
    data['voice_lang'] = voice_lang
    data['screen_lang'] = screen_lang

    return data

def scrape_game_api(data):
    d = {}
    concept_id = data['concept_id']

    api_url = f'https://web.np.playstation.com/api/graphql/v1//op?operationName=queryRetrieveTelemetryDataPDPConcept&variables={{"conceptId":"{concept_id}","productId":null}}&extensions={{"persistedQuery":{{"version":1,"sha256Hash":"{API_HASH}"}}}}'
    response = session.get(api_url, headers=headers)
    j = json.loads(response.text)

    product = j['data']['conceptRetrieve']['defaultProduct']
    data['c_name'] = j['data']['conceptRetrieve']['name']
    data['c_rating'] = product['contentRating']['name']
    data['c_id'] = product['id']
    data['c_genres'] = list(map(lambda a: a['value'], product['localizedGenres']))
    data['c_default_prod_name'] = product['name']
    data['c_np_title_id'] = product['npTitleId']
    data['c_sku_id'] = product['skus'][0]['id']
    data['c_display_price'] = product['skus'][0]['displayPrice']
    data['c_price'] = product['skus'][0]['price']

    main_cta = list(filter(lambda a: a['price']['serviceBranding'][0] == 'NONE', product['webctas']))
    if len(main_cta) > 0:
        data['c_discounted_price'] = main_cta[0]['price']['discountedPrice']
    else:
        data['c_discounted_price'] = None

    return data

In [108]:
# Tests

# DRAGON BALL XENOVERSE 2 - https://store.playstation.com/en-us/concept/217012
# ELDEN RING PS4 & PS5 - https://store.playstation.com/en-us/concept/10000333
# Fortnite - https://store.playstation.com//en-us/concept/228748
# Dragon's Dogma: Dark Arisen - https://store.playstation.com//en-us/concept/223963

# data = {
#     'concept_id': '217012',
#     'url': 'https://store.playstation.com/en-us/concept/217012'
# }
# data = scrape_game_page(data)
# data = scrape_game_api(data)
# data

data = ps_digital_scrape(pages=2)

id: 228748
url: https://store.playstation.com//en-us/concept/228748

id: 10001130
url: https://store.playstation.com//en-us/concept/10001130

id: 10004836
url: https://store.playstation.com//en-us/concept/10004836

id: 10002100
url: https://store.playstation.com//en-us/concept/10002100

id: 231761
url: https://store.playstation.com//en-us/concept/231761

id: 10002456
url: https://store.playstation.com//en-us/concept/10002456

id: 201930
url: https://store.playstation.com//en-us/concept/201930

id: 10004336
url: https://store.playstation.com//en-us/concept/10004336

id: 10005744
url: https://store.playstation.com//en-us/concept/10005744

id: 232352
url: https://store.playstation.com//en-us/concept/232352

id: 10001615
url: https://store.playstation.com//en-us/concept/10001615

id: 10000896
url: https://store.playstation.com//en-us/concept/10000896

id: 10003900
url: https://store.playstation.com//en-us/concept/10003900

id: 212779
url: https://store.playstation.com//en-us/concept/212779

In [109]:
df = pd.DataFrame(data)
df

Unnamed: 0,concept_id,url,img,name,rating,notice0,notice1,notice2,notice3,notice4,...,c_name,c_rating,c_id,c_genres,c_default_prod_name,c_np_title_id,c_sku_id,c_display_price,c_price,c_discounted_price
0,228748,https://store.playstation.com//en-us/concept/2...,https://image.api.playstation.com/vulcan/ap/rn...,Fortnite,ESRB Teen,In-game purchases optional,Online play required,99 online players,,,...,Fortnite,ESRB_TEEN,UP1477-PPSA01922_00-FORTNITETESTING2,"[Action, Adventure, Unique]",Fortnite,PPSA01922_00,UP1477-PPSA01922_00-FORTNITETESTING2-U001,Free,0,Free
1,10001130,https://store.playstation.com//en-us/concept/1...,https://image.api.playstation.com/vulcan/ap/rn...,Call of Duty®: Modern Warfare® II - Cross-Gen ...,ESRB Mature 17+,PS Plus required for online play,In-game purchases optional,Supports up to 99 online players with PS Plus,Online play optional,1 - 2 players,...,Call of Duty®: Modern Warfare® II,ESRB_MATURE_17,UP0002-PPSA01649_00-CODMW2CROSSGEN01,[Action],Call of Duty®: Modern Warfare® II - Cross-Gen ...,PPSA01649_00,UP0002-PPSA01649_00-CODMW2CROSSGEN01-U004,$69.99,6999,$69.99
2,10004836,https://store.playstation.com//en-us/concept/1...,https://image.api.playstation.com/vulcan/ap/rn...,NBA 2K23,ESRB Everyone,PS Plus required for online play,In-game purchases optional,Supports up to 10 online players with PS Plus,Online play optional,1 - 4 players,...,NBA 2K23,ESRB_EVERYONE,UP1001-PPSA07169_00-NBA2K23CROSSBUY0,[Sport],NBA 2K23,PPSA07169_00,UP1001-PPSA07169_00-NBA2K23CROSSBUY0-U001,$99.99,9999,
3,10002100,https://store.playstation.com//en-us/concept/1...,https://image.api.playstation.com/vulcan/ap/rn...,FINAL FANTASY XVI,ESRB Mature 17+,1 player,Remote Play supported,,,,...,FINAL FANTASY XVI,ESRB_MATURE_17,UP0082-PPSA10664_00-FF16SIEA00000002,"[Action, Role Playing Games]",FINAL FANTASY XVI,PPSA10664_00,UP0082-PPSA10664_00-FF16SIEA00000002-U001,$69.99,6999,$69.99
4,231761,https://store.playstation.com//en-us/concept/2...,https://image.api.playstation.com/vulcan/ap/rn...,Diablo® IV - Standard Edition,ESRB Mature 17+,PS Plus required for online play,In-game purchases optional,Online play required,Supports up to 4 online players with PS Plus,1 - 2 players,...,Diablo® IV,ESRB_MATURE_17,UP0002-PPSA02442_00-DIVGAMESTANDARD1,[Unique],Diablo® IV - Standard Edition,PPSA02442_00,UP0002-PPSA02442_00-DIVGAMESTANDARD1-U003,$69.99,6999,$69.99
5,10002456,https://store.playstation.com//en-us/concept/1...,https://image.api.playstation.com/vulcan/ap/rn...,Marvel’s Spider-Man 2,ESRB Teen,1 player,Game Help supported,,,,...,Marvel's Spider-Man 2,ESRB_TEEN,UP9000-PPSA03016_00-MARVELSPIDERMAN2,[Action],Marvel’s Spider-Man 2,PPSA03016_00,UP9000-PPSA03016_00-MARVELSPIDERMAN2-U001,$69.99,6999,$69.99
6,201930,https://store.playstation.com//en-us/concept/2...,https://image.api.playstation.com/vulcan/ap/rn...,Grand Theft Auto Online (PlayStation®5),ESRB Mature,PS Plus required for online play,In-game purchases optional,Online play required,Supports up to 30 online players with PS Plus,Remote Play supported,...,Grand Theft Auto V (PlayStation®5),ESRB_MATURE,UP1004-PPSA03420_00-GTAOSTANDALONE01,"[Action, Adventure]",Grand Theft Auto Online (PlayStation®5),PPSA03420_00,UP1004-PPSA03420_00-GTAOSTANDALONE01-U003,$19.99,1999,$19.99
7,10004336,https://store.playstation.com//en-us/concept/1...,https://image.api.playstation.com/vulcan/ap/rn...,EA SPORTS™ FIFA 23 PS5™,ESRB Everyone,PS Plus required for online play,In-game purchases optional,Supports up to 22 online players with PS Plus,Online play optional,1 - 4 players,...,EA SPORTS™ FIFA 23,ESRB_EVERYONE,UP0006-PPSA06279_00-FIFAFOOTBALL2023,[Sport],EA SPORTS™ FIFA 23 PS5™,PPSA06279_00,UP0006-PPSA06279_00-FIFAFOOTBALL2023-U001,$69.99,6999,$17.49
8,10005744,https://store.playstation.com//en-us/concept/1...,https://image.api.playstation.com/vulcan/ap/rn...,MLB® The Show™ 23 PS5™,ESRB Everyone,PS Plus required for online play,In-game purchases optional,Supports up to 8 online players with PS Plus,Online play optional,1 - 4 players,...,MLB® The Show™ 23,ESRB_EVERYONE,UP9000-PPSA09243_00-MLBTHESHOW23SHIP,[Sport],MLB® The Show™ 23 PS5™,PPSA09243_00,UP9000-PPSA09243_00-MLBTHESHOW23SHIP-U003,$69.99,6999,$69.99
9,232352,https://store.playstation.com//en-us/concept/2...,https://image.api.playstation.com/vulcan/ap/rn...,Apex Legends™ PS5™,ESRB Teen,In-game purchases optional,Online play required,60 online players,,,...,Apex Legends™,ESRB_TEEN,UP0006-PPSA04873_00-APEXLEGENDRSPWN1,"[Action, Shooter]",Apex Legends™ PS5™,PPSA04873_00,UP0006-PPSA04873_00-APEXLEGENDRSPWN1-U003,Free,0,Free


In [96]:
concept_id = 204861
hash = '16b0b76dac848b6e33ed088a5a3aedb738e51c9481c6a6eb6d6c1c1991ea1f39'

test = f'https://web.np.playstation.com/api/graphql/v1//op?operationName=queryRetrieveTelemetryDataPDPConcept&variables={{"conceptId":"{concept_id}","productId":null}}&extensions={{"persistedQuery":{{"version":1,"sha256Hash":"{hash}"}}}}'

headers={"X-Psn-Store-Locale-Override":"en-US"}

response = session.get(test, headers=headers)

In [93]:
d = {}

j = json.loads(response.text)
product = j['data']['conceptRetrieve']['defaultProduct']

d['c_name'] = j['data']['conceptRetrieve']['name']
d['c_rating'] = product['contentRating']['name']
d['c_id'] = product['id']
d['c_genres'] = list(map(lambda a: a['value'], product['localizedGenres']))
d['c_default_prod_name'] = product['name']
d['c_np_title_id'] = product['npTitleId']
d['c_sku_id'] = product['skus'][0]['id']
d['c_display_price'] = product['skus'][0]['displayPrice']
d['c_price'] = product['skus'][0]['price']

main_cta = list(filter(lambda a: a['price']['serviceBranding'][0] == 'NONE', product['webctas']))
if len(main_cta) > 0:
    d['c_discounted_price'] = main_cta[0]['price']['discountedPrice']
else:
    d['c_discounted_price'] = None

d




{'c_name': 'DRAGON BALL XENOVERSE 2',
 'c_rating': 'ESRB_TEEN',
 'c_id': 'UP0700-CUSA05350_00-DRAGONBALLXV2002',
 'c_genres': ['Action', 'Casual', 'Fighting'],
 'c_default_prod_name': 'DRAGON BALL XENOVERSE 2',
 'c_np_title_id': 'CUSA05350_00',
 'c_sku_id': 'UP0700-CUSA05350_00-DRAGONBALLXV2002-U001',
 'c_display_price': '$59.99',
 'c_price': 5999,
 'c_discounted_price': '$8.99'}