In [14]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests
import re
import time
import epicstore_api
from epicstore_api import EpicGamesStoreAPI, OfferData
import json

In [15]:
og_small = pd.read_csv("df_scraped.csv")
api = EpicGamesStoreAPI()
og_small

Unnamed: 0.1,Unnamed: 0,Number,Name,Start,End,Link
0,0,477,The Bridge,3/14/2024,3/21/2024,https://store.epicgames.com/en-US/p/the-bridge
1,1,476,Deus Ex: Mankind Divided,3/14/2024,3/21/2024,https://store.epicgames.com/en-US/p/deus-ex-ma...
2,2,475,Astro Duel 2,3/7/2024,3/14/2024,https://store.epicgames.com/en-US/p/astro-duel...
3,3,474,Aerial_Knight’s Never Yield,2/29/2024,3/7/2024,https://store.epicgames.com/en-US/p/aerial-kni...
4,4,473,Super Meat Boy Forever,2/22/2024,2/29/2024,https://store.epicgames.com/en-US/p/super-meat...
...,...,...,...,...,...,...
472,472,5,Axiom Verge,2/7/2019,2/22/2019,https://store.epicgames.com/en-US/p/axiom-verge
473,473,4,The Jackbox Party Pack,1/24/2019,2/7/2019,https://store.epicgames.com/en-US/p/jackbox-pa...
474,474,3,What Remains of Edith Finch,1/11/2019,1/24/2019,https://store.epicgames.com/en-US/p/what-remai...
475,475,2,Super Meat Boy,12/28/2018,1/10/2019,https://store.epicgames.com/en-US/p/super-meat...


In [29]:
def get_gamedata(gamename, searchsize = 5, less_strict = False):
    # takes a string gamename, searches for it (defaults to 5 search results), finds exact name/title match
    # returns dict with id, descr[iption], namespace, orig[inal]_price, fmt_orig_price (nicely formatted), and tags (list)
    dict = api.fetch_store_games(count = searchsize, keywords = gamename)
    match = None
    for element in dict["data"]["Catalog"]["searchStore"]["elements"]:
        # ideal case
        if element["title"] == gamename:
            match = element
            break

    # less strict matching mode, if enabled
    # generally want to change the api to match the og df name
    # will re-loop through returned results a second time looking for a match
    if less_strict == True:
        for element in dict["data"]["Catalog"]["searchStore"]["elements"]:
            # api space-hyphens become : like og df only, should never false positive
            if re.sub(r" -", ":", element["title"]) == gamename:
                match = element
                break
            # get rid of ALL nonalphanumerics, including special characters and whitespace
            # ALSO checks lowercase for case issues at same time
            if re.sub(r'[^a-zA-Z0-9]', '', element["title"]).lower() == re.sub(r'[^a-zA-Z0-9]', '', gamename).lower():
                match = element
                break
        # last resort, does ALL the dataframe version match the START of the api version? If so use the first result
        for element in dict["data"]["Catalog"]["searchStore"]["elements"]:
            if element["title"][:len(gamename)] == gamename:
                match = element
                break
            
            
    return_dict = {
    "id": match["id"],
    "descr": match["description"],
    "namespace": match["namespace"],
    "orig_price": match["price"]["totalPrice"]["originalPrice"],
    "fmt_orig_price": match["price"]["totalPrice"]["fmtPrice"]["originalPrice"],
    "tags": match["tags"]  # list but looks like individual dicts
    #TODO: possibly format the tags better
    #TODO: there might be one or two relevant pieces more to grab
    }
    return return_dict

# setting up a wrapped function to test other titles, eventually for the whole table, but to respect rate limits
def single_get(name, sleep_time = .5):
    time.sleep(sleep_time)
    #TODO: POTENTIALLY UNCOMMENT BELOW TEST OUTPUT PRINT STATEMENTS
    print("Attempting to grab data about", name)
    # No additional text = Success
    try:    
        done = get_gamedata(name, 5)
    #TODO: trying to think of possibility get_gamedata fails... maybe add a better fail return?
    except:
        try:
            # in this case, possibly the name wasn't within the first 5 results, so trying more and 
            print("RETRY RETRY:", name)
            done = get_better_gamedata(name, 40, True)
        except:
            # on a fail, I will return None and I will have to make this into an empty row later
            print("\tFAILED RETRY")
            done = None
    return done

In [None]:
# CAUTION: TAKES A LONG TIME TO RUN! OUTPUTS A LOT TOO!
# This goes through the entire table and uses the API for all almost 500 games
# could add optional argument sleep_time for other than .5 if I want to though
big_df = [single_get(a_game) for a_game in gdf["Name"]]  # pulls all strings from "Name" column in gdf, same order
big_df

In [None]:
# worried about data loss (long time to re-run), saving copies two ways to not overwrite by accident
# big_df_copy = big_df.copy()
# big_df_copy2 = big_df

# convert to dataframe and save to not re-run script (since with waits, takes a while)
# data = [d if d is not None else {} for d in big_df]
# adf = pd.DataFrame(data)
# adf.to_csv("~/Desktop/dshw/pypractice/df_api.csv", index = True)
# gdf.to_csv("~/Desktop/dshw/pypractice/df_scraped.csv", index = True)

# print(adf.columns, "\n", gdf.columns)
# # print(adf.head(5), gdf.head(5))
# joined = pd.merge(adf, gdf, left_index = True, right_index = True)
# print(joined.columns)
# print(joined.head(2))

In [18]:
# test bench and helper test functions off of saved CSV data

# loads the csv into "dff"
dff = pd.read_csv("df_joined.csv")

def titleresults(title, trycounts = 10):
    # returns TITLES ONLY for a default 10 search
    resdict = api.fetch_store_games(count = trycounts, keywords = title)
    match_found = 0
    for element in resdict["data"]["Catalog"]["searchStore"]["elements"]:
        if element["title"] == title:
            print("\n\nMATCH FOUND!!!!\n\n")
            match_found = 1
        print(element["title"])
    return match_found

def getname(index_num):
    # grabs the full name from index to save me typing
    return (dff.iloc[index_num]["Name"])

def jprint(thing):
    print(json.dumps(thing, indent = 2, default = str))

def testsuite(index_num):
    name = getname(index_num)
    print("-> Testing", name)
    print("A search returns the following:", titleresults(name))
    if titleresults(name) == 1:
        jprint(get_gamedata(name))
    elif titleresults(name, 100) == 1:
        jprint(get_gamedata(name))
    else:
        print("\n\nsadface") # somethign was supposed to go here

# failure mode: Doors - Paradox != Doors: Paradox
# get_gamedata("Doors: Paradox")
# jprint(api.fetch_store_games(count = 10, keywords = "Doors: Paradox"))
# also deus ex: mankind same exact (space then dash)
# SOLUTION: add text replacer? Not sure if this happens ALWAYS though.

# failure case: Love
# TWO causes for failure: too many results (doesn't appear until 60, too general)
# titleresults("Love")
# SOLUTION: ??? Do it manually?
# listed in scrape as Love but LOVE is different caps.
# SOLUTION: add text lowercase coercion?
# jprint(api.fetch_store_games(count = 70, keywords = "Love"))
# jprint(get_gamedata("Love"))
# titleresults("Love", 30)
# get_gamedata("Love")

# strange case: Destiny 2: Legacy Collection
# doesn't return in search at all, but also, 

# The Outer Worlds: Spacer's Choice Edition
# DOES return on search but gamedata doesn't return anything

# print(getname(22))

# testsuite(22)
jprint(api.fetch_store_games(count = 1, keywords = getname(2)))
astro = api.fetch_store_games(count = 1, keywords = getname(2))
jprint(astro["id"])
spacer = api.fetch_store_games(count = 10, keywords = getname(22))
print(spacer["id"])

# get_gamedata(getname(2))
# get_gamedata(getname(22))

# getname(12)
# titleresults(getname(12))
# get_gamedata(getname(12)) # it DOES get found but cannot retrieve
# jprint(api.fetch_store_games(count = 5, keywords = getname(12)))
# jprint(api.fetch_store_games(count = 5, keywords = getname(2)))

{
  "data": {
    "Catalog": {
      "searchStore": {
        "elements": [
          {
            "title": "Astro Duel 2",
            "id": "bc38eac0277d41ec955690e25779cb53",
            "namespace": "4a681a809a094e2c8dcc68353c68fed6",
            "description": "Astro Duel 2 is a sci-fi combat game combining top-down space dog fights with up-close platforming action. Whether versus or co-op, battle through space and on foot at the same time! Strike within the fully-destructible arenas or just nuke \u2018em from orbit.",
            "effectiveDate": "2024-03-07T16:00:00.000Z",
            "keyImages": [
              {
                "type": "OfferImageWide",
                "url": "https://cdn1.epicgames.com/spt-assets/3251b927449a471c883aab00c076d63c/astro-duel-2-ukpi8.png"
              },
              {
                "type": "OfferImageTall",
                "url": "https://cdn1.epicgames.com/spt-assets/3251b927449a471c883aab00c076d63c/astro-duel-2-1w3a4.png"
              

KeyError: 'id'

In [24]:
miss_inds = [1 ,3 ,8 ,10 ,12 ,22 ,28 ,35 ,42 ,66 ,67 ,68 ,
             72 ,74 ,76 ,84 ,85 ,86 ,90 ,92 ,98 ,100 ,102 ,
             103 ,106 ,109 ,112 ,119 ,121 ,127 ,128 ,130 ,133 ,
             135 ,138 ,144 ,146 ,149 ,152 ,153 ,156 ,158 ,159 ,
             160 ,166 ,171 ,173 ,189 ,193 ,195 ,200 ,202 ,205 ,
             211 ,215 ,217 ,220 ,227 ,230 ,231 ,232 ,233 ,235 ,
             236 ,239 ,250 ,252 ,258 ,270 ,271 ,274 ,275 ,277 ,
             282 ,287 ,291 ,295 ,298 ,306 ,313 ,315 ,316 ,327 ,
             330 ,332 ,334 ,335 ,337 ,339 ,340 ,342 ,343 ,348 ,
             349 ,360 ,363 ,364 ,368 ,373 ,382 ,383 ,393 ,403 ,
             404 ,416 ,426 ,431 ,437 ,438 ,439 ,440 ,441 ,442 ,444 ,445 ,462]
for i in miss_inds:
    print("\nORIGINAL:", getname(i))
    titleresults(getname(i)) 


# df entry: ":" vs api " -" with a space and hyphen
# df entry: lowercase vs api UPPERCASE variants
# df entry: no special chars, vs api R, TM, icons
    
# (LAST resort) use if all chars in df entry == same # of FIRST chars in api
#   more difficult: if 90% of chars match??
# alternate: IGNORE ALL non-space, nonalphanumeric ENTIRELY for matching!!



ORIGINAL: Deus Ex: Mankind Divided
Deus Ex - Mankind Divided
Deus Ex: Mankind Divided - Digital Deluxe Edition

ORIGINAL: Aerial_Knight’s Never Yield
Aerial_Knight's Never Yield
Aerial_Knight's Never Yield - Deluxe Edition
Aerial_Knight's We Never Yield

ORIGINAL: Doors: Paradox
Doors - Paradox

ORIGINAL: Love
20 Minutes Till Dawn
A Monster's Expedition
A Rats Quest - The Way Back Home
Acceptance
Amber
Anodyne 2 Game and Soundtrack Bundle
At Eve's Wake Definitive Edition
Bird of Paradise
Black Hair Girl is Best Girl
Blood on the Thames

ORIGINAL: Marvel’s Guardians of the Galaxy
Marvel's Guardians of the Galaxy

ORIGINAL: The Outer Worlds: Spacer’s Choice Edition
The Outer Worlds: Spacer's Choice Edition

ORIGINAL: Destiny 2: Legacy Collection

ORIGINAL: Earthlock
EARTHLOCK
Ikonei Island: An Earthlock Adventure

ORIGINAL: Q.U.B.E. 2 Season Pass
Q.U.B.E. 2 Deluxe Edition

ORIGINAL: Grime
A Woman's Lot
Agatha Christie - Murder on the Orient Express
Agatha Christie - Murder on the Orient

In [34]:
def get_better_gamedata(gamename, searchsize = 5, less_strict = False):
    # takes a string gamename, searches for it (defaults to 5 search results), finds exact name/title match
    # returns dict with id, descr[iption], namespace, orig[inal]_price, fmt_orig_price (nicely formatted), and tags (list)
    dict = api.fetch_store_games(count = searchsize, keywords = gamename)
    match = None
    for element in dict["data"]["Catalog"]["searchStore"]["elements"]:
        # ideal case
        if element["title"] == gamename:
            match = element
            break

    # less strict matching mode, if enabled
    # generally want to change the api to match the og df name
    # will re-loop through returned results a second time looking for a match
    if less_strict == True:
        for element in dict["data"]["Catalog"]["searchStore"]["elements"]:
            # api space-hyphens become : like og df only, should never false positive
            if re.sub(r" -", ":", element["title"]) == gamename:
                match = element
                break
            # get rid of ALL nonalphanumerics, including special characters and whitespace
            # ALSO checks lowercase for case issues at same time
            if re.sub(r'[^a-zA-Z0-9]', '', element["title"]).lower() == re.sub(r'[^a-zA-Z0-9]', '', gamename).lower():
                match = element
                break
        # last resort, does ALL the dataframe version match the START of the api version? If so use the first result
        for element in dict["data"]["Catalog"]["searchStore"]["elements"]:
            if element["title"][:len(gamename)] == gamename:
                match = element
                break

    # NEEDED ONLY IF not wrapped in single_get (which handles errors)
    # if match == None:
    #     return {}
    
    return_dict = {
    "id": match["id"],
    "descr": match["description"],
    "namespace": match["namespace"],
    "orig_price": match["price"]["totalPrice"]["originalPrice"],
    "fmt_orig_price": match["price"]["totalPrice"]["fmtPrice"]["originalPrice"],
    "tags": match["tags"]  # list but looks like individual dicts
    #TODO: possibly format the tags better
    #TODO: there might be one or two relevant pieces more to grab
    }
    return return_dict

In [35]:

# for i in miss_inds:
#     time.sleep(.5)
#     print("\nORIGINAL:", getname(i))
#     get_better_gamedata(getname(i), 30, True) 
miss_names = [getname(i) for i in miss_inds]
# miss_df = [single_get(a_game) for a_game in gdf["Name"]]  # pulls all strings from "Name" column in gdf, same order
miss_df = [single_get(a_game) for a_game in miss_names]
miss_df


ORIGINAL: Deus Ex: Mankind Divided

ORIGINAL: Aerial_Knight’s Never Yield

ORIGINAL: Doors: Paradox

ORIGINAL: Love

ORIGINAL: Marvel’s Guardians of the Galaxy

ORIGINAL: The Outer Worlds: Spacer’s Choice Edition

ORIGINAL: Destiny 2: Legacy Collection

ORIGINAL: Earthlock

ORIGINAL: Q.U.B.E. 2 Season Pass

ORIGINAL: Grime

ORIGINAL: The Dungeon of Naheulbeuk: The Amulet of Chaos

ORIGINAL: theHunter: Call of the Wild

ORIGINAL: Payday 2

ORIGINAL: Fallout: New Vegas Ultimate Edition

ORIGINAL: The Sims 4 The Daring Lifestyle Bundle

ORIGINAL: Second Extinction

ORIGINAL: Mordhau

ORIGINAL: Dying Light: The Following – Enhanced Edition

ORIGINAL: World of Warships – Starter Pack: Ishizuchi

ORIGINAL: Warhammer 40,000: Gladius – Relics of War

ORIGINAL: Dishonored: Death of the Outsider

ORIGINAL: Hell Is Others

ORIGINAL: Epistory: Typing Chronicles

ORIGINAL: Gamedec: Definitive Edition

ORIGINAL: Shadow Tactics: Aiko’s Choice

ORIGINAL: Dishonored: Definitive Edition

ORIGINAL: F.I.

[{'id': 'a74220fd07e34b01987fb801433fb2e6',
  'descr': 'Secure the Digital Deluxe edition now to gain access to the Season Pass, an Extra In-Game Mission and more!',
  'namespace': '5c5b2b01c8ce475d86cdd2c76fd11f7b',
  'orig_price': 4499,
  'fmt_orig_price': '$44.99',
  'tags': [{'id': '1336'}, {'id': '1370'}, {'id': '9547'}, {'id': '1084'}]},
 {'id': '2fecb1793ce446408cd869611400a797',
  'descr': "Take the role of Wally. A mysterious character that has recovered what was taken from him. Hopefully, you're fast enough to outrun your enemies.   Run, Jump, Slide or Dash for acrobatic variants leading to dope combinations and avoid the challenges that await.",
  'namespace': '2ed7ed64d8604265924b85bbd5670220',
  'orig_price': 1199,
  'fmt_orig_price': '$11.99',
  'tags': [{'id': '1216'},
   {'id': '21894'},
   {'id': '9547'},
   {'id': '9549'},
   {'id': '1263'}]},
 {'id': 'b8dee54bce824477bc0b7cb298f9f9a9',
  'descr': 'A relaxing puzzle escape game! Make your way through a variety of hand

In [53]:
# single_get(getname(1))
# for only 752pm data
miss_df2 = pd.DataFrame(miss_df)
miss_df2.head()

# if rerunning after I returned error handling to single_get instead of get_better_gamadata:
# miss_df3 = [d if d is not None else {} for d in miss_df]
# miss_df3 = pd.DataFrame(miss_df3)

# JUST RAN 8pm
# miss_df2.to_csv("~/Desktop/dshw/EGS_project/df_missings.csv", index = True)

miss_df2["OG_index"] = miss_inds
miss_df2["Name"] = miss_names

miss_df2 = miss_df2.set_index('OG_index')
# print(miss_df2.head())

# print(dff.head())
withmiss_df = dff.combine_first(miss_df2)
withmiss_df.head()
# withmiss_df.shape

withmiss_df.to_csv("~/Desktop/dshw/EGS_project/df_complete.csv", index = True)