# Export steam applist

Use this notebook to export the app list (with the details of each app, not just its id) scraped by the `./steam-applist-scraper.py` script. The script stores this information in a binary (pickle) file; this notebook exports the list to a JSON file.

To reduce the size of the final file from 2 GB to 1 GB, only apps that are games (which excludes demos and helper programs), have already been released, and are not free are kept. This filtering is done with the code below.

```py
filtered_apps = {appid: details for appid, details in apps_dict.items() if (
    not details['is_free']) and details['type'] == 'game' and (not details['release_date']['coming_soon'])}
len(filtered_apps)
```

In [1]:
import pickle
from pathlib import Path
import os

# Folder that is searched for pickled checkpoint files.
checkpoint_folder = Path("checkpoints")

# Name fragments that identify each kind of checkpoint file.
apps_dict_filename_prefix = 'apps_dict'
exc_apps_filename_prefix = 'excluded_apps_list'
error_apps_filename_prefix = 'error_apps_list'

# Empty starting values; they are replaced only when a matching checkpoint is loaded.
apps_dict = dict()
excluded_apps_list = list()
error_apps_list = list()

In [2]:
def check_latest_checkpoints(checkpoint_folder, apps_dict_filename_prefix, exc_apps_filename_prefix, error_apps_filename_prefix):
    """Find the latest checkpoint file of each kind directly inside ``checkpoint_folder``.

    A file is a candidate when its suffix is ``.p`` and its name contains both
    the given name fragment and the text ``ckpt``. Sub-folders are not searched.
    "Latest" means the candidate that sorts last by path, i.e. a lexicographic
    comparison of the file names (numeric suffixes only order as expected if
    they are zero-padded).

    Args:
        checkpoint_folder: Folder to look in. A missing folder is treated as empty.
        apps_dict_filename_prefix: Name fragment of the apps-dict checkpoints.
        exc_apps_filename_prefix: Name fragment of the excluded-apps checkpoints.
        error_apps_filename_prefix: Name fragment of the error-apps checkpoints.

    Returns:
        A 3-tuple ``(apps_dict_path, excluded_apps_path, error_apps_path)``;
        each item is a ``Path`` or ``None`` when no candidate exists.
    """
    # Take only the first os.walk() entry (the folder itself). os.walk yields
    # nothing for a missing/unreadable folder, hence the empty default.
    root, _, filenames = next(os.walk(checkpoint_folder), (checkpoint_folder, [], []))
    pickle_paths = [Path(root, name) for name in filenames]
    pickle_paths = [path for path in pickle_paths if path.suffix == '.p']

    def _latest(name_fragment):
        """Return the last-sorting checkpoint path containing ``name_fragment``, or None."""
        matches = [
            path for path in pickle_paths
            if name_fragment in path.name and 'ckpt' in path.name
        ]
        return max(matches, default=None)

    return (
        _latest(apps_dict_filename_prefix),
        _latest(exc_apps_filename_prefix),
        _latest(error_apps_filename_prefix),
    )

In [3]:
def load_pickle(path_to_load:Path) -> dict:
    """Unpickle and return the object stored at ``path_to_load``.

    The returned object is whatever was pickled; despite the ``dict``
    annotation, this notebook also calls it for the excluded/error app
    checkpoints, whose defaults are lists.

    Args:
        path_to_load: Path of the pickle file to read.

    Returns:
        The unpickled object.
    """
    # NOTE: unpickling can execute arbitrary code; only load checkpoint files
    # that come from a trusted source.
    # The context manager closes the file handle even if unpickling fails.
    with open(path_to_load, "rb") as pickle_file:
        return pickle.load(pickle_file)

In [4]:
# Report a missing checkpoint folder; nothing is loaded in that case.
if not checkpoint_folder.exists():
    print(f'Fail to find checkpoint folder: {checkpoint_folder}', 'Start at blank.', sep='\n')

In [5]:
# Locate the latest checkpoint of each kind (None when a kind has no checkpoint).
(
    latest_apps_dict_ckpt_path,
    latest_exc_apps_list_ckpt_path,
    latest_error_apps_list_ckpt_path,
) = check_latest_checkpoints(
    checkpoint_folder,
    apps_dict_filename_prefix,
    exc_apps_filename_prefix,
    error_apps_filename_prefix,
)

# Load every checkpoint that was found; the others keep their current value.
if latest_apps_dict_ckpt_path is not None:
    apps_dict = load_pickle(latest_apps_dict_ckpt_path)
    print('Successfully load apps_dict checkpoint:', latest_apps_dict_ckpt_path)
    print(f'Number of apps in apps_dict: {len(apps_dict)}')

if latest_exc_apps_list_ckpt_path is not None:
    excluded_apps_list = load_pickle(latest_exc_apps_list_ckpt_path)
    print("Successfully load excluded_apps_list checkpoint:", latest_exc_apps_list_ckpt_path)
    print(f'Number of apps in excluded_apps_list: {len(excluded_apps_list)}')

if latest_error_apps_list_ckpt_path is not None:
    error_apps_list = load_pickle(latest_error_apps_list_ckpt_path)
    print("Successfully load error_apps_list checkpoint:", latest_error_apps_list_ckpt_path)
    print(f'Number of apps in error_apps_list: {len(error_apps_list)}')

Successfully load apps_dict checkpoint: checkpoints/apps_dict-ckpt-fin.p
Number of apps in apps_dict: 237721
Successfully load excluded_apps_list checkpoint: checkpoints/excluded_apps_list-ckpt-fin.p
Number of apps in excluded_apps_list: 25321
Successfully load error_apps_list checkpoint: checkpoints/error_apps_list-ckpt-fin.p
Number of apps in error_apps_list: 264


In [6]:
# Materialise the app ids as a list so entries can be picked by position.
apps_dict_keys = list(apps_dict)

In [7]:
# Field names present in the first app's details record.
list(apps_dict[apps_dict_keys[0]])

['type',
 'name',
 'steam_appid',
 'required_age',
 'is_free',
 'detailed_description',
 'about_the_game',
 'short_description',
 'fullgame',
 'header_image',
 'capsule_image',
 'capsule_imagev5',
 'website',
 'pc_requirements',
 'mac_requirements',
 'linux_requirements',
 'developers',
 'publishers',
 'package_groups',
 'platforms',
 'categories',
 'release_date',
 'support_info',
 'background',
 'background_raw',
 'content_descriptors',
 'ratings',
 'appid']

In [9]:
# Keep the first app's details record in a variable and display it.
app_details = apps_dict[apps_dict_keys[0]]
app_details

{'type': 'demo',
 'name': '天才退魔師才不會屈服於觸手妖魔 Demo',
 'steam_appid': 3145730,
 'required_age': 0,
 'is_free': True,
 'detailed_description': '',
 'about_the_game': '',
 'short_description': '',
 'fullgame': {'appid': '2799920',
  'name': "Talented Exorcist won't submit to Tentacle Demon"},
 'header_image': 'https://shared.akamai.steamstatic.com/store_item_assets/steam/apps/3145730/header.jpg?t=1728289934',
 'capsule_image': 'https://shared.akamai.steamstatic.com/store_item_assets/steam/apps/3145730/0e58fa032cb39d4df5d8899135fa6e07b6835555/capsule_231x87.jpg?t=1728289934',
 'capsule_imagev5': 'https://shared.akamai.steamstatic.com/store_item_assets/steam/apps/3145730/0e58fa032cb39d4df5d8899135fa6e07b6835555/capsule_184x69.jpg?t=1728289934',
 'website': None,
 'pc_requirements': {'minimum': '<strong>Minimum:</strong><br><ul class="bb_ul"><li><strong>OS *:</strong> Windows® 7/8/8.1/10/11<br></li><li><strong>Processor:</strong> Intel Core2 Duo or better<br></li><li><strong>Memory:</strong> 4 

In [10]:
# Spot check: value of the 'is_free' flag on the sample record.
app_details['is_free']

True

In [11]:
# Spot check: whether the sample record's type is 'demo'.
app_details['type'] == 'demo'

True

In [34]:
# Keep only the apps that satisfy all three conditions, checked in this order:
#   1. 'is_free' is falsy (the app is not free),
#   2. 'type' is exactly 'game',
#   3. release_date['coming_soon'] is falsy (the app is already released).
# The final expression displays how many apps remain.
filtered_apps = {appid: details for appid, details in apps_dict.items() if (
    not details['is_free']) and details['type'] == 'game' and (not details['release_date']['coming_soon'])}
len(filtered_apps)

98101

In [13]:
# Total number of apps before filtering, for comparison with the filtered count.
len(apps_dict)

237721

In [38]:
# Display one retained record (position 15 in key order) to eyeball the filter result.
filtered_apps[list(filtered_apps)[15]]

{'type': 'game',
 'name': 'The medical examination diary: Teamwork in Paradise.',
 'steam_appid': 3145810,
 'required_age': 0,
 'is_free': False,
 'detailed_description': '<p class="bb_paragraph" >The story of &quot;The medical examination diary: Teamwork in Paradise.&quot;</p><p class="bb_paragraph" >changes depending on the choices you make,</p><p class="bb_paragraph" >It is a novel game with multiple endings.</p><p class="bb_paragraph" ></p><p class="bb_paragraph" >The illustrator [Moneti] draws the story,</p><p class="bb_paragraph" >Please enjoy your days of practical training with these two beautiful girls.</p><p class="bb_paragraph" >If you can keep them in a good mood,</p><p class="bb_paragraph" >a special development may be waiting for you. ......?</p><p class="bb_paragraph" ></p><p class="bb_paragraph" ></p><h2 class="bb_tag" ><strong>Feature</strong></h2><p class="bb_paragraph" ></p><ul class="bb_ul"><li><p class="bb_paragraph" >Multiple endings with choices</p></li><li><p cl

In [39]:
import json

# Write the filtered app details to disk as one JSON document.
with open('filtered_apps_dict.json', 'w') as json_file:
    json.dump(filtered_apps, json_file)

In [1]:
import json
# Read the exported app details back from the JSON file.
with open('filtered_apps_dict.json', 'r') as json_file:
    filtered_apps = json.load(json_file)

In [2]:
# Export only the ids (the dictionary keys) of the filtered apps as a JSON list.
with open('filtered_apps_ids.json', 'w') as ids_file:
    json.dump(list(filtered_apps), ids_file)