### Configure the REST Client

In [11]:
from pokemontcgsdk import RestClient
import yaml as yaml

# Read the API credentials from a YAML file kept outside the notebook.
# The encoding is pinned so the file is decoded the same way on every platform
# instead of depending on the locale's default encoding.
with open('../sensitive_config/api_config.yaml', 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# Hand the key stored under 'api_key' to the SDK's REST client.
RestClient.configure(config['api_key'])

In [12]:
from pokemontcgsdk import Card
from pokemontcgsdk import Set
import dataclasses as dc
from datetime import datetime
import time

In [13]:
import json
from pathlib import Path
from tqdm import tqdm_notebook
import os

# Directory layout, resolved relative to the parent of the current working directory:
#   <parent>/data/raw/sets_partitioned
data_path = Path.cwd().parent.joinpath('data')
raw_data_path = data_path.joinpath('raw')
partitioned_by_sets_extract_path = raw_data_path.joinpath('sets_partitioned')

In [36]:
from typing import Optional, Any, Union
from functools import reduce

## Define the helper functions that convert the SDK's output into JSON-ready structures
def convert_dataclasses_to_dicts(dc_list: "Union[list[Card], list[Set]]") -> list[dict]:
    """Convert dataclass objects into plain dictionaries for serialization to JSON.

    Args:
        dc_list: Dataclass instances to convert (e.g. ``Card`` or ``Set`` objects).
            The annotation is quoted so it is not evaluated when the function is defined.

    Returns:
        A list holding one ``dataclasses.asdict`` dictionary per input object,
        in the same order as the input.
    """
    # Build a real list rather than a lazy ``map`` object: the result is passed to
    # ``len()`` and ``json.dump`` and may be iterated more than once, none of which
    # works with a one-shot iterator.
    return [dc.asdict(item) for item in dc_list]

def deep_get(dictionary: dict, key_string:str, default:Optional[Any]=None) -> Any:
    """Look up a value inside a nested dictionary using a dotted key path.

    ``key_string`` uses periods to separate the keys of each hierarchy level.
    Ex: 'set.name' --> dictionary['set']['name']

    Args:
        dictionary: The (possibly nested) dictionary to search.
        key_string: Period-separated path of keys to follow.
        default: Value returned when the path cannot be followed to the end.

    Returns:
        The value stored at the end of the path, or ``default`` when any key
        along the path is absent or an intermediate value is not a dictionary.
    """
    # Private marker that cannot be equal to anything stored in the data.
    missing = object()
    current = dictionary
    for key in key_string.split('.'):
        if not isinstance(current, dict):
            # The path continues but there is nothing left to descend into.
            return default
        current = current.get(key, missing)
        if current is missing:
            # Return immediately so a dict-valued ``default`` is never traversed
            # as though it were part of the searched data.
            return default
    return current

def subset_fields_of_interest(data:list[dict], fields:list[str]) -> list[dict]:
    """Get a subset dictionary from each nested data container dictionary.

    Args:
        data: Dictionaries to extract from (may be nested).
        fields: Dotted key paths to extract, as understood by ``deep_get``.

    Returns:
        One dictionary per input record, keyed by the requested field strings
        exactly as given (the dotted path is kept as the key).

    Raises:
        KeyError: If any requested field cannot be found in a record.
    """
    # A unique object checked by identity: unlike a marker string compared with
    # ``==``, it can never be mistaken for a value that really occurs in the data.
    missing = object()
    data_subset = []
    for datum in data:
        datum_subset: dict = {}
        for field in fields:
            value = deep_get(datum, field, default=missing)
            if value is missing:
                raise KeyError(f'The field "{field}" could not be found within the nested hierarchy of the data')
            datum_subset[field] = value
        data_subset.append(datum_subset)
    return data_subset

def wrap_with_metadata(data: list[dict]) -> dict:
    """Add metadata around the pulled data.

    Args:
        data: The records to wrap. A non-list iterable is accepted and is
            materialized into a list first.

    Returns:
        A dictionary with three keys: ``'pulled_on'`` (local time formatted as
        ``MM/DD/YYYY HH:MM:SS``), ``'count'`` (number of records) and ``'data'``
        (the records themselves).
    """
    # ``len()`` and JSON serialization both need a concrete sequence; lazy
    # iterables such as ``map`` objects would otherwise raise a TypeError here.
    if not isinstance(data, list):
        data = list(data)
    return {
        'pulled_on': datetime.today().strftime('%m/%d/%Y %H:%M:%S'),
        'count': len(data),
        'data': data,
    }

def write_to_json(dc_list: Union[list[Card], list[Set]], output_filepath:str,
                  fields_of_interest:Optional[list[str]]=None) -> None:
    """Write a list of SDK dataclass objects to a JSON file.

    The objects are converted to dictionaries, optionally reduced to the
    requested fields, wrapped with metadata and written with 4-space indentation.

    Args:
        dc_list: SDK dataclass objects to serialize.
        output_filepath: Destination file; it is opened in write mode, so any
            existing content is replaced.
        fields_of_interest: Optional dotted key paths to keep from each record.
            When ``None``, the full records are written.

    Raises:
        KeyError: If a requested field is missing from a record.
    """
    # Materialize the conversion so that, even when no field subsetting happens,
    # a concrete list (not a lazy iterator) reaches ``len()`` and ``json.dump``.
    data = list(convert_dataclasses_to_dicts(dc_list))
    if fields_of_interest is not None:
        data = subset_fields_of_interest(data, fields_of_interest)

    with open(output_filepath, 'w', encoding='utf-8') as f:
        json.dump(wrap_with_metadata(data), f, indent=4)


## Get a Catalog of Sets:

In [37]:
# Pull the full catalog of sets through the SDK
sets_catalog = Set.all()
# Convert list of Dataclass objects to list of dictionaries.
# list() guarantees a reusable sequence: a one-shot iterator stored in a notebook
# variable would be empty the second time a later cell is run against it.
sets_catalog = list(convert_dataclasses_to_dicts(sets_catalog))

In [38]:
# Reduce every set record to just its 'id' and 'total' entries
fields_of_interest = ['id', 'total']
sets_catalog_subset = subset_fields_of_interest(
    data=sets_catalog,
    fields=fields_of_interest,
)

In [41]:
# Preview the last five entries of the reduced catalog
sets_catalog_subset[-5:]

[{'id': 'sv6pt5', 'total': 99},
 {'id': 'sv7', 'total': 175},
 {'id': 'sv8', 'total': 252},
 {'id': 'sv8pt5', 'total': 180},
 {'id': 'sv9', 'total': 190}]

## For Each Set, Extract the New Price Data from the API

In [None]:
## Setup Query String
# Fields actually wanted from each card
price_capture_fields_of_interest = ['name', 'id', 'tcgplayer', 'legalities']
# Extra fields that must be requested alongside them: the Dacite package used by
# QueryBuilder builds a Card dataclass from the response dictionary, and Card has
# non-Optional fields. Even when '?select=name,id,...' returns a correct API
# response, leaving those fields out causes an error.
mandatory_other_fields = ['images', 'number', 'supertype', 'set']
fields_subset = [*price_capture_fields_of_interest, *mandatory_other_fields]
foi_string = ','.join(fields_subset)

# Restrict the query to the 'sv9' set and to the selected fields only
query = dict(q='set.id:sv9', select=foi_string)

data = Card.where(**query)

In [47]:
# Count the cards that survive conversion and field subsetting
len(
    subset_fields_of_interest(
        convert_dataclasses_to_dicts(data),
        price_capture_fields_of_interest,
    )
)

190

In [None]:
for card_set in tqdm_notebook(sets_catalog_subset):
    