# Utilizing the MtG Gatherer API I will grab all card data

In [1]:
%pip install mtgsdk

from mtgsdk import Card
from mtgsdk import Set
from mtgsdk import Type
from mtgsdk import Supertype
from mtgsdk import Subtype
from mtgsdk import Changelog

Note: you may need to restart the kernel to use updated packages.


## Card Properties Per Class

    name
    multiverse_id
    layout
    names
    mana_cost
    cmc
    colors
    color_identity
    type
    supertypes
    subtypes
    rarity
    text
    flavor
    artist
    number
    power
    toughness
    loyalty
    variations
    watermark
    border
    timeshifted
    hand
    life
    reserved
    release_date
    starter
    rulings
    foreign_names
    printings
    original_text
    original_type
    legalities
    source
    image_url
    set
    set_name
    id  

# Card Set Info

    code
    name
    gatherer_code
    old_code
    magic_cards_info_code
    release_date
    border
    type
    block
    online_only
    booster
    mkm_id
    mkm_name

In [2]:
card = Card.find(386615)

In [3]:
card.image_url

'http://gatherer.wizards.com/Handlers/Image.ashx?multiverseid=386616&type=card'

# The Gatherer API is not providing high quality images and is very slow

Each query was taking 10 seconds which is not feasible.

# Time to use ScryFall

I will use a bulk download from Scryfall.com found here https://scryfall.com/docs/api/bulk-data

The dataset I am using is the "Default Cards" which is a JSON file containing every card object on Scryfall in English or the printed language if the card is only available in one language.

With this, we can grab each image and save it for use in the model.


In [4]:
import json

# Load the JSON file
with open('./Datasets/default-cards-20240817211226.json', 'r') as file:
    cards_data = json.load(file)


## Let us look at the data a bit to see what we are working with

In [5]:
print('Length of Dataset:' + str(len(cards_data)))
print('Number of features per card:' + str(len(cards_data[0])))
print('Features:' + str(cards_data[0].keys()))

Length of Dataset:99238
Number of features per card:59
Features:dict_keys(['object', 'id', 'oracle_id', 'multiverse_ids', 'mtgo_id', 'arena_id', 'tcgplayer_id', 'name', 'lang', 'released_at', 'uri', 'scryfall_uri', 'layout', 'highres_image', 'image_status', 'image_uris', 'mana_cost', 'cmc', 'type_line', 'oracle_text', 'colors', 'color_identity', 'keywords', 'produced_mana', 'legalities', 'games', 'reserved', 'foil', 'nonfoil', 'finishes', 'oversized', 'promo', 'reprint', 'variation', 'set_id', 'set', 'set_name', 'set_type', 'set_uri', 'set_search_uri', 'scryfall_set_uri', 'rulings_uri', 'prints_search_uri', 'collector_number', 'digital', 'rarity', 'card_back_id', 'artist', 'artist_ids', 'illustration_id', 'border_color', 'frame', 'full_art', 'textless', 'booster', 'story_spotlight', 'prices', 'related_uris', 'purchase_uris'])


## I am curious to see if any cards do not have an image associated with them

In [6]:
print('Printing Unique Image Status Features:' + str(set([card['image_status'] for card in cards_data])))

Printing Unique Image Status Features:{'highres_scan', 'lowres', 'placeholder', 'missing'}


## So it seems like placeholder and missing may be an issue, time to investigate

It seems like 'missing' does indeed mean there are no images.

In [9]:
number_missing = 0

for card in cards_data:
    if card.get('image_status') == 'missing':  # Check if 'image_status' is 'missing'
        number_missing += 1
        print(f"IMAGE STATUS = missing for card: {card.get('name', 'Unknown Name')}")
        
        # Check if 'image_uris' exists in the card data
        if 'image_uris' in card:
            print(card['image_uris'])  # Print image_uris if it exists
        else:
            print("No image_uris available for this card.")  # Handle case where image_uris is missing
        
        print('-' * 50)  # Separator for better readability

print(f"Number of cards with 'image_status' = 'missing': {number_missing}")


IMAGE STATUS = missing for card: Memory Lapse // Memory Lapse
No image_uris available for this card.
--------------------------------------------------
IMAGE STATUS = missing for card: Zabaz, the Glimmerwasp // Zabaz, the Glimmerwasp
No image_uris available for this card.
--------------------------------------------------
IMAGE STATUS = missing for card: Extus, Oriq Overlord // Extus, Oriq Overlord
No image_uris available for this card.
--------------------------------------------------
IMAGE STATUS = missing for card: Prismatic Ending // Prismatic Ending
No image_uris available for this card.
--------------------------------------------------
IMAGE STATUS = missing for card: Professor of Symbology // Professor of Symbology
No image_uris available for this card.
--------------------------------------------------
IMAGE STATUS = missing for card: Arrogant Poet // Arrogant Poet
No image_uris available for this card.
--------------------------------------------------
IMAGE STATUS = missing

## It seems like placeholder means that the card image has some watermark on it

The watermark is over a low res card image and says "Localized Image Not Available"

In [11]:
num_placeholder = 0

for card in cards_data:
    if card.get('image_status') == 'placeholder':  # Check if 'image_status' is 'placeholder'
        num_placeholder += 1
        print(f"IMAGE STATUS = placeholder for card: {card.get('name', 'Unknown Name')}")
        
        # Check if 'image_uris' exists in the card data
        if 'image_uris' in card:
            print(card['image_uris'])  # Print image_uris if it exists
        else:
            print("No image_uris available for this card.")  # Handle case where image_uris is missing
        
        print('-' * 50)  # Separator for better readability

print(f"Number of cards with 'image_status' = 'placeholder': {num_placeholder}")

IMAGE STATUS = placeholder for card: Vaevictis Asmadi
{'small': 'https://cards.scryfall.io/small/front/0/0/00177fcf-92af-475a-a7f5-11ab645388a5.jpg?1562894998', 'normal': 'https://cards.scryfall.io/normal/front/0/0/00177fcf-92af-475a-a7f5-11ab645388a5.jpg?1562894998', 'large': 'https://cards.scryfall.io/large/front/0/0/00177fcf-92af-475a-a7f5-11ab645388a5.jpg?1562894998', 'png': 'https://cards.scryfall.io/png/front/0/0/00177fcf-92af-475a-a7f5-11ab645388a5.png?1562894998', 'art_crop': 'https://cards.scryfall.io/art_crop/front/0/0/00177fcf-92af-475a-a7f5-11ab645388a5.jpg?1562894998', 'border_crop': 'https://cards.scryfall.io/border_crop/front/0/0/00177fcf-92af-475a-a7f5-11ab645388a5.jpg?1562894998'}
--------------------------------------------------
IMAGE STATUS = placeholder for card: Island
{'small': 'https://cards.scryfall.io/small/front/0/0/001eb913-2afe-4d7d-89a1-7c35de92d702.jpg?1540162762', 'normal': 'https://cards.scryfall.io/normal/front/0/0/001eb913-2afe-4d7d-89a1-7c35de92d702.

# Time to remove the cards with missing or placeholder images

Removing only 729 cards out of 99238 is not big deal. These cards are most likely abscure reprints, artworks, or promos of cards.

In [12]:
total_dropped = 0

for card in cards_data:
    if card.get('image_status') == 'missing' or card.get('image_status') == 'placeholder':
        total_dropped += 1
        cards_data.remove(card)

print(f"Total number of cards dropped: {total_dropped}")

Total number of cards dropped: 729


# Now that we cleaned the data of missing images, it is time to download the images

We will be ratelimiting to 1 call per 100 milliseconds

In [None]:
import os
import time
import requests
from requests.exceptions import RequestException

# Create a directory to save the images if it doesn't exist
os.makedirs("Datasets/mtg_images", exist_ok=True)

# Function to download an image with retries
def download_image_with_retries(url, file_path, retries=3):
    attempt = 0
    while attempt < retries:
        try:
            response = requests.get(url, timeout=10)
            if response.status_code == 200:
                with open(file_path, 'wb') as f:
                    f.write(response.content)
                print(f"Downloaded {file_path}")
                return True
            else:
                print(f"Failed to download {file_path}: Status code {response.status_code}")
        except RequestException as e:
            print(f"Error downloading {file_path}: {e}")
        attempt += 1
        time.sleep(2)  # Wait before retrying
    return False

# Iterate through the cards and download images
download_rate = 10  # images per second
delay = 1 / download_rate

for card in cards_data:
    image_url = card['image_uris'].get('normal')  # Choose the desired image size (e.g., normal, large)
    card_id = card['id']
    
    if image_url:
        file_path = f"mtg_images/{card_id}.jpg"
        
        # Skip if file already exists
        if os.path.exists(file_path):
            print(f"Image {file_path} already exists. Skipping.")
            continue
        
        # Download the image with retry logic
        success = download_image_with_retries(image_url, file_path)
        
        # Wait to respect the download rate limit
        if success:
            time.sleep(delay)
