# MTG -  Basic Lands Classifier
## Fetch Data

In this notebook, we use the [Scryfall API](https://scryfall.com/docs/api) to fetch the available images of basic lands, which we'll use to develop our classification model.

In [1]:
import os
import pandas as pd
from joblib import Parallel, delayed
from multiprocessing import cpu_count
from requests import get
from shutil import copyfileobj
from tqdm import tqdm

# Let pandas display up to 150 rows before truncating the output
pd.set_option('display.max_rows', 150)

In [2]:
# Fetch the bulk-data catalogue from Scryfall
bulk = 'https://api.scryfall.com/bulk-data'  # no stray leading whitespace in the URL
response = get(bulk, timeout=30)
# Fail loudly on an HTTP error instead of on a missing key in a later cell
response.raise_for_status()
response.json()

{'object': 'list',
 'has_more': False,
 'data': [{'object': 'bulk_data',
   'id': '27bf3214-1271-490b-bdfe-c0be6c23d02e',
   'type': 'oracle_cards',
   'updated_at': '2022-01-17T10:03:58.341+00:00',
   'uri': 'https://api.scryfall.com/bulk-data/27bf3214-1271-490b-bdfe-c0be6c23d02e',
   'name': 'Oracle Cards',
   'description': 'A JSON file containing one Scryfall card object for each Oracle ID on Scryfall. The chosen sets for the cards are an attempt to return the most up-to-date recognizable version of the card.',
   'compressed_size': 13183051,
   'download_uri': 'https://c2.scryfall.com/file/scryfall-bulk/oracle-cards/oracle-cards-20220117100358.json',
   'content_type': 'application/json',
   'content_encoding': 'gzip'},
  {'object': 'bulk_data',
   'id': '6bbcf976-6369-4401-88fc-3a9e4984c305',
   'type': 'unique_artwork',
   'updated_at': '2022-01-17T10:13:55.857+00:00',
   'uri': 'https://api.scryfall.com/bulk-data/6bbcf976-6369-4401-88fc-3a9e4984c305',
   'name': 'Unique Artwork

In [3]:
# Fetch unique artworks
# Select the catalogue entry by its `type` rather than by list position, so a
# reordering of the catalogue cannot silently pick a different dataset
bulk_entries = response.json()['data']
download_uri = next(entry['download_uri'] for entry in bulk_entries
                    if entry['type'] == 'unique_artwork')

# Use a separate name for this response: `response` keeps holding the catalogue,
# so this cell can be re-run without re-running the previous one
artwork_response = get(download_uri, timeout=60)
artwork_response.raise_for_status()
df = pd.DataFrame(artwork_response.json())

In [4]:
# Preview the first rows of the card table built in the previous cell
df.head()

Unnamed: 0,object,id,oracle_id,multiverse_ids,mtgo_id,mtgo_foil_id,tcgplayer_id,cardmarket_id,name,lang,...,color_indicator,life_modifier,hand_modifier,tcgplayer_etched_id,content_warning,printed_name,printed_type_line,printed_text,variation_of,flavor_name
0,card,0000579f-7b35-4ed3-b44c-db2a538066fe,44623693-51d6-49ad-8cd7-140505caf02f,[109722],25527.0,25528.0,14240.0,13850.0,Fury Sliver,en,...,,,,,,,,,,
1,card,00006596-1166-4a79-8443-ca9f82e6db4e,8ae3562f-28b7-4462-96ed-be0cf7052ccc,[189637],34586.0,34587.0,33347.0,21851.0,Kor Outfitter,en,...,,,,,,,,,,
2,card,0000cd57-91fe-411f-b798-646e965eec37,9f0d82ae-38bf-45d8-8cda-982b6ead1d72,[435231],65170.0,65171.0,145764.0,301766.0,Siren Lookout,en,...,,,,,,,,,,
3,card,0001f1ef-b957-4a55-b47f-14839cdbab6f,ef027846-be81-4959-a6b5-56bd01b1e68a,[472997],78170.0,,198861.0,400134.0,Venerable Knight,en,...,,,,,,,,,,
4,card,00020b05-ecb9-4603-8cc1-8cfa7a14befc,d96ac790-428b-4a64-8dbd-6baa73eb6210,[394089],,,95585.0,272052.0,Wildcall,en,...,,,,,,,,,,


In [5]:
# Summarise `df`: column names, non-null counts and dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30893 entries, 0 to 30892
Data columns (total 82 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   object               30893 non-null  object 
 1   id                   30893 non-null  object 
 2   oracle_id            30888 non-null  object 
 3   multiverse_ids       30893 non-null  object 
 4   mtgo_id              22662 non-null  float64
 5   mtgo_foil_id         16827 non-null  float64
 6   tcgplayer_id         29504 non-null  float64
 7   cardmarket_id        28198 non-null  float64
 8   name                 30893 non-null  object 
 9   lang                 30893 non-null  object 
 10  released_at          30893 non-null  object 
 11  uri                  30893 non-null  object 
 12  scryfall_uri         30893 non-null  object 
 13  layout               30893 non-null  object 
 14  highres_image        30893 non-null  bool   
 15  image_status         30893 non-null 

In [6]:
# Filter basic lands: keep the rows whose name is one of the five basic land
# names, restrict to the columns of interest, then order the result by
# release date, set name and collector number
basic_land_names = ['Forest', 'Island', 'Mountain', 'Plains', 'Swamp']
kept_columns = ['id', 'name', 'released_at', 'image_uris', 'set_name', 'collector_number', 'artist']
is_basic_land = df['name'].isin(basic_land_names)
basics = (
    df.loc[is_basic_land, kept_columns]
      .sort_values(['released_at', 'set_name', 'collector_number'])
)

In [7]:
# Extract 'art_crop_uri': pull the 'art_crop' entry out of each row's
# `image_uris` mapping and store it as its own column
basics = basics.assign(
    art_crop_uri=basics['image_uris'].map(lambda image_uris: image_uris['art_crop'])
)

In [8]:
# Random sample of the dataset
# A fixed seed makes the preview identical on every Restart & Run All
basics.sample(5, random_state=42)

Unnamed: 0,id,name,released_at,image_uris,set_name,collector_number,artist,art_crop_uri
29061,f108b0fb-420a-422d-ae85-9a99c0f73169,Swamp,2017-12-08,{'small': 'https://c1.scryfall.com/file/scryfa...,Unstable,214,John Avon,https://c1.scryfall.com/file/scryfall-cards/ar...
16563,8a136975-e513-4c50-9d4a-855d03630470,Plains,1998-06-24,{'small': 'https://c1.scryfall.com/file/scryfa...,Portal Second Age,152,Fred Fields,https://c1.scryfall.com/file/scryfall-cards/ar...
27420,e3133726-0eda-480c-9d67-64719cb77f1d,Swamp,2008-10-03,{'small': 'https://c1.scryfall.com/file/scryfa...,Shards of Alara,241,Aleksi Briclot,https://c1.scryfall.com/file/scryfall-cards/ar...
26239,d92ef517-2417-43a2-8b1a-0673d1531c65,Plains,2019-05-03,{'small': 'https://c1.scryfall.com/file/scryfa...,War of the Spark,250,Jonas De Ro,https://c1.scryfall.com/file/scryfall-cards/ar...
9678,51852e9d-9183-4f9a-a724-cb60cf677e38,Island,2013-09-27,{'small': 'https://c1.scryfall.com/file/scryfa...,Theros,237,Raoul Vitale,https://c1.scryfall.com/file/scryfall-cards/ar...


In [9]:
# Summarise the filtered `basics` frame: column names, non-null counts and dtypes
basics.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1246 entries, 21429 to 29093
Data columns (total 8 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   id                1246 non-null   object
 1   name              1246 non-null   object
 2   released_at       1246 non-null   object
 3   image_uris        1246 non-null   object
 4   set_name          1246 non-null   object
 5   collector_number  1246 non-null   object
 6   artist            1246 non-null   object
 7   art_crop_uri      1246 non-null   object
dtypes: object(8)
memory usage: 87.6+ KB


In [10]:
# Function to get the images
def get_img(dst_dir, img_uri, fname, timeout=30):
    """Downloads an image from a given URI.

    The image is written to ``<dst_dir>/<fname>.jpg``. Nothing is downloaded
    when a non-empty file already exists at that path. The download is
    best-effort: network, HTTP and filesystem errors are reported through the
    return value instead of being raised.

    Parameters
    ----------
    dst_dir : str
        Destination directory of the image.
    img_uri : str
        URI from where the image is downloaded.
    fname : str
        Filename of the image (without the ``.jpg`` extension).
    timeout : float, optional
        Timeout in seconds passed to the HTTP request.

    Returns
    -------
    bool
        True if the image is available on disk after the call,
        False if the download failed.
    """
    # Filename path
    fname_path = os.path.join(dst_dir, f"{fname}.jpg")

    # Skip the download when a complete copy is already on disk; an empty file
    # is treated as a failed earlier attempt and fetched again
    if os.path.exists(fname_path) and os.path.getsize(fname_path) > 0:
        return True

    # Imported locally so the function does not depend on a notebook-level import
    from requests import RequestException

    # Write to a temporary name first so an interrupted or failed download
    # never leaves a truncated file behind under the final name
    tmp_path = f"{fname_path}.part"
    try:
        with get(img_uri, stream=True, timeout=timeout) as response:
            # Do not save an HTTP error page as if it were an image
            response.raise_for_status()
            with open(tmp_path, 'wb') as out_file:
                for chunk in response.iter_content(chunk_size=64 * 1024):
                    out_file.write(chunk)
        os.replace(tmp_path, fname_path)
        return True
    except (RequestException, OSError):
        return False
    finally:
        # After a successful os.replace the temporary file no longer exists
        if os.path.exists(tmp_path):
            try:
                os.remove(tmp_path)
            except OSError:
                pass

In [11]:
# Directory in which we'll save the images
data_dir = 'data/imgs'
# exist_ok=True makes this safe to re-run and avoids the check-then-create race
os.makedirs(data_dir, exist_ok=True)

# Parallelize the tasks of getting the images
executor = Parallel(n_jobs=cpu_count())
# zip() has no length, so pass `total` to let tqdm show a percentage and an ETA.
# The bar advances as joblib pulls tasks from the generator, i.e. on dispatch.
tasks = (delayed(get_img)(data_dir, img_uri, idx) for (img_uri, idx) in
         tqdm(zip(basics['art_crop_uri'], basics['id']), total=len(basics)))
execution = executor(tasks)

1246it [00:00, 1992.15it/s]


In [12]:
# Check whether we've successfully downloaded every image in the dataset.
# Each card must have its own non-empty `<id>.jpg`; comparing only the number of
# directory entries would also count empty or unrelated files.
expected_paths = [os.path.join(data_dir, f"{card_id}.jpg") for card_id in basics['id']]
missing = [path for path in expected_paths
           if not os.path.isfile(path) or os.path.getsize(path) == 0]
not missing

True

In [13]:
# Export basics DataFrame to csv; index=False leaves the row index out of the file
# NOTE(review): assumes the `data/` directory already exists (the download cell creates `data/imgs`)
basics.to_csv('data/basic_lands_artworks.csv', index=False)