In [2]:
import requests
import time
import pickle
import os
import CF_standard_functions as cf_sf # Custom functions 
import pandas as pd
import numpy as np

In [3]:
# Set these parameters for saving and loading data
output_dir = os.getcwd()
file_name = 'CardData.pkl'
full_file = output_dir+"\\"+file_name

**Get Data from Scryfall**

https://scryfall.com/

For Scryfall we put the search query in as plain text as though it were the search
bar of the website. However we have to use some ASCII encoding of special 
characters (like =, <) as they have specific meanings used in an html string.
For example, %3D represents an equals sign , %3A represents a colon.
Look these up here: 

https://www.w3schools.com/tags/ref_urlencode.ASP

Separate clauses of the search are split with a plus sign.
You can use f-string interpolation to make it easier to edit the query.

In [165]:
# Scryfall query parameters:
order = "cmc"
# Unique printings of cards that have flavor text, English prints only, and ignoring "funny" cards (like thise from the un-sets)
querystring = "has%3Aflavor+unique%3Aprints+%2Dis%3Dfunny+language%3Den&unique=prints"
url = f"https://api.scryfall.com/cards/search?order={order}&q={querystring}"

In [5]:
print(cf_sf.file_age_in_hours(full_file))
print(os.path.isfile(full_file))
print(cf_sf.file_age_in_hours(full_file))


166.3744252928098
True
166.37442557056744


In [167]:
# First check if data already exists and is "fresh". If so, load it in:
# Here defining fresh as newer than a week
if os.path.isfile(full_file) and cf_sf.file_age_in_hours(full_file) < 24*3 :
    print('Loading data from disk...')
    file = open(full_file, 'rb')
    data = pickle.load(file)
    file.close()
    print('Done!')
else:# Otherwise get the data from Scryfall. This can take a while... 
    sf_response = requests.get(url).json()
    data = sf_response['data']
    curr_page = 1;
    print(f"Page {curr_page}",end='\r')
    while sf_response['has_more']:  
        time.sleep(0.2) # Be polite and patient with the API
        url_next = sf_response['next_page']
        sf_response = requests.get(url_next).json()
        data_new = sf_response['data']
        data = data + data_new
        curr_page += 1 
        print(f"Page {curr_page}",end='\r')
    print(f"Loaded {curr_page} pages of cards")
    
    print("Saving...")
    file = open(full_file,'wb')    
    pickle.dump(data, file)
    file.close()
    print("Done!")
    
nCards = len(data)
print(nCards,"cards in data set")

Loaded 230 pages of cards
Saving...
Done!
40195 cards in data set


### Organize and Trim

Convert the list to a Pandas dataframe and trim down to unique instances of flavor text.\
To make separating cards by colors easier, we add a Boolean variable to the dataframe for each color.\
We also add a Boolean for whether or not a card is monocolored (not colorless and not multicolored).

In [190]:
data = pd.DataFrame(data)
data.groupby('flavor_text',as_index=False).first()

data.head()

Unnamed: 0,object,id,oracle_id,multiverse_ids,mtgo_id,tcgplayer_id,cardmarket_id,name,lang,released_at,...,power,toughness,color_indicator,loyalty,is_W,is_U,is_B,is_R,is_G,is_monocolor
0,card,a95b7645-154f-4904-bf71-db7eb24d4df2,a3da7d5b-2c2b-45fe-b9c5-413b8c8fc0a2,[489982],82652.0,218575.0,483994.0,Academy Ruins,en,2020-08-07,...,,,,,False,False,False,False,False,False
1,card,aa2c9306-b065-4813-8817-ae46a7725b5e,a3da7d5b-2c2b-45fe-b9c5-413b8c8fc0a2,[370424],49054.0,68454.0,262142.0,Academy Ruins,en,2013-06-07,...,,,,,False,False,False,False,False,False
2,card,af09af65-0a3b-42df-b0fd-372e2158beac,a3da7d5b-2c2b-45fe-b9c5-413b8c8fc0a2,[116725],25739.0,14150.0,13757.0,Academy Ruins,en,2006-10-06,...,,,,,False,False,False,False,False,False
3,card,579cce0c-5afe-4104-97e8-fd303e8fcc28,a3da7d5b-2c2b-45fe-b9c5-413b8c8fc0a2,[],,275318.0,,Academy Ruins,en,2022-04-22,...,,,,,False,False,False,False,False,False
4,card,69e55604-56da-44b5-aa78-f5de76ce9d20,a3da7d5b-2c2b-45fe-b9c5-413b8c8fc0a2,[491378],,218577.0,483999.0,Academy Ruins,en,2020-08-07,...,,,,,False,False,False,False,False,False


In [191]:
for c in ['W','U','B','R','G']:
    tf = data.colors.apply(lambda x : np.shape(x) != (0,) and np.any(np.isin(x,c)))
    data['is_'+c] = tf

tf = np.sum(data.loc[:][['is_W','is_U','is_B','is_R','is_G']],axis=1) == 1
data['is_monocolor'] = tf

data_monocolor = data.loc[data.is_monocolor][['name','colors','flavor_text']].copy()
data_monocolor.reset_index(drop=True, inplace=True)


  mask |= (ar1 == a)


## Subset monocolored cards
In our first task we will classify flavor text by color. For convenience, subset only the name, flavortext, and color of monocolored cards.\
For our classifier we need numeric groups, so we will make numeric categories for the colors using ScikitLearn.\
(sklearns method is a bit overkill here, since we only have 5 categories. But it is practical if we have >dozens of categories) 

For this to work, we must first convert the **colors** column from an array to a string.

In [226]:

data_monocolor['colors'] = data_monocolor.colors.apply(lambda x: ''.join(x))

from sklearn import preprocessing

le = preprocessing.LabelEncoder()
le.fit(data_monocolor.colors)
data_monocolor['categorical_label'] = le.transform(data_monocolor.colors)
data_monocolor.sample(15)
#Transform labels back to original encoding.
# le.inverse_transform(data_monocolor['categorical_label'])


Unnamed: 0,name,colors,flavor_text,categorical_label
10559,Walking Corpse,B,"""Is it hopeless? When wave upon wave of our de...",0
16378,Sage's Row Denizen,U,"""I offer you wisdom untainted by false loyalty...",3
29506,"Razaketh, the Foulblooded",B,Liliana insisted on coming to Amonkhet to conf...,0
26361,Soulbound Guardians,W,The kor's most righteous dead are given the gr...,4
15190,Monk Idealist,W,"""Belief is the strongest mortar.""",4
26557,Surrakar Banisher,U,Surrakar grab things they *don't* want.,3
837,Doomed Traveler,W,He vowed he would never rest until he reached ...,4
3450,Swords to Plowshares,W,The smallest seed of regret can bloom into red...,4
15576,"Pang Tong, ""Young Phoenix""",W,""". . . It was Pang Tong's boat-connecting sche...",4
12620,Dream Cache,U,Dreams' riches are easily spent.\n—Suq'Ata adage,3


'b'