# Getting Started

Scryfall API:
"We kindly ask that you insert 50 – 100 milliseconds of delay between the requests you send to the server. (i.e., 10 requests per second on average)."

In [1]:
# import all the things
import numpy as np
import pandas as pd
import json, requests, pickle
from bs4 import BeautifulSoup

In [2]:
# Get setlist into dataframe
link = 'https://api.scryfall.com/sets'
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.get(link, timeout=30)
# Fail loudly on an HTTP error instead of a confusing KeyError on 'data' below.
response.raise_for_status()
sets = response.json()['data']
sets_df = pd.DataFrame(sets)

## Clean up set data 

In [3]:
# Ignore sets with no release date (likely promotional / outliers), set release date to index,
# then remove repetitive information and online data.
#
# The frame is rebuilt from the raw `sets` payload as one chain (no inplace=True) so the
# cell can be re-run safely: mutating sets_df in place raised KeyError on a second run,
# because 'released_at' had already become the index and the columns were already dropped.
sets_df = (
    pd.DataFrame(sets)
    .dropna(subset=['released_at'])
    .set_index('released_at')
    .drop(columns=['digital', 'mtgo_code', 'parent_set_code', 'object'])
)

In [4]:
# Narrow the set table to a handful of columns for easier exploration,
# then list the distinct set types that appear.
explore_columns = ['name', 'code', 'block_code', 'card_count', 'set_type']
clean_df = sets_df.loc[:, explore_columns]
clean_df['set_type'].unique()

array(['masters', 'box', 'expansion', 'commander', 'memorabilia',
       'starter', 'core', 'duel_deck', 'spellbook', 'draft_innovation',
       'funny', 'from_the_vault', 'archenemy', 'planechase',
       'treasure_chest', 'premium_deck'], dtype=object)

In [5]:
# Set codes to exclude, chosen after extensive manual checking of set legality / onlineness
sets_to_drop = [
    'me1', 'me2', 'me3', 'me4',
    'vma', 'tpr', 'e02', 'gnt', 'td0',
    'mgb', 'ana', 'w17', 'w16', 'itp',
]
# Set types to exclude wholesale
types_to_drop = ['memorabilia', 'funny', 'treasure_chest']

In [6]:
# Keep only sets whose code and set_type are both absent from the drop lists.
# Series.isin is vectorised, replacing the two per-row Python lambdas with one boolean mask.
excluded = sets_df['code'].isin(sets_to_drop) | sets_df['set_type'].isin(types_to_drop)
final_sets = sets_df[~excluded]
final_sets.head()

Unnamed: 0_level_0,block,block_code,card_count,code,foil_only,icon_svg_uri,name,scryfall_uri,search_uri,set_type,uri
released_at,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2018-12-07,,,41,uma,False,https://img.scryfall.com/sets/uma.svg?1541394000,Ultimate Masters,https://scryfall.com/sets/uma,https://api.scryfall.com/cards/search?order=se...,masters,https://api.scryfall.com/sets/uma
2018-10-05,Guilds of Ravnica,grn,273,grn,False,https://img.scryfall.com/sets/grn.svg?1541394000,Guilds of Ravnica,https://scryfall.com/sets/grn,https://api.scryfall.com/cards/search?order=se...,expansion,https://api.scryfall.com/sets/grn
2018-08-09,,,307,c18,False,https://img.scryfall.com/sets/c18.svg?1541394000,Commander 2018,https://scryfall.com/sets/c18,https://api.scryfall.com/cards/search?order=se...,commander,https://api.scryfall.com/sets/c18
2018-07-13,,,314,m19,False,https://img.scryfall.com/sets/m19.svg?1541394000,Core Set 2019,https://scryfall.com/sets/m19,https://api.scryfall.com/cards/search?order=se...,core,https://api.scryfall.com/sets/m19
2018-06-22,,,41,gs1,False,https://img.scryfall.com/sets/gs1.svg?1541394000,Global Series Jiang Yanggu & Mu Yanling,https://scryfall.com/sets/gs1,https://api.scryfall.com/cards/search?order=se...,duel_deck,https://api.scryfall.com/sets/gs1


## Get Card Data

In [7]:
# get 1 page
link = 'https://api.scryfall.com/cards?page=1'
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.get(link, timeout=30)
# Fail loudly on an HTTP error instead of a confusing KeyError on 'data' below.
response.raise_for_status()
cards = response.json()['data']
cards_df = pd.DataFrame(cards)

In [8]:
# Summarise the columns, non-null counts and dtypes of the single page of cards fetched above
cards_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 175 entries, 0 to 174
Data columns (total 55 columns):
all_parts            2 non-null object
artist               175 non-null object
border_color         175 non-null object
card_faces           10 non-null object
cmc                  175 non-null float64
collector_number     175 non-null object
color_identity       175 non-null object
colors               165 non-null object
colorshifted         175 non-null bool
digital              175 non-null bool
edhrec_rank          136 non-null float64
eur                  14 non-null object
flavor_text          78 non-null object
foil                 175 non-null bool
frame                175 non-null object
full_art             175 non-null bool
futureshifted        175 non-null bool
highres_image        175 non-null bool
id                   175 non-null object
illustration_id      112 non-null object
image_uris           165 non-null object
lang                 175 non-null object
layout  

In [9]:
# Keep only English cards whose vintage legality is anything other than 'not_legal'
# (this filters out tokens, joke cards, conspiracies, etc.), then index the result by card id.
vintage_ok = cards_df['legalities'].apply(lambda legal: legal['vintage'] != 'not_legal')
english = cards_df['lang'] == 'en'
clean_cards = cards_df[vintage_ok & english].set_index('id')
clean_cards.info()

<class 'pandas.core.frame.DataFrame'>
Index: 148 entries, ff92804a-0c62-4eb8-bbba-f1ca6f426b6e to 1895cf0c-9c2d-41f9-9819-7348ac9e25f0
Data columns (total 54 columns):
all_parts            0 non-null object
artist               148 non-null object
border_color         148 non-null object
card_faces           0 non-null object
cmc                  148 non-null float64
collector_number     148 non-null object
color_identity       148 non-null object
colors               148 non-null object
colorshifted         148 non-null bool
digital              148 non-null bool
edhrec_rank          136 non-null float64
eur                  9 non-null object
flavor_text          73 non-null object
foil                 148 non-null bool
frame                148 non-null object
full_art             148 non-null bool
futureshifted        148 non-null bool
highres_image        148 non-null bool
illustration_id      95 non-null object
image_uris           148 non-null object
lang                 148 non-n

In [10]:
# Features to keep
MVP_features = [
    'name',
    'set_name',
    'type_line',
    'mana_cost',
    'rarity',
    'oracle_text',
    'power',
    'toughness',
    'loyalty',
    'cmc',
    'set',
    'color_identity',
    'colors',
    'reprint',
    'layout',
    'legalities',
]

# Additional card fields that are not part of the MVP list
misc_features = [
    'all_parts',
    'artist',
    'border_color',
    'card_faces',
    'edhrec_rank',
    'flavor_text',
    'foil',
    'nonfoil',
    'full_art',
    # The comma after 'watermark' was missing, so Python's implicit string concatenation
    # silently merged it with the next entry into 'watermarktimeshifted'.
    'watermark',
    'timeshifted',
    'colorshifted',
    'futureshifted',
    'illustration_id',
    'multiverse_ids',
    'oracle_id',
    'prints_search_uri',
    'rulings_uri',
    'set_search_uri',
]

In [11]:
# Preview the first rows of the filtered cards, restricted to the MVP feature columns
clean_cards[MVP_features].head()

Unnamed: 0_level_0,name,set_name,type_line,mana_cost,rarity,oracle_text,power,toughness,loyalty,cmc,set,color_identity,colors,reprint,layout,legalities
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
ff92804a-0c62-4eb8-bbba-f1ca6f426b6e,"Urborg, Tomb of Yawgmoth",Ultimate Box Topper,Legendary Land,,mythic,Each land is a Swamp in addition to its other ...,,,,0.0,puma,[],[],True,normal,"{'standard': 'not_legal', 'future': 'not_legal..."
5e63fd70-ca5b-45fd-b551-9ebe02410e9c,Stirring Wildwood,Ultimate Box Topper,Land,,mythic,Stirring Wildwood enters the battlefield tappe...,,,,0.0,puma,"[G, W]",[],True,normal,"{'standard': 'not_legal', 'future': 'not_legal..."
54f41726-e0bb-4154-a2db-4b68b50f5032,Raging Ravine,Ultimate Box Topper,Land,,mythic,Raging Ravine enters the battlefield tapped.\n...,,,,0.0,puma,"[G, R]",[],True,normal,"{'standard': 'not_legal', 'future': 'not_legal..."
7c9fb3d9-e018-4aa3-9c14-1a51fae176b4,Lavaclaw Reaches,Ultimate Box Topper,Land,,mythic,Lavaclaw Reaches enters the battlefield tapped...,,,,0.0,puma,"[B, R]",[],True,normal,"{'standard': 'not_legal', 'future': 'not_legal..."
ff790ded-af9f-4e93-84b7-ddadff5ccad4,Karakas,Ultimate Box Topper,Legendary Land,,mythic,{T}: Add {W}.\n{T}: Return target legendary cr...,,,,0.0,puma,[W],[],True,normal,"{'standard': 'not_legal', 'future': 'not_legal..."


Next step: write the loop into a scraper and get all the cards! Get the hell off the Jupyter notebook!

### Formats
Starting w/ modern:
* Standard
* Modern
* Extended
* Legacy
* Vintage
* Block Constructed (deprecated)
* Extended (deprecated)
* Commander
* Casual

Questions:
* Should I only care about non-foils?
* Should I do the reprint / set search on my own, or use the API?

## Post-Scryfall Scrape: How do the cards look?

In [133]:
# Load the scraped card data from a CSV in the working directory and summarise its columns.
# NOTE(review): the code that writes all_vintage_cards.csv is not in this notebook —
# presumably the standalone scraper mentioned above; confirm the file exists before running.
all_cards_df = pd.read_csv('all_vintage_cards.csv')
all_cards_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41291 entries, 0 to 41290
Data columns (total 17 columns):
id                41291 non-null object
name              41291 non-null object
set_name          41291 non-null object
type_line         41291 non-null object
mana_cost         36309 non-null object
rarity            41291 non-null object
oracle_text       40216 non-null object
power             18913 non-null object
toughness         18913 non-null object
loyalty           306 non-null object
cmc               41291 non-null float64
set               41291 non-null object
color_identity    41291 non-null object
colors            41113 non-null object
reprint           41291 non-null bool
layout            41291 non-null object
legalities        41291 non-null object
dtypes: bool(1), float64(1), object(15)
memory usage: 5.1+ MB


## MTGPrice Scraping

In [74]:
# trying slimit parser
from slimit import ast
from slimit.parser import Parser
from slimit.visitors import nodevisitor

# Download one card's price page and parse the HTML
link = 'https://www.mtgprice.com/sets/Visions/Vampiric_Tutor'
soup = BeautifulSoup(requests.get(link).content, 'html.parser')

# Walk the JavaScript of every script tag containing the marker and collect the
# two-element entries of its "data" array into `history`
text_to_find = 'var results = ['
history = []
for script in soup.findAll('script', type='text/javascript'):
    if text_to_find not in script.text:
        continue
    tree = Parser().parse(script.text)
    for node in nodevisitor.visit(tree):
        is_data_assign = isinstance(node, ast.Assign) and getattr(node.left, 'value', '') == "\"data\""
        if not is_data_assign:
            continue
        history.extend([pair.items[0].value, pair.items[1].value] for pair in node.right.items)
        break  # stop at the first "data" assignment found in this script
print(np.array(history).shape)



(1787, 2)


### Let's try a whole set

In [127]:
setname = 'Visions'
cards = [
    'Vampiric Tutor',
    'Squandered Resources',
    'Anvil of Bogardan'
]

# Marker identifying the script tag that embeds the price data (loop-invariant, so set once)
text_to_find = 'var results = ['

# Maps card name -> 2-column array of strings
# (presumably [epoch-millisecond timestamp, price] — confirm against the site)
hist_dict = {}

for card in cards:
    # Build the URL from `setname`; it was previously hard-coded to 'Visions', which left
    # `setname` unused and meant changing it had no effect. Spaces in the card name
    # become underscores.
    link = 'https://www.mtgprice.com/sets/' + setname + '/' + card.replace(' ', '_')
    soup = BeautifulSoup(requests.get(link).content, 'html.parser')

    # GET RESULTS
    history = []
    for script in soup.find_all('script', type='text/javascript'):
        if text_to_find not in script.text:
            continue
        parser = Parser()
        tree = parser.parse(script.text)
        for node in nodevisitor.visit(tree):
            if isinstance(node, ast.Assign) and getattr(node.left, 'value', '') == "\"data\"":
                for prices in node.right.items:
                    history.append([prices.items[0].value, prices.items[1].value])
                break  # only the first "data" assignment in this script is read
    hist_dict[card] = np.array(history)
print(hist_dict)



{'Vampiric Tutor': array([['1327305600000', '24.99'],
       ['1327392000000', '22.49'],
       ['1327478400000', '19.99'],
       ...,
       ['1539676800000', '51.025'],
       ['1539763200000', '51.025'],
       ['1541404800000', '45.98']], dtype='<U13'), 'Squandered Resources': array([['1327305600000', '0.89'],
       ['1327392000000', '0.89'],
       ['1331193600000', '1.5125'],
       ...,
       ['1539676800000', '20.83'],
       ['1539763200000', '20.83'],
       ['1541404800000', '18.0']], dtype='<U13'), 'Anvil of Bogardan': array([['1327305600000', '5.74'],
       ['1327392000000', '5.74'],
       ['1327651200000', '7.555'],
       ...,
       ['1539676800000', '18.630001'],
       ['1539763200000', '18.630001'],
       ['1541404800000', '14.93']], dtype='<U13')}


In [130]:
# Show the last entry scraped for one card
hist_dict['Anvil of Bogardan'][-1]

array(['1541404800000', '14.93'], dtype='<U13')

In [132]:
# all_cards_df is created by the pd.read_csv('all_vintage_cards.csv') cell earlier in the
# notebook; run that cell first (a NameError here means the cells were run out of order).
all_cards_df.head()

NameError: name 'all_cards_df' is not defined