# Scryfall ETL and Star Schema Construction
### API Doc: https://scryfall.com/docs/api

In [1]:
import requests
import pandas as pd
from datetime import datetime
import os

# Step 1: Retrieve Bulk Data

In [2]:
# Look up the bulk-data catalogue, then download the "default_cards" export.
# Explicit timeouts keep a stalled connection from hanging the kernel:
# requests waits indefinitely when no timeout is supplied.
bulk_response = requests.get("https://api.scryfall.com/bulk-data", timeout=30)
bulk_response.raise_for_status()
bulk_data = bulk_response.json()

# Pick the catalogue entry describing the default_cards file; fail with a clear
# message instead of a bare StopIteration if that entry is missing.
default_cards_info = next(
    (item for item in bulk_data["data"] if item["type"] == "default_cards"),
    None,
)
if default_cards_info is None:
    raise LookupError('No "default_cards" entry found in the Scryfall bulk-data listing')

# (connect timeout, read timeout) in seconds; the read timeout applies to each
# wait for data from the server, not to the download as a whole.
cards_response = requests.get(default_cards_info["download_uri"], timeout=(10, 300))
cards_response.raise_for_status()
cards_json = cards_response.json()

# Flatten nested objects into dotted column names (later cells rely on names
# such as "prices.usd" and "legalities.*").
df = pd.json_normalize(cards_json)

# Step 2: Derived Attributes

In [3]:
def _has_multiple_colors(identity):
    """Return True only when `identity` is a list holding more than one entry."""
    return isinstance(identity, list) and len(identity) > 1


def _has_no_colors(identity):
    """Return True only when `identity` is an empty list; non-list values give False."""
    return isinstance(identity, list) and len(identity) == 0


# Flag every card from the contents of its color_identity list.
df["is_multicolor"] = df["color_identity"].apply(_has_multiple_colors)
df["is_colorless"] = df["color_identity"].apply(_has_no_colors)

# Utility Functions

In [4]:
def explode_list_column(df, id_col, list_col, value_name=None):
    """Unnest a list-valued column into one row per (id, element) pair.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame containing ``id_col`` and ``list_col``.
    id_col : str
        Name of the identifier column repeated for every list element.
    list_col : str
        Name of the column whose cells hold lists.
    value_name : str, optional
        Name for the exploded value column. When omitted, ``list_col`` is
        singularized by removing exactly one trailing "s"; names that end in
        "ss" (e.g. "class") or that are just "s" are kept unchanged.

    Returns
    -------
    pandas.DataFrame
        Two columns (``id_col`` and the value column) with a fresh 0..n-1
        index. Rows whose list is empty or missing are dropped, as are rows
        with a missing id.
    """
    if value_name is None:
        # str.rstrip('s') would strip *every* trailing "s" ("class" -> "cla"),
        # so remove at most one and leave double-s names alone.
        if len(list_col) > 1 and list_col.endswith('s') and not list_col.endswith('ss'):
            value_name = list_col[:-1]
        else:
            value_name = list_col

    # explode() turns empty lists into NaN; dropna() then removes those rows.
    exploded = df[[id_col, list_col]].explode(list_col).dropna().reset_index(drop=True)
    exploded.columns = [id_col, value_name]
    return exploded

# Step 3: Dim Tables

## DIM Colors (One-To-Many)

In [5]:
# One row per (card id, color) pair; cards whose color_identity list is empty
# or missing contribute no rows. "color_identity" does not end in "s", so the
# value column keeps that name.
colors_dim_df = explode_list_column(df, id_col="id", list_col="color_identity")

In [6]:
colors_dim_df

Unnamed: 0,id,color_identity
0,0000419b-0bba-4488-8f7a-6194544ce91e,G
1,0000579f-7b35-4ed3-b44c-db2a538066fe,R
2,00006596-1166-4a79-8443-ca9f82e6db4e,W
3,0000a54c-a511-4925-92dc-01b937f9afad,W
4,0000cd57-91fe-411f-b798-646e965eec37,U
...,...,...
116494,fffc85fb-1a40-4f83-a36e-cec0b7be658a,U
116495,fffce2f7-b619-4483-a75e-916343194641,G
116496,fffdf7f3-a230-417a-883a-069aabcbcca7,B
116497,fffe7b2b-22c3-4e6a-9b1b-c6d7b29b9f86,W


## DIM Keywords (One-To-Many)

In [7]:
# One row per (card id, keyword) pair; cards without keywords contribute no
# rows. The value column is named "keyword" (trailing "s" removed).
keywords_dim_df = explode_list_column(df, id_col="id", list_col="keywords")

In [8]:
keywords_dim_df

Unnamed: 0,id,keyword
0,0000a54c-a511-4925-92dc-01b937f9afad,Flying
1,0000cd57-91fe-411f-b798-646e965eec37,Flying
2,0000cd57-91fe-411f-b798-646e965eec37,Explore
3,00012bd8-ed68-4978-a22d-f450c8a6e048,Enchant
4,0001c639-8bd0-426f-89cb-4ca61f3cc054,Paradox
...,...,...
60323,fff9989f-77a3-4f73-ade6-c04306c98501,Morbid
60324,fff9ed67-3c45-48ff-a1e7-f95ff35b782b,Cycling
60325,fffc85fb-1a40-4f83-a36e-cec0b7be658a,Fight
60326,fffdf7f3-a230-417a-883a-069aabcbcca7,Flying


## DIM Rarity (One-to-One)

In [9]:
# Rarity names in the order that defines their surrogate keys (1-based).
ordered_rarities = ['common', 'uncommon', 'rare', 'mythic', 'bonus', 'special']
rarity_mapping = {
    rarity: position
    for position, rarity in enumerate(ordered_rarities, start=1)
}

# Attach the key to every card; a rarity missing from the mapping yields NaN.
df['rarity_id'] = df['rarity'].map(rarity_mapping)

# Build the dimension directly in its final column order.
dim_rarity_df = pd.DataFrame({
    'rarity_id': list(rarity_mapping.values()),
    'rarity': list(rarity_mapping),
})

In [10]:
dim_rarity_df

Unnamed: 0,rarity_id,rarity
0,1,common
1,2,uncommon
2,3,rare
3,4,mythic
4,5,bonus
5,6,special


## DIM Set (One-to-Many) 

In [11]:
# Distinct (code, name, type) combinations, numbered in order of first appearance in df.
set_attributes = ['set', 'set_name', 'set_type']
dim_set_df = df[set_attributes].drop_duplicates().reset_index(drop=True)
dim_set_df.insert(0, 'set_id', range(1, len(dim_set_df) + 1))

# Translate each card's set code into its surrogate key.
set_mapping = dict(zip(dim_set_df['set'], dim_set_df['set_id']))
df['set_id'] = df['set'].map(set_mapping)

In [12]:
dim_set_df

Unnamed: 0,set_id,set,set_name,set_type
0,1,blb,Bloomburrow,expansion
1,2,tsp,Time Spiral,expansion
2,3,zen,Zendikar,expansion
3,4,tmm2,Modern Masters 2015 Tokens,token
4,5,xln,Ixalan,expansion
...,...,...,...,...
968,969,pchk,Champions of Kamigawa Promos,promo
969,970,ovoc,Crimson Vow Commander Display Commanders,memorabilia
970,971,ppcy,Prophecy Promos,promo
971,972,plny,Lunar New Year 2018,promo


## DIM Legalities (One-to-One)

In [13]:
# Every flattened "legalities.<format>" column, kept alongside the card id.
legal_cols = [name for name in df.columns if name.startswith("legalities.")]

# Strip the "legalities." prefix so each column is named after its format.
dim_legalities_df = df[['id', *legal_cols]].rename(
    columns=lambda name: name.replace('legalities.', '')
)

In [14]:
dim_legalities_df

Unnamed: 0,id,standard,future,historic,timeless,gladiator,pioneer,modern,legacy,pauper,...,commander,oathbreaker,standardbrawl,brawl,alchemy,paupercommander,duel,oldschool,premodern,predh
0,0000419b-0bba-4488-8f7a-6194544ce91e,legal,legal,legal,legal,legal,legal,legal,legal,legal,...,legal,legal,legal,legal,legal,legal,legal,not_legal,legal,legal
1,0000579f-7b35-4ed3-b44c-db2a538066fe,not_legal,not_legal,not_legal,not_legal,not_legal,not_legal,legal,legal,not_legal,...,legal,legal,not_legal,not_legal,not_legal,not_legal,legal,not_legal,not_legal,legal
2,00006596-1166-4a79-8443-ca9f82e6db4e,not_legal,not_legal,not_legal,not_legal,not_legal,not_legal,legal,legal,legal,...,legal,legal,not_legal,not_legal,not_legal,legal,legal,not_legal,not_legal,legal
3,0000a54c-a511-4925-92dc-01b937f9afad,not_legal,not_legal,not_legal,not_legal,not_legal,not_legal,not_legal,not_legal,not_legal,...,not_legal,not_legal,not_legal,not_legal,not_legal,not_legal,not_legal,not_legal,not_legal,not_legal
4,0000cd57-91fe-411f-b798-646e965eec37,not_legal,not_legal,legal,legal,legal,legal,legal,legal,legal,...,legal,legal,not_legal,legal,not_legal,legal,legal,not_legal,not_legal,not_legal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107792,fffcbc4e-c6dc-4808-b262-f7c453e74dd8,not_legal,not_legal,not_legal,not_legal,not_legal,not_legal,not_legal,not_legal,not_legal,...,not_legal,not_legal,not_legal,not_legal,not_legal,not_legal,not_legal,not_legal,not_legal,not_legal
107793,fffce2f7-b619-4483-a75e-916343194641,not_legal,not_legal,not_legal,not_legal,not_legal,not_legal,legal,legal,legal,...,legal,legal,not_legal,not_legal,not_legal,legal,legal,not_legal,legal,legal
107794,fffdf7f3-a230-417a-883a-069aabcbcca7,not_legal,not_legal,not_legal,not_legal,not_legal,not_legal,not_legal,legal,not_legal,...,legal,legal,not_legal,not_legal,not_legal,not_legal,legal,not_legal,not_legal,not_legal
107795,fffe7b2b-22c3-4e6a-9b1b-c6d7b29b9f86,not_legal,not_legal,legal,legal,legal,legal,legal,legal,legal,...,legal,legal,not_legal,legal,not_legal,legal,legal,not_legal,not_legal,not_legal


## Dim Type (One-To-One)

In [15]:
# Dim Type Table
# Split type_line ("<supertypes> <types> — <subtypes>") into its three parts.
# Only leading words from this list count as supertypes. Treating *any* first
# word as a supertype mislabels lines such as "Artifact Creature — Golem"
# (supertype "Artifact", type "Creature").
# NOTE(review): "Token" is included because the data uses it as a leading word
# ("Token Creature — Spirit"); "Elite" and "Host" are assumed to be the
# remaining supertype-like prefixes — confirm against the supertype catalogue.
known_supertypes = ['Basic', 'Legendary', 'Ongoing', 'Snow', 'World', 'Token', 'Elite', 'Host']
type_line_pattern = (
    r'^(?P<supertypes>(?:(?:' + '|'.join(known_supertypes) + r') )*)'
    r'(?P<types>[^—]+?)'
    r'(?: — (?P<subtypes>.*))?$'
)
# Known limitation (unchanged): multi-faced lines ("A — x // B — y") are not
# split per face, so everything after the first dash lands in subtypes.
df[['supertypes', 'types', 'subtypes']] = df['type_line'].str.extract(type_line_pattern)
for col in ['supertypes', 'types', 'subtypes']:
    # Trim the separator space and turn empty captures into missing values.
    df[col] = df[col].str.strip().replace(r'^\s*$', pd.NA, regex=True)

# One row per distinct (supertypes, types, subtypes) combination, keyed 1..n
# in order of first appearance.
dim_type_df = df[['supertypes', 'types', 'subtypes']].drop_duplicates().reset_index(drop=True)
dim_type_df.insert(0, 'type_id', range(1, len(dim_type_df) + 1))

# Drop any type_id left by an earlier run of this cell; otherwise the merge
# would emit type_id_x / type_id_y and the fact-card cell would fail.
df = (
    df.drop(columns='type_id', errors='ignore')
      .merge(dim_type_df, on=['supertypes', 'types', 'subtypes'], how='left')
)

In [16]:
dim_type_df

Unnamed: 0,type_id,supertypes,types,subtypes
0,1,Basic,Land,Forest
1,2,,Creature,Sliver
2,3,,Creature,Kor Soldier
3,4,Token,Creature,Spirit
4,5,,Creature,Siren Pirate
...,...,...,...,...
4360,4361,,Creature,Human Scout Werewolf // Creature — Werewolf
4361,4362,Token,Creature,Spirit Warrior
4362,4363,,Creature,Capybara
4363,4364,,Creature,Ninja


# Step 4: Fact Tables

## Fact Card

In [17]:
# Fact Card Table
# Card-level attributes, derived flags, and the surrogate keys that point at
# the rarity, set and type dimensions.
fact_card_columns = [
    'id', 'released_at', 'name', 'mana_cost', 'cmc',
    'power', 'toughness',
    'is_multicolor', 'is_colorless',
    'rarity_id', 'set_id', 'type_id',
]
fact_card_df = df[fact_card_columns]

In [18]:
fact_card_df

Unnamed: 0,id,released_at,name,mana_cost,cmc,power,toughness,is_multicolor,is_colorless,rarity_id,set_id,type_id
0,0000419b-0bba-4488-8f7a-6194544ce91e,2024-08-02,Forest,,0.0,,,False,False,1,1,1
1,0000579f-7b35-4ed3-b44c-db2a538066fe,2006-10-06,Fury Sliver,{5}{R},6.0,3,3,False,False,2,2,2
2,00006596-1166-4a79-8443-ca9f82e6db4e,2009-10-02,Kor Outfitter,{W}{W},2.0,2,2,False,False,1,3,3
3,0000a54c-a511-4925-92dc-01b937f9afad,2015-05-22,Spirit,,0.0,1,1,False,False,1,4,4
4,0000cd57-91fe-411f-b798-646e965eec37,2017-09-29,Siren Lookout,{2}{U},3.0,1,2,False,False,1,5,5
...,...,...,...,...,...,...,...,...,...,...,...,...
107792,fffcbc4e-c6dc-4808-b262-f7c453e74dd8,2009-09-30,Celestine Reef,,0.0,,,False,True,3,312,477
107793,fffce2f7-b619-4483-a75e-916343194641,2003-07-28,Horned Troll,{2}{G},3.0,2,2,False,False,1,139,476
107794,fffdf7f3-a230-417a-883a-069aabcbcca7,2023-09-08,Faerie Bladecrafter,{2}{B},3.0,2,2,False,False,3,36,594
107795,fffe7b2b-22c3-4e6a-9b1b-c6d7b29b9f86,2018-01-19,Exultant Skymarcher,{1}{W}{W},3.0,2,3,False,False,1,218,200


## Fact Price (Daily Run; Appends Each Day's Prices to the Master File)

In [19]:
# Today's price observations: one row per card, stamped with the load date.
# NOTE(review): the sample output shows values such as "0.50", which suggests
# the price columns are strings here — consider pd.to_numeric if numeric
# prices are wanted.
price_columns = ['prices.usd', 'prices.usd_foil', 'prices.usd_etched',
                 'prices.eur', 'prices.eur_foil', 'prices.tix']
price_fact_df_new = df[['id'] + price_columns].copy()
load_date = pd.to_datetime('today').normalize()
price_fact_df_new['date_loaded'] = load_date
master_path = "price_fact_df_master.csv"

if os.path.exists(master_path):
    price_fact_df_master = pd.read_csv(master_path)

    # Dates come back from CSV as text, possibly as "YYYY-MM-DD" or as
    # "YYYY-MM-DD 00:00:00". Parse the date part so the column is datetime on
    # both sides of the concat and is written back in one consistent format.
    price_fact_df_master['date_loaded'] = pd.to_datetime(
        price_fact_df_master['date_loaded'].astype(str).str[:10],
        format='%Y-%m-%d',
        errors='coerce',
    )

    # Re-running on the same day replaces that day's rows instead of
    # appending a duplicate batch.
    price_fact_df_master = price_fact_df_master[price_fact_df_master['date_loaded'] != load_date]

    # An empty master gives NaN from max(); start numbering at 1 in that case.
    last_id = price_fact_df_master['price_fact_id'].max()
    starting_id = 1 if pd.isna(last_id) else int(last_id) + 1
    price_fact_df_new.insert(0, 'price_fact_id', range(starting_id, starting_id + len(price_fact_df_new)))

    updated_df = pd.concat([price_fact_df_master, price_fact_df_new], ignore_index=True)
    updated_df.to_csv(master_path, index=False)
else:
    # First run: today's rows become the master file, numbered from 1.
    price_fact_df_new.insert(0, 'price_fact_id', range(1, len(price_fact_df_new) + 1))
    price_fact_df_new.to_csv(master_path, index=False)

## Fact Price Snapshot (Backup)

In [20]:
# Create snapshot backup
# Write today's price rows to a dated snapshot file as a backup.
today_str = datetime.today().date().isoformat()
snapshot_path = f"price_fact_snapshot_{today_str}.csv"
price_fact_df_new.to_csv(snapshot_path, index=False)

## Sample View

In [21]:
price_fact_df_new

Unnamed: 0,price_fact_id,id,prices.usd,prices.usd_foil,prices.usd_etched,prices.eur,prices.eur_foil,prices.tix,date_loaded
0,323391,0000419b-0bba-4488-8f7a-6194544ce91e,0.25,0.50,,0.31,0.38,0.03,2025-07-06
1,323392,0000579f-7b35-4ed3-b44c-db2a538066fe,0.32,3.79,,0.21,1.25,0.03,2025-07-06
2,323393,00006596-1166-4a79-8443-ca9f82e6db4e,0.14,1.70,,0.31,1.88,0.03,2025-07-06
3,323394,0000a54c-a511-4925-92dc-01b937f9afad,0.10,,,,,,2025-07-06
4,323395,0000cd57-91fe-411f-b798-646e965eec37,0.03,0.29,,0.02,0.17,0.03,2025-07-06
...,...,...,...,...,...,...,...,...,...
107792,431183,fffcbc4e-c6dc-4808-b262-f7c453e74dd8,2.13,,,0.43,,,2025-07-06
107793,431184,fffce2f7-b619-4483-a75e-916343194641,0.09,,,0.16,,0.03,2025-07-06
107794,431185,fffdf7f3-a230-417a-883a-069aabcbcca7,1.44,,,0.78,,2.10,2025-07-06
107795,431186,fffe7b2b-22c3-4e6a-9b1b-c6d7b29b9f86,0.06,0.68,,0.06,0.21,0.03,2025-07-06
