# Scryfall ETL and Star Schema Construction
### API Doc: https://scryfall.com/docs/api

In [1]:
import requests
import pandas as pd
from datetime import datetime
import os

# Step 1: Retrieve Bulk Data (Extract)

In [2]:
# Identify this client and state the accepted content type on every request,
# and bound how long a stalled connection may block the notebook.
SCRYFALL_HEADERS = {
    "User-Agent": "MTG-Scryfall-Datawarehouse/1.0",
    "Accept": "application/json;q=0.9,*/*;q=0.8",
}
REQUEST_TIMEOUT = (10, 300)  # (connect, read) timeouts in seconds

# 1) Ask the bulk-data endpoint which export files are available.
bulk_response = requests.get(
    "https://api.scryfall.com/bulk-data",
    headers=SCRYFALL_HEADERS,
    timeout=REQUEST_TIMEOUT,
)
bulk_response.raise_for_status()
bulk_data = bulk_response.json()

# 2) Locate the entry whose type is "default_cards" and download the file it
#    points to. next() raises StopIteration if no such entry is listed.
default_cards_info = next(item for item in bulk_data["data"] if item["type"] == "default_cards")
cards_response = requests.get(
    default_cards_info["download_uri"],
    headers=SCRYFALL_HEADERS,
    timeout=REQUEST_TIMEOUT,
)
cards_response.raise_for_status()
cards_json = cards_response.json()

# 3) Flatten nested JSON objects into dotted column names
#    (e.g. "prices.usd", "legalities.standard").
df = pd.json_normalize(cards_json)

# Step 2: Derived Attributes

In [3]:
# Flag cards by the size of their color identity. Values that are not lists
# (e.g. missing) yield False for both flags.
df["is_multicolor"] = [
    isinstance(identity, list) and len(identity) > 1
    for identity in df["color_identity"]
]
df["is_colorless"] = [
    isinstance(identity, list) and len(identity) == 0
    for identity in df["color_identity"]
]

# Utility Functions

In [4]:
def explode_list_column(df, id_col, list_col):
    """Flatten a list-valued column into one row per (id, element) pair.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``id_col`` and ``list_col``. It is not modified.
    id_col : str
        Name of the identifier column to carry along.
    list_col : str
        Name of the column whose cells hold lists.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``id_col`` and the element column. The element column is
        named after ``list_col`` with a single trailing plural "s" removed
        ("keywords" -> "keyword"); names not ending in "s", or ending in
        "ss", are kept unchanged. Rows with a missing value in either column
        (including rows produced by empty lists) are dropped and the index is
        renumbered from 0.
    """
    exploded = (
        df[[id_col, list_col]]
        .explode(list_col)
        .dropna()
        .reset_index(drop=True)
    )

    # str.rstrip('s') removes *every* trailing "s" ("class" -> "cla"), so
    # strip at most one and leave double-s endings alone.
    if list_col.endswith('s') and not list_col.endswith('ss'):
        element_col = list_col[:-1]
    else:
        element_col = list_col

    exploded.columns = [id_col, element_col]
    return exploded

# Step 3: Dim Tables (Transform)

## DIM Colors (One-To-Many)

In [5]:
# One row per (card id, color) pair taken from each card's color identity.
dim_colors_df = explode_list_column(df, "id", "color_identity")

In [6]:
# Preview the first five rows of the colors table.
dim_colors_df.head(5)

Unnamed: 0,id,color_identity
0,0000419b-0bba-4488-8f7a-6194544ce91e,G
1,0000579f-7b35-4ed3-b44c-db2a538066fe,R
2,00006596-1166-4a79-8443-ca9f82e6db4e,W
3,0000a54c-a511-4925-92dc-01b937f9afad,W
4,0000cd57-91fe-411f-b798-646e965eec37,U


## DIM Keywords (One-To-Many)

In [7]:
# One row per (card id, keyword) pair; cards without keywords produce no rows.
dim_keywords_df = explode_list_column(df, "id", "keywords")

In [8]:
# Preview the first five rows of the keywords table.
dim_keywords_df.head(5)

Unnamed: 0,id,keyword
0,0000a54c-a511-4925-92dc-01b937f9afad,Flying
1,0000cd57-91fe-411f-b798-646e965eec37,Flying
2,0000cd57-91fe-411f-b798-646e965eec37,Explore
3,00012bd8-ed68-4978-a22d-f450c8a6e048,Enchant
4,0001c639-8bd0-426f-89cb-4ca61f3cc054,Paradox


## DIM Rarity (One-to-One)

In [9]:
# The position of a rarity in this list (1-based) becomes its surrogate key.
ordered_rarities = ['common', 'uncommon', 'rare', 'mythic', 'bonus', 'special']

# Label -> key lookup, stamped onto every card. Labels that are not in the
# list map to NaN.
rarity_mapping = {
    label: position
    for position, label in enumerate(ordered_rarities, start=1)
}
df['rarity_id'] = df['rarity'].map(rarity_mapping)

# Dimension table with the key first and the label second.
dim_rarity_df = pd.DataFrame({
    'rarity_id': range(1, len(ordered_rarities) + 1),
    'rarity': ordered_rarities,
})

In [10]:
# Preview the first five rows of the rarity dimension.
dim_rarity_df.head(5)

Unnamed: 0,rarity_id,rarity
0,1,common
1,2,uncommon
2,3,rare
3,4,mythic
4,5,bonus


## DIM Set (One-to-Many) 

In [11]:
# Distinct (code, name, type) combinations form the set dimension.
set_columns = ['set', 'set_name', 'set_type']
dim_set_df = df[set_columns].drop_duplicates().reset_index(drop=True)

# Surrogate key = 1-based row position, placed as the first column.
dim_set_df.insert(0, 'set_id', dim_set_df.index + 1)

# Map each card's set code to its surrogate key.
set_mapping = dict(zip(dim_set_df['set'], dim_set_df['set_id']))
df['set_id'] = df['set'].map(set_mapping)

In [12]:
# Preview the first five rows of the set dimension.
dim_set_df.head(5)

Unnamed: 0,set_id,set,set_name,set_type
0,1,blb,Bloomburrow,expansion
1,2,tsp,Time Spiral,expansion
2,3,zen,Zendikar,expansion
3,4,tmm2,Modern Masters 2015 Tokens,token
4,5,xln,Ixalan,expansion


## DIM Legalities (One-to-One)

In [13]:
# Every flattened "legalities.<format>" column, keyed by card id, with the
# "legalities." prefix removed from the column names.
legal_cols = [name for name in df.columns if name.startswith("legalities.")]
dim_legalities_df = (
    df[['id', *legal_cols]]
    .rename(columns=lambda name: name.replace('legalities.', ''))
)

In [14]:
# Preview the first five rows of the legalities table.
dim_legalities_df.head(5)

Unnamed: 0,id,standard,future,historic,timeless,gladiator,pioneer,modern,legacy,pauper,...,commander,oathbreaker,standardbrawl,brawl,alchemy,paupercommander,duel,oldschool,premodern,predh
0,0000419b-0bba-4488-8f7a-6194544ce91e,legal,legal,legal,legal,legal,legal,legal,legal,legal,...,legal,legal,legal,legal,legal,legal,legal,not_legal,legal,legal
1,0000579f-7b35-4ed3-b44c-db2a538066fe,not_legal,not_legal,not_legal,not_legal,not_legal,not_legal,legal,legal,not_legal,...,legal,legal,not_legal,not_legal,not_legal,not_legal,legal,not_legal,not_legal,legal
2,00006596-1166-4a79-8443-ca9f82e6db4e,not_legal,not_legal,not_legal,not_legal,not_legal,not_legal,legal,legal,legal,...,legal,legal,not_legal,not_legal,not_legal,legal,legal,not_legal,not_legal,legal
3,0000a54c-a511-4925-92dc-01b937f9afad,not_legal,not_legal,not_legal,not_legal,not_legal,not_legal,not_legal,not_legal,not_legal,...,not_legal,not_legal,not_legal,not_legal,not_legal,not_legal,not_legal,not_legal,not_legal,not_legal
4,0000cd57-91fe-411f-b798-646e965eec37,not_legal,not_legal,legal,legal,legal,legal,legal,legal,legal,...,legal,legal,not_legal,legal,not_legal,legal,legal,not_legal,not_legal,not_legal


## DIM Image URL (0/One-to-One)

In [15]:
# Collect the flattened "image_uris.<size>" columns alongside the card id and
# drop the "image_uris." prefix from their names.
image_cols = [name for name in df.columns if name.startswith("image_uris.")]
dim_image_df = (
    df[['id', *image_cols]]
    .rename(columns=lambda name: name.replace('image_uris.', ''))
)

# Keep only cards that have at least one image URL.
image_only_cols = dim_image_df.columns.difference(['id'])
has_any_image = dim_image_df[image_only_cols].notnull().any(axis=1)
dim_image_df = dim_image_df[has_any_image]

In [16]:
# Preview the first five rows of the image table.
dim_image_df.head(5)

Unnamed: 0,id,small,normal,large,png,art_crop,border_crop
0,0000419b-0bba-4488-8f7a-6194544ce91e,https://cards.scryfall.io/small/front/0/0/0000...,https://cards.scryfall.io/normal/front/0/0/000...,https://cards.scryfall.io/large/front/0/0/0000...,https://cards.scryfall.io/png/front/0/0/000041...,https://cards.scryfall.io/art_crop/front/0/0/0...,https://cards.scryfall.io/border_crop/front/0/...
1,0000579f-7b35-4ed3-b44c-db2a538066fe,https://cards.scryfall.io/small/front/0/0/0000...,https://cards.scryfall.io/normal/front/0/0/000...,https://cards.scryfall.io/large/front/0/0/0000...,https://cards.scryfall.io/png/front/0/0/000057...,https://cards.scryfall.io/art_crop/front/0/0/0...,https://cards.scryfall.io/border_crop/front/0/...
2,00006596-1166-4a79-8443-ca9f82e6db4e,https://cards.scryfall.io/small/front/0/0/0000...,https://cards.scryfall.io/normal/front/0/0/000...,https://cards.scryfall.io/large/front/0/0/0000...,https://cards.scryfall.io/png/front/0/0/000065...,https://cards.scryfall.io/art_crop/front/0/0/0...,https://cards.scryfall.io/border_crop/front/0/...
3,0000a54c-a511-4925-92dc-01b937f9afad,https://cards.scryfall.io/small/front/0/0/0000...,https://cards.scryfall.io/normal/front/0/0/000...,https://cards.scryfall.io/large/front/0/0/0000...,https://cards.scryfall.io/png/front/0/0/0000a5...,https://cards.scryfall.io/art_crop/front/0/0/0...,https://cards.scryfall.io/border_crop/front/0/...
4,0000cd57-91fe-411f-b798-646e965eec37,https://cards.scryfall.io/small/front/0/0/0000...,https://cards.scryfall.io/normal/front/0/0/000...,https://cards.scryfall.io/large/front/0/0/0000...,https://cards.scryfall.io/png/front/0/0/0000cd...,https://cards.scryfall.io/art_crop/front/0/0/0...,https://cards.scryfall.io/border_crop/front/0/...


## DIM Purchase (0/One-to-One)

In [17]:
# Gather the flattened "purchase_uris.<vendor>" columns next to the card id.
purch_cols = [name for name in df.columns if name.startswith("purchase_uris.")]
dim_purchase_df = df.loc[:, ['id'] + purch_cols].copy()

# Strip the "purchase_uris." prefix so each column is named after its vendor.
dim_purchase_df.columns = [
    name.replace('purchase_uris.', '') for name in dim_purchase_df.columns
]

# Discard cards for which every purchase link is missing.
purch_only_cols = dim_purchase_df.columns.difference(['id'])
all_links_missing = dim_purchase_df[purch_only_cols].isnull().all(axis=1)
dim_purchase_df = dim_purchase_df[~all_links_missing]

In [18]:
# Preview the first five rows of the purchase-link table.
dim_purchase_df.head(5)

Unnamed: 0,id,tcgplayer,cardmarket,cardhoarder
0,0000419b-0bba-4488-8f7a-6194544ce91e,https://partner.tcgplayer.com/c/4931599/183015...,https://www.cardmarket.com/en/Magic/Products?i...,https://www.cardhoarder.com/cards/129825?affil...
1,0000579f-7b35-4ed3-b44c-db2a538066fe,https://partner.tcgplayer.com/c/4931599/183015...,https://www.cardmarket.com/en/Magic/Products?i...,https://www.cardhoarder.com/cards/25527?affili...
2,00006596-1166-4a79-8443-ca9f82e6db4e,https://partner.tcgplayer.com/c/4931599/183015...,https://www.cardmarket.com/en/Magic/Products?i...,https://www.cardhoarder.com/cards/34586?affili...
3,0000a54c-a511-4925-92dc-01b937f9afad,https://partner.tcgplayer.com/c/4931599/183015...,https://www.cardmarket.com/en/Magic/Products/S...,https://www.cardhoarder.com/cards?affiliate_id...
4,0000cd57-91fe-411f-b798-646e965eec37,https://partner.tcgplayer.com/c/4931599/183015...,https://www.cardmarket.com/en/Magic/Products?i...,https://www.cardhoarder.com/cards/65170?affili...


## Dim Type (One-To-One)

In [19]:
# Dim Type Table
type_cols = ['supertypes', 'types', 'subtypes']

# Split "type_line" around the " — " separator: the text after it becomes
# "subtypes"; on the left side, an optional leading word goes to "supertypes"
# and the remainder to "types".
# NOTE(review): whenever the left side has two or more words, the first word
# is captured as the supertype (e.g. "Artifact Creature" -> supertype
# "Artifact") — confirm this is intended.
df[type_cols] = df['type_line'].str.extract(
    r'^(?:(?P<supertypes>[\w ]+?) )?(?P<types>[^—]+?)(?: — (?P<subtypes>.*))?$'
)

# Trim whitespace and turn blank strings into missing values.
for col in type_cols:
    df[col] = df[col].str.strip().replace(r'^\s*$', pd.NA, regex=True)

# One row per distinct (supertypes, types, subtypes) combination, keyed by a
# 1-based surrogate id placed in the first column.
dim_type_df = df[type_cols].drop_duplicates().reset_index(drop=True)
dim_type_df.insert(0, 'type_id', range(1, len(dim_type_df) + 1))

# Attach the surrogate key to every card. A "type_id" left over from an
# earlier run of this cell is dropped first; otherwise the merge would emit
# "type_id_x"/"type_id_y" and later selections of "type_id" would fail.
df = (
    df.drop(columns='type_id', errors='ignore')
    .merge(dim_type_df, on=type_cols, how='left')
)

In [20]:
# Preview the first five rows of the type dimension.
dim_type_df.head(5)

Unnamed: 0,type_id,supertypes,types,subtypes
0,1,Basic,Land,Forest
1,2,,Creature,Sliver
2,3,,Creature,Kor Soldier
3,4,Token,Creature,Spirit
4,5,,Creature,Siren Pirate


# Step 4: Fact Tables (Transform, continued)

## Fact Card

In [21]:
# Fact Card Table
# Card-level attributes plus the surrogate keys of the rarity, set and type
# dimensions; "released_at" is parsed into a datetime column.
fact_card_columns = [
    'id', 'released_at', 'name', 'mana_cost', 'cmc', 'power',
    'toughness', 'is_multicolor', 'is_colorless', 'rarity_id',
    'set_id', 'type_id',
]
fact_card_df = df[fact_card_columns].assign(
    released_at=lambda cards: pd.to_datetime(cards['released_at'])
)

In [22]:
# Preview the first five rows of the card fact table.
fact_card_df.head(5)

Unnamed: 0,id,released_at,name,mana_cost,cmc,power,toughness,is_multicolor,is_colorless,rarity_id,set_id,type_id
0,0000419b-0bba-4488-8f7a-6194544ce91e,2024-08-02,Forest,,0.0,,,False,False,1,1,1
1,0000579f-7b35-4ed3-b44c-db2a538066fe,2006-10-06,Fury Sliver,{5}{R},6.0,3.0,3.0,False,False,2,2,2
2,00006596-1166-4a79-8443-ca9f82e6db4e,2009-10-02,Kor Outfitter,{W}{W},2.0,2.0,2.0,False,False,1,3,3
3,0000a54c-a511-4925-92dc-01b937f9afad,2015-05-22,Spirit,,0.0,1.0,1.0,False,False,1,4,4
4,0000cd57-91fe-411f-b798-646e965eec37,2017-09-29,Siren Lookout,{2}{U},3.0,1.0,2.0,False,False,1,5,5


## Fact Price (daily run; appends today's prices to the history stored from previous days)

In [23]:
# Today's prices for every card, stamped with the (midnight-normalized) load date.
price_fact_df_new = df[['id', 'prices.usd', 'prices.usd_foil', 'prices.usd_etched',
                        'prices.eur', 'prices.eur_foil', 'prices.tix']].copy()
load_date = pd.to_datetime('today').normalize()
price_fact_df_new['date_loaded'] = load_date
master_path = "price_fact_df_master.csv"

if os.path.exists(master_path):
    price_fact_df_master = pd.read_csv(master_path)

    # Make the cell safe to re-run: rows already stored for today's date are
    # removed before appending, so a second run on the same day replaces
    # today's prices instead of duplicating them.
    stored_dates = pd.to_datetime(price_fact_df_master['date_loaded'], errors='coerce').dt.normalize()
    price_fact_df_master = price_fact_df_master[stored_dates != load_date]

    price_fact_df_new['date_loaded'] = price_fact_df_new['date_loaded'].dt.date

    # Continue numbering after the highest stored id. max() of an empty
    # column is NaN, in which case numbering restarts at 1.
    last_id = price_fact_df_master['price_fact_id'].max()
    starting_id = 1 if pd.isna(last_id) else int(last_id) + 1
    price_fact_df_new.insert(0, 'price_fact_id', range(starting_id, starting_id + len(price_fact_df_new)))

    updated_df = pd.concat([price_fact_df_master, price_fact_df_new], ignore_index=True)
    updated_df.to_csv(master_path, index=False)
else:
    # First run: no history yet, so today's rows become the master file.
    price_fact_df_new.insert(0, 'price_fact_id', range(1, len(price_fact_df_new) + 1))
    price_fact_df_new.to_csv(master_path, index=False)

## Read CSV again and adjust date_loaded to datetime

In [24]:
# Reload the master file from disk. Both names refer to the same frame.
fact_price_df = price_fact_df_master = pd.read_csv(master_path)

# Parse the stored "YYYY-MM-DD" strings; values that do not match become NaT.
fact_price_df['date_loaded'] = pd.to_datetime(
    fact_price_df['date_loaded'],
    format="%Y-%m-%d",
    errors='coerce',
)

## Fact Price Snapshot (Backup)

In [25]:
# Write today's price rows to a dated CSV next to the master file.
today_str = datetime.today().strftime('%Y-%m-%d')
snapshot_path = f"price_fact_snapshot_{today_str}.csv"
price_fact_df_new.to_csv(snapshot_path, index=False)

# Step 5: Load

## BigQuery Connect (after setting up the environment in the Google Cloud Console)

### Initialize

In [26]:
from google.cloud import bigquery

# Use credentials configured outside the notebook when they exist; only fall
# back to the local service-account key if the variable is not set.
# NOTE(review): the fallback path is machine-specific — other users should
# export GOOGLE_APPLICATION_CREDENTIALS before starting the kernel.
os.environ.setdefault(
    "GOOGLE_APPLICATION_CREDENTIALS",
    r"C:\Users\nicoy\Desktop\GitHub\MTG-Scryfall-Datawarehouse\mtg-sa-key.json",
)

client = bigquery.Client()
project_id = client.project
dataset_id = "mtg_dataset"

# Create dataset with full project.dataset ID
dataset = bigquery.Dataset(f"{project_id}.{dataset_id}")
dataset.location = "US"

# Create dataset only if it doesn't already exist
client.create_dataset(dataset, exists_ok=True)

Dataset(DatasetReference('mtg-data-pipeline', 'mtg_dataset'))

### Load fact_card

In [27]:
# Target table inside the dataset created during initialization. The ID is
# derived from the client's project and the dataset name instead of being
# hard-coded, so the load always goes where the dataset was created.
table_id = f"{project_id}.{dataset_id}.fact_card"

# Explicit schema so BigQuery does not have to infer the column types.
schema = [
    bigquery.SchemaField("id", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("released_at", "DATE"),
    bigquery.SchemaField("name", "STRING"),
    bigquery.SchemaField("mana_cost", "STRING"),
    bigquery.SchemaField("cmc", "FLOAT"),
    bigquery.SchemaField("power", "STRING"),
    bigquery.SchemaField("toughness", "STRING"),
    bigquery.SchemaField("is_multicolor", "BOOLEAN"),
    bigquery.SchemaField("is_colorless", "BOOLEAN"),
    bigquery.SchemaField("rarity_id", "INTEGER"),
    bigquery.SchemaField("set_id", "INTEGER"),
    bigquery.SchemaField("type_id", "INTEGER"),
]

# WRITE_TRUNCATE replaces the table contents on every run.
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_TRUNCATE",
    schema=schema
)

# Upload to BigQuery and block until the load job has finished.
job = client.load_table_from_dataframe(fact_card_df, table_id, job_config=job_config)
job.result()

# Confirm success by reporting the row count of the loaded table.
table = client.get_table(table_id)
print(f"Uploaded {table.num_rows} rows to {table_id}.")



Uploaded 108794 rows to mtg-data-pipeline.mtg_dataset.fact_card.


### Load fact_price

In [28]:
# Replace every character outside [a-zA-Z0-9_] with "_" so that e.g.
# "prices.usd" becomes "prices_usd", matching the schema below.
fact_price_df.columns = fact_price_df.columns.str.replace('[^a-zA-Z0-9_]', '_', regex=True)

# Target table inside the dataset created during initialization. The ID is
# derived from the client's project and the dataset name instead of being
# hard-coded, so the load always goes where the dataset was created.
table_id = f"{project_id}.{dataset_id}.fact_price"

# Explicit schema for the price history.
schema = [
    bigquery.SchemaField("price_fact_id", "INTEGER", mode="REQUIRED"),
    bigquery.SchemaField("id", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("prices_usd", "FLOAT"),
    bigquery.SchemaField("prices_usd_foil", "FLOAT"),
    bigquery.SchemaField("prices_usd_etched", "FLOAT"),
    bigquery.SchemaField("prices_eur", "FLOAT"),
    bigquery.SchemaField("prices_eur_foil", "FLOAT"),
    bigquery.SchemaField("prices_tix", "FLOAT"),
    bigquery.SchemaField("date_loaded", "DATE"),
]

# WRITE_TRUNCATE replaces the table contents with the full frame on every run.
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_TRUNCATE",
    schema=schema
)

# Upload to BigQuery and block until the load job has finished.
job = client.load_table_from_dataframe(fact_price_df, table_id, job_config=job_config)
job.result()

# Confirm success by reporting the row count of the loaded table.
table = client.get_table(table_id)
print(f"Uploaded {table.num_rows} rows to {table_id}.")



Uploaded 2707305 rows to mtg-data-pipeline.mtg_dataset.fact_price.


### Load dim_colors

In [29]:
# Replace every character outside [a-zA-Z0-9_] in the column names with "_".
dim_colors_df.columns = dim_colors_df.columns.str.replace('[^a-zA-Z0-9_]', '_', regex=True)

# Target table inside the dataset created during initialization. The ID is
# derived from the client's project and the dataset name instead of being
# hard-coded, so the load always goes where the dataset was created.
table_id = f"{project_id}.{dataset_id}.dim_colors"

# Explicit schema: card id plus one color per row.
schema = [
    bigquery.SchemaField("id", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("color_identity", "STRING"),
]

# WRITE_TRUNCATE replaces the table contents on every run.
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_TRUNCATE",
    schema=schema
)

# Upload to BigQuery and block until the load job has finished.
job = client.load_table_from_dataframe(dim_colors_df, table_id, job_config=job_config)
job.result()

# Confirm success by reporting the row count of the loaded table.
table = client.get_table(table_id)
print(f"Uploaded {table.num_rows} rows to {table_id}.")




Uploaded 117426 rows to mtg-data-pipeline.mtg_dataset.dim_colors.


### Load dim_keywords

In [30]:
# Replace every character outside [a-zA-Z0-9_] in the column names with "_".
dim_keywords_df.columns = dim_keywords_df.columns.str.replace('[^a-zA-Z0-9_]', '_', regex=True)

# Target table inside the dataset created during initialization. The ID is
# derived from the client's project and the dataset name instead of being
# hard-coded, so the load always goes where the dataset was created.
table_id = f"{project_id}.{dataset_id}.dim_keywords"

# Explicit schema: card id plus one keyword per row.
schema = [
    bigquery.SchemaField("id", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("keyword", "STRING"),
]

# WRITE_TRUNCATE replaces the table contents on every run.
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_TRUNCATE",
    schema=schema
)

# Upload to BigQuery and block until the load job has finished.
job = client.load_table_from_dataframe(dim_keywords_df, table_id, job_config=job_config)
job.result()

# Confirm success by reporting the row count of the loaded table.
table = client.get_table(table_id)
print(f"Uploaded {table.num_rows} rows to {table_id}.")



Uploaded 60925 rows to mtg-data-pipeline.mtg_dataset.dim_keywords.


### Load dim_type

In [31]:
# Replace every character outside [a-zA-Z0-9_] in the column names with "_".
dim_type_df.columns = dim_type_df.columns.str.replace('[^a-zA-Z0-9_]', '_', regex=True)

# Target table inside the dataset created during initialization. The ID is
# derived from the client's project and the dataset name instead of being
# hard-coded, so the load always goes where the dataset was created.
table_id = f"{project_id}.{dataset_id}.dim_type"

# Explicit schema: surrogate key plus the three parts of the type line.
schema = [
    bigquery.SchemaField("type_id", "INTEGER", mode="REQUIRED"),
    bigquery.SchemaField("supertypes", "STRING"),
    bigquery.SchemaField("types", "STRING"),
    bigquery.SchemaField("subtypes", "STRING"),
]

# WRITE_TRUNCATE replaces the table contents on every run.
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_TRUNCATE",
    schema=schema
)

# Upload to BigQuery and block until the load job has finished.
job = client.load_table_from_dataframe(dim_type_df, table_id, job_config=job_config)
job.result()

# Confirm success by reporting the row count of the loaded table.
table = client.get_table(table_id)
print(f"Uploaded {table.num_rows} rows to {table_id}.")



Uploaded 4440 rows to mtg-data-pipeline.mtg_dataset.dim_type.


### Load dim_set

In [32]:
# Replace every character outside [a-zA-Z0-9_] in the column names with "_".
dim_set_df.columns = dim_set_df.columns.str.replace('[^a-zA-Z0-9_]', '_', regex=True)

# Target table inside the dataset created during initialization. The ID is
# derived from the client's project and the dataset name instead of being
# hard-coded, so the load always goes where the dataset was created.
table_id = f"{project_id}.{dataset_id}.dim_set"

# Explicit schema: surrogate key plus set code, name and type.
schema = [
    bigquery.SchemaField("set_id", "INTEGER", mode="REQUIRED"),
    bigquery.SchemaField("set", "STRING"),
    bigquery.SchemaField("set_name", "STRING"),
    bigquery.SchemaField("set_type", "STRING"),
]

# WRITE_TRUNCATE replaces the table contents on every run.
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_TRUNCATE",
    schema=schema
)

# Upload to BigQuery and block until the load job has finished.
job = client.load_table_from_dataframe(dim_set_df, table_id, job_config=job_config)
job.result()

# Confirm success by reporting the row count of the loaded table.
table = client.get_table(table_id)
print(f"Uploaded {table.num_rows} rows to {table_id}.")



Uploaded 981 rows to mtg-data-pipeline.mtg_dataset.dim_set.


### Load dim_rarity

In [33]:
# Replace every character outside [a-zA-Z0-9_] in the column names with "_".
dim_rarity_df.columns = dim_rarity_df.columns.str.replace('[^a-zA-Z0-9_]', '_', regex=True)

# Target table inside the dataset created during initialization. The ID is
# derived from the client's project and the dataset name instead of being
# hard-coded, so the load always goes where the dataset was created.
table_id = f"{project_id}.{dataset_id}.dim_rarity"

# Explicit schema: surrogate key plus rarity label.
schema = [
    bigquery.SchemaField("rarity_id", "INTEGER", mode="REQUIRED"),
    bigquery.SchemaField("rarity", "STRING"),
]

# WRITE_TRUNCATE replaces the table contents on every run.
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_TRUNCATE",
    schema=schema
)

# Upload to BigQuery and block until the load job has finished.
job = client.load_table_from_dataframe(dim_rarity_df, table_id, job_config=job_config)
job.result()

# Confirm success by reporting the row count of the loaded table.
table = client.get_table(table_id)
print(f"Uploaded {table.num_rows} rows to {table_id}.")



Uploaded 6 rows to mtg-data-pipeline.mtg_dataset.dim_rarity.


### Load dim_legalities

In [34]:
# Replace every character outside [a-zA-Z0-9_] in the column names with "_".
dim_legalities_df.columns = dim_legalities_df.columns.str.replace('[^a-zA-Z0-9_]', '_', regex=True)

# Target table inside the dataset created during initialization. The ID is
# derived from the client's project and the dataset name instead of being
# hard-coded, so the load always goes where the dataset was created.
table_id = f"{project_id}.{dataset_id}.dim_legalities"

# Explicit schema: card id plus one STRING column per format listed here.
# NOTE(review): the format list is fixed in code — confirm it still matches
# the "legalities.*" columns present in the downloaded data.
schema = [
    bigquery.SchemaField("id", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("standard", "STRING"),
    bigquery.SchemaField("future", "STRING"),
    bigquery.SchemaField("historic", "STRING"),
    bigquery.SchemaField("timeless", "STRING"),
    bigquery.SchemaField("gladiator", "STRING"),
    bigquery.SchemaField("pioneer", "STRING"),
    bigquery.SchemaField("modern", "STRING"),
    bigquery.SchemaField("legacy", "STRING"),
    bigquery.SchemaField("pauper", "STRING"),
    bigquery.SchemaField("vintage", "STRING"),
    bigquery.SchemaField("penny", "STRING"),
    bigquery.SchemaField("commander", "STRING"),
    bigquery.SchemaField("oathbreaker", "STRING"),
    bigquery.SchemaField("standardbrawl", "STRING"),
    bigquery.SchemaField("brawl", "STRING"),
    bigquery.SchemaField("alchemy", "STRING"),
    bigquery.SchemaField("paupercommander", "STRING"),
    bigquery.SchemaField("duel", "STRING"),
    bigquery.SchemaField("oldschool", "STRING"),
    bigquery.SchemaField("premodern", "STRING"),
    bigquery.SchemaField("predh", "STRING"),
]

# WRITE_TRUNCATE replaces the table contents on every run.
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_TRUNCATE",
    schema=schema
)

# Upload to BigQuery and block until the load job has finished.
job = client.load_table_from_dataframe(dim_legalities_df, table_id, job_config=job_config)
job.result()

# Confirm success by reporting the row count of the loaded table.
table = client.get_table(table_id)
print(f"Uploaded {table.num_rows} rows to {table_id}.")



Uploaded 108794 rows to mtg-data-pipeline.mtg_dataset.dim_legalities.


### Load dim_image

In [35]:
# Replace every character outside [a-zA-Z0-9_] in the column names with "_".
dim_image_df.columns = dim_image_df.columns.str.replace('[^a-zA-Z0-9_]', '_', regex=True)

# Target table inside the dataset created during initialization. The ID is
# derived from the client's project and the dataset name instead of being
# hard-coded, so the load always goes where the dataset was created.
table_id = f"{project_id}.{dataset_id}.dim_image"

# Explicit schema: card id plus one URL column per image variant.
schema = [
    bigquery.SchemaField("id", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("small", "STRING"),
    bigquery.SchemaField("normal", "STRING"),
    bigquery.SchemaField("large", "STRING"),
    bigquery.SchemaField("png", "STRING"),
    bigquery.SchemaField("art_crop", "STRING"),
    bigquery.SchemaField("border_crop", "STRING"),
]

# WRITE_TRUNCATE replaces the table contents on every run.
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_TRUNCATE",
    schema=schema
)

# Upload to BigQuery and block until the load job has finished.
job = client.load_table_from_dataframe(dim_image_df, table_id, job_config=job_config)
job.result()

# Confirm success by reporting the row count of the loaded table.
table = client.get_table(table_id)
print(f"Uploaded {table.num_rows} rows to {table_id}.")



Uploaded 105021 rows to mtg-data-pipeline.mtg_dataset.dim_image.


### Load dim_purchase

In [36]:
# Replace every character outside [a-zA-Z0-9_] in the column names with "_".
dim_purchase_df.columns = dim_purchase_df.columns.str.replace('[^a-zA-Z0-9_]', '_', regex=True)

# Target table inside the dataset created during initialization. The ID is
# derived from the client's project and the dataset name instead of being
# hard-coded, so the load always goes where the dataset was created.
table_id = f"{project_id}.{dataset_id}.dim_purchase"

# Explicit schema: card id plus one link column per vendor.
schema = [
    bigquery.SchemaField("id", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("tcgplayer", "STRING"),
    bigquery.SchemaField("cardmarket", "STRING"),
    bigquery.SchemaField("cardhoarder", "STRING"),
]

# WRITE_TRUNCATE replaces the table contents on every run.
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_TRUNCATE",
    schema=schema
)

# Upload to BigQuery and block until the load job has finished.
job = client.load_table_from_dataframe(dim_purchase_df, table_id, job_config=job_config)
job.result()

# Confirm success by reporting the row count of the loaded table.
table = client.get_table(table_id)
print(f"Uploaded {table.num_rows} rows to {table_id}.")



Uploaded 105302 rows to mtg-data-pipeline.mtg_dataset.dim_purchase.
