In [None]:
import json


def read_file(file_path: str):
    """Parse the JSON document stored at ``file_path`` and return the result.

    The file is decoded explicitly as UTF-8 (the encoding JSON is exchanged
    in) instead of relying on the platform default, which would corrupt or
    reject non-ASCII text on systems whose locale encoding is not UTF-8.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON value (whatever ``json.load`` produces for the file).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(file_path, encoding="utf-8") as file:
        # json.load parses straight from the file object; no intermediate
        # ``file.read()`` call is needed.
        return json.load(file)


# Local path the bulk file is downloaded to (urlretrieve) and later parsed
# from (read_file).
dst = "latest_cards.json"

In [None]:
from urllib.request import urlretrieve

from requests import get

print("Fetching bulk data available on Scryfall")
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() turns an HTTP error into an exception here instead of a
# confusing KeyError on the error payload below.
result = get("https://api.scryfall.com/bulk-data", timeout=30)
result.raise_for_status()
bulk_data = result.json()["data"]

# type: all_cards
print("Getting the URI for the all_cards bulk data")
all_cards = next((entry for entry in bulk_data if entry["type"] == "all_cards"), None)
if all_cards is None:
    # Fail with a clear message rather than a TypeError on None["download_uri"].
    raise LookupError("Scryfall bulk-data listing has no 'all_cards' entry")
raw_cards_uri = all_cards["download_uri"]

In [None]:
# Show the download URI that was selected from the bulk-data listing.
raw_cards_uri

In [None]:
import logging


class ProgressBar:
    """Download progress reporter usable as a ``urlretrieve`` reporthook.

    The last reported percentage is kept in ``self.progress`` and a new log
    record is emitted only when progress has advanced by more than one
    percentage point since then.

    NOTE: records are emitted with ``logging.info``; with Python's default
    logging configuration (root level WARNING) they are not displayed unless
    logging has been configured to show INFO.

    Args:
        fallback_size: Size in bytes assumed for the download when the real
            total is unknown. Defaults to ``3e9``.
    """

    def __init__(self, fallback_size=3e9):
        self.progress = 0
        self.fallback_size = fallback_size

    def progress_hook(self, blocknum, blocksize, totalsize):
        """Record and log progress for one reporthook callback.

        Args:
            blocknum: Number of blocks transferred so far.
            blocksize: Size of one block in bytes.
            totalsize: Total size in bytes; any non-positive value (``-1``
                or ``0``) is treated as "unknown" and ``fallback_size`` is
                used instead, which also avoids a division by zero.
        """
        expected_size = totalsize if totalsize > 0 else self.fallback_size
        # blocknum * blocksize can overshoot the real size on the last block
        # (or the fallback size can be too small), so cap the value at 100%.
        progress_update = min(blocknum * blocksize / expected_size * 100, 100.0)

        if progress_update - self.progress > 1:
            self.progress = progress_update
            logging.info(f"Downloading {progress_update:.2f}%")

In [None]:
# Download the bulk file to `dst`, reporting progress through a fresh
# ProgressBar's reporthook.
urlretrieve(raw_cards_uri, dst, reporthook=ProgressBar().progress_hook)

In [None]:
# Print the `updated_at` value of the selected all_cards bulk entry.
print(f"Update {all_cards['updated_at']}")

In [None]:
import os

# MongoDB connection settings. Every value can be overridden through an
# environment variable of the same name; the literals are defaults.
# NOTE(review): the root/root default credentials look intended for a local
# development instance only -- confirm before pointing this elsewhere.
DATABASE = os.getenv("DATABASE", "mtg")
# The host is now overridable like the other settings; the default is the
# previously hard-coded "localhost".
DATABASE_HOST = os.getenv("DATABASE_HOST", "localhost")
DATABASE_USER = os.getenv("DATABASE_USER", "root")
DATABASE_PASSWORD = os.getenv("DATABASE_PASSWORD", "root")
# Kept as a string, exactly as read from the environment.
DATABASE_PORT = os.getenv("DATABASE_PORT", "27017")

In [None]:
# Show which server/user will be used, with the password masked so the secret
# does not end up in the saved notebook output.
print(f"mongodb://{DATABASE_USER}:***@{DATABASE_HOST}:{DATABASE_PORT}")

In [None]:
from pymongo import MongoClient

# Credentials are passed as keyword arguments instead of being interpolated
# into a mongodb:// URI: inside a URI they would have to be percent-escaped,
# and a password containing characters such as "@", ":" or "/" would make the
# URI invalid.
client = MongoClient(
    host=DATABASE_HOST,
    port=int(DATABASE_PORT),  # the setting is read from the environment as a string
    username=DATABASE_USER,
    password=DATABASE_PASSWORD,
)
db = client[DATABASE]
# Main collection holding one document per card.
collection = db["cards"]

In [None]:
from pymongo import ASCENDING, HASHED, TEXT, IndexModel

# Single-field indexes for the cards collection as (field, index kind) pairs,
# listed in creation order. Commented-out entries are fields that are
# currently left unindexed.
single_field_indexes = [
    # ("object", HASHED),
    # ("card_faces.object", ASCENDING),
    ("id", HASHED),
    ("oracle_id", HASHED),
    ("multiverse_ids", ASCENDING),
    # ("mtgo_id", HASHED),
    ("arena_id", HASHED),
    # ("tcgplayer_id", HASHED),
    ("name", ASCENDING),
    ("name_search", ASCENDING),
    ("lang", HASHED),
    ("released_at", ASCENDING),
    ("layout", HASHED),
    # ("highres_image", HASHED),
    # ("image_status", HASHED),
    ("cmc", ASCENDING),
    ("colors", ASCENDING),
    # ("card_faces.colors", ASCENDING),
    ("color_identity", ASCENDING),
    # ("card_faces.color_indicator", ASCENDING),
    ("produced_mana", ASCENDING),
    ("games", ASCENDING),
    ("keywords", ASCENDING),
    # ("reserved", HASHED),
    # ("foil", HASHED),
    # ("nonfoil", HASHED),
    # ("finishes", ASCENDING),
    ("oversized", HASHED),
    ("promo", HASHED),
    ("reprint", HASHED),
    ("variation", HASHED),
    # ("set_id", HASHED),
    ("set", ASCENDING),
    ("set_name", ASCENDING),
    # ("set_type", ASCENDING),
    ("collector_number", ASCENDING),
    ("digital", HASHED),
    ("rarity", HASHED),
    ("artist", ASCENDING),
    # ("artist_ids", ASCENDING),
    # ("illustration_id", HASHED),
    # ("card_faces.illustration_id", ASCENDING),
    ("border_color", HASHED),
    ("frame", HASHED),
    ("full_art", HASHED),
    ("textless", HASHED),
    ("booster", HASHED),
    # ("story_spotlight", HASHED),
    # ("mtgo_foil_id", HASHED),
    ("watermark", HASHED),
    # ("cardmarket_id", HASHED),
    ("power", ASCENDING),
    ("toughness", ASCENDING),
    ("edhrec_rank", ASCENDING),
    ("penny_rank", ASCENDING),
    # ("security_stamp", ASCENDING),
    # ("legalities", HASHED),
]

# Each format listed here gets its own hashed index on "legalities.<format>".
legality_formats = [
    "standard",
    "future",
    "historic",
    "timeless",
    "gladiator",
    "pioneer",
    "explorer",
    "modern",
    "legacy",
    "pauper",
    "vintage",
    "penny",
    "commander",
    "oathbreaker",
    "standardbrawl",
    "brawl",
    "alchemy",
    "paupercommander",
    "duel",
    "oldschool",
    "premodern",
    "predh",
]

index_models = [IndexModel([spec]) for spec in single_field_indexes]
index_models += [
    IndexModel([(f"legalities.{fmt}", HASHED)]) for fmt in legality_formats
]

# `created` holds whatever create_indexes returns for the models above.
created = collection.create_indexes(index_models)

In [None]:
# Display the result returned by create_indexes.
created

In [None]:
# Fields covered by the collection's single compound text index, in key order.
text_fields = [
    "name",
    "card_faces.name",
    "type_line",
    "card_faces.type_line",
    "artist",
    "card_faces.artist",
    "oracle_text",
    "card_faces.oracle_text",
    "flavor_text",
    "card_faces.flavor_text",
    "printed_name",
    "card_faces.printed_name",
    "printed_type_line",
    "card_faces.printed_type_line",
    "printed_text",
    "card_faces.printed_text",
    "flavor_name",
    "set_name",
    "set",
    "keywords",
]

# Per-field weights handed to the text index through its `weights` option.
text_weights = {
    "name": 10,
    "card_faces.name": 5,
    "artist": 10,
    "card_faces.artist": 5,
    "type_line": 10,
    "card_faces.type_line": 5,
    "oracle_text": 2,
    "card_faces.oracle_text": 1,
    "flavor_text": 2,
    "card_faces.flavor_text": 2,
    "printed_name": 5,
    "card_faces.printed_name": 5,
    "printed_type_line": 5,
    "card_faces.printed_type_line": 5,
    "printed_text": 2,
    "card_faces.printed_text": 2,
    "flavor_name": 1,
    "set_name": 10,
    "set": 10,
    "keywords": 5,
}

collection.create_index(
    [(field, TEXT) for field in text_fields],
    name="text_index",
    weights=text_weights,
)

In [None]:
# collection.drop()
# from pymongo import InsertOne


# requesting = []
# for card in df:
#     requesting.append(InsertOne(card))

# collection.bulk_write(requesting)
# client.close()

In [None]:
def yield_differences(a: dict, b: dict, parent: str = ""):
    """Yield ``(key_path, a_value, b_value)`` for every difference between two dicts.

    Nested dicts are compared recursively and reported with dot-separated
    paths (``"outer.inner"``). Lists of equal length are compared element by
    element and reported as ``"field.<index>"``; dict elements are recursed
    into. Lists whose lengths differ are reported once, as a whole, under the
    list's own path: pairing elements with ``zip`` would silently ignore the
    extra elements, so an appended or removed item would otherwise produce no
    difference at all.

    Args:
        a: The "before" mapping.
        b: The "after" mapping.
        parent: Path prefix used for nested calls; empty at the top level.

    Yields:
        Tuples ``(key_path, a_value, b_value)``. A key missing on one side is
        reported with the string ``"_NO VALUE_"`` in that position.
    """
    for key in a.keys() | b.keys():
        full_key = f"{parent}.{key}" if parent != "" else key

        a_value = a.get(key, "_NO VALUE_")
        b_value = b.get(key, "_NO VALUE_")

        if a_value == b_value:
            continue

        if isinstance(a_value, dict) and isinstance(b_value, dict):
            yield from yield_differences(a_value, b_value, full_key)
            continue

        if (
            isinstance(a_value, list)
            and isinstance(b_value, list)
            and len(a_value) == len(b_value)
        ):
            for i, (arr_a, arr_b) in enumerate(zip(a_value, b_value)):
                if arr_a == arr_b:
                    continue

                full_arr_key = f"{full_key}.{i}"
                if isinstance(arr_a, dict) and isinstance(arr_b, dict):
                    yield from yield_differences(arr_a, arr_b, full_arr_key)
                else:
                    yield full_arr_key, arr_a, arr_b
            continue

        # Scalars, mismatched types, keys present on one side only, and lists
        # of different lengths are all reported as a whole-value change.
        yield full_key, a_value, b_value

In [None]:
from datetime import datetime

# Timestamp of the bulk file, parsed from the listing entry's `updated_at`
# string; used as the `date` of the per-card snapshot documents.
last_update_datetime = datetime.fromisoformat(all_cards["updated_at"])

In [None]:
# db.create_collection(
#    "card_stocks_daily",
#    **{
#       "timeseries": {
#          "timeField": "date",
#          "metaField": "card_id",
#          "granularity": "hours"
#       },
#       "expireAfterSeconds": 60*60*24*30*12 # 1 year
#    }
# )

# Handle to the collection that receives the per-card price snapshots
# (documents with `date`, `card_id` and `prices`).
card_stocks_daily = db["card_stocks_daily"]

In [None]:
# db.create_collection(
#    "edhrec_daily",
#    **{
#       "timeseries": {
#          "timeField": "date",
#          "metaField": "card_id",
#          "granularity": "hours"
#       },
#       "expireAfterSeconds": 60*60*24*30*12 # 1 year
#    }
# )

# Handle to the collection that receives the per-card EDHREC rank snapshots
# (documents with `date`, `card_id` and `edhrec_rank`).
edhrec_daily = db["edhrec_daily"]

In [None]:
print("Reading the all_cards bulk data")
# NOTE: despite its name, `df` is the parsed JSON returned by read_file (it is
# later iterated as a sequence of card dicts), not a DataFrame. The whole file
# is loaded into memory at once.
df = read_file(dst)

In [None]:
from copy import deepcopy

from pymongo import InsertOne, UpdateOne
from tqdm import tqdm
from unidecode import unidecode

# Pending bulk operations: card inserts/updates, price snapshots and EDHREC
# rank snapshots.
batch_updates = []
price_updates = []
edhrec_update = []

# Marker that yield_differences reports for a key missing on one side.
_NO_VALUE = "_NO VALUE_"

# The downloaded cards and the stored documents are both walked in "id" order
# so they can be merged in a single pass.
# sorted() builds a new list and every card is shallow-copied inside the loop,
# so `df` is left untouched without deep-copying the whole dataset.
# NOTE(review): this merge assumes MongoDB's sort order for `id` matches
# Python's string ordering, and that the server can sort the whole collection
# (the `id` index created earlier is hashed) -- confirm.
sorted_cards = sorted(df, key=lambda x: x["id"])
sorted_existing = collection.find().sort("id", 1)
next_existing = next(sorted_existing, None)

for raw_card in tqdm(sorted_cards):
    # Only top-level keys are added or removed below, so a shallow copy is
    # enough to keep the loaded data unmodified.
    card = dict(raw_card)
    card_id = card["id"]
    card["name_search"] = unidecode(card["name"]).lower()

    if card.get("_id"):
        print(f"Card should not have an _id, {card}")
        break

    # Prices arrive as strings (or empty/None); convert the present ones.
    prices = {
        price_key: float(value) if value else value
        for price_key, value in card["prices"].items()
    }
    price_updates.append(
        InsertOne({"date": last_update_datetime, "card_id": card_id, "prices": prices})
    )
    edhrec_update.append(
        InsertOne(
            {
                "date": last_update_datetime,
                "card_id": card_id,
                "edhrec_rank": card.get("edhrec_rank", None),
            }
        )
    )

    # Prices and EDHREC rank go to their own collections, not the card document.
    del card["prices"]
    card.pop("edhrec_rank", None)

    # Skip stored documents whose id sorts before the current card (ids that
    # are no longer in the bulk file, or duplicates). Without this the cursor
    # would stay stuck on such a document and every following card would be
    # inserted again as a duplicate.
    while next_existing is not None and next_existing["id"] < card_id:
        next_existing = next(sorted_existing, None)

    if next_existing is None or next_existing["id"] != card_id:
        batch_updates.append(InsertOne(card))
        continue

    db_id = next_existing.pop("_id")

    to_set = {}
    to_unset = {}
    for key, _before, after in yield_differences(next_existing, card):
        if isinstance(after, str) and after == _NO_VALUE:
            # The key no longer exists in the new data: remove it instead of
            # storing the "_NO VALUE_" marker string in the document.
            to_unset[key] = ""
        else:
            to_set[key] = after

    update = {}
    if to_set:
        update["$set"] = to_set
    if to_unset:
        update["$unset"] = to_unset

    if update:
        batch_updates.append(UpdateOne({"_id": db_id}, update))

    next_existing = next(sorted_existing, None)

In [None]:
# Number of pending operations per target: cards, price snapshots, EDHREC snapshots.
len(batch_updates), len(price_updates), len(edhrec_update)

In [None]:
# Preview the first ten pending price inserts.
price_updates[:10]

In [None]:
# Apply the pending card inserts/updates; skipped when the list is empty.
if batch_updates:
    collection.bulk_write(batch_updates)

In [None]:
# Insert the price snapshots; skipped when the list is empty.
if price_updates:
    card_stocks_daily.bulk_write(price_updates)

In [None]:
# Insert the EDHREC rank snapshots; skipped when the list is empty.
if edhrec_update:
    edhrec_daily.bulk_write(edhrec_update)