Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scryfall data filtering improvements #17

Merged
merged 4 commits into from
Jan 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,9 @@ Changelog
Development
-----------

- ...
- Fix filtering of digital only cards
- Switch from inclusion to exclusion for set types
- Add exclusion of card layouts (to catch tokens in non-token sets)

2.3.0
-----
Expand Down
2 changes: 2 additions & 0 deletions TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

- Group sets by parent set is xlsx output
- Block? probably not
- At the very least, merge promos into somewhere?
- Filter non-English card sets
- Option to filter out non-English sets / cards
- diffs to stdout
- Use setuptools-scm so I don't have to think about releases as much
58 changes: 33 additions & 25 deletions mtg_ssm/containers/bundles.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
"""Data bundle definitions."""

from typing import List, NamedTuple, Set
from typing import List, NamedTuple, Optional, Set

from mtg_ssm.scryfall.models import ScryCard, ScrySet, ScrySetType
from mtg_ssm.scryfall.models import ScryCard, ScryCardLayout, ScrySet, ScrySetType


class ScryfallDataSet(NamedTuple):
Expand All @@ -12,29 +12,37 @@ class ScryfallDataSet(NamedTuple):
cards: List[ScryCard]


def remove_digital(scryfall_data: ScryfallDataSet) -> ScryfallDataSet:
"""Filter a ScryfallDataSet to remove all digital only sets and cards."""
rejected_setcodes = set()
accepted_sets = []
for set_ in scryfall_data.sets:
if set_.digital:
rejected_setcodes.add(set_.code)
else:
accepted_sets.append(set_)
accepted_cards = [c for c in scryfall_data.cards if c.set not in rejected_setcodes]
return ScryfallDataSet(sets=accepted_sets, cards=accepted_cards)


def filter_set_types(
scryfall_data: ScryfallDataSet, set_types: Set[ScrySetType]
def filter_cards_and_sets(
scryfall_data: ScryfallDataSet,
*,
exclude_set_types: Optional[Set[ScrySetType]] = None,
exclude_card_layouts: Optional[Set[ScryCardLayout]] = None,
exclude_digital: bool = False,
) -> ScryfallDataSet:
"""Filter a ScryfallDataSet to include only specified set types."""
rejected_setcodes = set()
accepted_sets = []
"""Filter a ScryfallDataSet to exclude desired set types, card layouts, and digital only products."""
accepted_setcodes = set()
for set_ in scryfall_data.sets:
if set_.set_type in set_types:
accepted_sets.append(set_)
else:
rejected_setcodes.add(set_.code)
accepted_cards = [c for c in scryfall_data.cards if c.set not in rejected_setcodes]
if exclude_set_types and set_.set_type in exclude_set_types:
continue
if exclude_digital and set_.digital:
continue
accepted_setcodes.add(set_.code)

accepted_cards = []
nonempty_setcodes = set()
for card in scryfall_data.cards:
if card.set not in accepted_setcodes:
continue
if exclude_card_layouts and card.layout in exclude_card_layouts:
continue
if exclude_digital and card.digital:
continue
accepted_cards.append(card)
nonempty_setcodes.add(card.set)

accepted_sets = [
s
for s in scryfall_data.sets
if s.code in accepted_setcodes and s.code in nonempty_setcodes
]
return ScryfallDataSet(sets=accepted_sets, cards=accepted_cards)
12 changes: 12 additions & 0 deletions mtg_ssm/containers/counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,14 @@
from mtg_ssm.containers.indexes import Oracle


class Error(Exception):
"""Base exception for this module."""


class CardNotFoundError(Error):
"""Raised when we attempt to add a card to a count but cannot find it in the oracle."""


class CountType(str, enum.Enum):
"""Enum for possible card printing types (nonfoil, foil)."""

Expand Down Expand Up @@ -39,6 +47,10 @@ def aggregate_card_counts(
if value:
counts[count_type] = value + counts.get(count_type, 0)
if counts:
if scryfall_id not in oracle.index.id_to_card:
raise CardNotFoundError(
f"Found counts for card={scryfall_id} not found scryfall data"
)
card_counts[scryfall_id] = counts
return card_counts

Expand Down
63 changes: 52 additions & 11 deletions mtg_ssm/ssm.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from mtg_ssm.containers.collection import MagicCollection
from mtg_ssm.containers.indexes import Oracle
from mtg_ssm.scryfall import fetcher
from mtg_ssm.scryfall.models import ScrySetType
from mtg_ssm.scryfall.models import ScryCardLayout, ScrySetType


def epilog() -> str:
Expand All @@ -32,13 +32,28 @@ def set_type_list(value: str) -> Set[ScrySetType]:
set_types.add(ScrySetType(set_str))
except ValueError as err:
msg = (
f"{set_str} in {value} is not a valid set_type, please use a commas separated list of values from: "
f"{set_str} in {value} is not a valid set_type, please use a comma separated list of values from: "
+ ", ".join(ScrySetType)
)
raise argparse.ArgumentTypeError(msg) from err
return set_types


def card_layout_list(value: str) -> Set[ScryCardLayout]:
"""Argparse type to convert a string to a set of Scryfall Card Layouts."""
card_layouts = set()
for layout_str in value.split(","):
try:
card_layouts.add(ScryCardLayout(layout_str))
except ValueError as err:
msg = (
f"{layout_str} in {value} is not a valid layout_type, please use a comma separated list of values from: "
+ ", ".join(ScryCardLayout)
)
raise argparse.ArgumentTypeError(msg) from err
return card_layouts


def get_args(args: Optional[List[str]] = None) -> argparse.Namespace:
"""Parse and return application arguments."""
parser = argparse.ArgumentParser(
Expand Down Expand Up @@ -66,15 +81,29 @@ def get_args(args: Optional[List[str]] = None) -> argparse.Namespace:
"May be repeated for multiple different extensions.",
)

default_set_types = set(ScrySetType) - {ScrySetType.MEMORABILIA, ScrySetType.TOKEN}
default_exclude_set_types = {ScrySetType.MEMORABILIA, ScrySetType.TOKEN}
parser.add_argument(
"--set-types",
default=",".join(default_set_types),
"--exclude-set-types",
default=",".join(default_exclude_set_types),
type=set_type_list,
help="List of set types to include as a comma separted list of values from: "
help="List of set types to exclude from data as a comma separated list of values from: "
+ ", ".join(ScrySetType),
)

default_exclude_card_layouts = {
ScryCardLayout.ART_SERIES,
ScryCardLayout.DOUBLE_FACED_TOKEN,
ScryCardLayout.EMBLEM,
ScryCardLayout.TOKEN,
}
parser.add_argument(
"--exclude-card-layouts",
default=",".join(default_exclude_card_layouts),
type=card_layout_list,
help="List of card layouts to exclude from data as a comma separated list of values from: "
+ ", ".join(ScryCardLayout),
)

# Commands
subparsers = parser.add_subparsers(dest="action", title="actions")
subparsers.required = True
Expand Down Expand Up @@ -136,12 +165,20 @@ def get_args(args: Optional[List[str]] = None) -> argparse.Namespace:
return parsed_args


def get_oracle(set_types: Set[ScrySetType], include_digital: bool) -> Oracle:
def get_oracle(
*,
exclude_set_types: Set[ScrySetType],
exclude_card_layouts: Set[ScryCardLayout],
include_digital: bool,
) -> Oracle:
"""Get a card_db with current mtgjson data."""
scrydata = fetcher.scryfetch()
scrydata = bundles.filter_set_types(scrydata, set_types)
if not include_digital:
scrydata = bundles.remove_digital(scrydata)
scrydata = bundles.filter_cards_and_sets(
scrydata,
exclude_set_types=exclude_set_types,
exclude_card_layouts=exclude_card_layouts,
exclude_digital=not include_digital,
)
return Oracle(scrydata)


Expand Down Expand Up @@ -228,7 +265,11 @@ def diff_cmd(args: argparse.Namespace, oracle: Oracle) -> None:
def main() -> None:
"""Get args and run the appropriate command."""
args = get_args()
oracle = get_oracle(args.set_types, args.include_digital)
oracle = get_oracle(
exclude_set_types=args.exclude_set_types,
exclude_card_layouts=args.exclude_card_layouts,
include_digital=args.include_digital,
)
args.func(args, oracle)


Expand Down
48 changes: 39 additions & 9 deletions tests/containers/test_bundles.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,45 @@
from uuid import UUID

from mtg_ssm.containers import bundles
from mtg_ssm.scryfall.models import ScryCardLayout


def test_remove_digital(scryfall_data: bundles.ScryfallDataSet) -> None:
assert "vma" in {s.code for s in scryfall_data.sets}
assert UUID("116ec16c-3b4b-45be-83c8-333bccc29e35") in {
c.id for c in scryfall_data.cards
}
digital_removed = bundles.remove_digital(scryfall_data)
assert "vma" not in {s.code for s in digital_removed.sets}
assert UUID("116ec16c-3b4b-45be-83c8-333bccc29e35") not in {
c.id for c in digital_removed.cards
}
set_codes = {s.code for s in scryfall_data.sets}
card_ids = {c.id for c in scryfall_data.cards}
card_names = {c.name for c in scryfall_data.cards}

assert "vma" in set_codes
assert UUID("116ec16c-3b4b-45be-83c8-333bccc29e35") in card_ids

assert "khm" in set_codes
assert "Cosmos Elixir" in card_names
assert "A-Cosmos Elixir" in card_names

digital_removed = bundles.filter_cards_and_sets(scryfall_data, exclude_digital=True)
set_codes2 = {s.code for s in digital_removed.sets}
card_ids2 = {c.id for c in digital_removed.cards}
card_names2 = {c.name for c in digital_removed.cards}

assert "vma" not in set_codes2
assert UUID("116ec16c-3b4b-45be-83c8-333bccc29e35") not in card_ids2

assert "khm" in set_codes2
assert "Cosmos Elixir" in card_names2
assert "A-Cosmos Elixir" not in card_names2


def test_exclude_token_layout(scryfall_data: bundles.ScryfallDataSet) -> None:
set_codes = {s.code for s in scryfall_data.sets}
card_names = {c.name for c in scryfall_data.cards}

assert "p03" in set_codes and "sld" in set_codes
assert "Goblin" in card_names

tokens_removed = bundles.filter_cards_and_sets(
scryfall_data, exclude_card_layouts={ScryCardLayout.TOKEN}
)
set_codes2 = {s.code for s in tokens_removed.sets}
card_names2 = {c.name for c in tokens_removed.cards}
assert "p03" not in set_codes2 and "sld" in set_codes2
assert "Goblin" not in card_names2
43 changes: 27 additions & 16 deletions tests/containers/test_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from mtg_ssm.containers import counts
from mtg_ssm.containers.bundles import ScryfallDataSet
from mtg_ssm.containers.counts import CountType, ScryfallCardCount
from mtg_ssm.containers.counts import CardNotFoundError, CountType, ScryfallCardCount
from mtg_ssm.containers.indexes import Oracle
from mtg_ssm.containers.legacy import NoMatchError

Expand Down Expand Up @@ -111,37 +111,48 @@ def test_diff_card_counts(
id="no id",
),
pytest.param(
[{"scryfall_id": UUID("00000000-0000-0000-0000-000000000001"), "foil": 1}],
{UUID("00000000-0000-0000-0000-000000000001"): {counts.CountType.FOIL: 1}},
[{"scryfall_id": UUID("9d26f171-5bb6-463c-8473-53b6cc27ed66"), "foil": 1}],
{UUID("9d26f171-5bb6-463c-8473-53b6cc27ed66"): {counts.CountType.FOIL: 1}},
id="id and int",
),
pytest.param(
[{"scryfall_id": "00000000-0000-0000-0000-000000000001", "foil": "1"}],
{UUID("00000000-0000-0000-0000-000000000001"): {counts.CountType.FOIL: 1}},
[{"scryfall_id": "9d26f171-5bb6-463c-8473-53b6cc27ed66", "foil": "1"}],
{UUID("9d26f171-5bb6-463c-8473-53b6cc27ed66"): {counts.CountType.FOIL: 1}},
id="text and text",
),
pytest.param(
[{"scryfall_id": "00000000-0000-0000-0000-000000000001", "nonfoil": "1"}],
{},
marks=pytest.mark.xfail(raises=CardNotFoundError),
id="count with bad id",
),
pytest.param(
[{"scryfall_id": "00000000-0000-0000-0000-000000000001", "nonfoil": ""}],
{},
id="no count with bad id",
),
pytest.param(
[
{
"scryfall_id": "00000000-0000-0000-0000-000000000001",
"scryfall_id": "9d26f171-5bb6-463c-8473-53b6cc27ed66",
"foil": "1",
"nonfoil": "",
}
],
{UUID("00000000-0000-0000-0000-000000000001"): {counts.CountType.FOIL: 1}},
{UUID("9d26f171-5bb6-463c-8473-53b6cc27ed66"): {counts.CountType.FOIL: 1}},
id="empty string",
),
pytest.param(
[
{"scryfall_id": "00000000-0000-0000-0000-000000000001", "foil": "1"},
{"scryfall_id": "00000000-0000-0000-0000-000000000002", "foil": "0"},
{"scryfall_id": "00000000-0000-0000-0000-000000000003", "nonfoil": "1"},
{"scryfall_id": "9d26f171-5bb6-463c-8473-53b6cc27ed66", "foil": "1"},
{"scryfall_id": "758abd53-6ad2-406e-8615-8e48678405b4", "foil": "0"},
{"scryfall_id": "0180d9a8-992c-4d55-8ac4-33a587786993", "nonfoil": "1"},
],
{
UUID("00000000-0000-0000-0000-000000000001"): {
UUID("9d26f171-5bb6-463c-8473-53b6cc27ed66"): {
counts.CountType.FOIL: 1
},
UUID("00000000-0000-0000-0000-000000000003"): {
UUID("0180d9a8-992c-4d55-8ac4-33a587786993"): {
counts.CountType.NONFOIL: 1
},
},
Expand All @@ -150,20 +161,20 @@ def test_diff_card_counts(
pytest.param(
[
{
"scryfall_id": UUID("00000000-0000-0000-0000-000000000001"),
"scryfall_id": UUID("9d26f171-5bb6-463c-8473-53b6cc27ed66"),
"foil": 1,
},
{
"scryfall_id": UUID("00000000-0000-0000-0000-000000000001"),
"scryfall_id": UUID("9d26f171-5bb6-463c-8473-53b6cc27ed66"),
"nonfoil": 1,
},
{
"scryfall_id": UUID("00000000-0000-0000-0000-000000000001"),
"scryfall_id": UUID("9d26f171-5bb6-463c-8473-53b6cc27ed66"),
"foil": 1,
},
],
{
UUID("00000000-0000-0000-0000-000000000001"): {
UUID("9d26f171-5bb6-463c-8473-53b6cc27ed66"): {
counts.CountType.FOIL: 2,
counts.CountType.NONFOIL: 1,
}
Expand Down
Loading