In [None]:
import sys, os

sys.path.insert(0, os.path.abspath("../.."))

from yugiquery import *

init_notebook_mode(all_interactive=True)

header("My Collection")

---

Table of Contents <a class="jp-toc-ignore"></a>
=================

*   [1  Data loading](#Data-loading)
    *   [1.1  Read collection](#Read-collection)
*   [2  Check changes](#Check-changes)
    *   [2.1  Load previous data](#Load-previous-data)
    *   [2.2  Generate changelogs](#Generate-changelogs)
    *   [2.3  Save data](#Save-data)
*   [3  Data visualization](#Data-visualization)
    *   [3.1  Full data](#Full-data)
    *   [3.2  Card types](#Card-types)
    *   [3.3  Monsters](#Monsters)
        *   [3.3.1  Attributes](#Attributes)
        *   [3.3.2  Primary types](#Primary-types)
            *   [3.3.2.1  Has effect discrimination](#Has-effect-discrimination)
            *   [3.3.2.2  Is pendulum discrimination](#Is-pendulum-discrimination)
            *   [3.3.2.3  By attribute](#By-attribute)
        *   [3.3.3  Secondary types](#Secondary-types)
            *   [3.3.3.1  By attribute](#By-attribute)
            *   [3.3.3.2  By primary type](#By-primary-type)
        *   [3.3.4  Monster types](#Monster-types)
            *   [3.3.4.1  By Attribute](#By-Attribute)
            *   [3.3.4.2  By primary type](#By-primary-type)
            *   [3.3.4.3  By secondary type](#By-secondary-type)
        *   [3.3.5  ATK](#ATK)
        *   [3.3.6  DEF](#DEF)
        *   [3.3.7  Level/Rank](#Level/Rank)
            *   [3.3.7.1  ATK statistics](#ATK-statistics)
            *   [3.3.7.2  DEF statistics](#DEF-statistics)
        *   [3.3.8  Pendulum scale](#Pendulum-scale)
            *   [3.3.8.1  ATK statistics](#ATK-statistics)
            *   [3.3.8.2  DEF statistics](#DEF-statistics)
            *   [3.3.8.3  Level/Rank statistics](#Level/Rank-statistics)
        *   [3.3.9  Link](#Link)
            *   [3.3.9.1  ATK statistics](#ATK-statistics)
        *   [3.3.10  Link Arrows](#Link-Arrows)
            *   [3.3.10.1  By combination](#By-combination)
            *   [3.3.10.2  By unique](#By-unique)
            *   [3.3.10.3  By link](#By-link)
    *   [3.4  Spell & Trap](#Spell-&-Trap)
        *   [3.4.1  Properties](#Properties)
    *   [3.5  Effect type](#Effect-type)
        *   [3.5.1  Card type discrimination](#Card-type-discrimination)
    *   [3.6  Archseries](#Archseries)
        *   [3.6.1  By card type](#By-card-type)
        *   [3.6.2  By primary type](#By-primary-type)
        *   [3.6.3  By secondary type](#By-secondary-type)
        *   [3.6.4  By monster type](#By-monster-type)
        *   [3.6.5  By property](#By-property)
    *   [3.7  Artworks](#Artworks)
        *   [3.7.1  By card type](#By-card-type)
        *   [3.7.2  By primary type](#By-primary-type)
    *   [3.8  Errata](#Errata)
        *   [3.8.1  By card type](#By-card-type)
        *   [3.8.2  By primary type](#By-primary-type)
        *   [3.8.3  By artwork](#By-artwork)
    *   [3.9  TCG & OCG status](#TCG-&-OCG-status)
        *   [3.9.1  TCG status](#TGC-status)
            *   [3.9.1.1  By card type](#By-card-type)
            *   [3.9.1.2  By monster type](#By-monster-type)
            *   [3.9.1.3  By archseries](#By-archseries)
        *   [3.9.2  OCG status](#OCG-status)
            *   [3.9.2.1  By card type](#By-card-type)
            *   [3.9.2.2  By monster type](#By-monster-type)
            *   [3.9.2.3  By archseries](#By-archseries)
        *   [3.9.3  TCG vs. OCG status](#TCG-vs.-OCG-status)
*   [4  Epilogue](#Epilogue)
    *   [4.1  HTML export](#HTML-export)
<!-- *   [4.2  Git](#Git) -->

# Data loading

## Read collection

In [None]:
# Timestamp
timestamp = arrow.utcnow()

In [None]:
collection_df = get_collection()

In [None]:
# Process the collection data frame
collection_df = find_cards(collection_df, merge_data=True)

# Check changes

## Load previous data

In [None]:
# Load the most recently saved collection (both values are None-checked below)
previous_collection_df, previous_collection_ts = load_latest_data("collection")

if previous_collection_df is not None:
    # Cast the columns shared with collection_df to collection_df's dtypes
    shared_columns = previous_collection_df.columns.intersection(collection_df.columns)
    shared_dtypes = collection_df[shared_columns].dtypes.to_dict()
    previous_collection_df = previous_collection_df.astype(shared_dtypes)

## Generate changelogs

In [None]:
if previous_collection_df is not None:
    # Compare the previous snapshot against the current one, keyed by card name
    collection_changelog = generate_changelog(previous_collection_df, collection_df, col="Name")
    if not collection_changelog.empty:
        display(collection_changelog)
        changelog_path = dirs.DATA / make_filename(
            report="collection",
            timestamp=timestamp,
            previous_timestamp=previous_collection_ts,
        )
        collection_changelog.to_csv(changelog_path, index=True)
        print("Changelog saved")
else:
    # Nothing to compare against
    collection_changelog = None
    print("Skipped")

## Save data

In [None]:
# Write a new snapshot unless a changelog was computed and turned out to be empty
if collection_changelog is None or not collection_changelog.empty:
    snapshot_path = dirs.DATA / make_filename(report="collection", timestamp=timestamp)
    collection_df.to_csv(snapshot_path, index=False)
    print("Data saved")
else:
    print("No changes. New data not saved")

# Data visualization

In [None]:
# Helper for nunique tables
nunique_drop_cols = ["Count", "Card type", "Page name", "Page URL"]

## Full data

In [None]:
collection_df

## Card types

In [None]:
print("Total number of Card types:", collection_df["Card type"].nunique())

In [None]:
# Sum the "Count" column per card type; groupby returns the groups ordered by key.
card_type_counts = collection_df.groupby(["Card type"])["Count"].sum()
# Derive the colors from the plotted index so that bar i gets the color of card type i.
# (value_counts() orders by frequency, which does not match the groupby order.)
card_type_colors = [plot.colors_dict[i] for i in card_type_counts.index]
card_type_counts.plot.bar(figsize=(14, 6), grid=True, rot=0, color=card_type_colors)
plt.show()

## Monsters

### Attributes

In [None]:
print("Total number of attributes:", collection_df["Attribute"].nunique())

Unique entries for each column grouped by "Attribute"

In [None]:
collection_df.drop(columns=nunique_drop_cols).groupby("Attribute").nunique()

In [None]:
# Skip the plot when no card has an attribute
if not collection_df["Attribute"].isna().all():
    # Sum the "Count" column per attribute; groupby returns the groups ordered by key.
    attribute_counts = collection_df.groupby(["Attribute"])["Count"].sum()
    # Derive the colors from the plotted index so that bar i gets the color of attribute i.
    # (value_counts() orders by frequency, which does not match the groupby order.)
    attribute_colors = [plot.colors_dict[i] for i in attribute_counts.index]
    attribute_counts.plot.bar(figsize=(14, 6), grid=True, rot=0, color=attribute_colors)
    plt.show()

### Primary types

In [None]:
print("Total number of primary types:", collection_df["Primary type"].nunique())

Unique entries for each column grouped by "Primary type"

In [None]:
collection_df.drop(columns=nunique_drop_cols).groupby("Primary type").nunique()

#### Has effect discrimination

In [None]:
effect = (
    pd.crosstab(
        collection_df["Primary type"],
        pd.isna(collection_df["Effect type"]),
        values=collection_df["Count"],
        aggfunc="sum",
        rownames=["Primary type"],
        colnames=["Has effect"],
    )
    .fillna(0)
    .rename(columns={True: "No Effect", False: "Effect"})
)
effect

In [None]:
monster_type_colors = {
    "No Effect": plot.colors_dict["Normal Monster"],
    "Effect": [plot.colors_dict[i] for i in effect.index],
}
effect.plot.bar(
    figsize=(14, 6),
    stacked=True,
    grid=True,
    rot=0,
    legend=True,
    color=monster_type_colors,
)
# plt.yscale('log')
plt.show()

Note: a Normal Monster can still have an effect if it is a Pendulum Monster.

#### Is pendulum discrimination

In [None]:
pendulum = (
    pd.crosstab(
        collection_df["Primary type"],
        pd.isna(collection_df["Pendulum Scale"]),
        values=collection_df["Count"],
        aggfunc="sum",
        rownames=["Primary type"],
        colnames=["Is Pendulum"],
    )
    .fillna(0)
    .rename(columns={True: "Not Pendulum", False: "Pendulum"})
)
pendulum

In [None]:
monster_type_colors_b = {
    "Pendulum": plot.colors_dict["Pendulum Monster"],
    "Not Pendulum": [plot.colors_dict[i] for i in pendulum.index],
}
pendulum.plot.bar(
    figsize=(14, 6),
    stacked=True,
    grid=True,
    rot=0,
    color=monster_type_colors_b,
    legend=True,
    title="Primary types - Is pendulum",
)
plt.show()

#### By attribute

In [None]:
primmary_crosstab = pd.crosstab(
    collection_df["Primary type"], collection_df["Attribute"], values=collection_df["Count"], aggfunc="sum"
).fillna(0)
primmary_crosstab

In [None]:
plt.figure(figsize=(14, 10))
sns.heatmap(
    primmary_crosstab.T,
    annot=True,
    fmt="g",
    cmap="viridis",
    square=True,
    norm=plot.LogNorm(),
)
plt.show()

### Secondary types

In [None]:
exploded_secondary_type = collection_df.explode("Secondary type")
print(
    "Total number of secondary types:",
    exploded_secondary_type["Secondary type"].nunique(),
)

Unique entries for each column grouped by "Secondary type"

In [None]:
exploded_secondary_type.drop(columns=["Link", "Link Arrows"] + nunique_drop_cols).groupby("Secondary type").nunique()

In [None]:
secondary_type_colors = plot.colors_dict["Effect Monster"]
if not exploded_secondary_type["Secondary type"].isna().all():
    exploded_secondary_type.groupby(["Secondary type"])["Count"].sum().plot.bar(
        figsize=(14, 6),
        stacked=True,
        grid=True,
        rot=0,
        color=secondary_type_colors,
        legend=False,
    )
    plt.show()

#### By attribute

In [None]:
secondary_crosstab = pd.crosstab(
    exploded_secondary_type["Secondary type"],
    exploded_secondary_type["Attribute"],
    values=exploded_secondary_type["Count"],
    aggfunc="sum",
).fillna(0)
secondary_crosstab

In [None]:
if not exploded_secondary_type["Secondary type"].isna().all():
    plt.figure(figsize=(8, 6))
    sns.heatmap(
        secondary_crosstab[secondary_crosstab > 0],
        annot=True,
        fmt="g",
        cmap="viridis",
        square=True,
    )
    plt.show()

#### By primary type

In [None]:
secondary_crosstab_b = pd.crosstab(
    exploded_secondary_type["Primary type"],
    exploded_secondary_type["Secondary type"],
    values=exploded_secondary_type["Count"],
    aggfunc="sum",
    margins=True,
).fillna(0)
secondary_crosstab_b

In [None]:
if not exploded_secondary_type["Secondary type"].isna().all():
    plt.figure(figsize=(10, 4))
    sns.heatmap(
        secondary_crosstab_b[secondary_crosstab_b > 0],
        annot=True,
        fmt="g",
        cmap="viridis",
        square=True,
        # norm=plot.LogNorm(),
    )
    plt.show()

### Monster types

In [None]:
print("Total number of monster types:", collection_df["Monster type"].nunique())

Unique entries for each column grouped by "Monster type"

In [None]:
collection_df.drop(columns=nunique_drop_cols).groupby("Monster type").nunique()

In [None]:
if not collection_df["Monster type"].isna().all():
    monster_type_colors = plot.colors_dict["Monster Card"]
    collection_df.groupby(["Monster type"])["Count"].sum().plot.bar(
        figsize=(14, 6), grid=True, rot=45, color=monster_type_colors
    )
    plt.show()

#### By Attribute

In [None]:
monster_crosstab = pd.crosstab(
    collection_df["Monster type"], collection_df["Attribute"], values=collection_df["Count"], aggfunc="sum"
).fillna(0)
monster_crosstab

In [None]:
if not collection_df["Monster type"].isna().all():
    plt.figure(figsize=(18, 4))
    sns.heatmap(
        monster_crosstab[monster_crosstab > 0].T,
        annot=True,
        fmt="g",
        cmap="viridis",
        square=True,
        # norm=plot.LogNorm(),
    )
    plt.show()

#### By primary type

In [None]:
monster_crosstab_b = pd.crosstab(
    collection_df["Monster type"][collection_df["Monster type"].notna()],
    collection_df["Primary type"][collection_df["Monster type"].notna()],
    values=collection_df["Count"],
    aggfunc="sum",
    dropna=False,
).fillna(0)
monster_crosstab_b

In [None]:
if not collection_df["Monster type"].isna().all():
    plt.figure(figsize=(16, 4))
    sns.heatmap(
        monster_crosstab_b[monster_crosstab_b > 0].T,
        annot=True,
        fmt="g",
        cmap="viridis",
        square=True,
        norm=plot.LogNorm(),
    )
    plt.show()

#### By secondary type

In [None]:
monster_crosstab_c = pd.crosstab(
    exploded_secondary_type["Monster type"][exploded_secondary_type["Monster type"].notna()],
    exploded_secondary_type["Secondary type"][exploded_secondary_type["Monster type"].notna()],
    values=exploded_secondary_type["Count"],
    aggfunc="sum",
    dropna=False,
).fillna(0)
monster_crosstab_c

In [None]:
if not collection_df["Monster type"].isna().all():
    plt.figure(figsize=(16, 4))
    sns.heatmap(
        monster_crosstab_c[monster_crosstab_c > 0].T,
        annot=True,
        fmt="g",
        cmap="viridis",
        square=True,
        norm=plot.LogNorm(),
    )
    plt.show()

### ATK

In [None]:
print("Total number of ATK values:", collection_df["ATK"].nunique())

Unique entries for each column grouped by "ATK"

In [None]:
collection_df.drop(columns=nunique_drop_cols).groupby("ATK").nunique().sort_index(
    key=lambda x: pd.to_numeric(x, errors="coerce")
)

In [None]:
if not collection_df["ATK"].isna().all():
    atk_colors = plot.colors_dict["Monster Card"]
    collection_df.groupby(["ATK"])["Count"].sum().sort_index(key=lambda x: pd.to_numeric(x, errors="coerce")).plot.bar(
        figsize=(16, 6), grid=True, color=atk_colors
    )
    plt.show()

### DEF

In [None]:
print("Total number of DEF values:", collection_df["DEF"].nunique())

Unique entries for each column grouped by "DEF"

In [None]:
collection_df.drop(columns=nunique_drop_cols).groupby("DEF").nunique().sort_index(
    key=lambda x: pd.to_numeric(x, errors="coerce")
)

In [None]:
if not collection_df["DEF"].isna().all():
    def_colors = plot.colors_dict["Monster Card"]
    collection_df.groupby(["DEF"])["Count"].sum().sort_index(key=lambda x: pd.to_numeric(x, errors="coerce")).plot.bar(
        figsize=(16, 6), grid=True, color=def_colors
    )
    plt.show()

### Level/Rank

Unique entries for each column grouped by "Level/Rank"

In [None]:
collection_df.drop(columns=["Link", "Link Arrows"] + nunique_drop_cols).groupby("Level/Rank").nunique().sort_index(
    key=lambda x: pd.to_numeric(x, errors="coerce")
)

In [None]:
if not collection_df["Level/Rank"].isna().all():
    stars_colors = plot.colors_dict["Level"]
    collection_df.groupby(["Level/Rank"])["Count"].sum().sort_index(
        key=lambda x: pd.to_numeric(x, errors="coerce")
    ).plot.bar(figsize=(14, 6), grid=True, rot=0, color=stars_colors)
    plt.show()

#### ATK statistics

In [None]:
# Repeat each row "Count" times, coerce both columns to numbers (values that are not
# numeric become NaN and those rows are dropped), then describe ATK per Level/Rank.
collection_df[["Level/Rank", "ATK"]].loc[collection_df.index.repeat(collection_df["Count"])].apply(
    pd.to_numeric, errors="coerce"
).dropna().astype(int).groupby("Level/Rank").describe().round(1)

#### DEF statistics

In [None]:
# Repeat each row "Count" times, coerce both columns to numbers (values that are not
# numeric become NaN and those rows are dropped), then describe DEF per Level/Rank.
collection_df[["Level/Rank", "DEF"]].loc[collection_df.index.repeat(collection_df["Count"])].apply(
    pd.to_numeric, errors="coerce"
).dropna().astype(int).groupby("Level/Rank").describe().round(1)

### Pendulum scale

Unique entries for each column grouped by "Pendulum Scale"

In [None]:
collection_df.drop(columns=["Link", "Link Arrows"] + nunique_drop_cols).groupby("Pendulum Scale").nunique().sort_index(
    key=lambda x: pd.to_numeric(x, errors="coerce")
)

In [None]:
if not collection_df["Pendulum Scale"].isna().all():
    scales_colors = plot.colors_dict["Pendulum Monster"]
    collection_df.groupby(["Pendulum Scale"])["Count"].sum().sort_index(
        key=lambda x: pd.to_numeric(x, errors="coerce")
    ).plot.bar(figsize=(14, 6), grid=True, rot=0, color=scales_colors)
    plt.show()

#### ATK statistics

In [None]:
# Repeat each row "Count" times, coerce both columns to numbers (values that are not
# numeric become NaN and those rows are dropped), then describe ATK per Pendulum Scale.
collection_df[["Pendulum Scale", "ATK"]].loc[collection_df.index.repeat(collection_df["Count"])].apply(
    pd.to_numeric, errors="coerce"
).dropna().astype(int).groupby("Pendulum Scale").describe().round(1)

#### DEF statistics

In [None]:
# Repeat each row "Count" times, coerce both columns to numbers (values that are not
# numeric become NaN and those rows are dropped), then describe DEF per Pendulum Scale.
collection_df[["Pendulum Scale", "DEF"]].loc[collection_df.index.repeat(collection_df["Count"])].apply(
    pd.to_numeric, errors="coerce"
).dropna().astype(int).groupby("Pendulum Scale").describe().round(1)

#### Level/Rank statistics

In [None]:
# Repeat each row "Count" times, coerce both columns to numbers (values that are not
# numeric become NaN and those rows are dropped), then describe Level/Rank per Pendulum Scale.
collection_df[["Pendulum Scale", "Level/Rank"]].loc[collection_df.index.repeat(collection_df["Count"])].apply(
    pd.to_numeric, errors="coerce"
).dropna().astype(int).groupby("Pendulum Scale").describe().round(1)

### Link

Unique entries for each column grouped by "Link"

In [None]:
collection_df.drop(
    columns=[
        "Primary type",
        "Secondary type",
        "Level/Rank",
        "DEF",
        "Pendulum Scale",
    ]
    + nunique_drop_cols
).groupby("Link").nunique().sort_index(key=lambda x: pd.to_numeric(x, errors="coerce"))

In [None]:
if not collection_df["Link"].isna().all():
    link_colors = plot.colors_dict["Link Monster"]
    collection_df.groupby(["Link"])["Count"].sum().sort_index(key=lambda x: pd.to_numeric(x, errors="coerce")).plot.bar(
        figsize=(14, 6), grid=True, rot=0, color=link_colors
    )
    plt.show()

#### ATK statistics

In [None]:
# Repeat each row "Count" times, coerce both columns to numbers (values that are not
# numeric become NaN and those rows are dropped), then describe ATK per Link value.
collection_df[["Link", "ATK"]].loc[collection_df.index.repeat(collection_df["Count"])].apply(
    pd.to_numeric, errors="coerce"
).dropna().astype(int).groupby("Link").describe().round(1)

### Link Arrows

#### By combination

In [None]:
print("Total number of link arrow combinations:", collection_df["Link Arrows"].nunique())

Unique entries for each column grouped by "Link Arrows" combinations

In [None]:
collection_df.drop(
    columns=[
        "Primary type",
        "Level/Rank",
        "Pendulum Scale",
        "Link",
        "Secondary type",
        "DEF",
    ]
    + nunique_drop_cols
).groupby("Link Arrows").nunique()

In [None]:
if not collection_df["Link Arrows"].isna().all():
    arrows_colors = plot.colors_dict["Link Monster"]
    collection_df.groupby(["Link Arrows"])["Count"].sum().plot.barh(
        figsize=(10, 20), grid=True, color=arrows_colors, title="Link arrows combinations"
    )
    plt.show()

#### By unique

Unique entries for each column grouped by unique "Link Arrows"

In [None]:
collection_df[collection_df["Link Arrows"].notna()].drop(
    columns=[
        "Primary type",
        "Level/Rank",
        "Pendulum Scale",
        "Secondary type",
        "DEF",
    ]
    + nunique_drop_cols
).explode("Link Arrows").groupby("Link Arrows").nunique()

In [None]:
# Skip the plot when no card has link arrows
if not collection_df["Link Arrows"].isna().all():
    # Series.explode() takes no column name: its only parameter is `ignore_index`.
    # The column name previously passed here landed in that slot and, being truthy,
    # reset the index; spell that out explicitly so the call says what it does.
    plot.arrows(collection_df["Link Arrows"].explode(ignore_index=True))

#### By link

In [None]:
# One row per ("Link Arrows" element, "Link") pair; rows missing either value are dropped
arrow_per_link = collection_df[["Link Arrows", "Link"]].explode("Link Arrows").dropna()
# Sum "Count" for every arrow/link combination; combinations that do not occur become 0.
# NOTE(review): `values` comes from the unexploded frame — presumably pandas aligns it to
# the exploded rows by index label; confirm.
arrow_crosstab = pd.crosstab(
    arrow_per_link["Link Arrows"], arrow_per_link["Link"], values=collection_df["Count"], aggfunc="sum"
).fillna(0)
arrow_crosstab

In [None]:
if not arrow_crosstab.empty:
    plt.figure(figsize=(10, 6))
    sns.heatmap(
        arrow_crosstab[arrow_crosstab > 0].T,
        annot=True,
        fmt="g",
        cmap="viridis",
        square=True,
        # norm=plot.LogNorm(),
    )
    plt.show()

## Spell & Trap

### Properties

In [None]:
print("Total number of properties:", collection_df["Property"].nunique())

Unique entries for each column grouped by "Property"

In [None]:
collection_df.drop(columns=nunique_drop_cols).groupby("Property").nunique()

In [None]:
# Skip the plot when no card has a property
if not collection_df["Property"].isna().all():
    # Sum the "Count" column per property; groupby returns the groups ordered by key.
    property_counts = collection_df.groupby(["Property"])["Count"].sum()
    # Look up the card type each property appears with (first occurrence wins), so every bar
    # can be colored by the card type of the property it actually represents. The previous
    # color list followed value_counts() frequency order, which does not match the bar order.
    # Assumes every property belongs to a single card type — TODO confirm.
    property_card_type = (
        collection_df.dropna(subset=["Card type", "Property"])
        .drop_duplicates(subset="Property")
        .set_index("Property")["Card type"]
    )
    st_colors = [plot.colors_dict[property_card_type[i]] for i in property_counts.index]
    property_counts.plot.bar(figsize=(14, 6), grid=True, rot=45, color=st_colors)
    plt.show()

## Effect type

In [None]:
print("Total number of effect types:", collection_df["Effect type"].explode().nunique())

Unique entries for each column grouped by "Effect type"

In [None]:
collection_df.drop(columns=nunique_drop_cols).explode("Effect type").groupby("Effect type").nunique()

### Card type discrimination

In [None]:
st_diff = (
    collection_df[["Card type", "Effect type", "Count"]]
    .explode("Effect type")
    .groupby(["Effect type", "Card type"])["Count"]
    .sum()
    .unstack(0)
    .fillna(0)
    .astype(int)
)
st_diff

In [None]:
if not st_diff.empty:
    st_diff_colors = {
        "Monster Card": plot.colors_dict["Monster Card"],
        "Spell Card": plot.colors_dict["Spell Card"],
        "Trap Card": plot.colors_dict["Trap Card"],
    }
    st_diff.plot.bar(figsize=(14, 6), stacked=True, grid=True, rot=45, color=st_diff_colors)
    plt.show()

## Archseries

In [None]:
exploded_archseries = collection_df.explode("Archseries")
print("Total number of Archseries:", exploded_archseries["Archseries"].nunique())

Unique entries for each column grouped by "Archseries"

In [None]:
exploded_archseries.drop(columns=nunique_drop_cols).groupby("Archseries").nunique()

In [None]:
if not exploded_archseries["Archseries"].isna().all():
    exploded_archseries.groupby(["Archseries"])["Count"].sum().plot.barh(grid=True, title="Archtypes/Series")
    plt.show()

### By card type

In [None]:
archseries_crosstab = pd.crosstab(
    exploded_archseries["Archseries"],
    exploded_archseries["Card type"],
    margins=True,
    values=exploded_archseries["Count"],
    aggfunc="sum",
).fillna(0)
archseries_crosstab

### By primary type

In [None]:
archseries_crosstab_b = pd.crosstab(
    exploded_archseries["Archseries"],
    exploded_archseries["Primary type"],
    margins=True,
    values=exploded_archseries["Count"],
    aggfunc="sum",
).fillna(0)
archseries_crosstab_b



### By secondary type

In [None]:
exploded_archseries_secondary_type = exploded_archseries[["Archseries", "Secondary type", "Count"]].explode("Secondary type")
archseries_crosstab_c = pd.crosstab(
    exploded_archseries_secondary_type["Archseries"],
    exploded_archseries_secondary_type["Secondary type"],
    values=exploded_archseries_secondary_type["Count"],
    aggfunc="sum",
    margins=True,
).fillna(0)
archseries_crosstab_c

### By monster type

In [None]:
archseries_crosstab_d = pd.crosstab(
    exploded_archseries["Archseries"],
    exploded_archseries["Monster type"],
    margins=True,
    values=exploded_archseries["Count"],
    aggfunc="sum",
).fillna(0)
archseries_crosstab_d

### By property

In [None]:
archseries_crosstab_e = pd.crosstab(
    exploded_archseries["Archseries"],
    exploded_archseries["Property"],
    margins=True,
    values=exploded_archseries["Count"],
    aggfunc="sum",
).fillna(0)
archseries_crosstab_e

## Artworks

In [None]:
print(
    "Total number of cards with edited or alternate artworks:",
    collection_df["Artwork"].count(),
)

In [None]:
collection_df[["Name", "Password", "TCG status", "OCG status", "Artwork"]][collection_df["Artwork"].notna()]

In [None]:
# Number of rows per distinct "Artwork" value
artwork_value_counts = collection_df["Artwork"].value_counts()
# The diagram is only drawn when exactly three distinct values are present
if len(artwork_value_counts) == 3:
    alternate_only = artwork_value_counts[("Alternate",)]
    edited_only = artwork_value_counts[("Edited",)]
    alternate_and_edited = artwork_value_counts[("Alternate", "Edited")]

    plt.figure(figsize=(12, 8))
    plt.title("Artworks")
    plot.venn2(
        subsets=(alternate_only, edited_only, alternate_and_edited),
        set_labels=("Alternate artwork", "Edited artwork"),
    )
    plt.show()

### By card type

In [None]:
artwork_crosstab = pd.crosstab(
    collection_df["Artwork"].astype(str), collection_df["Card type"], values=collection_df["Count"], aggfunc="sum"
).fillna(0)
artwork_crosstab

### By primary type

In [None]:
artwork_crosstab_b = pd.crosstab(
    collection_df["Artwork"].astype(str), collection_df["Primary type"], values=collection_df["Count"], aggfunc="sum"
).fillna(0)
artwork_crosstab_b

More granularity is unnecessary

## Errata

In [None]:
print("Total number of cards with errata:", collection_df["Errata"].count())

In [None]:
collection_df[["Name", "Password", "TCG status", "OCG status", "Errata"]][collection_df["Errata"].notna()]

Unique entries for each column grouped by "Errata"

In [None]:
errata_counts = collection_df.groupby("Errata").nunique().sort_values("Name", ascending=False)
errata_counts

In [None]:
# Order the per-errata name counts by label length first, then by the label itself
sorted_errata_name_counts = errata_counts["Name"].sort_index(key=lambda x: [(len(i), i) for i in x])

# Only draw the diagram when both single-element labels ("Name",) and ("Type",) are present
if ("Name",) in sorted_errata_name_counts and ("Type",) in sorted_errata_name_counts:
    # NOTE(review): membership is tested with the tuple ("Any",) but the drop below uses the
    # plain string "Any" — confirm that both refer to the same index label.
    if ("Any",) in sorted_errata_name_counts:
        sorted_errata_name_counts = sorted_errata_name_counts.drop("Any")
    plt.figure(figsize=(12, 8))
    plt.title("Errata")
    # Set labels are the first element of every index label except the last one
    # (presumably the last entry is the combined Name+Type subset — verify).
    plot.venn2(
        subsets=sorted_errata_name_counts,
        set_labels=sorted_errata_name_counts.index[:-1].str[0],
    )
    plt.show()

### By card type

In [None]:
collection_df["Errata"][0]

In [None]:
# TODO
errata_crosstab = pd.crosstab(
    collection_df["Errata"].astype(str), collection_df["Card type"], values=collection_df["Count"], aggfunc="sum"
).fillna(0)
errata_crosstab.sort_values(by=errata_crosstab.columns.tolist(), ascending=False)

### By primary type

In [None]:
# TODO
errata_crosstab_b = pd.crosstab(
    collection_df["Errata"].astype(str), collection_df["Primary type"], values=collection_df["Count"], aggfunc="sum"
).fillna(0)
errata_crosstab_b.sort_values(by=errata_crosstab_b.columns.tolist(), ascending=False)

More granularity is unnecessary

### By artwork

In [None]:
# TODO
errata_crosstab_c = pd.crosstab(
    collection_df["Artwork"].astype(str), collection_df["Errata"].astype(str), values=collection_df["Count"], aggfunc="sum"
).fillna(0)
errata_crosstab_c.sort_values(by=errata_crosstab_c.columns.tolist(), ascending=False)

## TCG & OCG status

### TCG status <a id="TGC-status"></a>

In [None]:
print("Total number of TCG status:", collection_df["TCG status"].nunique())

Unique entries for each column grouped by "TCG status"

In [None]:
collection_df.drop(columns=["Page name", "Page URL"]).groupby("TCG status", dropna=False).nunique()

In [None]:
collection_df.groupby(["TCG status"])["Count"].sum().plot.bar(figsize=(14, 6), logy=False, grid=True, rot=0)
plt.show()

#### By card type

In [None]:
# Remove unlimited
# "Count" summed per card type and TCG status, without the "Unlimited" column.
# errors="ignore" keeps the cell from raising KeyError when no card in the
# collection has the "Unlimited" status.
tcg_crosstab = (
    pd.crosstab(collection_df["Card type"], collection_df["TCG status"], values=collection_df["Count"], aggfunc="sum")
    .fillna(0)
    .drop(["Unlimited"], axis=1, errors="ignore")
)
tcg_crosstab

In [None]:
if not tcg_crosstab.empty:
    plt.figure(figsize=(12, 6))
    sns.heatmap(
        tcg_crosstab[tcg_crosstab > 0],
        annot=True,
        fmt="g",
        cmap="viridis",
        # norm=plot.LogNorm(),
    )
    plt.show()

#### By monster type

In [None]:
# Remove unlimited
# "Count" summed per monster type and TCG status, without the "Unlimited" column.
# errors="ignore" keeps the cell from raising KeyError when no card in the
# collection has the "Unlimited" status.
tcg_crosstab_b = (
    pd.crosstab(collection_df["Monster type"], collection_df["TCG status"], values=collection_df["Count"], aggfunc="sum")
    .fillna(0)
    .drop(["Unlimited"], axis=1, errors="ignore")
)
tcg_crosstab_b

In [None]:
if not tcg_crosstab_b.empty:
    plt.figure(figsize=(14, 4))
    sns.heatmap(
        tcg_crosstab_b[tcg_crosstab_b > 0].T.dropna(how="all", axis=1),
        annot=True,
        fmt="g",
        cmap="viridis",
        square=True,
    )
    plt.show()

#### By archseries

In [None]:
# Remove unlimited
# "Count" summed per archseries and TCG status, with margins.
# Rows whose TCG status is "Unlimited" get a missing archseries via .where().
# The mask must test "TCG status": this is the TCG table, and the previous mask
# tested "OCG status" (copied from the OCG cell).
tcg_crosstab_c = pd.crosstab(
    exploded_archseries["Archseries"].where(exploded_archseries["TCG status"] != "Unlimited"),
    exploded_archseries["TCG status"],
    values=collection_df["Count"],
    aggfunc="sum",
    margins=True,
).fillna(0)
tcg_crosstab_c

### OCG status

In [None]:
print("Total number of OCG status:", collection_df["OCG status"].nunique())

Unique entries for each column grouped by "OCG status"

In [None]:
collection_df.drop(columns=["Page name", "Page URL"]).groupby("OCG status", dropna=False).nunique()

In [None]:
collection_df.groupby(["OCG status"])["Count"].sum().plot.bar(figsize=(14, 6), logy=False, grid=True, rot=0)
plt.show()

#### By card type

In [None]:
# Remove unlimited
# "Count" summed per card type and OCG status, without the "Unlimited" column.
# errors="ignore" keeps the cell from raising KeyError when no card in the
# collection has the "Unlimited" status.
ocg_crosstab = (
    pd.crosstab(collection_df["Card type"], collection_df["OCG status"], values=collection_df["Count"], aggfunc="sum")
    .fillna(0)
    .drop(["Unlimited"], axis=1, errors="ignore")
)
ocg_crosstab

In [None]:
if not ocg_crosstab.empty:
    plt.figure(figsize=(12, 6))
    sns.heatmap(ocg_crosstab[ocg_crosstab > 0], annot=True, fmt="g", cmap="viridis")
    plt.show()

#### By monster type

In [None]:
# "Count" summed per monster type and OCG status, without the "Unlimited" column.
# errors="ignore" keeps the cell from raising KeyError when no card in the
# collection has the "Unlimited" status.
ocg_crosstab_b = (
    pd.crosstab(collection_df["Monster type"], collection_df["OCG status"], values=collection_df["Count"], aggfunc="sum")
    .fillna(0)
    .drop(["Unlimited"], axis=1, errors="ignore")
)
ocg_crosstab_b

In [None]:
if not ocg_crosstab_b.empty:
    plt.figure(figsize=(14, 4))
    sns.heatmap(
        ocg_crosstab_b[ocg_crosstab_b > 0].T.dropna(how="all", axis=1),
        annot=True,
        fmt="g",
        cmap="viridis",
        square=True,
    )
    plt.show()

#### By archseries

In [None]:
# Remove unlimited
# "Count" summed per archseries and OCG status, with margins.
# .where() replaces the archseries with a missing value on rows whose OCG status is "Unlimited".
# NOTE(review): `values` comes from the unexploded frame — presumably aligned to the
# exploded rows by index label; confirm.
ocg_crosstab_c = pd.crosstab(
    exploded_archseries["Archseries"].where(exploded_archseries["OCG status"] != "Unlimited"),
    exploded_archseries["OCG status"],
    values=collection_df["Count"],
    aggfunc="sum",
    margins=True,
).fillna(0)
ocg_crosstab_c

### TCG vs. OCG status

In [None]:
cg_crosstab = pd.crosstab(
    collection_df["OCG status"],
    collection_df["TCG status"],
    values=collection_df["Count"],
    aggfunc="sum",
    dropna=False,
    margins=False,
).fillna(0)
cg_crosstab

In [None]:
plt.figure(figsize=(10, 8))
sns.heatmap(
    cg_crosstab[cg_crosstab > 0],
    annot=True,
    fmt="g",
    cmap="viridis",
    square=True,
    norm=plot.LogNorm(),
)
plt.show()

# Epilogue

In [None]:
# benchmark(report='Collection',timestamp=timestamp)

In [None]:
footer(timestamp)

## HTML export

In [None]:
# Save the notebook to disk before generating the HTML report
save_notebook()

In [None]:
# export_notebook(dirs.NOTEBOOKS.user / "Collection.ipynb")

## Git

In [None]:
# git.commit("*[Cc]ollection*", f"Collection update - {timestamp.isoformat()}")