In [None]:
from yugiquery import *

# Enable interactive rendering for all displayed tables
# (presumably itables' init_notebook_mode re-exported by yugiquery — TODO confirm).
init_notebook_mode(all_interactive=True)

# Emit the report header for the "Timeline" notebook
header("Timeline")

---

Table of Contents <a class="jp-toc-ignore"></a>
=================
* [1 Data preparation](#data-preparation)
  * [1.1 Load data](#load-data)
  * [1.2 Format data](#format-data)
  * [1.3 Merge data](#merge-data)
* [2 Data visualization](#data-visualization)
  * [2.1 Debut](#debut)
    * [2.1.1 By Format](#by-format)
    * [2.1.2 By Region](#by-region)
    * [2.1.3 By Card type](#by-card-type)
    * [2.1.4 By Primary type](#by-primary-type)
    * [2.1.5 By Secondary type](#by-secondary-type)
    * [2.1.6 By Attribute](#by-attribute)
    * [2.1.7 By Monster type](#by-monster-type)
    * [2.1.8 By Level/Rank](#by-level/rank)
    * [2.1.9 By Pendulum scale](#by-pendulum-scale)
    * [2.1.10 By Link](#by-link)
    * [2.1.11 By ATK](#by-atk)
    * [2.1.12 By DEF](#by-def)
  * [2.2 Last release](#last-release)
    * [2.2.1 By Region](#by-region)
    * [2.2.2 By Card type](#by-card-type)
    * [2.2.3 By Primary type](#by-primary-type)
    * [2.2.4 By Secondary type](#by-secondary-type)
    * [2.2.5 By Attribute](#by-attribute)
    * [2.2.6 By Monster type](#by-monster-type)
    * [2.2.7 By Level/Rank](#by-level/rank)
    * [2.2.8 By Pendulum scale](#by-pendulum-scale)
    * [2.2.9 By Link](#by-link)
    * [2.2.10 By ATK](#by-atk)
    * [2.2.11 By DEF](#by-def)
  * [2.3 All releases](#all-releases)
    * [2.3.1 By Region](#by-region)
    * [2.3.2 By Card type](#by-card-type)
    * [2.3.3 By Primary type](#by-primary-type)
    * [2.3.4 By Secondary type](#by-secondary-type)
    * [2.3.5 By Attribute](#by-attribute)
    * [2.3.6 By Monster type](#by-monster-type)
    * [2.3.7 By Level/Rank](#by-level/rank)
    * [2.3.8 By Pendulum scale](#by-pendulum-scale)
    * [2.3.9 By Link](#by-link)
    * [2.3.10 By ATK](#by-atk)
    * [2.3.11 By DEF](#by-def)
* [3 Debug](#debug)
  * [3.1 Merge failed](#merge-failed)
* [4 Epilogue](#epilogue)
  * [4.1 HTML export](#html-export)
  * [4.2 Git](#git)

# Data preparation

In [None]:
# UTC time of this run; reused by the benchmark call and the git commit message in the epilogue
timestamp = arrow.utcnow()

## Load data

In [None]:
# Load list of important dates
with open(dirs.get_asset("json", "dates.json"), "r") as f:
    dates_json = json.load(f)

# Anime series dates: one row per title, every cell parsed as a day-first date
anime_df = (
    pd.DataFrame(dates_json["anime"]["series"])
    .set_index("title")
    .map(pd.to_datetime, dayfirst=True)
)

# Rule dates parsed the same way; the first two rows are skipped
rules_df = (
    pd.DataFrame(dates_json["rules"])
    .set_index("title")
    .map(pd.to_datetime, dayfirst=True)
    .iloc[2:]  # Ignore old rules
)

In [None]:
# Get the latest saved file of each kind, if it exists.
# load_latest_data returns a pair; only the first item (the data) is kept.
# A missing source is detected in the next cell by checking for None.
all_cards_df, _ = load_latest_data("cards")
all_speed_df, _ = load_latest_data("speed")
set_lists_df, _ = load_latest_data("sets")

## Format data

In [None]:
df_list = [all_cards_df, all_speed_df, set_lists_df]

# All three sources are required; abort as soon as one is missing
if any(item is None for item in df_list):
    raise SystemExit("Not enough files to proceed. Aborting!")

# Normalized merge key: lower-cased name with every "#" removed
for df in df_list:
    df["index"] = df["Name"].str.lower().str.replace("#", "")

## Merge data

In [None]:
# Stack both card tables, drop exact duplicate rows, then inner-join the set lists on the normalized key
full_df = (
    pd.concat([all_cards_df, all_speed_df])
    .drop_duplicates(ignore_index=True)
    .merge(set_lists_df, how="inner", on="index")
    .convert_dtypes()
)

# Columns present on both sides come out of the merge with _x (cards) / _y (sets) suffixes
full_df["Modification date"] = full_df[["Modification date_x", "Modification date_y"]].max(axis=1)
full_df["Name"] = full_df["Name_x"].fillna(full_df["Name_y"])

# Remove the merge key and the suffixed originals, and give the two page URLs explicit names
full_df = full_df.drop(
    columns=["index", "Name_x", "Name_y", "Modification date_x", "Modification date_y"]
).rename(columns={"Page URL_x": "Card page URL", "Page URL_y": "Set page URL"})

# "Name" was appended last; move it to the front
full_df = full_df[[full_df.columns[-1], *full_df.columns[:-1]]]

In [None]:
def get_release_by(column, operation="debut", data=None):
    """
    Count, per date, how many card names have their debut / first / last release on that date.

    Parameters
    ----------
    column : str or None
        Column used to break the counts down (it is exploded before grouping).
        ``None`` produces a single overall count.
    operation : {"debut", "last", "first"}, default "debut"
        ``"debut"`` takes, per name, the earliest value across every column whose
        name contains "debut" (case-insensitive). ``"first"`` / ``"last"`` take the
        minimum / maximum of the ``"Release"`` column, ignoring rows without a release.
    data : pandas.DataFrame, optional
        Frame to aggregate. When omitted, the notebook-level ``full_df`` is used,
        which keeps existing calls working unchanged.

    Returns
    -------
    pandas.DataFrame
        Indexed by date, with the index named "Debut", "First release" or
        "Last release". With ``column=None`` it holds a single column of counts;
        otherwise one column per distinct value of ``column``, with missing
        combinations filled with 0.

    Raises
    ------
    ValueError
        If ``operation`` is not one of the supported values.
    """
    # Fail fast, before touching any data
    if operation not in ("debut", "last", "first"):
        raise ValueError("Invalid operation. Choose from 'debut', 'last', or 'first'.")

    source = full_df if data is None else data
    group_cols = ["Name"] if column is None else [column, "Name"]

    if operation == "debut":
        df = source if column is None else source.explode(column)
        debut_cols = source.filter(regex="(?i)(debut)").columns
        # Earliest date per group within each debut column, then across the columns
        result = df.groupby(group_cols)[debut_cols].min().min(axis=1)
    else:
        df = source[source["Release"].notna()]
        if column is not None:
            df = df.explode(column)
        agg_func = "max" if operation == "last" else "min"
        result = df.groupby(group_cols)["Release"].agg(agg_func)

    # Label for the date axis of the result
    label = operation.capitalize()
    if label != "Debut":
        label = f"{label} release"

    # Dates are sorted first so that counting with sort=False keeps the data order
    ordered = result.sort_values()
    if column is None:
        return ordered.value_counts(sort=False).round(0).fillna(0).to_frame().rename_axis(label, axis=0)

    return (
        ordered.groupby(group_cols[0])
        .value_counts(sort=False)
        .round(0)
        .unstack(0)
        .fillna(0)
        .rename_axis(label)
    )

# Data visualization

In [None]:
# Display the merged card/set table that every section below aggregates
full_df

## Debut

In [None]:
# Overall number of card names debuting on each date, with the counts column relabelled
debut_counts = get_release_by(column=None, operation="debut").rename(columns={"count": "All formats"})
debut_counts

In [None]:
# Plot the overall debut counts; anime_df is passed as background and the rule start dates as vertical lines
_ = plot.rate(debut_counts, bg=anime_df, vlines=rules_df["begin"])
plt.show()

### By Format

Monsters' debut per format

In [None]:
# Earliest value of every column whose name contains "debut" (case-insensitive), per card name
full_df.groupby("Name")[full_df.filter(regex="(?i)(debut)").columns].min()

Debuts per date for each format

In [None]:
# Count debuts per date for each debut column:
# 1. earliest value of each debut column per card name,
# 2. melt to long form (Format = source column name, Debut = date),
# 3. count (Format, Debut) pairs and pivot Format into columns,
# 4. fill absent combinations with 0 and order by date.
format_debut_counts = (
    full_df.groupby("Name")[full_df.filter(regex="(?i)(debut)").columns]
    .min()
    .melt(var_name="Format", value_name="Debut")
    .value_counts()
    .unstack(0)
    .fillna(0)
    .sort_index()
)
format_debut_counts

In [None]:
# Plot the per-format debut counts with subplots=True, using the same background and rule lines
_ = plot.rate(format_debut_counts, bg=anime_df, vlines=rules_df["begin"], subplots=True)
plt.show()

### By Region

Note: Debut by region is taken from the earliest release date in the set lists. It may not be as accurate as card-specific properties.

In [None]:
# Earliest "Release" date of each card name in each region, with regions pivoted into columns
full_df.groupby(["Region", "Name"])["Release"].min().unstack(0)

First releases per date

In [None]:
# Number of card names whose first release falls on each date, one column per region
region_debut_counts = get_release_by(column="Region", operation="first")
region_debut_counts

In [None]:
# Plot first releases per region (subplots=True, limit_year=True)
_ = plot.rate(
    region_debut_counts,
    bg=anime_df,
    vlines=rules_df["begin"],
    subplots=True,
    limit_year=True,
)
plt.show()

### By Card type

In [None]:
# Debuts per date, one column per card type
ct_debut_counts = get_release_by(column="Card type", operation="debut")
ct_debut_counts

In [None]:
# Plot debuts per card type (subplots=True, limit_year=True)
_ = plot.rate(
    ct_debut_counts,
    bg=anime_df,
    vlines=rules_df["begin"],
    subplots=True,
    limit_year=True,
)

### By Primary type

In [None]:
# Debuts per date, one column per primary type (the column is exploded inside get_release_by)
pt_debut_counts = get_release_by(column="Primary type", operation="debut")
pt_debut_counts

In [None]:
# Plot debuts per primary type (subplots=True, limit_year=True)
_ = plot.rate(
    pt_debut_counts,
    bg=anime_df,
    vlines=rules_df["begin"],
    subplots=True,
    limit_year=True,
)

### By Secondary type

In [None]:
# Debuts per date, one column per secondary type (the column is exploded inside get_release_by)
st_debut_counts = get_release_by("Secondary type", operation="debut")
st_debut_counts

In [None]:
# Plot debuts per secondary type (subplots=True, limit_year=True)
_ = plot.rate(
    st_debut_counts,
    bg=anime_df,
    vlines=rules_df["begin"],
    subplots=True,
    limit_year=True,
)

### By Attribute

In [None]:
# Debuts per date, one column per attribute
att_debut_counts = get_release_by(column="Attribute", operation="debut")
att_debut_counts

In [None]:
# Plot debuts per attribute (subplots=True, limit_year=True)
_ = plot.rate(
    att_debut_counts,
    bg=anime_df,
    vlines=rules_df["begin"],
    subplots=True,
    limit_year=True,
)

### By Monster type

In [None]:
# Debuts per date, one column per monster type
mt_debut_counts = get_release_by(column="Monster type", operation="debut")
mt_debut_counts

In [None]:
# Plot debuts per monster type (subplots=True, limit_year=True)
_ = plot.rate(mt_debut_counts, bg=anime_df, vlines=rules_df["begin"], subplots=True, limit_year=True)

### By Level/Rank

In [None]:
# Earliest debut date (across all debut columns) per card name and Level/Rank value.
# Non-game cards and Link Monsters are excluded; "Name" is dropped afterwards so only
# the "Level/Rank/Link" column and the date column remain.
level_debut_series = (
    full_df[(full_df["Card type"] != "Non-game card") & (full_df["Primary type"] != "Link Monster")]
    .groupby(["Name", "Level/Rank/Link"])[full_df.filter(regex="(?i)(debut)").columns]
    .min()
    .min(axis=1)
    .reset_index()
    .drop("Name", axis=1)
)

In [None]:
# Box plot of debut date by Level/Rank, using the "Effect Monster" colour and notch=True
_ = plot.box(
    level_debut_series,
    color=plot.colors_dict["Effect Monster"],
    notch=True,
)
plt.show()

### By Pendulum scale

In [None]:
# Earliest debut date per card name and Pendulum Scale, excluding non-game cards;
# "Name" is dropped so only the scale and the date column remain.
pendulum_debut_series = (
    full_df[(full_df["Card type"] != "Non-game card")]
    .groupby(["Name", "Pendulum Scale"])[full_df.filter(regex="(?i)(debut)").columns]
    .min()
    .min(axis=1)
    .reset_index()
    .drop("Name", axis=1)
)

In [None]:
# Box plot of debut date by Pendulum Scale, using the "Spell Card" colour and notch=True
_ = plot.box(
    pendulum_debut_series,
    color=plot.colors_dict["Spell Card"],
    notch=True,
)
plt.show()

### By Link

In [None]:
# Earliest debut date per card name and Link value: only Link Monsters are kept,
# so "Level/Rank/Link" holds the Link value here. "Name" is dropped afterwards.
link_debut_series = (
    full_df[(full_df["Card type"] != "Non-game card") & (full_df["Primary type"] == "Link Monster")]
    .groupby(["Name", "Level/Rank/Link"])[full_df.filter(regex="(?i)(debut)").columns]
    .min()
    .min(axis=1)
    .reset_index()
    .drop("Name", axis=1)
)

In [None]:
# Box plot of debut date by Link value, using the "Link Monster" colour and notch=True
_ = plot.box(
    link_debut_series,
    color=plot.colors_dict["Link Monster"],
    notch=True,
)
plt.show()

### By ATK

In [None]:
# Earliest debut date per card name and ATK value, excluding non-game cards;
# "Name" is dropped so only ATK and the date column remain.
atk_debut_series = (
    full_df[full_df["Card type"] != "Non-game card"]
    .groupby(["Name", "ATK"])[full_df.filter(regex="(?i)(debut)").columns]
    .min()
    .min(axis=1)
    .reset_index()
    .drop("Name", axis=1)
)

In [None]:
# Box plot of debut date by ATK, using the "Effect Monster" colour and notch=True
_ = plot.box(
    atk_debut_series,
    color=plot.colors_dict["Effect Monster"],
    notch=True,
)
plt.show()

### By DEF

In [None]:
# Earliest debut date per card name and DEF value, excluding non-game cards;
# "Name" is dropped so only DEF and the date column remain.
def_debut_series = (
    full_df[full_df["Card type"] != "Non-game card"]
    .groupby(["Name", "DEF"])[full_df.filter(regex="(?i)(debut)").columns]
    .min()
    .min(axis=1)
    .reset_index()
    .drop("Name", axis=1)
)

In [None]:
# Box plot of debut date by DEF, using the "Effect Monster" colour and notch=True
_ = plot.box(
    def_debut_series,
    color=plot.colors_dict["Effect Monster"],
    notch=True,
)
plt.show()

## Last release

Note: Only the last release of each individual card name is considered.

In [None]:
# Overall number of card names whose latest release falls on each date, with the counts column relabelled
last_counts = get_release_by(column=None, operation="last").rename(columns={"count": "All formats"})
last_counts

In [None]:
# Plot the overall last-release counts (limit_year=True)
_ = plot.rate(last_counts, bg=anime_df, vlines=rules_df["begin"], limit_year=True)
plt.show()

### By Region

In [None]:
# Latest "Release" date of each card name in each region, with regions pivoted into columns
full_df.groupby(["Region", "Name"])["Release"].max().unstack(0)

Last releases by date

In [None]:
# Number of card names whose last release falls on each date, one column per region
region_last_counts = get_release_by(column="Region", operation="last")
region_last_counts

In [None]:
# Plot last releases per region (subplots=True)
_ = plot.rate(region_last_counts, bg=anime_df, vlines=rules_df["begin"], subplots=True)
plt.show()

### By Card type

In [None]:
# Last releases per date, one column per card type
ct_last_counts = get_release_by(column="Card type", operation="last")
ct_last_counts

In [None]:
# Plot last releases per card type (subplots=True, limit_year=True)
_ = plot.rate(
    ct_last_counts,
    bg=anime_df,
    vlines=rules_df["begin"],
    subplots=True,
    limit_year=True,
)

### By Primary type

In [None]:
# Last releases per date, one column per primary type
pt_last_counts = get_release_by(column="Primary type", operation="last")
pt_last_counts

In [None]:
# Plot last releases per primary type (subplots=True, limit_year=True)
_ = plot.rate(
    pt_last_counts,
    bg=anime_df,
    vlines=rules_df["begin"],
    subplots=True,
    limit_year=True,
)

### By Secondary type

In [None]:
# Last releases per date, one column per secondary type
st_last_counts = get_release_by(column="Secondary type", operation="last")
st_last_counts

In [None]:
# Plot last releases per secondary type (subplots=True, limit_year=True)
_ = plot.rate(
    st_last_counts,
    bg=anime_df,
    vlines=rules_df["begin"],
    subplots=True,
    limit_year=True,
)

### By Attribute

In [None]:
# Last releases per date, one column per attribute
att_last_counts = get_release_by(column="Attribute", operation="last")
att_last_counts

In [None]:
# Plot last releases per attribute (subplots=True, limit_year=True)
_ = plot.rate(
    att_last_counts,
    bg=anime_df,
    vlines=rules_df["begin"],
    subplots=True,
    limit_year=True,
)

### By Monster type

In [None]:
# Last releases per date, one column per monster type.
# Uses the shared get_release_by helper, like every other "Last release" breakdown.
mt_last_counts = get_release_by(column="Monster type", operation="last")
mt_last_counts

In [None]:
# Plot last releases per monster type (subplots=True, limit_year=True).
# No title override: this is a "Last release" chart, and the data frame's axis name
# already carries the label, as in the sibling plots.
_ = plot.rate(
    mt_last_counts,
    bg=anime_df,
    vlines=rules_df["begin"],
    subplots=True,
    limit_year=True,
)

### By Level/Rank

In [None]:
# Latest release date per card name and Level/Rank value.
# Non-game cards and Link Monsters are excluded; "Name" is dropped afterwards so only
# the "Level/Rank/Link" and "Release" columns remain.
level_last_series = (
    full_df[(full_df["Card type"] != "Non-game card") & (full_df["Primary type"] != "Link Monster")]
    .groupby(["Name", "Level/Rank/Link"])["Release"]
    .max()
    .reset_index()
    .drop("Name", axis=1)
)

In [None]:
# Box plot of level_last_series, using the "Effect Monster" colour and notch=True
_ = plot.box(
    level_last_series,
    color=plot.colors_dict["Effect Monster"],
    notch=True,
)
plt.show()

### By Pendulum scale

In [None]:
# Latest release date per card name and Pendulum Scale, excluding non-game cards;
# "Name" is dropped so only the scale and "Release" remain.
pendulum_last_series = (
    full_df[(full_df["Card type"] != "Non-game card")]
    .groupby(["Name", "Pendulum Scale"])["Release"]
    .max()
    .reset_index()
    .drop("Name", axis=1)
)

In [None]:
# Box plot of last release date by Pendulum Scale, using the "Spell Card" colour and notch=True
_ = plot.box(
    pendulum_last_series,
    color=plot.colors_dict["Spell Card"],
    notch=True,
)
plt.show()

### By Link

In [None]:
# Latest release date per card name and Link value: only Link Monsters are kept,
# so "Level/Rank/Link" holds the Link value here. "Name" is dropped afterwards.
link_last_series = (
    full_df[(full_df["Card type"] != "Non-game card") & (full_df["Primary type"] == "Link Monster")]
    .groupby(["Name", "Level/Rank/Link"])["Release"]
    .max()
    .reset_index()
    .drop("Name", axis=1)
)

In [None]:
# Box plot of last release date by Link value, using the "Link Monster" colour and notch=True
_ = plot.box(
    link_last_series,
    color=plot.colors_dict["Link Monster"],
    notch=True,
)
plt.show()

### By ATK

In [None]:
# Latest release date per card name and ATK value, excluding non-game cards;
# "Name" is dropped so only ATK and "Release" remain.
atk_last_series = (
    full_df[(full_df["Card type"] != "Non-game card")]
    .groupby(["Name", "ATK"])["Release"]
    .max()
    .reset_index()
    .drop("Name", axis=1)
)

In [None]:
# Box plot of last release date by ATK, using the "Effect Monster" colour and notch=True
_ = plot.box(
    atk_last_series,
    color=plot.colors_dict["Effect Monster"],
    notch=True,
)
plt.show()

### By DEF

In [None]:
# Latest release date per card name and DEF value, excluding non-game cards;
# "Name" is dropped so only DEF and "Release" remain.
def_last_series = (
    full_df[(full_df["Card type"] != "Non-game card")]
    .groupby(["Name", "DEF"])["Release"]
    .max()
    .reset_index()
    .drop("Name", axis=1)
)

In [None]:
# Box plot of last release date by DEF, using the "Effect Monster" colour and notch=True
_ = plot.box(
    def_last_series,
    color=plot.colors_dict["Effect Monster"],
    notch=True,
)
plt.show()

## All releases

Note: All releases include reprints.

In [None]:
# Number of rows released on each date (rows without a release date are left out), ordered by date
all_releases = (
    full_df.loc[full_df["Release"].notna(), "Release"]
    .value_counts()
    .sort_index()
    .rename_axis("All releases")
    .to_frame()
)
_ = plot.rate(all_releases, bg=anime_df, vlines=rules_df["begin"])
plt.show()

### By Region

### By Card type

In [None]:
# All releases, includes reprints - Double check
# Distinct card names released on each date, one column per card type (absent combinations -> 0)
release_card_type = full_df.groupby(["Card type", "Release"])["Name"].nunique().unstack(0).sort_index().fillna(0).astype(int)
# Displayed summary: per-date counts summed by calendar year, transposed so years become columns
release_card_type.groupby(release_card_type.index.strftime("%Y")).sum().T

In [None]:
# Restrict the plot to these four card types (raises KeyError if one of them is absent from the data)
release_card_type_plot = release_card_type[["Monster Card", "Spell Card", "Trap Card", "Skill Card"]]
# One colour per plotted column, looked up by card type name
card_type_colors = [plot.colors_dict[col] for col in release_card_type_plot.columns]
_ = plot.rate(release_card_type_plot, colors=card_type_colors, bg=anime_df, vlines=rules_df["begin"])
plt.show()

### By Primary type

In [None]:
# Sort properly
# Distinct card names released on each date, one column per primary type.
# "Primary type" is exploded first, so each element of a list-like value gets its own row.
release_primary_type = (
    full_df.explode("Primary type")
    .groupby(["Primary type", "Release"])["Name"]
    .nunique()
    .unstack(0)
    .sort_index()
    .fillna(0)
    .astype(int)
)
# Displayed summary: per-date counts summed by calendar year, transposed so years become columns
release_primary_type.groupby(release_primary_type.index.strftime("%Y")).sum().T

In [None]:
# Plot all releases per primary type
_ = plot.rate(release_primary_type, bg=anime_df, vlines=rules_df["begin"])
plt.show()

### By Secondary type

In [None]:
# Sort properly
# Distinct card names released on each date, one column per secondary type.
# "Secondary type" is exploded first, so each element of a list-like value gets its own row.
release_secondary_type = (
    full_df.explode("Secondary type")
    .groupby(["Secondary type", "Release"])["Name"]
    .nunique()
    .unstack(0)
    .sort_index()
    .fillna(0)
    .astype(int)
)
# Displayed summary: per-date counts summed by calendar year, transposed so years become columns
release_secondary_type.groupby(release_secondary_type.index.strftime("%Y")).sum().T

In [None]:
# Plot all releases per secondary type
_ = plot.rate(release_secondary_type, bg=anime_df, vlines=rules_df["begin"])
plt.show()

### By Attribute

In [None]:
# Sort properly
# Distinct card names released on each date, one column per attribute (absent combinations -> 0)
release_attribute = full_df.groupby(["Attribute", "Release"])["Name"].nunique().unstack(0).sort_index().fillna(0).astype(int)
# Displayed summary: per-date counts summed by calendar year, transposed so years become columns
release_attribute.groupby(release_attribute.index.strftime("%Y")).sum().T

In [None]:
# One colour per attribute column, looked up by attribute name
attribute_colors = [plot.colors_dict[col] for col in release_attribute.columns]
# Plot all releases per attribute (cumsum=True, limit_year=True)
_ = plot.rate(
    release_attribute, colors=attribute_colors, bg=anime_df, vlines=rules_df["begin"], cumsum=True, limit_year=True
)
plt.show()

### By Monster type

In [None]:
# Sort properly
# Distinct card names released on each date, one column per monster type (absent combinations -> 0)
release_monster_type = (
    full_df.groupby(["Monster type", "Release"])["Name"].nunique().unstack(0).sort_index().fillna(0).astype(int)
)
# Displayed summary: per-date counts summed by calendar year, transposed so years become columns
release_monster_type.groupby(release_monster_type.index.strftime("%Y")).sum().T

In [None]:
# Plot all releases per monster type (subplots=True)
_ = plot.rate(release_monster_type, bg=anime_df, vlines=rules_df["begin"], subplots=True)
plt.show()

### By Level/Rank

In [None]:
# Box plot of every release date by Level/Rank, excluding non-game cards and Link Monsters
_ = plot.box(
    full_df[(full_df["Card type"] != "Non-game card") & (full_df["Primary type"] != "Link Monster")][
        ["Release", "Level/Rank/Link"]
    ],
    color=plot.colors_dict["Level"],
    notch=True,
)
plt.show()

### By Pendulum scale

In [None]:
# Box plot of every release date by Pendulum Scale, excluding non-game cards
_ = plot.box(
    full_df[full_df["Card type"] != "Non-game card"][["Release", "Pendulum Scale"]],
    color=plot.colors_dict["Pendulum Monster"],
    notch=True,
)
plt.show()

### By Link

In [None]:
# Box plot of every release date by Link value.
# Only Link Monsters are kept, so "Level/Rank/Link" holds the Link value here.
_ = plot.box(
    full_df[(full_df["Card type"] != "Non-game card") & (full_df["Primary type"] == "Link Monster")][
        ["Release", "Level/Rank/Link"]
    ],
    color=plot.colors_dict["Link Monster"],
)
plt.show()

### By ATK

In [None]:
# Box plot of every release date by ATK, excluding non-game cards
_ = plot.box(
    full_df[full_df["Card type"] != "Non-game card"][["Release", "ATK"]],
    color=plot.colors_dict["Effect Monster"],
    notch=True,
)
plt.show()

### By DEF

In [None]:
# Box plot of every release date by DEF, excluding non-game cards
_ = plot.box(
    full_df[full_df["Card type"] != "Non-game card"][["Release", "DEF"]],
    color=plot.colors_dict["Effect Monster"],
    notch=True,
)
plt.show()

# Debug

## Merge failed

Names missing

In [None]:
# Cards whose Name is absent from the merged table, i.e. rows lost in the inner merge.
# .where blanks every row that is present in full_df; dropna(how="all") then removes the blanked rows.
all_cards_df.where(~all_cards_df["Name"].isin(full_df["Name"])).dropna(how="all")

Card number missing

In [None]:
# Set-list rows whose card number is absent from the merged table, ignoring numbers that start with "RD/".
# NOTE(review): the startswith mask is built after dropna(), so it has fewer rows than the isin mask;
# confirm that the index alignment done by `&` treats rows with a missing card number as intended.
set_lists_df.where(
    (~set_lists_df["Card number"].isin(full_df["Card number"]))
    & (~set_lists_df["Card number"].dropna().str.startswith("RD/"))
).dropna(how="all")

# Epilogue

In [None]:
# Record the benchmark for the "timeline" report, passing the timestamp taken at the start of the run
benchmark(report="timeline", timestamp=timestamp)

In [None]:
# Emit the report footer (counterpart of the header() call at the top)
footer()

## HTML export

In [None]:
# Save the notebook so the export in the next cell sees the latest outputs.
# May need to sleep for a few seconds after saving
save_notebook()

In [None]:
# Export the user's Timeline.ipynb (the section heading indicates an HTML export)
export_notebook(dirs.NOTEBOOKS.user / "Timeline.ipynb")

## Git

In [None]:
# Commit with a message carrying the run timestamp; the first argument is presumably
# a file pattern selecting the timeline files — TODO confirm against git.commit
git.commit("*[Tt]imeline*", f"Timeline update - {timestamp.isoformat()}")