In [None]:
from yugiquery import *

# Enable interactive rendering for all outputs of this notebook.
# NOTE(review): init_notebook_mode comes from the star import above; presumably the itables helper — confirm.
init_notebook_mode(all_interactive=True)

# Emit the report header titled "Timeline".
header("Timeline")

---

Table of Contents <a class="jp-toc-ignore"></a>
=================

*   [1  Data preparation](#Data-preparation)
    *   [1.1  Load data](#Load-data)
    *   [1.2  Format data](#Format-data)
    *   [1.3  Merge data](#Merge-data)
*   [2  Data visualization](#Data-visualization)
    *   [2.1  First releases](#First-releases)
        *   [2.1.1  By region](#By-region)
    *   [2.2  Last releases](#Last-releases)
        *   [2.2.1  By region](#By-region)
    *   [2.3  All Releases](#All-Releases)
        *   [2.3.1  By card type](#By-card-type)
        *   [2.3.2  By primary type](#By-primary-type)
        *   [2.3.3  By secondary type](#By-secondary-type)
        *   [2.3.4  By attribute](#By-attribute)
        *   [2.3.5  By monster type](#By-monster-type)
        *   [2.3.6  By Level/Rank](#By-Level/Rank)
        *   [2.3.7  By ATK](#By-ATK)
        *   [2.3.8  By DEF](#By-DEF)
        *   [2.3.9  By pendulum scale](#By-pendulum-scale)
        *   [2.3.10  By link](#By-link)
*   [3  Debug](#Debug)
    *   [3.1  Merge failed](#Merge-failed)
*   [4  Epilogue](#Epilogue)
    *   [4.1  HTML export](#HTML-export)
<!-- *   [4.2  Git](#Git) -->

# Data preparation

In [None]:
# UTC time at which this run started; later passed to benchmark() and
# embedded in the git commit message at the end of the notebook.
timestamp = arrow.utcnow()

## Load data

In [None]:
# Load the list of important dates (anime series and rule changes).
#
# The asset path components are passed to get_asset as separate arguments:
# the former `"json" / "dates.json"` divided one str by another, which raises
# TypeError before the file is ever opened.
# NOTE(review): assumes dirs.get_asset joins its positional arguments into a
# path — confirm against the yugiquery `dirs` helper.
with open(dirs.get_asset("json", "dates.json"), "r", encoding="utf-8") as f:
    dates_json = json.load(f)

# The frames are built after the file handle is closed; only the parsed JSON
# is needed from here on. Every cell is parsed as a day-first date.
anime_df = (
    pd.DataFrame(dates_json["anime"]["series"])
    .set_index("title")
    .map(pd.to_datetime, dayfirst=True)
)
rules_df = (
    pd.DataFrame(dates_json["rules"])
    .set_index("title")
    .map(pd.to_datetime, dayfirst=True)
    .iloc[2:]  # Ignore old rules (drops the first two entries)
)

In [None]:
# Get the latest file of each kind, if it exists.
# Column names handed to load_corrected_latest for every data set.
# NOTE(review): presumably these columns hold tuple values that need special
# parsing on load — confirm against load_corrected_latest.
tuple_cols = [
    "Secondary type",
    "Effect type",
    "Link Arrows",
    "Archseries",
    "Artwork",
    "Errata",
    "Rarity",
    "Cover card",
]

# The helper returns a pair; only the first element (the data frame) is kept.
# The frame may be None — the next cell aborts the run in that case.
all_cards_df, _ = load_corrected_latest("cards", tuple_cols)
all_speed_df, _ = load_corrected_latest("speed", tuple_cols)
set_lists_df, _ = load_corrected_latest("sets", tuple_cols)

## Format data

In [None]:
df_list = [all_cards_df, all_speed_df, set_lists_df]

# Abort early when any of the three data sets could not be loaded.
if any(item is None for item in df_list):
    raise SystemExit("Not enough files to proceed. Aborting!")

# Build a normalised merge key from the card name: lower-cased, "#" removed.
for df in df_list:
    lowered_names = df["Name"].str.lower()
    df["index"] = lowered_names.str.replace("#", "")

## Merge data

In [None]:
# Stack regular and speed cards, drop exact duplicates, then inner-join the
# set lists on the normalised "index" key built in the previous section.
full_df = (
    pd.concat([all_cards_df, all_speed_df])
    .drop_duplicates(ignore_index=True)
    .merge(set_lists_df, how="inner", on="index")
    .convert_dtypes()
    .assign(
        **{
            # Keep the most recent modification date of the two merged sides.
            "Modification date": lambda merged: merged[
                ["Modification date_x", "Modification date_y"]
            ].max(axis=1),
            # Prefer the left-hand name, falling back to the right-hand one.
            "Name": lambda merged: merged["Name_x"].fillna(merged["Name_y"]),
        }
    )
    .drop(columns=["index", "Name_x", "Name_y", "Modification date_x", "Modification date_y"])
    .rename(columns={"Page URL_x": "Card page URL", "Page URL_y": "Set page URL"})
)

# Move the last column ("Name", added above) to the front.
full_df = full_df[[*full_df.columns[-1:], *full_df.columns[:-1]]]

# Data visualization

In [None]:
# Display the merged cards/sets frame that every plot below is built from.
full_df

## First releases

Obs: Only the first release of an individual card name

In [None]:
# Earliest release date per card name; rows without a release date are ignored.
first_release = full_df.dropna(subset=["Release"]).groupby("Name")["Release"].min()
first_release.to_frame(name="First release")

In [None]:
# Number of cards first released on each date, in chronological order
# (values are sorted before counting and the counts are left unsorted).
first_release_count = (
    first_release.sort_values()
    .value_counts(sort=False)
    .to_frame(name="All cards")
)
first_release_count.index.name = "First Release"
plot.rate(
    first_release_count,
    bg=anime_df,
    vlines=rules_df["begin"],
)

### By region

In [None]:
# Earliest release date of every card name within each region.
first_release_region = (
    full_df.dropna(subset=["Release"])
    .groupby(["Region", "Name"])["Release"]
    .min()
)
first_release_region.to_frame(name="First release")

In [None]:
# First releases counted per date inside each region, then pivoted so that
# every region becomes a column; dates missing for a region count as 0.
first_release_region_count = (
    first_release_region.sort_values()
    .groupby(["Region"])
    .value_counts(sort=False)
    .unstack(0)
    .fillna(0)
    .round(0)
)
first_release_region_count.index.name = "Release"
plot.rate_subplots(first_release_region_count, title="First Release", bg=anime_df, vlines=rules_df["begin"])

## Last releases

Obs: Only the last release of an individual card name

In [None]:
# Latest release date per card name; rows without a release date are ignored.
last_release = full_df.dropna(subset=["Release"]).groupby("Name")["Release"].max()
last_release.to_frame(name="Last release")

In [None]:
# Number of cards whose last release falls on each date, in chronological
# order (values are sorted before counting and the counts are left unsorted).
last_release_count = (
    last_release.sort_values()
    .value_counts(sort=False)
    .to_frame(name="All cards")
)
last_release_count.index.name = "Last Release"
plot.rate(
    last_release_count,
    bg=anime_df,
    vlines=rules_df["begin"],
)

### By region

In [None]:
# Latest release date of every card name within each region.
last_release_region = (
    full_df.dropna(subset=["Release"])
    .groupby(["Region", "Name"])["Release"]
    .max()
)
last_release_region.to_frame(name="Last release")

In [None]:
# Last releases counted per date inside each region, then pivoted so that
# every region becomes a column; dates missing for a region count as 0.
last_release_region_count = (
    last_release_region.sort_values()
    .groupby(["Region"])
    .value_counts(sort=False)
    .unstack(0)
    .fillna(0)
    .round(0)
)
last_release_region_count.index.name = "Release"
plot.rate_subplots(last_release_region_count, title="Last Release", bg=anime_df, vlines=rules_df["begin"])

## All Releases

Obs: All releases include reprints

In [None]:
# Count every row (reprints included) per release date, ordered by date.
all_releases = (
    full_df["Release"]
    .dropna()
    .value_counts()
    .sort_index()
    .to_frame()
)
all_releases.index.name = "All releases"
plot.rate(
    all_releases,
    bg=anime_df,
    vlines=rules_df["begin"],
)

### By card type

In [None]:
# All releases, reprints included: distinct card names per release date,
# one column per card type.
# TODO: double check
release_card_type = (
    full_df.groupby(["Card type", "Release"])["Name"]
    .nunique()
    .unstack("Card type")
    .sort_index()
    .fillna(0)
    .astype(int)
)

# Yearly totals, transposed so that years run across the columns.
(
    release_card_type
    .groupby(release_card_type.index.strftime("%Y"))
    .sum()
    .T
)

In [None]:
# One colour per plotted column, looked up by card type name.
card_type_colors = [
    plot.colors_dict[card_type]
    for card_type in release_card_type.columns
]
plot.rate(
    release_card_type,
    colors=card_type_colors,
    bg=anime_df,
    vlines=rules_df["begin"],
)

### By primary type

In [None]:
# All releases, reprints included: distinct card names per release date,
# one column per primary type.
# TODO: double check
# TODO: sort properly
release_primary_type = (
    full_df.groupby(["Primary type", "Release"])["Name"]
    .nunique()
    .unstack("Primary type")
    .sort_index()
    .fillna(0)
    .astype(int)
)

# Yearly totals, transposed so that years run across the columns.
(
    release_primary_type
    .groupby(release_primary_type.index.strftime("%Y"))
    .sum()
    .T
)

In [None]:
# One colour per plotted column, looked up by primary type name.
primary_type_colors = [
    plot.colors_dict[primary_type]
    for primary_type in release_primary_type.columns
]
plot.rate(release_primary_type, colors=primary_type_colors, bg=anime_df, vlines=rules_df["begin"])

### By secondary type

In [None]:
# All releases, reprints included: distinct card names per release date,
# one column per secondary type. Rows are exploded first so that a card
# with several secondary types is counted under each of them.
# TODO: double check
# TODO: sort properly
exploded_secondary = full_df.explode("Secondary type")
release_secondary_type = (
    exploded_secondary.groupby(["Secondary type", "Release"])["Name"]
    .nunique()
    .unstack("Secondary type")
    .sort_index()
    .fillna(0)
    .astype(int)
)

# Yearly totals, transposed so that years run across the columns.
(
    release_secondary_type
    .groupby(release_secondary_type.index.strftime("%Y"))
    .sum()
    .T
)

In [None]:
# Plot the per-date counts for each secondary type.
# NOTE(review): presumably `bg` shades the anime series periods and `vlines`
# marks rule start dates — confirm against plot.rate.
plot.rate(release_secondary_type, bg=anime_df, vlines=rules_df["begin"])

### By attribute

In [None]:
# All releases, reprints included: distinct card names per release date,
# one column per attribute.
# TODO: double check
# TODO: sort properly
release_attribute = (
    full_df.groupby(["Attribute", "Release"])["Name"]
    .nunique()
    .unstack("Attribute")
    .sort_index()
    .fillna(0)
    .astype(int)
)

# Yearly totals, transposed so that years run across the columns.
(
    release_attribute
    .groupby(release_attribute.index.strftime("%Y"))
    .sum()
    .T
)

In [None]:
# One colour per plotted column, looked up by attribute name.
attribute_colors = [
    plot.colors_dict[attribute]
    for attribute in release_attribute.columns
]
plot.rate(release_attribute, colors=attribute_colors, bg=anime_df, vlines=rules_df["begin"], cumsum=True)

### By monster type

In [None]:
# All releases, reprints included: distinct card names per release date,
# one column per monster type.
# TODO: double check
# TODO: sort properly
release_monster_type = (
    full_df.groupby(["Monster type", "Release"])["Name"]
    .nunique()
    .unstack("Monster type")
    .sort_index()
    .fillna(0)
    .astype(int)
)

# Yearly totals, transposed so that years run across the columns.
(
    release_monster_type
    .groupby(release_monster_type.index.strftime("%Y"))
    .sum()
    .T
)

In [None]:
# Plot the per-date counts for each monster type as subplots.
# NOTE(review): presumably one subplot per column — confirm against plot.rate_subplots.
plot.rate_subplots(release_monster_type, bg=anime_df, vlines=rules_df["begin"])

### By Level/Rank

In [None]:
# Box plot built from the "Release" and "Level/Rank" columns, drawn notched in the "Level" colour.
plot.box(full_df[["Release", "Level/Rank"]], color=plot.colors_dict["Level"], notch=True)

### By ATK

In [None]:
# Box plot built from the "Release" and "ATK" columns, drawn notched in the "Effect Monster" colour.
plot.box(full_df[["Release", "ATK"]], color=plot.colors_dict["Effect Monster"], notch=True)

### By DEF

In [None]:
# Box plot built from the "Release" and "DEF" columns, drawn notched in the "Effect Monster" colour.
plot.box(full_df[["Release", "DEF"]], color=plot.colors_dict["Effect Monster"], notch=True)

### By pendulum scale

In [None]:
# Box plot built from the "Release" and "Pendulum Scale" columns, drawn notched in the "Pendulum Monster" colour.
plot.box(
    full_df[["Release", "Pendulum Scale"]],
    color=plot.colors_dict["Pendulum Monster"],
    notch=True,
)

### By link

In [None]:
# Box plot built from the "Release" and "Link" columns in the "Link Monster" colour.
# Unlike the other box plots in this section, no `notch` argument is passed here.
plot.box(full_df[["Release", "Link"]], color=plot.colors_dict["Link Monster"])

# Debug

## Merge failed

In [None]:
# Cards whose name never made it into the merged frame; rows that do not
# match are blanked by `where` and then dropped as all-NaN.
cards_missing_from_merge = ~all_cards_df["Name"].isin(full_df["Name"])
all_cards_df.where(cards_missing_from_merge).dropna(how="all")

In [None]:
# Set-list rows whose card number is absent from the merged frame, leaving
# out card numbers that start with "RD/".
# NOTE(review): "RD/" presumably marks Rush Duel prints — confirm.
set_card_numbers = set_lists_df["Card number"]
number_not_merged = ~set_card_numbers.isin(full_df["Card number"])
number_not_rd = ~set_card_numbers.dropna().str.startswith("RD/")
set_lists_df.where(number_not_merged & number_not_rd).dropna(how="all")

# Epilogue

In [None]:
# Record the benchmark for the "timeline" report, using the timestamp taken at the start of the run.
benchmark(report="timeline", timestamp=timestamp)

In [None]:
# Emit the report footer.
footer()

## HTML export

In [None]:
# Save the notebook before exporting it.
# May need to sleep for a few seconds after saving so the export picks up the saved file.
save_notebook()

In [None]:
# Export this notebook, located at "Timeline.ipynb" under the user notebooks directory.
# NOTE(review): the section heading suggests the export target is HTML — confirm against export_notebook.
export_notebook(dirs.NOTEBOOKS.user / "Timeline.ipynb")

## Git

In [None]:
# Commit the timeline outputs with a message carrying the run's ISO timestamp.
# NOTE(review): the first argument looks like a file glob matching "timeline"/"Timeline" paths — confirm against git.commit.
git.commit("*[Tt]imeline*", f"Timeline update - {timestamp.isoformat()}")