In [None]:
from yugiquery import *

# Switch on notebook-wide interactive mode before anything is displayed.
# NOTE(review): init_notebook_mode is presumably the itables helper re-exported by the
# yugiquery star import — confirm.
init_notebook_mode(all_interactive=True)

# Emit the report header, titled "Timeline".
header("Timeline")

---

Table of Contents <a class="jp-toc-ignore"></a>
=================
* [1 Data preparation](#data-preparation)
  * [1.1 Load data](#load-data)
  * [1.2 Format data](#format-data)
  * [1.3 Merge data](#merge-data)
* [2 Data visualization](#data-visualization)
  * [2.1 First releases](#first-releases)
    * [2.1.1 By region](#by-region)
  * [2.2 Last releases](#last-releases)
    * [2.2.1 By region](#by-region)
  * [2.3 All Releases](#all-releases)
    * [2.3.1 By card type](#by-card-type)
    * [2.3.2 By primary type](#by-primary-type)
    * [2.3.3 By secondary type](#by-secondary-type)
    * [2.3.4 By attribute](#by-attribute)
    * [2.3.5 By monster type](#by-monster-type)
    * [2.3.6 By Level/Rank](#by-level/rank)
    * [2.3.7 By ATK](#by-atk)
    * [2.3.8 By DEF](#by-def)
    * [2.3.9 By pendulum scale](#by-pendulum-scale)
    * [2.3.10 By link](#by-link)
* [3 Debug](#debug)
  * [3.1 Merge failed](#merge-failed)
* [4 Epilogue](#epilogue)
  * [4.1 HTML export](#html-export)
  <!-- * [4.2 Git](#git) -->

# Data preparation

In [None]:
# UTC time at which this run started; it is passed to benchmark() and embedded in the
# git commit message in the closing cells of the notebook.
timestamp = arrow.utcnow()

## Load data

In [None]:
# Load list of important dates
# The file is decoded explicitly as UTF-8 rather than with the platform's default
# encoding, and the handle is closed before any DataFrame work starts.
with open(dirs.get_asset("json", "dates.json"), "r", encoding="utf-8") as f:
    dates_json = json.load(f)

# Anime series, indexed by title; every remaining cell is parsed as a day-first date.
anime_df = pd.DataFrame(dates_json["anime"]["series"]).set_index("title").map(pd.to_datetime, dayfirst=True)

# Rule sets, indexed by title and parsed the same way; the first two rows are skipped.
rules_df = (
    pd.DataFrame(dates_json["rules"]).set_index("title").map(pd.to_datetime, dayfirst=True).iloc[2:]
)  # Ignore old rules

In [None]:
# Get latest file if exist
# Each call is unpacked as a pair: the first element (the dataset) is kept, the second
# is discarded. The "Format data" cell treats a dataset that is None as a missing file.
all_cards_df, _ = load_latest_data("cards")
all_speed_df, _ = load_latest_data("speed")
set_lists_df, _ = load_latest_data("sets")

## Format data

In [None]:
# All three datasets are required: stop the run as soon as one of them is missing.
df_list = [all_cards_df, all_speed_df, set_lists_df]
if any(item is None for item in df_list):
    raise SystemExit("Not enough files to proceed. Aborting!")

# Add the merge key to every dataset: the lower-cased name with "#" characters removed.
for df in df_list:
    df["index"] = df["Name"].str.lower().str.replace("#", "")

## Merge data

In [None]:
# Stack the card and speed-card tables, drop exact duplicate rows, then inner-join the
# set lists on the normalised "index" key and let pandas pick nullable dtypes.
full_df = (
    pd.concat([all_cards_df, all_speed_df])
    .drop_duplicates(ignore_index=True)
    .merge(set_lists_df, how="inner", on="index")
    .convert_dtypes()
)

# Collapse the suffixed columns produced by the merge: keep the most recent
# modification date, and the card-side name with the set-side name as fallback.
full_df["Modification date"] = full_df[["Modification date_x", "Modification date_y"]].max(axis=1)
full_df["Name"] = full_df["Name_x"].fillna(full_df["Name_y"])

# Remove the helper/suffixed columns and give the two URL columns explicit names.
full_df = full_df.drop(
    columns=["index", "Name_x", "Name_y", "Modification date_x", "Modification date_y"]
).rename(columns={"Page URL_x": "Card page URL", "Page URL_y": "Set page URL"})

# Move the last column (the freshly built "Name") to the front.
full_df = full_df[[full_df.columns[-1], *full_df.columns[:-1]]]

# Data visualization

In [None]:
# Display the merged card/set table built in the "Merge data" cell.
full_df

## First releases

Note: only the first (earliest) release of each individual card name is counted.

In [None]:
# Earliest release date per card name; rows without a release date are left out.
first_release = full_df.dropna(subset=["Release"]).groupby("Name")["Release"].min()
first_release.to_frame(name="First release")

In [None]:
# Count how many cards share each first-release date. Dates are sorted before being
# counted with sort=False, so the counts are not reordered by frequency.
first_release_count = (
    first_release.sort_values()
    .value_counts(sort=False)
    .to_frame(name="All cards")
    .rename_axis("First Release")
)

# Rate plot, with the anime table passed as `bg` and the rule start dates as `vlines`.
_ = plot.rate(
    first_release_count,
    bg=anime_df,
    vlines=rules_df["begin"],
)
plt.show()

### By region

In [None]:
# Earliest release date of every card name within each region.
first_release_region = (
    full_df.dropna(subset=["Release"])
    .groupby(["Region", "Name"])["Release"]
    .min()
)
first_release_region.to_frame(name="First release")

In [None]:
# Per-region count of first releases on each date: one column per region, one row per
# date, with 0 where a region had no first release on that date.
first_release_region_count = (
    first_release_region.sort_values()
    .groupby(["Region"])
    .value_counts(sort=False)
    .unstack("Region")
    .fillna(0)
    .round(0)
    .rename_axis("Release")
)

# One subplot per region.
_ = plot.rate(first_release_region_count, title="First Release", bg=anime_df, vlines=rules_df["begin"], subplots=True)
plt.show()

## Last releases

Note: only the last (most recent) release of each individual card name is counted.

In [None]:
# Latest release date per card name; rows without a release date are left out.
last_release = full_df.dropna(subset=["Release"]).groupby("Name")["Release"].max()
last_release.to_frame(name="Last release")

In [None]:
# Count how many cards share each last-release date. Dates are sorted before being
# counted with sort=False, so the counts are not reordered by frequency.
last_release_count = (
    last_release.sort_values()
    .value_counts(sort=False)
    .to_frame(name="All cards")
    .rename_axis("Last Release")
)

# Same rate plot as for first releases, with limit_year switched on.
_ = plot.rate(
    last_release_count,
    bg=anime_df,
    vlines=rules_df["begin"],
    limit_year=True,
)
plt.show()

### By region

In [None]:
# Latest release date of every card name within each region.
last_release_region = (
    full_df.dropna(subset=["Release"])
    .groupby(["Region", "Name"])["Release"]
    .max()
)
last_release_region.to_frame(name="Last release")

In [None]:
# Per-region count of last releases on each date: one column per region, one row per
# date, with 0 where a region had no last release on that date.
last_release_region_count = (
    last_release_region.sort_values()
    .groupby(["Region"])
    .value_counts(sort=False)
    .unstack("Region")
    .fillna(0)
    .round(0)
    .rename_axis("Release")
)

# One subplot per region.
_ = plot.rate(
    last_release_region_count,
    title="Last Release",
    bg=anime_df,
    vlines=rules_df["begin"],
    subplots=True,
)
plt.show()

## All Releases

Note: all releases are counted, including reprints.

In [None]:
# Number of merged rows per release date (missing dates excluded), ordered by date.
all_releases = (
    full_df["Release"]
    .dropna()
    .value_counts()
    .sort_index()
    .to_frame()
)
all_releases.index.name = "All releases"

_ = plot.rate(
    all_releases,
    bg=anime_df,
    vlines=rules_df["begin"],
)
plt.show()

### By card type

In [None]:
# All releases, includes reprints - Double check
# Distinct card names per (card type, release date), pivoted to one column per card
# type and one row per date; dates with no release of a type are filled with 0.
release_card_type = (
    full_df.groupby(["Card type", "Release"])["Name"]
    .nunique()
    .unstack("Card type")
    .sort_index()
    .fillna(0)
    .astype(int)
)

# Yearly totals, shown with one row per card type.
release_card_type.groupby(release_card_type.index.strftime("%Y")).sum().transpose()

In [None]:
# Plot only the four listed card types, in this fixed column order.
release_card_type_plot = release_card_type.loc[
    :, ["Monster Card", "Spell Card", "Trap Card", "Skill Card"]
]

# One colour per plotted column, looked up by column name.
card_type_colors = [plot.colors_dict[name] for name in release_card_type_plot]

_ = plot.rate(
    release_card_type_plot,
    colors=card_type_colors,
    bg=anime_df,
    vlines=rules_df["begin"],
)
plt.show()

### By primary type

In [None]:
# All releases, includes reprints - Double check
# Sort properly
# Distinct card names per (primary type, release date), pivoted to one column per
# primary type; dates with no release of a type are filled with 0.
release_primary_type = (
    full_df.groupby(["Primary type", "Release"])["Name"]
    .nunique()
    .unstack("Primary type")
    .sort_index()
    .fillna(0)
    .astype(int)
)

# Yearly totals, shown with one row per primary type.
release_primary_type.groupby(release_primary_type.index.strftime("%Y")).sum().transpose()

In [None]:
# One colour per primary-type column, looked up by column name.
primary_type_colors = [plot.colors_dict[name] for name in release_primary_type]

_ = plot.rate(release_primary_type, colors=primary_type_colors, bg=anime_df, vlines=rules_df["begin"])
plt.show()

### By secondary type

In [None]:
# All releases, includes reprints - Double check
# Sort properly
# "Secondary type" is exploded first, so a card holding several secondary types is
# counted once under each of them.
secondary_exploded = full_df.explode("Secondary type")
release_secondary_type = (
    secondary_exploded.groupby(["Secondary type", "Release"])["Name"]
    .nunique()
    .unstack("Secondary type")
    .sort_index()
    .fillna(0)
    .astype(int)
)
del secondary_exploded  # temporary only; keep the notebook namespace unchanged

# Yearly totals, shown with one row per secondary type.
release_secondary_type.groupby(release_secondary_type.index.strftime("%Y")).sum().transpose()

In [None]:
# Rate plot of releases per secondary type, using plot.rate's default colours.
_ = plot.rate(
    release_secondary_type,
    bg=anime_df,
    vlines=rules_df["begin"],
)
plt.show()

### By attribute

In [None]:
# All releases, includes reprints - Double check
# Sort properly
# Distinct card names per (attribute, release date), pivoted to one column per
# attribute; dates with no release of an attribute are filled with 0.
release_attribute = (
    full_df.groupby(["Attribute", "Release"])["Name"]
    .nunique()
    .unstack("Attribute")
    .sort_index()
    .fillna(0)
    .astype(int)
)

# Yearly totals, shown with one row per attribute.
release_attribute.groupby(release_attribute.index.strftime("%Y")).sum().transpose()

In [None]:
# One colour per attribute column, looked up by column name.
attribute_colors = [plot.colors_dict[name] for name in release_attribute]

# This plot additionally switches on cumsum and limit_year.
_ = plot.rate(
    release_attribute,
    colors=attribute_colors,
    bg=anime_df,
    vlines=rules_df["begin"],
    cumsum=True,
    limit_year=True,
)
plt.show()

### By monster type

In [None]:
# All releases, includes reprints - Double check
# Sort properly
# Distinct card names per (monster type, release date), pivoted to one column per
# monster type; dates with no release of a type are filled with 0.
release_monster_type = (
    full_df.groupby(["Monster type", "Release"])["Name"]
    .nunique()
    .unstack("Monster type")
    .sort_index()
    .fillna(0)
    .astype(int)
)

# Yearly totals, shown with one row per monster type.
release_monster_type.groupby(release_monster_type.index.strftime("%Y")).sum().transpose()

In [None]:
# One subplot per monster type, using plot.rate's default colours.
_ = plot.rate(
    release_monster_type,
    bg=anime_df,
    vlines=rules_df["begin"],
    subplots=True,
)
plt.show()

### By Level/Rank

In [None]:
# Box plot of Level/Rank against release date, leaving out rows typed "Non-game card".
_ = plot.box(
    full_df.loc[full_df["Card type"] != "Non-game card", ["Release", "Level/Rank"]],
    color=plot.colors_dict["Level"],
    notch=True,
)
plt.show()

### By ATK

In [None]:
# Box plot of ATK against release date, leaving out rows typed "Non-game card".
_ = plot.box(
    full_df.loc[full_df["Card type"] != "Non-game card", ["Release", "ATK"]],
    color=plot.colors_dict["Effect Monster"],
    notch=True,
)
plt.show()

### By DEF

In [None]:
# Box plot of DEF against release date, leaving out rows typed "Non-game card".
_ = plot.box(
    full_df.loc[full_df["Card type"] != "Non-game card", ["Release", "DEF"]],
    color=plot.colors_dict["Effect Monster"],
    notch=True,
)
plt.show()

### By pendulum scale

In [None]:
# Box plot of Pendulum Scale against release date, leaving out rows typed "Non-game card".
_ = plot.box(
    full_df.loc[full_df["Card type"] != "Non-game card", ["Release", "Pendulum Scale"]],
    color=plot.colors_dict["Pendulum Monster"],
    notch=True,
)
plt.show()

### By link

In [None]:
# Box plot of Link against release date, leaving out rows typed "Non-game card".
# Unlike the other box plots, no notch argument is passed here.
_ = plot.box(
    full_df.loc[full_df["Card type"] != "Non-game card", ["Release", "Link"]],
    color=plot.colors_dict["Link Monster"],
)
plt.show()

# Debug

## Merge failed

Names missing

In [None]:
# Rows of the card list whose name does not appear in the merged table.
# Boolean indexing selects those rows directly: unlike where() followed by
# dropna(how="all"), it keeps the column dtypes intact and cannot discard an unmatched
# row merely because every one of its values is missing.
all_cards_df.loc[~all_cards_df["Name"].isin(full_df["Name"])]

Card number missing

In [None]:
# Set-list rows whose card number does not appear in the merged table, ignoring card
# numbers that start with "RD/" (presumably Rush Duel prints — confirm).
# Both masks span the full index, so no implicit index alignment is involved.
# na=True marks rows without a card number as "to be ignored", which keeps them out of
# the result explicitly instead of relying on how misaligned masks get filled.
set_lists_df.loc[
    ~set_lists_df["Card number"].isin(full_df["Card number"])
    & ~set_lists_df["Card number"].str.startswith("RD/", na=True)
]

 # Epilogue

In [None]:
# Hand the run's start timestamp to the project's benchmark helper under the report
# name "timeline". NOTE(review): presumably records the run duration — confirm in yugiquery.
benchmark(report="timeline", timestamp=timestamp)

In [None]:
# Emit the report footer (counterpart of the header() call in the first cell).
footer()

## HTML export

In [None]:
# May need to sleep for a few seconds after saving
save_notebook()

In [None]:
# Export this notebook, located via the user notebooks directory.
# NOTE(review): the output format and location are decided inside export_notebook — the
# section heading suggests HTML; confirm.
export_notebook(dirs.NOTEBOOKS.user / "Timeline.ipynb")

## Git

In [None]:
# Commit with a message stamped with the run's start time in ISO 8601 form.
# NOTE(review): the first argument looks like a file pattern matching "Timeline"/"timeline"
# paths — confirm how git.commit interprets it.
git.commit("*[Tt]imeline*", f"Timeline update - {timestamp.isoformat()}")