In [None]:
from yugiquery import *

# Enable interactive rendering for every DataFrame output in this notebook.
# NOTE(review): presumably itables' init_notebook_mode re-exported by yugiquery — confirm
init_notebook_mode(all_interactive=True)

# Notebook title banner (helper provided by the yugiquery star import)
header("My Decks")

---

Table of Contents
=================

*   [1  Data loading](#Data-loading)
    *   [1.1  Read decks](#Read-decks)
*   [2  Check changes](#Check-changes)
    *   [2.1  Load previous data](#Load-previous-data)
    *   [2.2  Generate changelogs](#Generate-changelogs)
    *   [2.3  Save data](#Save-data)
*   [3  Data visualization](#Data-visualization)
    *   [3.1  Full data](#Full-data)
    *   [3.2  Card types](#Card-types)
    *   [3.3  Monsters](#Monsters)
        *   [3.3.1  Attributes](#Attributes)
        *   [3.3.2  Primary types](#Primary-types)
            *   [3.3.2.1  Has effect discrimination](#Has-effect-discrimination)
            *   [3.3.2.2  Is pendulum discrimination](#Is-pendulum-discrimination)
            *   [3.3.2.3  By attribute](#By-attribute)
        *   [3.3.3  Secondary types](#Secondary-types)
            *   [3.3.3.1  By attribute](#By-attribute)
            *   [3.3.3.2  By secondary type](#By-secondary-type)
        *   [3.3.4  Monster types](#Monster-types)
            *   [3.3.4.1  By Attribute](#By-Attribute)
            *   [3.3.4.2  By primary type](#By-primary-type)
            *   [3.3.4.3  By secondary type](#By-secondary-type)
        *   [3.3.5  ATK](#ATK)
        *   [3.3.6  DEF](#DEF)
        *   [3.3.7  Level/Rank](#Level/Rank)
            *   [3.3.7.1  ATK statistics](#ATK-statistics)
            *   [3.3.7.2  DEF statistics](#DEF-statistics)
        *   [3.3.8  Pendulum scale](#Pendulum-scale)
            *   [3.3.8.1  ATK statistics](#ATK-statistics)
            *   [3.3.8.2  DEF statistics](#DEF-statistics)
            *   [3.3.8.3  Level/Rank statistics](#Level/Rank-statistics)
        *   [3.3.9  Link](#Link)
            *   [3.3.9.1  ATK statistics](#ATK-statistics)
        *   [3.3.10  Link Arrows](#Link-Arrows)
            *   [3.3.10.1  By combination](#By-combination)
            *   [3.3.10.2  By unique](#By-unique)
            *   [3.3.10.3  By link](#By-link)
    *   [3.4  Spell & Trap](#Spell-&-Trap)
        *   [3.4.1  Properties](#Properties)
    *   [3.5  Effect type](#Effect-type)
        *   [3.5.1  Card type discrimination](#Card-type-discrimination)
    *   [3.6  Archseries](#Archseries)
        *   [3.6.1  By card type](#By-card-type)
        *   [3.6.2  By primary type](#By-primary-type)
        *   [3.6.3  By secondary type](#By-secondary-type)
        *   [3.6.4  By monster type](#By-monster-type)
        *   [3.6.5  By property](#By-property)
    *   [3.7  Artworks](#Artworks)
        *   [3.7.1  By card type](#By-card-type)
        *   [3.7.2  By primary type](#By-primary-type)
    *   [3.8  Errata](#Errata)
        *   [3.8.1  By card type](#By-card-type)
        *   [3.8.2  By primary type](#By-primary-type)
        *   [3.8.3  By artwork](#By-artwork)
    *   [3.9  TCG & OCG status](#TCG-&-OCG-status)
        *   [3.9.1  TCG status](#TGC-status)
            *   [3.9.1.1  By card type](#By-card-type)
            *   [3.9.1.2  By monster type](#By-monster-type)
            *   [3.9.1.3  By archseries](#By-archseries)
        *   [3.9.2  OCG status](#OCG-status)
            *   [3.9.2.1  By card type](#By-card-type)
            *   [3.9.2.2  By monster type](#By-monster-type)
            *   [3.9.2.3  By archseries](#By-archseries)
        *   [3.9.3  TCG vs. OCG status](#TCG-vs.-OCG-status)
*   [4  Epilogue](#Epilogue)
    *   [4.1  HTML export](#HTML-export)
<!-- *   [4.2  Git](#Git) -->

# Data loading

In [None]:
import requests
from typing import List, Dict, Any


# yugiquery/helpers module
def read_ydk(file_path: Path | str) -> pd.DataFrame:
    """
    Read a YDK file and return a DataFrame with the card codes.

    Args:
        file_path (Path | str): Path to the YDK file.

    Returns:
        (pd.DataFrame): DataFrame with the card codes.

    """
    with open(file_path, "r") as file:
        lines = file.readlines()

    data = []
    current_section = None

    for line in lines:
        line = line.strip()
        if not line:  # Skip empty lines
            continue
        if line in ["#main", "#extra", "!side"]:
            current_section = line[1:].capitalize()
        elif current_section:
            data.append({"Code": line, "Section": current_section, "Deck": file_path.stem})

    df = pd.DataFrame(data)
    return df


# api module
def fetch_ygoprodeck(timeout: float = 30) -> List[Dict[str, Any]]:
    """
    Fetch the card data from ygoprodeck.com.

    Args:
        timeout (float): Seconds to wait for the server to connect or to send data before giving up. Without a timeout a stalled connection would block forever.

    Returns:
        (List[Dict[str, Any]]): List of card data, taken from the "data" key of the JSON response.

    Raises:
        requests.exceptions.RequestException: If the request fails, times out or returns an HTTP error status.
        KeyError: If the response JSON has no "data" key.
    """
    response = requests.get("https://db.ygoprodeck.com/api/v7/cardinfo.php", timeout=timeout)
    # Turn 4xx/5xx answers into exceptions instead of trying to parse an error page
    response.raise_for_status()
    result = response.json()
    return result["data"]


# yugiquery module
def convert_ydk(ydk_df: pd.DataFrame) -> pd.DataFrame | None:
    """
    Convert a DataFrame with YDK card codes to a DataFrame with card names.

    Args:
        ydk_df (pd.DataFrame): DataFrame with YDK card codes.

    Returns:
        (pd.DataFrame | None): DataFrame with card names. If unable to obtain the card data, returns None.
    """
    ygoprodeck_file = dirs.DATA.joinpath("ygoprodeck.json")
    try:
        result = fetch_ygoprodeck()
        with open(ygoprodeck_file, "w") as file:
            json.dump(result, file, indent=4)
        ydk_data = pd.DataFrame(result).set_index("id")
    except Exception as e:
        print(e)
        if ygoprodeck_file.is_file():
            ydk_data = pd.read_json(ygoprodeck_file).set_index("id")
        else:
            print("Could not fetch or read ygoprodeck data")
            return None

    ydk_df = ydk_df.copy()

    def get_ydk_card(code) -> float | str:
        code = int(code)
        if code not in ydk_data.index:
            return np.nan
        return ydk_data.loc[code, "name"]

    ydk_df["Name"] = ydk_df["Code"].apply(get_ydk_card)
    not_found = ydk_df[ydk_df["Name"].isna()]["Code"].values
    if len(not_found) > 0:
        print(f"\nUnable to find {len(not_found)} cards:\n ⏺", "\n ⏺ ".join(not_found), "\n")
    ydk_df = ydk_df.drop("Code", axis=1).dropna(subset=["Name"]).reset_index(drop=True)
    ydk_df["Count"] = ydk_df.groupby(["Name", "Section", "Deck"])["Name"].transform("count")
    ydk_df = ydk_df.drop_duplicates().reset_index(drop=True)
    return ydk_df


# yugiquery module
# TODO: rename
def get_ydk(*files: Path | str) -> pd.DataFrame | None:
    """
    Load YDK files and return a DataFrame with the card names.

    Args:
        files (Path | str): Paths to YDK files. If not provided, loads all YDK files in the data directory.

    Returns:
        (pd.DataFrame | None): DataFrame with card names, as produced by convert_ydk. If unable to obtain the card data, returns None.
    """
    # Normalise to Path objects: the signature accepts strings, but `.name` below needs a Path
    paths = [Path(file) for file in files] if files else list(dirs.DATA.glob("*.ydk"))

    frames = []
    for path in paths:
        frames.append(read_ydk(path))
        print(f"Loaded {path.name} deck")

    # Concatenate once at the end; pd.concat raises on an empty list, so keep the empty-frame fallback
    ydk_df = pd.concat(frames) if frames else pd.DataFrame()
    return convert_ydk(ydk_df)

In [None]:
# yugiquery/helpers module
def read_decklist(file_path: Path | str) -> pd.DataFrame:
    """
    Read a decklist file and return a DataFrame with the card names.

    Args:
        file_path (Path, str): Path to the decklist file.

    Returns:
        (pd.DataFrame): DataFrame with the card names.
    """
    with open(file_path, "r") as file:
        lines = file.readlines()

    data = []
    current_section = None

    for line in lines:
        line = line.strip()
        if not line:  # Skip empty lines
            continue
        if line.endswith(":"):  # Section header
            current_section = line[:-1].capitalize()
        elif current_section:
            quantity, card_name = line.split("x ", 1)
            quantity = int(quantity)
            data.append({"Name": card_name, "Count": quantity, "Section": current_section, "Deck": file_path.stem})

    df = pd.DataFrame(data)
    return df


# yugiquery module
# TODO: rename
def get_decklists(*files: Path | str) -> pd.DataFrame:
    """
    Load decklist files and return a DataFrame with the card names.

    Args:
        files (Path | str): Paths to the decklist files. If not provided, loads all "*.txt" files in the data directory.

    Returns:
        (pd.DataFrame): DataFrame with card names. The "Monster", "Spell" and "Trap" sections are folded into "Main".
    """
    # Normalise to Path objects: the signature accepts strings, but `.name` below needs a Path
    paths = [Path(file) for file in files] if files else list(dirs.DATA.glob("*.txt"))

    frames = []
    for path in paths:
        frames.append(read_decklist(path))
        print(f"Loaded {path.name} deck")

    # Concatenate once at the end; pd.concat raises on an empty list, so keep the empty-frame fallback
    decklist_df = pd.concat(frames) if frames else pd.DataFrame()

    # Decklists split the main deck by card type; map those headers onto the single "Main" section
    return decklist_df.replace({"Section": {"Monster": "Main", "Spell": "Main", "Trap": "Main"}})

In [None]:
def assign_deck(collection_df: pd.DataFrame, deck_df: pd.DataFrame, return_collection: bool = False) -> pd.DataFrame:
    """
    Match every deck entry against the collection and report which collection rows cover it.

    For each row of deck_df, the collection rows with the same "Name" are consumed in order
    (rows whose "Deck" equals the deck's name first, rows without a deck second) until the
    requested count is covered. Every collection row used produces one result row; if the
    collection cannot cover the request, an extra row with "Count" 0 and the shortfall in
    "missing" is added.

    Neither input DataFrame is modified.

    Args:
        collection_df (pd.DataFrame): DataFrame with the collection card names and quantities ("Name" and "Count" columns, optionally "Deck").
        deck_df (pd.DataFrame): DataFrame with the deck card names and quantities ("Name" and "Count" columns, optionally "Deck").
        return_collection (bool): If True, also returns the collection rows whose card name is not used by any deck, and sorts the result by "Name" and "Deck".

    Returns:
        (pd.DataFrame): DataFrame with at least the columns "Name", "Count", "Deck" and "missing" (0 where nothing is missing), plus any other collection columns. Empty, with those four columns, when deck_df has no rows and return_collection is False.
    """
    has_deck_column = "Deck" in collection_df.columns
    result_rows = []

    for _, deck_row in deck_df.iterrows():
        card_name = deck_row["Name"]
        deck_count = deck_row["Count"]
        deck_deck = deck_row["Deck"] if "Deck" in deck_row else np.nan

        # Collection rows holding this card
        candidates = collection_df[collection_df["Name"] == card_name]

        # Keep only rows assigned to this deck or to no deck; ascending sort puts NaN last,
        # so rows already assigned to this deck are consumed first
        if has_deck_column:
            candidates = candidates[candidates["Deck"].isin([deck_deck, np.nan])]
            candidates = candidates.sort_values(by=["Deck"], ascending=[True])

        # NOTE(review): collection_df itself is never decremented, so the same collection row
        # can cover several deck rows (e.g. the same card in two decks) — confirm this is intended.
        for _, collection_row in candidates.iterrows():
            if deck_count <= 0:
                break  # Deck count fulfilled

            available_count = collection_row["Count"]
            deck_count -= min(deck_count, available_count)

            result_row = {
                "Name": card_name,
                # NOTE(review): this is the full count of the collection row, not the number of
                # copies actually taken for the deck — confirm which one is wanted.
                "Count": available_count,
                "Deck": deck_deck,
                "missing": np.nan,
            }
            # Carry over every other collection column unchanged
            for col in collection_row.index.difference(list(result_row)):
                result_row[col] = collection_row[col]

            result_rows.append(result_row)

        # Whatever could not be covered by the collection is reported as missing
        if deck_count > 0:
            result_rows.append({"Name": card_name, "Count": 0, "Deck": deck_deck, "missing": deck_count})

    # An empty deck would otherwise yield a frame without columns and break the lookups below
    if result_rows:
        result_df = pd.DataFrame(result_rows)
    else:
        result_df = pd.DataFrame(columns=["Name", "Count", "Deck", "missing"])

    # Append the collection rows whose card is not used by any deck
    if return_collection:
        result_df = pd.concat(
            [result_df, collection_df[~collection_df["Name"].isin(result_df["Name"])]], ignore_index=True
        ).sort_values(by=["Name", "Deck"])

    # Rows without a shortfall carry NaN in "missing"; report them as 0
    result_df["missing"] = result_df["missing"].fillna(0)

    return result_df

## Read decks

In [None]:
# UTC timestamp of this run; reused below when naming the changelog and data files
timestamp = arrow.utcnow()

In [None]:
# Load decks from YDK and decklist files and stack them into a single data frame.
# get_ydk() returns None when the card database can neither be fetched nor read from cache.
# NOTE(review): pd.concat is documented to drop None entries silently — confirm that
# continuing with the decklists alone is the intended fallback.
deck_df = pd.concat([get_ydk(), get_decklists()])

In [None]:
# Process the deck data frame
# NOTE(review): process_collection comes from yugiquery; presumably merge_data=True joins the
# card data onto each deck entry — confirm against its documentation.
deck_df = process_collection(deck_df, merge_data=True)

In [None]:
# Load the most recent saved deck report, if there is one
previous_deck_df, previousdeck_ts = load_corrected_latest("deck")

if previous_deck_df is not None:
    # Cast the columns both frames share to the current dtypes so the frames can be compared
    shared_columns = previous_deck_df.columns.intersection(deck_df.columns)
    current_dtypes = deck_df[shared_columns].dtypes.to_dict()
    previous_deck_df = previous_deck_df.astype(current_dtypes)

In [None]:
# Compare the current decks with the previous report and save the differences, if any
if previous_deck_df is None:
    # First run: there is nothing to compare against
    deck_changelog = None
    print("Skipped")
else:
    deck_changelog = generate_changelog(previous_deck_df, deck_df, col="Name")
    if not deck_changelog.empty:
        display(deck_changelog)
        deck_changelog.to_csv(
            dirs.DATA
            / make_filename(
                report="deck",
                timestamp=timestamp,
                # The timestamp of the previous report, not the previous data frame itself
                previous_timestamp=previousdeck_ts,
            ),
            index=True,
        )
        print("Changelog saved")

In [None]:
# Save the deck data on the first run (no changelog) or whenever the changelog has entries
if deck_changelog is None or not deck_changelog.empty:
    output_path = dirs.DATA / make_filename(report="deck", timestamp=timestamp)
    deck_df.to_csv(output_path, index=False)
    print("Data saved")
else:
    print("No changes. New data not saved")

In [None]:
# Show the processed deck data as the cell output
deck_df

In [None]:
# Other

# Merge the collection and deck data frames
# NOTE(review): collection_df is not defined in the visible part of this notebook — confirm it
# is loaded before this cell runs, otherwise this raises NameError.
combined_df = assign_deck(collection_df, deck_df)