In [None]:
# Configure Drive or Jupyter notebook -- only runs when first loaded.
# The guard skips setup when CONFIG_DONE already exists in the kernel's
# globals (presumably set by project_path.ipynb -- TODO confirm), so
# re-running this cell does not clone the repo a second time.
if "CONFIG_DONE" not in globals():
    # In Colab, clone the repo to access data and functions.
    # NOTE(review): `drive` is imported only as a Colab-detection probe;
    # nothing in this cell mounts it.
    try:
        from google.colab import drive  # type: ignore

        IN_COLAB = True

        # Clone the repo and make it the working directory
        !git clone https://github.com/doctorsmylie/mtg-draft-agent
        %cd mtg-draft-agent

    except ModuleNotFoundError:
        # google.colab is not importable, so we are running outside Colab
        IN_COLAB = False

    # Finish configuration -- also configures notebook outside of Colab
    %run "project_path.ipynb"
else:
    print("Config done already")

In [None]:
import numpy as np
import pandas as pd

# For managing paths
import pathlib

from time import time

from itertools import product

# Start exploring
See the data in 'draft_data...' files.

In [None]:
# Folder containing all the data
# (DATA_FOLDER is not defined in this notebook; presumably it comes from
# the project_path.ipynb configuration run -- TODO confirm)
print(DATA_FOLDER)

# Expansion code; used below to build the data file's name and folder
expansion = "DSK"

In [None]:
# Locate the compressed draft log for the chosen expansion
draftfilename = "draft_data_public." + expansion + ".PremierDraft.csv.gz"
draft_file = pathlib.Path(DATA_FOLDER) / expansion / draftfilename

# Read only the first 10,000 rows of the gzip-compressed CSV
draftdata = pd.read_csv(draft_file, nrows=10000, compression="gzip")
num_cols = len(draftdata.columns)

In [None]:
# List every column name, one per line
for col in draftdata.columns:
    print(col)
print()

# (rows, columns) of the loaded sample
print(draftdata.shape)

Great, it looks like each row has the cards that were available to the player, the card that was picked, and the cards that the player already picked. Let's check out specific rows.

In [None]:
# Show every column of one row as "column: value"
row = 2
for idx, column_name in enumerate(draftdata.columns):
    print(column_name, draftdata.iloc[row, idx], sep=": ")

As expected, we have the card that was picked in `pick`. The columns `pack_card` are booleans indicating if the card was there, and `pool` are the cards chosen in previous rounds. See the following specific example.

In [None]:
# Rows (turns) to inspect
rows = [0, 1, 2]

# Columns to inspect: turn identifiers, the pick, and the pack/pool
# entries of three sample cards
cols = [
    "draft_id",
    "pack_number",
    "pick_number",
    "pick",
    # Available cards (only show a couple)
    "pack_card_Infernal Phantom",
    "pack_card_Floodpits Drowner",
    "pack_card_Fear of Being Hunted",
    # Picked cards (only show a couple)
    "pool_Infernal Phantom",
    "pool_Floodpits Drowner",
    "pool_Fear of Being Hunted",
]

# One "column: value" line per column, with a blank line after each row
for row in rows:
    for col in cols:
        print(col, draftdata.at[row, col], sep=": ")
    print()

# Get list of cards

Our model will need:
1. List of available options (a list `options`)
2. Card chosen (a list `chosen`)

Our current dataframe has that data but in a 0-1 encoding. We could instead use card indices and an embedding layer. We also need to filter out incomplete drafts.

Let's look at a couple of draft_ids:

In [None]:
# Get unique draft ids (a draft spans several rows, one per pick)
draft_ids = draftdata["draft_id"].unique()
print("Number of rows:", draftdata.shape[0])
# Print the count itself; `draft_ids.shape` would show a tuple such as (n,)
print("Number of unique ids:", len(draft_ids))

In [None]:
# Find which ids are complete: count how many rows each draft_id has
picks_per_id = draftdata.value_counts("draft_id")

# Temporarily lift pandas' row-display limit so every draft id is printed
with pd.option_context("display.max_rows", None):
    print(picks_per_id)

Since we only loaded the first 10,000 rows, the file was cut off partway through a draft, so one draft is incomplete. The others, however, seem complete. Let's look at the picks of a single `draft_id`.

In [None]:
# Use the first draft as the example. The variable is called `example_id`
# rather than `id` so that this cell does not shadow the builtin id().
example_id = draft_ids[0]

cols = ["pack_number", "pick_number", "pick"]

# Last expression of the cell: the notebook renders it as a table
draftdata.loc[draftdata["draft_id"] == example_id, cols]

We'd like a dictionary that assigns indices to card names. First, we need a list of all cards. We'll get them from the `pack_card` and `pool` column names. Either of the two should have enough data for our dict, but we'll build the list from both as a sanity check.

In [None]:
# Card names taken from the `pack_card_` columns. The prefix is matched at
# the START of the column name and only that leading prefix is removed, so
# a card whose own name contains the text "pool" or "pack_card" can neither
# land in the wrong list nor have part of its name stripped.
card_names_packs = [
    name[len("pack_card_"):]
    for name in draftdata.columns
    if name.startswith("pack_card_")
]

# Card names taken from the `pool_` columns, extracted the same way
card_names_pool = [
    name[len("pool_"):] for name in draftdata.columns if name.startswith("pool_")
]

# Check that the results are the same
print("Card lists match?:", card_names_packs == card_names_pool)

Since the lists of cards in the `pack_card` and `pool` columns are the same, we can use either. We'll print all cards for reference.

In [None]:
# Strip the annotation (pack_card_) from each pack column to get the card name
card_names = [
    name.replace("pack_card_", "")
    for name in draftdata.filter(regex="pack_card").columns
]

# Print the full card list for reference
for name in card_names:
    print(name)

And now, writing the dict is a piece of cake.

In [None]:
# Card name -> position in card_names, and the inverse lookup
card_to_idx = {card: idx for idx, card in enumerate(card_names)}
idx_to_card = dict(enumerate(card_names))

In [None]:
# Display the card name -> index mapping (last expression of the cell)
card_to_idx

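I'll leave this code here for reference, but the same logic is also available as a reusable function in the `functions.card_io` module, which is tested at the end of this notebook.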

# Build `chosen` and `options` lists
In fact, I'll build a dict indexed by `draft_id` that contains the lists corresponding to each individual draft.

In [None]:
def count_to_list(row, prefix):
    """
    Turns a vector of counts into a list of card names, each one
    repeated as many times as the vector's entry.

    Parameters
    ----------
    row : pandas.DataFrame
        Dataframe holding the turn of interest in its first row; only
        that first row is read.
    prefix : str
        Literal column prefix (e.g. "pack_card_" or "pool_"). Only columns
        whose name STARTS with it are used; it is not treated as a regex.

    Returns
    -------
    list of str
        Column names with the leading prefix removed, in column order,
        each repeated as many times as its count. Columns whose count is
        not greater than zero (NaN included) are skipped.

    Raises
    ------
    IndexError
        If `row` has no rows.
    """
    # Anchor the match at the start of the name so that an unrelated column
    # that merely contains the prefix text is not picked up.
    prefix_columns = [
        col for col in row.columns if isinstance(col, str) and col.startswith(prefix)
    ]

    # Counts of the first row, indexed by column name
    counts = row[prefix_columns].iloc[0]

    card_list = []
    for col_name, repetitions in counts.items():
        if repetitions > 0:
            # Slice off only the leading prefix; the rest of the name is kept
            card_name = col_name[len(prefix):]
            # int() lets boolean or float-typed counts repeat the list
            card_list.extend([card_name] * int(repetitions))

    return card_list

In [None]:
# Draft data: a draft counts as complete when it has num_packs * num_picks rows
num_packs = 3
num_picks = 14

# Get unique ids
draft_ids = draftdata["draft_id"].unique()

# Get columns with the player's options
prefix_pack = "pack_card_"
pack_columns = draftdata.filter(regex=prefix_pack).columns
pack_columns = list(pack_columns)

# Get columns with the player's pool of cards
prefix_pool = "pool_"
pool_columns = draftdata.filter(regex=prefix_pool).columns
pool_columns = list(pool_columns)

# Get only card names
card_names = [name.replace(prefix_pack, "") for name in pack_columns]

# Compile data for each draft_id.
# Result: drafts[draft_id] = (chosen, options, pool), three lists indexed by
# turn and holding card indices taken from card_to_idx.
drafts = {}
for i, draft_id in enumerate(draft_ids):
    time_start = time()

    # Get draft info for this id once; every turn below is looked up in this
    # small slice instead of re-scanning the whole dataframe.
    data_id = draftdata.loc[draftdata["draft_id"] == draft_id, :]

    # Check that we have the right amount of data
    num_rows = data_id.shape[0]
    if num_rows != num_packs * num_picks:
        print(f"{i+1}/{len(draft_ids)}", end=": ")
        print(
            f"Draft incomplete. Only {num_rows} out of {num_packs*num_picks} rows. Skipping id {draft_id}."
        )
        continue

    # Build iterator to extract information in turn order
    # (turns are looked up with 0-based pack and pick numbers)
    draft_turns = product(range(num_packs), range(num_picks))

    chosen = []
    options = []
    pool = []
    missing_turn = None
    for pack_idx, pick_idx in draft_turns:
        # Get row for the turn by filtering pack number and pick number
        df_turn = data_id[
            (data_id["pack_number"] == pack_idx)
            & (data_id["pick_number"] == pick_idx)
        ]

        # The row count can be right while a turn is still missing (e.g. a
        # repeated turn); report it instead of failing on index[0] below.
        if df_turn.empty:
            missing_turn = (pack_idx, pick_idx)
            break

        # Get pick, cards in pack, and cards in pool
        df_index = df_turn.index[0]
        pick = df_turn.at[df_index, "pick"]
        cards_in_pack = count_to_list(df_turn, prefix_pack)
        cards_in_pool = count_to_list(df_turn, prefix_pool)

        # Store results as indices
        chosen.append(card_to_idx[pick])
        options.append([card_to_idx[card] for card in cards_in_pack])
        pool.append([card_to_idx[card] for card in cards_in_pool])

    if missing_turn is not None:
        print(f"{i+1}/{len(draft_ids)}", end=": ")
        print(
            f"Draft malformed. No row for pack {missing_turn[0]}, pick {missing_turn[1]}. Skipping id {draft_id}."
        )
        continue

    # Store results for the id
    drafts[draft_id] = (chosen, options, pool)

    time_end = time()
    dt = time_end - time_start
    print(f"{i+1}/{len(draft_ids)}: {np.round(dt,3)}")

## Sanity check
Verify that the list of cards chosen from turn `0` to turn `i` is the same as the pool in turn `i+1`.

In [None]:
# Check, for every stored draft, that the pool recorded at each turn equals
# the cards chosen in all earlier turns.
all_equal = True
for draft_id, (chosen, options, pool) in drafts.items():
    # Reconstructed pool: before turn t the player holds the cards chosen in
    # turns 0..t-1. Sorted to avoid order mismatches. The final deck (all
    # cards chosen) is not a pool, so the range stops one short of it.
    pool_2 = [sorted(chosen[:turn]) for turn in range(len(chosen))]

    # Recorded pools, sorted the same way
    pool_sort = [sorted(P) for P in pool]

    # Display any error
    if pool_2 != pool_sort:
        print(" ---------------------- ")
        print("The pools are different")
        print(" ---------------------- ")
        print("id:", draft_id)
        print()

        print("Original pool:")
        print(pool_sort)
        print()

        print("Reconstructed pool:")
        print(pool_2)
        print()

        all_equal = False

if all_equal:
    print("No errors!")

## Examples of output

In [None]:
# Use the first draft stored in `drafts` as the example. Taking the key from
# `drafts` itself (rather than draft_ids[0]) cannot raise a KeyError when the
# first id in the data was skipped, and avoids shadowing the builtin id().
example_id = next(iter(drafts))
chosen, options, pool = drafts[example_id]

print("Number of chosen cards:", len(chosen))
print("Chosen cards:")
print(chosen)

In [None]:
# Displaying the player's first 5 picks
# (chosen, options and pool are parallel lists: position idc holds the
# card indices for turn idc)
for idc in range(5):
    print(f"Turn {idc}")
    print("Chosen: ", chosen[idc])
    print("Options:", options[idc])
    print("Pool:   ", pool[idc])
    print()

In [None]:
# Translate each chosen index back to its card name with idx_to_card
for card_idx in chosen:
    print(idx_to_card[card_idx])

In [None]:
# Getting a card's index using card_to_idx
# NOTE(review): the card name is hard-coded; the lookup raises KeyError if
# card_to_idx has no entry for it.
card_to_idx["Enter the Enigma"]

# Now test the functions in the module

In [None]:
import functions.card_io as card_io

In [None]:
# Get unique draft ids
draft_ids = draftdata["draft_id"].unique()

# Get card names and card-index dictionaries
# NOTE(review): this rebinds card_names, card_to_idx and idx_to_card, which
# were built by hand earlier in the notebook, to the module's results.
card_names, card_to_idx, idx_to_card = card_io.get_cards_from_draft_df(draftdata)

# Get draft history
# (presumably the module counterpart of the hand-built `drafts` dict, plus
# the ids it kept -- TODO confirm against functions/card_io)
drafts_2, draft_ids_2 = card_io.get_played_drafts(draftdata, card_to_idx)

In [None]:
# Compare with previous results
# NOTE: We removed the ids of incomplete drafts from draft_ids_2
#       In particular, draft_ids_2 is a subset of draft_ids.
#       The ids to compare against are therefore the keys of `drafts` (the
#       drafts that were kept), not draft_ids minus its last entry: that
#       shortcut is only right when exactly the last draft was skipped, and
#       it breaks when the two sequences differ in length.
complete_ids = list(drafts)
print("Same draft ids?:", complete_ids == list(draft_ids_2))

drafts_equal = True
for draft_id in draft_ids_2:
    # .get reports a mismatch (instead of raising KeyError) when the module
    # kept an id that is missing from `drafts`
    if drafts.get(draft_id) != drafts_2[draft_id]:
        print(f"Not equal at id={draft_id}")
        drafts_equal = False

print("Same drafts?:", drafts_equal)