In [1]:
import pandas as pd
import numpy as np
import json
from collections import Counter, defaultdict
import matplotlib.pyplot as plt
pd.options.mode.chained_assignment = None

# Data files are formatted in a weird way, this function helps read them in
def read_sts_file(arr, cap=1000):
    df = pd.DataFrame()
    nm = 0
    for str in arr:
        if nm>cap:
            break
        with open(str) as f:
            data = json.load(f)
            df = pd.concat([df, pd.DataFrame(rec['event'] for rec in data)])
            nm+=1
    
    return df

The below cell reads in the data for runs. The first line (reading in `runs.json`) reads only my personal runs. I'm using this data set for development, so it doesn't take forever to run everything.

The second line (reading `all_runs.json`) reads in about 422,000 runs from a public data set. This data is available in the Slay the Spire Discord, and the sample I have is not anywhere close to all of the available data.

In [2]:
df_backup = read_sts_file(["runs.json"])

# df_backup = read_sts_file(["all_runs.json"])

In [3]:
df = df_backup.copy()
df.head()

Unnamed: 0,gold_per_floor,floor_reached,playtime,items_purged,score,play_id,local_time,is_ascension_mode,campfire_choices,neow_cost,...,boss_relics,items_purged_floors,is_endless,potions_floor_spawned,killed_by,ascension_level,special_seed,basemod:card_modifiers,relic_stats,daily_mods
0,"[114, 39, 65, 82, 6, 35, 35, 35, 35, 45, 45, 7...",33,960,[Strike_R],555,fdbe49b9-1846-4b0f-aea4-81f669f21767,20240806233804,True,"[{'data': 'Disarm', 'floor': 8, 'key': 'SMITH'...",NONE,...,"[{'not_picked': ['Ectoplasm', 'Runic Dome'], '...",[2],False,"[6, 10, 14, 16, 18, 21, 30]",Automaton,17,,,,
1,"[114, 114, 132, 57, 77, 106, 106]",7,213,[],94,31fb9095-70e9-46cd-9069-7d6ae4c20237,20240812111502,True,[],TEN_PERCENT_HP_LOSS,...,[],[],False,"[1, 3, 5]",Gremlin Gang,17,,,,
2,"[116, 216, 11, 22, 40, 68, 68, 97, 97, 116, 116]",11,227,[],154,98eb822b-53a7-481c-9e92-367e16e75499,20240813232656,True,"[{'data': 'Evolve', 'floor': 7, 'key': 'SMITH'}]",NONE,...,[],[],False,"[1, 8, 10]",3 Louse,17,0.0,,,
3,"[112, 112, 112, 124, 136, 176, 188, 200, 212, ...",16,393,[],250,9f0d32dc-eaad-43c4-b8a7-e934c6da2614,20240814165737,True,"[{'data': 'Bash', 'floor': 7, 'key': 'SMITH'},...",NONE,...,[],[],False,"[1, 6, 12, 13, 14]",Hexaghost,17,,,,
4,"[362, 381, 391, 316, 316, 316]",6,112,[],67,a9240145-8e3c-4033-9b58-f2ccd4cec560,20240815232946,True,[],TEN_PERCENT_HP_LOSS,...,[],[],False,[],Gremlin Nob,17,,,,


I only want to use runs that meet a certain standard of achievement. On Ascension (the difficulty setting in Slay the Spire) 17 or 18, I want to only include wins. On Ascension 19 or 20, I want to include all runs that reached Floor 35 or above, which is the start of Act 3 of the game.

In [4]:
"""df = df[(df.is_endless == False) & (df.floor_reached <= 56) &
        (((df.victory == True) & (df.ascension_level >= 17)) |
         ((df.floor_reached >= 35) & (df.ascension_level >= 19)))]
df.shape[0]"""

'df = df[(df.is_endless == False) & (df.floor_reached <= 56) &\n        (((df.victory == True) & (df.ascension_level >= 17)) |\n         ((df.floor_reached >= 35) & (df.ascension_level >= 19)))]\ndf.shape[0]'

When using my personal dataset, this only includes ~25 runs. There are a couple thousand when we run it with the whole dataset, though.

This data is structured in a way that will require a lot of preprocessing. My goal is to eventually have a set of data for each floor of a run, and to compare the user's current deck to every other deck at that floor to see what card choices they would have made.

In [5]:
df.columns

Index(['gold_per_floor', 'floor_reached', 'playtime', 'items_purged', 'score',
       'play_id', 'local_time', 'is_ascension_mode', 'campfire_choices',
       'neow_cost', 'seed_source_timestamp', 'circlet_count', 'master_deck',
       'relics', 'potions_floor_usage', 'damage_taken', 'seed_played',
       'potions_obtained', 'is_trial', 'path_per_floor', 'character_chosen',
       'items_purchased', 'campfire_rested', 'item_purchase_floors',
       'current_hp_per_floor', 'gold', 'neow_bonus', 'is_prod', 'is_daily',
       'chose_seed', 'campfire_upgraded', 'win_rate', 'timestamp',
       'path_taken', 'build_version', 'purchased_purges', 'victory',
       'max_hp_per_floor', 'card_choices', 'player_experience',
       'relics_obtained', 'event_choices', 'is_beta', 'boss_relics',
       'items_purged_floors', 'is_endless', 'potions_floor_spawned',
       'killed_by', 'ascension_level', 'special_seed',
       'basemod:card_modifiers', 'relic_stats', 'daily_mods'],
      dtype='object')

Before we cut down on our columns, we need a list of all available cards, so we can filter for them later:

In [6]:
CARDS_LIST = []
for row in df["master_deck"]:
    for card in row:
        card = card.split("+")[0]
        if card not in CARDS_LIST:
            CARDS_LIST.append(card)

There are a whole lot of unimportant columns in this data set. I'll only be using the ones that relate to the cards in the deck.


In [7]:
cols = ["play_id", "floor_reached", "character_chosen", "ascension_level",
        "items_purged", "items_purged_floors", "campfire_choices", "master_deck",
        "items_purchased", "item_purchase_floors", "card_choices", "event_choices"]
df = df[cols]
df.head()

Unnamed: 0,play_id,floor_reached,character_chosen,ascension_level,items_purged,items_purged_floors,campfire_choices,master_deck,items_purchased,item_purchase_floors,card_choices,event_choices
0,fdbe49b9-1846-4b0f-aea4-81f669f21767,33,IRONCLAD,17,[Strike_R],[2],"[{'data': 'Disarm', 'floor': 8, 'key': 'SMITH'...","[AscendersBane, Strike_R, Strike_R, Defend_R, ...","[Feel No Pain, PowerPotion, Dark Embrace, Seei...","[5, 13, 31, 31, 31]","[{'not_picked': ['True Grit', 'Flame Barrier']...","[{'damage_healed': 0, 'gold_gain': 0, 'cards_t..."
1,31fb9095-70e9-46cd-9069-7d6ae4c20237,7,IRONCLAD,17,[],[],[],"[AscendersBane, Strike_R, Strike_R, Defend_R, ...",[],[],"[{'not_picked': ['Rampage', 'Heavy Blade'], 'p...","[{'damage_healed': 0, 'gold_gain': 0, 'cards_t..."
2,98eb822b-53a7-481c-9e92-367e16e75499,11,IRONCLAD,17,[],[],"[{'data': 'Evolve', 'floor': 7, 'key': 'SMITH'}]","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[Evolve, Shrug It Off, Headbutt, Cleave]","[3, 3, 3, 3]","[{'not_picked': ['Wild Strike', 'Clothesline']...","[{'damage_healed': 0.0, 'max_hp_gain': 0.0, 'm..."
3,9f0d32dc-eaad-43c4-b8a7-e934c6da2614,16,IRONCLAD,17,[],[],"[{'data': 'Bash', 'floor': 7, 'key': 'SMITH'},...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...",[],[],"[{'not_picked': ['Reckless Charge', 'Perfected...","[{'damage_healed': 0, 'gold_gain': 0, 'player_..."
4,a9240145-8e3c-4033-9b58-f2ccd4cec560,6,IRONCLAD,17,[],[],[],"[AscendersBane, Strike_R, Strike_R, Strike_R, ...",[],[],"[{'not_picked': ['Iron Wave', 'Warcry'], 'pick...","[{'cards_removed': ['Strike_R'], 'damage_heale..."


These columns are all of the columns that contain any information about the state of our deck, even if it's only a tiny amount. I also have a little bit of metadata about the run, so we can select only runs on high difficulties, only runs that match the character currently being played, etc.

Unfortunately, most of these columns are still not in a particularly useful state. The `master_deck` column gives us the deck at the end of the run, and the deck at the start of the run is always the same, but we'll need to use the information in every other column to be able to reconstruct the deck at any given floor.

We'll likely want to store that in a new column after we construct it.

In [8]:
def get_purge_dict(purges, floors):
    dt = {}
    for purge, floor in zip(purges, floors):
        if floor in dt:
            dt[floor].append(purge)
        else:
            dt[floor] = [purge]
    return dt

def get_purchase_dict(purchases, floors):
    dt = {}
    for purchase, floor in zip(purchases, floors):
        # Make sure the purchase is a card, otherwise ignore it
        if purchase in CARDS_LIST:
            if floor in dt:
                dt[floor].append(purchase)
            else:
                dt[floor] = [purchase]
    return dt

def get_card_picks(choices):
    picks = {}
    for choice in choices:
        floor = int(choice["floor"])
        if floor > 56:
            # Runs should end at floor 56, so we won't count anything past that
            continue
        pick = choice["picked"]
        if pick != "SKIP":
            if floor in picks:
                picks[floor].append(pick)
            else:
                picks[floor] = [pick]
    return picks

def get_event_cards(events):
    adds = {}
    removes = {}
    upgrades = {}
    for event in events:
        floor = event["floor"]
        if "cards_transformed" in event.keys():
            removes[floor] = event["cards_transformed"]
        if "cards_removed" in event.keys():
            removes[floor] = event["cards_removed"]
        if "cards_obtained" in event.keys():
            adds[floor] = event["cards_obtained"]
        if "cards_upgraded" in event.keys():
            upgrades[floor] = event["cards_upgraded"]
    return adds, removes, upgrades

# NEED CAMPFIRES
def get_campfire_cards(fires):
    upgrades = defaultdict(list)
    removes = defaultdict(list)
    for fire in fires:
        if fire["key"] == "SMITH":
            upgrades[fire["floor"]].append(fire["data"])
        elif fire["key"] == "PURGE":
            removes[fire["floor"]].append(fire["data"])
    # CHECK CASE IN WHICH RUN HAS DREAM CATCHER??
    return upgrades, removes

In [9]:
df["purges"] = df[["items_purged", "items_purged_floors"]].apply(lambda x: get_purge_dict(x.items_purged, x.items_purged_floors), axis=1)
df["purges"].head()

0    {2: ['Strike_R']}
1                   {}
2                   {}
3                   {}
4                   {}
Name: purges, dtype: object

In [10]:
df["card_purchases"] = (df[["items_purchased", "item_purchase_floors"]]
                        .apply(lambda x: get_purchase_dict(x.items_purchased, x.item_purchase_floors), axis=1))
df["card_purchases"].head()

0    {5: ['Feel No Pain'], 31: ['Dark Embrace', 'Se...
1                                                   {}
2    {3: ['Evolve', 'Shrug It Off', 'Headbutt', 'Cl...
3                                                   {}
4                                                   {}
Name: card_purchases, dtype: object

In [11]:
df["card_picks"] = df["card_choices"].apply(get_card_picks)
df["card_picks"].head()

0    {1: ['Anger'], 4: ['Hemokinesis'], 6: ['Disarm...
1    {1: ['Pommel Strike'], 3: ['Flame Barrier'], 5...
2    {1: ['Power Through'], 5: ['Thunderclap'], 6: ...
3    {1: ['Dropkick'], 6: ['Battle Trance'], 10: ['...
4    {1: ['Twin Strike'], 2: ['Second Wind'], 3: ['...
Name: card_picks, dtype: object

In [12]:
res_event = df["event_choices"].apply(get_event_cards)
res_event = pd.DataFrame.from_records(res_event, columns = ["event_card_picks", "event_card_removes", "event_card_upgrades"])
df = df.join(res_event)

In [13]:
res_fire = df["campfire_choices"].apply(get_campfire_cards)
res_fire = pd.DataFrame.from_records(res_fire, columns=["fire_upgrades", "fire_removes"])
df = df.join(res_fire)

In [14]:
df.head()

Unnamed: 0,play_id,floor_reached,character_chosen,ascension_level,items_purged,items_purged_floors,campfire_choices,master_deck,items_purchased,item_purchase_floors,card_choices,event_choices,purges,card_purchases,card_picks,event_card_picks,event_card_removes,event_card_upgrades,fire_upgrades,fire_removes
0,fdbe49b9-1846-4b0f-aea4-81f669f21767,33,IRONCLAD,17,[Strike_R],[2],"[{'data': 'Disarm', 'floor': 8, 'key': 'SMITH'...","[AscendersBane, Strike_R, Strike_R, Defend_R, ...","[Feel No Pain, PowerPotion, Dark Embrace, Seei...","[5, 13, 31, 31, 31]","[{'not_picked': ['True Grit', 'Flame Barrier']...","[{'damage_healed': 0, 'gold_gain': 0, 'cards_t...",{2: ['Strike_R']},"{5: ['Feel No Pain'], 31: ['Dark Embrace', 'Se...","{1: ['Anger'], 4: ['Hemokinesis'], 6: ['Disarm...",{7: ['Ghostly Armor']},"{7: ['Strike_R'], 20: ['Strike_R']}",{},"{8: ['Disarm'], 15: ['Bash'], 23: ['Inflame'],...",{}
1,31fb9095-70e9-46cd-9069-7d6ae4c20237,7,IRONCLAD,17,[],[],[],"[AscendersBane, Strike_R, Strike_R, Defend_R, ...",[],[],"[{'not_picked': ['Rampage', 'Heavy Blade'], 'p...","[{'damage_healed': 0, 'gold_gain': 0, 'cards_t...",{},{},"{1: ['Pommel Strike'], 3: ['Flame Barrier'], 5...",{2: ['Limit Break']},"{2: ['Strike_R'], 4: ['Strike_R']}",{},{},{}
2,98eb822b-53a7-481c-9e92-367e16e75499,11,IRONCLAD,17,[],[],"[{'data': 'Evolve', 'floor': 7, 'key': 'SMITH'}]","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[Evolve, Shrug It Off, Headbutt, Cleave]","[3, 3, 3, 3]","[{'not_picked': ['Wild Strike', 'Clothesline']...","[{'damage_healed': 0.0, 'max_hp_gain': 0.0, 'm...",{},"{3: ['Evolve', 'Shrug It Off', 'Headbutt', 'Cl...","{1: ['Power Through'], 5: ['Thunderclap'], 6: ...",{},{},{},{7: ['Evolve']},{}
3,9f0d32dc-eaad-43c4-b8a7-e934c6da2614,16,IRONCLAD,17,[],[],"[{'data': 'Bash', 'floor': 7, 'key': 'SMITH'},...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...",[],[],"[{'not_picked': ['Reckless Charge', 'Perfected...","[{'damage_healed': 0, 'gold_gain': 0, 'player_...",{},{},"{1: ['Dropkick'], 6: ['Battle Trance'], 10: ['...",{4: ['Clash']},"{4: ['Strike_R'], 8: ['Defend_R']}",{},"{7: ['Bash'], 15: ['Armaments']}",{}
4,a9240145-8e3c-4033-9b58-f2ccd4cec560,6,IRONCLAD,17,[],[],[],"[AscendersBane, Strike_R, Strike_R, Strike_R, ...",[],[],"[{'not_picked': ['Iron Wave', 'Warcry'], 'pick...","[{'cards_removed': ['Strike_R'], 'damage_heale...",{},{},"{1: ['Twin Strike'], 2: ['Second Wind'], 3: ['...",{},{4: ['Strike_R']},"{5: ['Defend_R', 'Defend_R']}",{},{}


Now, we have columns that represent (almost) every change to the player's deck. The only exception, as far as I can tell, are relics that change the deck like War Paint, Whetstone, Tiny House, Astrolabe, Empty Cage (maybe), Dreamcatcher, etc.

With this information, we should be able to reconstruct what the deck was at each floor of the run. Since this is our goal, we need to know what their deck was at the start of the run. Fortunately, the deck for each character is always the same at the start.

In [15]:
def get_char_starting_deck(character, ascension):
    # Gets the deck that a character starts with on floor 0
    deck = []
    if ascension >= 10:
        deck.append("AscendersBane")
    if character == "IRONCLAD":
        deck.extend(["Strike_R"] * 5 + ["Defend_R"] * 4 + ["Bash"])
    elif character == "THE_SILENT":
        deck.extend(["Strike_G", "Defend_G"] * 5 + ["Neutralize", "Survivor"])
    elif character == "DEFECT":
        deck.extend(["Strike_B","Defend_B"] * 4 + ["Zap", "Dualcast"])
        deck.extend(["Zap", "Dualcast"])
    elif character == "WATCHER":
        deck.extend(["Strike_P", "Defend_P"] * 4 + ["Eruption", "Vigilance"])

    return deck

In [16]:
get_char_starting_deck("IRONCLAD", 20)

['AscendersBane',
 'Strike_R',
 'Strike_R',
 'Strike_R',
 'Strike_R',
 'Strike_R',
 'Defend_R',
 'Defend_R',
 'Defend_R',
 'Defend_R',
 'Bash']

Now we can implement code to turn a run's columns and their starting deck into a list of different deck states throughout the run.

For my convenience, though, I'm going to combine all of our deck information columns into three: card adds, card removes, and card upgrades.

In [17]:
def all_adds(picks, buys, events):
    dt = defaultdict(list)
    for dct in [picks, buys, events]:
        for floor, cards in dct.items():
            dt[floor].extend(cards)
    return dt

def all_removes(shops, events, fires):
    dt = defaultdict(list)
    for dct in [shops, events, fires]:
        for floor, cards in dct.items():
            dt[floor].extend(cards)
    return dt

def all_upgrades(events, fires):
    dt = defaultdict(list)
    for dct in [events, fires]:
        for floor, cards in dct.items():
            dt[floor].extend(cards)
    return dt

In [18]:
new_df = df[["play_id", "floor_reached", "character_chosen", "ascension_level", "master_deck"]]
new_df["adds"] = df.apply(lambda x: all_adds(x.card_picks, x.card_purchases, x.event_card_picks), axis=1)
new_df["removes"] = df.apply(lambda x: all_removes(x.purges, x.event_card_removes, x.fire_removes), axis=1)
new_df["upgrades"] = df.apply(lambda x: all_upgrades(x.event_card_upgrades, x.fire_upgrades), axis=1)
new_df.head()

Unnamed: 0,play_id,floor_reached,character_chosen,ascension_level,master_deck,adds,removes,upgrades
0,fdbe49b9-1846-4b0f-aea4-81f669f21767,33,IRONCLAD,17,"[AscendersBane, Strike_R, Strike_R, Defend_R, ...","{1: ['Anger'], 4: ['Hemokinesis'], 6: ['Disarm...","{2: ['Strike_R'], 7: ['Strike_R'], 20: ['Strik...","{8: ['Disarm'], 15: ['Bash'], 23: ['Inflame'],..."
1,31fb9095-70e9-46cd-9069-7d6ae4c20237,7,IRONCLAD,17,"[AscendersBane, Strike_R, Strike_R, Defend_R, ...","{1: ['Pommel Strike'], 3: ['Flame Barrier'], 5...","{2: ['Strike_R'], 4: ['Strike_R']}",{}
2,98eb822b-53a7-481c-9e92-367e16e75499,11,IRONCLAD,17,"[AscendersBane, Strike_R, Strike_R, Strike_R, ...","{1: ['Power Through'], 5: ['Thunderclap'], 6: ...",{},{7: ['Evolve']}
3,9f0d32dc-eaad-43c4-b8a7-e934c6da2614,16,IRONCLAD,17,"[AscendersBane, Strike_R, Strike_R, Strike_R, ...","{1: ['Dropkick'], 6: ['Battle Trance'], 10: ['...","{4: ['Strike_R'], 8: ['Defend_R']}","{7: ['Bash'], 15: ['Armaments']}"
4,a9240145-8e3c-4033-9b58-f2ccd4cec560,6,IRONCLAD,17,"[AscendersBane, Strike_R, Strike_R, Strike_R, ...","{1: ['Twin Strike'], 2: ['Second Wind'], 3: ['...",{4: ['Strike_R']},"{5: ['Defend_R', 'Defend_R']}"


In [19]:
def get_deck_at_floors(play_id):
    data = new_df[new_df.play_id == play_id]
    cur_deck = get_char_starting_deck(data.character_chosen.item(), data.ascension_level.item())
    deck_states = pd.Series(index=np.arange(0, 57))
    deck_states[data.floor_reached.item()] = data.master_deck.item()
    
    for floor in range(0, data.floor_reached.item()):
        adds = data.adds.item()[floor]
        removes = data.removes.item()[floor]
        upgrades = data.upgrades.item()[floor]
        
        cur_deck.extend(adds)
        for card in removes:
            try:
                cur_deck.remove(card)
            except ValueError:
                pass
        for card in upgrades:
            try:
                cur_deck.remove(card)
            except ValueError:
                pass
            if "Searing Blow" in card:
                # this is the only case where an upgrade makes a card anything other than "card+1"
                upg_level = card.split("+")[1]
                cur_deck.append(f"Searing Blow+{upg_level+1}")
            else:
                cur_deck.append(card+"+1")
        
        deck_states[floor] = cur_deck.copy()
    
    return deck_states

In [20]:
get_deck_at_floors("fdbe49b9-1846-4b0f-aea4-81f669f21767")
0

0

In [21]:
df_deck_state = pd.DataFrame(index=new_df["play_id"].copy(),
                             columns=[f"floor_{floor}" for floor in range(0, 57)])

# This probably isn't the most efficient way, but other ways weren't working
for play_id in df_deck_state.index:
    state = get_deck_at_floors(play_id)
    df_deck_state.loc[play_id][state.index] = state.values
df_deck_state.head()

Unnamed: 0_level_0,floor_0,floor_1,floor_2,floor_3,floor_4,floor_5,floor_6,floor_7,floor_8,floor_9,...,floor_47,floor_48,floor_49,floor_50,floor_51,floor_52,floor_53,floor_54,floor_55,floor_56
play_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
fdbe49b9-1846-4b0f-aea4-81f669f21767,"[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...",...,,,,,,,,,,
31fb9095-70e9-46cd-9069-7d6ae4c20237,"[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Defend_R, ...",,,...,,,,,,,,,,
98eb822b-53a7-481c-9e92-367e16e75499,"[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...",...,,,,,,,,,,
9f0d32dc-eaad-43c4-b8a7-e934c6da2614,"[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...",...,,,,,,,,,,
a9240145-8e3c-4033-9b58-f2ccd4cec560,"[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...",,,,...,,,,,,,,,,


Now we have a data frame that contains each run's deck state at any given floor. We'll save it to a CSV so we can load it later for our model:

In [22]:
df_deck_state.to_csv("deck_states.csv")

I also want to implement a model that uses a simple heuristic - at or before the floor the player is on, how often was each card picked vs. how often has it been seen?

In [23]:
cards_seen = {}
cards_picked = {}
for floor in range(0, 57):
    cards_seen[floor] = []
    cards_picked[floor] = []

for row in df["card_choices"]:
    for dt in row:
        seen = dt["not_picked"].copy()
        if dt["picked"] == "SKIP":
            continue
        elif dt["picked"] != "Singing Bowl":
            seen.append(dt["picked"])
            seen = [card.split("+")[0] for card in seen]
            picked = dt["picked"].split("+")[0]
            floor = int(dt["floor"])

        if floor > 56:
            continue
        cards_seen[floor].extend(seen)
        cards_picked[floor].append(picked)

for floor in range(1, 57):
    seen = cards_seen[floor-1].copy()
    seen.extend(cards_seen[floor])
    picked = cards_picked[floor-1].copy()
    picked.extend(cards_picked[floor])

    cards_seen[floor] = seen
    cards_picked[floor] = picked
    
count_seen = {}
count_picked = {}
floor_props = {}
for floor in range(0, 57):
    props = {}
    count_seen[floor] = Counter(cards_seen[floor])
    count_picked[floor] = Counter(cards_picked[floor])
    for card in CARDS_LIST:
        if card in count_picked[floor].keys() and card in count_seen[floor].keys() and count_seen[floor][card] > 0:
            #print(count_picked[floor][card])
            #print(count_seen[floor][card])
            props[card] = min(count_picked[floor][card] / count_seen[floor][card], 1.0)
        else:
            props[card] = 0
    floor_props[floor] = props

In [24]:
df_props = pd.DataFrame.from_dict(floor_props, orient="index")
df_props.index.name = "Floor"
df_props.to_csv("pickrates.csv")
df_props.head()

Unnamed: 0_level_0,AscendersBane,Strike_R,Defend_R,Bash,Anger,Hemokinesis,Feel No Pain,Disarm,Ghostly Armor,Reaper,...,WaveOfTheHand,Ragnarok,DevaForm,WindmillStrike,ForeignInfluence,Wireheading,Sanctity,LikeWater,PathToVictory,Judgement
Floor,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0
1,0,0,0,0,0.8,0.0,0.0,0.6,0.0,1.0,...,0.428571,0.0,0.0,0.333333,0,0.142857,0.0,0.0,0.0625,0
2,0,0,0,0,0.692308,0.333333,1.0,0.714286,0.0,1.0,...,0.333333,0.0,0.0,0.285714,0,0.25,0.0,0.166667,0.038462,0
3,0,0,0,0,0.705882,0.333333,0.5,0.454545,0.2,1.0,...,0.375,0.0,0.0,0.222222,0,0.222222,0.111111,0.125,0.034483,0
4,0,0,0,0,0.684211,0.5,0.5,0.5,0.2,1.0,...,0.315789,0.0,0.0,0.181818,0,0.2,0.111111,0.111111,0.030303,0


The last (for now) thing we need is a data frame containing our card choices on each floor. That way, we can evaluate our models based on whether they recommended a card that was actually chosen.

In [25]:
df.head()

Unnamed: 0,play_id,floor_reached,character_chosen,ascension_level,items_purged,items_purged_floors,campfire_choices,master_deck,items_purchased,item_purchase_floors,card_choices,event_choices,purges,card_purchases,card_picks,event_card_picks,event_card_removes,event_card_upgrades,fire_upgrades,fire_removes
0,fdbe49b9-1846-4b0f-aea4-81f669f21767,33,IRONCLAD,17,[Strike_R],[2],"[{'data': 'Disarm', 'floor': 8, 'key': 'SMITH'...","[AscendersBane, Strike_R, Strike_R, Defend_R, ...","[Feel No Pain, PowerPotion, Dark Embrace, Seei...","[5, 13, 31, 31, 31]","[{'not_picked': ['True Grit', 'Flame Barrier']...","[{'damage_healed': 0, 'gold_gain': 0, 'cards_t...",{2: ['Strike_R']},"{5: ['Feel No Pain'], 31: ['Dark Embrace', 'Se...","{1: ['Anger'], 4: ['Hemokinesis'], 6: ['Disarm...",{7: ['Ghostly Armor']},"{7: ['Strike_R'], 20: ['Strike_R']}",{},"{8: ['Disarm'], 15: ['Bash'], 23: ['Inflame'],...",{}
1,31fb9095-70e9-46cd-9069-7d6ae4c20237,7,IRONCLAD,17,[],[],[],"[AscendersBane, Strike_R, Strike_R, Defend_R, ...",[],[],"[{'not_picked': ['Rampage', 'Heavy Blade'], 'p...","[{'damage_healed': 0, 'gold_gain': 0, 'cards_t...",{},{},"{1: ['Pommel Strike'], 3: ['Flame Barrier'], 5...",{2: ['Limit Break']},"{2: ['Strike_R'], 4: ['Strike_R']}",{},{},{}
2,98eb822b-53a7-481c-9e92-367e16e75499,11,IRONCLAD,17,[],[],"[{'data': 'Evolve', 'floor': 7, 'key': 'SMITH'}]","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[Evolve, Shrug It Off, Headbutt, Cleave]","[3, 3, 3, 3]","[{'not_picked': ['Wild Strike', 'Clothesline']...","[{'damage_healed': 0.0, 'max_hp_gain': 0.0, 'm...",{},"{3: ['Evolve', 'Shrug It Off', 'Headbutt', 'Cl...","{1: ['Power Through'], 5: ['Thunderclap'], 6: ...",{},{},{},{7: ['Evolve']},{}
3,9f0d32dc-eaad-43c4-b8a7-e934c6da2614,16,IRONCLAD,17,[],[],"[{'data': 'Bash', 'floor': 7, 'key': 'SMITH'},...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...",[],[],"[{'not_picked': ['Reckless Charge', 'Perfected...","[{'damage_healed': 0, 'gold_gain': 0, 'player_...",{},{},"{1: ['Dropkick'], 6: ['Battle Trance'], 10: ['...",{4: ['Clash']},"{4: ['Strike_R'], 8: ['Defend_R']}",{},"{7: ['Bash'], 15: ['Armaments']}",{}
4,a9240145-8e3c-4033-9b58-f2ccd4cec560,6,IRONCLAD,17,[],[],[],"[AscendersBane, Strike_R, Strike_R, Strike_R, ...",[],[],"[{'not_picked': ['Iron Wave', 'Warcry'], 'pick...","[{'cards_removed': ['Strike_R'], 'damage_heale...",{},{},"{1: ['Twin Strike'], 2: ['Second Wind'], 3: ['...",{},{4: ['Strike_R']},"{5: ['Defend_R', 'Defend_R']}",{},{}


We're only going to use card choices from 3-card rewards, since those are the only choices we're programming our recommender to make at the moment.

In [42]:
df_choices = df[["play_id", "card_choices"]]
df_choices.head()

Unnamed: 0,play_id,card_choices
0,fdbe49b9-1846-4b0f-aea4-81f669f21767,"[{'not_picked': ['True Grit', 'Flame Barrier']..."
1,31fb9095-70e9-46cd-9069-7d6ae4c20237,"[{'not_picked': ['Rampage', 'Heavy Blade'], 'p..."
2,98eb822b-53a7-481c-9e92-367e16e75499,"[{'not_picked': ['Wild Strike', 'Clothesline']..."
3,9f0d32dc-eaad-43c4-b8a7-e934c6da2614,"[{'not_picked': ['Reckless Charge', 'Perfected..."
4,a9240145-8e3c-4033-9b58-f2ccd4cec560,"[{'not_picked': ['Iron Wave', 'Warcry'], 'pick..."


In [43]:
def reformat_choices(choices):
    new_choices = {}
    offered = {}
    for choice in choices:
        floor = choice['floor']
        seen = choice['not_picked'].copy()
        picked = choice['picked']
        if picked == "SKIP" or picked == "Singing Bowl":
            # I don't care about skips, for now at least
            # Singing Bowl is a skip alternative, we don't care about those either
            continue
        seen.append(picked)
        offered[floor] = seen
        new_choices[floor] = picked
    
    return new_choices, offered

reformat_choices(df_choices.loc[0, "card_choices"])

res = df_choices["card_choices"].apply(reformat_choices)
res = pd.DataFrame([[a, b] for a, b in res.values], columns=["choices", "seen"])
df_choices = df_choices.join(res).drop("card_choices", axis=1)
df_choices.head()

Unnamed: 0,play_id,choices,seen
0,fdbe49b9-1846-4b0f-aea4-81f669f21767,"{1: 'Anger', 4: 'Hemokinesis', 6: 'Disarm', 10...","{1: ['True Grit', 'Flame Barrier', 'Anger'], 4..."
1,31fb9095-70e9-46cd-9069-7d6ae4c20237,"{1: 'Pommel Strike', 3: 'Flame Barrier', 5: 'T...","{1: ['Rampage', 'Heavy Blade', 'Pommel Strike'..."
2,98eb822b-53a7-481c-9e92-367e16e75499,"{1.0: 'Power Through', 5: 'Thunderclap', 6: 'S...","{1.0: ['Wild Strike', 'Clothesline', 'Power Th..."
3,9f0d32dc-eaad-43c4-b8a7-e934c6da2614,"{1: 'Dropkick', 6: 'Battle Trance', 10: 'Carna...","{1: ['Reckless Charge', 'Perfected Strike', 'D..."
4,a9240145-8e3c-4033-9b58-f2ccd4cec560,"{1: 'Twin Strike', 2: 'Second Wind', 3: 'Pomme...","{1: ['Iron Wave', 'Warcry', 'Twin Strike'], 2:..."


In [44]:
df_choices.to_csv("cards_seen_picked.csv")