Note that this file will have to be run twice, once on the training set and once on the test set. This is because I don't want to duplicate code too much.

In [1]:
import pandas as pd
import numpy as np
import json
from collections import Counter, defaultdict
import matplotlib.pyplot as plt
pd.options.mode.chained_assignment = None

# Data files are formatted in a weird way, this function helps read them in
def read_sts_file(arr, cap=1000):
    df = pd.DataFrame()
    nm = 0
    for str in arr:
        if nm>cap:
            break
        with open(str) as f:
            data = json.load(f)
            df = pd.concat([df, pd.DataFrame(rec['event'] for rec in data)])
            nm+=1
    
    return df

The below cell reads in the data for runs. The first line (reading in `runs.json`) reads only my personal runs. I'm using this data set for development, so it doesn't take forever to run everything.

The second line (reading `all_runs.json`) reads in about 422,000 runs from a public data set. This data is available in the Slay the Spire Discord, and the sample I have is not anywhere close to all of the available data.

In [2]:
#df_backup = read_sts_file(["runs.json"])

df_backup = pd.read_json("filtered_runs_full.json")

In [3]:
df = df_backup.copy()
df = df.reset_index(drop=True)
df.head()

Unnamed: 0,gold_per_floor,floor_reached,playtime,items_purged,score,play_id,local_time,is_ascension_mode,campfire_choices,neow_cost,...,boss_relics,items_purged_floors,is_endless,potions_floor_spawned,ascension_level,killed_by,special_seed,basemod:card_modifiers,relic_stats,daily_mods
0,"[114, 114, 39, 56, 68, 101, 101, 113, 113, 113...",52,2378,"[Defend_G, Decay, Strike_G]",1924,f035334e-865f-476e-949b-67829192437d,1970-08-22 19:22:08.135710,True,"[{'data': 'Blade Dance', 'floor': 7.0, 'key': ...",NONE,...,"[{'not_picked': ['Astrolabe', 'Snecko Eye'], '...","[3, 36, 48]",False,"[1, 5, 11, 25, 28, 29, 33, 41, 45]",20,,0.0,,,
1,"[118, 118, 118, 137, 156, 199, 199, 124, 192, ...",51,2833,"[Strike_R, Strike_R]",1452,e370d2cb-3ed0-46b2-998b-c8741675398a,1970-08-22 19:22:08.015755,True,"[{'data': 'Rampage', 'floor': 7.0, 'key': 'SMI...",NONE,...,"[{'not_picked': ['Pandora's Box', 'Philosopher...","[8, 21]",False,"[5, 6, 13, 14, 19, 31, 33, 37, 42, 45]",19,,0.0,,,
2,"[113, 113, 113, 113, 128, 128, 128, 128, 155, ...",51,1309,"[Defend_B, Strike_B]",1270,0ee32f9b-83a7-45e2-9da7-6623897d5521,1970-08-22 19:22:08.025748,True,"[{'data': 'Zap', 'floor': 6.0, 'key': 'SMITH'}...",NONE,...,"[{'not_picked': ['Astrolabe', 'Calling Bell'],...","[11, 19]",False,"[1, 10, 12, 13, 14, 16, 18, 21, 23, 27, 29, 33...",20,Awakened One,0.0,,,
3,"[111, 111, 129, 142, 142, 169, 169, 182, 257, ...",56,3306,"[Strike_B, Strike_B]",2019,b768fd8e-f7e7-427b-af3f-075fbe8812ec,1970-08-22 19:22:08.135841,True,"[{'data': 'Coolheaded', 'floor': 7.0, 'key': '...",NONE,...,"[{'not_picked': ['Empty Cage', 'Runic Pyramid'...","[11, 27]",False,"[1, 8, 10, 20, 21, 22, 33, 38, 45, 46, 54]",17,,0.0,,,
4,"[110, 110, 185, 185, 18, 43, 58, 58, 131, 131,...",56,3899,[Zap],1900,2df5033f-e5f7-4cd7-93b7-3f4b70f5475e,1970-08-22 19:22:08.005859,True,"[{'data': 'Darkness', 'floor': 8, 'key': 'SMIT...",NONE,...,"[{'not_picked': ['Velvet Choker', 'Black Star'...",[30],False,"[11, 12, 13, 16, 20, 21, 22, 28, 33, 35, 37, 55]",20,The Heart,,,,


I only want to use runs that meet a certain standard of achievement. On Ascension (the difficulty setting in Slay the Spire) 17 or 18, I want to only include wins. On Ascension 19 or 20, I want to include all runs that reached Floor 35 or above, which is the start of Act 3 of the game.

In [4]:
"""df = df[(df.is_endless == False) & (df.floor_reached <= 56) &
        (((df.victory == True) & (df.ascension_level >= 17)) |
         ((df.floor_reached >= 35) & (df.ascension_level >= 19)))]
df.shape[0]"""

'df = df[(df.is_endless == False) & (df.floor_reached <= 56) &\n        (((df.victory == True) & (df.ascension_level >= 17)) |\n         ((df.floor_reached >= 35) & (df.ascension_level >= 19)))]\ndf.shape[0]'

When using my personal dataset, this only includes ~25 runs. There are a couple thousand when we run it with the whole dataset, though.

This data is structured in a way that will require a lot of preprocessing. My goal is to eventually have a set of data for each floor of a run, and to compare the user's current deck to every other deck at that floor to see what card choices they would have made.

In [5]:
df.columns

Index(['gold_per_floor', 'floor_reached', 'playtime', 'items_purged', 'score',
       'play_id', 'local_time', 'is_ascension_mode', 'campfire_choices',
       'neow_cost', 'seed_source_timestamp', 'circlet_count', 'master_deck',
       'relics', 'potions_floor_usage', 'damage_taken', 'seed_played',
       'potions_obtained', 'is_trial', 'path_per_floor', 'character_chosen',
       'items_purchased', 'campfire_rested', 'item_purchase_floors',
       'current_hp_per_floor', 'gold', 'neow_bonus', 'is_prod', 'is_daily',
       'chose_seed', 'campfire_upgraded', 'win_rate', 'timestamp',
       'path_taken', 'build_version', 'purchased_purges', 'victory',
       'max_hp_per_floor', 'card_choices', 'player_experience',
       'relics_obtained', 'event_choices', 'is_beta', 'boss_relics',
       'items_purged_floors', 'is_endless', 'potions_floor_spawned',
       'ascension_level', 'killed_by', 'special_seed',
       'basemod:card_modifiers', 'relic_stats', 'daily_mods'],
      dtype='object')

Before we cut down on our columns, we need a list of all available cards, so we can filter for them later:

In [6]:
CARDS_LIST = []
for row in df["master_deck"]:
    for card in row:
        card = card.split("+")[0]
        if card not in CARDS_LIST:
            CARDS_LIST.append(card)

There are a whole lot of unimportant columns in this data set. I'll only be using the ones that relate to the cards in the deck.


In [7]:
cols = ["play_id", "floor_reached", "character_chosen", "ascension_level",
        "items_purged", "items_purged_floors", "campfire_choices", "master_deck",
        "items_purchased", "item_purchase_floors", "card_choices", "event_choices"]
df = df[cols]
df.head()

Unnamed: 0,play_id,floor_reached,character_chosen,ascension_level,items_purged,items_purged_floors,campfire_choices,master_deck,items_purchased,item_purchase_floors,card_choices,event_choices
0,f035334e-865f-476e-949b-67829192437d,52,THE_SILENT,20,"[Defend_G, Decay, Strike_G]","[3, 36, 48]","[{'data': 'Blade Dance', 'floor': 7.0, 'key': ...","[AscendersBane, Strike_G, Strike_G, Defend_G, ...","[Sundial, Calculated Gamble]","[20, 36]","[{'not_picked': ['Noxious Fumes', 'Escape Plan...","[{'cards_removed': ['Strike_G'], 'damage_heale..."
1,e370d2cb-3ed0-46b2-998b-c8741675398a,51,IRONCLAD,19,"[Strike_R, Strike_R]","[8, 21]","[{'data': 'Rampage', 'floor': 7.0, 'key': 'SMI...","[AscendersBane, Defend_R, Defend_R, Defend_R, ...","[Lee's Waffle, Flex+1, Apotheosis+1, Combust, ...","[11, 11, 27, 36, 47]","[{'not_picked': ['Warcry', 'Twin Strike'], 'pi...","[{'damage_healed': 0.0, 'max_hp_gain': 0.0, 'm..."
2,0ee32f9b-83a7-45e2-9da7-6623897d5521,51,DEFECT,20,"[Defend_B, Strike_B]","[11, 19]","[{'data': 'Zap', 'floor': 6.0, 'key': 'SMITH'}...","[AscendersBane, Strike_B, Strike_B, Defend_B, ...","[Glacier, Potion Belt, Sweeping Beam, Rebound,...","[11, 30, 41, 47, 47]","[{'not_picked': ['Melter', 'BootSequence'], 'p...","[{'damage_healed': 0.0, 'max_hp_gain': 5.0, 'm..."
3,b768fd8e-f7e7-427b-af3f-075fbe8812ec,56,DEFECT,17,"[Strike_B, Strike_B]","[11, 27]","[{'data': 'Coolheaded', 'floor': 7.0, 'key': '...","[AscendersBane, Defend_B, Defend_B, Zap+1, Dua...","[Art of War, War Paint, Medical Kit, Distilled...","[11, 27, 53, 53]","[{'not_picked': ['Undo', 'Leap'], 'picked': 'B...","[{'damage_healed': 0.0, 'max_hp_gain': 0.0, 'm..."
4,2df5033f-e5f7-4cd7-93b7-3f4b70f5475e,56,DEFECT,20,[Zap],[30],"[{'data': 'Darkness', 'floor': 8, 'key': 'SMIT...","[AscendersBane, Dualcast, Melter+1, Darkness+1...","[DataDisk, Strange Spoon, Defragment, Gamblers...","[5, 30, 30, 30, 47, 54]","[{'not_picked': ['Stack', 'Heatsinks'], 'picke...","[{'damage_healed': 0, 'gold_gain': 75, 'player..."


These columns are all of the columns that contain any information about the state of our deck, even if it's only a tiny amount. I also have a little bit of metadata about the run, so we can select only runs on high difficulties, only runs that match the character currently being played, etc.

Unfortunately, most of these columns are still not in a particularly useful state. The `master_deck` column gives us the deck at the end of the run, and the deck at the start of the run is always the same, but we'll need to use the information in every other column to be able to reconstruct the deck at any given floor.

We'll likely want to store that in a new column after we construct it.

In [8]:
def get_purge_dict(purges, floors):
    dt = {}
    for purge, floor in zip(purges, floors):
        if floor in dt:
            dt[floor].append(purge)
        else:
            dt[floor] = [purge]
    return dt

def get_purchase_dict(purchases, floors):
    dt = {}
    for purchase, floor in zip(purchases, floors):
        # Make sure the purchase is a card, otherwise ignore it
        if purchase in CARDS_LIST:
            if floor in dt:
                dt[floor].append(purchase)
            else:
                dt[floor] = [purchase]
    return dt

def get_card_picks(choices):
    picks = {}
    for choice in choices:
        floor = int(choice["floor"])
        if floor > 56:
            # Runs should end at floor 56, so we won't count anything past that
            continue
        pick = choice["picked"]
        if pick != "SKIP":
            if floor in picks:
                picks[floor].append(pick)
            else:
                picks[floor] = [pick]
    return picks

def get_event_cards(events):
    adds = {}
    removes = {}
    upgrades = {}
    for event in events:
        floor = int(event["floor"])
        if "cards_transformed" in event.keys():
            removes[floor] = event["cards_transformed"]
        if "cards_removed" in event.keys():
            removes[floor] = event["cards_removed"]
        if "cards_obtained" in event.keys():
            adds[floor] = event["cards_obtained"]
        if "cards_upgraded" in event.keys():
            upgrades[floor] = event["cards_upgraded"]
    return adds, removes, upgrades

def get_campfire_cards(fires):
    upgrades = defaultdict(list)
    removes = defaultdict(list)
    for fire in fires:
        if fire["key"] == "SMITH":
            upgrades[int(fire["floor"])].append(fire["data"])
        elif fire["key"] == "PURGE":
            removes[int(fire["floor"])].append(fire["data"])
    # CHECK CASE IN WHICH RUN HAS DREAM CATCHER??
    return upgrades, removes

In [9]:
df["purges"] = df[["items_purged", "items_purged_floors"]].apply(lambda x: get_purge_dict(x.items_purged, x.items_purged_floors), axis=1)
df["purges"].head()

0    {3: ['Defend_G'], 36: ['Decay'], 48: ['Strike_...
1                  {8: ['Strike_R'], 21: ['Strike_R']}
2                 {11: ['Defend_B'], 19: ['Strike_B']}
3                 {11: ['Strike_B'], 27: ['Strike_B']}
4                                        {30: ['Zap']}
Name: purges, dtype: object

In [10]:
df["card_purchases"] = (df[["items_purchased", "item_purchase_floors"]]
                        .apply(lambda x: get_purchase_dict(x.items_purchased, x.item_purchase_floors), axis=1))
df["card_purchases"].head()

0                          {36: ['Calculated Gamble']}
1                                    {36: ['Combust']}
2    {11: ['Glacier'], 41: ['Sweeping Beam'], 47: [...
3                                                   {}
4                {30: ['Defragment'], 54: ['Panacea']}
Name: card_purchases, dtype: object

In [11]:
df["card_picks"] = df["card_choices"].apply(get_card_picks)
df["card_picks"].head()

0    {1: ['Acrobatics'], 4: ['Blade Dance'], 5: ['B...
1    {1: ['Rampage'], 4: ['Shrug It Off'], 6: ['Lim...
2    {1: ['Ball Lightning'], 5: ['Hologram'], 14: [...
3    {1: ['Ball Lightning'], 4: ['Coolheaded'], 6: ...
4    {1: ['Melter'], 6: ['Darkness'], 7: ['Cold Sna...
Name: card_picks, dtype: object

In [12]:
res_event = df["event_choices"].apply(get_event_cards)
res_event = pd.DataFrame.from_records(res_event, columns = ["event_card_picks", "event_card_removes", "event_card_upgrades"])
df = df.join(res_event)

In [13]:
res_fire = df["campfire_choices"].apply(get_campfire_cards)
res_fire = pd.DataFrame.from_records(res_fire, columns=["fire_upgrades", "fire_removes"])
df = df.join(res_fire)

In [14]:
df.head()

Unnamed: 0,play_id,floor_reached,character_chosen,ascension_level,items_purged,items_purged_floors,campfire_choices,master_deck,items_purchased,item_purchase_floors,card_choices,event_choices,purges,card_purchases,card_picks,event_card_picks,event_card_removes,event_card_upgrades,fire_upgrades,fire_removes
0,f035334e-865f-476e-949b-67829192437d,52,THE_SILENT,20,"[Defend_G, Decay, Strike_G]","[3, 36, 48]","[{'data': 'Blade Dance', 'floor': 7.0, 'key': ...","[AscendersBane, Strike_G, Strike_G, Defend_G, ...","[Sundial, Calculated Gamble]","[20, 36]","[{'not_picked': ['Noxious Fumes', 'Escape Plan...","[{'cards_removed': ['Strike_G'], 'damage_heale...","{3: ['Defend_G'], 36: ['Decay'], 48: ['Strike_...",{36: ['Calculated Gamble']},"{1: ['Acrobatics'], 4: ['Blade Dance'], 5: ['B...","{22: ['Decay'], 47: ['Madness', 'Madness']}","{2: ['Strike_G'], 39: ['Strike_G']}",{},"{7: ['Blade Dance'], 10: ['Neutralize'], 12: [...",{}
1,e370d2cb-3ed0-46b2-998b-c8741675398a,51,IRONCLAD,19,"[Strike_R, Strike_R]","[8, 21]","[{'data': 'Rampage', 'floor': 7.0, 'key': 'SMI...","[AscendersBane, Defend_R, Defend_R, Defend_R, ...","[Lee's Waffle, Flex+1, Apotheosis+1, Combust, ...","[11, 11, 27, 36, 47]","[{'not_picked': ['Warcry', 'Twin Strike'], 'pi...","[{'damage_healed': 0.0, 'max_hp_gain': 0.0, 'm...","{8: ['Strike_R'], 21: ['Strike_R']}",{36: ['Combust']},"{1: ['Rampage'], 4: ['Shrug It Off'], 6: ['Lim...","{39: ['Madness', 'Madness'], 44: ['Shrug It Of...",{},{},"{7: ['Rampage'], 15: ['Limit Break'], 25: ['He...",{}
2,0ee32f9b-83a7-45e2-9da7-6623897d5521,51,DEFECT,20,"[Defend_B, Strike_B]","[11, 19]","[{'data': 'Zap', 'floor': 6.0, 'key': 'SMITH'}...","[AscendersBane, Strike_B, Strike_B, Defend_B, ...","[Glacier, Potion Belt, Sweeping Beam, Rebound,...","[11, 30, 41, 47, 47]","[{'not_picked': ['Melter', 'BootSequence'], 'p...","[{'damage_healed': 0.0, 'max_hp_gain': 5.0, 'm...","{11: ['Defend_B'], 19: ['Strike_B']}","{11: ['Glacier'], 41: ['Sweeping Beam'], 47: [...","{1: ['Ball Lightning'], 5: ['Hologram'], 14: [...",{4: ['Leap']},{3: ['Strike_B']},{46: ['Electrodynamics']},"{6: ['Zap'], 8: ['Dualcast'], 25: ['Glacier'],...",{}
3,b768fd8e-f7e7-427b-af3f-075fbe8812ec,56,DEFECT,17,"[Strike_B, Strike_B]","[11, 27]","[{'data': 'Coolheaded', 'floor': 7.0, 'key': '...","[AscendersBane, Defend_B, Defend_B, Zap+1, Dua...","[Art of War, War Paint, Medical Kit, Distilled...","[11, 27, 53, 53]","[{'not_picked': ['Undo', 'Leap'], 'picked': 'B...","[{'damage_healed': 0.0, 'max_hp_gain': 0.0, 'm...","{11: ['Strike_B'], 27: ['Strike_B']}",{},"{1: ['Ball Lightning'], 4: ['Coolheaded'], 6: ...","{2: ['Self Repair'], 28: ['BootSequence', 'For...","{2: ['Strike_B'], 5: ['Strike_B'], 28: ['Defen...",{},"{7: ['Coolheaded'], 12: ['Coolheaded'], 15: ['...",{}
4,2df5033f-e5f7-4cd7-93b7-3f4b70f5475e,56,DEFECT,20,[Zap],[30],"[{'data': 'Darkness', 'floor': 8, 'key': 'SMIT...","[AscendersBane, Dualcast, Melter+1, Darkness+1...","[DataDisk, Strange Spoon, Defragment, Gamblers...","[5, 30, 30, 30, 47, 54]","[{'not_picked': ['Stack', 'Heatsinks'], 'picke...","[{'damage_healed': 0, 'gold_gain': 75, 'player...",{30: ['Zap']},"{30: ['Defragment'], 54: ['Panacea']}","{1: ['Melter'], 6: ['Darkness'], 7: ['Cold Sna...",{19: ['Core Surge']},{},"{4: ['Melter', 'Defend_B']}","{8: ['Darkness'], 32: ['Defragment'], 40: ['Mu...",{}


Now, we have columns that represent (almost) every change to the player's deck. The only exception, as far as I can tell, are relics that change the deck like War Paint, Whetstone, Tiny House, Astrolabe, Empty Cage (maybe), Dreamcatcher, etc.

With this information, we should be able to reconstruct what the deck was at each floor of the run. Since this is our goal, we need to know what their deck was at the start of the run. Fortunately, the deck for each character is always the same at the start.

In [15]:
def get_char_starting_deck(character, ascension):
    # Gets the deck that a character starts with on floor 0
    deck = []
    if ascension >= 10:
        deck.append("AscendersBane")
    if character == "IRONCLAD":
        deck.extend(["Strike_R"] * 5 + ["Defend_R"] * 4 + ["Bash"])
    elif character == "THE_SILENT":
        deck.extend(["Strike_G", "Defend_G"] * 5 + ["Neutralize", "Survivor"])
    elif character == "DEFECT":
        deck.extend(["Strike_B","Defend_B"] * 4 + ["Zap", "Dualcast"])
        deck.extend(["Zap", "Dualcast"])
    elif character == "WATCHER":
        deck.extend(["Strike_P", "Defend_P"] * 4 + ["Eruption", "Vigilance"])

    return deck

In [16]:
get_char_starting_deck("IRONCLAD", 20)

['AscendersBane',
 'Strike_R',
 'Strike_R',
 'Strike_R',
 'Strike_R',
 'Strike_R',
 'Defend_R',
 'Defend_R',
 'Defend_R',
 'Defend_R',
 'Bash']

Now we can implement code to turn a run's columns and their starting deck into a list of different deck states throughout the run.

For my convenience, though, I'm going to combine all of our deck information columns into three: card adds, card removes, and card upgrades.

In [17]:
def all_adds(picks, buys, events):
    dt = defaultdict(list)
    for dct in [picks, buys, events]:
        for floor, cards in dct.items():
            dt[floor].extend(cards)
    return dt

def all_removes(shops, events, fires):
    dt = defaultdict(list)
    for dct in [shops, events, fires]:
        for floor, cards in dct.items():
            dt[floor].extend(cards)
    return dt

def all_upgrades(events, fires):
    dt = defaultdict(list)
    for dct in [events, fires]:
        for floor, cards in dct.items():
            dt[floor].extend(cards)
    return dt

In [18]:
new_df = df[["play_id", "floor_reached", "character_chosen", "ascension_level", "master_deck"]]
new_df["adds"] = df.apply(lambda x: all_adds(x.card_picks, x.card_purchases, x.event_card_picks), axis=1)
new_df["removes"] = df.apply(lambda x: all_removes(x.purges, x.event_card_removes, x.fire_removes), axis=1)
new_df["upgrades"] = df.apply(lambda x: all_upgrades(x.event_card_upgrades, x.fire_upgrades), axis=1)
new_df.head()

Unnamed: 0,play_id,floor_reached,character_chosen,ascension_level,master_deck,adds,removes,upgrades
0,f035334e-865f-476e-949b-67829192437d,52,THE_SILENT,20,"[AscendersBane, Strike_G, Strike_G, Defend_G, ...","{1: ['Acrobatics'], 4: ['Blade Dance'], 5: ['B...","{3: ['Defend_G'], 36: ['Decay'], 48: ['Strike_...","{7: ['Blade Dance'], 10: ['Neutralize'], 12: [..."
1,e370d2cb-3ed0-46b2-998b-c8741675398a,51,IRONCLAD,19,"[AscendersBane, Defend_R, Defend_R, Defend_R, ...","{1: ['Rampage'], 4: ['Shrug It Off'], 6: ['Lim...","{8: ['Strike_R'], 21: ['Strike_R']}","{7: ['Rampage'], 15: ['Limit Break'], 25: ['He..."
2,0ee32f9b-83a7-45e2-9da7-6623897d5521,51,DEFECT,20,"[AscendersBane, Strike_B, Strike_B, Defend_B, ...","{1: ['Ball Lightning'], 5: ['Hologram'], 14: [...","{11: ['Defend_B'], 19: ['Strike_B'], 3: ['Stri...","{46: ['Electrodynamics'], 6: ['Zap'], 8: ['Dua..."
3,b768fd8e-f7e7-427b-af3f-075fbe8812ec,56,DEFECT,17,"[AscendersBane, Defend_B, Defend_B, Zap+1, Dua...","{1: ['Ball Lightning'], 4: ['Coolheaded'], 6: ...","{11: ['Strike_B'], 27: ['Strike_B'], 2: ['Stri...","{7: ['Coolheaded'], 12: ['Coolheaded'], 15: ['..."
4,2df5033f-e5f7-4cd7-93b7-3f4b70f5475e,56,DEFECT,20,"[AscendersBane, Dualcast, Melter+1, Darkness+1...","{1: ['Melter'], 6: ['Darkness'], 7: ['Cold Sna...",{30: ['Zap']},"{4: ['Melter', 'Defend_B'], 8: ['Darkness'], 3..."


In [19]:
def get_deck_at_floors(play_id):
    data = new_df[new_df.play_id == play_id]
    cur_deck = get_char_starting_deck(data.character_chosen.item(), data.ascension_level.item())
    deck_states = pd.Series(index=np.arange(0, 57))
    deck_states[data.floor_reached.item()] = data.master_deck.item()
    
    for floor in range(0, data.floor_reached.item()):
        adds = data.adds.item()[floor]
        removes = data.removes.item()[floor]
        upgrades = data.upgrades.item()[floor]
        
        cur_deck.extend(adds)
        for card in removes:
            try:
                cur_deck.remove(card)
            except ValueError:
                pass
        for card in upgrades:
            try:
                cur_deck.remove(card)
            except ValueError:
                pass
            if "Searing Blow+" in card:
                # this is the only case where an upgrade makes a card anything other than "card+1"
                upg_level = card.split("+")[1]
                cur_deck.append(f"Searing Blow+{int(upg_level)+1}")
            else:
                cur_deck.append(card+"+1")
        
        deck_states[floor] = cur_deck.copy()
    
    return deck_states

In [20]:
#get_deck_at_floors("f035334e-865f-476e-949b-67829192437d")

In [21]:
df_deck_state = pd.DataFrame(index=new_df["play_id"].copy(),
                             columns=[f"floor_{floor}" for floor in range(0, 57)])

# This probably isn't the most efficient way, but other ways weren't working
for play_id in df_deck_state.index:
    state = get_deck_at_floors(play_id)
    df_deck_state.loc[play_id][state.index] = state.values
df_deck_state.head()

Unnamed: 0_level_0,floor_0,floor_1,floor_2,floor_3,floor_4,floor_5,floor_6,floor_7,floor_8,floor_9,...,floor_47,floor_48,floor_49,floor_50,floor_51,floor_52,floor_53,floor_54,floor_55,floor_56
play_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
f035334e-865f-476e-949b-67829192437d,"[AscendersBane, Strike_G, Defend_G, Strike_G, ...","[AscendersBane, Strike_G, Defend_G, Strike_G, ...","[AscendersBane, Defend_G, Strike_G, Defend_G, ...","[AscendersBane, Strike_G, Defend_G, Strike_G, ...","[AscendersBane, Strike_G, Defend_G, Strike_G, ...","[AscendersBane, Strike_G, Defend_G, Strike_G, ...","[AscendersBane, Strike_G, Defend_G, Strike_G, ...","[AscendersBane, Strike_G, Defend_G, Strike_G, ...","[AscendersBane, Strike_G, Defend_G, Strike_G, ...","[AscendersBane, Strike_G, Defend_G, Strike_G, ...",...,"[AscendersBane, Defend_G, Strike_G, Defend_G, ...","[AscendersBane, Defend_G, Defend_G, Strike_G, ...","[AscendersBane, Defend_G, Defend_G, Strike_G, ...","[AscendersBane, Defend_G, Defend_G, Strike_G, ...","[AscendersBane, Defend_G, Defend_G, Strike_G, ...","[AscendersBane, Strike_G, Strike_G, Defend_G, ...",,,,
e370d2cb-3ed0-46b2-998b-c8741675398a,"[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...",...,"[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[AscendersBane, Defend_R, Defend_R, Defend_R, ...",,,,,
0ee32f9b-83a7-45e2-9da7-6623897d5521,"[AscendersBane, Strike_B, Defend_B, Strike_B, ...","[AscendersBane, Strike_B, Defend_B, Strike_B, ...","[AscendersBane, Strike_B, Defend_B, Strike_B, ...","[AscendersBane, Defend_B, Strike_B, Defend_B, ...","[AscendersBane, Defend_B, Strike_B, Defend_B, ...","[AscendersBane, Defend_B, Strike_B, Defend_B, ...","[AscendersBane, Defend_B, Strike_B, Defend_B, ...","[AscendersBane, Defend_B, Strike_B, Defend_B, ...","[AscendersBane, Defend_B, Strike_B, Defend_B, ...","[AscendersBane, Defend_B, Strike_B, Defend_B, ...",...,"[AscendersBane, Defend_B, Strike_B, Defend_B, ...","[AscendersBane, Defend_B, Strike_B, Defend_B, ...","[AscendersBane, Defend_B, Strike_B, Defend_B, ...","[AscendersBane, Defend_B, Strike_B, Defend_B, ...","[AscendersBane, Strike_B, Strike_B, Defend_B, ...",,,,,
b768fd8e-f7e7-427b-af3f-075fbe8812ec,"[AscendersBane, Strike_B, Defend_B, Strike_B, ...","[AscendersBane, Strike_B, Defend_B, Strike_B, ...","[AscendersBane, Defend_B, Strike_B, Defend_B, ...","[AscendersBane, Defend_B, Strike_B, Defend_B, ...","[AscendersBane, Defend_B, Strike_B, Defend_B, ...","[AscendersBane, Defend_B, Defend_B, Strike_B, ...","[AscendersBane, Defend_B, Defend_B, Strike_B, ...","[AscendersBane, Defend_B, Defend_B, Strike_B, ...","[AscendersBane, Defend_B, Defend_B, Strike_B, ...","[AscendersBane, Defend_B, Defend_B, Strike_B, ...",...,"[AscendersBane, Defend_B, Defend_B, Zap, Zap, ...","[AscendersBane, Defend_B, Defend_B, Zap, Zap, ...","[AscendersBane, Defend_B, Defend_B, Zap, Zap, ...","[AscendersBane, Defend_B, Defend_B, Zap, Zap, ...","[AscendersBane, Defend_B, Defend_B, Zap, Zap, ...","[AscendersBane, Defend_B, Defend_B, Zap, Zap, ...","[AscendersBane, Defend_B, Defend_B, Zap, Zap, ...","[AscendersBane, Defend_B, Defend_B, Zap, Zap, ...","[AscendersBane, Defend_B, Defend_B, Zap, Zap, ...","[AscendersBane, Defend_B, Defend_B, Zap+1, Dua..."
2df5033f-e5f7-4cd7-93b7-3f4b70f5475e,"[AscendersBane, Strike_B, Defend_B, Strike_B, ...","[AscendersBane, Strike_B, Defend_B, Strike_B, ...","[AscendersBane, Strike_B, Defend_B, Strike_B, ...","[AscendersBane, Strike_B, Defend_B, Strike_B, ...","[AscendersBane, Strike_B, Strike_B, Defend_B, ...","[AscendersBane, Strike_B, Strike_B, Defend_B, ...","[AscendersBane, Strike_B, Strike_B, Defend_B, ...","[AscendersBane, Strike_B, Strike_B, Defend_B, ...","[AscendersBane, Strike_B, Strike_B, Defend_B, ...","[AscendersBane, Strike_B, Strike_B, Defend_B, ...",...,"[AscendersBane, Strike_B, Strike_B, Defend_B, ...","[AscendersBane, Strike_B, Strike_B, Defend_B, ...","[AscendersBane, Strike_B, Strike_B, Defend_B, ...","[AscendersBane, Strike_B, Strike_B, Defend_B, ...","[AscendersBane, Strike_B, Strike_B, Defend_B, ...","[AscendersBane, Strike_B, Strike_B, Defend_B, ...","[AscendersBane, Strike_B, Strike_B, Defend_B, ...","[AscendersBane, Strike_B, Strike_B, Defend_B, ...","[AscendersBane, Strike_B, Strike_B, Defend_B, ...","[AscendersBane, Dualcast, Melter+1, Darkness+1..."


Now we have a data frame that contains each run's deck state at any given floor. We'll save it to JSON so we can load it later for our model:

In [22]:
df_deck_state.to_json("deck_states_full.json")

I also want to implement a model that uses a simple heuristic - at or before the floor the player is on, how often was each card picked vs. how often has it been seen?

In [23]:
cards_seen = {}
cards_picked = {}
for floor in range(0, 57):
    cards_seen[floor] = []
    cards_picked[floor] = []

for row in df["card_choices"]:
    for dt in row:
        seen = dt["not_picked"].copy()
        if dt["picked"] == "SKIP":
            continue
        elif dt["picked"] != "Singing Bowl":
            seen.append(dt["picked"])
            seen = [card.split("+")[0] for card in seen]
            picked = dt["picked"].split("+")[0]
            floor = int(dt["floor"])

        if floor > 56:
            continue
        cards_seen[floor].extend(seen)
        cards_picked[floor].append(picked)

for floor in range(1, 57):
    seen = cards_seen[floor-1].copy()
    seen.extend(cards_seen[floor])
    picked = cards_picked[floor-1].copy()
    picked.extend(cards_picked[floor])

    cards_seen[floor] = seen
    cards_picked[floor] = picked
    
count_seen = {}
count_picked = {}
floor_props = {}
for floor in range(0, 57):
    props = {}
    count_seen[floor] = Counter(cards_seen[floor])
    count_picked[floor] = Counter(cards_picked[floor])
    for card in CARDS_LIST:
        if card in count_picked[floor].keys() and card in count_seen[floor].keys() and count_seen[floor][card] > 0:
            #print(count_picked[floor][card])
            #print(count_seen[floor][card])
            props[card] = min(count_picked[floor][card] / count_seen[floor][card], 1.0)
        else:
            props[card] = 0
    floor_props[floor] = props

In [24]:
df_props = pd.DataFrame.from_dict(floor_props, orient="index")
df_props.index.name = "Floor"
df_props.to_json("pickrates_full.json")
df_props.head()

Unnamed: 0_level_0,AscendersBane,Strike_G,Defend_G,Survivor,Neutralize,Acrobatics,Blade Dance,Backflip,Adrenaline,Predator,...,Flash of Steel,Hello World,Lockon,Nirvana,Discovery,PathToVictory,Chrysalis,Mind Blast,Enlightenment,Jack Of All Trades
Floor,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0,0,0,0,0,0.0,0.25,0.0,0.777778,0.0,...,0.777778,0.0,0.0,0.0,0.5,0.0,0.033333,0.0,0.0,0.25
1,0,0,0,0,0,0.173611,0.25974,0.372414,0.777778,0.656716,...,0.777778,0.152174,0.282051,0.0,0.5,0.170732,0.033333,0.0,0.0,0.25
2,0,0,0,0,0,0.169492,0.267677,0.40625,0.818182,0.640449,...,0.777778,0.147541,0.34,0.0,0.5,0.168,0.033333,0.0,0.0,0.25
3,0,0,0,0,0,0.158798,0.252033,0.417671,0.846154,0.60396,...,0.777778,0.146667,0.380282,0.020408,0.5,0.138365,0.033333,0.0,0.0,0.25
4,0,0,0,0,0,0.164983,0.250825,0.419355,0.882353,0.609375,...,0.777778,0.149425,0.363636,0.047619,0.5,0.125628,0.033333,0.0,0.0,0.25


The last (for now) thing we need is a data frame containing our card choices on each floor. That way, we can evaluate our models based on whether they recommended a card that was actually chosen.

In [25]:
df.head()

Unnamed: 0,play_id,floor_reached,character_chosen,ascension_level,items_purged,items_purged_floors,campfire_choices,master_deck,items_purchased,item_purchase_floors,card_choices,event_choices,purges,card_purchases,card_picks,event_card_picks,event_card_removes,event_card_upgrades,fire_upgrades,fire_removes
0,f035334e-865f-476e-949b-67829192437d,52,THE_SILENT,20,"[Defend_G, Decay, Strike_G]","[3, 36, 48]","[{'data': 'Blade Dance', 'floor': 7.0, 'key': ...","[AscendersBane, Strike_G, Strike_G, Defend_G, ...","[Sundial, Calculated Gamble]","[20, 36]","[{'not_picked': ['Noxious Fumes', 'Escape Plan...","[{'cards_removed': ['Strike_G'], 'damage_heale...","{3: ['Defend_G'], 36: ['Decay'], 48: ['Strike_...",{36: ['Calculated Gamble']},"{1: ['Acrobatics'], 4: ['Blade Dance'], 5: ['B...","{22: ['Decay'], 47: ['Madness', 'Madness']}","{2: ['Strike_G'], 39: ['Strike_G']}",{},"{7: ['Blade Dance'], 10: ['Neutralize'], 12: [...",{}
1,e370d2cb-3ed0-46b2-998b-c8741675398a,51,IRONCLAD,19,"[Strike_R, Strike_R]","[8, 21]","[{'data': 'Rampage', 'floor': 7.0, 'key': 'SMI...","[AscendersBane, Defend_R, Defend_R, Defend_R, ...","[Lee's Waffle, Flex+1, Apotheosis+1, Combust, ...","[11, 11, 27, 36, 47]","[{'not_picked': ['Warcry', 'Twin Strike'], 'pi...","[{'damage_healed': 0.0, 'max_hp_gain': 0.0, 'm...","{8: ['Strike_R'], 21: ['Strike_R']}",{36: ['Combust']},"{1: ['Rampage'], 4: ['Shrug It Off'], 6: ['Lim...","{39: ['Madness', 'Madness'], 44: ['Shrug It Of...",{},{},"{7: ['Rampage'], 15: ['Limit Break'], 25: ['He...",{}
2,0ee32f9b-83a7-45e2-9da7-6623897d5521,51,DEFECT,20,"[Defend_B, Strike_B]","[11, 19]","[{'data': 'Zap', 'floor': 6.0, 'key': 'SMITH'}...","[AscendersBane, Strike_B, Strike_B, Defend_B, ...","[Glacier, Potion Belt, Sweeping Beam, Rebound,...","[11, 30, 41, 47, 47]","[{'not_picked': ['Melter', 'BootSequence'], 'p...","[{'damage_healed': 0.0, 'max_hp_gain': 5.0, 'm...","{11: ['Defend_B'], 19: ['Strike_B']}","{11: ['Glacier'], 41: ['Sweeping Beam'], 47: [...","{1: ['Ball Lightning'], 5: ['Hologram'], 14: [...",{4: ['Leap']},{3: ['Strike_B']},{46: ['Electrodynamics']},"{6: ['Zap'], 8: ['Dualcast'], 25: ['Glacier'],...",{}
3,b768fd8e-f7e7-427b-af3f-075fbe8812ec,56,DEFECT,17,"[Strike_B, Strike_B]","[11, 27]","[{'data': 'Coolheaded', 'floor': 7.0, 'key': '...","[AscendersBane, Defend_B, Defend_B, Zap+1, Dua...","[Art of War, War Paint, Medical Kit, Distilled...","[11, 27, 53, 53]","[{'not_picked': ['Undo', 'Leap'], 'picked': 'B...","[{'damage_healed': 0.0, 'max_hp_gain': 0.0, 'm...","{11: ['Strike_B'], 27: ['Strike_B']}",{},"{1: ['Ball Lightning'], 4: ['Coolheaded'], 6: ...","{2: ['Self Repair'], 28: ['BootSequence', 'For...","{2: ['Strike_B'], 5: ['Strike_B'], 28: ['Defen...",{},"{7: ['Coolheaded'], 12: ['Coolheaded'], 15: ['...",{}
4,2df5033f-e5f7-4cd7-93b7-3f4b70f5475e,56,DEFECT,20,[Zap],[30],"[{'data': 'Darkness', 'floor': 8, 'key': 'SMIT...","[AscendersBane, Dualcast, Melter+1, Darkness+1...","[DataDisk, Strange Spoon, Defragment, Gamblers...","[5, 30, 30, 30, 47, 54]","[{'not_picked': ['Stack', 'Heatsinks'], 'picke...","[{'damage_healed': 0, 'gold_gain': 75, 'player...",{30: ['Zap']},"{30: ['Defragment'], 54: ['Panacea']}","{1: ['Melter'], 6: ['Darkness'], 7: ['Cold Sna...",{19: ['Core Surge']},{},"{4: ['Melter', 'Defend_B']}","{8: ['Darkness'], 32: ['Defragment'], 40: ['Mu...",{}


We're only going to use card choices from 3-card rewards, since those are the only choices we're programming our recommender to make at the moment.

In [26]:
df_choices = df[["play_id", "card_choices"]]
df_choices.head()

Unnamed: 0,play_id,card_choices
0,f035334e-865f-476e-949b-67829192437d,"[{'not_picked': ['Noxious Fumes', 'Escape Plan..."
1,e370d2cb-3ed0-46b2-998b-c8741675398a,"[{'not_picked': ['Warcry', 'Twin Strike'], 'pi..."
2,0ee32f9b-83a7-45e2-9da7-6623897d5521,"[{'not_picked': ['Melter', 'BootSequence'], 'p..."
3,b768fd8e-f7e7-427b-af3f-075fbe8812ec,"[{'not_picked': ['Undo', 'Leap'], 'picked': 'B..."
4,2df5033f-e5f7-4cd7-93b7-3f4b70f5475e,"[{'not_picked': ['Stack', 'Heatsinks'], 'picke..."


In [27]:
def reformat_choices(choices):
    new_choices = {}
    offered = {}
    for choice in choices:
        floor = choice['floor']
        seen = choice['not_picked'].copy()
        picked = choice['picked']
        if picked == "SKIP" or picked == "Singing Bowl":
            # I don't care about skips, for now at least
            # Singing Bowl is a skip alternative, we don't care about those either
            continue
        seen.append(picked)
        offered[floor] = seen
        new_choices[floor] = picked
    
    return new_choices, offered

In [28]:
res = df_choices["card_choices"].apply(reformat_choices)
res = pd.DataFrame([[a, b] for a, b in res.values], columns=["choices", "seen"])
df_choices = df_choices.join(res).drop("card_choices", axis=1).set_index("play_id")
df_choices.head()

Unnamed: 0_level_0,choices,seen
play_id,Unnamed: 1_level_1,Unnamed: 2_level_1
f035334e-865f-476e-949b-67829192437d,"{1.0: 'Acrobatics', 4.0: 'Blade Dance', 5.0: '...","{1.0: ['Noxious Fumes', 'Escape Plan', 'Acroba..."
e370d2cb-3ed0-46b2-998b-c8741675398a,"{1.0: 'Rampage', 4.0: 'Shrug It Off', 6.0: 'Li...","{1.0: ['Warcry', 'Twin Strike', 'Rampage'], 4...."
0ee32f9b-83a7-45e2-9da7-6623897d5521,"{1.0: 'Ball Lightning', 5.0: 'Hologram', 14.0:...","{1.0: ['Melter', 'BootSequence', 'Ball Lightni..."
b768fd8e-f7e7-427b-af3f-075fbe8812ec,"{1.0: 'Ball Lightning', 4.0: 'Coolheaded', 6.0...","{1.0: ['Undo', 'Leap', 'Ball Lightning'], 4.0:..."
2df5033f-e5f7-4cd7-93b7-3f4b70f5475e,"{1: 'Melter', 6: 'Darkness', 7: 'Cold Snap', 1...","{1: ['Stack', 'Heatsinks', 'Melter'], 6: ['Cha..."


In [29]:
df_choices.to_json("cards_seen_picked_full.json")