In [92]:
import pandas as pd
import json
from collections import Counter

# Data files are formatted in a weird way, this function helps read them in
def read_sts_file(arr, cap=1000):
    df = pd.DataFrame()
    nm = 0
    for str in arr:
        if nm>cap:
            break
        with open(str) as f:
            data = json.load(f)
            df = pd.concat([df, pd.DataFrame(rec['event'] for rec in data)])
            nm+=1
    
    return df

In [93]:
# This cell can be modified to load in any number of run files
# Currently, it's only reading in the file of my personal runs
df = read_sts_file(["runs.json"])
df.head()

Unnamed: 0,gold_per_floor,floor_reached,playtime,items_purged,score,play_id,local_time,is_ascension_mode,campfire_choices,neow_cost,...,boss_relics,items_purged_floors,is_endless,potions_floor_spawned,killed_by,ascension_level,special_seed,basemod:card_modifiers,relic_stats,daily_mods
0,"[114, 39, 65, 82, 6, 35, 35, 35, 35, 45, 45, 7...",33,960,[Strike_R],555,fdbe49b9-1846-4b0f-aea4-81f669f21767,20240806233804,True,"[{'data': 'Disarm', 'floor': 8, 'key': 'SMITH'...",NONE,...,"[{'not_picked': ['Ectoplasm', 'Runic Dome'], '...",[2],False,"[6, 10, 14, 16, 18, 21, 30]",Automaton,17,,,,
1,"[114, 114, 132, 57, 77, 106, 106]",7,213,[],94,31fb9095-70e9-46cd-9069-7d6ae4c20237,20240812111502,True,[],TEN_PERCENT_HP_LOSS,...,[],[],False,"[1, 3, 5]",Gremlin Gang,17,,,,
2,"[116, 216, 11, 22, 40, 68, 68, 97, 97, 116, 116]",11,227,[],154,98eb822b-53a7-481c-9e92-367e16e75499,20240813232656,True,"[{'data': 'Evolve', 'floor': 7, 'key': 'SMITH'}]",NONE,...,[],[],False,"[1, 8, 10]",3 Louse,17,0.0,,,
3,"[112, 112, 112, 124, 136, 176, 188, 200, 212, ...",16,393,[],250,9f0d32dc-eaad-43c4-b8a7-e934c6da2614,20240814165737,True,"[{'data': 'Bash', 'floor': 7, 'key': 'SMITH'},...",NONE,...,[],[],False,"[1, 6, 12, 13, 14]",Hexaghost,17,,,,
4,"[362, 381, 391, 316, 316, 316]",6,112,[],67,a9240145-8e3c-4033-9b58-f2ccd4cec560,20240815232946,True,[],TEN_PERCENT_HP_LOSS,...,[],[],False,[],Gremlin Nob,17,,,,


This data is structured in a way that will require a lot of preprocessing. My goal is to eventually have a set of data for each floor of a run, and to compare the user's current deck to every other deck at that floor to see what card choices they would have made.

In [94]:
df.columns

Index(['gold_per_floor', 'floor_reached', 'playtime', 'items_purged', 'score',
       'play_id', 'local_time', 'is_ascension_mode', 'campfire_choices',
       'neow_cost', 'seed_source_timestamp', 'circlet_count', 'master_deck',
       'relics', 'potions_floor_usage', 'damage_taken', 'seed_played',
       'potions_obtained', 'is_trial', 'path_per_floor', 'character_chosen',
       'items_purchased', 'campfire_rested', 'item_purchase_floors',
       'current_hp_per_floor', 'gold', 'neow_bonus', 'is_prod', 'is_daily',
       'chose_seed', 'campfire_upgraded', 'win_rate', 'timestamp',
       'path_taken', 'build_version', 'purchased_purges', 'victory',
       'max_hp_per_floor', 'card_choices', 'player_experience',
       'relics_obtained', 'event_choices', 'is_beta', 'boss_relics',
       'items_purged_floors', 'is_endless', 'potions_floor_spawned',
       'killed_by', 'ascension_level', 'special_seed',
       'basemod:card_modifiers', 'relic_stats', 'daily_mods'],
      dtype='object')

In [95]:
df["card_choices"].iloc[0]

[{'not_picked': ['True Grit', 'Flame Barrier'], 'picked': 'Anger', 'floor': 1},
 {'not_picked': ['Combust', 'Sword Boomerang'],
  'picked': 'Hemokinesis',
  'floor': 4},
 {'not_picked': ['Dual Wield', 'Whirlwind'], 'picked': 'Disarm', 'floor': 6},
 {'not_picked': ['Body Slam', 'Shrug It Off'],
  'picked': 'Reaper',
  'floor': 10},
 {'not_picked': ['Sentinel', 'Ghostly Armor'],
  'picked': 'Seeing Red',
  'floor': 12},
 {'not_picked': ['Cleave', 'Twin Strike', 'Anger'],
  'picked': 'SKIP',
  'floor': 14},
 {'not_picked': ['Impervious', 'Feed'], 'picked': 'Offering', 'floor': 16},
 {'not_picked': ['Hemokinesis', 'Clash'],
  'picked': 'Flame Barrier',
  'floor': 18},
 {'not_picked': ['Sword Boomerang', 'Twin Strike'],
  'picked': 'Inflame',
  'floor': 19},
 {'not_picked': ['Twin Strike', 'Thunderclap'],
  'picked': 'Disarm+1',
  'floor': 21},
 {'not_picked': ['Warcry+1', 'Armaments'], 'picked': 'Whirlwind', 'floor': 24},
 {'not_picked': ['Sword Boomerang', 'Intimidate'],
  'picked': 'Arma

Before we cut down on our columns, we need a list of all available cards, so we can filter for them later:

In [96]:
CARDS_LIST = []
for row in df["master_deck"]:
    for card in row:
        card = card.split("+")[0]
        if card not in CARDS_LIST:
            CARDS_LIST.append(card)

There are a whole lot of unimportant columns in this data set. I'll only be using the ones that relate to the cards in the deck.


In [97]:
cols = ["floor_reached", "character_chosen", "ascension_level",
        "items_purged", "items_purged_floors", "campfire_choices", "master_deck",
        "items_purchased", "item_purchase_floors", "card_choices", "event_choices"]
df = df[cols]
df.head()

Unnamed: 0,floor_reached,character_chosen,ascension_level,items_purged,items_purged_floors,campfire_choices,master_deck,items_purchased,item_purchase_floors,card_choices,event_choices
0,33,IRONCLAD,17,[Strike_R],[2],"[{'data': 'Disarm', 'floor': 8, 'key': 'SMITH'...","[AscendersBane, Strike_R, Strike_R, Defend_R, ...","[Feel No Pain, PowerPotion, Dark Embrace, Seei...","[5, 13, 31, 31, 31]","[{'not_picked': ['True Grit', 'Flame Barrier']...","[{'damage_healed': 0, 'gold_gain': 0, 'cards_t..."
1,7,IRONCLAD,17,[],[],[],"[AscendersBane, Strike_R, Strike_R, Defend_R, ...",[],[],"[{'not_picked': ['Rampage', 'Heavy Blade'], 'p...","[{'damage_healed': 0, 'gold_gain': 0, 'cards_t..."
2,11,IRONCLAD,17,[],[],"[{'data': 'Evolve', 'floor': 7, 'key': 'SMITH'}]","[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[Evolve, Shrug It Off, Headbutt, Cleave]","[3, 3, 3, 3]","[{'not_picked': ['Wild Strike', 'Clothesline']...","[{'damage_healed': 0.0, 'max_hp_gain': 0.0, 'm..."
3,16,IRONCLAD,17,[],[],"[{'data': 'Bash', 'floor': 7, 'key': 'SMITH'},...","[AscendersBane, Strike_R, Strike_R, Strike_R, ...",[],[],"[{'not_picked': ['Reckless Charge', 'Perfected...","[{'damage_healed': 0, 'gold_gain': 0, 'player_..."
4,6,IRONCLAD,17,[],[],[],"[AscendersBane, Strike_R, Strike_R, Strike_R, ...",[],[],"[{'not_picked': ['Iron Wave', 'Warcry'], 'pick...","[{'cards_removed': ['Strike_R'], 'damage_heale..."


These columns are all of the columns that contain any information about the state of our deck, even if it's only a tiny amount. I also have a little bit of metadata about the run, so we can select only runs on high difficulties, only runs that match the character currently being played, etc.

Unfortunately, most of these columns are still not in a particularly useful state. The `master_deck` column gives us the deck at the end of the run, and the deck at the start of the run is always the same, but we'll need to use the information in every other column to be able to reconstruct the deck at any given floor.

We'll likely want to store that in a new column after we construct it.

In [102]:
def get_purge_dict(purges, floors):
    dt = {}
    for purge, floor in zip(purges, floors):
        if floor in dt:
            dt[floor].append(purge)
        else:
            dt[floor] = [purge]
    return dt

def get_purchase_dict(purchases, floors):
    dt = {}
    for purchase, floor in zip(purchases, floors):
        # Make sure the purchase is a card, otherwise ignore it
        if purchase in CARDS_LIST:
            if floor in dt:
                dt[floor].append(purchase)
            else:
                dt[floor] = [purchase]
    return dt

def get_card_picks(choices):
    picks = {}
    for choice in choices:
        floor = int(choice["floor"])
        pick = choice["picked"]
        if pick != "SKIP":
            if floor in picks:
                picks[floor].append(pick)
            else:
                picks[floor] = [pick]
    return picks

In [103]:
df["purges"] = df[["items_purged", "items_purged_floors"]].apply(lambda x: get_purge_dict(x.items_purged, x.items_purged_floors), axis=1)
df["purges"].head()

0    {2: ['Strike_R']}
1                   {}
2                   {}
3                   {}
4                   {}
Name: purges, dtype: object

In [104]:
df["card_purchases"] = df[["items_purchased", "item_purchase_floors"]].apply(lambda x: get_purge_dict(x.items_purchased, x.item_purchase_floors)
                                                                             , axis=1)
df["card_purchases"].head()

0    {5: ['Feel No Pain'], 13: ['PowerPotion'], 31:...
1                                                   {}
2    {3: ['Evolve', 'Shrug It Off', 'Headbutt', 'Cl...
3                                                   {}
4                                                   {}
Name: card_purchases, dtype: object

In [105]:
df["card_picks"] = df["card_choices"].apply(get_card_picks)

In [106]:
def get_char_starting_deck(character, ascension):
    # Gets the deck that a character starts with on floor 0
    deck = []
    if ascension >= 10:
        deck.append("AscendersBane")
    
    if character == "Ironclad":
        deck.extend(["Strike_R"] * 5 + ["Defend_R"] * 4 + ["Bash"])
    elif character == "The_Silent":
        deck.extend(["Strike_G", "Defend_G"] * 5 + ["Neutralize", "Survivor"])
    elif character == "Defect":
        deck.extend(["Strike_B","Defend_B"] * 4 + ["Zap", "Dualcast"])
        deck.extend([] * 4)
        deck.extend(["Zap", "Dualcast"])
    elif character == "Watcher":
        deck.extend(["Strike_P", "Defend_P"] * 4 + ["Eruption", "Vigilance"])

    return deck

In [107]:
get_char_starting_deck("Ironclad", 20)

['AscendersBane',
 'Strike_R',
 'Strike_R',
 'Strike_R',
 'Strike_R',
 'Strike_R',
 'Defend_R',
 'Defend_R',
 'Defend_R',
 'Defend_R',
 'Bash']

# MORE PREPROCESSING GOES HERE

I also want to implement a model that uses a simple heuristic - at or before the floor the player is on, how often was each card picked vs. how often has it been seen?

In [168]:
cards_seen = {}
cards_picked = {}
for floor in range(0, 55):
    cards_seen[floor] = []
    cards_picked[floor] = []
    
for row in df["card_choices"]:
    for dt in row:
        seen = dt["not_picked"].copy()
        if dt["picked"] != "SKIP" and dt["picked"] != "Singing Bowl":
            seen.append(dt["picked"])
            seen = [card.split("+")[0] for card in seen]
            picked = dt["picked"].split("+")[0]
        floor = int(dt["floor"])
        cards_seen[floor].extend(seen)
        cards_picked[floor].append(picked)

for floor in range(1, 55):
    #print(cards_seen[floor-1])
    seen = cards_seen[floor-1].copy()
    seen.extend(cards_seen[floor])
    picked = cards_picked[floor-1].copy()
    picked.extend(cards_picked[floor])
    cards_seen[floor] = seen
    cards_picked[floor] = picked

count_seen = {}
count_picked = {}
floor_props = {}
for floor in range(0, 55):
    props = {}
    count_seen[floor] = Counter(cards_seen[floor])
    count_picked[floor] = Counter(cards_picked[floor])
    for card in count_seen[floor].keys():
        if card in count_picked[floor].keys() and count_seen[floor][card] > 0:
            #print(count_picked[floor][card])
            #print(count_seen[floor][card])
            props[card] = count_picked[floor][card] / count_seen[floor][card]
        else:
            props[card] = 0
    floor_props[floor] = props

floor_props[54]

{'Double Tap': 0.16666666666666666,
 'Impervious': 0.5333333333333333,
 'Reaper': 0.9375,
 'Panacea': 0.3333333333333333,
 'Madness': 0.3333333333333333,
 'Purity': 3.3333333333333335,
 'Berserk': 0,
 'Barricade': 0.35,
 'Exhume': 0.6666666666666666,
 'Offering': 1.7222222222222223,
 'Fiend Fire': 1.105263157894737,
 'Corruption': 1.15,
 'Bludgeon': 0.15384615384615385,
 'Demon Form': 0.5,
 'Feed': 0.7647058823529411,
 'Limit Break': 0.75,
 'Immolate': 1.0909090909090908,
 'Violence': 0,
 'Mayhem': 0,
 'Sadistic Nature': 0.5,
 'Secret Weapon': 0,
 'Secret Technique': 0.16666666666666666,
 'Metamorphosis': 0.16666666666666666,
 'Swift Strike': 0,
 'Bandage Up': 1.0,
 'Poisoned Stab': 0.17142857142857143,
 'Dagger Throw': 0.29,
 'Leg Sweep': 0.6545454545454545,
 'Panache': 0.8333333333333334,
 'HandOfGreed': 0.75,
 'Chrysalis': 0,
 'Apotheosis': 1.2857142857142858,
 'Master of Strategy': 0.5,
 'Thinking Ahead': 0,
 'Mind Blast': 0,
 'Enlightenment': 0,
 'Blind': 1.0,
 'The Bomb': 1.0,
 '

In [163]:
#df_props = pd.DataFrame()
#df_props.columns = CARDS_LIST
#for floor in floor_props.keys():
#df_props
df_props = pd.DataFrame.from_dict(floor_props, orient="index")
df_props

Unnamed: 0,Double Tap,Impervious,Reaper,Panacea,Madness,Purity,Berserk,Barricade,Exhume,Offering,...,Warcry+1,Distraction+1,Double Energy+1,Quick Slash+1,Fusion+1,Fasting2+1,Trip,Reckless Charge+1,Bane+1,Storm+1
0,0.0,0.0,1.0,0.0,0.0,1.0,0,0.0,1.0,0.0,...,,,,,,,,,,
1,0.0,0.0,1.0,0.0,0.0,1.0,0,0.0,1.0,0.0,...,,,,,,,,,,
2,0.0,0.0,1.0,0.0,0.0,1.0,0,0.0,1.0,0.0,...,,,,,,,,,,
3,0.0,0.25,1.0,0.0,0.0,1.0,0,0.0,1.0,0.0,...,,,,,,,,,,
4,0.0,0.25,1.0,0.0,0.0,1.0,0,0.0,1.0,0.0,...,,,,,,,,,,
5,0.0,0.25,1.0,0.0,0.0,1.0,0,0.0,1.0,0.0,...,,,,,,,,,,
6,0.0,0.4,1.0,0.0,0.0,1.0,0,0.0,1.0,0.5,...,,,,,,,,,,
7,0.666667,0.4,1.0,0.0,0.0,1.0,0,0.0,1.0,0.5,...,,,,,,,,,,
8,0.4,0.571429,1.0,0.0,0.0,1.0,0,0.2,1.0,0.6,...,,,,,,,,,,
9,0.4,0.571429,1.0,0.0,0.0,1.0,0,0.2,1.0,0.6,...,,,,,,,,,,
