In [1]:
import pandas as pd
import json
from collections import Counter

# Data files are formatted in a weird way, this function helps read them in
def read_sts_file(arr, cap=1000):
    df = pd.DataFrame()
    nm = 0
    for str in arr:
        if nm>cap:
            break
        with open(str) as f:
            data = json.load(f)
            df = pd.concat([df, pd.DataFrame(rec['event'] for rec in data)])
            nm+=1
    
    return df

In [2]:
# This cell can be modified to load in any number of run files
# Currently, it's only reading in the file of my personal runs
#df = read_sts_file(["runs.json"])

# This line reads in a 2+ GB file of runs. This is the full dataset
df_backup = read_sts_file(["all_runs.json"])

In [24]:
df = df_backup.copy()
df.head()

Unnamed: 0,gold_per_floor,floor_reached,playtime,items_purged,score,play_id,local_time,is_ascension_mode,campfire_choices,neow_cost,...,boss_relics,items_purged_floors,is_endless,potions_floor_spawned,ascension_level,killed_by,special_seed,basemod:card_modifiers,relic_stats,daily_mods
0,"[99, 99]",0,14,[],0,6e08526d-e971-46a9-a2a3-1bbf8c8b076e,20200928145602,False,[],,...,[],[],False,[],0,,,,,
1,"[114, 114, 133, 58, 58, 58]",5,58,[Pain],54,245dfaaa-828e-4714-88f2-2117845d4ca1,20200928135602,True,[],NONE,...,[],[4],False,"[1, 3]",17,Small Slimes,,,,
2,"[114, 114, 114, 114, 134, 161, 161, 190, 190, ...",50,1804,"[Regret, Shame]",674,79dc9e53-9273-418d-b316-19c9b945eb62,20200928105601,True,"[{'data': 'Dagger Spray', 'floor': 7, 'key': '...",NONE,...,"[{'not_picked': ['Fusion Hammer', 'Velvet Chok...","[21, 45]",False,"[6, 8, 16, 22, 23, 29, 31, 39, 44, 48]",3,Donu and Deca,,,,
3,"[359, 359, 374, 385, 401, 401, 419, 434, 460, ...",55,4014,[],693,52302b6c-d316-42b5-9fc3-1a00f13e7c80,20200927225605,False,"[{'floor': 6.0, 'key': 'RECALL'}, {'floor': 15...",PERCENT_DAMAGE,...,"[{'not_picked': ['SlaversCollar', 'Coffee Drip...",[],False,"[1, 4, 7, 12, 16, 25, 28, 31, 35]",0,The Heart,0.0,,,
4,"[118, 134, 134, 134, 37, 37, 81, 99, 99, 99, 9...",28,21366,[],352,58483692-2edf-4bde-bffa-cc89d6087630,20200927225605,True,"[{'floor': 6.0, 'key': 'RECALL'}, {'data': 'Bl...",NONE,...,"[{'not_picked': ['Philosopher's Stone', 'Ectop...",[],False,"[1, 12, 18]",9,Cultist and Chosen,0.0,,,


I only want to use runs performed on Ascension 20, the hardest difficulty. We also don't want to use any runs performed in Endless Mode. It should be pretty easy to filter for that right away:

In [44]:
df = df[(df.ascension_level == 20) & (df.is_endless == False) & (df.floor_reached <= 56)]

AttributeError: 'DataFrame' object has no attribute 'is_endless'

This data is structured in a way that will require a lot of preprocessing. My goal is to eventually have a set of data for each floor of a run, and to compare the user's current deck to every other deck at that floor to see what card choices they would have made.

In [33]:
df.columns

Index(['gold_per_floor', 'floor_reached', 'playtime', 'items_purged', 'score',
       'play_id', 'local_time', 'is_ascension_mode', 'campfire_choices',
       'neow_cost', 'seed_source_timestamp', 'circlet_count', 'master_deck',
       'relics', 'potions_floor_usage', 'damage_taken', 'seed_played',
       'potions_obtained', 'is_trial', 'path_per_floor', 'character_chosen',
       'items_purchased', 'campfire_rested', 'item_purchase_floors',
       'current_hp_per_floor', 'gold', 'neow_bonus', 'is_prod', 'is_daily',
       'chose_seed', 'campfire_upgraded', 'win_rate', 'timestamp',
       'path_taken', 'build_version', 'purchased_purges', 'victory',
       'max_hp_per_floor', 'card_choices', 'player_experience',
       'relics_obtained', 'event_choices', 'is_beta', 'boss_relics',
       'items_purged_floors', 'is_endless', 'potions_floor_spawned',
       'ascension_level', 'killed_by', 'special_seed',
       'basemod:card_modifiers', 'relic_stats', 'daily_mods'],
      dtype='object')

Before we cut down on our columns, we need a list of all available cards, so we can filter for them later:

In [35]:
CARDS_LIST = []
for row in df["master_deck"]:
    for card in row:
        card = card.split("+")[0]
        if card not in CARDS_LIST:
            CARDS_LIST.append(card)

There are a whole lot of unimportant columns in this data set. I'll only be using the ones that relate to the cards in the deck.


In [36]:
cols = ["floor_reached", "character_chosen", "ascension_level",
        "items_purged", "items_purged_floors", "campfire_choices", "master_deck",
        "items_purchased", "item_purchase_floors", "card_choices", "event_choices"]
df = df[cols]
df.head()

Unnamed: 0,floor_reached,character_chosen,ascension_level,items_purged,items_purged_floors,campfire_choices,master_deck,items_purchased,item_purchase_floors,card_choices,event_choices
6,33,WATCHER,20,"[Defend_P, Defend_P]","[14, 19]","[{'data': 'Eruption', 'floor': 6.0, 'key': 'SM...","[AscendersBane, Eruption+1, Vigilance, Wallop,...",[Bag of Preparation],[19],"[{'not_picked': ['Protect', 'PathToVictory', '...","[{'damage_healed': 0.0, 'max_hp_gain': 0.0, 'm..."
18,24,DEFECT,20,[Strike_B],[13],"[{'data': 'Zap', 'floor': 6.0, 'key': 'SMITH'}...","[AscendersBane, Strike_B, Strike_B, Strike_B, ...","[Gash, Storm, Blood Vial]","[3, 22, 22]","[{'not_picked': ['Stack', 'Turbo'], 'picked': ...","[{'damage_healed': 0.0, 'max_hp_gain': 0.0, 'm..."
30,22,DEFECT,20,[],[],"[{'floor': 7, 'key': 'REST'}, {'data': 'Doom a...","[AscendersBane, Strike_B, Strike_B, Strike_B, ...","[Orichalcum, Doom and Gloom]","[3, 3]","[{'not_picked': ['Consume', 'Rebound'], 'picke...","[{'damage_healed': 0, 'gold_gain': 150, 'playe..."
34,8,IRONCLAD,20,[],[],[],"[AscendersBane, Strike_R, Strike_R, Strike_R, ...","[Immolate, Strength Potion]","[5, 5]","[{'not_picked': ['Clothesline', 'Cleave'], 'pi...","[{'damage_healed': 0, 'gold_gain': 0, 'player_..."
38,7,IRONCLAD,20,[],[],"[{'data': 'Cleave', 'floor': 6, 'key': 'SMITH'}]","[AscendersBane, Strike_R, Strike_R, Strike_R, ...",[],[],"[{'not_picked': ['Hemokinesis', 'Havoc'], 'pic...","[{'damage_healed': 0, 'gold_gain': 75, 'player..."


These columns are all of the columns that contain any information about the state of our deck, even if it's only a tiny amount. I also have a little bit of metadata about the run, so we can select only runs on high difficulties, only runs that match the character currently being played, etc.

Unfortunately, most of these columns are still not in a particularly useful state. The `master_deck` column gives us the deck at the end of the run, and the deck at the start of the run is always the same, but we'll need to use the information in every other column to be able to reconstruct the deck at any given floor.

We'll likely want to store that in a new column after we construct it.

In [48]:
def get_purge_dict(purges, floors):
    dt = {}
    for purge, floor in zip(purges, floors):
        if floor in dt:
            dt[floor].append(purge)
        else:
            dt[floor] = [purge]
    return dt

def get_purchase_dict(purchases, floors):
    dt = {}
    for purchase, floor in zip(purchases, floors):
        # Make sure the purchase is a card, otherwise ignore it
        if purchase in CARDS_LIST:
            if floor in dt:
                dt[floor].append(purchase)
            else:
                dt[floor] = [purchase]
    return dt

def get_card_picks(choices):
    picks = {}
    for choice in choices:
        floor = int(choice["floor"])
        if floor > 56:
            # Runs should end at floor 56, so we won't count anything past that
            continue
        pick = choice["picked"]
        if pick != "SKIP":
            if floor in picks:
                picks[floor].append(pick)
            else:
                picks[floor] = [pick]
    return picks

In [49]:
df["purges"] = df[["items_purged", "items_purged_floors"]].apply(lambda x: get_purge_dict(x.items_purged, x.items_purged_floors), axis=1)
df["purges"].head()

6     {14: ['Defend_P'], 19: ['Defend_P']}
18                      {13: ['Strike_B']}
30                                      {}
34                                      {}
38                                      {}
Name: purges, dtype: object

In [50]:
df["card_purchases"] = (df[["items_purchased", "item_purchase_floors"]]
                        .apply(lambda x: get_purchase_dict(x.items_purchased, x.item_purchase_floors), axis=1))
df["card_purchases"].head()

6                               {}
18    {3: ['Gash'], 22: ['Storm']}
30         {3: ['Doom and Gloom']}
34               {5: ['Immolate']}
38                              {}
Name: card_purchases, dtype: object

In [51]:
df["card_picks"] = df["card_choices"].apply(get_card_picks)
df["card_picks"].head()

6     {4: ['DeusExMachina'], 7: ['BowlingBash'], 10:...
18    {1: ['Redo'], 5: ['Glacier'], 8: ['Reinforced ...
30    {1: ['Coolheaded'], 4: ['Leap'], 5: ['Ball Lig...
34    {1: ['Battle Trance'], 2: ['Inflame'], 4: ['Pu...
38    {1: ['Twin Strike'], 2: ['Pommel Strike'], 5: ...
Name: card_picks, dtype: object

In [52]:
def get_char_starting_deck(character, ascension):
    # Gets the deck that a character starts with on floor 0
    deck = []
    if ascension >= 10:
        deck.append("AscendersBane")
    
    if character == "Ironclad":
        deck.extend(["Strike_R"] * 5 + ["Defend_R"] * 4 + ["Bash"])
    elif character == "The_Silent":
        deck.extend(["Strike_G", "Defend_G"] * 5 + ["Neutralize", "Survivor"])
    elif character == "Defect":
        deck.extend(["Strike_B","Defend_B"] * 4 + ["Zap", "Dualcast"])
        deck.extend([] * 4)
        deck.extend(["Zap", "Dualcast"])
    elif character == "Watcher":
        deck.extend(["Strike_P", "Defend_P"] * 4 + ["Eruption", "Vigilance"])

    return deck

In [53]:
get_char_starting_deck("Ironclad", 20)

['AscendersBane',
 'Strike_R',
 'Strike_R',
 'Strike_R',
 'Strike_R',
 'Strike_R',
 'Defend_R',
 'Defend_R',
 'Defend_R',
 'Defend_R',
 'Bash']

# MORE PREPROCESSING GOES HERE

I also want to implement a model that uses a simple heuristic - at or before the floor the player is on, how often was each card picked vs. how often has it been seen?

In [61]:
cards_seen = {}
cards_picked = {}
for floor in range(0, 57):
    cards_seen[floor] = []
    cards_picked[floor] = []

for row in df["card_choices"]:
    for dt in row:
        seen = dt["not_picked"].copy()
        if dt["picked"] == "SKIP":
            continue
        elif dt["picked"] != "Singing Bowl":
            seen.append(dt["picked"])
            seen = [card.split("+")[0] for card in seen]
            picked = dt["picked"].split("+")[0]
            floor = int(dt["floor"])

        if floor > 56:
            continue
        cards_seen[floor].extend(seen)
        cards_picked[floor].append(picked)

for floor in range(1, 57):
    seen = cards_seen[floor-1].copy()
    seen.extend(cards_seen[floor])
    picked = cards_picked[floor-1].copy()
    picked.extend(cards_picked[floor])

    cards_seen[floor] = seen
    cards_picked[floor] = picked
    
count_seen = {}
count_picked = {}
floor_props = {}
for floor in range(0, 57):
    props = {}
    count_seen[floor] = Counter(cards_seen[floor])
    count_picked[floor] = Counter(cards_picked[floor])
    for card in CARDS_LIST:
        if card in count_picked[floor].keys() and card in count_seen[floor].keys() and count_seen[floor][card] > 0:
            #print(count_picked[floor][card])
            #print(count_seen[floor][card])
            props[card] = count_picked[floor][card] / count_seen[floor][card]
        else:
            props[card] = 0
    floor_props[floor] = props

In [62]:
df_props = pd.DataFrame.from_dict(floor_props, orient="index")
df_props.head()

Unnamed: 0,AscendersBane,Eruption,Vigilance,Wallop,DeusExMachina,BowlingBash,TalkToTheHand,LessonLearned,FlurryOfBlows,Bite,...,Secret Weapon,Discovery,PanicButton,Good Instincts,Finesse,Deep Breath,Collect,Enlightenment,Mind Blast,Violence
0,0,0,0,1.0,0.172414,0.333333,1.0,0.857143,0.625,0,...,0.10101,0.458333,0.076923,0.25,0.450549,0.253333,0.0,0.333333,0.090909,0.158371
1,0,0,0,0.81,0.172414,0.467066,0.846847,0.857143,0.491329,0,...,0.10101,0.458333,0.076923,0.25,0.450549,0.253333,0.170455,0.333333,0.090909,0.158371
2,0,0,0,0.781022,0.2,0.483296,0.843537,0.84375,0.514831,0,...,0.10101,0.458333,0.076923,0.25,0.450549,0.253333,0.15873,0.333333,0.090909,0.158371
3,0,0,0,0.813187,0.212121,0.496503,0.839378,0.857143,0.515101,0,...,0.10101,0.458333,0.076923,0.25,0.450549,0.253333,0.148387,0.333333,0.090909,0.158371
4,0,0,0,0.785714,0.358974,0.496382,0.827586,0.85,0.527397,0,...,0.10101,0.458333,0.076923,0.25,0.450549,0.253333,0.156098,0.333333,0.090909,0.158371


In [89]:
def recommend_card_at_floor(cards, floor):
    # Given a list of cards and a floor, returns the probabilities that each card would have been picked given a card reward of only those
    prob = {}
    for card in cards:
        prob[card] = df_props[card].iloc[floor]
    sum_probs = sum(prob.values())
    for key, value in prob.items():
        prob[key] = value/sum_probs
    return prob

recommend_card_at_floor(["Double Tap", "Corruption", "Bludgeon"], 15)

{'Double Tap': 0.3314917904955524,
 'Corruption': 0.29504433608991076,
 'Bludgeon': 0.37346387341453696}