In [2]:
import pandas as pd
import json
import os
import string


In [3]:
# load cards

card_path = os.path.join('data', 'oracle-cards-20250414210533.json')

card_data = pd.read_json(card_path)

card_data.head()

Unnamed: 0,object,id,oracle_id,multiverse_ids,mtgo_id,tcgplayer_id,cardmarket_id,name,lang,released_at,...,card_faces,produced_mana,tcgplayer_etched_id,loyalty,life_modifier,hand_modifier,attraction_lights,color_indicator,content_warning,defense
0,card,a471b306-4941-4e46-a0cb-d92895c16f8a,00037840-6089-42ec-8c5c-281f9f474504,[692174],137223.0,615195.0,807933.0,"Nissa, Worldsoul Speaker",en,2025-02-14,...,,,,,,,,,,
1,card,86bf43b1-8d4e-4759-bb2d-0b2e03ba7012,0004ebd0-dfd6-4276-b4a6-de0003e94237,[15862],15870.0,3094.0,3081.0,Static Orb,en,2001-04-11,...,,,,,,,,,,
2,card,7050735c-b232-47a6-a342-01795bfd0d46,0006faf6-7a61-426c-9034-579f2cfcfa83,[370780],49283.0,69965.0,262945.0,Sensory Deprivation,en,2013-07-19,...,,,,,,,,,,
3,card,e718b21b-46d1-4844-985c-52745657b1ac,0007c283-5b7a-4c00-9ca1-b455c8dff8c3,[470580],77122.0,196536.0,391692.0,Road of Return,en,2019-08-23,...,,,,,,,,,,
4,card,036ef8c9-72ac-46ce-af07-83b79d736538,000d5588-5a4c-434e-988d-396632ade42c,[83282],22609.0,12835.0,12551.0,Storm Crow,en,2005-07-29,...,,,,,,,,,,


In [4]:
# load decklist
decklist_path = os.path.join('data', 'decklist.csv')

decklist = pd.read_csv(decklist_path)

decklist.columns

Index(['Unnamed: 0.1', 'Unnamed: 0', 'name', 'mana_cost', 'cmc', 'colors',
       'type_line', 'oracle_text', 'produced_mana', 'must_keeps'],
      dtype='object')

In [5]:
def get_decklist_info(card_data:pd.DataFrame, decklist:pd.DataFrame):
    """Map card data to a decklist"""
    return card_data[card_data['name'].isin(decklist['name'])]

In [6]:
# map card_data to decklist
decklist = get_decklist_info(card_data, decklist)
# filter cards columns
necessary_columns = [
    'name','mana_cost','cmc','colors', 
    'type_line','oracle_text','produced_mana'
]

# trim the decklist
decklist = decklist[necessary_columns]
decklist = decklist.drop_duplicates(['name'])

decklist.columns

Index(['name', 'mana_cost', 'cmc', 'colors', 'type_line', 'oracle_text',
       'produced_mana'],
      dtype='object')

In [7]:
# save new decklist
decklist.to_csv(decklist_path)

In [8]:
# find tricks

decklist['label1'] = None
# card_data[card_data['name'].isin(decklist['name'])]
temp = decklist[
    (decklist['type_line'] == 'Instant') &
    (decklist['oracle_text'].str.find('library') != -1.0)
]

decklist.loc[decklist['name'].isin(temp['name']), 'label1'] = 'tutor'

decklist['label1']
# iloc is a positional indexer
# the indexes from the decklist are not their direct order
# add an order feature and it should work
# decklist.iloc[temp_decklist.index,:]['label1'] = 'tutor'

77        None
497       None
533      tutor
610       None
665       None
         ...  
32507     None
32586     None
32752     None
33290     None
34116     None
Name: label1, Length: 119, dtype: object

In [9]:
# tokenize oracle_text

# index = same as decklist

# get all oracle_text

oracle_token = decklist['oracle_text'].astype(str)
oracle_token = sum([i.split() for i in oracle_token], [])
oracle_token = set([i.translate(str.maketrans('','',string.punctuation)).lower() 
                for i in oracle_token])

oracle_token_mapping = {
    list(oracle_token)[i] : i for i in range(len(oracle_token))
}

oracle_token_mapping


{'': 0,
 '2b': 1,
 '10': 2,
 'has': 3,
 'turn': 4,
 'monarch': 5,
 'were': 6,
 'other': 7,
 'at': 8,
 'that': 9,
 'nonland': 10,
 'color': 11,
 'malicious': 12,
 'looks': 13,
 'controls': 14,
 'time': 15,
 'attacks': 16,
 'chosen': 17,
 'imprint': 18,
 'nightmare': 19,
 'long': 20,
 'had': 21,
 'treasures': 22,
 'affliction': 23,
 'you': 24,
 'connives': 25,
 'equal': 26,
 'partner': 27,
 'discard': 28,
 'opponents': 29,
 'swamps': 30,
 'only': 31,
 'result': 32,
 'unless': 33,
 'beginning': 34,
 'die': 35,
 'on': 36,
 'choose': 37,
 'players': 38,
 'sources': 39,
 '5': 40,
 'next': 41,
 'nan': 42,
 'exiled': 43,
 'same': 44,
 'deals': 45,
 'enchantment': 46,
 'library': 47,
 'kind': 48,
 'total': 49,
 'starts': 50,
 'otherwise': 51,
 'cost': 52,
 'ten': 53,
 'decayed': 54,
 'end': 55,
 'by': 56,
 'fear': 57,
 'base': 58,
 'nonblack': 59,
 'name': 60,
 'six': 61,
 'stash': 62,
 'may': 63,
 'your': 64,
 'both': 65,
 'draw': 66,
 'nontoken': 67,
 'counters': 68,
 'or': 69,
 'mastermind':

In [10]:
# we can now take a dataframe of this and map

decklist['oracle_text'] = decklist['oracle_text'].astype(str)

temp_oracle = decklist['oracle_text'].apply(
    lambda x: [i.translate(str.maketrans('','',string.punctuation)).lower() 
               for i in x.split(' ')])

temp_oracle = temp_oracle.apply(lambda x: [i.strip('\\n') for i in x])

decklist['token_length'] = temp_oracle.apply(lambda x: len(x))

# temp_oracle.apply(lambda x: x.extend(['None'] * (decklist['token_length'].max()-len(x))))

temp_oracle = pd.DataFrame(temp_oracle.tolist())

for col in temp_oracle.columns:
    temp_oracle[col] = temp_oracle[col].map(oracle_token_mapping)

temp_oracle = temp_oracle.fillna(408.)

temp_oracle.astype(int).value_counts(sort=True)
    # print(temp_oracle[0].dot(temp_oracle[i+1]))
# decklist['token_length'].max()

0    1    2    3    4    5    6    7    8    9    10   11   12   13   14   15   16   17   18   19   20   21   22   23   24   25   26   27   28   29   30   31   32   33   34   35   36   37   38   39   40   41   42   43   44   45   46   47   48   49   50   51   52   53   54   55   56   57   58   59   60   61   62   63   64   65   66   67   68   69   70   71   72   73 
103  55   367  408  192  404  228  408  148  404  288  408  311  390  254  389  256  351  388  159  236  265  78   252  385  408  311  408  408  408  408  408  408  408  408  408  408  408  408  408  408  408  408  408  408  408  408  408  408  408  408  408  408  408  408  408  408  408  408  408  408  408  408  408  408  408  408  408  408  408  408  408  408  408    2
18   344  162  157  322  192  404  90   87   78   229  280  31   334  78   408  328  78   408  356  78   278  367  192  404  90   43   265  148  401  323  388  78   119  408  152  404  265  193  280  31   334  78   210  408  408  408  408  408  408  408  40