In [111]:
# imports
import pandas as pd
import random

import os

In [112]:
# read in decklist
# WE ARE RUNNING 21 SWAMPS

decklist = pd.read_csv(os.path.join('data', 'gonti,_night_minister.csv'))

sub_ndxs = [0,1,2,3]

decklist.iloc[sub_ndxs,:]

Unnamed: 0.1,Unnamed: 0,name,mana_cost,cmc,colors,type_line,oracle_text,produced_mana
0,77,"Shizo, Death's Storehouse",,0.0,[],Legendary Land,"{T}: Add {B}.\n{B}, {T}: Target legendary crea...",['B']
1,497,Diabolic Intent,{1}{B},2.0,['B'],Sorcery,"As an additional cost to cast this spell, sacr...",
2,533,Misinformation,{B},1.0,['B'],Instant,Put up to three target cards from an opponent'...,
3,610,Deadly Rollick,{3}{B},4.0,['B'],Instant,"If you control a commander, you may cast this ...",


## Functions

### decklist and scryfall

In [113]:
def get_decklist_info(card_data:pd.DataFrame, decklist:pd.DataFrame):
    """Map card data to a decklist"""
    return card_data[card_data['name'].isin(decklist['name'])]


### read decklist

In [114]:
# reading in text files
# as a heads up you need to delete the last line of this as its 

def read_decklist_txt(path) -> pd.DataFrame:
    with open(path, 'r') as f:
        string = f.read()
        string = string.split('\n')
        # first index is number
        numbers = [i.split(' ')[0] for i in string]

        # read categories

        cats = [i[i.find('['):i.find(']')+1].strip('[]')
                for i in string]
        cats = [i.split(',') for i in cats]

        # even out number of categories
        max_cat_len = max([len(c) for c in cats])
        cats = [i + (['NA'] * (max_cat_len-len(i)))
                for i in cats]

        # read color tags

        tags = [i[i.find('^'):-1].strip('^') for i in string]
        tags = [i.split(',')[0] for i in tags] # drop colors

        # get names
        names = [i[i.find(' ')+1:i.find('[')-1] for i in string]

        columns = sum(
            [['name'], [f'cat_{ndx}' for ndx in range(max_cat_len)],
             ['tags'], ['no']], []
        )

        decklist = pd.DataFrame(
            [sum([[names[ndx]], cats[ndx], [tags[ndx]], [numbers[ndx]]], [])
             for ndx in range(len(string))],
             columns=columns
        )

        return decklist

### Categories

In [115]:
# get unique values from cat columns

# vertically stack cats


# NEED TO DO THIS FOR EACH category 
# card_data.groupby('hand_no').value_counts(['cat_0', 'cat_1', 'cat_2']) # can't group together

def get_archidekt_categories(card_data:pd.DataFrame, prefix:str='cat') -> list[str]:
    """pull out column names with a specified prefix"""
    categories = [i for i in card_data.columns.tolist() if prefix in i]
    categories = [i for i in categories if i.split('_')[0] == prefix]

    return categories


def get_unique_categories(card_data:pd.DataFrame) -> list[str]:

    # intialize series
    cats = pd.Series()

    # iterate through categories and concatanate verical axis
    for cat_i in get_archidekt_categories(card_data):
        # cats = pd.concat([cats, ])
        cats = pd.concat([cats, card_data[cat_i]])
    

    # return unique values 
    return pd.unique(cats).tolist()


### Sampling

In [116]:
def sample_cardlist(cardlist:pd.DataFrame, decksize:int=100):
    return cardlist.iloc[random.sample(
        cardlist.index.get_level_values(1).tolist(), k=decksize
    )]

def shuffle(cardlist:pd.DataFrame):
    return cardlist.sample(cardlist.shape[0], replace=False)



### labeling

In [None]:

# make strings into integer vectors

# INTENDED FUNCTIONALITY

def map_vector_to_string(card_data:pd.DataFrame, categories:list[str]) -> pd.DataFrame:
    """returns vector representations of card categories"""

    # initialize empty mapping vector
    empty_vector = [[0 for i in range(len(categories))]
                    for j in range(len(categories))]
    
    # map string labels to a dictionary 
    # whose keys are the label name and the integer is the ndx
    category_mappings = {
        cat:ndx for (ndx, cat) in enumerate(categories)
    }
    
    # add ndx mappings from category_mappings
    for ndx, cat in enumerate(categories):
        empty_vector[ndx][category_mappings[cat]] = 1

    # replace category_mapping values from being integers to lists
    category_mappings = {
        cat:empty_vector[ndx] for (ndx, cat) in enumerate(categories)
    }

    # map string categories in the card_data dataframe
    for col in get_archidekt_categories(card_data):
        card_data[col] = card_data[col].map(category_mappings)

    return card_data[get_archidekt_categories(card_data)]

def join_vector_list_columns(card_data:pd.DataFrame, categories:list[str]) -> pd.DataFrame:

    # length of category list used for slicing
    split_step = len(categories)
    # create list to iterate over for slicing
    split_values = range(0, card_data['cat_0'].explode().shape[0], split_step)


    # we could probably initialize this empty so that it would be cleaner
    category_counts = card_data['cat_0'].explode().tolist()
    category_counts = pd.DataFrame([category_counts[i:i+split_step] 
                                for i in split_values], 
                                columns=categories)

    # iterage over category columns, summing the respective rows
    for cat in get_archidekt_categories(card_data)[1:]:
        category_counts += pd.DataFrame(
            [card_data[cat].explode().tolist()[i:i+split_step]
            for i in split_values], columns=categories
        )

    # drop the 'NA' column
    category_counts = category_counts.drop(['NA'], axis=1)

    return category_counts

def expand_categories(card_data:pd.DataFrame) -> pd.DataFrame:
    unique_categories = get_unique_categories(card_data)
    
    categories = join_vector_list_columns(
        map_vector_to_string(card_data, unique_categories), 
        unique_categories
    )
    categories.index = card_data.index

    return categories

## Running Code

In [118]:

decklist = read_decklist_txt(os.path.join('data', 'narser.txt'))
decklist.index.name = 'decklist_ndx'

# read in card_data from json
card_path = os.path.join('data', 'oracle-cards-20250414210533.json')
card_data = pd.read_json(card_path)

card_data.shape

(34504, 82)

In [119]:
# label indices
card_data.index.names = ['scryfall_ndx']

# match global card data to decklist
card_data = get_decklist_info(card_data, decklist).sort_values('name')

card_data = card_data[card_data.duplicated(subset=['name']) != True]

# make multidex for merging
card_data.index = pd.MultiIndex.from_tuples(
    [(card_data.index[i], i) 
     for i in range(card_data.shape[0])],
    names=['scryfall_ndx','decklist_ndx']
)

card_data.shape, decklist.shape

((208, 82), (208, 7))

In [120]:
# join decklist and card_data
# # add card_data indices to decklist

decklist.index = card_data.index

card_data = pd.concat([card_data, decklist], axis=1)

# card_data.head()

In [121]:
card_data.shape

(208, 89)

### add in categories

In [None]:
categories = expand_categories(card_data)

In [123]:
categories

Unnamed: 0_level_0,Unnamed: 1_level_0,Evasion,Land,Extra Combat,Pump,Extra Turn,Counter Spell,Win Condition,Ramp,Stax,Board Wipe,...,Draw,Spell Copy,Enchantment,Commander{top},Extra Triggers,Tap Lands,Artifact,Instant,Burn,Tokens
scryfall_ndx,decklist_ndx,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
32006,0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
28761,1,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4347,2,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
22113,3,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
25267,4,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14399,203,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
26065,204,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
15394,205,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
29600,206,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0


### sampling

In [124]:
def samplev2(cardlist:pd.DataFrame, draw_to:int=10, no_draws:int=10):

    # sample 100 cards

    deck = sample_cardlist(cardlist)

    # shuffle

    deck = shuffle(deck)


    samples = deck.iloc[
        sum([[random.randint(0, deck.shape[0]-1) for i in range(draw_to)]
        for j in range(no_draws)], [])
    ]

    

    samples.loc[:,'hand_no'] = sum([[i] * draw_to for i in range(no_draws)], [])
    
    samples = samples.set_index('hand_no', append=True)

    return samples

In [125]:
card_data = samplev2(card_data, draw_to=11)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  samples.loc[:,'hand_no'] = sum([[i] * draw_to for i in range(no_draws)], [])


In [126]:
# splitting things

# need to do this at the beginning 






# generating samples

In [127]:
# need to add a way to map card types or whatever to the integers


# for col in data.columns:
#     data[col] = data[col].map(get_mappings(decklist, 'type_line'))

# data

In [128]:
# # label card types

# # lands


# for col in data.columns:
#     data.loc[data[col].str.find('Land') != -1, col] = 'land'
#     data.loc[data[col].str.find('Creature') != -1, col] = 'creature'
#     data.loc[data[col].str.find('Enchantment') != -1, col] = 'enchantment'
#     data.loc[data[col].str.find('Equipment') != -1, col] = 'equipment' 
#     data.loc[data[col].str.find('Sorcery') != -1, col] = 'sorcery'
#     data.loc[data[col].str.find('Instant') != -1, col] = 'instant'
#     data.loc[data[col].str.find('Artifact') != -1, col] = 'artifact'

# 