In [155]:
# imports
import pandas as pd
import random

import os

In [156]:
# read in decklist
# WE ARE RUNNING 21 SWAMPS

decklist = pd.read_csv(os.path.join('data', 'gonti,_night_minister.csv'))

sub_ndxs = [0,1,2,3]

decklist.iloc[sub_ndxs,:]

Unnamed: 0.1,Unnamed: 0,name,mana_cost,cmc,colors,type_line,oracle_text,produced_mana
0,77,"Shizo, Death's Storehouse",,0.0,[],Legendary Land,"{T}: Add {B}.\r\n{B}, {T}: Target legendary cr...",['B']
1,497,Diabolic Intent,{1}{B},2.0,['B'],Sorcery,"As an additional cost to cast this spell, sacr...",
2,533,Misinformation,{B},1.0,['B'],Instant,Put up to three target cards from an opponent'...,
3,610,Deadly Rollick,{3}{B},4.0,['B'],Instant,"If you control a commander, you may cast this ...",


# the coming refactor notes

I think that we should change this so we read in a decklist and then create 3 representations of it

## representation #1

just the decklist as it is on scryfall and read in from archidekt (the merged thing below)

## representation #2

the card categories as we have already done

## representation #3

generated stats representing the different decks and such
- starting hands
- composition


In [None]:

# maybe it initializing with an empty frame isn't the worst thing
class Decklist:
    def __init__(self, cards:pd.DataFrame=pd.DataFrame(), categories:pd.DataFrame=pd.DataFrame()):
        self.cards : pd.DataFrame = cards
        self.categories : pd.DataFrame = categories

    def read_decklist_txt(self, decklist_path:str) -> None:
        


    

## Functions

### decklist and scryfall

In [157]:
def get_decklist_info(card_data:pd.DataFrame, decklist:pd.DataFrame):
    """Map card data to a decklist"""
    return card_data[card_data['name'].isin(decklist['name'])]


### read decklist

In [158]:
# reading in text files
# as a heads up you need to delete the last line of this as its 

def read_decklist_txt(path) -> pd.DataFrame:
    with open(path, 'r') as f:
        string = f.read()
        string = string.split('\n')
        # first index is number
        numbers = [i.split(' ')[0] for i in string]

        # read categories

        cats = [i[i.find('['):i.find(']')+1].strip('[]')
                for i in string]
        cats = [i.split(',') for i in cats]

        # even out number of categories
        max_cat_len = max([len(c) for c in cats])
        cats = [i + (['NA'] * (max_cat_len-len(i)))
                for i in cats]

        # read color tags

        tags = [i[i.find('^'):-1].strip('^') for i in string]
        tags = [i.split(',')[0] for i in tags] # drop colors

        # get names
        names = [i[i.find(' ')+1:i.find('[')-1] for i in string]

        columns = sum(
            [['name'], [f'cat_{ndx}' for ndx in range(max_cat_len)],
             ['tags'], ['no']], []
        )

        decklist = pd.DataFrame(
            [sum([[names[ndx]], cats[ndx], [tags[ndx]], [numbers[ndx]]], [])
             for ndx in range(len(string))],
             columns=columns
        )

        return decklist

### Categories

In [159]:
# get unique values from cat columns

# vertically stack cats


# NEED TO DO THIS FOR EACH category 
# card_data.groupby('hand_no').value_counts(['cat_0', 'cat_1', 'cat_2']) # can't group together

def get_archidekt_categories(card_data:pd.DataFrame, prefix:str='cat') -> list[str]:
    """pull out column names with a specified prefix"""
    categories = [i for i in card_data.columns.tolist() if prefix in i]
    categories = [i for i in categories if i.split('_')[0] == prefix]

    return categories


def get_unique_categories(card_data:pd.DataFrame) -> list[str]:

    # intialize series
    cats = pd.Series()

    # iterate through categories and concatanate verical axis
    for cat_i in get_archidekt_categories(card_data):
        # cats = pd.concat([cats, ])
        cats = pd.concat([cats, card_data[cat_i]])
    

    # return unique values 
    return pd.unique(cats).tolist()


### Sampling

In [160]:
def sample_cardlist(cardlist:pd.DataFrame, decksize:int=100):
    return cardlist.iloc[random.sample(
        cardlist.index.get_level_values(1).tolist(), k=decksize
    )]

def shuffle(cardlist:pd.DataFrame):
    return cardlist.sample(cardlist.shape[0], replace=False)



### labeling

In [161]:

# make strings into integer vectors

# INTENDED FUNCTIONALITY

def map_vector_to_string(card_data:pd.DataFrame, categories:list[str]) -> pd.DataFrame:
    """returns vector representations of card categories"""

    # initialize empty mapping vector
    empty_vector = [[0 for i in range(len(categories))]
                    for j in range(len(categories))]
    
    # map string labels to a dictionary 
    # whose keys are the label name and the integer is the ndx
    category_mappings = {
        cat:ndx for (ndx, cat) in enumerate(categories)
    }
    
    # add ndx mappings from category_mappings
    for ndx, cat in enumerate(categories):
        empty_vector[ndx][category_mappings[cat]] = 1

    # replace category_mapping values from being integers to lists
    category_mappings = {
        cat:empty_vector[ndx] for (ndx, cat) in enumerate(categories)
    }

    # map string categories in the card_data dataframe
    for col in get_archidekt_categories(card_data):
        card_data[col] = card_data[col].map(category_mappings)

    return card_data[get_archidekt_categories(card_data)]

def join_vector_list_columns(card_data:pd.DataFrame, categories:list[str]) -> pd.DataFrame:

    # length of category list used for slicing
    split_step = len(categories)
    # create list to iterate over for slicing
    split_values = range(0, card_data['cat_0'].explode().shape[0], split_step)


    # we could probably initialize this empty so that it would be cleaner
    category_counts = card_data['cat_0'].explode().tolist()
    category_counts = pd.DataFrame([category_counts[i:i+split_step] 
                                for i in split_values], 
                                columns=categories)

    # iterage over category columns, summing the respective rows
    for cat in get_archidekt_categories(card_data)[1:]:
        category_counts += pd.DataFrame(
            [card_data[cat].explode().tolist()[i:i+split_step]
            for i in split_values], columns=categories
        )

    # drop the 'NA' column
    category_counts = category_counts.drop(['NA'], axis=1)

    return category_counts

def expand_categories(card_data:pd.DataFrame) -> pd.DataFrame:
    unique_categories = get_unique_categories(card_data)
    
    categories = join_vector_list_columns(
        map_vector_to_string(card_data, unique_categories), 
        unique_categories
    )
    categories.index = card_data.index

    return categories

## Running Code

### read in files

In [162]:

decklist = read_decklist_txt(os.path.join('data', 'narser.txt'))
decklist.index.name = 'decklist_ndx'

# read in card_data from json
card_path = os.path.join('data', 'oracle-cards-20250414210533.json')
card_data = pd.read_json(card_path)

card_data.shape

(34504, 82)

### set decklist from card data

In [163]:
# label indices
card_data.index.names = ['scryfall_ndx']

# match global card data to decklist
card_data = get_decklist_info(card_data, decklist).sort_values('name')

card_data = card_data[card_data.duplicated(subset=['name']) != True]

# make multidex for merging
card_data.index = pd.MultiIndex.from_tuples(
    [(card_data.index[i], i) 
     for i in range(card_data.shape[0])],
    names=['scryfall_ndx','decklist_ndx']
)

# join decklist and card_data
# # add card_data indices to decklist

# decklist and card_data shapes should be the same

decklist.index = card_data.index

card_data = pd.concat([card_data, decklist], axis=1)


### add in categories

In [164]:
categories = expand_categories(card_data)

In [165]:
categories.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Evasion,Land,Extra Combat,Pump,Extra Turn,Counter Spell,Win Condition,Ramp,Stax,Board Wipe,...,Draw,Spell Copy,Enchantment,Commander{top},Extra Triggers,Tap Lands,Artifact,Instant,Burn,Tokens
scryfall_ndx,decklist_ndx,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
32006,0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
28761,1,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4347,2,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
22113,3,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
25267,4,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0


### sampling

In [None]:
def samplev2(decklist:pd.DataFrame, 
             draw_to:int=10, 
             no_draws:int=10) -> tuple[pd.DataFrame, pd.DataFrame]:

    """
    sample a decklist by subsetting through random indexing 
    and subsequent subsampling.

    decklist: pandas dataframe representing a decklist in the format of 
        a scryfall card
    draw_to: an integer representing the 'cards drawn' 
    no_draws: an integer representing the number of subsamples to take
        from a decklist

    returns: tuple of dataframes representing the deck and the deck samples
    """

    # sample 100 cards

    deck = sample_cardlist(decklist)

    # shuffle

    deck = shuffle(deck)


    samples = deck.iloc[
        sum([[random.randint(0, deck.shape[0]-1) for i in range(draw_to)]
        for j in range(no_draws)], [])
    ]

    

    samples.loc[:,'hand_no'] = sum([[i] * draw_to for i in range(no_draws)], [])
    
    samples = samples.set_index('hand_no', append=True)


    # with the future of how this is organized I think this should return indices rather than
    # the entire dataframe
    return samples, deck

In [167]:
card_data = samplev2(card_data, draw_to=11)

  samples.loc[:,'hand_no'] = sum([[i] * draw_to for i in range(no_draws)], [])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  samples.loc[:,'hand_no'] = sum([[i] * draw_to for i in range(no_draws)], [])


In [168]:
# splitting things

# need to do this at the beginning 



card_data


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,object,id,oracle_id,multiverse_ids,mtgo_id,tcgplayer_id,cardmarket_id,name,lang,released_at,...,color_indicator,content_warning,defense,name,cat_0,cat_1,cat_2,cat_3,tags,no
scryfall_ndx,decklist_ndx,hand_no,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
7296,30,0,card,8beb987c-1b67-4a4e-ae71-58547afad2a0,36cd2364-d113-47d1-b2c4-b088d9eb88dd,[675986],130849.0,578953.0,788639.0,Brainstorm,en,2024-09-27,...,,,,Brainstorm,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",,1x
10821,160,0,card,673e4561-8dfd-46db-b492-878009666ac7,5097f4e6-50af-4641-909f-db44abf0ce32,[679169],133752.0,591815.0,797328.0,Rite of the Dragoncaller,en,2024-11-15,...,,,,Rite of the Dragoncaller,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",,1x
29904,120,0,card,56001a36-126b-4c08-af98-a6cc4d84210e,de2440de-e948-4811-903c-0bbe376ff64d,[489948],82584.0,218602.0,484079.0,Mox Opal,en,2020-08-07,...,,,,Mox Opal,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",,1x
23424,128,0,card,de4b0d5f-1071-4030-be16-2b4dadbdf9e9,aeaf7768-ed67-487d-8506-e667babb88ba,[625336],114203.0,505743.0,722952.0,"Narset, Enlightened Master",en,2023-08-04,...,,,,"Narset, Enlightened Master","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",,1x
30510,143,0,card,61fc78e0-d0db-4ecb-a29a-f1a437d7987c,e2b472dd-047d-47eb-9ebb-df6aa4b52dd4,[696539],138707.0,624212.0,818945.0,Perilous Landscape,en,2025-04-11,...,,,,Perilous Landscape,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",,1x
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18487,33,9,card,f87d0298-f76b-4ea8-82c4-5861c4539d8b,89aa65d9-2502-40b0-90b6-b25a8e9f6155,[622687],115257.0,484941.0,721727.0,Capture of Jingzhou,en,2023-08-04,...,,,,Capture of Jingzhou,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",,1x
28041,64,9,card,f7f3dd95-bd14-4e0f-a388-444f9cf1b0dc,d09c9cba-fdd2-479b-ad5d-d05181c3e3f9,[622702],115287.0,504561.0,722524.0,Fierce Guardianship,en,2023-08-04,...,,,,Fierce Guardianship,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",,1x
5725,58,9,card,612beb8f-2ab1-4a8b-84c5-c47d19d400ab,2adbb56a-45e9-4fbe-b586-3488ef8014a3,[366411],47783.0,67317.0,259801.0,Enter the Infinite,en,2013-02-01,...,,,,Enter the Infinite,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",,1x
15417,135,9,card,d33d91d0-1506-45e4-9def-975bf901815e,730e39e6-c61d-48b5-8827-bfd952bf1be7,[679903],133348.0,557923.0,795123.0,Omniscience,en,2024-11-15,...,,,,Omniscience,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",,1x


# generating samples

In [169]:
# need to add a way to map card types or whatever to the integers


# for col in data.columns:
#     data[col] = data[col].map(get_mappings(decklist, 'type_line'))

# data

In [170]:
# # label card types

# # lands


# for col in data.columns:
#     data.loc[data[col].str.find('Land') != -1, col] = 'land'
#     data.loc[data[col].str.find('Creature') != -1, col] = 'creature'
#     data.loc[data[col].str.find('Enchantment') != -1, col] = 'enchantment'
#     data.loc[data[col].str.find('Equipment') != -1, col] = 'equipment' 
#     data.loc[data[col].str.find('Sorcery') != -1, col] = 'sorcery'
#     data.loc[data[col].str.find('Instant') != -1, col] = 'instant'
#     data.loc[data[col].str.find('Artifact') != -1, col] = 'artifact'

# 