In [96]:
# imports
import pandas as pd
import random

import os

In [97]:
# read in decklist
# WE ARE RUNNING 21 SWAMPS

decklist = pd.read_csv(os.path.join('data', 'gonti,_night_minister.csv'))

sub_ndxs = [0,1,2,3]

decklist.iloc[sub_ndxs,:]

Unnamed: 0.1,Unnamed: 0,name,mana_cost,cmc,colors,type_line,oracle_text,produced_mana
0,77,"Shizo, Death's Storehouse",,0.0,[],Legendary Land,"{T}: Add {B}.\n{B}, {T}: Target legendary crea...",['B']
1,497,Diabolic Intent,{1}{B},2.0,['B'],Sorcery,"As an additional cost to cast this spell, sacr...",
2,533,Misinformation,{B},1.0,['B'],Instant,Put up to three target cards from an opponent'...,
3,610,Deadly Rollick,{3}{B},4.0,['B'],Instant,"If you control a commander, you may cast this ...",


# the coming refactor notes

I think that we should change this so we read in a decklist and then create 3 representations of it

## representation #1

just the decklist as it is on scryfall and read in from archidekt (the merged thing below)

## representation #2

the card categories as we have already done

## representation #3

generated stats representing the different decks and such
- starting hands
- composition


In [98]:

# maybe it initializing with an empty frame isn't the worst thing
class Decklist:
    def __init__(self, cards:pd.DataFrame=pd.DataFrame(), categories:pd.DataFrame=pd.DataFrame()):
        self.cards : pd.DataFrame = cards
        self.categories : pd.DataFrame = categories

    def read_decklist_txt(self, decklist_path:str) -> None:
        pass


    

## Functions

### decklist and scryfall

In [99]:
def get_decklist_info(card_data:pd.DataFrame, decklist:pd.DataFrame):
    """Map card data to a decklist"""
    return card_data[card_data['name'].isin(decklist['name'])]


### read decklist

In [100]:
# reading in text files
# as a heads up you need to delete the last line of this as its 

def read_decklist_txt(path) -> pd.DataFrame:
    with open(path, 'r') as f:
        string = f.read()
        string = string.split('\n')
        # first index is number
        numbers = [i.split(' ')[0] for i in string]

        # read categories

        cats = [i[i.find('['):i.find(']')+1].strip('[]')
                for i in string]
        cats = [i.split(',') for i in cats]

        # even out number of categories
        max_cat_len = max([len(c) for c in cats])
        cats = [i + (['NA'] * (max_cat_len-len(i)))
                for i in cats]

        # read color tags

        tags = [i[i.find('^'):-1].strip('^') for i in string]
        tags = [i.split(',')[0] for i in tags] # drop colors

        # get names
        names = [i[i.find(' ')+1:i.find('[')-1] for i in string]

        columns = sum(
            [['name'], [f'cat_{ndx}' for ndx in range(max_cat_len)],
             ['tags'], ['no']], []
        )

        decklist = pd.DataFrame(
            [sum([[names[ndx]], cats[ndx], [tags[ndx]], [numbers[ndx]]], [])
             for ndx in range(len(string))],
             columns=columns
        )

        return decklist

### Categories

In [101]:
# get unique values from cat columns

# vertically stack cats


# NEED TO DO THIS FOR EACH category 
# card_data.groupby('hand_no').value_counts(['cat_0', 'cat_1', 'cat_2']) # can't group together

def get_archidekt_categories(card_data:pd.DataFrame, prefix:str='cat') -> list[str]:
    """pull out column names with a specified prefix"""
    categories = [i for i in card_data.columns.tolist() if prefix in i]
    categories = [i for i in categories if i.split('_')[0] == prefix]

    return categories


def get_unique_categories(card_data:pd.DataFrame) -> list[str]:

    # intialize series
    cats = pd.Series()

    # iterate through categories and concatanate verical axis
    for cat_i in get_archidekt_categories(card_data):
        # cats = pd.concat([cats, ])
        cats = pd.concat([cats, card_data[cat_i]])
    

    # return unique values 
    return pd.unique(cats).tolist()


### Sampling

In [102]:
def sample_cardlist(cardlist:pd.DataFrame, decksize:int=100):
    return cardlist.iloc[random.sample(
        cardlist.index.get_level_values(1).tolist(), k=decksize
    )]

def shuffle(cardlist:pd.DataFrame):
    return cardlist.sample(cardlist.shape[0], replace=False)



### labeling

In [103]:

# make strings into integer vectors

# INTENDED FUNCTIONALITY

def map_vector_to_string(card_data:pd.DataFrame, categories:list[str]) -> pd.DataFrame:
    """returns vector representations of card categories"""

    # initialize empty mapping vector
    empty_vector = [[0 for i in range(len(categories))]
                    for j in range(len(categories))]
    
    # map string labels to a dictionary 
    # whose keys are the label name and the integer is the ndx
    category_mappings = {
        cat:ndx for (ndx, cat) in enumerate(categories)
    }
    
    # add ndx mappings from category_mappings
    for ndx, cat in enumerate(categories):
        empty_vector[ndx][category_mappings[cat]] = 1

    # replace category_mapping values from being integers to lists
    category_mappings = {
        cat:empty_vector[ndx] for (ndx, cat) in enumerate(categories)
    }

    # map string categories in the card_data dataframe
    for col in get_archidekt_categories(card_data):
        card_data[col] = card_data[col].map(category_mappings)

    return card_data[get_archidekt_categories(card_data)]

def join_vector_list_columns(card_data:pd.DataFrame, categories:list[str]) -> pd.DataFrame:

    # length of category list used for slicing
    split_step = len(categories)
    # create list to iterate over for slicing
    split_values = range(0, card_data['cat_0'].explode().shape[0], split_step)


    # we could probably initialize this empty so that it would be cleaner
    category_counts = card_data['cat_0'].explode().tolist()
    category_counts = pd.DataFrame([category_counts[i:i+split_step] 
                                for i in split_values], 
                                columns=categories)

    # iterage over category columns, summing the respective rows
    for cat in get_archidekt_categories(card_data)[1:]:
        category_counts += pd.DataFrame(
            [card_data[cat].explode().tolist()[i:i+split_step]
            for i in split_values], columns=categories
        )

    # drop the 'NA' column
    category_counts = category_counts.drop(['NA'], axis=1)

    return category_counts

def expand_categories(card_data:pd.DataFrame) -> pd.DataFrame:
    unique_categories = get_unique_categories(card_data)
    
    categories = join_vector_list_columns(
        map_vector_to_string(card_data, unique_categories), 
        unique_categories
    )
    categories.index = card_data.index

    return categories

## Running Code

### read in files

In [104]:

decklist = read_decklist_txt(os.path.join('data', 'narser.txt'))
decklist.index.name = 'decklist_ndx'

# read in card_data from json
card_path = os.path.join('data', 'oracle-cards-20250414210533.json')
card_data = pd.read_json(card_path)

card_data.shape

(34504, 82)

### set decklist from card data

In [105]:
# label indices
card_data.index.names = ['scryfall_ndx']

# match global card data to decklist
card_data = get_decklist_info(card_data, decklist).sort_values('name')

card_data = card_data[card_data.duplicated(subset=['name']) != True]

# make multidex for merging
card_data.index = pd.MultiIndex.from_tuples(
    [(card_data.index[i], i) 
     for i in range(card_data.shape[0])],
    names=['scryfall_ndx','decklist_ndx']
)

# join decklist and card_data
# # add card_data indices to decklist

# decklist and card_data shapes should be the same

decklist.index = card_data.index

card_data = pd.concat([card_data, decklist], axis=1)


### add in categories

In [106]:
categories = expand_categories(card_data)

In [107]:
categories.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Evasion,Land,Extra Combat,Pump,Extra Turn,Counter Spell,Win Condition,Ramp,Stax,Board Wipe,...,Draw,Spell Copy,Enchantment,Commander{top},Extra Triggers,Tap Lands,Artifact,Instant,Burn,Tokens
scryfall_ndx,decklist_ndx,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
32006,0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
28761,1,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4347,2,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
22113,3,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
25267,4,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0


### sampling

In [108]:
def generate_random_list(random_range:tuple[int,int], list_length:int) -> list[int]:
    """
    random_range: tuple representing (minimum, maximum) 
        of random integer range
    list_length: number of random samples

    """
    return [random.randint(random_range[0], random_range[1]) for i in range(list_length)]

def generate_random_set(random_range:tuple[int, int], no_samples:int=10, no_subsamples:int=10) -> list[int]:
            return sum(
                [generate_random_list(random_range, no_subsamples) 
                 for _ in range(no_samples)], []
            )
            

def samplev2(decklist:pd.DataFrame, 
             draw_to:int=10, 
             no_draws:int=10) -> tuple[pd.DataFrame, pd.DataFrame]:

    """
    sample a decklist by subsetting through random indexing 
    and subsequent subsampling.

    decklist: pandas dataframe representing a decklist in the format of 
        a scryfall card
    draw_to: an integer representing the 'cards drawn' 
    no_draws: an integer representing the number of subsamples to take
        from a decklist

    returns: tuple of dataframes representing the deck and the deck samples
    """

    # sample 100 cards
    deck = sample_cardlist(decklist)

    # randomly sample the deck 
    samples = deck.iloc[generate_random_set((0, deck.shape[0]-1), no_draws, draw_to)]

    # group by sample number
    samples.loc[:,'hand_no'] = sum([[i] * draw_to for i in range(no_draws)], [])
    samples = samples.set_index('hand_no', append=True)


    return samples, deck

In [109]:
samples, deck = samplev2(card_data, draw_to=10, no_draws=11)

# samples.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  samples.loc[:,'hand_no'] = sum([[i] * draw_to for i in range(no_draws)], [])


In [None]:
# sample cats

deck = sample_cardlist(categories)

no_iter = 100

samples = deck.iloc[generate_random_set((0, deck.shape[0]-1), 10, 10)]
samples.loc[:,'hand_no'] = sum([[i] * 10 for i in range(10)], [])
samples = samples.set_index('hand_no', append=True)

average = samples.groupby('hand_no').sum().sum(axis=0)
delta = samples.groupby('hand_no').sum().sum(axis=0)

deltas = []

# this hsould go to be about 0-1
for i in range(no_iter):
    # randomly sample deck
    samples = deck.iloc[generate_random_set((0, deck.shape[0]-1), 100, 10)]
    samples.loc[:,'hand_no'] = sum([[i] * 10 for i in range(100)], [])
    samples = samples.set_index('hand_no', append=True).groupby('hand_no').sum().sum(axis=0)

    deltas.append(
        (average - ((samples + averages) / 2) ).mean()
    )
    print(samples)

    averages += samples
    averages /= 2






A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  samples.loc[:,'hand_no'] = sum([[i] * 10 for i in range(10)], [])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  samples.loc[:,'hand_no'] = sum([[i] * 10 for i in range(100)], [])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  samples.loc[:,'hand_no'] = sum([[i] * 10 for i in range(100)], [])
A val

Evasion                         42
Land                           163
Extra Combat                    52
Pump                             6
Extra Turn                     123
Counter Spell                   38
Win Condition                   36
Ramp                           125
Stax                            70
Board Wipe                      34
Maybeboard{noDeck}{noPrice}     18
Sac Land                        76
Haste Enabler                   15
Protection                      45
Top Deck Manipulation           42
Sorcery                          7
Removal                        117
Tutor                           17
Draw                            19
Spell Copy                      48
Enchantment                     18
Commander{top}                   0
Extra Triggers                 147
Tap Lands                       10
Artifact                         0
Instant                         16
Burn                             8
Tokens                          14
dtype: int64
Evasion

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  samples.loc[:,'hand_no'] = sum([[i] * 10 for i in range(100)], [])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  samples.loc[:,'hand_no'] = sum([[i] * 10 for i in range(100)], [])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  samples.loc[:,'hand_no'] = sum([[i] * 10 for i in range(100)], [])
A va

Evasion                         36
Land                           154
Extra Combat                    53
Pump                             8
Extra Turn                     124
Counter Spell                   48
Win Condition                   31
Ramp                           114
Stax                            72
Board Wipe                      34
Maybeboard{noDeck}{noPrice}     13
Sac Land                        66
Haste Enabler                   24
Protection                      51
Top Deck Manipulation           52
Sorcery                         15
Removal                        125
Tutor                           18
Draw                            17
Spell Copy                      32
Enchantment                     14
Commander{top}                   0
Extra Triggers                 159
Tap Lands                        9
Artifact                         0
Instant                         21
Burn                            11
Tokens                           9
dtype: int64
Evasion

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  samples.loc[:,'hand_no'] = sum([[i] * 10 for i in range(100)], [])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  samples.loc[:,'hand_no'] = sum([[i] * 10 for i in range(100)], [])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  samples.loc[:,'hand_no'] = sum([[i] * 10 for i in range(100)], [])
A va

In [122]:
print(deltas[-10:-1])

[np.float64(-42.06326044508519), np.float64(-42.299487365399735), np.float64(-42.364029396985586), np.float64(-42.87844326992136), np.float64(-42.6535073492464), np.float64(-42.75532510319463), np.float64(-42.85980540874017), np.float64(-42.69775984722724), np.float64(-42.75959420932791)]


In [111]:
# make this a function to sample large numbers

# can make it a while loop to stop when its hit a stable state

no_draws = 1000

cat_samples, cat_deck = samplev2(categories, draw_to=7, no_draws=no_draws)

# cat_samples

# for i in range(1000):

cat_samples.groupby('hand_no').sum().sum(axis=0) / no_draws # cat_samples.groupby('hand_no').sum().sum(axis=0).sum()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  samples.loc[:,'hand_no'] = sum([[i] * draw_to for i in range(no_draws)], [])


Evasion                        0.297
Land                           1.415
Extra Combat                   0.358
Pump                           0.074
Extra Turn                     0.339
Counter Spell                  0.262
Win Condition                  0.396
Ramp                           0.852
Stax                           0.341
Board Wipe                     0.399
Maybeboard{noDeck}{noPrice}    0.080
Sac Land                       0.365
Haste Enabler                  0.374
Protection                     0.397
Top Deck Manipulation          0.341
Sorcery                        0.060
Removal                        0.817
Tutor                          0.137
Draw                           0.063
Spell Copy                     0.352
Enchantment                    0.139
Commander{top}                 0.000
Extra Triggers                 0.632
Tap Lands                      0.132
Artifact                       0.080
Instant                        0.000
Burn                           0.065
T