In [1]:
import json
import pandas as pd
import re
import math

In [2]:
with open('data/default-cards-20230612090651.json') as f:
    cards_json = json.load(f)

In [3]:
len(cards_json)

83971

In [4]:
double_faced_cards = []

for card in cards_json:
    if '//' in card['name']:
        double_faced_cards.append(card)

In [5]:
len(double_faced_cards)

2895

## Preparação do conjunto de dados

### Carga e tratamento inicial do conjunto de dados de cartas

In [6]:
pd.set_option('display.max_columns', None)

In [7]:
raw_cards = pd.read_json('data/default-cards-20230612090651.json')
print(f'Starter raw card set length ---> : {len(raw_cards.index)}')

Starter raw card set length ---> : 83971


In [8]:
card_set_drop_reprints = raw_cards.query('reprint != True')
print(f'Number of removed cards ---> : {len(raw_cards.index) - len(card_set_drop_reprints.index)}')
print(f'Current card set length ---> : {len(card_set_drop_reprints.index)}')

Number of removed cards ---> : 50057
Current card set length ---> : 33914


In [9]:
card_set_drop_digital_content = card_set_drop_reprints.query('digital != True')
print(f'Number of removed cards ---> : {len(card_set_drop_reprints.index) - len(card_set_drop_digital_content.index)}')
print(f'Current card set length ---> : {len(card_set_drop_digital_content.index)}')

Number of removed cards ---> : 746
Current card set length ---> : 33168


In [10]:
card_set_drop_tokens = card_set_drop_digital_content.query('not type_line.str.contains("Token")')
print(f'Current card set length without tokens ---> : {len(card_set_drop_tokens.index)}')

Current card set length without tokens ---> : 32535


In [11]:
card_set_drop_tokens.columns.values.tolist()

['object',
 'id',
 'oracle_id',
 'multiverse_ids',
 'mtgo_id',
 'mtgo_foil_id',
 'tcgplayer_id',
 'cardmarket_id',
 'name',
 'lang',
 'released_at',
 'uri',
 'scryfall_uri',
 'layout',
 'highres_image',
 'image_status',
 'image_uris',
 'mana_cost',
 'cmc',
 'type_line',
 'oracle_text',
 'power',
 'toughness',
 'colors',
 'color_identity',
 'keywords',
 'legalities',
 'games',
 'reserved',
 'foil',
 'nonfoil',
 'finishes',
 'oversized',
 'promo',
 'reprint',
 'variation',
 'set_id',
 'set',
 'set_name',
 'set_type',
 'set_uri',
 'set_search_uri',
 'scryfall_set_uri',
 'rulings_uri',
 'prints_search_uri',
 'collector_number',
 'digital',
 'rarity',
 'flavor_text',
 'card_back_id',
 'artist',
 'artist_ids',
 'illustration_id',
 'border_color',
 'frame',
 'full_art',
 'textless',
 'booster',
 'story_spotlight',
 'edhrec_rank',
 'penny_rank',
 'prices',
 'related_uris',
 'all_parts',
 'promo_types',
 'arena_id',
 'preview',
 'security_stamp',
 'produced_mana',
 'watermark',
 'frame_effects'

In [12]:
undesired_columns = [
    'object',
    'multiverse_ids',
    'mtgo_id',
    'mtgo_foil_id',
    'tcgplayer_id',
    'cardmarket_id',
    'name',
    'lang',
    'released_at',
    'uri',
    'scryfall_uri',
    'layout',
    'highres_image',
    'image_status',
    'image_uris',
    'games',
    'foil',
    'nonfoil',
    'finishes',
    'oversized',
    'promo',
    'variation',
    'set_id',
    'set',
    'set_name',
    'set_type',
    'set_uri',
    'set_search_uri',
    'scryfall_set_uri',
    'rulings_uri',
    'prints_search_uri',
    'collector_number',
    'card_back_id',
    'artist',
    'artist_ids',
    'illustration_id',
    'border_color',
    'frame',
    'full_art',
    'textless',
    'booster',
    'story_spotlight',
    'prices',
    'related_uris',
    'all_parts',
    'promo_types',
    'arena_id',
    'preview',
    'security_stamp',
    'watermark',
    'frame_effects',
    'card_faces',
    'tcgplayer_etched_id',
    'attraction_lights',
    'variation_of',
    'printed_type_line',
    'printed_text',
    'printed_name',
    'content_warning',
    'flavor_name',
    'reprint',
    'digital',
    'color_indicator'
]

In [13]:
card_set_cleaned_undesired_columns = card_set_drop_tokens.drop(columns=undesired_columns)
print(f'Total de colunas original ---> {card_set_drop_tokens.shape[1]} colunas')
print(f'Total de colunas após limpeza ---> {card_set_cleaned_undesired_columns.shape[1]} colunas')
print(f'Número de colunas removidas ---> {len(undesired_columns)} colunas')


Total de colunas original ---> 84 colunas
Total de colunas após limpeza ---> 21 colunas
Número de colunas removidas ---> 63 colunas


In [14]:
card_set_cleaned_undesired_columns.columns.values.tolist()

['id',
 'oracle_id',
 'mana_cost',
 'cmc',
 'type_line',
 'oracle_text',
 'power',
 'toughness',
 'colors',
 'color_identity',
 'keywords',
 'legalities',
 'reserved',
 'rarity',
 'flavor_text',
 'edhrec_rank',
 'penny_rank',
 'produced_mana',
 'loyalty',
 'life_modifier',
 'hand_modifier']

In [15]:
card_set_without_duplicates = card_set_cleaned_undesired_columns.drop_duplicates(subset='oracle_id')
print(f'Total de cartas ANTES da eliminação das duplicatas ---> {len(card_set_cleaned_undesired_columns)}')
print(f'Total de cartas APÓS da eliminação das duplicatas ---> {len(card_set_without_duplicates)}')
print(f'número de duplicatas removidas ---> {len(card_set_cleaned_undesired_columns)-len(card_set_without_duplicates)}')

Total de cartas ANTES da eliminação das duplicatas ---> 32535
Total de cartas APÓS da eliminação das duplicatas ---> 27736
número de duplicatas removidas ---> 4799


In [16]:
mtg_card_types = [
    'Artifact',
    'Battle ',
    'Creature',
    'Enchantment',
    'Land',
    'Planeswalker',
    'Instant',
    'Sorcery'
]

card_set_standardized_types = card_set_without_duplicates[card_set_without_duplicates['type_line'].str.contains('|'.join(mtg_card_types))]
print(f'Total de cartas ANTES da eliminação de tipos não desejados ---> {len(card_set_without_duplicates)}')
print(f'Total de cartas APÓS da eliminação de tipos não desejados ---> {len(card_set_standardized_types)}')
print(f'Número de cartas com tipos não desejados removidas ---> {len(card_set_without_duplicates)-len(card_set_standardized_types)}')

Total de cartas ANTES da eliminação de tipos não desejados ---> 27736
Total de cartas APÓS da eliminação de tipos não desejados ---> 26041
Número de cartas com tipos não desejados removidas ---> 1695


In [17]:
double_faced_cards_dataset = card_set_standardized_types[card_set_standardized_types['type_line'].str.contains('//')]
double_faced_cards_dataset.shape

(544, 21)

In [18]:
def getDoublefacedCardData(card_id):
    card = next(x for x in cards_json if x['id'] == card_id)

    front_face = card['card_faces'][0]
    back_face = card['card_faces'][1]
    
    return (
        {
            'id': card['id'],
            'oracle_id': card['oracle_id'],
            'mana_cost': front_face['mana_cost'] if 'mana_cost' in front_face else None,
            'cmc': card['cmc'],
            'type_line': front_face['type_line'],
            'oracle_text': front_face['oracle_text'] if 'oracle_text' in front_face else None,
            'power': front_face['power'] if 'power' in front_face else None,
            'toughness': front_face['toughness'] if 'toughness' in front_face else None,
            'colors': front_face['colors'] if 'colors' in front_face else None,
            'color_identity': card['color_identity'],
            'keywords': card['keywords'],
            'legalities': card['legalities'],
            'reserved': card['reserved'],
            'rarity': card['rarity'],
            'flavor_text': front_face['flavor_text'] if 'flavor_text' in front_face else None,
            'produced_mana': front_face['loyalty'] if 'loyalty' in front_face else None,
            'loyalty': front_face['loyalty'] if 'loyalty' in front_face else None,
            'life_modifier': front_face['life_modifier'] if 'life_modifier' in front_face else None,
            'hand_modifier': front_face['hand_modifier'] if 'hand_modifier' in front_face else None
        },
        {
            'id': card['id'],
            'oracle_id': card['oracle_id'],
            'mana_cost': back_face['mana_cost'] if 'mana_cost' in back_face else None,
            'cmc': card['cmc'],
            'type_line': back_face['type_line'],
            'oracle_text': back_face['oracle_text'] if 'oracle_text' in back_face else None,
            'power': back_face['power'] if 'power' in back_face else None,
            'toughness': back_face['toughness'] if 'toughness' in back_face else None,
            'colors': back_face['colors'] if 'colors' in back_face else None,
            'color_identity': card['color_identity'],
            'keywords': card['keywords'],
            'legalities': card['legalities'],
            'reserved': card['reserved'],
            'rarity': card['rarity'],
            'flavor_text': back_face['flavor_text'] if 'flavor_text' in back_face else None,
            'produced_mana': back_face['loyalty'] if 'loyalty' in back_face else None,
            'loyalty': back_face['loyalty'] if 'loyalty' in back_face else None,
            'life_modifier': back_face['life_modifier'] if 'life_modifier' in back_face else None,
            'hand_modifier': back_face['hand_modifier'] if 'hand_modifier' in back_face else None
        }
    )


In [19]:
double_faced_cards_id_list = double_faced_cards_dataset.id.tolist()
print(f'Total de cartas com face dupla ---> { len(double_faced_cards_id_list)} cartas')

Total de cartas com face dupla ---> 544 cartas


In [20]:
double_faced_cards_splited = []

for id in double_faced_cards_id_list:
    double_faced_cards_splited.extend(getDoublefacedCardData(id))

print(f'Montante de cartas a ser adicionado ao dataframe ---> {len(double_faced_cards_splited)} cartas')

Montante de cartas a ser adicionado ao dataframe ---> 1088 cartas


In [21]:
# print(f'{dropped_double_faced_cards_dataset}')

cards_to_drop = card_set_standardized_types.loc[card_set_standardized_types['id'].isin(double_faced_cards_id_list)].index
total_cards_to_drop = len(cards_to_drop)
print(f'Número de registros a serem removidos ---> {total_cards_to_drop} cartas')

dropped_double_faced_cards_dataset = card_set_standardized_types.drop(cards_to_drop)

total_cards_after_drop  = len(dropped_double_faced_cards_dataset.index)
total_cards_before_drop = len(card_set_standardized_types.index)

print(f'Total de cartas ANTES da remoção das cartas com face dupla ---> {total_cards_before_drop} cartas')
print(f'Total de cartas APÓS da remoção das cartas com face dupla ---> {total_cards_after_drop} cartas')
print(f'Total de cartas cartas face dupla removidas ---> { total_cards_before_drop - total_cards_after_drop} cartas')

dropped_double_faced_cards_dataset._append(double_faced_cards)
dropped_double_faced_cards_dataset.shape

Número de registros a serem removidos ---> 544 cartas
Total de cartas ANTES da remoção das cartas com face dupla ---> 26041 cartas
Total de cartas APÓS da remoção das cartas com face dupla ---> 25497 cartas
Total de cartas cartas face dupla removidas ---> 544 cartas


(25497, 21)

In [22]:
final_cards_dataset = dropped_double_faced_cards_dataset.copy()
final_cards_dataset.shape

(25497, 21)

### Carga e tratamento do conjunto de dados de regras expandidas

In [23]:
rules = pd.read_json('data/rulings-20230612090033.json')
rules

Unnamed: 0,object,oracle_id,source,published_at,comment
0,ruling,0004ebd0-dfd6-4276-b4a6-de0003e94237,wotc,2004-10-04,"If there are two of these on the battlefield, ..."
1,ruling,0007c283-5b7a-4c00-9ca1-b455c8dff8c3,wotc,2019-08-23,The “commander tax” increases based on how man...
2,ruling,0007c283-5b7a-4c00-9ca1-b455c8dff8c3,wotc,2019-08-23,Certain cards in other sets with the partner k...
3,ruling,0007c283-5b7a-4c00-9ca1-b455c8dff8c3,wotc,2019-08-23,If your commander isn’t in the command zone (o...
4,ruling,000e5d65-96c3-498b-bd01-72b1a1991850,wotc,2004-10-04,The target loses just one of the listed abilit...
...,...,...,...,...,...
52331,ruling,fffdc2ac-bde4-4e4c-a5bd-0e6c6e49ad91,wotc,2017-07-14,You divide the damage as you activate Samut’s ...
52332,ruling,ffff90c3-63c4-4dee-a21d-6b2b113f4f80,wotc,2013-07-01,"Abilities that Slivers grant, as well as power..."
52333,ruling,ffff90c3-63c4-4dee-a21d-6b2b113f4f80,wotc,2013-07-01,If the creature type of a Sliver changes so it...
52334,ruling,ffff90c3-63c4-4dee-a21d-6b2b113f4f80,wotc,2021-03-19,Because damage remains marked on a creature un...


In [24]:
cleaned_rules = rules.drop(columns=['object', 'source', 'published_at'])
cleaned_rules

Unnamed: 0,oracle_id,comment
0,0004ebd0-dfd6-4276-b4a6-de0003e94237,"If there are two of these on the battlefield, ..."
1,0007c283-5b7a-4c00-9ca1-b455c8dff8c3,The “commander tax” increases based on how man...
2,0007c283-5b7a-4c00-9ca1-b455c8dff8c3,Certain cards in other sets with the partner k...
3,0007c283-5b7a-4c00-9ca1-b455c8dff8c3,If your commander isn’t in the command zone (o...
4,000e5d65-96c3-498b-bd01-72b1a1991850,The target loses just one of the listed abilit...
...,...,...
52331,fffdc2ac-bde4-4e4c-a5bd-0e6c6e49ad91,You divide the damage as you activate Samut’s ...
52332,ffff90c3-63c4-4dee-a21d-6b2b113f4f80,"Abilities that Slivers grant, as well as power..."
52333,ffff90c3-63c4-4dee-a21d-6b2b113f4f80,If the creature type of a Sliver changes so it...
52334,ffff90c3-63c4-4dee-a21d-6b2b113f4f80,Because damage remains marked on a creature un...


In [25]:
grouped_rules = cleaned_rules.groupby(['oracle_id']).agg(lambda x: x.tolist())
grouped_rules.shape

(14861, 1)

In [26]:
final_rules_dataset = grouped_rules.copy()

## Mesclagem dos conjuntos de dados

In [27]:
merged_dataset = final_cards_dataset.merge(final_rules_dataset, on='oracle_id', how='left')
merged_dataset.shape

(25497, 22)

In [28]:
merged_dataset

Unnamed: 0,id,oracle_id,mana_cost,cmc,type_line,oracle_text,power,toughness,colors,color_identity,keywords,legalities,reserved,rarity,flavor_text,edhrec_rank,penny_rank,produced_mana,loyalty,life_modifier,hand_modifier,comment
0,0000579f-7b35-4ed3-b44c-db2a538066fe,44623693-51d6-49ad-8cd7-140505caf02f,{5}{R},6.0,Creature — Sliver,All Sliver creatures have double strike.,3,3,[R],[R],[],"{'standard': 'not_legal', 'future': 'not_legal...",False,uncommon,"""A rift opened, and our arrows were abruptly s...",6148.0,11341.0,,,,,"[Abilities that Slivers grant, as well as powe..."
1,00006596-1166-4a79-8443-ca9f82e6db4e,8ae3562f-28b7-4462-96ed-be0cf7052ccc,{W}{W},2.0,Creature — Kor Soldier,"When Kor Outfitter enters the battlefield, you...",2,2,[W],[W],[],"{'standard': 'not_legal', 'future': 'not_legal...",False,common,"""We take only what we need to survive. Believe...",15131.0,4905.0,,,,,[You may target Kor Outfitter with its ability...
2,0000cd57-91fe-411f-b798-646e965eec37,9f0d82ae-38bf-45d8-8cda-982b6ead1d72,{2}{U},3.0,Creature — Siren Pirate,Flying\nWhen Siren Lookout enters the battlefi...,1,2,[U],[U],"[Flying, Explore]","{'standard': 'not_legal', 'future': 'not_legal...",False,common,,13541.0,8653.0,,,,,[If a resolving spell or ability instructs a s...
3,0001f1ef-b957-4a55-b47f-14839cdbab6f,ef027846-be81-4959-a6b5-56bd01b1e68a,{W},1.0,Creature — Human Knight,"When Venerable Knight dies, put a +1/+1 counte...",2,1,[W],[W],[],"{'standard': 'not_legal', 'future': 'not_legal...",False,uncommon,"""May this blade guide you on your great journe...",13135.0,1504.0,,,,,[If a Knight you control is dealt lethal damag...
4,0002ab72-834b-4c81-82b1-0d2760ea96b0,645b5784-a6f7-4cf3-966a-e1a51420b96b,{2}{U},3.0,Creature — Fish,"Whenever you draw your second card each turn, ...",3,1,[U],[U],[],"{'standard': 'not_legal', 'future': 'not_legal...",False,common,The problem wasn't that fish had learned how t...,22283.0,,,,,,[If a spell or ability causes you to put a car...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25492,ffeca7a7-2a14-4166-9e89-0f4eb94b79f5,9ef24a79-fade-4c38-b63c-22983206b4ad,{1}{G},2.0,Creature — Centaur Shaman,Enchantment spells you cast cost {1} less to c...,2,2,[G],[G],[],"{'standard': 'not_legal', 'future': 'not_legal...",False,rare,The distinction of bearing the gods' banner is...,1323.0,3194.0,,,,,[Herald of the Pantheon can reduce alternative...
25493,fff118a5-765b-4ba1-8f12-ce6f24b2459b,82c5ec8e-27be-475a-9921-ad61209fd022,,0.0,Land,"{T}: Add {C}.\n{1}, {T}: Add one mana of any c...",,,[],[],[],"{'standard': 'not_legal', 'future': 'not_legal...",False,common,Philosophers speak of a place where myths wash...,4668.0,3086.0,"[B, C, G, R, U, W]",,,,
25494,fff5a09e-9276-44b8-b374-4b84aebd47cc,b33d2b77-f068-4fdc-a1a8-0353a35e3973,{3}{U/P},4.0,Sorcery,({U/P} can be paid with either {U} or 2 life.)...,,,[U],[U],[Proliferate],"{'standard': 'not_legal', 'future': 'not_legal...",False,uncommon,,796.0,2168.0,,,,,"[If you're at 1 life or less, you can't pay 2 ..."
25495,fff9989f-77a3-4f73-ade6-c04306c98501,e2fe1847-4003-41c5-a64b-03afd888b81e,{3}{B}{B},5.0,Creature — Spirit,Morbid — When Morkrut Banshee enters the battl...,4,4,[B],[B],[Morbid],"{'standard': 'not_legal', 'future': 'not_legal...",False,uncommon,"""Let go your grudges, or risk wandering the bo...",18503.0,12122.0,,,,,[If there are no other creatures on the battle...


In [29]:
final_merged_dataset = merged_dataset.drop(columns=['id', 'oracle_id'])
final_merged_dataset.shape

(25497, 20)

In [30]:
final_merged_dataset.head(20)

Unnamed: 0,mana_cost,cmc,type_line,oracle_text,power,toughness,colors,color_identity,keywords,legalities,reserved,rarity,flavor_text,edhrec_rank,penny_rank,produced_mana,loyalty,life_modifier,hand_modifier,comment
0,{5}{R},6.0,Creature — Sliver,All Sliver creatures have double strike.,3.0,3.0,[R],[R],[],"{'standard': 'not_legal', 'future': 'not_legal...",False,uncommon,"""A rift opened, and our arrows were abruptly s...",6148.0,11341.0,,,,,"[Abilities that Slivers grant, as well as powe..."
1,{W}{W},2.0,Creature — Kor Soldier,"When Kor Outfitter enters the battlefield, you...",2.0,2.0,[W],[W],[],"{'standard': 'not_legal', 'future': 'not_legal...",False,common,"""We take only what we need to survive. Believe...",15131.0,4905.0,,,,,[You may target Kor Outfitter with its ability...
2,{2}{U},3.0,Creature — Siren Pirate,Flying\nWhen Siren Lookout enters the battlefi...,1.0,2.0,[U],[U],"[Flying, Explore]","{'standard': 'not_legal', 'future': 'not_legal...",False,common,,13541.0,8653.0,,,,,[If a resolving spell or ability instructs a s...
3,{W},1.0,Creature — Human Knight,"When Venerable Knight dies, put a +1/+1 counte...",2.0,1.0,[W],[W],[],"{'standard': 'not_legal', 'future': 'not_legal...",False,uncommon,"""May this blade guide you on your great journe...",13135.0,1504.0,,,,,[If a Knight you control is dealt lethal damag...
4,{2}{U},3.0,Creature — Fish,"Whenever you draw your second card each turn, ...",3.0,1.0,[U],[U],[],"{'standard': 'not_legal', 'future': 'not_legal...",False,common,The problem wasn't that fish had learned how t...,22283.0,,,,,,[If a spell or ability causes you to put a car...
5,{1}{U},2.0,Creature — Bird Wizard,Kicker {R} (You may pay an additional {R} as y...,2.0,1.0,[U],"[R, U]","[Kicker, Flying]","{'standard': 'legal', 'future': 'legal', 'hist...",False,uncommon,,19347.0,7832.0,,,,,
6,{2}{B},3.0,Creature — Snake Wall,Defender (This creature can't attack.)\n{3}: D...,2.0,4.0,[B],[B],[Defender],"{'standard': 'not_legal', 'future': 'not_legal...",False,uncommon,"What wall can never be climbed, but is always ...",23030.0,,,,,,
7,{5}{G},6.0,Creature — Lizard Hydra,Reach\nWhen Whiptongue Hydra enters the battle...,4.0,4.0,[G],[G],[Reach],"{'standard': 'not_legal', 'future': 'not_legal...",False,rare,"""Where'd all the birds go?""\n—Kaldrin, jungle ...",4075.0,,,,,,[A creature with indestructible or a creature ...
8,{2}{G},3.0,Creature — Human Monk,Reach\nWar Historian has indestructible as lon...,3.0,3.0,[G],[G],[Reach],"{'standard': 'legal', 'future': 'legal', 'hist...",False,common,All children on Kamigawa learn of the Kami War...,23200.0,,,,,,[War Historian’s last ability starts to apply ...
9,{1}{B}{B},3.0,Creature — Vampire Assassin,Lifelink\nWhen Blood Operative enters the batt...,3.0,1.0,[B],[B],[Lifelink],"{'standard': 'not_legal', 'future': 'not_legal...",False,rare,,13992.0,7571.0,,,,,[An ability that triggers “whenever you survei...


## Pré-processamento do dataset

### Correção do tipo

In [31]:
# final_merged_dataset[['type', 'subtype']] = final_merged_dataset['type_line'].str.split(' — ', expand=True)
# final_merged_dataset
splitted_type_dataset = final_merged_dataset.copy()
splitted_type_dataset[['type', 'subtype']] = splitted_type_dataset['type_line'].str.split('—', expand=True)
splitted_type_dataset.shape

(25497, 22)

In [32]:
# supertypes = [
# 'Basic',
# 'Legendary',
# 'Snow',
# 'World'
# ]

# # card_set_standardized_types = card_set_without_duplicates[card_set_without_duplicates['type_line'].str.contains('|'.join(mtg_card_types))]

# test[test['type'].str.contains('|'.join(supertypes))]

In [33]:
splitted_type_dataset.drop(columns=[ 'type_line'], inplace=True)
splitted_type_dataset

Unnamed: 0,mana_cost,cmc,oracle_text,power,toughness,colors,color_identity,keywords,legalities,reserved,rarity,flavor_text,edhrec_rank,penny_rank,produced_mana,loyalty,life_modifier,hand_modifier,comment,type,subtype
0,{5}{R},6.0,All Sliver creatures have double strike.,3,3,[R],[R],[],"{'standard': 'not_legal', 'future': 'not_legal...",False,uncommon,"""A rift opened, and our arrows were abruptly s...",6148.0,11341.0,,,,,"[Abilities that Slivers grant, as well as powe...",Creature,Sliver
1,{W}{W},2.0,"When Kor Outfitter enters the battlefield, you...",2,2,[W],[W],[],"{'standard': 'not_legal', 'future': 'not_legal...",False,common,"""We take only what we need to survive. Believe...",15131.0,4905.0,,,,,[You may target Kor Outfitter with its ability...,Creature,Kor Soldier
2,{2}{U},3.0,Flying\nWhen Siren Lookout enters the battlefi...,1,2,[U],[U],"[Flying, Explore]","{'standard': 'not_legal', 'future': 'not_legal...",False,common,,13541.0,8653.0,,,,,[If a resolving spell or ability instructs a s...,Creature,Siren Pirate
3,{W},1.0,"When Venerable Knight dies, put a +1/+1 counte...",2,1,[W],[W],[],"{'standard': 'not_legal', 'future': 'not_legal...",False,uncommon,"""May this blade guide you on your great journe...",13135.0,1504.0,,,,,[If a Knight you control is dealt lethal damag...,Creature,Human Knight
4,{2}{U},3.0,"Whenever you draw your second card each turn, ...",3,1,[U],[U],[],"{'standard': 'not_legal', 'future': 'not_legal...",False,common,The problem wasn't that fish had learned how t...,22283.0,,,,,,[If a spell or ability causes you to put a car...,Creature,Fish
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25492,{1}{G},2.0,Enchantment spells you cast cost {1} less to c...,2,2,[G],[G],[],"{'standard': 'not_legal', 'future': 'not_legal...",False,rare,The distinction of bearing the gods' banner is...,1323.0,3194.0,,,,,[Herald of the Pantheon can reduce alternative...,Creature,Centaur Shaman
25493,,0.0,"{T}: Add {C}.\n{1}, {T}: Add one mana of any c...",,,[],[],[],"{'standard': 'not_legal', 'future': 'not_legal...",False,common,Philosophers speak of a place where myths wash...,4668.0,3086.0,"[B, C, G, R, U, W]",,,,,Land,
25494,{3}{U/P},4.0,({U/P} can be paid with either {U} or 2 life.)...,,,[U],[U],[Proliferate],"{'standard': 'not_legal', 'future': 'not_legal...",False,uncommon,,796.0,2168.0,,,,,"[If you're at 1 life or less, you can't pay 2 ...",Sorcery,
25495,{3}{B}{B},5.0,Morbid — When Morkrut Banshee enters the battl...,4,4,[B],[B],[Morbid],"{'standard': 'not_legal', 'future': 'not_legal...",False,uncommon,"""Let go your grudges, or risk wandering the bo...",18503.0,12122.0,,,,,[If there are no other creatures on the battle...,Creature,Spirit


### Cálculo de devoção

In [34]:
def calcDevotion(devotion):

    if not devotion:
        return 0

    devotion = str(devotion)

    devotions = []
    devotions.append(len(re.findall("[w]", devotion, re.IGNORECASE)))
    devotions.append(len(re.findall("[r]", devotion, re.IGNORECASE)))
    devotions.append(len(re.findall("[u]", devotion, re.IGNORECASE)))
    devotions.append(len(re.findall("[g]", devotion, re.IGNORECASE)))
    devotions.append(len(re.findall("[b]", devotion, re.IGNORECASE)))

    return max(devotions)

In [35]:
devotion_data_set = splitted_type_dataset.copy()
devotion_data_set['devotion'] = devotion_data_set.apply(lambda x : calcDevotion(x.mana_cost), axis=1)
devotion_data_set.shape

(25497, 22)

### Cálculo de mana phyrexiano

In [36]:
def checkPhyrexianMana(manaCost):

    if not manaCost:
        return 0

    return 1 if re.search('P', manaCost) else 0


In [37]:
phyrexian_mana_data_set = devotion_data_set.copy()
phyrexian_mana_data_set['phyrexian_mana'] = phyrexian_mana_data_set.apply(lambda x : checkPhyrexianMana(x.mana_cost), axis=1)
phyrexian_mana_data_set.drop(columns='mana_cost', inplace=True)
phyrexian_mana_data_set.shape

(25497, 22)

### Cálculo do _flavor text_

In [38]:
def calcTextLen(txt):
    
    if isinstance(txt, str):
        return len(txt)

    return 0

In [39]:
flavor_data_set = phyrexian_mana_data_set.copy()
flavor_data_set['flavor_size'] = flavor_data_set.apply(lambda x : calcTextLen(x.flavor_text), axis=1)
flavor_data_set.drop(columns=['flavor_text'], inplace=True)
flavor_data_set.shape

(25497, 22)

### Cálculo do número de habilidades

In [40]:
def calcAbilities(key_words: list, oracle_text: str):

    if key_words and not oracle_text:
        return len(key_words)

    if oracle_text and not key_words:
        return 1

    if key_words and oracle_text:
        for word in key_words:
            ¡

    return 0

In [50]:
text = 'Flying\nWhen Siren Lookout enters the battlefield, it explores.'
pattern = ['flying','explore']
len(re.match(pattern, text))

TypeError: unhashable type: 'list'

In [41]:
abilities_dataset = flavor_data_set.copy()
abilities_dataset['abilities'] = abilities_dataset.apply(lambda row: calcAbilities(row['keywords']), axis=1)
abilities_dataset.drop(columns=['keywords'], inplace=True)
abilities_dataset.shape

(25497, 22)

### Cálculo de produção de mana

Aqui haviam duas abordagens: 

1. Assumir um booleano que representa se uma carta é ou não capaz de produzir mana;
2. Definir quantas cores diferentes de mana uma dada carta pode gerar (sendo, portanto, um valor numérico - e não lógico como acima - que pode assumir o intervalo entre 0 e 5, inclusos)

Num primeiro momento, optou-se pela abordagem 1.

In [42]:
def canProduceMana(producedMana):

    if isinstance(producedMana, list):
        return 1 if len(producedMana) > 0 else 0

    return 0 if not producedMana or math.isnan(producedMana) else 0


In [46]:
produced_mana_dataset = abilities_dataset.copy()
produced_mana_dataset['produced_mana'] = produced_mana_dataset.apply(lambda row: canProduceMana(row['produced_mana']), axis=1)
produced_mana_dataset

Unnamed: 0,cmc,oracle_text,power,toughness,colors,color_identity,legalities,reserved,rarity,edhrec_rank,penny_rank,produced_mana,loyalty,life_modifier,hand_modifier,comment,type,subtype,devotion,phyrexian_mana,flavor_size,abilities
0,6.0,All Sliver creatures have double strike.,3,3,[R],[R],"{'standard': 'not_legal', 'future': 'not_legal...",False,uncommon,6148.0,11341.0,0,,,,"[Abilities that Slivers grant, as well as powe...",Creature,Sliver,1,0,228,0
1,2.0,"When Kor Outfitter enters the battlefield, you...",2,2,[W],[W],"{'standard': 'not_legal', 'future': 'not_legal...",False,common,15131.0,4905.0,0,,,,[You may target Kor Outfitter with its ability...,Creature,Kor Soldier,2,0,71,0
2,3.0,Flying\nWhen Siren Lookout enters the battlefi...,1,2,[U],[U],"{'standard': 'not_legal', 'future': 'not_legal...",False,common,13541.0,8653.0,0,,,,[If a resolving spell or ability instructs a s...,Creature,Siren Pirate,1,0,0,2
3,1.0,"When Venerable Knight dies, put a +1/+1 counte...",2,1,[W],[W],"{'standard': 'not_legal', 'future': 'not_legal...",False,uncommon,13135.0,1504.0,0,,,,[If a Knight you control is dealt lethal damag...,Creature,Human Knight,1,0,71,0
4,3.0,"Whenever you draw your second card each turn, ...",3,1,[U],[U],"{'standard': 'not_legal', 'future': 'not_legal...",False,common,22283.0,,0,,,,[If a spell or ability causes you to put a car...,Creature,Fish,1,0,99,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25492,2.0,Enchantment spells you cast cost {1} less to c...,2,2,[G],[G],"{'standard': 'not_legal', 'future': 'not_legal...",False,rare,1323.0,3194.0,0,,,,[Herald of the Pantheon can reduce alternative...,Creature,Centaur Shaman,1,0,100,0
25493,0.0,"{T}: Add {C}.\n{1}, {T}: Add one mana of any c...",,,[],[],"{'standard': 'not_legal', 'future': 'not_legal...",False,common,4668.0,3086.0,1,,,,,Land,,0,0,86,0
25494,4.0,({U/P} can be paid with either {U} or 2 life.)...,,,[U],[U],"{'standard': 'not_legal', 'future': 'not_legal...",False,uncommon,796.0,2168.0,0,,,,"[If you're at 1 life or less, you can't pay 2 ...",Sorcery,,1,1,0,1
25495,5.0,Morbid — When Morkrut Banshee enters the battl...,4,4,[B],[B],"{'standard': 'not_legal', 'future': 'not_legal...",False,uncommon,18503.0,12122.0,0,,,,[If there are no other creatures on the battle...,Creature,Spirit,2,0,104,1


In [44]:
produced_mana_dataset.columns

Index(['cmc', 'oracle_text', 'power', 'toughness', 'colors', 'color_identity',
       'legalities', 'reserved', 'rarity', 'edhrec_rank', 'penny_rank',
       'produced_mana', 'loyalty', 'life_modifier', 'hand_modifier', 'comment',
       'type', 'subtype', 'devotion', 'phyrexian_mana', 'flavor_size',
       'abilities'],
      dtype='object')