In [1]:
import pandas as pd
import requests
from PIL import Image
from io import BytesIO
import nltk
import difflib
from sklearn.decomposition import LatentDirichletAllocation

pd.options.display.max_columns=999
pd.options.display.max_rows=999

In [2]:
def show_card():
    while(1):
        card_name = input('Enter the name of a commander: ').strip().lower()
        if card_name=='exit':
            return
        try:
            card = commanders[commanders.index.str.lower()==card_name]
            response = requests.get(card.image_uris.tolist()[0]['small'])
            break
        except:
            print(f"Couldn't find {card_name}. Did you mean:")
            counter=1
            for name in difflib.get_close_matches(card_name.title(),commanders.index.tolist(),len(commanders.index.tolist()),0)[:5]:
                print(f"\t{counter}. {name}")
                counter+=1
    img = Image.open(BytesIO(response.content))
    return img

# Load cards and drop columns

In [3]:
commanders = pd.read_json('Data/scryfall-default-cards.json')
commanders.drop(columns=['all_parts','arena_id','artist_ids','booster','border_color','card_back_id','card_faces',
                       'collector_number','digital','flavor_text','foil','frame','frame_effects','full_art',
                       'hand_modifier','id','illustration_id','life_modifier','mtgo_foil_id','mtgo_id',
                       'multiverse_ids','nonfoil','object','oracle_id','printed_name','prints_search_uri',
                       'promo','promo_types','rarity','related_uris','rulings_uri','scryfall_set_uri','set_type',
                       'set_uri','story_spotlight','uri','variation','variation_of','watermark'],inplace=True)

In [4]:
commanders.columns

Index(['artist', 'cmc', 'color_identity', 'color_indicator', 'colors',
       'edhrec_rank', 'games', 'highres_image', 'image_uris', 'lang', 'layout',
       'legalities', 'loyalty', 'mana_cost', 'name', 'oracle_text',
       'oversized', 'power', 'preview', 'printed_text', 'printed_type_line',
       'released_at', 'reprint', 'reserved', 'scryfall_uri', 'set', 'set_name',
       'set_search_uri', 'tcgplayer_id', 'textless', 'toughness', 'type_line'],
      dtype='object')

**Only include legendary creatures**

In [5]:
commanders = commanders[commanders.type_line.str.contains('Legendary Creature')]

**Commander Legal only**

In [6]:
commanders.legalities = [card['commander'] for card in commanders.legalities]
commanders = commanders[commanders.legalities=='legal']

**Commander is meant to be a fun and social format, so drop digital-exclusive cards**

In [7]:
commanders.games = ['paper' in card for card in commanders.games]
commanders = commanders[commanders.games]

In [8]:
commanders = commanders[commanders.layout=='normal']

In [9]:
commanders = commanders[~commanders.oracle_text.str.contains('Partner')]

**Dataset includes reprints as a separate row, so drop those to have a unique list**

In [10]:
commanders.drop_duplicates('name','last',inplace=True)

**Set the index of my DataFrame to the card name**

In [11]:
commanders.set_index('name',inplace=True)

In [14]:
commanders.shape

(786, 31)

In [12]:
show_card()

Enter the name of a commander: exit


## LDA to find topics of commanders

In [52]:
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
import nltk
from nltk.corpus import stopwords

In [339]:
stop = list(set(stopwords.words('english')))
names = [card_name.lower().replace(',','').split() for card_name in commanders.index]
for name in names:
    stop.extend(name)
stop += ['gain','give','each','among','least','less','choice','until','end','start','beginning','upkeep','may',
         'put','under','whenever','except','permanent','player','cost','turn','gets','get','pay','deals','control',
         'controls','color','order','lose','battlefield','target','spell','card','equal','ability','activate',
         'next','step','cast','owner','opponent','time','would','instead','dealt','number','onto','though','long']
stop = list(set(stop))

In [340]:
texts = [text.strip().lower() for text in commanders.oracle_text]

In [341]:
tf_vectorizer = CountVectorizer(max_df=0.95, min_df=2, stop_words=stop, analyzer='word')
tf = tf_vectorizer.fit_transform(texts)
tf_feature_names = tf_vectorizer.get_feature_names()

In [342]:
tf

<786x325 sparse matrix of type '<class 'numpy.int64'>'
	with 5334 stored elements in Compressed Sparse Row format>

In [343]:
lda=LatentDirichletAllocation(n_components=10,random_state=0,verbose=1,n_jobs=-1,max_iter=10)

In [344]:
lda.fit(tf)

iteration: 1 of max_iter: 10
iteration: 2 of max_iter: 10
iteration: 3 of max_iter: 10
iteration: 4 of max_iter: 10
iteration: 5 of max_iter: 10
iteration: 6 of max_iter: 10
iteration: 7 of max_iter: 10
iteration: 8 of max_iter: 10
iteration: 9 of max_iter: 10
iteration: 10 of max_iter: 10


LatentDirichletAllocation(batch_size=128, doc_topic_prior=None,
             evaluate_every=-1, learning_decay=0.7,
             learning_method='batch', learning_offset=10.0,
             max_doc_update_iter=100, max_iter=10, mean_change_tol=0.001,
             n_components=10, n_jobs=-1, n_topics=None, perp_tol=0.1,
             random_state=0, topic_word_prior=None,
             total_samples=1000000.0, verbose=1)

In [345]:
def display_topics(model, feature_names, no_top_words):
    topics = []
    for topic_idx, topic in enumerate(model.components_):
        topics.append({topic_idx:[feature_names[i] for i in topic.argsort()[:-no_top_words-1:-1]]})
        print ("Topic %d:" % (topic_idx),end='\t')
        print ("\n\t\t".join([feature_names[i]
                        for i in topic.argsort()[:-no_top_words - 1:-1]]),end='\n\n')
    return topics

topic_words = display_topics(lda, tf_feature_names, 10)

Topic 0:	damage
		creature
		graveyard
		return
		combat
		flying
		counter
		planeswalker
		trample
		choose

Topic 1:	creature
		token
		create
		white
		artifact
		green
		flying
		haste
		enchantment
		protection

Topic 2:	enters
		copy
		creature
		creatures
		counters
		choose
		sorcery
		sacrifice
		instant
		flying

Topic 3:	creatures
		creature
		destroy
		untap
		attacking
		combat
		attacks
		haste
		flying
		strike

Topic 4:	creature
		counter
		sacrifice
		power
		another
		counters
		toughness
		dies
		enters
		attacks

Topic 5:	flying
		strike
		creature
		spells
		opponents
		trample
		vigilance
		unless
		double
		damage

Topic 6:	library
		graveyard
		shuffle
		top
		search
		creature
		enters
		cards
		reveal
		name

Topic 7:	cards
		library
		top
		graveyard
		bottom
		reveal
		costs
		paying
		flying
		add

Topic 8:	draw
		cards
		discard
		damage
		artifacts
		seven
		converted
		flying
		historic
		creature

Topic 9:	creature
		creatures
		blocked
		becomes
		hor

In [276]:
cmdr_topics = pd.DataFrame(lda.transform(tf))
cmdr_topics.index = commanders.index
cmdr_topics

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
"Syr Gwyn, Hero of Ashvale",0.887485,0.012502,0.012501,0.012501,0.012501,0.012501,0.012501,0.012502,0.012502,0.012505
"Korvold, Fae-Cursed King",0.011118,0.011114,0.011114,0.011114,0.011114,0.899968,0.011111,0.011115,0.011117,0.011115
"Chulane, Teller of Tales",0.010004,0.010003,0.010003,0.180357,0.64616,0.010004,0.010001,0.010004,0.010003,0.103462
"Alela, Artful Provocateur",0.006667,0.342333,0.006667,0.006668,0.006668,0.006668,0.006668,0.175324,0.178709,0.263628
"Kenrith, the Returned King",0.008334,0.008335,0.008336,0.008334,0.30317,0.352593,0.100184,0.008334,0.008335,0.194044
"Grumgully, the Generous",0.014287,0.014288,0.014286,0.014289,0.014288,0.719392,0.166309,0.014286,0.014287,0.014287
"Yorvo, Lord of Garenbrig",0.006671,0.098338,0.006667,0.006667,0.006668,0.848321,0.006667,0.006667,0.006667,0.006667
"Syr Faren, the Hengehammer",0.338929,0.01667,0.016669,0.016667,0.016668,0.527714,0.016667,0.016667,0.016676,0.016673
Questing Beast,0.100212,0.005556,0.370324,0.005556,0.005556,0.005556,0.005556,0.005556,0.005557,0.490571
"Torbran, Thane of Red Fell",0.009091,0.009091,0.918178,0.009091,0.009091,0.009091,0.009091,0.009091,0.009092,0.009092


In [279]:
cmdr_topics.sort_values(0,ascending=False)

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Sidar Jabari,0.930757,0.007694,0.007694,0.007695,0.007693,0.007695,0.007693,0.007693,0.007695,0.007693
"Kiyomaro, First to Stand",0.930752,0.007693,0.007694,0.007694,0.007695,0.007693,0.007693,0.007692,0.007699,0.007695
"Tishana, Voice of Thunder",0.918176,0.009092,0.009091,0.009092,0.009092,0.009092,0.009091,0.009091,0.009091,0.009091
"Tahngarth, Talruum Hero",0.909985,0.010001,0.010003,0.01,0.01,0.010002,0.01,0.01,0.010005,0.010003
"Yuan Shao, the Indecisive",0.899989,0.011113,0.011112,0.011111,0.011112,0.011115,0.011112,0.011112,0.011112,0.011113
"Lu Xun, Scholar General",0.899981,0.011112,0.011116,0.011112,0.011112,0.011113,0.011113,0.011112,0.011114,0.011114
Thraximundar,0.899976,0.011114,0.011114,0.011112,0.011112,0.011118,0.011111,0.011111,0.011115,0.011116
"Zhang He, Wei General",0.887488,0.012502,0.012501,0.0125,0.012501,0.012503,0.012501,0.012501,0.012502,0.012502
"Adamaro, First to Desire",0.887487,0.0125,0.012503,0.012501,0.012502,0.0125,0.012503,0.012501,0.012501,0.012501
"Syr Gwyn, Hero of Ashvale",0.887485,0.012502,0.012501,0.012501,0.012501,0.012501,0.012501,0.012502,0.012502,0.012505
