## Predict what color a card is based on it's text in Magic the Gathering

# Install Wizards of the Coast's API wrapper for python

In [None]:
!pip install mtgsdk

# Import nessesary tools

In [2]:
import numpy as np
import pandas as pd
from mtgsdk import Card
from mtgsdk import Set
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

# Collect Data

In [138]:
def collect_magic_set(mtg_set_code="###"):
    cards = Card.where(set=mtg_set_code).all()
    return cards

def get_text(magic_set):
    """Uses Wizards API to build a dictionary of cards for one "magic_set": name, text, and colors"""
    whole_set = []
    for card in magic_set:
        cards = {
            'text' : card.text,
            'colors' : card.colors,
        }
        
        whole_set.append(cards)
    return whole_set

def get_df(mtg_set_alias):
    magic_set = collect_magic_set(mtg_set_alias)
    magic_set_json = get_text(magic_set)
    magic_set_df = pd.DataFrame(magic_set_json)
    return magic_set_df

def prep_df(magic_set_df):
    magic_set_df = magic_set_df.replace('\n',' ', regex=True)
    magic_set_df['colors'] = magic_set_df['colors'].apply(lambda x : str(x))
    mask = magic_set_df.applymap(lambda x: x is None)
    cols = magic_set_df.columns[(mask).any()]
    magic_set_df = magic_set_df.replace('[]', "['Colorless']")
    for col in magic_set_df[cols]:
        magic_set_df.loc[mask[col], col] = ''
    return magic_set_df

# Load the data

In [None]:
# m10_df = get_df('m10')
# m13_df = get_df('m13')
# m15_df = get_df('m15')
# m20_df = get_df('m20')

# Clean data

In [None]:
prep_df(m10_df)
prep_df(m13_df)
prep_df(m15_df)
prep_df(m20_df)

In [141]:
four_mtg_df = pd.concat([m10_df, m13_df, m15_df, m20_df], ignore_index=True)

In [144]:
four_mtg_df.count()

colors    1087
text      1087
dtype: int64

In [143]:
# replace None's with empty strings for Vectorizing purposes
mask = four_mtg_df.applymap(lambda x: x is None)
cols = four_mtg_df.columns[(mask).any()]
for col in four_mtg_df[cols]:
    four_mtg_df.loc[mask[col], col] = ''
four_mtg_df

Unnamed: 0,colors,text
0,['Green'],Deathtouch (Any amount of damage this deals to...
1,['Black'],"{1}{B}, {T}: Target player loses 1 life."
2,['Red'],Gain control of target creature until end of t...
3,['Blue'],Flying (This creature can't be blocked except ...
4,['White'],+1: You gain 2 life. −1: Put a +1/+1 counter o...
5,['Blue'],{T}: Target creature an opponent controls atta...
6,['Colorless'],"Whenever a player casts a white spell, you may..."
7,['White'],You gain 7 life.
8,['Green'],{1}{G}: Create a 1/1 green Insect creature token.
9,['White'],Enchant creature Enchanted creature gets +1/+1...


In [None]:
prep_df(four_mtg_df)

In [178]:
four_mtg_df = four_mtg_df.replace('[]', "['Colorless']")

In [163]:
four_mtg_df['colors'] = four_mtg_df['colors'].apply(lambda x : str(x))

In [179]:
X_train, X_test, y_train, y_test = train_test_split(four_mtg_df[['text']], 
                                                    four_mtg_df['colors'],
                                                    test_size=0.2, 
                                                    random_state=2019)

In [180]:
# Tfidf Vectorizing
v = TfidfVectorizer()
x_train_tfidf = v.fit_transform(X_train['text'])

In [181]:
x_train_tfidf

<869x1008 sparse matrix of type '<class 'numpy.float64'>'
	with 12596 stored elements in Compressed Sparse Row format>

In [182]:
multnb = MultinomialNB()

In [183]:
multnb.fit(x_train_tfidf, y_train)

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)

In [184]:
x_test_tfidf = v.transform(X_test['text'])

In [185]:
multnb.score(x_test_tfidf, y_test)

0.6467889908256881

In [186]:
four_mtg_df['colors'].value_counts()

['Colorless']                                 212
['White']                                     173
['Red']                                       172
['Green']                                     171
['Black']                                     170
['Blue']                                      170
['Black', 'Green']                              2
['Black', 'White']                              1
['Black', 'Green', 'Red', 'Blue', 'White']      1
['Red', 'White']                                1
['Red', 'Blue', 'White']                        1
['Red', 'Blue']                                 1
['Black', 'Green', 'White']                     1
['Green', 'Red']                                1
['Green', 'Red', 'Blue']                        1
['Black', 'Blue']                               1
['Black', 'Red', 'Blue']                        1
['Green', 'Red', 'White']                       1
['Blue', 'White']                               1
['Black', 'Red', 'White']                       1


In [187]:
multnb.predict_proba(x_test_tfidf)

array([[1.84176954e-04, 1.76633051e-04, 1.76824606e-04, ...,
        1.76815102e-04, 1.31673983e-01, 4.01024366e-02],
       [3.97289319e-04, 3.98153462e-04, 3.98428353e-04, ...,
        4.33800514e-04, 7.97694788e-02, 4.92636587e-01],
       [1.08333455e-04, 1.08504731e-04, 1.08559194e-04, ...,
        1.08556492e-04, 1.32920119e-02, 1.04235444e-02],
       ...,
       [3.70745715e-05, 3.60382657e-05, 3.71237455e-05, ...,
        3.54844840e-05, 3.54751644e-02, 4.08395230e-02],
       [4.10868232e-05, 3.85260618e-05, 3.71482285e-05, ...,
        3.70553874e-05, 4.62917259e-02, 7.63064118e-02],
       [7.31462126e-05, 7.38673249e-05, 7.38455333e-05, ...,
        6.87932314e-05, 1.36870720e-01, 3.92547274e-01]])

In [188]:
sorted(list(zip(multnb.classes_,multnb.predict_proba(x_test_tfidf)[6])),
       key=lambda x : x[1], 
       reverse=True)

[("['White']", 0.3213146920771804),
 ("['Black']", 0.24992278503485302),
 ("['Green']", 0.2302328426147117),
 ("['Colorless']", 0.10275341445136389),
 ("['Red']", 0.06006549904694787),
 ("['Blue']", 0.03531779280019581),
 ("['Black', 'Green']", 6.796466542858956e-05),
 ("['Blue', 'White']", 2.3719502908318076e-05),
 ("['Green', 'Red']", 2.180868495513814e-05),
 ("['Black', 'White']", 2.1706628228930664e-05),
 ("['Black', 'Red']", 2.090636797263622e-05),
 ("['Red', 'Blue', 'White']", 2.0753326833903623e-05),
 ("['Black', 'Green', 'Red', 'Blue', 'White']", 2.0722180166699098e-05),
 ("['Black', 'Blue']", 2.0337811307122746e-05),
 ("['Red', 'White']", 2.0269926087928496e-05),
 ("['Green', 'Red', 'White']", 2.0089363596075035e-05),
 ("['Black', 'Red', 'Blue']", 2.0071445198142584e-05),
 ("['Green', 'Red', 'Blue']", 2.0037499513885846e-05),
 ("['Red', 'Blue']", 1.953306165313641e-05),
 ("['Green', 'Blue']", 1.913244810332885e-05),
 ("['Green', 'White']", 1.9127160694905074e-05),
 ("['Black',

In [191]:
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier

In [192]:
rfc = RandomForestClassifier()
abc = AdaBoostClassifier()


In [193]:
rfc.fit(x_train_tfidf, y_train)



RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [194]:
abc.fit(x_train_tfidf, y_train)

AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
          learning_rate=1.0, n_estimators=50, random_state=None)

In [198]:
rfc.score(x_test_tfidf, y_test)

0.5642201834862385

In [197]:
abc.score(x_test_tfidf, y_test)

0.2706422018348624

In [202]:
import pickle as pkl

In [205]:
four_mtg_df.to_pickle('four_mtg_df')