## Predict what color a card is based on its text in Magic: The Gathering

# Install Wizards of the Coast's API wrapper for python

In [None]:
!pip install mtgsdk

# Import necessary tools

In [None]:
import numpy as np
import pandas as pd
from mtgsdk import Card
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
import matplotlib.pyplot as plt
import pickle as pkl

# Collect Data

In [None]:
def collect_magic_set(mtg_set_code="###"):
    """Query the mtgsdk ``Card`` API for every card of one set.

    ``mtg_set_code`` is passed as the ``set`` filter of the query (for
    example 'm10'). The result of ``Card.where(...).all()`` is returned as is.
    """
    set_query = Card.where(set=mtg_set_code)
    return set_query.all()

def get_just_text(magic_set):
    """Reduce every card in ``magic_set`` to a dict of its text and colors.

    Returns a list holding one ``{'text': ..., 'colors': ...}`` dict per card,
    in iteration order.
    """
    return [{'text': card.text, 'colors': card.colors} for card in magic_set]


def get_df(mtg_set_alias):
    """Fetch one set and return its card text/colors as a DataFrame.

    Chains ``collect_magic_set`` and ``get_just_text`` and passes the
    resulting list of dicts to ``pd.DataFrame``.
    """
    card_records = get_just_text(collect_magic_set(mtg_set_alias))
    return pd.DataFrame(card_records)

def prep_df(magic_set_df):
    """Return a cleaned copy of a card DataFrame ready for vectorizing.

    The input frame is not modified. On the copy:
      * newlines inside string cells become single spaces,
      * the 'colors' column is converted to its string form,
      * cells that are exactly '[]' (an empty colors list) become
        "['Colorless']",
      * cells that were missing (None/NaN) in the input become ''.

    Args:
        magic_set_df: DataFrame with at least a 'colors' column.

    Returns:
        The cleaned DataFrame.

    Raises:
        KeyError: if the frame has no 'colors' column.
    """
    # Locate missing cells on the raw input: once 'colors' is stringified a
    # missing value reads 'None' and can no longer be told apart.
    # isna() also avoids DataFrame.applymap, deprecated since pandas 2.1.
    missing = magic_set_df.isna()

    cleaned = magic_set_df.replace('\n', ' ', regex=True)
    cleaned['colors'] = cleaned['colors'].apply(str)
    cleaned = cleaned.replace('[]', "['Colorless']")

    # Positional (NumPy) masks keep the assignment independent of index labels.
    for col in missing.columns[missing.any().to_numpy()]:
        cleaned.loc[missing[col].to_numpy(), col] = ''
    return cleaned

# Load the data

In [None]:
# Fetch the four sets used for the model, one get_df call per set code.
m10_df, m13_df, m15_df, m20_df = (
    get_df(set_code) for set_code in ('m10', 'm13', 'm15', 'm20')
)

# Clean data

In [None]:
# prep_df builds and returns a cleaned copy (its first step rebinds to the
# result of DataFrame.replace), so the return value must be assigned back for
# the cleaning to reach the frames that are concatenated next.
m10_df = prep_df(m10_df)
m13_df = prep_df(m13_df)
m15_df = prep_df(m15_df)
m20_df = prep_df(m20_df)

In [None]:
# Stack the four per-set frames into one; ignore_index renumbers the rows.
four_mtg_df = pd.concat([m10_df, m13_df, m15_df, m20_df], ignore_index=True)


In [None]:
# Display the number of non-missing cells in each column.
four_mtg_df.count()

In [None]:
# Replace missing values with empty strings for vectorizing purposes.
# isna() flags None (and NaN) without DataFrame.applymap, which is deprecated
# since pandas 2.1.
mask = four_mtg_df.isna()
cols = four_mtg_df.columns[mask.any().to_numpy()]
for col in cols:
    # NumPy mask: positional, so it does not depend on index label alignment.
    four_mtg_df.loc[mask[col].to_numpy(), col] = ''
four_mtg_df

In [None]:
# prep_df returns a cleaned copy and leaves its argument untouched; keep the
# result so the cleaning actually applies to the combined frame.
four_mtg_df = prep_df(four_mtg_df)


In [None]:
# Turn each colors value into its string form so it can serve as a class label.
four_mtg_df['colors'] = four_mtg_df['colors'].apply(str)


In [None]:
# Relabel cells whose value is exactly '[]' (a stringified empty colors list)
# as "['Colorless']". The replacement is applied to every column of the frame.
four_mtg_df = four_mtg_df.replace('[]', "['Colorless']")


In [None]:
# Stringified multi-color combinations to exclude from the data set.
# Entries are compared as exact strings, so the color order inside each entry
# matters.
to_drop =  ["['Black', 'Red', 'Blue']", 
            "['Black', 'Green']",
            "['Black', 'Green', 'Red', 'Blue', 'White']", 
            "['Black', 'White']",
            "['Green', 'Red']", 
            "['Blue', 'White']", 
            "['Green', 'White']",
            "['Black', 'Red', 'White']", 
            "['Black', 'Green', 'White']",
            "['Red', 'Blue', 'White']", 
            "['Red', 'Blue']", 
            "['Black', 'Red']",
            "['Green', 'Red', 'Blue']", 
            "['Green', 'Red', 'White']",
            "['Green', 'Blue']", 
            "['Red', 'White']", 
            "['Black', 'Blue']",
            "['Black', 'Green', 'Blue']"]

In [None]:
# Keep only the rows whose color label is not one of the to_drop combinations.
is_multicolor = four_mtg_df['colors'].isin(to_drop)
four_mtg_df = four_mtg_df[~is_multicolor]

In [None]:
# Pair every vocabulary term with its learned IDF weight.
# NOTE: this cell needs the fitted vectorizer `v`, which is only created in the
# "Tfidf Vectorization" cell further down -- run that cell first.
idf = v.idf_

# get_feature_names() was removed in scikit-learn 1.2; prefer its replacement
# and fall back to the old name on older installs.
if hasattr(v, 'get_feature_names_out'):
    feature_names = v.get_feature_names_out()
else:
    feature_names = v.get_feature_names()
feat_weight_dict = dict(zip(feature_names, idf))

# One row per term with the columns 'weight' and 'feature', in that order.
feat_weight_df = pd.DataFrame({
    'weight': list(feat_weight_dict.values()),
    'feature': list(feat_weight_dict.keys()),
})

# Train Test Split

In [None]:
# Hold out 20% of the cards for evaluation; the fixed random_state keeps the
# split repeatable.
features = four_mtg_df[['text']]
labels = four_mtg_df['colors']
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, random_state=2019
)


# Tfidf Vectorization in order to model the data

In [None]:
# TF-IDF vectorizer with default settings.
v = TfidfVectorizer()

# Learn the vocabulary/IDF weights from the training text only and transform it.
x_train_tfidf = v.fit_transform(X_train['text'])

# First Models with Multinomial Naive Bayes

In [None]:
# Multinomial Naive Bayes classifier with default parameters.
multnb = MultinomialNB()

In [None]:
# Transform the training text with the already-fitted vectorizer `v`.
x_train_tfidf = v.transform(X_train['text'])

In [None]:
# Train the classifier on the TF-IDF training matrix and the color labels.
multnb.fit(x_train_tfidf, y_train)

In [None]:
# Vectorize the held-out text with the vectorizer fitted on the training text.
x_test_tfidf = v.transform(X_test['text'])

In [None]:
# Score the classifier on the held-out cards.
multnb.score(x_test_tfidf, y_test)

In [None]:
# Number of features (distinct terms) learned by the vectorizer.
# vocabulary_ maps each term to its column index, so its length is the feature
# count; unlike get_feature_names() it exists in every scikit-learn release
# (get_feature_names() was removed in 1.2).
len(v.vocabulary_)

# Testing Cells

In [None]:
# For every test card: a list of (class label, predicted probability) pairs,
# ordered by class label in descending order.
# predict_proba is evaluated once for the whole test matrix instead of being
# recomputed for every card.
test_probabilities = multnb.predict_proba(x_test_tfidf)
color_sureness = [
    sorted(zip(multnb.classes_, card_probabilities),
           key=lambda pair: pair[0],
           reverse=True)
    for card_probabilities in test_probabilities
]
color_sureness

In [None]:
# Flatten color_sureness into one record per (test card, color) pair.
data = [
    {'card': card_idx, 'color': color, 'sureness': sureness}
    for card_idx, pairs in enumerate(color_sureness)
    for color, sureness in pairs
]


In [None]:
# One row per record in `data`, with the columns 'card', 'color', 'sureness'.
data_df = pd.DataFrame(data)

In [None]:
# The card number is not needed for the plot; remove the column in place.
data_df.drop('card', axis=1, inplace=True)

In [None]:
# seaborn is only imported in a later cell of this notebook; import it here so
# this cell also works when the notebook is run top to bottom.
import seaborn as sns

# Distribution of the predicted class probabilities, one box per color label.
fig, ax = plt.subplots()
sns.boxplot(x=data_df['color'], y=data_df['sureness'], ax=ax)
plt.ylabel('Percentage')
plt.xlabel("Card Color")
plt.title('How "Sure" the Naive Bayes Classifier is for each card')
fig.set_size_inches(9.7, 7.27)

In [None]:
# Display the test TF-IDF matrix in dense form.
x_test_tfidf.todense()

In [None]:
# The second positional argument of pd.DataFrame is the index: the true labels
# (y_test) become the index and the predictions land in column 0.
actual_vs_predict = pd.DataFrame(multnb.predict(x_test_tfidf), y_test)

In [None]:
# Move the true labels out of the index into a regular column (used below as
# 'colors').
actual_vs_predict.reset_index(inplace=True)

In [None]:
# Copy the predictions (column 0) into a column with a descriptive name.
actual_vs_predict['predict'] = actual_vs_predict[0]

In [None]:
# Column 0 is now duplicated by 'predict'; remove it in place.
actual_vs_predict.drop(0,axis=1, inplace=True)

In [None]:
# True where the predicted label equals the actual label.
actual_vs_predict['True_False'] = actual_vs_predict['colors'] == actual_vs_predict['predict']

In [None]:
# Preview the first rows of the comparison table.
actual_vs_predict.head()

In [None]:
# Number of correct predictions per true color label.
# Only the boolean column is summed: summing the whole frame also aggregates
# the string 'predict' column on current pandas, and that extra column then
# collides with the 'predict' count column when the two frames are merged.
trues_df = actual_vs_predict.groupby('colors')[['True_False']].sum()

In [None]:
# Number of test cards per true color label (non-missing cells per column).
test_cards_t_f = actual_vs_predict.groupby('colors').count()

In [None]:
# Join the per-color correct counts with the per-color totals, keyed on the
# color labels in trues_df's index. Column names present in both frames get
# the _x (left) / _y (right) suffixes used in the following cells.
df = trues_df.merge(test_cards_t_f, on=trues_df.index)
df

In [None]:
# 'True_False_x' comes from trues_df: the count of correct predictions.
df['True'] = df['True_False_x']

In [None]:
# Drop the right-hand frame's 'True_False' count column in place.
df.drop('True_False_y', axis=1, inplace=True)

In [None]:
# 'predict' holds the per-color card count here, so this is the number of
# wrong predictions.
df['Wrong'] = df['predict'] - df['True']

In [None]:
# Fraction of each color's test cards that were predicted correctly (0..1).
df['Percent_Right'] = df['True'] / df['predict']

In [None]:
# Per-color accuracy on the held-out cards, shown as a percentage.
# NOTE(review): labels and bar colors are hard-coded; they presumably follow
# the row order of df (sorted color labels) -- confirm.
colors = ['Black', 'Blue', "Colorless", 'Green', 'Red', 'White']
plt.bar(x=colors, height=(df['Percent_Right']*100), color=['black','blue', 'brown','green','red','white'], edgecolor='black')
plt.ylabel('Percentage Guessed Right')
plt.title('How well a Naive Bayes Classifier guesses card color for 4 Core Sets')
plt.xlabel('Card Color')
# show must be called; a bare `plt.show` only references the function.
plt.show()

In [None]:
import seaborn as sns

In [None]:
from operator import itemgetter


# Highest predicted class probability for each test card.
# Each card's own (label, probability) pairs are inspected; indexing the list
# with a fixed 0 would repeat the first card's maximum for every entry.
card_color_max_sureness = [
    max(pairs, key=itemgetter(1))[1] for pairs in color_sureness
]

In [None]:
# Display how many cards carry each color label.
four_mtg_df['colors'].value_counts()


In [None]:
# Card count per color label, in value_counts() order.
# The counts are computed once and converted directly; this avoids recomputing
# value_counts() per iteration and avoids integer `[]` lookups on a
# label-indexed Series, which pandas deprecates.
color_count = four_mtg_df['colors'].value_counts().tolist()

In [None]:
# Display the per-color card counts.
color_count

In [None]:
# Bar labels for the distribution plot.
# NOTE(review): the order presumably mirrors the order of color_count
# (value_counts() order) -- confirm against the counts displayed above.
colors = ["Colorless", 'White', 'Red', 'Green', 'Blue', 'Black']

In [None]:
# Bar chart of how many cards each color label has; labels, counts and bar
# colors are matched purely by position.
plt.bar(colors, color_count, color=['brown', 'white', 'red', 'green','blue', 'black'], edgecolor='black')
plt.xlabel("Card Color")
plt.ylabel("Number of Cards")
plt.title("Distribution of Card Colors over 4 'Core' MTG sets");

In [None]:
# Set of feature names taken from feature_limited_df.
# NOTE(review): feature_limited_df is not defined in any cell visible in this
# notebook -- confirm where it comes from.
keep = set(feature_limited_df['feature'])

In [None]:
# Second Multinomial Naive Bayes classifier (distinct from `multnb`), used
# below with a restricted set of feature columns.
multNB = MultinomialNB()

In [None]:
# Train on a column subset of the training TF-IDF matrix.
# NOTE(review): keep_columns is not defined in any cell visible in this
# notebook -- presumably the column indices of the features in `keep`; confirm.
multNB.fit(x_train_tfidf[:, keep_columns], y_train)

In [None]:
# Score the restricted-feature classifier on the held-out labels.
# NOTE(review): X_test_limited is not defined in any cell visible in this
# notebook -- presumably x_test_tfidf restricted to the same columns; confirm.
multNB.score(X_test_limited, y_test)