# Clustering

**This notebook runs a clustering analysis on a specified set of card vectors.**

In [1]:
# File name of the saved model to load. The load cell below looks for it in the
# 'models' folder under the current working directory.
model_name = 'card2vec-SNC-w40-v100-e302.model'

In [2]:
import time
from os import getcwd
from pathlib import Path

import altair as alt
import matplotlib.pyplot as plt
import pandas as pd
import requests
from gensim.models import Word2Vec, KeyedVectors
from sklearn.manifold import TSNE
# import scrython

from scry import scry #For scryfall API calls

### Load the model containing the card vectors of interest

In [3]:
# Build the path with pathlib instead of hard-coded '\\' separators so the notebook
# also runs on macOS/Linux. The result is converted back to a plain string, which is
# the same type the original concatenation produced.
model_path = str(Path(getcwd()) / 'models' / model_name)
model = Word2Vec.load(model_path)

In [4]:
# Only the KeyedVectors object (the card embeddings) is needed from here on,
# so keep a reference to it and drop the name bound to the full Word2Vec model.
card_vectors_keyed = model.wv
del model

# Look up the vector of every key (card name) in the vocabulary, in index_to_key order.
card_vectors = card_vectors_keyed[card_vectors_keyed.index_to_key]

### Load card metadata from Scryfall api

In [92]:
# Fetch Scryfall metadata for every card in the embedding vocabulary.
# Previously `cards` was never assigned before being used here (the call was commented
# out and referred to tsne_df, which is only created further down), so a fresh
# Restart & Run All failed with NameError.
# The names are wrapped in a Series indexed by card name so scry() receives the same
# kind of object as the tsne_df['card'] column it was originally written against.
card_names = pd.Series(
    card_vectors_keyed.index_to_key,
    index=card_vectors_keyed.index_to_key,
    name='card',
)
cards = scry(card_names)

# Flatten each card's JSON payload into one row per card.
cards_json = [card.json() for card in cards]

# Expose the large image URL as 'image' and index the rows by card name, keeping
# 'name' available as a regular column too. Chained instead of inplace=True so the
# cell builds cards_df in one expression.
cards_df = (
    pd.json_normalize(cards_json)
    .rename(columns={'image_uris.large': 'image'})
    .set_index('name', drop=False)
)

### Calculate t-SNE outputs

In [93]:
# Project the card vectors down to two dimensions with t-SNE.
# t-SNE is stochastic, so the seed is fixed to make the layout (and the plots built
# from it) reproducible across kernel restarts.
TSNE_SEED = 42
tsne_model = TSNE(n_components=2, random_state=TSNE_SEED)
tsne_out = tsne_model.fit_transform(card_vectors)



In [94]:
# Put the t-SNE outputs in a DataFrame indexed by card name.
tsne_df = pd.DataFrame(
    tsne_out,
    index=card_vectors_keyed.index_to_key,
    columns=['tsne_1', 'tsne_2'],  # arbitrary names for t-SNE components
)
tsne_df['card'] = card_vectors_keyed.index_to_key

# Attach the coordinates to the card metadata with an inner join on the index.
# Columns left behind by an earlier run of this cell are dropped first: without that,
# re-running the cell merges onto the already-merged frame and pandas emits suffixed
# duplicates (tsne_1_x / tsne_1_y) that break the plots below.
cards_df = (
    cards_df
    .drop(columns=tsne_df.columns, errors='ignore')
    .merge(right=tsne_df, how='inner', left_index=True, right_index=True)
)

### Visualize t-SNE clusters
We see that cards are primarily clustered by colour

In [98]:
# Interactive scatter plot of the two t-SNE components; the tooltip carries the
# 'image' and 'name' columns of cards_df.
(
    alt.Chart(cards_df)
    .mark_point()
    .encode(
        alt.X('tsne_1'),
        alt.Y('tsne_2'),
        tooltip=['image', 'name'],
    )
    .interactive()
)

### What do clusters look like if we subtract basic land embeddings from mono-colour cards?

In [102]:
# Show every column whose label contains "color".
cards_df.filter(like="color")

Unnamed: 0,colors,color_identity,border_color
Plains,[],[W],black
Island,[],[U],black
Forest,[],[G],black
Swamp,[],[B],black
Mountain,[],[R],black
...,...,...,...
Cemetery Tampering,[B],[B],black
Structural Assault,[R],[R],black
"Luxior, Giada's Gift",[],[],black
Evolving Door,[G],[G],black


In [108]:
# One row per card name, holding that card's embedding values.
embeddings_df = pd.DataFrame(
    data=card_vectors,
    index=card_vectors_keyed.index_to_key,
)

In [None]:
# Preview the first rows only rather than displaying the whole embeddings frame.
embeddings_df.head()

### Rough work

In [None]:
# get image urls for all cards
# TODO: save the urls so you don't have to re-create the list each time

# urls = []

# for card in tsne_df['card']:
#     response = requests.get('https://api.scryfall.com/cards/named?fuzzy=' + card)
#     urls.append(response.json()['image_uris']['normal'])
#     time.sleep(0.1) # avoid flooding scryfall with requests, as per their guidance

In [None]:
# NOTE(review): `urls` is only assigned in the commented-out loop in the previous cell,
# so this line raises NameError unless that loop is uncommented and run first.
tsne_df['image'] = urls

In [None]:
# Scatter plot of the raw t-SNE frame. The axes previously pointed at columns 'a' and
# 'b', which tsne_df does not have; its coordinate columns are 'tsne_1' and 'tsne_2'.
# The tooltip still needs the 'image' column added to tsne_df in the cell above.
alt.Chart(data=tsne_df).mark_point().encode(
    x='tsne_1',
    y='tsne_2',
    tooltip=['image','card']
).interactive()

In [None]:
# plt.scatter(tsne_out.transpose()[0], tsne_out.transpose()[1])

In [None]:
# First five card names, selected by position.
tsne_df['card'].iloc[:5]

In [None]:
# Look up every card name in tsne_df['card'] through the scry helper (Scryfall API calls).
# Each returned item is expected to expose .json(), which is how `cards` is consumed
# in the metadata cell — TODO confirm against the scry module.
cards = scry(tsne_df['card'])

In [None]:
# NOTE(review): `test` is not assigned anywhere in the visible notebook, so this cell
# raises NameError on a fresh kernel — define it or delete the cell.
test

In [None]:
# tsne_df = tsne_df.merge(right=cards_df, how='left', right_on='name', left_index=True)