# Clustering

**This workbook conducts a clustering analysis on a specified set of card-vectors**

In [1]:
# Configuration: file name of the trained model to load.
# The file is looked up inside the 'models' folder under the current working directory.
model_name = 'card2vec-SNC-w40-v100-e302.model'

In [2]:
import os
import time
from os import getcwd

import altair as alt
import matplotlib.pyplot as plt
import pandas as pd
import requests
from gensim.models import Word2Vec, KeyedVectors
from sklearn.manifold import TSNE
# import scrython

from scry import scry #For scryfall API calls

### Load the model containing the card-vectors of interest

In [3]:
# Build the path with os.path.join so the platform's own separator is used.
# The previous hard-coded '\\models\\' only resolved on Windows; on Windows the
# resulting string is unchanged.
model_path = os.path.join(getcwd(), 'models', model_name)
model = Word2Vec.load(model_path)

In [4]:
# Keep only the KeyedVectors (the card embeddings); the rest of the trained
# model is not needed from here on, so release it.
card_vectors_keyed = model.wv
del model

# Look up every vector by its key (the card name), in index_to_key order.
card_vectors = card_vectors_keyed[card_vectors_keyed.index_to_key]

### Project to 2-D Using t-SNE (to inspect clusters visually)

In [5]:
# Instantiate t-SNE and project the card vectors down to two components.
# t-SNE is stochastic: pin random_state so a fresh Restart & Run All
# reproduces the same layout instead of a different one each time.
tsne_model = TSNE(n_components=2, random_state=42)
tsne_out = tsne_model.fit_transform(card_vectors)



In [6]:
# Wrap the t-SNE output in a DataFrame indexed by card name.
# 'a' and 'b' are arbitrary names for the two t-SNE components; the card name
# is also exposed as a regular 'card' column.
tsne_df = pd.DataFrame(
    data=tsne_out,
    index=card_vectors_keyed.index_to_key,
    columns=['a', 'b'],
).assign(card=lambda frame: frame.index)

In [7]:
tsne_df.head() # Preview the first rows: t-SNE components 'a'/'b' plus the 'card' name column

Unnamed: 0,a,b,card
Plains,-7.246717,-9.887819,Plains
Island,-11.327468,0.553773,Island
Forest,3.474151,-7.412608,Forest
Swamp,-0.402023,13.05497,Swamp
Mountain,8.896812,6.164212,Mountain


Rough work

In [None]:
# Get the 'normal'-size image URL for every card from the Scryfall API.
# TODO: save urls so you don't have to re-create the list each time

urls = []

for card in tsne_df['card']:
    # Pass the name through `params` so requests URL-encodes it; concatenating
    # it into the URL breaks the query for names containing e.g. '&' or '+'.
    response = requests.get(
        'https://api.scryfall.com/cards/named',
        params={'fuzzy': card},
        timeout=10,  # don't hang the whole loop on one stalled request
    )
    response.raise_for_status()  # fail loudly on an unmatched/ambiguous name or API error
    card_json = response.json()

    # Multi-faced cards may carry no top-level 'image_uris'; fall back to the
    # first face's images (see the Scryfall card-object docs).
    image_uris = card_json.get('image_uris')
    if image_uris is None and card_json.get('card_faces'):
        image_uris = card_json['card_faces'][0].get('image_uris')

    # Append exactly one entry per card (None when no image is available) so
    # `urls` stays aligned row-for-row with tsne_df.
    urls.append(image_uris['normal'] if image_uris else None)
    time.sleep(0.1) # avoid flooding scryfall with requests, as per their guidance

In [None]:
# Attach the fetched image URLs as a new 'image' column (mutates tsne_df in place;
# `urls` must hold one entry per row, in row order).
tsne_df['image'] = urls

In [None]:
# Interactive scatter plot of the two t-SNE components; the tooltip is built
# from each point's 'image' and 'card' fields.
alt.Chart(tsne_df).mark_point().encode(
    alt.X('a'),
    alt.Y('b'),
    tooltip=['image', 'card'],
).interactive()

In [None]:
# plt.scatter(tsne_out.transpose()[0], tsne_out.transpose()[1])

In [None]:
# First five card names, selected by position.
tsne_df['card'].iloc[:5]

In [8]:
# Try the scry helper on the first five card names (selected by position).
test = scry(tsne_df['card'].iloc[:5])

In [10]:
# Display what scry returned for the five sample cards.
test

[<Response [200]>,
 <Response [200]>,
 <Response [200]>,
 <Response [200]>,
 <Response [200]>]