# Magic: The Gathering Artwork Search
Search for card artworks from "Magic: The Gathering" using a text description of the image content. All 32,000+ unique illustrations were encoded as vectors using OpenAI's CLIP model and stored in a CSV file. Here, CLIP encodes the query string and results are returned in order of cosine similarity between the query and the images. GPU runtime is helpful but not necessary.  
  
(Created on 03/25/2023, so newer cards are not included.)

## Instructions


1.   At the top of the page, click "Runtime" and select "Run all." 
2.   Wait for each code cell to finish running, then scroll to the section "MtG Image Search."
3.   Type your search query into the "query" field (e.g., "autumn leaves").
4.   Click the circular play button to the left of the "query" field to run the search (or press Shift+Enter with your cursor in that field).
5.   Clicking an image will open the full card image in a new tab.  
  
Thanks for checking out this project!




In [None]:
!git clone https://github.com/ekohrt/mtg-art-content-search.git
!unzip -q -u "/content/mtg-art-content-search/mtg-art-clip-embeddings.zip" -d "/content/mtg-art-content-search/"

Cloning into 'mtg-art-content-search'...
remote: Enumerating objects: 10, done.[K
remote: Counting objects: 100% (10/10), done.[K
remote: Compressing objects: 100% (7/7), done.[K
remote: Total 10 (delta 1), reused 7 (delta 1), pack-reused 0[K
Unpacking objects: 100% (10/10), 3.52 MiB | 7.87 MiB/s, done.


In [None]:
!pip install -q sentence-transformers Pillow==9.0.0

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/86.0 KB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 KB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.3/4.3 MB[0m [31m61.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.8/6.8 MB[0m [31m75.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m65.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m200.1/200.1 KB[0m [31m20.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m65.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for sentence-transformers (setup.py) ... [?25l[?25hdone


In [None]:
# load clip model
from sentence_transformers import SentenceTransformer
import torch

# Run on the GPU when the runtime exposes one; otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'

# Pull the CLIP ViT-B/32 checkpoint from Hugging Face and place it on `device`.
# NOTE(review): presumably the same checkpoint that produced the stored image
# embeddings - confirm before swapping it for another model.
model = SentenceTransformer('sentence-transformers/clip-ViT-B-32', device=device)
model  # echo the loaded module structure as the cell output

Downloading (…)d52eb/.gitattributes:   0%|          | 0.00/690 [00:00<?, ?B/s]

Downloading (…)LIPModel/config.json:   0%|          | 0.00/4.03k [00:00<?, ?B/s]

Downloading (…)CLIPModel/merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

Downloading (…)rocessor_config.json:   0%|          | 0.00/316 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/605M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/389 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/604 [00:00<?, ?B/s]

Downloading (…)CLIPModel/vocab.json:   0%|          | 0.00/961k [00:00<?, ?B/s]

Downloading (…)859cad52eb/README.md:   0%|          | 0.00/1.88k [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading (…)cad52eb/modules.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

SentenceTransformer(
  (0): CLIPModel()
)

In [None]:
import pandas as pd
# Read the precomputed image embeddings: one row per illustration, with an
# 'illustration_id' column followed by the embedding components.
embeddings_df = pd.read_csv(
    "/content/mtg-art-content-search/mtg-art-clip-embeddings.csv"
)
embeddings_df.head(2)  # preview the first two rows only

Unnamed: 0,illustration_id,0,1,2,3,4,5,6,7,8,...,502,503,504,505,506,507,508,509,510,511
0,05e524e0-7339-4668-a56a-658c76bb016e,-0.182643,0.536473,-0.228911,0.047229,0.687316,-0.445811,-0.453015,0.090478,-0.257793,...,-0.114021,-0.29759,1.006249,-0.390347,-0.315185,0.010628,0.447189,0.249762,-0.197809,-0.165351
1,01a159de-2c4f-4277-bed6-4f685e92f8eb,-0.311668,0.449616,0.211686,0.110684,0.18292,-0.382816,-0.297473,0.269825,-0.11009,...,-0.485347,0.264027,0.606713,0.131653,0.062648,-0.346553,0.356835,0.564428,0.036748,0.066463


In [None]:
# Strip the id column so only the numeric embedding components remain, then
# convert to a plain NumPy matrix (row order still matches embeddings_df).
image_embeddings = embeddings_df.drop(columns='illustration_id').to_numpy()

In [None]:
# Card metadata lives in a second CSV; index it by 'illustration_id' so that
# search hits can be looked up directly by label.
data_df = pd.read_csv(
    '/content/mtg-art-content-search/mtg-art-data.csv',
    index_col='illustration_id',
)
# Sanity check: this label-based lookup of two known ids only succeeds if
# illustration_id really is the index.
data_df.loc[[
    '2fcca987-364c-4738-a75b-099d8a26d614',
    '619c439b-8b4d-4c0c-9f86-9fdae9bd1c25',
]]

Unnamed: 0_level_0,id,oracle_id,name,art_crop,art_normal,artist
illustration_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2fcca987-364c-4738-a75b-099d8a26d614,0000579f-7b35-4ed3-b44c-db2a538066fe,44623693-51d6-49ad-8cd7-140505caf02f,Fury Sliver,https://cards.scryfall.io/art_crop/front/0/0/0...,https://cards.scryfall.io/normal/front/0/0/000...,Paolo Parente
619c439b-8b4d-4c0c-9f86-9fdae9bd1c25,0001f1ef-b957-4a55-b47f-14839cdbab6f,ef027846-be81-4959-a6b5-56bd01b1e68a,Venerable Knight,https://cards.scryfall.io/art_crop/front/0/0/0...,https://cards.scryfall.io/normal/front/0/0/000...,Colin Boyer


In [None]:
from sklearn.metrics.pairwise import cosine_similarity
from PIL import Image
from IPython.core.display import display, HTML
import numpy as np
import requests
import time

def search_images(text_query, top_results=5):
  """Display the card artworks whose embeddings best match a text query.

  The query is encoded with the notebook-level ``model``, scored against every
  row of ``image_embeddings`` by cosine similarity, and the best matches are
  rendered inline as a strip of thumbnails. Each thumbnail links to the full
  card image and shows the card name as a tooltip.

  Args:
    text_query: Text handed to ``model.encode``.
    top_results: Maximum number of matches to show, best match first.
      Values below 1 render an empty result.

  Returns:
    None. The result is rendered through ``display(HTML(...))``.
  """
  import html  # stdlib; imported here so this cell stays self-contained

  # Clamp so a negative value cannot turn the slice below into "all but the last k".
  limit = max(int(top_results), 0)
  if limit == 0:
    display(HTML(""))
    return

  query_vector = model.encode(text_query)                            # encode text query with CLIP
  scores = cosine_similarity([query_vector], image_embeddings)[0]    # one similarity score per image row
  ranked = np.argsort(-scores)[:limit]                               # row positions, most similar first
  best_ids = embeddings_df['illustration_id'].iloc[ranked]           # row positions -> illustration ids
  # NOTE(review): if data_df holds several rows for one illustration_id, .loc
  # returns all of them, so more than `limit` images may be shown - confirm
  # that the index is unique.
  matches = data_df.loc[best_ids]

  # The browser fetches the thumbnails itself from the <img> tags, so nothing is
  # downloaded here. Every interpolated value is escaped because card names may
  # contain quotes or other characters that would otherwise break the attribute.
  tags = []
  for name, crop_url, card_url in zip(matches['name'], matches['art_crop'], matches['art_normal']):
    title = html.escape(str(name), quote=True)
    src = html.escape(str(crop_url), quote=True)
    href = html.escape(str(card_url), quote=True)
    tags.append(
        f'<a href="{href}" target="_blank" rel="noopener noreferrer">'
        f'<img src="{src}" height=200 title="{title}"/></a>'
    )
  display(HTML("".join(tags)))

In [None]:
#@title MtG Image Search
# Colab form cell: the `#@param` annotations render `query` as a text field and
# `number_of_results` as a 0-200 slider. Run this cell to perform the search
# with the current field values.
query = 'desolate ruins' #@param {type:"string"}
number_of_results = 50 #@param {type:"slider", min:0, max:200, step:1}
search_images(query, top_results=number_of_results) 