# Yu-Gi-Oh! card embeddings

## Descarga la información de mis otros repositorios de GitHub


In [None]:
import tempfile
from pathlib import Path
import subprocess

# Scratch directory that receives the cloned repositories. tempfile.mkdtemp()
# leaves deletion to the caller, so the directory is not removed automatically.
temporary_directory = tempfile.mkdtemp()

In [None]:

# Shallow-clone (single branch, depth 1) the card catalogue repository into the
# temporary directory.
cards_repo_url = "https://github.com/fferegrino/yu-gi-oh.git"
cards_repo_dir = Path(temporary_directory, "yu-gi-oh")

# check=True turns a failed clone into a CalledProcessError right here instead
# of a confusing missing-file error when the CSV files are opened later.
subprocess.run(
    ["git", "clone", "-q", "--single-branch", "--depth", "1", cards_repo_url, str(cards_repo_dir)],
    check=True,
)

In [None]:

# Shallow-clone (single branch, depth 1) the deck lists repository into the
# temporary directory.
decks_repo_url = "https://github.com/fferegrino/yu-gi-oh-decks.git"
decks_repo_dir = Path(temporary_directory, "yu-gi-oh-decks")

# check=True turns a failed clone into a CalledProcessError right here instead
# of silently leaving the deck list empty further down.
subprocess.run(
    ["git", "clone", "-q", "--single-branch", "--depth", "1", decks_repo_url, str(decks_repo_dir)],
    check=True,
)

## Let's have a quick look at what the data looks like

In [None]:
import csv

In [None]:
# Load every row of the card catalogue as a dict keyed by the CSV header.
# newline="" lets the csv module do its own line-ending handling (required for
# quoted fields that contain line breaks); the explicit encoding removes the
# dependency on the platform default.
# NOTE(review): assumes the file is UTF-8 encoded — confirm.
with open(cards_repo_dir / "data/cards.csv", newline="", encoding="utf-8") as r:
    reader = csv.DictReader(r)
    cards = list(reader)

In [None]:
# Number of card rows read from cards.csv.
len(cards)

In [None]:
# Peek at the first card row to see which columns are available.
cards[0]

In [None]:
# Load every row of the variants table as a dict keyed by the CSV header.
# newline="" lets the csv module do its own line-ending handling; the explicit
# encoding removes the dependency on the platform default.
# NOTE(review): assumes the file is UTF-8 encoded — confirm.
with open(cards_repo_dir / "data/cards_variants.csv", newline="", encoding="utf-8") as r:
    reader = csv.DictReader(r)
    card_variants = list(reader)

In [None]:
# Peek at the first variant row to see which columns are available.
card_variants[0]

In [None]:
# Collect the rows of every deck CSV into one flat list of dicts.
decks = []

# glob() yields files in an unspecified order; sorting makes the position of a
# deck in the list reproducible across runs and machines. That position is used
# further down as the deck identifier.
for deck_file in sorted(decks_repo_dir.glob("data/*.csv")):
    # newline="" lets the csv module do its own line-ending handling; the
    # explicit encoding removes the dependency on the platform default.
    # NOTE(review): assumes the files are UTF-8 encoded — confirm.
    with open(deck_file, newline="", encoding="utf-8") as r:
        reader = csv.DictReader(r)
        decks.extend(reader)

In [None]:
# Total number of deck rows read across all deck CSV files.
len(decks)

In [None]:
# Peek at the first deck row to see which columns are available.
decks[0]

### Convert cards to actual Python lists

In [None]:
from ast import literal_eval

In [None]:
# Inspect the raw "side_deck" value of one specific deck. The hard-coded index
# requires at least 88192 loaded decks and depends on the order they were read
# in — NOTE(review): confirm this index is still meaningful for current data.
decks[88191]["side_deck"]

In [None]:
# The deck columns are read from CSV as strings; literal_eval parses one of
# them as a Python literal to show what the parsed value looks like.
literal_eval(decks[0]["main_deck"])

In [None]:
# Deck sections whose card lists are parsed, in processing order.
deck_properties = ["main_deck", "extra_deck", "side_deck"]

# One entry is produced per non-empty deck section, so a single deck can
# contribute up to three entries that share the same "deck_id".
deck_cards = []

for idx, deck in enumerate(decks):
    for prop in deck_properties:
        raw_value = deck.get(prop)
        # Skip sections that are missing, empty, or the literal string 'null'.
        if not raw_value or raw_value == 'null':
            continue
        try:
            passcodes = list(literal_eval(raw_value))
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            # Only the failures literal_eval is documented to raise on bad
            # input are treated as "malformed data" (a bare except would also
            # swallow KeyboardInterrupt). As before, the remaining sections of
            # this deck are abandoned once one section fails to parse.
            break
        if passcodes:
            deck_cards.append({
                "deck_id": idx,
                "passcodes": passcodes,
            })
        

In [None]:
# Peek at the first parsed entry (a dict with "deck_id" and "passcodes").
deck_cards[0]

## Preparación de los diccionarios auxiliares

In [None]:
# Map each variant passcode to the passcode of the card it is a variant of.
variant_to_original_passcode = {
    row["variant"]: row["original"] for row in card_variants
}


In [None]:
# Two-way lookup between passcodes and row positions in `cards`.
# passcode_to_id: original passcode -> position (a later row wins on clashes).
# id_to_passcode: position as a string -> original passcode.
passcode_to_id, id_to_passcode = {}, {}
for position, row in enumerate(cards):
    row_passcode = row["id"]
    # Variant rows are folded onto the passcode of their original card.
    canonical = variant_to_original_passcode.get(row_passcode, row_passcode)
    passcode_to_id[canonical] = position
    id_to_passcode[str(position)] = canonical


In [None]:
import json

# Persist the three lookup tables as JSON files in the working directory.
# Each entry is (file name, mapping to dump, extra json.dump keyword arguments).
json_exports = (
    ("passcode_variants.json", variant_to_original_passcode, {}),
    ("passcode_to_id.json", passcode_to_id, {"indent": 4}),
    ("id_to_passcode.json", id_to_passcode, {}),
)

for file_name, payload, dump_options in json_exports:
    with open(file_name, "w") as w:
        json.dump(payload, w, **dump_options)

In [None]:
def get_card_id(passcode):
    """Return the numeric id for *passcode*, or None when it is unknown.

    A variant passcode is first translated to its original passcode via
    ``variant_to_original_passcode``; the result is then looked up in
    ``passcode_to_id``.
    """
    if passcode in variant_to_original_passcode:
        passcode = variant_to_original_passcode[passcode]
    return passcode_to_id.get(passcode)

In [None]:
# Spot-check the lookup with two passcodes; a None in the result means that
# passcode is not present in passcode_to_id.
get_card_id("36996508"), get_card_id("46986414")

## Generación de la matriz de co-ocurrencia

In [None]:
from scipy.sparse import dok_matrix
import numpy as np

# One row and one column per entry of `cards`.
card_count = len(cards)

# Square sparse matrix in dictionary-of-keys format with float32 entries; the
# co-occurrence loop below adds counts to individual cells.
matrix = dok_matrix((card_count, card_count), dtype=np.float32)

In [None]:
# Show the matrix summary right after creation, before any counts are added.
matrix

In [None]:
from collections import Counter
from itertools import permutations

# Accumulate, for every deck entry, how often each ordered pair of cards
# appears together. Pairs containing an unknown passcode are skipped.
for deck_entry in deck_cards:
    pair_counts = Counter(permutations(deck_entry["passcodes"], 2))
    for pair, occurrences in pair_counts.items():
        row_id, col_id = (get_card_id(passcode) for passcode in pair)
        if row_id is None or col_id is None:
            continue
        matrix[row_id, col_id] += occurrences

In [None]:
# Show the matrix summary again, now that the co-occurrence counts are in.
matrix

## Cálculo de los embeddings

In [None]:
# Number of singular vectors kept by the SVD, and therefore the length of each
# card embedding stored in the Annoy index.
embedding_size = 50

In [None]:
from scipy.sparse.linalg import svds
import numpy as np

# Truncated SVD of the co-occurrence matrix; the left singular vectors are
# scaled by the square root of their singular values to form the embeddings.
u, s, _ = svds(matrix, k=embedding_size)
embeddings = u * np.sqrt(s)

# Standardise each embedding dimension to zero mean and unit variance.
column_means = np.mean(embeddings, axis=0)
column_stds = np.std(embeddings, axis=0)
embeddings = (embeddings - column_means) / column_stds

## Build index

In [None]:
# File the Annoy index is saved to once it has been built.
index_file = "card-embeddings.ann"

In [None]:
from annoy import AnnoyIndex

# Build an angular-distance Annoy index over the card embeddings and save it.
number_of_trees = 10
ann = AnnoyIndex(embedding_size, "angular")

# The item id of each vector is its row position in `embeddings`.
for idx, card_vector in enumerate(embeddings):
    ann.add_item(idx, card_vector)

ann.build(number_of_trees)
ann.save(index_file)

## Ejecutando algunos queries en el índice

In [None]:
import matplotlib.pyplot as plt
import requests
from PIL import Image
from io import BytesIO

def index_to_card(idx):
    """Return the card row for the numeric id *idx*.

    The id is translated to a passcode through ``id_to_passcode`` (keyed by the
    id as a string; an unknown id raises ``KeyError``). The first entry of
    ``cards`` whose "id" equals that passcode is returned, or None when no
    entry matches.
    """
    passcode = id_to_passcode[str(idx)]
    for candidate in cards:
        if candidate["id"] == passcode:
            return candidate
    return None

def passcode_to_card(passcode):
    """Return the card row for *passcode*, or None when no row matches.

    A variant passcode is first translated to its original passcode via
    ``variant_to_original_passcode``; the first entry of ``cards`` whose "id"
    equals the resulting passcode is returned.
    """
    lookup_id = variant_to_original_passcode.get(passcode, passcode)
    matches = (row for row in cards if row["id"] == lookup_id)
    return next(matches, None)

def draw_cards(cards):
    """Download and show the small image of each card side by side in one row.

    Args:
        cards: Sequence of card mappings. Each one is read for its
            "image_url_small" key and, if the download fails, its "name" key.

    A card whose image request does not return HTTP 200 is reported with a
    printed message and its subplot is left empty. Nothing is drawn for an
    empty sequence.
    """
    if not cards:
        # A subplot grid needs at least one column, so there is nothing to show.
        return

    # squeeze=False always returns a 2-D array of Axes. With the default, a
    # single card yields a bare Axes object and indexing it would fail.
    _fig, axes = plt.subplots(
        nrows=1, ncols=len(cards), figsize=(20, 4), squeeze=False
    )  # Adjust figsize as needed

    for axis, card in zip(axes[0], cards):
        response = requests.get(card["image_url_small"])
        if response.status_code == 200:
            img = Image.open(BytesIO(response.content))
            axis.imshow(img)
            axis.axis('off')
        else:
            print(f"Failed to load image  for {card['name']}")

    plt.tight_layout()
    plt.show()

In [None]:
from annoy import AnnoyIndex

# Same file name and vector length as used when the index was built above.
index_file = "card-embeddings.ann"
embedding_size = 50

# Re-create an index object with the same dimensionality and metric, then load
# the saved index from disk.
ann = AnnoyIndex(embedding_size, "angular")
ann.load(index_file)

In [None]:
# Pick a query card by passcode (swap in the commented line for another card).
query_card = '46986414' # Dark Magician
# query_card = '14558127' # Ash Blossom & Joyous Spring
card = passcode_to_card(query_card)
# get_card_id returns None for an unknown passcode, which is not a valid item
# id for the index, so the passcode must exist in the card data.
query_card_embedding = ann.get_item_vector(get_card_id(query_card))
print(query_card_embedding)

In [None]:
# Ask the index for the 5 items closest to the query vector.
# NOTE(review): the query card itself is presumably among the results, since
# its own vector is in the index — confirm.
similar_card_ids = ann.get_nns_by_vector(query_card_embedding, 5)
print(similar_card_ids)

In [None]:
# Turn the returned item ids back into card rows and display their images.
similar_cards = [index_to_card(idx) for idx in similar_card_ids]
draw_cards(similar_cards)

## Extra: Generando embeddings con un LLM

In [None]:
from openai import OpenAI

# Before this can be used, the OPENAI_API_KEY environment variable must be set.

client = OpenAI()

def get_embedding(text, model="text-embedding-ada-002"):
    """Return the embedding of *text* produced by the given OpenAI model.

    Args:
        text: The string to embed; it is sent as a single-element input list.
        model: Name of the embedding model to request.

    Returns:
        The ``embedding`` attribute of the first item in the response data.
    """
    embeddings_response = client.embeddings.create(input=[text], model=model)
    first_result = embeddings_response.data[0]
    return first_result.embedding

In [None]:
# Embed the description text of a single card with the OpenAI model, then
# print the description, the embedding length, and the embedding itself.
query_card = '14558127' # Ash Blossom & Joyous Spring
card = passcode_to_card(query_card)
print(card['desc'] + "\n")
card_desc_embedding = get_embedding(card['desc'])
print(f"Card embedding, size: {len(card_desc_embedding)}:")
print(card_desc_embedding)