# Similar Cards Module (SCM)

In this notebook we combine everything learned so far to implement a class that proposes cards similar to a given card. This is only a prototype version.

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import requests
import re
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from sklearn.neighbors import NearestNeighbors

In [26]:
def db_cards_init(path='../data/mtg_cards_data/AllPrintingsCSVFiles/cards.csv'):
    """Load the raw card table from the ``cards.csv`` export.

    Only the columns listed below are read, each with an explicit dtype.

    Parameters
    ----------
    path : str or file-like, optional
        Location of the CSV file; anything accepted by ``pandas.read_csv``.
        Defaults to the location used by this notebook.

    Returns
    -------
    pandas.DataFrame
        One row per CSV record, restricted to the selected columns.
    """
    # Column name -> dtype. The stat-like columns (loyalty, power, ...) are
    # read as 'object' instead of a numeric dtype.
    dtype = {
        'id': 'int64',
        'name': str,
        'text': str,
        'manaCost': str,
        'manaValue': float,
        'colorIdentity': str,
        'colors': str,
        'convertedManaCost': float,
        'type': str,
        'types': str,
        'loyalty': 'object',
        'power': 'object',
        'toughness': 'object',
        'keywords': str,
        'edhrecRank': 'object',
        'life': 'object',
        'defense': 'object',
        'scryfallId': str,
        'scryfallIllustrationId': str,
        'scryfallOracleId': str,
        'relatedCards': str,
    }
    # The columns to load are exactly the ones a dtype is declared for.
    columns = list(dtype)
    return pd.read_csv(path, usecols=columns, dtype=dtype)

def vectorize_columns(db, columns):
    """Append 0/1 indicator columns for comma-separated categorical columns.

    For every distinct value obtained by splitting the cells of each column
    in ``columns`` on ``','``, a new column named after that value is added.
    A row gets 1 when its cell contains the value (case-insensitive, literal
    substring match) and 0 otherwise.

    Parameters
    ----------
    db : pandas.DataFrame
        Table holding the columns to encode.
    columns : iterable of str
        Names of string columns whose cells are comma-separated lists.

    Returns
    -------
    pandas.DataFrame
        ``db`` with the indicator columns concatenated on the right.
    """
    indicators = []
    for col in columns:
        for val in db[col].dropna().str.split(',').explode().unique():
            # na=False: a missing cell contains nothing. Without it the match
            # yields NaN, which is truthy and would be encoded as 1.
            # regex=False: the value is data, not a pattern.
            hit = db[col].str.contains(val, case=False, regex=False, na=False)
            indicators.append(hit.astype(int).rename(val))
    return pd.concat([db] + indicators, axis=1)

# A trigger word counts only at the start of the ability or after a
# non-letter, so "that " or "combat " no longer match "at ".
_TRIGGER_PATTERN = re.compile(r'(?:^|[^a-zA-Z])(?:when|whenever|at) ', re.IGNORECASE)
_FIRST_WORD_PATTERN = re.compile(r'\w+')


def effect_type_dist(text, keywords):
    """Split a card text into abilities and sort them by effect type.

    Each line of ``text`` is treated as one ability and put into exactly one
    bucket, checked in this order:

    1. activated - the line contains a ``':'``;
    2. triggered - the line contains "when ", "whenever " or "at " as a
       separate word (case-insensitive);
    3. keyword   - the first word of any comma-separated part of the line is
       in ``keywords``;
    4. static    - everything else (including empty lines).

    Parameters
    ----------
    text : str
        Card text with abilities separated by newlines.
    keywords : iterable of str
        Known keyword names; the comparison is case-sensitive.

    Returns
    -------
    tuple of four lists of str
        ``(activated, triggered, keyword, static)``.
    """
    known_keywords = set(keywords)
    act_eff = []
    tri_eff = []
    kyw_eff = []
    stt_eff = []

    def starts_with_keyword(part):
        # Parts without any word character (e.g. a lone bullet or dash) simply
        # are not keywords; indexing findall(...)[0] used to raise here.
        first_word = _FIRST_WORD_PATTERN.search(part)
        return first_word is not None and first_word.group() in known_keywords

    for ab in text.split('\n'):
        if ':' in ab:
            act_eff.append(ab)
        elif _TRIGGER_PATTERN.search(ab):
            tri_eff.append(ab)
        elif any(starts_with_keyword(part) for part in ab.split(',') if part):
            kyw_eff.append(ab)
        else:
            stt_eff.append(ab)
    return (act_eff, tri_eff, kyw_eff, stt_eff)


def tokenizer(txt):
    """Tokenize a text line by line.

    Periods, commas and parentheses are treated as whitespace; every line of
    ``txt`` is then split on whitespace.

    Returns a list with one token list per line, or ``txt`` itself when it is
    falsy (e.g. ``''`` or ``None``).
    """
    if not txt:
        return txt
    punctuation_to_space = str.maketrans({'.': ' ', ',': ' ', '(': ' ', ')': ' '})
    token_lists = []
    for line in txt.split('\n'):
        token_lists.append(line.translate(punctuation_to_space).split())
    return token_lists

# A stat value we can evaluate: a plain sum/difference of decimal numbers,
# e.g. "3", "-1", "1+5", "2.5".
_STAT_EXPRESSION = re.compile(r'\s*[+-]?\s*\d*\.?\d+(?:\s*[+-]\s*\d*\.?\d+)*\s*')
_STAT_TERM = re.compile(r'[+-]?\s*\d*\.?\d+')


def _clean_card_text(name, text):
    """Return ``text`` with self-references normalised and parentheses removed."""
    if not isinstance(text, str) or not text:
        # Missing text (NaN/None) becomes '' rather than the literal 'nan'
        # that str(NaN) would produce.
        return ''
    cleaned = text.replace(name, 'this card')
    short_name = name.split(',')[0]
    if short_name:  # str.replace('') would insert the marker between all chars
        cleaned = cleaned.replace(short_name, 'this card')
    # Remove each parenthesised span separately; a greedy '.*' would also
    # delete the text between two spans on the same line.
    return re.sub(r'\([^)\n]*\)', '', cleaned)


def _tokenize_card_text(text, keywords):
    """Tokenize the activated, triggered and static abilities of one card."""
    if not text:
        return text
    activated, triggered, _keyword, static = effect_type_dist(text, keywords)
    return [tokenizer(ability) for ability in activated + triggered + static]


def _stat_value(value, wildcard):
    """Convert a loyalty/power/toughness cell to a number.

    ``wildcard`` (``'X'`` or ``'*'``) is replaced by 5, then simple sums and
    differences such as ``'1+5'`` are evaluated. Missing or empty cells are
    returned unchanged; anything that is not such an expression becomes NaN.
    """
    if value is None or value == '' or pd.isna(value):
        return value
    if isinstance(value, (int, float)):
        return value
    expression = str(value).replace(wildcard, '5')
    # Parsed arithmetically instead of eval(): the cells come straight from
    # the data file, and eval() raises on NaN and on non-Python values.
    if not _STAT_EXPRESSION.fullmatch(expression):
        return np.nan
    total = sum(float(re.sub(r'\s+', '', term)) for term in _STAT_TERM.findall(expression))
    return int(total) if total.is_integer() else total


def db_cards_proc(db):
    """Pre-process the raw card table for the similarity search.

    Steps:

    * collapse rows that share a ``name`` into one row (``groupby().first()``);
    * add ``Text``: the card text with the card's own name replaced by
      ``'this card'`` and parenthesised spans removed (``''`` if no text);
    * add 0/1 indicator columns for ``colorIdentity``, ``keywords`` and
      ``types`` via ``vectorize_columns``;
    * add ``TokText``: for each activated, triggered and static ability of
      ``Text`` the output of ``tokenizer`` (keyword abilities are left out);
    * convert ``loyalty``, ``power`` and ``toughness`` to numbers, counting
      the wildcards ``X`` / ``*`` as 5.

    Parameters
    ----------
    db : pandas.DataFrame
        Table as returned by ``db_cards_init``.

    Returns
    -------
    pandas.DataFrame
        The processed table, one row per card name.
    """
    db = db.groupby('name', as_index=False).first()
    db['Text'] = [_clean_card_text(name, text) for name, text in zip(db['name'], db['text'])]
    db = vectorize_columns(db, ['colorIdentity', 'keywords', 'types'])

    # The keyword list is the same for every row: compute it once instead of
    # three times per card, and classify each card's text only once.
    keywords = db['keywords'].dropna().str.split(',').explode().unique()
    db['TokText'] = pd.Series(
        [_tokenize_card_text(text, keywords) for text in db['Text']],
        index=db.index,
        dtype=object,
    )

    for column, wildcard in (('loyalty', 'X'), ('power', '*'), ('toughness', '*')):
        db[column] = [_stat_value(value, wildcard) for value in db[column]]
    # TODO: column effect types (triggered, activated, static)
    # TODO: column of trigger
    return db

def calc_sim(model_d2v, ab_list1, ab_list2):
    """Mean positive similarity between two lists of tokenized abilities.

    Every non-empty ability of ``ab_list1`` is compared with every non-empty
    ability of ``ab_list2`` using ``model_d2v.wv.n_similarity``; only pairs
    with a similarity greater than 0 contribute to the mean.

    Parameters
    ----------
    model_d2v : object
        Model exposing ``wv.n_similarity(tokens1, tokens2)``.
    ab_list1, ab_list2 : iterable of list of str
        Token lists, one per ability.

    Returns
    -------
    float
        Mean of the positive pairwise similarities, or ``0.0`` when there is
        no such pair (instead of the NaN that ``np.mean([])`` would give).
    """
    sim = []
    for ab1 in ab_list1:
        if not ab1:
            continue
        for ab2 in ab_list2:
            if not ab2:
                continue
            # Evaluate the similarity once per pair.
            score = model_d2v.wv.n_similarity(ab1, ab2)
            if score > 0:
                sim.append(score)
    if not sim:
        return 0.0
    return float(np.mean(sim))

class SCM:
    """Similar Cards Module: proposes cards similar to a selected card.

    Attributes
    ----------
    cards : pandas.DataFrame
        Processed card table (``db_cards_proc(db_cards_init())``).
    card_ind : index label
        Label in ``cards.index`` of the currently selected card.
    """

    # Same URL the original string concatenation produced.
    _IMAGE_URL = 'https://api.scryfall.com/cards/{}/?format=image&version=small'

    def __init__(self):
        self.cards = db_cards_proc(db_cards_init())
        self.card_ind = self._random_index()
        # Card for which the 'text_sim' column was last computed.
        self._text_sim_card = None

    def _random_index(self):
        """Return the index label of a uniformly drawn card."""
        return self.cards.index[np.random.randint(0, len(self.cards))]

    def _show_card(self, scryfall_id):
        """Download the small image of a card and display it in the notebook."""
        url = self._IMAGE_URL.format(scryfall_id)
        # The context managers release the connection and the image even when
        # displaying fails; the timeout keeps a dead connection from hanging.
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()
            with Image.open(response.raw) as image:
                display(image)

    @staticmethod
    def _token_lists(tok_text):
        """Flatten one ``TokText`` cell into a list of token lists.

        A ``TokText`` cell holds one ``tokenizer`` result per ability, and
        each result is itself a list of per-line token lists. Similarity is
        computed on flat token lists, so one nesting level is removed here.
        Cells that are not lists (cards without text) yield ``[]``.
        """
        if not isinstance(tok_text, list):
            return []
        token_lists = []
        for ability in tok_text:
            if not ability:
                continue
            if isinstance(ability[0], str):
                # Already a flat token list.
                token_lists.append(ability)
            else:
                token_lists.extend(tokens for tokens in ability if tokens)
        return token_lists

    def get_card(self):
        """Display the image of the currently selected card."""
        self._show_card(self.cards.loc[self.card_ind, 'scryfallId'])
        return None

    def set_card(self, card_name: str = ''):
        """Select a card by name, or a random card when no name is given.

        Parameters
        ----------
        card_name : str, optional
            The first card whose name contains this text (case-insensitive,
            literal match) is selected. Empty: pick a random card.

        Raises
        ------
        ValueError
            If no card name contains ``card_name``.
        """
        if card_name:
            # regex=False: card names may contain regex metacharacters.
            matches = self.cards['name'].str.contains(
                card_name, case=False, regex=False, na=False)
            if not matches.any():
                raise ValueError('no card name contains {!r}'.format(card_name))
            self.card_ind = self.cards.index[matches][0]
        else:
            self.card_ind = self._random_index()
        return None

    def text_sim(self):
        """Score every card's text against the selected card's text.

        Trains a Doc2Vec model on all activated, triggered and static
        abilities found in ``cards['Text']`` and stores, in
        ``cards['text_sim']``, the ``calc_sim`` score between each card's
        tokenized abilities and those of the selected card (0 for cards
        without text).
        """
        keywords = self.cards['keywords'].dropna().str.split(',').explode().unique()
        documents = []
        for text in self.cards['Text']:
            if not isinstance(text, str) or not text:
                continue
            activated, triggered, _keyword, static = effect_type_dist(text, keywords)
            for ability in activated + triggered + static:
                # tokenizer returns one token list per line; a training
                # document needs a single flat list of words.
                words = [token for line in (tokenizer(ability) or []) for token in line]
                if words:
                    documents.append(TaggedDocument(words, [len(documents)]))

        if not documents:
            # Nothing to train on: no card can be textually similar.
            self.cards['text_sim'] = 0.0
            self._text_sim_card = self.card_ind
            return None

        model_d2v = Doc2Vec(documents, vector_size=300, window=5, min_count=1, workers=4)
        target = self._token_lists(self.cards.loc[self.card_ind, 'TokText'])
        scores = self.cards['TokText'].apply(
            lambda tok_text: calc_sim(model_d2v, self._token_lists(tok_text), target))
        # NaN scores would make the nearest-neighbour search fail.
        self.cards['text_sim'] = scores.fillna(0.0)
        self._text_sim_card = self.card_ind
        return None

    def get_sim_cards(self):
        """Display the 5 nearest neighbours of the selected card.

        The feature space consists of ``text_sim`` plus the 0/1 indicator
        columns for keywords, colour identity and types. ``text_sim`` is
        (re)computed first when it is missing or belongs to another card.
        The selected card is part of the fitted data, so it can appear among
        the displayed neighbours.
        """
        #dic = {'Creature': ['power','toughness'], 'Planeswalker': ['loyalty']}
        if ('text_sim' not in self.cards.columns
                or getattr(self, '_text_sim_card', None) != self.card_ind):
            self.text_sim()

        col = ['text_sim']
        for c in ['keywords', 'colorIdentity', 'types']:
            col.extend(self.cards[c].dropna().str.split(',').explode().unique())
        # A value occurring in two source columns must not be selected twice.
        col = list(dict.fromkeys(col))
        #for key in [ x for x in self.cards.loc[self.card_ind,:].types if x in dic.keys()]:

        features = self.cards[col].fillna(0).to_numpy(dtype=float)
        query = self.cards.loc[[self.card_ind], col].fillna(0).to_numpy(dtype=float)
        n_neighbors = min(5, len(features))
        ngh = NearestNeighbors(n_neighbors=n_neighbors)
        ngh.fit(features)
        dist, ind = ngh.kneighbors(query, n_neighbors=n_neighbors)
        for i in ind[0]:
            self._show_card(self.cards.iloc[i]['scryfallId'])

In [27]:
# Build the module: the constructor loads and pre-processes the card database
# and selects a random card.
# NOTE(review): the recorded run of this cell ended in a KeyboardInterrupt.
scm = SCM()

KeyboardInterrupt: 