In [1]:
import base64
import glob
from io import BytesIO
import os
import re

from bokeh.embed import file_html
from bokeh.models import HoverTool, ColumnDataSource, CategoricalColorMapper
from bokeh.palettes import Spectral10
from bokeh.plotting import figure, show, output_notebook
from bokeh.resources import CDN

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image
import seaborn as sns
import umap

In [2]:
# Input locations, relative to the notebook's working directory.
raw_csv = "../data/raw/pokemon/pokemon_alopez247.csv"
gower_csv = "../data/processed/pokemon_dm_gower.csv"

# Attribute table, plus the pairwise distance matrix whose first CSV column
# is used as the row index.
pokemon_df = pd.read_csv(raw_csv)
pokemon_dm = pd.read_csv(gower_csv, index_col=0, sep=",")
pokemon_dm.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,712,713,714,715,716,717,718,719,720,721
1,0.0,0.072316,0.161091,0.275378,0.292204,0.382474,0.271086,0.2888,0.376676,0.356046,...,0.386384,0.432566,0.461479,0.483869,0.592783,0.646534,0.579991,0.643432,0.606028,0.569181
2,0.072316,0.0,0.132253,0.293902,0.272195,0.353636,0.2942,0.270485,0.347838,0.384762,...,0.396952,0.427413,0.484485,0.455031,0.562635,0.616385,0.549842,0.624898,0.579243,0.539033
3,0.161091,0.132253,0.0,0.379751,0.347389,0.276431,0.382081,0.351156,0.273389,0.473417,...,0.466154,0.458945,0.573139,0.47544,0.5616,0.61535,0.562101,0.565742,0.606053,0.549283
4,0.275378,0.293902,0.379751,0.0,0.075962,0.209709,0.193105,0.206919,0.290868,0.356589,...,0.388073,0.435704,0.4546,0.485017,0.593984,0.60228,0.626646,0.653431,0.606598,0.570382
5,0.292204,0.272195,0.347389,0.075962,0.0,0.177226,0.21131,0.193056,0.259507,0.386897,...,0.399383,0.429925,0.48662,0.452534,0.560024,0.56832,0.592686,0.634507,0.582491,0.542287


In [3]:
# metric="precomputed" treats the input as pairwise distances, so it must be square.
assert pokemon_dm.shape[0] == pokemon_dm.shape[1], "distance matrix must be square"

# UMAP is stochastic: pin the seed so the layout (and every plot built on it)
# is reproducible under Restart Kernel -> Run All.
reducer = umap.UMAP(metric="precomputed", min_dist=0.05, random_state=42)
embedding = reducer.fit_transform(pokemon_dm)

In [4]:
# Label the two embedding coordinates up front, then attach the Pokemon
# attributes row-by-row via the shared positional index.
coords = pd.DataFrame(embedding, columns=['x', 'y'])
df = coords.join(pokemon_df)
df.head()

Unnamed: 0,x,y,Number,Name,Type_1,Type_2,Total,HP,Attack,Defense,...,Color,hasGender,Pr_Male,Egg_Group_1,Egg_Group_2,hasMegaEvolution,Height_m,Weight_kg,Catch_Rate,Body_Style
0,2.184494,-6.56584,1,Bulbasaur,Grass,Poison,318,45,49,49,...,Green,True,0.875,Monster,Grass,False,0.71,6.9,45,quadruped
1,2.159329,-6.561849,2,Ivysaur,Grass,Poison,405,60,62,63,...,Green,True,0.875,Monster,Grass,False,0.99,13.0,45,quadruped
2,2.155347,-6.474988,3,Venusaur,Grass,Poison,525,80,82,83,...,Green,True,0.875,Monster,Grass,True,2.01,100.0,45,quadruped
3,1.765047,-6.019205,4,Charmander,Fire,,309,39,52,43,...,Red,True,0.875,Monster,Dragon,False,0.61,8.5,45,bipedal_tailed
4,1.711126,-5.997055,5,Charmeleon,Fire,,405,58,64,58,...,Red,True,0.875,Monster,Dragon,False,1.09,19.0,45,bipedal_tailed


In [5]:
# Collect one sprite path per Pokemon, ordered by Pokedex number so that
# position i in img_files_new corresponds to row i of df.
pokemon_url = '../data/raw/pokemon/imgs/*Generation/*'

# glob returns files in OS-dependent order; sort so the "first" match for an
# id is the same on every machine. Mega-evolution sprites are excluded.
img_files = sorted(x for x in glob.glob(pokemon_url) if "-Mega" not in x)

ids = [str(x).zfill(3) for x in range(1, df.shape[0]+1)]
img_files_new = list()
for this_id in ids:
    # Match the id only as a standalone digit run in the file name, never in
    # the directory part and never as a fragment of a longer number.
    id_pattern = re.compile(r"(?<!\d)" + this_id + r"(?!\d)")
    candidates = [x for x in img_files if id_pattern.search(os.path.basename(x))]
    if not candidates:
        raise FileNotFoundError(
            "No image found for Pokemon id {} under {}".format(this_id, pokemon_url)
        )
    img_files_new.append(candidates[0])
# No final sort: the list is already in id order by construction, and sorting
# by full path could reorder it relative to df when directory names do not
# sort in generation order.

In [6]:
# Build 64x64-bounded thumbnails flattened onto a white background.
size = (64, 64)
imgs = list()      # RGBA thumbnails (kept for inspection)
new_imgs = list()  # RGB thumbnails composited on white
for path in img_files_new:
    # Open one file at a time and close it as soon as the pixels are copied,
    # instead of holding every file handle open at once.
    with Image.open(path) as raw_img:
        # Normalise to RGBA so an alpha band always exists; sprites stored as
        # RGB / palette / greyscale would otherwise fail on split()[3].
        img = raw_img.convert("RGBA")
    img.thumbnail(size)  # in-place resize, preserves aspect ratio

    background = Image.new("RGB", img.size, (255, 255, 255))
    background.paste(img, mask=img.split()[3])  # band 3 of RGBA is alpha
    imgs.append(img)
    new_imgs.append(background)

In [7]:
def embeddable_image(data):
    """Encode an image as a base64 PNG data URI.

    Parameters
    ----------
    data
        Object exposing ``save(fp, format=...)``; called with an in-memory
        binary buffer and ``format='png'`` (presumably a PIL image).

    Returns
    -------
    str
        ``'data:image/png;base64,'`` followed by the base64 text of whatever
        ``data.save`` wrote to the buffer.
    """
    with BytesIO() as png_stream:
        data.save(png_stream, format='png')
        encoded = base64.b64encode(png_stream.getvalue())
    return 'data:image/png;base64,' + encoded.decode()

In [8]:
# One data URI per row of df: only the first df.shape[0] thumbnails are used.
embedding_imgs = [embeddable_image(thumb) for thumb in new_imgs[:df.shape[0]]]
df['img'] = embedding_imgs

In [9]:
# Interactive scatter of the Gower-distance UMAP embedding, shown inline and
# also exported as a standalone HTML page.
output_notebook()

datasource = ColumnDataSource(df)

# NOTE(review): plot_width / plot_height are the pre-3.0 Bokeh spellings;
# newer Bokeh releases expect width / height -- confirm the pinned version.
plot_figure = figure(
    title="UMAP projection of Pokemon with Gower Distance",
    plot_width=600,
    plot_height=600,
    tools=('pan, wheel_zoom, reset')
)

# Hover card: sprite (from the 'img' data-URI column) next to the Pokemon name.
plot_figure.add_tools(HoverTool(tooltips="""
<div>
    <div>
        <img src='@img' style='float: left; margin: 5px 5px 5px 5px'/>
    </div>
    <div>
        <span style='font-size: 16px; color: #224499'>Pokemon:</span>
        <span style='font-size: 18px'>@Name</span>
    </div>
</div>
"""))

plot_figure.circle(
    'x',
    'y',
    source=datasource,
    line_alpha=0.6,
    fill_alpha=0.6,
    fill_color='blue',
    size=5,
)
show(plot_figure)

# Export a self-contained page. Create the target directory if needed and
# write explicitly as UTF-8 (the charset the generated page declares) rather
# than relying on the platform default encoding.
html = file_html(plot_figure, CDN, "pokemon_gower")
os.makedirs("../html", exist_ok=True)
with open("../html/pokemon_gower.html", "w", encoding="utf-8") as f:
    f.write(html)

In [10]:
# List the available columns of the raw attribute table.
pokemon_df.columns

Index(['Number', 'Name', 'Type_1', 'Type_2', 'Total', 'HP', 'Attack',
       'Defense', 'Sp_Atk', 'Sp_Def', 'Speed', 'Generation', 'isLegendary',
       'Color', 'hasGender', 'Pr_Male', 'Egg_Group_1', 'Egg_Group_2',
       'hasMegaEvolution', 'Height_m', 'Weight_kg', 'Catch_Rate',
       'Body_Style'],
      dtype='object')

In [11]:
# Columns kept for the numeric-only embedding.
numeric_cols = [
    'Total', 'HP', 'Attack', 'Defense', 'Sp_Atk', 'Sp_Def', 'Speed',
    'Height_m', 'Weight_kg', 'Catch_Rate',
]
pokemon_df_num = pokemon_df[numeric_cols]
pokemon_df_num.head()

Unnamed: 0,Total,HP,Attack,Defense,Sp_Atk,Sp_Def,Speed,Height_m,Weight_kg,Catch_Rate
0,318,45,49,49,65,65,45,0.71,6.9,45
1,405,60,62,63,80,80,60,0.99,13.0,45
2,525,80,82,83,100,100,80,2.01,100.0,45
3,309,39,52,43,60,50,65,0.61,8.5,45
4,405,58,64,58,80,65,80,1.09,19.0,45


In [12]:
# UMAP on the numeric columns only, using UMAP's default metric.
# The seed is pinned so the layout is reproducible across kernel restarts.
# NOTE(review): the features are fed in unscaled, so wide-range columns
# (e.g. Total, Weight_kg) likely dominate the distances -- consider
# standardising before fitting.
reducer_num = umap.UMAP(random_state=42)
embedding_num = reducer_num.fit_transform(pokemon_df_num)

# Attach the attributes and the pre-encoded sprite data URIs for plotting.
embedding_num_df = pd.DataFrame(embedding_num, columns=['x', 'y']).join(pokemon_df)
embedding_num_df['img'] = embedding_imgs
embedding_num_df.head()

Unnamed: 0,x,y,Number,Name,Type_1,Type_2,Total,HP,Attack,Defense,...,hasGender,Pr_Male,Egg_Group_1,Egg_Group_2,hasMegaEvolution,Height_m,Weight_kg,Catch_Rate,Body_Style,img
0,-0.804048,7.972485,1,Bulbasaur,Grass,Poison,318,45,49,49,...,True,0.875,Monster,Grass,False,0.71,6.9,45,quadruped,"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA..."
1,-2.649192,5.517613,2,Ivysaur,Grass,Poison,405,60,62,63,...,True,0.875,Monster,Grass,False,0.99,13.0,45,quadruped,"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA..."
2,-5.233712,0.525568,3,Venusaur,Grass,Poison,525,80,82,83,...,True,0.875,Monster,Grass,True,2.01,100.0,45,quadruped,"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA..."
3,-1.007266,8.282925,4,Charmander,Fire,,309,39,52,43,...,True,0.875,Monster,Dragon,False,0.61,8.5,45,bipedal_tailed,"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA..."
4,-2.981045,5.881923,5,Charmeleon,Fire,,405,58,64,58,...,True,0.875,Monster,Dragon,False,1.09,19.0,45,bipedal_tailed,"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA..."


In [13]:
# Interactive scatter of the numeric-only UMAP embedding.

# Hover card markup: sprite from the 'img' column beside the 'Name' column.
hover_html = """
<div>
    <div>
        <img src='@img' style='float: left; margin: 5px 5px 5px 5px'/>
    </div>
    <div>
        <span style='font-size: 16px; color: #224499'>Pokemon:</span>
        <span style='font-size: 18px'>@Name</span>
    </div>
</div>
"""

datasource = ColumnDataSource(embedding_num_df)

plot_figure = figure(
    tools='pan, wheel_zoom, reset',
    plot_height=600,
    plot_width=600,
    title='UMAP projection of Pokemon with just numeric data',
)
plot_figure.add_tools(HoverTool(tooltips=hover_html))

# Small semi-transparent blue markers, one per Pokemon.
marker_style = dict(size=5, fill_color='blue', fill_alpha=0.6, line_alpha=0.6)
plot_figure.circle('x', 'y', source=datasource, **marker_style)

show(plot_figure)