# [Color Embedding](https://philippmuens.com/word2vec-intuition/)

In [1]:
import json

import pandas as pd
import seaborn as sns
import numpy as np
from IPython.display import HTML, display

# prettier Matplotlib plots
import matplotlib.pyplot as plt
import matplotlib.style as style
style.use('seaborn')

# 1. Dataset

#### Download

In [2]:
%%bash
if [[ ! -f  "data/colors-256.json" ]]; then
    mkdir -p data
    wget -nc \
        https://jonasjacek.github.io/colors/data.json \
        -O data/colors-256.json 2> /dev/null
fi

In [3]:
color_data = json.loads(open("data/colors-256.json", 'r').read())
color_data[:5]

[{'colorId': 0,
  'hexString': '#000000',
  'rgb': {'r': 0, 'g': 0, 'b': 0},
  'hsl': {'h': 0, 's': 0, 'l': 0},
  'name': 'Black'},
 {'colorId': 1,
  'hexString': '#800000',
  'rgb': {'r': 128, 'g': 0, 'b': 0},
  'hsl': {'h': 0, 's': 100, 'l': 25},
  'name': 'Maroon'},
 {'colorId': 2,
  'hexString': '#008000',
  'rgb': {'r': 0, 'g': 128, 'b': 0},
  'hsl': {'h': 120, 's': 100, 'l': 25},
  'name': 'Green'},
 {'colorId': 3,
  'hexString': '#808000',
  'rgb': {'r': 128, 'g': 128, 'b': 0},
  'hsl': {'h': 60, 's': 100, 'l': 25},
  'name': 'Olive'},
 {'colorId': 4,
  'hexString': '#000080',
  'rgb': {'r': 0, 'g': 0, 'b': 128},
  'hsl': {'h': 240, 's': 100, 'l': 25},
  'name': 'Navy'}]

#### Preprocess

In [4]:
colors = dict()

for color in color_data:
    name = color['name'].lower()
    r = color['rgb']['r']
    g = color['rgb']['g']
    b = color['rgb']['b']
    rgb = tuple([r, g, b])
    colors[name] = rgb

In [5]:
print('Black: %s' % (colors['black'],))
print('White: %s' % (colors['white'],))
print('Red: %s' % (colors['red'],))
print('Lime: %s' % (colors['lime'],))
print('Blue: %s' % (colors['blue'],))

Black: (0, 0, 0)
White: (255, 255, 255)
Red: (255, 0, 0)
Lime: (0, 255, 0)
Blue: (0, 0, 255)


In [6]:
df = pd.DataFrame.from_dict(
    data=colors,
    orient='index',
    columns=['r', 'g', 'b'])
df.head()

Unnamed: 0,r,g,b
black,0,0,0
maroon,128,0,0
green,0,128,0
olive,128,128,0
navy,0,0,128


# 2. Model

#### Visualize color

In [7]:
def render_color(color: tuple) -> None:
    '''Render color (r, g, b)'''
    (r, g, b) = color
    
    display(HTML('''
      <div style="background-color: rgba(%d, %d, %d, 1); height: 20px;"></div>
    ''' % (r, g, b)),
    metadata=dict(isolated=True))

In [8]:
render_color( (0, 0, 0) )

In [9]:
render_color( (128, 128, 1) )

#### Cosine similarity
$$\text{similarity}\ =\ cos(\theta)\ =\ \frac{A \cdot B}{\| A \| \| B \|}$$

In [10]:
def similar(df, coord, n=10):
    # RGB values (3D coordinates) into a numpy array
    v1 = np.array(coord, dtype=np.float64)

    df_copy = df.copy()
    for i in df_copy.index:
        item = df_copy.loc[i]
        v2 = np.array([item.r, item.g, item.b], dtype=np.float64)

        #### cosine similarty ####
        theta_sum = np.dot(v1, v2)
        theta_den = np.linalg.norm(v1) * np.linalg.norm(v2)

        # check if we're trying to divide by 0
        if theta_den == 0:
            theta = None
        else:
            theta = theta_sum / theta_den

        # adding the `distance` column with the result of our computation
        df_copy.at[i, 'distance'] = theta

    # sorting the resulting DataFrame by distance
    df_copy.sort_values(
        by='distance',
        axis=0,
        ascending=False,
        inplace=True)
    
    return df_copy.head(n)

# 3. Test similar color

#### Similar color to red

In [11]:
render_color(colors['red'])

In [12]:
df_red = similar(df, colors['red'])
df_red

Unnamed: 0,r,g,b,distance
maroon,128,0,0,1.0
red3,215,0,0,1.0
red1,255,0,0,1.0
darkred,135,0,0,1.0
red,255,0,0,1.0
orangered1,255,95,0,0.937082
deeppink2,255,0,95,0.937082
darkorange3,215,95,0,0.914687
darkorange,255,135,0,0.883788
deeppink4,175,0,95,0.878853


In [13]:
for c in df_red.index:
    render_color(colors['{}'.format(c)])

#### Similar color to (100, 20, 120)

In [14]:
render_color((100, 20, 120))

In [15]:
df_render = similar(df, [100, 20, 120])
df_render

Unnamed: 0,r,g,b,distance
darkviolet,175,0,215,0.991837
darkmagenta,135,0,175,0.991199
magenta1,255,0,255,0.987829
magenta3,215,0,215,0.987829
fuchsia,255,0,255,0.987829
mediumorchid1,255,95,255,0.987592
purple,175,0,255,0.98759
orchid,215,95,215,0.980753
mediumorchid,175,95,215,0.979443
magenta2,255,0,215,0.976656


In [16]:
for c in df_render.index:
    render_color(colors['{}'.format(c)])