In [42]:
import color_instances as ci
from tokenizers import basic_unigram_tokenizer
from colorutils import hsv_to_hsl, hsl_to_hsv
from vectorizers import BucketsVectorizer

In [2]:
# Collect (color, utterance) pairs from the filtered color instances.
# Only instances whose output tokenizes to exactly one unigram are kept.
# The color selected by `input` is passed through hsv_to_hsl (presumably an
# HSV -> HSL conversion -- confirm in colorutils) and the utterance is
# lowercased.
pairs = []
for inst in ci.filtered():
    if len(basic_unigram_tokenizer(inst.output)) != 1:
        continue
    pairs.append((hsv_to_hsl(inst.alt_inputs[inst.input]), inst.output.lower()))

In [3]:
from collections import Counter
# Tally how often each utterance occurs, then drop the pairs whose utterance
# was seen only once.
counts = Counter(pair[1] for pair in pairs)
pairs = [pair for pair in pairs if counts[pair[1]] >= 2]

In [4]:
# Preview the first ten (color, utterance) pairs.
pairs[:10]

[((249.0, 25.0, 50.0), 'purple'),
 ((87.0, 65.99999999999999, 50.0), 'lime'),
 ((215.0, 13.0, 50.0), 'grey'),
 ((229.0, 31.0, 50.0), 'purple'),
 ((209.0, 44.0, 50.0), 'blue'),
 ((264.0, 1.0, 50.0), 'brown'),
 ((25.0, 17.0, 50.0), 'brown'),
 ((350.0, 88.0, 50.0), 'red'),
 ((14.0, 70.00000000000001, 50.0), 'orange'),
 ((336.0, 6.0, 50.0), 'grey')]

In [5]:
# Bucket resolution handed to the vectorizer -- presumably the number of
# buckets per color channel; confirm against BucketsVectorizer.
bucket_resolution = (16, 8, 8)
vec = BucketsVectorizer(bucket_resolution, hsv=True)

In [6]:
# Map every color (first element of each pair) to its bucket id and convert
# the result to a plain Python list.
pair_colors = [pair[0] for pair in pairs]
buckets = vec.vectorize_all(pair_colors, hsv=True).tolist()

In [7]:
# Number of distinct bucket ids that actually occur in the data.
len(set(buckets))

128

In [8]:
# Utterance (second element) of every pair, in the same order as `pairs`.
utterances = list(pair[1] for pair in pairs)

In [9]:
import numpy as np
import cPickle as pickle

In [10]:
# Assign dense, zero-based indices to the distinct buckets and utterances,
# numbering them in sorted order.
bucket_to_index = dict(
    (bucket_id, position)
    for position, bucket_id in enumerate(sorted(set(buckets)))
)
utt_to_index = dict(
    (utterance, position)
    for position, utterance in enumerate(sorted(set(utterances)))
)

In [46]:
import html_report
# Map each dense bucket index to an upper-case hex code for that bucket.
# The bucket is un-vectorized back to a color, passed through hsl_to_hsv and
# then html_report.web_color; the first character of that string is dropped
# (presumably the leading '#', which the table cell below adds back itself).
color_map = {}
for bucket_id, dense_index in bucket_to_index.items():
    bucket_color = hsl_to_hsv(vec.unvectorize(bucket_id, hsv=True))
    color_map[dense_index] = html_report.web_color(bucket_color)[1:].upper()

In [45]:
from IPython.display import HTML
# Render every bucket as a colored table cell labelled "index: HEXCODE".
# Rows are built from the indices actually present in color_map, eight per
# row, instead of a hard-coded 16 x 8 grid: with the full 128 buckets the
# output is unchanged, and with fewer buckets the table still renders rather
# than raising KeyError (rows then simply hold consecutive indices).
swatch_columns = 8
swatch_indices = sorted(color_map)
swatch_rows = [
    swatch_indices[start:start + swatch_columns]
    for start in range(0, len(swatch_indices), swatch_columns)
]
HTML('''
<table>
{}
</table>
'''.format(
    '\n'.join(
        '<tr>{}</tr>'.format(
            ''.join(
                '<td bgcolor="#{hexcode}">{text}</td>'.format(
                    hexcode=color_map[index],
                    text='{}: {}'.format(index, color_map[index]),
                )
                for index in row
            )
        )
        for row in swatch_rows
    )
))

0,1,2,3,4,5,6,7
0: 746967,1: 81615A,2: 90584B,3: 9D503E,4: AB4730,5: BA3D22,6: C73514,7: D52C06
8: 746E67,9: 81705A,10: 90714B,11: 9D723E,12: AB7430,13: BA7522,14: C77614,15: D57806
16: 747367,17: 817F5A,18: 908B4B,19: 9D973E,20: ABA330,21: BAAF22,22: C7BB14,23: D5C706
24: 707467,25: 76815A,26: 7B904B,27: 819D3E,28: 86AB30,29: 8CBA22,30: 91C714,31: 97D506
32: 6B7467,33: 66815A,34: 61904B,35: 5C9D3E,36: 57AB30,37: 52BA22,38: 4DC714,39: 48D506
40: 677468,41: 5A815C,42: 4B904F,43: 3E9D43,44: 30AB36,45: 22BA29,46: 14C71D,47: 06D510
48: 67746D,49: 5A816B,50: 4B9069,51: 3E9D67,52: 30AB65,53: 22BA63,54: 14C762,55: 06D560
56: 677472,57: 5A8179,58: 4B9082,59: 3E9D8A,60: 30AB93,61: 22BA9B,62: 14C7A3,63: 06D5AC
64: 677274,65: 5A7A81,66: 4B8390,67: 3E8C9D,68: 3095AB,69: 229EBA,70: 14A6C7,71: 06AFD5
72: 676D74,73: 5A6B81,74: 4B6990,75: 3E679D,76: 3065AB,77: 2263BA,78: 1462C7,79: 0660D5


In [11]:
# Zero-initialized integer count matrix with one row per utterance and one
# column per bucket. The builtin `int` is used as the dtype: `np.int` was
# only an alias for it and no longer exists in recent NumPy releases.
matrix = np.zeros((len(utt_to_index), len(bucket_to_index)), dtype=int)

In [12]:
# Count how often each utterance was used for each color bucket.
# The matrix is reset first so that re-running this cell on its own does not
# add the same observations a second time.
matrix.fill(0)
for bucket, utt in zip(buckets, utterances):
    matrix[utt_to_index[utt], bucket_to_index[bucket]] += 1

In [13]:
# Sanity check: (number of utterances, number of buckets).
matrix.shape

(261, 128)

In [14]:
# Save the utterance index and the count matrix together as one pickled tuple.
with open('color_matrix.pkl', 'wb') as matrix_file:
    pickle.dump((utt_to_index, matrix), matrix_file)

In [47]:
# Save the bucket-index -> hex-code mapping as a pickle.
with open('color_index_to_hexcode.pkl', 'wb') as hexcode_file:
    pickle.dump(color_map, hexcode_file)

In [15]:
# Number of distinct utterances that received an index.
len(utt_to_index)

261

In [16]:
# Display the full utterance -> row-index mapping.
utt_to_index

{'again': 0,
 'apples': 1,
 'aqua': 2,
 'aquaish': 3,
 'aquamarine': 4,
 'army': 5,
 'avocado': 6,
 'banana': 7,
 'barney': 8,
 'barnie': 9,
 'battleship': 10,
 'beige': 11,
 'blandest': 12,
 'bllue': 13,
 'blood': 14,
 'blu': 15,
 'blue': 16,
 'blue-gray': 17,
 'blue-green': 18,
 'blue-grey': 19,
 'blue-purple': 20,
 'blueest': 21,
 'blueish': 22,
 'blueist': 23,
 'bluer': 24,
 'bluest': 25,
 'bluish': 26,
 'blye': 27,
 'blyue': 28,
 'boring': 29,
 'boy': 30,
 'brick': 31,
 'brighest': 32,
 'bright': 33,
 'brighter': 34,
 'brightest': 35,
 'brn': 36,
 'brown': 37,
 'brown-gray': 38,
 'brown-ish': 39,
 'brownest': 40,
 'brownish': 41,
 'brwon': 42,
 'bubblegum': 43,
 'bue': 44,
 'caca': 45,
 'camo': 46,
 'canvas': 47,
 'cement': 48,
 'clay': 49,
 'cobalt': 50,
 'coffee': 51,
 'concrete': 52,
 'copper': 53,
 'coral': 54,
 'cyan': 55,
 'dark': 56,
 'darker': 57,
 'darkest': 58,
 'dirt': 59,
 'dolphin': 60,
 'drab': 61,
 'drabbest': 62,
 'dreary': 63,
 'dull': 64,
 'dullest': 65,
 'dusty'

In [17]:
# Inspect the top-left 30 x 30 corner of the count matrix.
matrix[:30, :30]

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 1, 2, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
        0, 0, 0, 0, 0, 0, 0, 2],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 3,
        0, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [18]:
# Total number of observations per bucket (column sums), in ascending order.
bucket_totals = matrix.sum(axis=0)
sorted(bucket_totals.tolist())

[80,
 86,
 87,
 92,
 93,
 95,
 99,
 101,
 103,
 104,
 104,
 105,
 105,
 106,
 108,
 108,
 109,
 109,
 109,
 111,
 114,
 115,
 115,
 116,
 116,
 118,
 119,
 119,
 119,
 119,
 120,
 120,
 121,
 121,
 122,
 123,
 123,
 124,
 124,
 125,
 125,
 125,
 126,
 127,
 128,
 128,
 129,
 129,
 130,
 131,
 133,
 135,
 136,
 138,
 139,
 139,
 139,
 140,
 140,
 140,
 141,
 141,
 141,
 141,
 141,
 142,
 142,
 142,
 142,
 143,
 144,
 144,
 145,
 145,
 146,
 146,
 147,
 147,
 147,
 148,
 149,
 149,
 149,
 149,
 149,
 150,
 150,
 153,
 154,
 155,
 155,
 155,
 155,
 156,
 157,
 158,
 158,
 160,
 160,
 161,
 161,
 162,
 162,
 163,
 164,
 164,
 164,
 164,
 167,
 168,
 168,
 168,
 169,
 171,
 172,
 172,
 173,
 174,
 174,
 174,
 175,
 177,
 183,
 187,
 187,
 189,
 210,
 235]

In [19]:
# Scratch check of row-wise argsort on rows that contain ties.
# NOTE(review): the relative order of tied entries depends on the sort kind;
# the default kind is not documented as stable.
tie_rows = [[2, 2, 1], [0, 1, 1]]
np.argsort(tie_rows, axis=-1)

array([[2, 0, 1],
       [0, 1, 2]])