In [21]:
import pandas as pd
import numpy as np

In [22]:
color_df = pd.read_csv("data/train/color_info/color_metrics.csv")

In [23]:
top_color  = set(color_df.top_color.unique())
bottom_color = set(color_df.bottom_color.unique())

all_colors = top_color | bottom_color

In [24]:
from collections import defaultdict

In [25]:
analyzer = {
    "default":0,
    "top":0,
    "bottom":0
}
data = defaultdict(lambda: defaultdict(int))

for _, row in color_df.iterrows():
    top_color = row.top_color
    bottom_color = row.bottom_color
    top_type = row.top_type
    bottom_type = row.bottom_type

    if top_color is not np.nan and bottom_color is not np.nan:
        data[top_color][bottom_color] = data[top_color][bottom_color] + 1
        data[bottom_color][top_color] = data[bottom_color][top_color] + 1
        analyzer["default"] += 1
    elif top_color is not np.nan and top_type == "DRESSES":
        data[top_color][top_color] = data[top_color][top_color] + 2
        analyzer["top"] += 1
    elif bottom_color is not np.nan and (top_type == "DRESSES" or bottom_type == "DRESSES"):
        data[bottom_color][bottom_color] = data[bottom_color][bottom_color] + 2
        analyzer["bottom"] += 1

$\text{Cosine Similarity}$
$$
w_{ij} = \frac{|N(i) \cap N(j)|}{|N(i)N(j)|}
$$

In [26]:
color_to_index = {color: i for i,color in enumerate(all_colors)}

In [27]:
count_color = np.zeros((len(all_colors), len(all_colors)))
cosine_similarity = np.zeros((len(all_colors), len(all_colors)))

In [28]:
for i, row in enumerate(all_colors):
    for j, col in enumerate(all_colors):
        count_color[i][j] = data[row][col]

In [29]:
for i, row in enumerate(all_colors):
    count_row = np.sum(count_color[i,:])
    for j, col in enumerate(all_colors):
        count_col = np.sum(count_color[:,j])
        if count_color[i][j] != 0:
            cosine_similarity[i][j] = (count_color[i][j])/(count_row * count_col)

In [30]:
# percentage of non zero values
non_zero_values = cosine_similarity[np.nonzero(cosine_similarity)]
len(non_zero_values) / (cosine_similarity.shape[0] * cosine_similarity.shape[1])

0.09304113802383698

In [31]:
cosine_similarity_exp = np.exp(cosine_similarity) / np.expand_dims(np.sum(np.exp(cosine_similarity), axis=1), 1)

$\text{hard coded values enhance score by 40\%}$

In [32]:
# Hard code good colors
hard_coded_colors = pd.read_csv("rule_based_color_combination.csv")

In [33]:
import webcolors

def closest_colour(requested_colour):
      min_colours = {}
      for name in webcolors.names("css3"):
          r_c, g_c, b_c = webcolors.name_to_rgb(name)
          rd = (r_c - requested_colour[0]) ** 2
          gd = (g_c - requested_colour[1]) ** 2
          bd = (b_c - requested_colour[2]) ** 2
          min_colours[(rd + gd + bd)] = name
      return min_colours[min(min_colours.keys())]

In [34]:
for i, row in hard_coded_colors.iterrows():
    try:
        main_color = closest_colour(list(map(int, row.MainColor.split('-')[:-1])))
        comp_color = closest_colour(list(map(int, row.ComplimentaryColor.split('-')[:-1])))

        main_color_index = color_to_index[main_color]
        comp_color_index = color_to_index[comp_color]

        cosine_similarity_exp[main_color_index,comp_color_index] *= 1.4
        cosine_similarity_exp[comp_color_index,main_color_index] *= 1.4
    except Exception as e:
        print(e)

'papayawhip'


In [37]:
class Matrix:
    def __init__(self, color_to_index,cosine_similarity_exp):
        self.color_to_index = color_to_index
        self.cosine_similarity_exp = cosine_similarity_exp

In [38]:
matrix = Matrix(color_to_index,cosine_similarity_exp)

In [39]:
import pickle 
with open("global_similarity_file", 'wb') as filehandler:
    pickle.dump(matrix, filehandler)

In [40]:
import pickle 
with open("global_similarity_file", 'rb') as filehandler:
    matrix_1 = pickle.load(filehandler)