In [1]:

%run ../../load_magic/storage.py
%pprint
%matplotlib inline
from itertools import combinations
from math import cos, sin, pi, sqrt, atan
from mpl_toolkits.mplot3d import Axes3D
from sklearn.cluster import DBSCAN
from sklearn.decomposition import PCA
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import numpy as np
%who

DATA_FOLDER: ../data/
SAVES_FOLDER: ../saves/
Pretty printing has been turned OFF
Axes3D	 DATA_CSV_FOLDER	 DATA_FOLDER	 DBSCAN	 ENCODING_TYPE	 PCA	 SAVES_CSV_FOLDER	 SAVES_FOLDER	 SAVES_PICKLE_FOLDER	 
atan	 attempt_to_pickle	 combinations	 cos	 csv	 load_csv	 load_dataframes	 load_object	 mcolors	 
np	 os	 pd	 pi	 pickle	 plt	 save_dataframes	 sin	 sqrt	 
store_objects	 sys	 


In [2]:

columns_list = ['Red', 'Green', 'Blue']
def colors_dict_to_df(colors_dict):
    """Tabulate a ``{name: color_tuple}`` mapping as a DataFrame.

    Each key becomes a row label; positions 0, 1 and 2 of each value fill the
    'Red', 'Green' and 'Blue' columns respectively. Any items beyond the
    third are ignored.
    """
    names = list(colors_dict)
    records = [
        {channel: color_tuple[position] for position, channel in enumerate(columns_list)}
        for color_tuple in colors_dict.values()
    ]

    return pd.DataFrame(records, columns=columns_list, index=names)

In [3]:

def show_3d_plot(three_d_df, z_column='Red', x_column='Green', y_column='Blue'):
    """Draw a 3-D scatterplot of three columns of ``three_d_df``.

    Rows with a missing value in any of the three columns are dropped before
    plotting. Each point is colored by its row label, so the index is
    presumably made of valid matplotlib color specs (confirm against callers).
    Nothing is returned; the figure is left to the active backend to render.
    """
    axis_columns = [x_column, y_column, z_column]
    figure = plt.figure(figsize=(18, 8))
    axes = figure.add_subplot(111, projection='3d', autoscale_on=True)
    axes.set_xlabel(x_column)
    axes.set_ylabel(y_column)
    axes.set_zlabel(z_column)

    # Keep only complete rows, with columns ordered x, y, z
    complete_df = three_d_df[axis_columns].dropna(axis='index', how='any')
    points = complete_df.values
    axes.scatter(points[:, 0], points[:, 1], points[:, 2],
                 alpha=0.75, c=complete_df.index)

    axes.set_title('Scatterplot of the {}, {}, and {} Data'.format(x_column, y_column, z_column))

In [4]:

def distance_from_white(old_tuple):
    """Return the Euclidean distance from a color to the white corner.

    ``old_tuple`` is read positionally as (green, blue, red); every channel
    is measured against 1.0.
    """
    green, blue, red = old_tuple[0], old_tuple[1], old_tuple[2]

    return sqrt((1.0 - green)**2 + (1.0 - blue)**2 + (1.0 - red)**2)

In [5]:

def distance_from_black(old_tuple):
    """Return the Euclidean distance from a color to the black corner.

    ``old_tuple`` is read positionally as (green, blue, red); every channel
    is measured against 0.
    """
    green, blue, red = old_tuple[0], old_tuple[1], old_tuple[2]

    return sqrt(green**2 + blue**2 + red**2)

In [6]:

def distance_from_red(old_tuple):
    """Return the Euclidean distance from a color to the red corner.

    ``old_tuple`` is read positionally as (green, blue, red); red is measured
    against 1.0, green and blue against 0.
    """
    green, blue, red = old_tuple[0], old_tuple[1], old_tuple[2]

    return sqrt(green**2 + blue**2 + (1.0 - red)**2)

In [7]:

def distance_from_green(old_tuple):
    """Return the Euclidean distance from a color to the green corner.

    ``old_tuple`` is read positionally as (green, blue, red); green is
    measured against 1.0, blue and red against 0.
    """
    green, blue, red = old_tuple[0], old_tuple[1], old_tuple[2]

    return sqrt((1.0 - green)**2 + blue**2 + red**2)

In [8]:

def distance_from_blue(old_tuple):
    """Return the Euclidean distance from a color to the blue corner.

    ``old_tuple`` is read positionally as (green, blue, red); blue is
    measured against 1.0, green and red against 0.
    """
    green, blue, red = old_tuple[0], old_tuple[1], old_tuple[2]

    return sqrt(green**2 + (1.0 - blue)**2 + red**2)

In [9]:

def distance_from_magenta(old_tuple):
    """Return the Euclidean distance from a color to the magenta corner.

    ``old_tuple`` is read positionally as (green, blue, red); blue and red
    are measured against 1.0, green against 0.
    """
    green, blue, red = old_tuple[0], old_tuple[1], old_tuple[2]

    return sqrt(green**2 + (1.0 - blue)**2 + (1.0 - red)**2)

In [10]:

def distance_from_yellow(old_tuple):
    """Return the Euclidean distance from a color to the yellow corner.

    ``old_tuple`` is read positionally as (green, blue, red); green and red
    are measured against 1.0, blue against 0.
    """
    green, blue, red = old_tuple[0], old_tuple[1], old_tuple[2]

    return sqrt((1.0 - green)**2 + blue**2 + (1.0 - red)**2)

In [11]:

def distance_from_cyan(old_tuple):
    """Return the Euclidean distance from a color to the cyan corner.

    ``old_tuple`` is read positionally as (green, blue, red); green and blue
    are measured against 1.0, red against 0.
    """
    green, blue, red = old_tuple[0], old_tuple[1], old_tuple[2]

    return sqrt((1.0 - green)**2 + (1.0 - blue)**2 + red**2)

In [12]:

def get_distance_df(colors_df):
    """Compute each row's distance to the eight named color-cube corners.

    For every row of ``colors_df`` the 'Green', 'Blue' and 'Red' values are
    packed, in that order, into the tuple the ``distance_from_*`` helpers
    index into. The result keeps the row labels of ``colors_df`` and has one
    ``distance_from_<corner>`` column per helper.
    """
    distance_columns = ['distance_from_white', 'distance_from_black', 'distance_from_red',
                        'distance_from_green', 'distance_from_blue', 'distance_from_magenta',
                        'distance_from_yellow', 'distance_from_cyan']
    labels = []
    records = []
    for label, channels in colors_df.iterrows():
        # The helpers read position 0 as green, 1 as blue and 2 as red
        gbr_tuple = (channels['Green'], channels['Blue'], channels['Red'])
        records.append({
            'distance_from_white': distance_from_white(gbr_tuple),
            'distance_from_black': distance_from_black(gbr_tuple),
            'distance_from_red': distance_from_red(gbr_tuple),
            'distance_from_green': distance_from_green(gbr_tuple),
            'distance_from_blue': distance_from_blue(gbr_tuple),
            'distance_from_magenta': distance_from_magenta(gbr_tuple),
            'distance_from_yellow': distance_from_yellow(gbr_tuple),
            'distance_from_cyan': distance_from_cyan(gbr_tuple),
        })
        labels.append(label)

    return pd.DataFrame(records, columns=distance_columns, index=labels)

In [14]:

full_corner_list = ['white', 'black', 'red', 'green', 'blue', 'magenta', 'yellow', 'cyan']
def get_face_set_list(combinations_list):
    """Return every 3-item combination of ``combinations_list`` as a set.

    The sets are listed in the order ``itertools.combinations`` yields them;
    fewer than three items gives an empty list.
    """

    return [set(color_triple) for color_triple in combinations(combinations_list, 3)]

In [15]:

def plot_colortable(colors_dict, title, sort_colors=True, emptycols=0):
    """Draw a table of named color swatches and return its figure.

    Parameters
    ----------
    colors_dict : mapping of name -> color spec
        Each value must be accepted by ``mcolors.to_rgb`` and as an
        ``hlines`` color.
    title : str
        Left-aligned title drawn above the table.
    sort_colors : bool, default True
        When exactly ``True``, order the names by (hue, saturation, value,
        name); otherwise keep the mapping's own order.
    emptycols : int, default 0
        Number of the four column slots to leave unused; the figure width
        always reserves four columns.

    Returns
    -------
    matplotlib.figure.Figure
        The figure holding the table, so callers can save it. In a notebook,
        end the calling line with ``;`` if the returned figure should not be
        echoed as cell output.
    """

    # Layout measurements, in pixels at the 72 dpi used below
    cell_width = 212
    cell_height = 22
    swatch_width = 48
    margin = 12
    topmargin = 40

    # Sort colors_dict by hue, saturation, value and name.
    if sort_colors is True:
        by_hsv = sorted((tuple(mcolors.rgb_to_hsv(mcolors.to_rgb(color))),
                         name)
                        for name, color in colors_dict.items())
        names = [name for hsv, name in by_hsv]
    else:
        names = list(colors_dict)

    n = len(names)
    ncols = 4 - emptycols
    # Ceiling division: enough rows to fit n names into ncols columns
    nrows = n // ncols + int(n % ncols > 0)

    width = cell_width * 4 + 2 * margin
    height = cell_height * nrows + margin + topmargin
    dpi = 72

    fig, ax = plt.subplots(figsize=(width / dpi, height / dpi), dpi=dpi)
    fig.subplots_adjust(margin/width, margin/height,
                        (width-margin)/width, (height-topmargin)/height)
    ax.set_xlim(0, cell_width * 4)
    # Inverted y-limits so row 0 is drawn at the top
    ax.set_ylim(cell_height * (nrows-0.5), -cell_height/2.)
    ax.yaxis.set_visible(False)
    ax.xaxis.set_visible(False)
    ax.set_axis_off()
    ax.set_title(title, fontsize=24, loc="left", pad=10)

    for i, name in enumerate(names):
        # Fill column by column: consecutive names run down a column
        row = i % nrows
        col = i // nrows
        y = row * cell_height

        swatch_start_x = cell_width * col
        swatch_end_x = cell_width * col + swatch_width
        text_pos_x = cell_width * col + swatch_width + 7

        ax.text(text_pos_x, y, name, fontsize=14,
                horizontalalignment='left',
                verticalalignment='center')

        # A thick horizontal line serves as the color swatch
        ax.hlines(y, swatch_start_x, swatch_end_x,
                  color=colors_dict[name], linewidth=18)

    # Hand the figure back so it can be saved (e.g. fig.savefig(path))
    return fig

In [16]:

# Hue, Saturation, Value
def get_hsv_dict(colors_dict):
    """Map each color name to its HSV tuple.

    Every value of ``colors_dict`` is converted with ``mcolors.to_rgb`` and
    then ``mcolors.rgb_to_hsv``; the names and their order are kept.
    """
    hsv_dict = {}
    for name, color in colors_dict.items():
        rgb = mcolors.to_rgb(color)
        hsv_dict[name] = tuple(mcolors.rgb_to_hsv(rgb))

    return hsv_dict

In [None]:

# Resolve every XKCD color spec to an RGB tuple, tabulate it, and plot it in 3-D
colors_dict = dict(
    (name, mcolors.to_rgb(color))
    for name, color in mcolors.XKCD_COLORS.items()
)
xkcd_colors_df = colors_dict_to_df(colors_dict)
show_3d_plot(xkcd_colors_df)

In [None]:

# Resolve every base color spec to an RGB tuple, tabulate it, and plot it in 3-D
colors_dict = dict(
    (name, mcolors.to_rgb(color))
    for name, color in mcolors.BASE_COLORS.items()
)
base_colors_df = colors_dict_to_df(colors_dict)
show_3d_plot(base_colors_df)

In [None]:

# Resolve every Tableau color spec to an RGB tuple, tabulate it, and plot it in 3-D
colors_dict = dict(
    (name, mcolors.to_rgb(color))
    for name, color in mcolors.TABLEAU_COLORS.items()
)
tab_colors_df = colors_dict_to_df(colors_dict)
show_3d_plot(tab_colors_df)

In [None]:

# Resolve every CSS4 color spec to an RGB tuple, tabulate it, and plot it in 3-D
colors_dict = dict(
    (name, mcolors.to_rgb(color))
    for name, color in mcolors.CSS4_COLORS.items()
)
css4_colors_df = colors_dict_to_df(colors_dict)
show_3d_plot(css4_colors_df)

In [None]:

# Report, for each base color, the cube corner it sits on or the three
# corners it lies nearest to.
base_distance_df = get_distance_df(base_colors_df)
bryg_face_set_list = get_face_set_list(['black', 'red', 'green', 'yellow'])
for row_index, row_series in base_distance_df.iterrows():
    # (column_name, distance) pairs, nearest corner first
    tuple_list = sorted(row_series.to_dict().items(), key=lambda x: x[1])
    if tuple_list[0][1] == 0.0:
        print('{} is in the {} corner'.format(row_index, tuple_list[0][0].split('_')[2]))
    else:
        corners_list = tuple_list[:3]
        # 'distance_from_<corner>' -> '<corner>', kept in nearest-first order
        corner_names = [column_name.split('_')[2] for column_name, _ in corners_list]
        face_set = set(corner_names)
        if face_set in bryg_face_set_list:
            print('{} is nearest the black-red-yellow-green face'.format(row_index))
        else:
            # Join the ordered list rather than the set: set iteration order for
            # strings varies between kernel sessions, which made the printed
            # face name change from run to run.
            print('{} is nearest the {} face'.format(row_index, '-'.join(corner_names)))

In [None]:

# Swatch table of matplotlib's BASE_COLORS, using three of the four column slots.
# The trailing semicolon keeps the cell from echoing any return value.
plot_colortable(mcolors.BASE_COLORS, title="Base Colors", sort_colors=True, emptycols=1);

In [None]:

# Swatch table of matplotlib's TABLEAU_COLORS, using two of the four column slots.
# The trailing semicolon keeps the cell from echoing any return value.
plot_colortable(mcolors.TABLEAU_COLORS, title="Tableau Palette", sort_colors=True, emptycols=2);

In [None]:

#sphinx_gallery_thumbnail_number = 3
# Swatch table of matplotlib's CSS4_COLORS with the default sorting and all four columns.
# The trailing semicolon keeps the cell from echoing any return value.
plot_colortable(mcolors.CSS4_COLORS, title="CSS Colors");

In [None]:

# Reduce the XKCD colors from three RGB dimensions to two with PCA and plot
# the projected points. X2D is reused by later cells.
colors_dict = {name: mcolors.to_rgb(color) for name, color in mcolors.XKCD_COLORS.items()}
xkcd_colors_df = colors_dict_to_df(colors_dict)
# One row per color, columns in Red, Green, Blue order
X = xkcd_colors_df.values
X2D = PCA(n_components=2).fit_transform(X)
fig = plt.figure()
ax = fig.add_subplot(111, aspect='equal')

# The same points are drawn twice, once with '+' markers and once with '.'
ax.plot(X2D[:, 0], X2D[:, 1], 'k+')
ax.plot(X2D[:, 0], X2D[:, 1], 'k.')
# Mark the origin and draw a unit-length arrow along each projected axis
ax.plot([0], [0], 'ko')
ax.arrow(0, 0, 0, 1, head_width=0.05, length_includes_head=True, head_length=0.1, fc='k', ec='k')
ax.arrow(0, 0, 1, 0, head_width=0.05, length_includes_head=True, head_length=0.1, fc='k', ec='k')
ax.set_xlabel('$z_1$', fontsize=18)
ax.set_ylabel('$z_2$', fontsize=18, rotation=0)
# Fixed view window given as [xmin, xmax, ymin, ymax]
ax.axis([-1.5, 1.3, -1.2, 1.2])
ax.grid(True)

In [None]:

# Cluster the 2-D projection with DBSCAN; labels_ndarray is reused by later cells
eps, min_samples = 0.09, 1
db = DBSCAN(eps=eps, min_samples=min_samples)
db.fit(X2D)
labels_ndarray = db.labels_
unique_labels = np.unique(labels_ndarray)
unique_labels_count = len(unique_labels)

# Only report the labels when there are 5, 6 or 7 distinct ones.
# NOTE: the message text says pca_ndarray, but the data fitted above is X2D.
if 4 < unique_labels_count < 8:
    print('np.unique(DBSCAN(eps={}, min_samples={}).fit(pca_ndarray).labels_) = {}'.format(eps, min_samples, unique_labels))

In [None]:

# Scatter the 2-D projection, coloring each point by its DBSCAN cluster label.
# Relies on X2D and labels_ndarray from earlier cells.
fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111, autoscale_on=True)
cmap = plt.get_cmap('viridis_r')
# Black marker edges; the label values are mapped to colors through cmap
path_collection = ax.scatter(X2D[:, 0], X2D[:, 1],
                             c=labels_ndarray, edgecolors=(0, 0, 0), cmap=cmap)
title_text = 'Dimension Reduced Scatterplot of the XKCD Colors'
# The pyplot calls below act on the current axes, i.e. the one created above
text = plt.title(title_text)
# Empty tick lists remove the tick marks; the results are assigned so the
# cell does not echo them
xticklabel_obj = plt.xticks([])
yticklabel_obj = plt.yticks([])

In [None]:

# Optionally plot the XKCD colors (Caution: will produce large figure)
file_name = 'XKCD_Colors.png'
png_folder = os.path.join(SAVES_FOLDER, 'png')
# savefig does not create missing directories, so make sure the target exists
os.makedirs(png_folder, exist_ok=True)
file_path = os.path.join(png_folder, file_name)
xkcd_fig = plot_colortable(mcolors.XKCD_COLORS, "XKCD Colors")
# plot_colortable may return None; the figure it just created is still the
# current pyplot figure, so fall back to that instead of failing on
# None.savefig
if xkcd_fig is None:
    xkcd_fig = plt.gcf()
xkcd_fig.savefig(file_path)

In [None]:

# Load the two stored frames (load_object is the storage helper pulled in by
# the %run at the top of the notebook; presumably it reads a saved pickle)
patriline_df = load_object('patriline_df')
generations_df = load_object('generations_df')

# Keep the generations that begin no earlier than the earliest birth year
min_year = patriline_df['Year of Birth'].min()
match_series = (generations_df['birth_year_begin'] >= min_year)

# Distinct saeculum names among those generations (shown as the cell output)
generations_df.loc[match_series, 'saeculum_name'].unique()

In [None]:

# Number of distinct saeculum names among the matching generations
saeculum_names = generations_df.loc[match_series, 'saeculum_name'].unique()
unique_color_groups_count = len(saeculum_names)

# Inspect the XKCD color table object: its type, then its attribute names
print(type(mcolors.XKCD_COLORS))
dir(mcolors.XKCD_COLORS)