In [16]:
from pathlib import Path

import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import polars as pl

In [17]:
# Relative path to the CSV of collected vehicle observations (one directory
# up from the working directory, under raw_data/collected-data/).
collected_vehicle_data_path = Path("..", "raw_data", "collected-data", "collected_vehicle_data.csv")

In [18]:
# Define Distance Functions
def lp_dist(p:float):
    """Build a pairwise Lp (Minkowski) distance function for a fixed exponent.

    Args:
        p: exponent of the norm; must be strictly positive
            (p=1 is the Manhattan distance, p=2 the Euclidean distance).

    Returns:
        A function ``_lp_dist(x1, x2)`` that computes the matrix of
        Lp-distances between the rows of ``x1`` and the rows of ``x2``.

    Raises:
        ValueError: if ``p`` is not strictly positive (this also rejects NaN).
    """
    # Fail when the distance is built rather than deep inside a later call:
    # p == 0 would divide by zero in 1.0/p, and p < 0 does not define a distance.
    if not p > 0:
        raise ValueError(f"p must be > 0, got {p!r}")

    # _lp_dist has access to its outer function's scope (it closes over p).
    def _lp_dist(x1:np.ndarray, x2:np.ndarray) -> np.ndarray:
        """Get the pairwise Lp-distances between points.

        Args:
            x1: an m by d matrix where each row corresponds to a point to
                be compared with each point in x2.
            x2: an n by d matrix where each row corresponds to a point to
                be compared with each point in x1.

        Returns:
            a distance matrix of dimensions m by n where the (i,j)-th entry
            corresponds to the distance
            between points x1[i, :] and x2[j, :]

        Raises:
            ValueError: if either input is not 2-dimensional or the inputs
                do not have the same number of columns.
        """
        x1 = np.asarray(x1)
        x2 = np.asarray(x2)
        if x1.ndim != 2 or x2.ndim != 2 or x1.shape[1] != x2.shape[1]:
            raise ValueError(
                "x1 and x2 must be 2-dimensional with the same number of "
                f"columns, got shapes {x1.shape} and {x2.shape}"
            )

        # Compare each row of x1 to each row of x2 via broadcasting:
        #   x1[:, None, :] has shape (m, 1, d)
        #   x2[None, :, :] has shape (1, n, d)
        # so their difference has shape (m, n, d), and entry [i, j, :] is
        # x1[i, :] - x2[j, :].
        # Example:
        # x1 = np.array([
        # 	[1, 0, 0],
        # 	[0.45, 1.24, -0.02]])
        # x2 = np.array([
        # 	[0.9, -0.1, 0.3],
        # 	[0.8, -0.6, 0.4],
        # 	[0.2, -1.4, -0.4]])
        # gives a (2, 3, 3) array of coordinate-wise differences.
        abs_diff = np.abs(x1[:, np.newaxis, :] - x2[np.newaxis, :, :])

        # Sum |difference|^p over the coordinate axis, then take the p-th root.
        return np.power(
            np.sum(np.power(abs_diff, p), axis=2),
            1.0/p
        )

    return _lp_dist

In [19]:
# Euclidean (L2) pairwise distance: the Lp distance factory specialised to p = 2.
euclidean_dist = lp_dist(p=2)

In [20]:
def oklch_to_oklab(oklch:np.ndarray) -> np.ndarray:
    """Convert OKLCH color coordinates to OKLAB.

    OKLCH is the polar form of OKLAB: lightness L is shared, while chroma C
    and hue H (in degrees) map to the rectangular coordinates
    a = C * cos(H) and b = C * sin(H).

    Args:
        oklch: an n by 3 matrix where each
            row is an OKLCH color (L, C, H with H in degrees).  Any
            array-like that ``np.asarray`` can turn into an n by 3 float
            array is accepted.

    Returns:
        an n by 3 float64 matrix where each row is the OKLAB color
        (L, a, b) of the corresponding input row.

    Raises:
        ValueError: if the input is not an n by 3 matrix.
    """
    # Coerce once up front so indexing and arithmetic below are plain ndarray
    # operations regardless of what kind of array-like was passed in.
    oklch_arr = np.asarray(oklch, dtype=np.float64)
    if oklch_arr.ndim != 2 or oklch_arr.shape[1] != 3:
        raise ValueError(
            f"oklch must be an n by 3 matrix, got shape {oklch_arr.shape}"
        )

    lightness = oklch_arr[:, 0]
    chroma = oklch_arr[:, 1]
    hue = np.radians(oklch_arr[:, 2])  # np.cos/np.sin expect radians

    # Every output column is written explicitly, so the result never contains
    # uninitialised memory.
    return np.column_stack((
        lightness,
        chroma * np.cos(hue),
        chroma * np.sin(hue),
    ))

def oklab_to_linear_srgb(oklab: np.ndarray):
    """Map OKLAB coordinates to linear (not gamma-encoded) sRGB.

    The conversion follows https://bottosson.github.io/posts/oklab/ and is
    written for row vectors, so each matrix below is applied on the right.

    Args:
        oklab: an n by 3 matrix where each row is an OKLAB color (L, a, b).
            A single length-3 vector also works because only ``@`` and
            elementwise products are applied.

    Returns:
        an array shaped like the input whose last axis holds linear
        R, G, B.  No clipping is applied here, so out-of-gamut colors can
        produce values outside [0, 1].
    """
    # OKLAB -> cube-rooted cone responses (l', m', s').
    lab_to_lms_root = np.array([
        [1.        ,  1.        ,  1.        ],
        [ 0.39633778, -0.10556135, -0.08948418],
        [0.21580376, -0.06385417, -1.29148555]
    ])

    # Cone responses (l, m, s) -> linear sRGB.
    lms_to_linear_rgb = np.array([
        [4.0767416621, -1.2684380046, -0.0041960863],
        [-3.3077115913, 2.6097574011, -0.7034186147],
        [0.2309699292, -0.3413193965, 1.7076147010]
    ])

    lms_root = oklab @ lab_to_lms_root

    # Undo the cube root by cubing elementwise.
    lms = lms_root * lms_root * lms_root

    return lms @ lms_to_linear_rgb

def oklch_to_linear_srgb(oklch):
    """Convert OKLCH colors to linear sRGB by going through OKLAB.

    Args:
        oklch: OKLCH colors in the form accepted by ``oklch_to_oklab``.

    Returns:
        whatever ``oklab_to_linear_srgb`` returns for the converted colors.
    """
    return oklab_to_linear_srgb(oklch_to_oklab(oklch))

def linear_srgb_to_srgb(srgb_lin):
    """Gamma-encode linear sRGB values and scale them to the 0-255 range.

    Reference:
    https://www.image-engineering.de/library/technotes/958-how-to-convert-between-srgb-and-ciexyz

    Args:
        srgb_lin: array of linear sRGB components.

    Returns:
        array of the same shape with encoded components, clamped to
        [0, 255].  The values are floats; no rounding is performed.
    """
    # Negative components are clamped to zero before the fractional power is
    # taken, so the power is only ever applied to non-negative values.
    lin = np.maximum(srgb_lin, 0)

    # Piecewise sRGB transfer function: linear segment near black,
    # power-law segment elsewhere.
    encoded = np.where(
        lin <= 0.0031308,
        lin * 12.92,
        1.055 * lin ** (1.0/2.4) - 0.055
    )

    # Clamp below at 0, scale to 8-bit range, then clamp above at 255.
    scaled = 255 * np.maximum(encoded, 0)
    return np.minimum(scaled, 255)

def oklab_to_srgb(oklab):
    """Convert OKLAB colors to gamma-encoded sRGB on the 0-255 scale.

    Args:
        oklab: OKLAB colors in the form accepted by ``oklab_to_linear_srgb``.

    Returns:
        the result of ``linear_srgb_to_srgb`` applied to the linear sRGB
        values of the input colors.
    """
    return linear_srgb_to_srgb(oklab_to_linear_srgb(oklab))

def oklch_to_srgb(oklch):
    """Convert OKLCH colors to gamma-encoded sRGB on the 0-255 scale.

    The conversion chain is OKLCH -> OKLAB -> linear sRGB -> sRGB.

    Args:
        oklch: OKLCH colors in the form accepted by ``oklch_to_oklab``.

    Returns:
        the result of ``linear_srgb_to_srgb`` for the converted colors.
    """
    linear_rgb = oklab_to_linear_srgb(oklch_to_oklab(oklch))
    return linear_srgb_to_srgb(linear_rgb)

def rgb_mat_to_str(rgb_mat: np.ndarray) -> list[str]:
    """Format each row of a color matrix as an ``rgb(r,g,b)`` string.

    Args:
        rgb_mat: a 2-dimensional array with one color per row.

    Returns:
        one string per row, with the row's components rendered via ``str``
        and joined by commas (no rounding is applied).
    """
    return [
        f"rgb({','.join(map(str, rgb_mat[row, :]))})"
        for row in range(rgb_mat.shape[0])
    ]


In [21]:
# Get Observations
# The CSV is scanned rather than read eagerly; later cells build queries on
# `data` and call .collect() to obtain results.
data = pl.scan_csv(
    source=collected_vehicle_data_path,
    has_header=True
)

In [22]:
#| include: false
# Peek at the first few raw "color" values; they are strings of the form
# "oklch(L C H)".
data.select("color").head().collect()

color
str
"""oklch(0.61 0.2268 27.04)"""
"""oklch(0.3 0.0391 243.78)"""
"""oklch(0.74 0.1108 113.5)"""
"""oklch(0.68 0.005 258.33)"""
"""oklch(0.57 0.0132 153.48)"""


In [23]:
# Parse the "oklch(L C H)" strings into three numeric columns (L, C, H).
# NOTE(review): the collected result is presumably a polars DataFrame rather
# than a NumPy array, although the conversion helpers it is passed to are
# annotated with np.ndarray -- confirm this is intended.
oklch_mat = (data
    .select("color")
    .with_columns(
        pl.col("color")
            # Strip the 6-character "oklch(" prefix and the trailing ")".
            .str.tail(n=-6).str.head(n=-1)
            # Split the remaining "L C H" text on spaces into 3 fields.
            .str.splitn(by=" ", n=3)
            .alias("color")
    )
    # Expand the split fields into separate columns.
    .unnest("color")
    # Convert the string columns to 64-bit floats.
    .cast({pl.Utf8: pl.Float64})
    .collect()
)

In [24]:
# Transform the sample from OKLCH to OKLAB; the medioid search and the 3-D
# scatter plot in later cells both work on these OKLAB coordinates.
oklab_mat = oklch_to_oklab(oklch_mat)

In [25]:
def get_medioid(x:np.ndarray, d) -> np.ndarray:
    """Return the row of ``x`` with the smallest total distance to all rows.

    Args:
        x: an n by p matrix (numpy.ndarray); each row is a point in R^p.
        d: a callable taking two matrices (numpy.ndarray) as its first two
            positional arguments and returning the matrix of pairwise
            distances between their rows.

    Returns:
        the medioid point as a 1-dimensional array.  If several points tie
        for the smallest total, the first one is returned (``np.argmin``
        semantics).
    """
    pairwise = d(x, x)

    # Column j of the pairwise matrix holds the distances from every point
    # to point j, so the column sums are each point's total distance.
    total_distance = np.sum(pairwise, axis=0)

    best_row = np.argmin(total_distance)
    return x[best_row, :].ravel()

In [26]:
#| include: false
# Display the medioid of the sample in OKLAB coordinates (L, a, b).
get_medioid(x=oklab_mat, d=euclidean_dist)

array([ 0.67      , -0.00684086, -0.00973718])

In [27]:
# Keep the medioid in both color spaces: OKLAB (the space the distances were
# computed in) and sRGB on the 0-255 scale (used to color the figure).
medioid_oklab = get_medioid(x=oklab_mat, d=euclidean_dist)
medioid_srgb = oklab_to_srgb(medioid_oklab)

In [28]:
#| include: false
# Display the medioid's sRGB components (floats on the 0-255 scale).
medioid_srgb

array([142.53166515, 150.55632902, 155.57100978])

As @fig-medioid shows, the estimated vehicle medoid color (the observed color with the smallest total Euclidean distance, in OKLAB space, to all other observed colors) is a rather boring grey.

In [29]:
#| fig-cap: "Estimated Vehicle Medioid Color"
#| label: fig-medioid
# Render the medioid as a color swatch: a single, very large square marker
# filled with the medioid's sRGB color.
# rgb_mat_to_str works row-wise on a matrix, so the 1-D medioid is reshaped
# into a single-row matrix first.
color = rgb_mat_to_str(medioid_srgb.reshape((1, -1)))
fig = go.Figure(
    go.Scatter(
        # The marker is positioned at the medioid's first two OKLAB
        # coordinates; the axes are hidden below, so only the fill matters.
        x=medioid_oklab[0].reshape(1),
        y=medioid_oklab[1].reshape(1),
        mode="markers",
        marker=dict(size=1000, symbol="square"),
        marker_color=color,
    )
)

# Hide grid lines, zero lines and tick labels, and make both backgrounds
# transparent so that only the swatch is visible.
fig.update_layout(
    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    plot_bgcolor="rgba(0, 0, 0, 0)",
    paper_bgcolor="rgba(0, 0, 0, 0)",
)

# Shown with the staticPlot config option enabled.
fig.show(config={'staticPlot': True})

As seen in @fig-3d-scatter, the distribution of colors in the sample conforms to expectations: colors that do not look good on cars are absent. It is debatable whether the colors people pick for their cars reflect what they prefer or simply what is cheap or available for a given model.

In [30]:
#| fig-cap: Vehicle Colors in Sample.  Each vehicle color is plotted in OKLAB color space.
#| label: fig-3d-scatter
# Each observation is drawn at its OKLAB coordinates and filled with its own
# sRGB color, so the plot shows where the sampled colors sit in the space.
srgb_mat = oklch_to_srgb(oklch_mat)
rgb_list = rgb_mat_to_str(srgb_mat)

# OKLAB columns: 0 = L (lightness), 1 = a, 2 = b (see the axis titles below).
x=oklab_mat[:, 0]
y=oklab_mat[:, 1]
z=oklab_mat[:, 2]
# Per-point marker colors are supplied as a list of color strings:
# https://stackoverflow.com/questions/70340331/how-can-i-manually-color-each-point-in-my-scatter-plot-in-plotly
fig = go.Figure(
    go.Scatter3d(
        x=x,
        y=y,
        z=z,
        mode="markers",
        marker_color=rgb_list,
    )
)

# Label the three axes with the OKLAB coordinate they carry.
fig.update_layout(
    scene={
        "xaxis": {"title": "L"},
        "yaxis": {"title": "A"},
        "zaxis": {"title": "B"}
    }
    
)

fig.show()

**Caveats:** No attempt is made to account for non-sampling errors, such as non-response and measurement error, or to correct for sampling error. Measurement errors are likely, owing to the highly reflective nature of most car paints. The vehicle colors in the target population may differ from those in the sample. However, based on personal experience observing vehicles in the target population outside of this study, the author does not expect the color distribution in the target population to be dramatically different.