# Data Visualization Program

# 1. Settings

## 1.1. Import modules

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## 1.2. Hyperparameters

In [2]:
# Data
# Directories are given relative to the current working directory; the dataset
# name is also embedded in the reference CSV file name.
DATA_DIRECTORY = os.path.join("..", "data")
DOCUMENT_DIRECTORY = os.path.join("..", "docs")
DATASET_NAME = "Bradford-200uL-1"
REFERENCE_FILE = f"reference_corrected_{DATASET_NAME}.csv"

# Plot
# Axis-label text and its unit ("\u03BC" is the Greek small letter mu).
NAME_OF_LABEL = "Protein Concentration"
LABEL_UNIT = "\u03BCg/mL"
# Text-property dictionaries: TITLE_FONT / AXIS_FONT are unpacked as keyword
# arguments into the title and axis-label calls, LEGEND_FONT is passed as the
# `prop` argument of the legends.
TITLE_FONT = {'fontname':'Times New Roman', 'size':'16', 'color':'black', 'weight':'heavy'}
AXIS_FONT = {'fontname':'Times New Roman', 'size':'12', 'weight':'heavy'}
LEGEND_FONT = {'family': 'Times New Roman', 'size': 12}

## 1.3. Define functions

### 1.3.1. Save figure

In [3]:
# Define a function to save figure
def save_fig(fig, fig_name, dataset_name, docs_dir):
    """Save ``fig`` as ``<docs_dir>/<dataset_name>/<fig_name>_<dataset_name>.png``.

    Missing directories are created on the way and each creation is reported
    on stdout. Saving is best-effort: any exception is caught and reported on
    stdout instead of being propagated to the caller.

    Args:
        fig: Object exposing ``savefig(path, dpi=...)`` (e.g. a Matplotlib figure).
        fig_name: Prefix of the output file name.
        dataset_name: Name of the per-dataset sub-directory and file-name suffix.
        docs_dir: Root directory under which the per-dataset directory lives.
    """
    try:
        # Create /docs directory if it doesn't exist
        if not os.path.exists(docs_dir):
            os.makedirs(docs_dir, exist_ok=True)
            print(f"Document directory is created at: {docs_dir}")

        # Create /dataset document directory if it doesn't exist
        dataset_docs_dir = os.path.join(docs_dir, dataset_name)
        if not os.path.exists(dataset_docs_dir):
            os.makedirs(dataset_docs_dir, exist_ok=True)
            print(f"{dataset_name} directory is created at: {dataset_docs_dir}")

        # Save the figure
        fig_file = os.path.join(dataset_docs_dir, f"{fig_name}_{dataset_name}.png")
        fig.savefig(fig_file, dpi=300)
        print(f"Figure saved at: {os.path.abspath(fig_file)}")

    except Exception as e:
        # The f-prefix matters: without it the literal text "{e}" is printed
        # and the actual cause of the failure is lost.
        print(f"Saving figure error occurred: {e}")

### 1.3.2. Convert color space

In [4]:
# Method to convert RGB to CMYK
def rgb_to_cmyk(r, g, b):
    """Convert RGB channels given on a 0-255 scale to a ``(c, m, y, k)`` tuple.

    The black component is the complement of the brightest scaled channel.
    When it equals 1 (pure black) the three chromatic components are
    returned as 0.
    """
    # Scale each channel by 1/255
    r, g, b = r / 255.0, g / 255.0, b / 255.0

    # Black
    k = 1 - max(r, g, b)

    # Pure black: nothing left to divide by
    if k == 1:
        return 0, 0, 0, k

    # Cyan, magenta and yellow relative to the non-black remainder
    c = (1 - r - k) / (1 - k)
    m = (1 - g - k) / (1 - k)
    y = (1 - b - k) / (1 - k)

    return c, m, y, k

# Method to convert CMYK to RGB
def cmyk_to_rgb(c, m, y, k):
    """Convert CMYK components to an ``(r, g, b)`` tuple of ints in [0, 255].

    Each channel is ``255 * (1 - component) * (1 - k)``, clamped to
    [0, 255] and rounded to the nearest integer. Rounding (instead of
    truncating with ``int``) keeps a value such as 199.99999999999997,
    which floating-point error can produce for an exact 200, from being
    reported as 199.
    """
    def _channel(component):
        value = 255 * (1 - component) * (1 - k)
        # Ensure the value is within the valid range [0, 255] before rounding
        return round(min(max(0, value), 255))

    return _channel(c), _channel(m), _channel(y)

# Method to convert RGB to HSL
def rgb_to_hsl(r, g, b):
    """Convert RGB channels given on a 0-255 scale to an ``(h, s, l)`` tuple.

    Hue is returned as a fraction of a full turn (the sector value divided
    by 6). For achromatic input (largest and smallest channel equal) hue and
    saturation are both 0.
    """
    r, g, b = r / 255, g / 255, b / 255
    brightest = max(r, g, b)
    darkest = min(r, g, b)
    l = (brightest + darkest) / 2

    # Achromatic: no hue, no saturation
    if brightest == darkest:
        return 0, 0, l

    spread = brightest - darkest

    # Saturation uses a different denominator in the upper half of lightness
    if l > 0.5:
        s = spread / (2 - brightest - darkest)
    else:
        s = spread / (brightest + darkest)

    # Hue sector depends on which channel is the largest
    if brightest == r:
        h = (g - b) / spread + (6 if g < b else 0)
    elif brightest == g:
        h = (b - r) / spread + 2
    else:
        h = (r - g) / spread + 4

    return h / 6, s, l

# Method to convert HSL to RGB
def hsl_to_rgb(h, s, l):
    """Convert HSL components to an ``(r, g, b)`` tuple of ints on a 0-255 scale.

    Args:
        h: Hue as a fraction of a full turn.
        s: Saturation; 0 selects the achromatic branch.
        l: Lightness.

    The intermediate ``q`` is chosen with a conditional expression. The
    previous ``cond and a or b`` form fell through to the second formula
    whenever the first one evaluated to 0 (``l == 0``), which turned black
    into a saturated colour. Channels are rounded to the nearest integer so
    that floating-point error just below a whole number does not lose one
    unit.
    """
    def hue_to_rgb(p, q, t):
        # Wrap t into [0, 1] and pick the piecewise-linear segment it falls in
        if t < 0:
            t += 1
        if t > 1:
            t -= 1
        if t < 1 / 6:
            return p + (q - p) * 6 * t
        if t < 1 / 2:
            return q
        if t < 2 / 3:
            return p + (q - p) * (2 / 3 - t) * 6
        return p

    if s == 0:
        r = g = b = l  # achromatic
    else:
        q = l * (1 + s) if l < 0.5 else l + s - l * s
        p = 2 * l - q
        r = hue_to_rgb(p, q, h + 1 / 3)
        g = hue_to_rgb(p, q, h)
        b = hue_to_rgb(p, q, h - 1 / 3)

    return round(r * 255), round(g * 255), round(b * 255)

# Method to convert RGB to HSV
def rgb_to_hsv(r, g, b):
    """Convert RGB channels given on a 0-255 scale to an ``(h, s, v)`` tuple.

    Hue is computed in degrees, wrapped into [0, 360) and returned divided
    by 360. Value is the largest scaled channel; saturation is the channel
    spread relative to the value (0 when the value is 0).
    """
    r, g, b = r / 255.0, g / 255.0, b / 255.0

    v = max(r, g, b)
    smallest = min(r, g, b)
    diff = v - smallest

    # Hue: 0 for grays, otherwise chosen by the dominant channel
    if v == smallest:
        h = 0
    elif v == r:
        degrees = (60 * ((g-b)/diff) + 360) % 360
        h = degrees / 360.0
    elif v == g:
        degrees = (60 * ((b-r)/diff) + 120) % 360
        h = degrees / 360.0
    elif v == b:
        degrees = (60 * ((r-g)/diff) + 240) % 360
        h = degrees / 360.0

    # Saturation
    s = 0 if v == 0 else diff/v

    return h, s, v

# Method to convert HSV to RGB
def hsv_to_rgb(h, s, v):
    """Convert HSV components to an ``(r, g, b)`` tuple of ints on a 0-255 scale.

    Args:
        h: Hue as a fraction of a full turn; 1.0 is treated like 0.0.
        s: Saturation; 0 yields a gray.
        v: Value.

    Every path returns integers scaled by 255 (the gray path used to return
    the unscaled floats). The sector index is wrapped modulo 6 so that
    ``h == 1.0`` maps to the first sector instead of the last one, and
    channels are rounded to the nearest integer rather than truncated.
    """
    # When saturation is 0, the color is a shade of gray
    if s == 0:
        gray = round(v * 255)
        return gray, gray, gray

    # Find which sector of the color circle the hue is in
    sector = h * 6.0
    i = int(sector)
    f = sector - i

    p = v * (1.0 - s)
    q = v * (1.0 - s * f)
    t = v * (1.0 - s * (1.0 - f))

    # One (r, g, b) arrangement per sector; index 6 wraps back to sector 0
    arrangements = (
        (v, t, p),
        (q, v, p),
        (p, v, t),
        (p, q, v),
        (t, p, v),
        (v, p, q),
    )
    r, g, b = arrangements[i % 6]

    return round(r * 255), round(g * 255), round(b * 255)

# Method to convert RGB to CIELAB
def rgb_to_lab(r, g, b):
    """Convert RGB channels given on a 0-255 scale to a CIELAB ``(L, a, b)`` tuple.

    Steps: scale to [0, 1] and undo the gamma encoding, apply the RGB-to-XYZ
    matrix, divide by the white-point constants (0.95047, 1.0, 1.08883),
    apply the piecewise cube-root transform and combine into L, a and b.
    """
    def _linearize(channel):
        # Gamma correction: linear segment near zero, power curve above it
        value = channel / 255.0
        if value <= 0.04045:
            return value / 12.92
        return ((value + 0.055) / 1.055) ** 2.4

    def _transform(component):
        # Cube root above the threshold, linear approximation below it
        if component > 0.008856:
            return component ** (1/3)
        return 7.787 * component + 16/116

    lin_r, lin_g, lin_b = _linearize(r), _linearize(g), _linearize(b)

    # RGB to CIEXYZ
    X = 0.4124 * lin_r + 0.3576 * lin_g + 0.1805 * lin_b
    Y = 0.2126 * lin_r + 0.7152 * lin_g + 0.0722 * lin_b
    Z = 0.0193 * lin_r + 0.1192 * lin_g + 0.9505 * lin_b

    # Normalize for D65 illuminant
    X_trans = _transform(X / 0.95047)
    Y_trans = _transform(Y / 1.00000)
    Z_trans = _transform(Z / 1.08883)

    lightness = 116.0 * Y_trans - 16.0
    green_red = 500.0 * (X_trans - Y_trans)
    blue_yellow = 200.0 * (Y_trans - Z_trans)

    return lightness, green_red, blue_yellow

# Helpers for the CIELAB to RGB conversion
def _lab_inverse_transform(t):
    """Undo the piecewise cube-root transform for one XYZ component."""
    cubed = t**3
    if cubed > 0.008856:
        return cubed
    return (t - 16/116) / 7.787


def _linear_to_gamma(channel):
    """Apply gamma correction to one linear RGB channel."""
    if channel > 0.0031308:
        return 1.055 * (channel**(1/2.4)) - 0.055
    return 12.92 * channel


def _to_8bit(channel):
    """Scale a channel by 255, round to the nearest int and clamp to [0, 255]."""
    return min(max(round(channel * 255), 0), 255)


# Method to convert CIELAB to RGB
def lab_to_rgb(L, a, b):
    """Convert CIELAB ``(L, a, b)`` to an ``(r, g, b)`` tuple of ints in [0, 255].

    Steps: CIELAB to XYZ (inverse piecewise transform, then multiplication by
    the white-point constants 0.95047, 1.0, 1.08883), XYZ to linear RGB via
    the matrix below, gamma correction, then scaling to 0-255.

    Channels are rounded to the nearest integer instead of truncated:
    with truncation, white (L=100, a=0, b=0) came out as (255, 255, 254)
    because the blue channel evaluates to roughly 254.98.
    """
    # CIELAB to CIEXYZ
    fy = (L + 16) / 116
    fx = a / 500 + fy
    fz = fy - b / 200

    # D65 illuminant
    X = _lab_inverse_transform(fx) * 0.95047
    Y = _lab_inverse_transform(fy) * 1.00000
    Z = _lab_inverse_transform(fz) * 1.08883

    # CIEXYZ to linear RGB
    lin_r = X *  3.2406 + Y * -1.5372 + Z * -0.4986
    lin_g = X * -0.9689 + Y *  1.8758 + Z *  0.0415
    lin_b = X *  0.0557 + Y * -0.2040 + Z *  1.0570

    # Gamma-correct, then convert to the range [0, 255]
    return (
        _to_8bit(_linear_to_gamma(lin_r)),
        _to_8bit(_linear_to_gamma(lin_g)),
        _to_8bit(_linear_to_gamma(lin_b)),
    )

# 2. Dataset

## 2.1. Local parameters

In [5]:
# Plot
# TITLE_DISPLAY gates the title calls, FIGURE_SIZE is the base (width, height)
# used for `figsize`, SAVE_FIGURE gates the save_fig calls.
TITLE_DISPLAY = True
FIGURE_SIZE = (5, 5)
SAVE_FIGURE = True

In [6]:
# Read the reference measurements of the selected dataset
reference_df = pd.read_csv(os.path.join(DATA_DIRECTORY, REFERENCE_FILE))

# Map ((Red, Green, Blue), Clear_Frequency) -> Label.
# Rows sharing the same key collapse into one entry (the last row wins).
reference = {
    ((row['Red'], row['Green'], row['Blue']), row['Clear_Frequency']): row['Label']
    for _, row in reference_df.iterrows()
}

# Marker colours: every RGB triple divided by 255
colors = [(red/255, green/255, blue/255) for (red, green, blue), _ in reference]

# Labels, in the same order as the dictionary keys
labels = list(reference.values())

FileNotFoundError: [Errno 2] No such file or directory: '..\\data\\reference_corrected_Bradford-200uL-1.csv'

## 2.2. RGBC-Frequency

In [None]:
# Tag embedded in the names of the figures saved by the cells of this section
COLORIMETRY_SYSTEM = "rgbc-freq"

In [None]:
# Map (red, green, blue, clear) frequency tuples to their label.
# NOTE(review): this dictionary is keyed differently from `reference`; if the
# data contains duplicate keys the two could end up with different lengths.
frequencies = {
    (
        row['Red_Frequency'],
        row['Green_Frequency'],
        row['Blue_Frequency'],
        row['Clear_Frequency'],
    ): row['Label']
    for _, row in reference_df.iterrows()
}

# One list per channel, taken from the key positions
red_frequencies = [freq_key[0] for freq_key in frequencies]
green_frequencies = [freq_key[1] for freq_key in frequencies]
blue_frequencies = [freq_key[2] for freq_key in frequencies]
clear_frequencies = [freq_key[3] for freq_key in frequencies]

### 2.2.1. Plot overall data

In [None]:
# Single axes showing all four frequency channels against the label
fig, axs = plt.subplots(figsize=FIGURE_SIZE)

# (legend label, marker colour, values), drawn in this order
frequency_series = [
    ('Red', 'red', red_frequencies),
    ('Green', 'green', green_frequencies),
    ('Blue', 'blue', blue_frequencies),
    ('Clear', 'gray', clear_frequencies),
]
for series_label, series_color, series_values in frequency_series:
    axs.scatter(labels, series_values, label=series_label, color=series_color, alpha=0.5)

axs.set_xlabel(f"{NAME_OF_LABEL} ({LABEL_UNIT})", **AXIS_FONT)
axs.set_ylabel('Frequency Value', **AXIS_FONT)
if TITLE_DISPLAY:
    axs.set_title(f'{NAME_OF_LABEL} vs Frequency', **TITLE_FONT)

# Legend placed above the axes, one column per channel
axs.legend(loc='upper center', bbox_to_anchor=(0.5, 1.15), prop=LEGEND_FONT, framealpha=1, ncol=4)

# Display the plots
plt.tight_layout()
plt.show()

# Save figure
if SAVE_FIGURE:
    save_fig(fig, f"Overview_of_Frequencies_{COLORIMETRY_SYSTEM}", DATASET_NAME, DOCUMENT_DIRECTORY)

### 2.2.2. Plot each feature

In [None]:
# One panel per frequency channel
fig, axs = plt.subplots(1, 4, figsize=(FIGURE_SIZE[0]*3, FIGURE_SIZE[1]))

# Names for each subplot; the position matches the position inside the frequency keys
freq_names = ['Red Frequency', 'Green Frequency', 'Blue Frequency', 'Clear Frequency']

# Plot Label vs Frequencies
for idx, freq_name in enumerate(freq_names):
    panel_ax = axs[idx]
    panel_values = [freq_key[idx] for freq_key in frequencies]
    panel_ax.scatter(labels, panel_values, color=colors)
    panel_ax.set_xlabel(f"{NAME_OF_LABEL} ({LABEL_UNIT})", **AXIS_FONT)
    panel_ax.set_ylabel(f"{freq_name} (Hz)", **AXIS_FONT)
    if TITLE_DISPLAY:
        panel_ax.set_title(f'{NAME_OF_LABEL} vs {freq_name}', **TITLE_FONT)

# Display the plots
plt.tight_layout()
plt.show()

# Save figure
if SAVE_FIGURE:
    save_fig(fig, f"Each_Frequency_of_Dataset_{COLORIMETRY_SYSTEM}", DATASET_NAME, DOCUMENT_DIRECTORY)

In [None]:
# Report the smallest and largest clear-channel frequency
print(f"Min: {min(clear_frequencies)}, Max: {max(clear_frequencies)}")

### 2.2.3. Plot correlations on features

In [None]:
# Collect the four frequency channels as DataFrame columns
frequency_df = pd.DataFrame(
    dict(
        Red_Frequency=red_frequencies,
        Green_Frequency=green_frequencies,
        Blue_Frequency=blue_frequencies,
        Clear_Frequency=clear_frequencies,
    )
)

# Pairwise correlation between the channels
correlation_matrix = frequency_df.corr()

# Annotated heatmap with the colour scale fixed to [-1, 1]
plt.figure(figsize=FIGURE_SIZE)
sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap='coolwarm',
    linewidths=0.5,
    vmin=-1,
    vmax=1,
)
plt.title("Correlation Heatmap for Frequencies")
plt.show()

### 2.2.4. Plot relationship of RGB frequencies to clear frequency

In [None]:
# Compute the RGB sum
rgb_sum = frequency_df['Red_Frequency'] + frequency_df['Green_Frequency'] + frequency_df['Blue_Frequency']

# Keep a handle on the new figure: without it, `fig` still refers to the
# figure created by an earlier cell and save_fig would write that one.
fig = plt.figure(figsize=(FIGURE_SIZE[0]*3, FIGURE_SIZE[1]))

# (x values, x-axis label, title) for each of the four panels
clear_panels = [
    (frequency_df['Red_Frequency'], 'Red Frequency', 'Red vs Clear Frequency'),
    (frequency_df['Green_Frequency'], 'Green Frequency', 'Green vs Clear Frequency'),
    (frequency_df['Blue_Frequency'], 'Blue Frequency', 'Blue vs Clear Frequency'),
    (rgb_sum, '(R+G+B) Frequency', '(R+G+B) vs Clear Frequency'),
]
for panel_position, (x_values, x_label, panel_title) in enumerate(clear_panels, start=1):
    plt.subplot(1, 4, panel_position)
    plt.scatter(x_values, frequency_df['Clear_Frequency'], color=colors)
    plt.xlabel(x_label, **AXIS_FONT)
    plt.ylabel('Clear Frequency', **AXIS_FONT)
    if TITLE_DISPLAY:
        plt.title(panel_title, **TITLE_FONT)

# Adjust and display the plots
plt.tight_layout()
plt.show()

# Save figure
if SAVE_FIGURE:
    save_fig(fig, f"Relationships_of_RGB_and_Clear_Frequency_{COLORIMETRY_SYSTEM}", DATASET_NAME, DOCUMENT_DIRECTORY)

### 2.2.5. Plot RGB-Frequency-Ratio

In [None]:
# Ratio of each colour-channel frequency (and of their sum) to the clear-channel frequency
red_fraction = [red / clear for red, clear in zip(red_frequencies, clear_frequencies)]
green_fraction = [green / clear for green, clear in zip(green_frequencies, clear_frequencies)]
blue_fraction = [blue / clear for blue, clear in zip(blue_frequencies, clear_frequencies)]
rgb_sum_fraction = [
    (red + green + blue) / clear
    for red, green, blue, clear in zip(
        red_frequencies, green_frequencies, blue_frequencies, clear_frequencies
    )
]

In [None]:
# Four panels: one per channel ratio plus the summed ratio
fig, axs = plt.subplots(1, 4, figsize=(FIGURE_SIZE[0]*3, FIGURE_SIZE[1]))

# (values, ratio name used for the y-label and title, y-limits)
fraction_panels = [
    (red_fraction, 'Red/Clear', [0, 1]),
    (green_fraction, 'Green/Clear', [0, 1]),
    (blue_fraction, 'Blue/Clear', [0, 1]),
    (rgb_sum_fraction, '(R+G+B)/Clear', [0, 3]),  # Adjust the y-limit if necessary
]
for panel_ax, (panel_values, ratio_name, y_limits) in zip(axs, fraction_panels):
    panel_ax.scatter(labels, panel_values, color=colors)
    panel_ax.set_xlabel(f"{NAME_OF_LABEL} ({LABEL_UNIT})", **AXIS_FONT)
    panel_ax.set_ylabel(ratio_name, **AXIS_FONT)
    panel_ax.set_ylim(y_limits)
    if TITLE_DISPLAY:
        panel_ax.set_title(f'{NAME_OF_LABEL} vs {ratio_name}', **TITLE_FONT)

# Display the plots
plt.tight_layout()
plt.show()

# Save figure
if SAVE_FIGURE:
    save_fig(fig, f"RGB_Frequencies_Over_Clear_Frequency_{COLORIMETRY_SYSTEM}", DATASET_NAME, DOCUMENT_DIRECTORY)

### 2.2.6. Plot RGB-Frequency-Ratio over RGB sum

In [None]:
# Share of each channel in the summed R+G+B frequency, and the sum itself
frequency_triples = list(zip(red_frequencies, green_frequencies, blue_frequencies))
red_freq_normalized = [red / (red + green + blue) for red, green, blue in frequency_triples]
green_freq_normalized = [green / (red + green + blue) for red, green, blue in frequency_triples]
blue_freq_normalized = [blue / (red + green + blue) for red, green, blue in frequency_triples]
rgb_combined_freq = [red + green + blue for red, green, blue in frequency_triples]

In [None]:
# Three channel-share panels plus one panel for the summed frequency
fig, axs = plt.subplots(1, 4, figsize=(FIGURE_SIZE[0]*3, FIGURE_SIZE[1]))

# (values, y-label, title suffix, y-limits or None to keep autoscaling)
share_panels = [
    (red_freq_normalized, 'R/(R+G+B) Frequency', 'R/(R+G+B) Frequency', [0, 1]),
    (green_freq_normalized, 'G/(R+G+B) Frequency', 'G/(R+G+B) Frequency', [0, 1]),
    (blue_freq_normalized, 'B/(R+G+B) Frequency', 'B/(R+G+B) Frequency', [0, 1]),
    (rgb_combined_freq, '(R+G+B) Frequency', 'R+G+B Frequency', None),
]
for panel_ax, (panel_values, y_label, title_suffix, y_limits) in zip(axs, share_panels):
    panel_ax.scatter(labels, panel_values, color=colors)
    panel_ax.set_xlabel(f"{NAME_OF_LABEL} ({LABEL_UNIT})", **AXIS_FONT)
    panel_ax.set_ylabel(y_label, **AXIS_FONT)
    if y_limits is not None:
        panel_ax.set_ylim(y_limits)
    if TITLE_DISPLAY:
        panel_ax.set_title(f'{NAME_OF_LABEL} vs {title_suffix}', **TITLE_FONT)

# Display the plots
plt.tight_layout()
plt.show()

# Save figure
if SAVE_FIGURE:
    save_fig(fig, f"RGB_Frequency_Fractions_Over_Sum_{COLORIMETRY_SYSTEM}", DATASET_NAME, DOCUMENT_DIRECTORY)

## 2.3. RGB

In [None]:
# Tag embedded in the names of the figures saved by the cells of this section
COLORIMETRY_SYSTEM = "rgb"

### 2.3.1. Plot overall data

In [None]:
# Single axes showing the three RGB channels against the label
fig, axs = plt.subplots(figsize=FIGURE_SIZE)

# (position inside the RGB triple, legend label, marker colour)
rgb_series = [(0, 'Red', 'red'), (1, 'Green', 'green'), (2, 'Blue', 'blue')]
for channel_index, series_label, series_color in rgb_series:
    channel_values = [ref_key[0][channel_index] for ref_key in reference]
    axs.scatter(labels, channel_values, label=series_label, color=series_color, alpha=0.5)

axs.set_xlabel(f"{NAME_OF_LABEL} ({LABEL_UNIT})", **AXIS_FONT)
axs.set_ylabel('RGB Value', **AXIS_FONT)
axs.set_ylim([0, 255])
if TITLE_DISPLAY:
    axs.set_title(f'{NAME_OF_LABEL} vs RGB', **TITLE_FONT)

# Legend placed above the axes, one column per channel
axs.legend(loc='upper center', bbox_to_anchor=(0.5, 1.15), prop=LEGEND_FONT, framealpha=1, ncol=3)

# Display the plots
plt.tight_layout()
plt.show()

# Save figure
if SAVE_FIGURE:
    save_fig(fig, f"Overview_of_Dataset_{COLORIMETRY_SYSTEM}", DATASET_NAME, DOCUMENT_DIRECTORY)

### 2.3.2. Plot each feature

In [None]:
# One panel per RGB channel
fig, axs = plt.subplots(1, 3, figsize=(FIGURE_SIZE[0]*2, FIGURE_SIZE[1]))

# Channel names in the order they appear inside the RGB triple
for channel_index, channel_name in enumerate(['Red', 'Green', 'Blue']):
    panel_ax = axs[channel_index]
    channel_values = [ref_key[0][channel_index] for ref_key in reference]
    panel_ax.scatter(labels, channel_values, color=colors)
    panel_ax.set_xlabel(f"{NAME_OF_LABEL} ({LABEL_UNIT})", **AXIS_FONT)
    panel_ax.set_ylabel(f'{channel_name} Value', **AXIS_FONT)
    panel_ax.set_ylim([0, 255])
    if TITLE_DISPLAY:
        panel_ax.set_title(f'{NAME_OF_LABEL} vs {channel_name}', **TITLE_FONT)

# Display the plots
plt.tight_layout()
plt.show()

# Save figure
if SAVE_FIGURE:
    save_fig(fig, f"Each_Channel_of_Dataset_{COLORIMETRY_SYSTEM}", DATASET_NAME, DOCUMENT_DIRECTORY)

### 2.3.3. Plot RGB-Ratio

In [None]:
# Red/Blue and Green/Blue ratios; a blue value of 0 yields a ratio of 0
red_over_blue = [
    ref_key[0][0] / ref_key[0][2] if ref_key[0][2] != 0 else 0
    for ref_key in reference
]
green_over_blue = [
    ref_key[0][1] / ref_key[0][2] if ref_key[0][2] != 0 else 0
    for ref_key in reference
]

In [None]:
# Two panels: Red/Blue and Green/Blue against the label
fig, axs = plt.subplots(1, 2, figsize=(FIGURE_SIZE[0]*1.5, FIGURE_SIZE[1]))

# (values, ratio name used for the y-label and title)
blue_ratio_panels = [
    (red_over_blue, 'Red/Blue Ratio'),
    (green_over_blue, 'Green/Blue Ratio'),
]
for panel_ax, (panel_values, ratio_name) in zip(axs, blue_ratio_panels):
    panel_ax.scatter(labels, panel_values, color=colors)
    panel_ax.set_xlabel(f"{NAME_OF_LABEL} ({LABEL_UNIT})", **AXIS_FONT)
    panel_ax.set_ylabel(ratio_name, **AXIS_FONT)
    if TITLE_DISPLAY:
        panel_ax.set_title(f'{NAME_OF_LABEL} vs {ratio_name}', **TITLE_FONT)

# Display the plots
plt.tight_layout()
plt.show()

# Save figure
if SAVE_FIGURE:
    save_fig(fig, f"RGB_Over_Blue_{COLORIMETRY_SYSTEM}", DATASET_NAME, DOCUMENT_DIRECTORY)

### 2.3.4. Plot RGB-Ratio over sum

In [None]:
# Per-channel values taken from the RGB triple of every reference key
red_values = [ref_key[0][0] for ref_key in reference]
green_values = [ref_key[0][1] for ref_key in reference]
blue_values = [ref_key[0][2] for ref_key in reference]

# Share of each channel in R+G+B
channel_triples = list(zip(red_values, green_values, blue_values))
red_normalized = [red / (red + green + blue) for red, green, blue in channel_triples]
green_normalized = [green / (red + green + blue) for red, green, blue in channel_triples]
blue_normalized = [blue / (red + green + blue) for red, green, blue in channel_triples]

In [None]:
# One panel per channel share
fig, axs = plt.subplots(1, 3, figsize=(FIGURE_SIZE[0]*2, FIGURE_SIZE[1]))

# (values, share name used for the y-label and title)
rgb_share_panels = [
    (red_normalized, 'R/(R+G+B)'),
    (green_normalized, 'G/(R+G+B)'),
    (blue_normalized, 'B/(R+G+B)'),
]
for panel_ax, (panel_values, share_name) in zip(axs, rgb_share_panels):
    panel_ax.scatter(labels, panel_values, color=colors)
    panel_ax.set_xlabel(f"{NAME_OF_LABEL} ({LABEL_UNIT})", **AXIS_FONT)
    panel_ax.set_ylabel(share_name, **AXIS_FONT)
    panel_ax.set_ylim([0, 1])
    if TITLE_DISPLAY:
        panel_ax.set_title(f'{NAME_OF_LABEL} vs {share_name}', **TITLE_FONT)

# Display the plots
plt.tight_layout()
plt.show()

# Save figure
if SAVE_FIGURE:
    save_fig(fig, f"RGB_Fraction_Over_Sum_{COLORIMETRY_SYSTEM}", DATASET_NAME, DOCUMENT_DIRECTORY)

### 2.3.5. Plot RGB channel over RGB-Frequency sum

In [None]:
# Sum of the three RGB values of every reference key
rgb_sum_numbers = [k[0][0] + k[0][1] + k[0][2] for k in reference.keys()]

In [None]:
# Create figure and axis
fig, ax = plt.subplots(figsize=FIGURE_SIZE)

# Summed RGB values against summed RGB frequencies.
# NOTE(review): the x values come from `reference` and the y values from
# `frequencies`; confirm both always have the same number of entries.
ax.scatter(rgb_sum_numbers, rgb_combined_freq, color=colors)

# Axis labels and optional title
ax.set_xlabel('Sum of RGB Numbers', **AXIS_FONT)
ax.set_ylabel('Sum of RGB Frequencies', **AXIS_FONT)
if TITLE_DISPLAY:
    ax.set_title('Relationship between Sum of RGB Numbers and RGB Frequencies', **TITLE_FONT)

# Display the plot
plt.tight_layout()
plt.show()

# Save the figure
if SAVE_FIGURE:
    save_fig(fig, f"Sum_of_RGB_Numbers_VS_Sum_of_RGB_Frequencies_{COLORIMETRY_SYSTEM}", DATASET_NAME, DOCUMENT_DIRECTORY)

## 2.4. CMYK

In [None]:
# Tag embedded in the names of the figures saved by the cells of this section
COLORIMETRY_SYSTEM = "cmyk"

In [None]:
# Convert the RGB triple of every reference key to CMYK once,
# then split the result into one list per component
cmyk_values = [rgb_to_cmyk(*ref_key[0]) for ref_key in reference]
cyan_reference = [cmyk[0] for cmyk in cmyk_values]
magenta_reference = [cmyk[1] for cmyk in cmyk_values]
yellow_reference = [cmyk[2] for cmyk in cmyk_values]
black_reference = [cmyk[3] for cmyk in cmyk_values]

### 2.4.1. Plot overall data

In [None]:
# Single axes showing the four CMYK components against the label
fig, axs = plt.subplots(figsize=FIGURE_SIZE)

# (marker colour, legend label, values), drawn in this order
cmyk_series = [
    ('cyan', 'Cyan', cyan_reference),
    ('magenta', 'Magenta', magenta_reference),
    ('yellow', 'Yellow', yellow_reference),
    ('black', 'Black', black_reference),
]
for series_color, series_label, series_values in cmyk_series:
    axs.scatter(labels, series_values, color=series_color, label=series_label, alpha=0.5)

axs.set_xlabel(f"{NAME_OF_LABEL} ({LABEL_UNIT})", **AXIS_FONT)
axs.set_ylabel('CMYK Value', **AXIS_FONT)
axs.set_ylim([0, 1])
if TITLE_DISPLAY:
    axs.set_title(f'{NAME_OF_LABEL} vs CMYK', **TITLE_FONT)

# Legend placed above the axes, one column per component
axs.legend(loc='upper center', bbox_to_anchor=(0.5, 1.15), prop=LEGEND_FONT, framealpha=1, ncol=4)

# Display the plots
plt.tight_layout()
plt.show()

# Save figure
if SAVE_FIGURE:
    save_fig(fig, f"Overview_of_Dataset_{COLORIMETRY_SYSTEM}", DATASET_NAME, DOCUMENT_DIRECTORY)

### 2.4.2. Plot each feature

In [None]:
# One panel per CMYK component
fig, axs = plt.subplots(1, 4, figsize=(FIGURE_SIZE[0]*2, FIGURE_SIZE[1]))

# (values, component name used for the y-label and title)
cmyk_panels = [
    (cyan_reference, 'Cyan'),
    (magenta_reference, 'Magenta'),
    (yellow_reference, 'Yellow'),
    (black_reference, 'Black'),
]
for panel_ax, (panel_values, component_name) in zip(axs, cmyk_panels):
    panel_ax.scatter(labels, panel_values, color=colors)
    panel_ax.set_xlabel(f"{NAME_OF_LABEL} ({LABEL_UNIT})", **AXIS_FONT)
    panel_ax.set_ylabel(f'{component_name} Value', **AXIS_FONT)
    panel_ax.set_ylim([0, 1])
    if TITLE_DISPLAY:
        panel_ax.set_title(f'{NAME_OF_LABEL} vs {component_name}', **TITLE_FONT)

# Display the plots
plt.tight_layout()
plt.show()

# Save figure
if SAVE_FIGURE:
    save_fig(fig, f"Each_Channel_of_Dataset_{COLORIMETRY_SYSTEM}", DATASET_NAME, DOCUMENT_DIRECTORY)

### 2.4.3. Plot CMYK-Ratio

In [None]:
# Cyan, magenta and yellow relative to black; a black value of 0 yields a ratio of 0
cyan_over_black = [
    cyan / black if black != 0 else 0
    for cyan, black in zip(cyan_reference, black_reference)
]
magenta_over_black = [
    magenta / black if black != 0 else 0
    for magenta, black in zip(magenta_reference, black_reference)
]
yellow_over_black = [
    yellow / black if black != 0 else 0
    for yellow, black in zip(yellow_reference, black_reference)
]

In [None]:
# One panel per component-to-black ratio
fig, axs = plt.subplots(1, 3, figsize=(FIGURE_SIZE[0]*2, FIGURE_SIZE[1]))

# (values, ratio name used for the y-label and title)
black_ratio_panels = [
    (cyan_over_black, 'Cyan/Black Ratio'),
    (magenta_over_black, 'Magenta/Black Ratio'),
    (yellow_over_black, 'Yellow/Black Ratio'),
]
for panel_ax, (panel_values, ratio_name) in zip(axs, black_ratio_panels):
    panel_ax.scatter(labels, panel_values, color=colors)
    panel_ax.set_xlabel(f"{NAME_OF_LABEL} ({LABEL_UNIT})", **AXIS_FONT)
    panel_ax.set_ylabel(ratio_name, **AXIS_FONT)
    if TITLE_DISPLAY:
        panel_ax.set_title(f'{NAME_OF_LABEL} vs {ratio_name}', **TITLE_FONT)

# Display the plots
plt.tight_layout()
plt.show()

# Save figure
if SAVE_FIGURE:
    save_fig(fig, f"CMYK_Channel_of_Dataset_{COLORIMETRY_SYSTEM}", DATASET_NAME, DOCUMENT_DIRECTORY)

### 2.4.4. Plot CMYK channel over RGB sum

In [None]:
# Each CMYK component divided by the summed RGB value of the same sample
cyan_normalized = [cyan / total for cyan, total in zip(cyan_reference, rgb_sum_numbers)]
magenta_normalized = [magenta / total for magenta, total in zip(magenta_reference, rgb_sum_numbers)]
yellow_normalized = [yellow / total for yellow, total in zip(yellow_reference, rgb_sum_numbers)]
black_normalized = [black / total for black, total in zip(black_reference, rgb_sum_numbers)]

In [None]:
# One panel per normalized CMYK component
fig, axs = plt.subplots(1, 4, figsize=(FIGURE_SIZE[0]*2.5, FIGURE_SIZE[1]))

# (values, component name used for the y-label and title)
normalized_panels = [
    (cyan_normalized, 'Cyan'),
    (magenta_normalized, 'Magenta'),
    (yellow_normalized, 'Yellow'),
    (black_normalized, 'Black'),
]
for panel_ax, (panel_values, component_name) in zip(axs, normalized_panels):
    panel_ax.scatter(labels, panel_values, color=colors)
    panel_ax.set_xlabel(f"{NAME_OF_LABEL} ({LABEL_UNIT})", **AXIS_FONT)
    panel_ax.set_ylabel(f'{component_name} Value (Normalized)', **AXIS_FONT)
    # The y-axis runs from 0 up to the largest value of the panel
    panel_ax.set_ylim([0, max(panel_values)])
    if TITLE_DISPLAY:
        panel_ax.set_title(f'{NAME_OF_LABEL} vs Normalized {component_name}', **TITLE_FONT)

# Display the plots
plt.tight_layout()
plt.show()

# Save figure
if SAVE_FIGURE:
    save_fig(fig, f"Normalized_CMYK_vs_RGB_Sum_{COLORIMETRY_SYSTEM}", DATASET_NAME, DOCUMENT_DIRECTORY)

## 2.5. HSL

In [None]:
# Tag embedded in the names of the figures saved by the cells of this section
COLORIMETRY_SYSTEM = "hsl"

In [None]:
# Convert the RGB triple of every reference key to HSL once,
# then split the result into one list per component
hsl_values = [rgb_to_hsl(*ref_key[0]) for ref_key in reference]
hue_reference = [hsl[0] for hsl in hsl_values]
saturation_reference = [hsl[1] for hsl in hsl_values]
lightness_reference = [hsl[2] for hsl in hsl_values]

### 2.5.1. Plot overall data

In [None]:
# Single axes showing the three HSL components against the label
fig, axs = plt.subplots(figsize=FIGURE_SIZE)

# (legend label, values); no explicit colour is given, so the drawing order
# determines which default colour each series receives
hsl_series = [
    ('Hue', hue_reference),
    ('Saturation', saturation_reference),
    ('Lightness', lightness_reference),
]
for series_label, series_values in hsl_series:
    axs.scatter(labels, series_values, label=series_label, alpha=0.5)

axs.set_xlabel(f"{NAME_OF_LABEL} ({LABEL_UNIT})", **AXIS_FONT)
axs.set_ylabel('HSL Value', **AXIS_FONT)
axs.set_ylim([0, 1])
if TITLE_DISPLAY:
    axs.set_title(f'{NAME_OF_LABEL} vs HSL', **TITLE_FONT)

# Legend placed above the axes, one column per component
axs.legend(loc='upper center', bbox_to_anchor=(0.5, 1.15), prop=LEGEND_FONT, framealpha=1, ncol=3)

# Display the plots
plt.tight_layout()
plt.show()

# Save figure
if SAVE_FIGURE:
    save_fig(fig, f"Overview_of_Dataset_{COLORIMETRY_SYSTEM}", DATASET_NAME, DOCUMENT_DIRECTORY)

### 2.5.2. Plot each feature

In [None]:
# One panel per HSL component
fig, axs = plt.subplots(1, 3, figsize=(FIGURE_SIZE[0]*2, FIGURE_SIZE[1]))

# (values, component name used for the y-label and title)
hsl_panels = [
    (hue_reference, 'Hue'),
    (saturation_reference, 'Saturation'),
    (lightness_reference, 'Lightness'),
]
for panel_ax, (panel_values, component_name) in zip(axs, hsl_panels):
    panel_ax.scatter(labels, panel_values, color=colors)
    panel_ax.set_xlabel(f"{NAME_OF_LABEL} ({LABEL_UNIT})", **AXIS_FONT)
    panel_ax.set_ylabel(f'{component_name} Value', **AXIS_FONT)
    panel_ax.set_ylim([0, 1])
    if TITLE_DISPLAY:
        panel_ax.set_title(f'{NAME_OF_LABEL} vs {component_name}', **TITLE_FONT)

# Display the plots
plt.tight_layout()
plt.show()

# Save figure
if SAVE_FIGURE:
    save_fig(fig, f"Each_Channel_of_Dataset_{COLORIMETRY_SYSTEM}", DATASET_NAME, DOCUMENT_DIRECTORY)

### 2.5.3. Plot HSL-Ratio

In [None]:
# Hue and saturation divided by lightness, element by element.
# A lightness of exactly 0 gives a ratio of 0 instead of a division by zero.
hue_over_lightness = [
    0 if light == 0 else hue / light
    for hue, light in zip(hue_reference, lightness_reference)
]
saturation_over_lightness = [
    0 if light == 0 else sat / light
    for sat, light in zip(saturation_reference, lightness_reference)
]

In [None]:
# Create subplots: two lightness-normalised ratios plus the lightness channel itself
fig, axs = plt.subplots(1, 3, figsize=(FIGURE_SIZE[0]*2, FIGURE_SIZE[1]))

# One entry per panel: (title suffix, y-axis label, values, upper y-limit).
# The ratio panels get their upper limit from the data (+0.1 headroom);
# default=0 keeps max() from raising ValueError when there are no reference colours.
ratio_panels = (
    ('Hue/Lightness', 'Hue/Lightness Ratio', hue_over_lightness,
     max(hue_over_lightness, default=0) + 0.1),
    ('Saturation/Lightness', 'Saturation/Lightness Ratio', saturation_over_lightness,
     max(saturation_over_lightness, default=0) + 0.1),
    ('Lightness', 'Lightness Value', lightness_reference, 1),
)

for panel_ax, (title_suffix, y_label, panel_values, y_top) in zip(axs, ratio_panels):
    panel_ax.scatter(labels, panel_values, color=colors)
    panel_ax.set_xlabel(f"{NAME_OF_LABEL} ({LABEL_UNIT})", **AXIS_FONT)
    panel_ax.set_ylabel(y_label, **AXIS_FONT)
    panel_ax.set_ylim([0, y_top])
    if TITLE_DISPLAY:
        panel_ax.set_title(f'{NAME_OF_LABEL} vs {title_suffix}', **TITLE_FONT)

# Display the plots
plt.tight_layout()
plt.show()

# Save figure
if SAVE_FIGURE:
    save_fig(fig, f"HSL_Channel_Over_Lightness_{COLORIMETRY_SYSTEM}", DATASET_NAME, DOCUMENT_DIRECTORY)

## 2.6. HSV

In [None]:
# Tag embedded in the file names of the figures saved by the following cells
COLORIMETRY_SYSTEM = "hsv"

In [None]:
# Convert every reference colour to HSV once, then split the result by channel
# (indices 0/1/2 are used as hue/saturation/value).
# NOTE(review): assumes rgb_to_hsv has no side effects, so one call per colour is equivalent — confirm.
hsv_reference = [rgb_to_hsv(k[0][0], k[0][1], k[0][2]) for k in reference.keys()]
hue_reference = [hsv[0] for hsv in hsv_reference]
saturation_reference = [hsv[1] for hsv in hsv_reference]
value_reference = [hsv[2] for hsv in hsv_reference]

### 2.6.1. Plot overall data

In [None]:
# Overview figure: the three HSV channels of the reference colours against the label, on one axis
fig, axs = plt.subplots(figsize=FIGURE_SIZE)

# Scatter order is kept (Hue, Saturation, Value) because it fixes the colour cycle and legend order
for channel_name, channel_values in (
    ('Hue', hue_reference),
    ('Saturation', saturation_reference),
    ('Value', value_reference),
):
    axs.scatter(labels, channel_values, label=channel_name, alpha=0.5)

# Shared axis labels; the y-range is pinned to [0, 1]
axs.set_xlabel(f"{NAME_OF_LABEL} ({LABEL_UNIT})", **AXIS_FONT)
axs.set_ylabel('HSV Value', **AXIS_FONT)
axs.set_ylim([0, 1])

if TITLE_DISPLAY:
    axs.set_title(f'{NAME_OF_LABEL} vs HSV', **TITLE_FONT)

# Add legend above the axes, one column per channel
axs.legend(loc='upper center', bbox_to_anchor=(0.5, 1.15), prop=LEGEND_FONT, framealpha=1, ncol=3)

# Display the plot
plt.tight_layout()
plt.show()

# Save figure
if SAVE_FIGURE:
    save_fig(fig, f"Overview_of_Dataset_{COLORIMETRY_SYSTEM}", DATASET_NAME, DOCUMENT_DIRECTORY)

### 2.6.2. Plot each feature

In [None]:
# Create one subplot per HSV channel
fig, axs = plt.subplots(1, 3, figsize=(FIGURE_SIZE[0]*2, FIGURE_SIZE[1]))

# (title suffix, y-axis label, values) for each panel, left to right
hsv_panels = (
    ('Hue', 'Hue Value', hue_reference),
    ('Saturation', 'Saturation Value', saturation_reference),
    ('Value', 'Value', value_reference),
)

# Every panel plots label vs channel, with each point drawn in the matching entry of `colors`
for panel_ax, (title_suffix, y_label, channel_values) in zip(axs, hsv_panels):
    panel_ax.scatter(labels, channel_values, color=colors)
    panel_ax.set_xlabel(f"{NAME_OF_LABEL} ({LABEL_UNIT})", **AXIS_FONT)
    panel_ax.set_ylabel(y_label, **AXIS_FONT)
    panel_ax.set_ylim([0, 1])
    if TITLE_DISPLAY:
        panel_ax.set_title(f'{NAME_OF_LABEL} vs {title_suffix}', **TITLE_FONT)

# Display the plots
plt.tight_layout()
plt.show()

# Save figure
if SAVE_FIGURE:
    save_fig(fig, f"Each_Channel_of_Dataset_{COLORIMETRY_SYSTEM}", DATASET_NAME, DOCUMENT_DIRECTORY)

### 2.6.3. Plot HSV-Ratio

In [None]:
# Hue and saturation divided by the HSV value channel, element by element.
# A value of exactly 0 gives a ratio of 0 instead of a division by zero.
hue_over_value = [
    0 if val == 0 else hue / val
    for hue, val in zip(hue_reference, value_reference)
]
saturation_over_value = [
    0 if val == 0 else sat / val
    for sat, val in zip(saturation_reference, value_reference)
]

In [None]:
# Create subplots: one panel per value-normalised ratio
fig, axs = plt.subplots(1, 2, figsize=(FIGURE_SIZE[0]*2, FIGURE_SIZE[1]))

# (y-axis label, values) for each panel; the label is reused in the panel title.
# No y-limits are set here, so matplotlib autoscales each panel.
hsv_ratio_panels = (
    ('Hue/Value Ratio', hue_over_value),
    ('Saturation/Value Ratio', saturation_over_value),
)

for panel_ax, (ratio_label, ratio_values) in zip(axs, hsv_ratio_panels):
    panel_ax.scatter(labels, ratio_values, color=colors)
    panel_ax.set_xlabel(f"{NAME_OF_LABEL} ({LABEL_UNIT})", **AXIS_FONT)
    panel_ax.set_ylabel(ratio_label, **AXIS_FONT)
    if TITLE_DISPLAY:
        panel_ax.set_title(f'{NAME_OF_LABEL} vs {ratio_label}', **TITLE_FONT)

# Display the plots
plt.tight_layout()
plt.show()

# Save figure
if SAVE_FIGURE:
    save_fig(fig, f"HSV_Channel_Over_Value_{COLORIMETRY_SYSTEM}", DATASET_NAME, DOCUMENT_DIRECTORY)

## 2.7. CIELAB

In [None]:
# Tag embedded in the file names of the figures saved by the following cells
COLORIMETRY_SYSTEM = "lab"

In [None]:
# Convert every reference colour to CIELAB once, then split the result by channel
# (indices 0/1/2 are used as lightness/a*/b*).
# NOTE(review): assumes rgb_to_lab has no side effects, so one call per colour is equivalent — confirm.
lab_reference = [rgb_to_lab(k[0][0], k[0][1], k[0][2]) for k in reference.keys()]
lightness_reference = [lab[0] for lab in lab_reference]
a_reference = [lab[1] for lab in lab_reference]
b_reference = [lab[2] for lab in lab_reference]


### 2.7.1. Plot overall data

In [None]:
# Overview figure: the three CIELAB channels of the reference colours against the label, on one axis
fig, axs = plt.subplots(figsize=FIGURE_SIZE)

# Scatter order is kept (Lightness, a*, b*) because it fixes the colour cycle and legend order
for channel_name, channel_values in (
    ('Lightness', lightness_reference),
    ('Channel a*', a_reference),
    ('Channel b*', b_reference),
):
    axs.scatter(labels, channel_values, label=channel_name, alpha=0.5)

# Shared axis labels; the y-range is pinned to [-100, 100]
axs.set_xlabel(f"{NAME_OF_LABEL} ({LABEL_UNIT})", **AXIS_FONT)
axs.set_ylabel('CIELAB Value', **AXIS_FONT)
axs.set_ylim([-100, 100])

if TITLE_DISPLAY:
    axs.set_title(f'{NAME_OF_LABEL} vs CIELAB', **TITLE_FONT)

# Add legend above the axes, one column per channel
axs.legend(loc='upper center', bbox_to_anchor=(0.5, 1.15), prop=LEGEND_FONT, framealpha=1, ncol=3)

# Display the plot
plt.tight_layout()
plt.show()

# Save figure
if SAVE_FIGURE:
    save_fig(fig, f"Overview_of_Dataset_{COLORIMETRY_SYSTEM}", DATASET_NAME, DOCUMENT_DIRECTORY)

### 2.7.2. Plot each feature

In [None]:
# Create one subplot per CIELAB channel
fig, axs = plt.subplots(1, 3, figsize=(FIGURE_SIZE[0]*2, FIGURE_SIZE[1]))

# (channel name, values, fixed y-range) for each panel, left to right.
# NOTE(review): a*/b* points outside [-50, 50] fall outside the fixed axis range and are not shown.
lab_panels = (
    ('Lightness', lightness_reference, [0, 100]),
    ('Channel a*', a_reference, [-50, 50]),
    ('Channel b*', b_reference, [-50, 50]),
)

# Every panel plots label vs channel, with each point drawn in the matching entry of `colors`
for panel_ax, (channel_name, channel_values, y_range) in zip(axs, lab_panels):
    panel_ax.scatter(labels, channel_values, color=colors)
    panel_ax.set_xlabel(f"{NAME_OF_LABEL} ({LABEL_UNIT})", **AXIS_FONT)
    panel_ax.set_ylabel(f'{channel_name} Value', **AXIS_FONT)
    panel_ax.set_ylim(y_range)
    if TITLE_DISPLAY:
        panel_ax.set_title(f'{NAME_OF_LABEL} vs {channel_name}', **TITLE_FONT)

# Display the plots
plt.tight_layout()
plt.show()

# Save figure
if SAVE_FIGURE:
    save_fig(fig, f"Each_Channel_of_Dataset_{COLORIMETRY_SYSTEM}", DATASET_NAME, DOCUMENT_DIRECTORY)

### 2.7.3. Plot CIELAB-Ratio

In [None]:
# a* and b* divided by CIELAB lightness, element by element.
# A lightness of exactly 0 gives a ratio of 0 instead of a division by zero.
a_over_lightness = [
    0 if light == 0 else a_val / light
    for a_val, light in zip(a_reference, lightness_reference)
]
b_over_lightness = [
    0 if light == 0 else b_val / light
    for b_val, light in zip(b_reference, lightness_reference)
]

In [None]:
# Create subplots: one panel per lightness-normalised ratio
fig, axs = plt.subplots(1, 2, figsize=(FIGURE_SIZE[0]*2, FIGURE_SIZE[1]))

# (y-axis label, values) for each panel; the label is reused in the panel title.
# No y-limits are set here, so matplotlib autoscales each panel.
lab_ratio_panels = (
    ('a*/Lightness Ratio', a_over_lightness),
    ('b*/Lightness Ratio', b_over_lightness),
)

for panel_ax, (ratio_label, ratio_values) in zip(axs, lab_ratio_panels):
    panel_ax.scatter(labels, ratio_values, color=colors)
    panel_ax.set_xlabel(f"{NAME_OF_LABEL} ({LABEL_UNIT})", **AXIS_FONT)
    panel_ax.set_ylabel(ratio_label, **AXIS_FONT)
    if TITLE_DISPLAY:
        panel_ax.set_title(f'{NAME_OF_LABEL} vs {ratio_label}', **TITLE_FONT)

# Display the plots
plt.tight_layout()
plt.show()

# Save figure
if SAVE_FIGURE:
    save_fig(fig, f"CIELAB_Channel_Over_Lightness_{COLORIMETRY_SYSTEM}", DATASET_NAME, DOCUMENT_DIRECTORY)

# 3. Metrics

## 3.1. Local parameters

In [None]:
# Show the metric name as a title above each subplot of the metrics figure
TITLE_DISPLAY = True
# Figure size handed to plt.subplots as figsize
FIGURE_SIZE = (10, 10)
# Write the metrics figure to disk through save_fig
SAVE_FIGURE = True

## 3.2. Load Data

In [None]:
# Identifiers used in file names / column names, each paired with a display string
color_systems = ["cmyk", "hsl", "hsv", "lab", "rgb"]
color_systems_string = ["CMYK", "HSL", "HSV", "CIELAB", "RGB"]
models = ["random_forest", "gradient_boosting", "svm", "mlp"]
models_string = ["Random Forest", "Gradient Boosting", "SVM", "MLP"]
metrics = ["MAE", "MSE", "RMSE", "R^2"]
metrics_string = ["MAE", "MSE", "RMSE", r"$R^2$"]

# Loaded metric tables, keyed by colour-system identifier
datasets = {}

# Load one metrics CSV per colour system using pandas
metrics_dir = os.path.join(DATA_DIRECTORY, "metrics")
for color_system in color_systems:
    metrics_file = os.path.join(metrics_dir, f"metrics_{color_system}_{DATASET_NAME}.csv")
    if not os.path.exists(metrics_file):
        print(f"No metric file is found at: {metrics_file}")
        continue
    # The try covers a single file so that one unreadable CSV does not
    # prevent the remaining colour systems from being loaded.
    try:
        df = pd.read_csv(metrics_file)
    except Exception as e:
        print(f"Loading metric file error: {e}")
    else:
        datasets[color_system] = df

## 3.3. Visualize data

In [None]:
# Grouped bar chart: one subplot per metric, one group per model, one bar per colour system.

# Adjusted model names for two lines
models_string = ["Random\nForest", "Gradient\nBoosting", "SVM", "MLP"]

# Plotting
fig, axes = plt.subplots(2, 2, figsize=FIGURE_SIZE)
axes = axes.ravel()

# Width of a single bar
bar_width = 0.2

# The gap we want between groups of bars for each model
gap = 0.2

# Index of each model group along the x-axis
model_index = np.arange(len(models))

# Report colour systems whose metrics were never loaded; their bars are omitted
# instead of failing with a KeyError on datasets[color_system].
missing_systems = [name for name in color_systems if name not in datasets]
if missing_systems:
    print(f"No metrics loaded for: {', '.join(missing_systems)} (bars omitted)")

for i, metric in enumerate(metrics):
    bars_positions_list = []
    for idx, color_system in enumerate(color_systems):
        # Adjusting the bar's x-coordinates to add gaps
        bar_positions = model_index + idx * bar_width + gap * model_index
        # Positions are recorded even for a missing system so the tick centring stays the same
        bars_positions_list.append(bar_positions)
        if color_system in datasets:
            # NOTE(review): assumes the rows of each metrics table follow the order of `models` — confirm.
            axes[i].bar(bar_positions, datasets[color_system][metric], width=bar_width,
                        label=color_systems_string[idx])
    if TITLE_DISPLAY:
        axes[i].set_title(metrics_string[i], **TITLE_FONT)
    # Getting the average position of each group of bars for x-axis labels
    average_positions = np.mean(bars_positions_list, axis=0)
    axes[i].set_xticks(average_positions)
    axes[i].set_xticklabels(models_string, **AXIS_FONT)
    axes[i].set_ylabel(metrics_string[i], **AXIS_FONT)
    if metric == "R^2":  # Bound R^2 plot from 0 to 1
        axes[i].set_ylim([0, 1])

# Add a single legend to the figure.
# Dedicated names are used so the notebook-level `labels` (x-values of the scatter plots) is not overwritten.
# NOTE(review): the legend is anchored at y=1.05 in figure coordinates, and save_fig calls
# fig.savefig without bbox_inches, so the legend may be cropped in the saved PNG — confirm.
legend_handles, legend_labels = axes[0].get_legend_handles_labels()
fig.legend(legend_handles, legend_labels, loc='upper center', ncol=len(color_systems),
           prop=LEGEND_FONT, bbox_to_anchor=(0.5, 1.05))

# Display the plots
plt.tight_layout()
plt.show()

# Save the figure
if SAVE_FIGURE:
    save_fig(fig, "Metric_Scores", DATASET_NAME, DOCUMENT_DIRECTORY)

# 4. Cross-validation

## 4.1. Local parameters

In [None]:
# Data
# Sample identifier inserted into the prediction and test file names below
SAMPLE_NAME = "albumin"
# Directory and file name of the CSV holding the model predictions
PREDICTIONS_DIRECTORY = os.path.join(DATA_DIRECTORY, "predictions")
PREDICTION_FILE = f"prediction_corrected_{SAMPLE_NAME}_{DATASET_NAME}.csv"
# Directory and file name of the CSV holding the test data (read for its 'Conc' column)
TESTS_DIRECTORY = os.path.join(DATA_DIRECTORY, "tests")
TEST_FILE = f"test_{SAMPLE_NAME}_{DATASET_NAME}.csv"

# Plot
# Show a title on the cross-validation figure
TITLE_DISPLAY = True
# Figure size handed to plt.subplots as figsize
FIGURE_SIZE = (5, 5)
# Write the cross-validation figure to disk through save_fig
SAVE_FIGURE = True
# When True, only rows whose 'Conc' lies within FILTER_RANGE are kept after loading
FILTERED = False
# (lower, upper) bounds on 'Conc' used when FILTERED is True
FILTER_RANGE = (10, 80)

## 4.2. Load data

In [None]:
# Load the model predictions and the test table for this sample
prediction_df = pd.read_csv(os.path.join(PREDICTIONS_DIRECTORY, PREDICTION_FILE))
test_df = pd.read_csv(os.path.join(TESTS_DIRECTORY, TEST_FILE))

# Pair every prediction with the 'Conc' entry of the test row that has the same Label_Name.
# NOTE(review): assumes every predicted Label_Name occurs exactly once in test_df; a missing or
# duplicated label would make the .loc lookup fail or return a different number of rows — confirm.
validation_df = pd.DataFrame({
    'Label_Name': prediction_df['Label_Name'],
    'Red': prediction_df['Red'],
    'Green': prediction_df['Green'],
    'Blue': prediction_df['Blue'],
    'Predicted_Label': prediction_df['Predicted_Label'],
    'Conc': test_df.set_index('Label_Name').loc[prediction_df['Label_Name'], 'Conc'].values
})

# Drop rows where 'Conc' is '( - )'.
# .copy() makes the filtered frame independent, so the column assignments below
# do not raise pandas' SettingWithCopyWarning.
validation_df = validation_df[validation_df['Conc'] != '( - )'].copy()

# Now, convert the columns to float
validation_df['Predicted_Label'] = validation_df['Predicted_Label'].astype(float)
validation_df['Conc'] = validation_df['Conc'].astype(float)
validation_df = validation_df.dropna()

# Filter the dataframe based on the criteria
if FILTERED:
    validation_df = validation_df[validation_df['Conc'].between(FILTER_RANGE[0], FILTER_RANGE[1])]

In [None]:
# Preview the first rows of the validation table
validation_df.head()

## 4.3. Visualization

In [None]:
# One (r, g, b) tuple per row of 'validation_df', with each 8-bit channel divided by 255
colors = [
    (red / 255, green / 255, blue / 255)
    for red, green, blue in zip(validation_df['Red'], validation_df['Green'], validation_df['Blue'])
]

In [None]:
# Create the figure: actual vs predicted concentration, each point drawn in its entry of `colors`
fig, ax = plt.subplots(figsize=(FIGURE_SIZE[0], FIGURE_SIZE[1]))

# Plot Conc vs Predicted_Label
ax.scatter(validation_df['Conc'], validation_df['Predicted_Label'], color=colors)
ax.set_xlabel(f"Actual {NAME_OF_LABEL} ({LABEL_UNIT})", **AXIS_FONT)
ax.set_ylabel(f"Predicted {NAME_OF_LABEL} ({LABEL_UNIT})", **AXIS_FONT)

# Identity line (predicted == actual) spanning the combined range of both columns
min_val = min(validation_df['Predicted_Label'].min(), validation_df['Conc'].min())
max_val = max(validation_df['Predicted_Label'].max(), validation_df['Conc'].max())
ax.plot([min_val, max_val], [min_val, max_val], 'r--', label='$R^2=1$ Line')

if TITLE_DISPLAY:
    ax.set_title(f'{NAME_OF_LABEL} Cross-validation', **TITLE_FONT)

# Display the plot; the legend is attached to this axes explicitly rather than to the current axes
plt.tight_layout()
ax.legend(prop=LEGEND_FONT, framealpha=1)
plt.show()

# Save figure
# NOTE(review): COLORIMETRY_SYSTEM is not set in this section, so the file name carries
# whatever value an earlier cell left behind — confirm this matches the prediction file.
if SAVE_FIGURE:
    save_fig(fig, f"Cross-validation_{SAMPLE_NAME}_{COLORIMETRY_SYSTEM}", DATASET_NAME, DOCUMENT_DIRECTORY)

In [None]:
# Columns compared by every statistic below
actual_conc = validation_df['Conc']
predicted_conc = validation_df['Predicted_Label']

# Degree-1 least-squares fit: predicted = slope * actual + intercept
slope, intercept = np.polyfit(actual_conc, predicted_conc, 1)

# Correlation between predicted and actual values (Series.corr with its default method)
correlation = predicted_conc.corr(actual_conc)

# R-squared is taken as the square of that correlation
r_squared = correlation ** 2

# Collect the results in a single-row dataframe
validation_results_df = pd.DataFrame(
    [[r_squared, correlation, slope, intercept]],
    columns=['R-squared', 'Correlation', 'Slope', 'Intercept'],
)

In [None]:
# Show the computed validation statistics
validation_results_df.head()