# Pre-processing data
This section only imports the libraries used in the processing steps. The libraries used are:
1.   Color thief
2.   matplotlib
3.   pandas
4.   Scikit-learn
5.   numpy

After the libraries are imported, the next code cell contains the code that accesses the sample images and extracts the RGB code from each sample.

In [None]:
#Importing library

!pip install colorthief #Installing colorthief
from colorthief import ColorThief #Importing colorthief library
import matplotlib.pyplot as plt #Importing matplotlib to be use in data visualization process
import pandas as pd #Importing pandas to be use in analyzing data process
from sklearn.decomposition import PCA #Importing scikit-learn to be use in PCA transformation process
import numpy as np #Importing numpy to do mathematical process

In [None]:
#Defining program code

class ColorAnalyzer:
    """Collect the dominant color of every image in a list of image files.

    The extracted colors are kept on the instance so that other objects
    (for example a reporting/processing helper) can read them afterwards.
    """

    def __init__(self, image_files):
        """Remember the image list and create empty result containers.

        Args:
            image_files: Iterable of image files; each item is handed to
                ``ColorThief`` unchanged.
        """
        self.image_files = image_files
        # One dominant color per image, filled by extract_dominant_colors()
        self.dominant_colors = []
        # Placeholder for the PCA result
        self.pca_transformed_data = []
        # Placeholder for duplicate color info
        self.duplicates = []

    def _get_dominant_color_from_image(self, image_file):
        """Return the dominant color ColorThief reports for one image.

        ``quality=1`` is passed through to ``ColorThief.get_color``
        (NOTE(review): per the colorthief docs 1 is the highest-quality,
        slowest setting - confirm this is intended).
        """
        return ColorThief(image_file).get_color(quality=1)

    def extract_dominant_colors(self):
        """Extract the dominant color of every image, in input order.

        The result replaces ``self.dominant_colors`` and is also returned.
        """
        print("Extracting dominant colors...")
        extracted = list(map(self._get_dominant_color_from_image, self.image_files))
        self.dominant_colors = extracted
        print(f"Extracted {len(extracted)} dominant colors.")
        return extracted

In [None]:
class ProcessingData:
    """Report on and transform the dominant colors held by an analyzer object.

    The analyzer is expected to expose ``dominant_colors`` (a list of
    ``(r, g, b)`` values in the 0-255 range); PCA output and duplicate
    information are written back onto it.
    """

    def __init__(self, analyzer):
        """Keep a reference to the analyzer whose colors will be processed.

        Args:
            analyzer: Object providing ``dominant_colors``.
        """
        self.analyzer = analyzer
        # Defined up front so the attribute exists even before
        # check_duplicate_colors() has been called.
        self.duplicates = []

    @staticmethod
    def _brightness(color):
        """Return the weighted brightness of an ``(r, g, b)`` color, scaled to 0-1.

        Uses the ITU-R BT.709 coefficients directly on the 0-255 channel
        values (no gamma linearisation), so it approximates perceived
        brightness rather than computing true relative luminance.
        """
        r, g, b = color
        return (0.2126 * r + 0.7152 * g + 0.0722 * b) / 255

    def display_dominant_colors(self):
        """Print the RGB code, HEX code and brightness of every sample."""
        print("\nRGB-Code from every sample: ")
        for index, color in enumerate(self.analyzer.dominant_colors, start=1):
            r, g, b = color
            hex_color = f"#{r:02x}{g:02x}{b:02x}"
            brightness = self._brightness(color)
            print(f"W{index}: - RGB Code: {color}, HEX: {hex_color}, Brightness: {brightness:.3f}")

    def check_duplicate_colors(self):
        """Find samples whose dominant color repeats an earlier sample's.

        Returns:
            A list of ``(first_occurrence_index, current_index)`` tuples with
            1-based indices; empty when there are no colors or no duplicates.
            The same list is stored on ``self.duplicates`` and on
            ``self.analyzer.duplicates``.
        """
        print("Checking for duplicate dominant colors...")
        self.duplicates = []
        # The analyzer declares a `duplicates` attribute; keep it in sync the
        # same way perform_pca() stores its result on the analyzer.
        self.analyzer.duplicates = self.duplicates

        colors = self.analyzer.dominant_colors
        if not colors:
            print("No dominant colors to check for duplicates.")
            return []

        first_seen = {}  # color -> 1-based index of its first occurrence
        for index, color in enumerate(colors, start=1):
            first_index = first_seen.setdefault(color, index)
            if first_index != index:
                self.duplicates.append((first_index, index))

        if self.duplicates:
            print(f"Found {len(self.duplicates)} pairs of images with duplicate dominant colors.")
        else:
            print("No duplicate dominant colors found.")

        return self.duplicates

    def perform_pca(self):
        """Project the dominant colors onto their first two principal components.

        Returns:
            The transformed array (also stored on
            ``self.analyzer.pca_transformed_data``), or ``None`` when there
            are fewer than two samples.
        """
        print("Performing PCA on dominant colors...")
        colors = self.analyzer.dominant_colors
        if not colors:
            print("No dominant colors to perform PCA on. Please extract colors first.")
            return None
        if len(colors) < 2:
            # A 2-component PCA cannot be fitted on a single sample;
            # scikit-learn would raise instead of returning a result.
            print("At least 2 samples are needed to perform a 2-component PCA.")
            return None

        # Rows are samples, columns are the R, G and B channels
        colors_array = np.array(colors)
        pca = PCA(n_components=2)

        # Stored on the analyzer because later cells read it from there
        self.analyzer.pca_transformed_data = pca.fit_transform(colors_array)
        print(f"PCA transformed data shape: {self.analyzer.pca_transformed_data.shape}")
        return self.analyzer.pca_transformed_data

    def check_lightening_trend(self):
        """Tell whether every sample is strictly brighter than the previous one.

        Returns:
            ``True`` when brightness strictly increases from the first sample
            to the last; ``False`` otherwise or when fewer than two samples
            are available.
        """
        print("\nChecking if samples are getting lighter...")
        colors = self.analyzer.dominant_colors
        if not colors or len(colors) < 2:
            print("Not enough samples to determine lightening trend.")
            return False

        brightness_values = [self._brightness(color) for color in colors]

        # Equal brightness between neighbours also breaks the trend
        is_getting_lighter = all(
            current < following
            for current, following in zip(brightness_values, brightness_values[1:])
        )

        # Report the real sample count instead of assuming nine samples
        sample_count = len(brightness_values)
        if is_getting_lighter:
            print(f"Samples are consistently getting lighter from sample 1 to sample {sample_count}.")
        else:
            print(f"Samples are NOT consistently getting lighter from sample 1 to sample {sample_count}.")
        return is_getting_lighter

# **Dark Sand Sample**

# Processing Data
This section concentrates on analyzing and visualizing the samples. There are 5 analysis steps and 4 visuals to produce, which are:
1.   Make a color palette to visualize the difference between the samples
2.   Transform the 3-dimensional RGB data into 2 dimensions with Principal Component Analysis (PCA)
3.   RGB coordinates of every sample
4.   Visualize the 2D RGB data

In [None]:
# Dark Sand sample images to extract colors from: Dark_T0.png through Dark_T8.png
image_files = [f"Dark_T{sample_index}.png" for sample_index in range(9)]

In [None]:
# Build the analyzer for the current image list and extract one dominant color per image.
# dom_color is the same list that the analyzer keeps in analyzer.dominant_colors.
analyzer = ColorAnalyzer(image_files)
dom_color = analyzer.extract_dominant_colors()

# Wrap the analyzer so its colors can be reported on and transformed
processor = ProcessingData(analyzer)

# Collect (first occurrence, repeat) index pairs of samples sharing a dominant color,
# then print the RGB code, HEX code and brightness of every sample
duplicates = processor.check_duplicate_colors()
processor.display_dominant_colors()

# True only when every sample is strictly brighter than the one before it
is_lighter_trend = processor.check_lightening_trend()

# Visualizing Data

In [None]:
# Draw the dominant colors as a grid of labelled swatches

# Grid shape: a fixed number of columns, with as many rows as the colors need
num_column = 9
num_rows = -(-len(dom_color) // num_column)  # ceiling division

# A single axes object with its ticks and frame hidden acts as the canvas
fig, ax = plt.subplots(figsize=(12, 5))
ax.set_axis_off()

for i, color in enumerate(dom_color):
    row, col = divmod(i, num_column)

    # Swatch size and lower-left corner; row 0 is placed at the top of the 0-1 range
    rect_width, rect_height = 1 / num_column, 1 / num_rows
    rect_x = col / num_column
    rect_y = 1 - (row + 1) / num_rows

    # Channels are rescaled from 0-255 to the 0-1 range used for the face color
    swatch = plt.Rectangle(
        (rect_x, rect_y),
        rect_width,
        rect_height,
        facecolor=[c / 255 for c in color],
    )
    ax.add_patch(swatch)

    # Weighted brightness (ITU-R BT.709 coefficients) picks a readable label color
    r, g, b = color
    brightness = (0.2126 * r + 0.7152 * g + 0.0722 * b) / 255
    text_color = 'black' if brightness >= 0.5 else 'white'

    # Label each swatch with its sample index, centred inside the rectangle
    ax.text(
        rect_x + rect_width / 2,
        rect_y + rect_height / 2,
        f'T{i}',
        color=text_color,
        ha='center',
        va='center',
        fontsize=8,
    )

# Fix the visible area to the unit square the swatches were laid out in
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
plt.title('Dominant Color Palettes for Darker Sand Sample')
plt.show()

In [None]:
# Reduce the RGB colors to two principal components. perform_pca() stores its
# result on analyzer.pca_transformed_data, which is what gets printed below.
processor.perform_pca()
print("PCA Transformed Data (2D RGB):")
print(analyzer.pca_transformed_data)

In [None]:
# Scatter plot of the samples in PCA space, each marker drawn in its own dominant color
plt.figure(figsize=(10, 8))

# Weighted brightness (ITU-R BT.709 coefficients) of every dominant color, scaled to 0-1
brightness_values = [
    (0.2126 * r + 0.7152 * g + 0.0722 * b) / 255
    for r, g, b in analyzer.dominant_colors
]

for i, (x, y) in enumerate(analyzer.pca_transformed_data):
    # Row i of the PCA output belongs to dominant color i
    original_color = analyzer.dominant_colors[i]
    brightness = brightness_values[i]

    # Dark markers get a white label, bright markers a black one
    text_color = 'black' if brightness >= 0.5 else 'white'

    # Channels are rescaled from 0-255 to the 0-1 range used for the marker color
    marker_color = [c / 255 for c in original_color]
    plt.scatter(x, y, color=marker_color, s=350, label=f'T{i}')
    plt.annotate(
        f'T{i}',
        (x, y),
        color=text_color,
        textcoords="offset points",
        xytext=(0, 3),
        ha='center',
        va='top',
        fontsize=9,
    )

plt.xlabel('Primary sample color')
plt.ylabel('Secondary sample color')
plt.title('RGB Coordinate of each sample with PCA')
plt.grid(True)
plt.show()

# **Light Sand Sample**

# Processing Data
This section concentrates on analyzing and visualizing the samples. There are 5 analysis steps and 4 visuals to produce, which are:
1.   Make a color palette to visualize the difference between the samples
2.   Transform the 3-dimensional RGB data into 2 dimensions with Principal Component Analysis (PCA)
3.   RGB coordinates of every sample
4.   Visualize the 2D RGB data

In [None]:
# Light Sand sample images to extract colors from: Light_T0.png through Light_T8.png
image_files = [f"Light_T{sample_index}.png" for sample_index in range(9)]

In [None]:
# Build the analyzer for the current image list and extract one dominant color per image.
# dom_color is the same list that the analyzer keeps in analyzer.dominant_colors.
analyzer = ColorAnalyzer(image_files)
dom_color = analyzer.extract_dominant_colors()

# Wrap the analyzer so its colors can be reported on and transformed
processor = ProcessingData(analyzer)

# Collect (first occurrence, repeat) index pairs of samples sharing a dominant color,
# then print the RGB code, HEX code and brightness of every sample
duplicates = processor.check_duplicate_colors()
processor.display_dominant_colors()

# True only when every sample is strictly brighter than the one before it
is_lighter_trend = processor.check_lightening_trend()

# Visualizing Data

In [None]:
# Draw the dominant colors of the Light Sand samples as a grid of labelled swatches

# Grid shape: a fixed number of columns, with as many rows as the colors need
num_column = 9
num_rows = (len(dom_color) + num_column - 1) // num_column  # ceiling division

# A single axes object with its ticks and frame hidden acts as the canvas
fig, ax = plt.subplots(figsize=(12, 5))
ax.set_axis_off()

for i, color in enumerate(dom_color):
    row, col = divmod(i, num_column)

    # Swatch size and lower-left corner; row 0 is placed at the top of the 0-1 range
    rect_width = 1 / num_column
    rect_height = 1 / num_rows
    rect_x = col / num_column
    rect_y = 1 - (row + 1) / num_rows

    # Channels are rescaled from 0-255 to the 0-1 range used for the face color
    rect = plt.Rectangle((rect_x, rect_y), rect_width, rect_height,
                         facecolor=[c / 255 for c in color])
    ax.add_patch(rect)

    # Weighted brightness (ITU-R BT.709 coefficients) picks a readable label color
    r, g, b = color
    brightness = (0.2126 * r + 0.7152 * g + 0.0722 * b) / 255
    text_color = 'white' if brightness < 0.5 else 'black'

    # Label each swatch with its sample index, centred inside the rectangle
    ax.text(rect_x + rect_width / 2, rect_y + rect_height / 2, f'T{i}',
            color=text_color, ha='center', va='center', fontsize=8)

# Fix the visible area to the unit square the swatches were laid out in
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
# This cell plots the Light Sand images, so the title names the lighter sample
plt.title('Dominant Color Palettes for Lighter Sand Sample')
plt.show()

In [None]:
# Reduce the RGB colors to two principal components. perform_pca() stores its
# result on analyzer.pca_transformed_data, which is what gets printed below.
processor.perform_pca()
print("PCA Transformed Data (2D RGB):")
print(analyzer.pca_transformed_data)

In [None]:
# Scatter plot of the samples in PCA space, each marker drawn in its own dominant color
plt.figure(figsize=(10, 8))

# Weighted brightness (ITU-R BT.709 coefficients) of every dominant color, scaled to 0-1
brightness_values = [
    (0.2126 * r + 0.7152 * g + 0.0722 * b) / 255
    for r, g, b in analyzer.dominant_colors
]

for i, (x, y) in enumerate(analyzer.pca_transformed_data):
    # Row i of the PCA output belongs to dominant color i
    original_color = analyzer.dominant_colors[i]
    brightness = brightness_values[i]

    # Dark markers get a white label, bright markers a black one
    text_color = 'black' if brightness >= 0.5 else 'white'

    # Channels are rescaled from 0-255 to the 0-1 range used for the marker color
    marker_color = [c / 255 for c in original_color]
    plt.scatter(x, y, color=marker_color, s=350, label=f'T{i}')
    plt.annotate(
        f'T{i}',
        (x, y),
        color=text_color,
        textcoords="offset points",
        xytext=(0, 3),
        ha='center',
        va='top',
        fontsize=9,
    )

plt.xlabel('Primary sample color')
plt.ylabel('Secondary sample color')
plt.title('RGB Coordinate of each sample with PCA')
plt.grid(True)
plt.show()