### **Imports**

In [1]:
from histograms import *
from utility import *
from scipy.stats import wasserstein_distance
import seaborn as sns

In [2]:
seed_everything(42)

In [3]:
input_path = '/home/anto/University/Driving-Visual-Attention/data/images_aligned'

## **Data Analysis**

Extract 20 random images, one for each driver and perform a color distribution analysis.

In [12]:
# Extract 20 random paths, one for each driver
def extract_drivers_photos(main_folder, display = False):
    
    # List of dictionaries to store paths
    paths = []
    # Iterate over subdirectories in the main directory
    for driver in os.listdir(main_folder):
        seed_everything(42)
        driver_view_path = os.path.join(main_folder, driver, 'driver_view')
        samples = [folder for folder in os.listdir(driver_view_path) if os.path.isdir(os.path.join(driver_view_path, folder))] 
        random.shuffle(samples)
        random_sample = samples[1]
        image_files = [file for file in os.listdir(os.path.join(driver_view_path, random_sample)) if os.path.isdir(os.path.join(driver_view_path, random_sample))]
        random.shuffle(image_files)
        image_path = image_files[1]
        driver_dict = {'driver_num': driver, 'img_path': os.path.join(driver_view_path, random_sample, image_path)}
        if display:
            print(driver_dict['driver_num'], driver_dict['img_path'])
        paths.append(driver_dict)
        seed_everything(42)
    return paths

drivers_paths = extract_drivers_photos(input_path, display=True)

driver20 /home/anto/University/Driving-Visual-Attention/data/images_aligned/driver20/driver_view/sample105/frame_0011.jpg
driver22 /home/anto/University/Driving-Visual-Attention/data/images_aligned/driver22/driver_view/sample105/frame_0118.jpg
driver21 /home/anto/University/Driving-Visual-Attention/data/images_aligned/driver21/driver_view/sample105/frame_0080.jpg
driver3 /home/anto/University/Driving-Visual-Attention/data/images_aligned/driver3/driver_view/sample105/frame_0108.jpg
driver13 /home/anto/University/Driving-Visual-Attention/data/images_aligned/driver13/driver_view/sample105/frame_0128.jpg
driver23 /home/anto/University/Driving-Visual-Attention/data/images_aligned/driver23/driver_view/sample105/frame_0174.jpg
driver5 /home/anto/University/Driving-Visual-Attention/data/images_aligned/driver5/driver_view/sample105/frame_0056.jpg
driver8 /home/anto/University/Driving-Visual-Attention/data/images_aligned/driver8/driver_view/sample105/frame_0118.jpg
driver15 /home/anto/University

Calculate the histograms of one image. Using 2 different approaches:
- Compute 3 different histograms, one for Red, Green, Blue channel.
- Transform into HSV and compute a 2D histogram using only Hue and Saturation channels:
  - **Hue:** Represents the type of color, often described as the dominant wavelength of light. It is measured in degrees on a color wheel, with values ranging from 0 to 359. The 0 and 360 degrees correspond to red, while the other hues are distributed around the color wheel.
    - Red has a hue value around 0 or 360 degrees.
    - Green has a hue value around 120 degrees.
    - Blue has a hue value around 240 degrees.
  - **Saturation:** Refers to the intensity or vividness of a color. It quantifies the amount of white light mixed with a pure hue. A saturation value of 0 represents a shade of gray, and a value of 1 (or 100%) represents a fully saturated color.


Compute the color distribution aka find the colors in an image and plot the distribution.

Choose a random driver

In [None]:
driver_number = 1
random_image_path = drivers_paths[driver_number]['img_path']

Plot Histograms

In [None]:
r,g,b = RGB_histograms(random_image_path,choose_dim='1D',display = True)

In [None]:
hist_3D = RGB_histograms(random_image_path,choose_dim='3D',display = True)

In [None]:
hsv_hist_2D = HSV_histogram(random_image_path,choose_dim='3D',display = True)

In [None]:
hist_2d = HSV_histogram(random_image_path,choose_dim='2D',display = True)

In [None]:
h_hist, s_hist, v_hist = HSV_histogram(random_image_path,choose_dim='1D',display = True)

In [None]:
color_dist = color_distribution(random_image_path, display = True)

Calculate the histograms for all 20 images and compare the distance metrics.
We write different functions that calculate the different metrics of 2 histograms.

In [None]:
def retrieve_2_histograms(image_path1, image_path2, dimension, hist_type):
    if dimension not in ["1D", "2D", "3D"]:
        raise ValueError("Expected dimension must be '1D' or '2D' or '3D")

    if hist_type == 'RGB':
        if dimension == '1D':
            r1, g1, b1 = RGB_histograms(image_path1, choose_dim=dimension)
            r2, g2, b2 = RGB_histograms(image_path2, choose_dim=dimension)
            return [r1, g1, b1, r2, g2, b2]
        elif dimension == '3D':
            rgb_3d_1 = RGB_histograms(image_path1, choose_dim=dimension)
            rgb_3d_2 = RGB_histograms(image_path2, choose_dim=dimension)
            return [rgb_3d_1, rgb_3d_2]
    elif hist_type == 'HSV':
        if dimension == '1D':
            h1, s1, v1 = HSV_histogram(image_path1, choose_dim=dimension)
            h2, s2, v2 = HSV_histogram(image_path2, choose_dim=dimension)
            return [h1, s1, v1, h2, s2, v2]
        elif dimension == '2D':
            hs_1 = HSV_histogram(image_path1, choose_dim=dimension)
            hs_2 = HSV_histogram(image_path2, choose_dim=dimension)
            return [hs_1, hs_2]
        elif dimension == '3D':
            hsv_3d_1 = HSV_histogram(image_path1, choose_dim=dimension)
            hsv_3d_2 = HSV_histogram(image_path2, choose_dim=dimension)
            return [hsv_3d_1, hsv_3d_2]
    else:
        raise ValueError("Invalid histogram type")


In [None]:
def EMD(input,do_print = False):
    if len(input) == 6:
        histograms = {}
        distances = {}
        histograms['image_a'] = {'channel_1': input[0], 'channel_2': input[1], 'channel_3': input[2]}
        histograms['image_b'] = {'channel_1': input[3], 'channel_2': input[4], 'channel_3': input[5]}

        bins_1 = np.arange(len(input[0]))
        bins_2 = np.arange(len(input[1]))
        bins_3 = np.arange(len(input[2]))

        distances['channel_1'] = wasserstein_distance(bins_1,bins_1,histograms['image_a']['channel_1'], histograms['image_b']['channel_1'])
        distances['channel_2'] = wasserstein_distance(bins_2,bins_2,histograms['image_a']['channel_2'], histograms['image_b']['channel_2'])
        distances['channel_3'] = wasserstein_distance(bins_3,bins_3,histograms['image_a']['channel_3'], histograms['image_b']['channel_3'])
        distances['mean'] =  np.mean([distances['channel_1'], distances['channel_2'], distances['channel_3']])

        if do_print:
            print("Wasserstein Distances:", distances)
        return distances

    elif len(input) == 2:
        hist_img_a = input[0]
        hist_img_b = input[1]

        # Wassertein Distance, we need to specify the number of bins
        bins = np.arange(len(hist_img_a))
        wa_distance = wasserstein_distance(bins,bins,hist_img_a, hist_img_b)

        if do_print:
            print("Wasserstein Distance:", wa_distance)
        return wa_distance

    else:
        raise ValueError("Wrong number of histograms inserted")

def Chi_S(input,do_print = False):
    if len(input) == 6:
        histograms = {}
        distances = {}
        histograms['image_a'] = {'channel_1': input[0], 'channel_2': input[1], 'channel_3': input[2]}
        histograms['image_b'] = {'channel_1': input[3], 'channel_2': input[4], 'channel_3': input[5]}

        bins_1 = np.arange(len(input[0]))
        bins_2 = np.arange(len(input[1]))
        bins_3 = np.arange(len(input[2]))

        distances['channel_1'] = cv2.compareHist(histograms['image_a']['channel_1'], histograms['image_b']['channel_1'], cv2.HISTCMP_CHISQR)
        distances['channel_2'] = cv2.compareHist(histograms['image_a']['channel_2'], histograms['image_b']['channel_2'], cv2.HISTCMP_CHISQR)
        distances['channel_3'] = cv2.compareHist(histograms['image_a']['channel_3'], histograms['image_b']['channel_3'], cv2.HISTCMP_CHISQR)
        distances['mean'] = np.mean([distances['channel_1'], distances['channel_2'], distances['channel_3']])

        if do_print:
            print("Chi-Squared Distances:", distances)
        return distances

    elif len(input) == 2:
        hist_img_a = input[0]
        hist_img_b = input[1]

        # Chi-Squared Distance
        chi_squared_distance = cv2.compareHist(hist_img_a, hist_img_b, cv2.HISTCMP_CHISQR)
        if do_print:
            print("Chi-Squared Distance:", chi_squared_distance)
        return chi_squared_distance

    else:
        raise ValueError("Wrong number of histograms inserted")

def KL(input,do_print = False):
    if len(input) == 6:
        histograms = {}
        distances = {}
        histograms['image_a'] = {'channel_1': input[0], 'channel_2': input[1], 'channel_3': input[2]}
        histograms['image_b'] = {'channel_1': input[3], 'channel_2': input[4], 'channel_3': input[5]}

        bins_1 = np.arange(len(input[0]))
        bins_2 = np.arange(len(input[1]))
        bins_3 = np.arange(len(input[2]))

        distances['channel_1'] = cv2.compareHist(histograms['image_a']['channel_1'], histograms['image_b']['channel_1'], cv2.HISTCMP_KL_DIV)
        distances['channel_2'] = cv2.compareHist(histograms['image_a']['channel_2'], histograms['image_b']['channel_2'], cv2.HISTCMP_KL_DIV)
        distances['channel_3'] = cv2.compareHist(histograms['image_a']['channel_3'], histograms['image_b']['channel_3'], cv2.HISTCMP_KL_DIV)
        distances['mean'] = np.mean([distances['channel_1'], distances['channel_2'], distances['channel_3']])

        if do_print:
            print("Kullback-Leibler Divergences:", distances)
        return distances

    elif len(input) == 2:
        hist_img_a = input[0]
        hist_img_b = input[1]

        # Kullback-Leibler Divergence
        kl_divergence = cv2.compareHist(hist_img_a, hist_img_b, cv2.HISTCMP_KL_DIV)
        if do_print:
            print("Kullback-Leibler Divergence:", kl_divergence)
        return kl_divergence

    else:
        raise ValueError("Wrong number of histograms inserted")


In [None]:
output = retrieve_2_histograms(random_image_path,random_image_path,'1D','RGB')
distance = Chi_S(output,do_print=True)

In [None]:
def plot_matrix(matrix, title, figsize=(8, 6)):
    fig, ax = plt.subplots(figsize=figsize)
    sns.heatmap(matrix, cmap='viridis', annot=True, fmt=".0f", xticklabels=False, yticklabels=False, annot_kws={"size": 10}, ax=ax)
    plt.title(title)
    plt.show()
    return


def compare_images(dict_image_paths, dimension, hist_type):
    num_images = len(dict_image_paths)
    # Create a matrix to store the comparison results
    earth_mover_matrix = np.zeros((num_images, num_images))
    chi_squared_matrix = np.zeros((num_images, num_images))
    kl_divergence_matrix = np.zeros((num_images, num_images))

    for i in range(num_images - 1):
        for j in range(i + 1, num_images):
            # Calculate histograms for the current pair of images
            output_histograms = retrieve_2_histograms(dict_image_paths[i]['img_path'],dict_image_paths[j]['img_path'], dimension=dimension, hist_type= hist_type)
            symmetric_histograms = retrieve_2_histograms(dict_image_paths[j]['img_path'],dict_image_paths[i]['img_path'], dimension=dimension, hist_type= hist_type)

            # Compare histograms using the three distance metrics
            earth_mover_distance = EMD(output_histograms)
            chi_squared_distance = Chi_S(output_histograms)
            kl_divergence = KL(output_histograms)
            # Compute the Symetrics
            chi_s_symm = Chi_S(symmetric_histograms)
            kl_symm = KL(symmetric_histograms)
            
            if dimension == '3D' or dimension == '2D':
                earth_mover_matrix[i, j] = earth_mover_distance
                chi_squared_matrix[i, j] = chi_squared_distance
                kl_divergence_matrix[i, j] = kl_divergence
                #KL  and Chi-Squared are not symmetric
                chi_squared_matrix[j, i] = chi_s_symm
                kl_divergence_matrix[j, i] = kl_symm

            elif dimension == '1D':
                earth_mover_matrix[i, j] = earth_mover_distance['mean']
                chi_squared_matrix[i, j] = chi_squared_distance['mean']
                kl_divergence_matrix[i, j] = kl_divergence['mean']
                #KL  and Chi-Squared are not symmetric
                chi_squared_matrix[j, i] = chi_s_symm['mean']
                kl_divergence_matrix[j, i] = kl_symm['mean']
            else:
                raise ValueError("Wrong number of dimensions")


    # Plot the matrices using seaborn
    plot_matrix(earth_mover_matrix, "Earth Mover's Distance Matrix")
    plot_matrix(chi_squared_matrix, "Chi-Squared Distance Matrix")
    plot_matrix(kl_divergence_matrix, "Kullback-Leibler Divergence Matrix")
    return


In [None]:
compare_images(drivers_paths,'3D','RGB')

In [None]:
compare_images(drivers_paths,'1D','RGB')

In [None]:
compare_images(drivers_paths,'1D','HSV')

In [None]:
compare_images(drivers_paths,'2D','HSV')

In [None]:
compare_images(drivers_paths,'3D','HSV')