In [1]:
import os
import glob
import cv2
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from scipy.signal import find_peaks

encompass_images = glob.glob("./data/encompass/*/*.jpg")
camp_images = glob.glob("./data/camp/*/*.jpg")
print(f"encompass: {len(encompass_images)}\tcamp: {len(camp_images)}")

encompass: 21588	camp: 10273


In [9]:
def analyze_image(image):
    src = cv2.imread(image)
    rgb = cv2.cvtColor(src, cv2.COLOR_BGR2RGB)
    hsv = cv2.cvtColor(src, cv2.COLOR_BGR2HSV)
    lab = cv2.cvtColor(src, cv2.COLOR_BGR2LAB)

    r, g, b = cv2.split(rgb)
    h, s, v = cv2.split(hsv)
    l, a, b = cv2.split(lab)

    hist = cv2.calcHist(h, [0], None, [180], [0, 180])
    reshaped_hist = hist.reshape(-1)
    peaks, _ = find_peaks(reshaped_hist, height=0)

    mean_r, var_r = np.mean(r), np.var(r)
    mean_g, var_g = np.mean(g), np.var(g)
    mean_b, var_b = np.mean(b), np.var(b)

    h_peaks = len(peaks)
    mean_s, var_s = np.mean(s), np.var(s)
    mean_v, var_v = np.mean(v), np.var(v)
    
    contrast = calc_contrast(l)
    
    return [mean_r, var_r, mean_g, var_g, mean_b, var_b, h_peaks, mean_s, var_s, mean_v, var_v, contrast]    


def calc_contrast(l):
    kernel = np.ones((5,5),np.uint8)
    
    min_value = cv2.erode(l, kernel, iterations=1)
    max_value = cv2.dilate(l, kernel, iterations=1)

    min_value = min_value.astype(np.float64) 
    max_value = max_value.astype(np.float64) 

    contrast = (max_value - min_value) / (max_value + min_value)

    return np.mean(contrast)
    

def append_results(df, col_names, meta, result):
    dataset, ID, number, fname = meta
    mean_r, var_r, mean_g, var_g, mean_b, var_b, h_peaks, mean_s, var_s, mean_v, var_v, contrast = result
    
    df = df.append({col_names[0]: dataset, col_names[1]: ID, col_names[2]: number, col_names[3]: fname,
                    col_names[4]: mean_r, col_names[5]: var_r, 
                    col_names[6]: mean_g, col_names[7]: var_g, 
                    col_names[8]: mean_b, col_names[9]: var_b, 
                    col_names[10]: h_peaks, 
                    col_names[11]: mean_s, col_names[12]: var_s, 
                    col_names[13]: mean_v, col_names[14]: var_v, 
                    col_names[15]: contrast}, ignore_index=True)

    return df

In [None]:
col_names = ["dataset", "ID", "number", "fname", 
             "mean_r", "var_r", "mean_g", "var_g", "mean_b", "var_b", 
             "h_peaks", "mean_s", "var_s", "mean_v", "var_v", "contrast"]

df = pd.DataFrame([], columns=col_names)

for index, image in enumerate(tqdm(encompass_images)):
    meta_ = image.split("/")
    meta = [meta_[1], meta_[2], int(index), image]
    result = analyze_image(image)
    df = append_results(df, col_names, meta, result)
    
print(df.shape)

  0%|          | 0/21588 [00:00<?, ?it/s]

  contrast = (max_value - min_value) / (max_value + min_value)


In [None]:
for index, image in enumerate(tqdm(camp_images)):
    meta_ = image.split("/")
    meta = [meta_[1], meta_[2], int(index), image]
    result = analyze_image(image)
    df = append_results(df, col_names, meta, result)
    
print(df.shape)