In [None]:
import cv2
import numpy as np
import pandas as pd
import os
from skimage import exposure
from tqdm import tqdm

In [None]:
# Root that every dataset-relative path in this notebook is joined onto.
# With the empty string, os.path.join leaves the relative path unchanged.
BASE_PATH = ""

# Read the training index CSV into a DataFrame.
train_csv_path = os.path.join(BASE_PATH, "Dataset/Index/Train.csv")
train_df = pd.read_csv(filepath_or_buffer=train_csv_path)

In [None]:
def compute_image_stats(image_path):
    """Return the grayscale mean brightness and normalized histogram of an image.

    Parameters
    ----------
    image_path : str
        Path handed to ``cv2.imread``.

    Returns
    -------
    tuple
        ``(brightness, hist)`` where ``brightness`` is the mean of the
        grayscale pixel values and ``hist`` is a flat 256-bin intensity
        histogram divided by its own total. ``(None, None)`` is returned
        when ``cv2.imread`` yields ``None``.
    """
    bgr = cv2.imread(image_path)
    if bgr is None:
        # Nothing could be loaded from this path; let the caller skip it.
        return None, None

    grayscale = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)

    # 256 bins over the range [0, 256), flattened to a 1-D vector.
    bin_counts = cv2.calcHist([grayscale], [0], None, [256], [0, 256]).flatten()
    normalized_hist = bin_counts / bin_counts.sum()

    return grayscale.mean(), normalized_hist

In [None]:
# Accumulators filled by the image-processing loop: one brightness value per
# successfully read image, plus the running sum of the per-image histograms.
brightness_values = list()
histograms = np.zeros(256)

# Use at most the first 50000 rows of the training index.
num_images = min(50000, len(train_df))

# Join every selected 'filepath' entry onto BASE_PATH.
image_paths = [
    os.path.join(BASE_PATH, relative_path)
    for relative_path in train_df['filepath'].iloc[:num_images]
]

In [None]:
# Start from empty accumulators inside this cell so that re-running it does not
# pile a second pass on top of totals left over from a previous execution.
brightness_values = []
histograms = np.zeros((256,))

for image_path in tqdm(image_paths, desc="Processing images"):
    brightness, hist = compute_image_stats(image_path)
    if brightness is None:
        # compute_image_stats returns (None, None) when the image cannot be read.
        continue
    brightness_values.append(brightness)
    # Sum the normalized histograms; the averaging happens after the loop.
    histograms += hist

In [None]:
# Average the accumulated per-image statistics and persist them as a one-row CSV.
if not brightness_values:
    # With zero processed images the mean and the division below would only
    # produce NaN (with runtime warnings) and a useless file would be written.
    raise ValueError(
        "No images could be read; cannot compute training-set statistics."
    )

mean_brightness = np.mean(brightness_values)
mean_histogram = histograms / len(brightness_values)

stats_df = pd.DataFrame({
    "mean_brightness": [mean_brightness],
    # tolist() converts to built-in floats. list() would keep NumPy scalars,
    # which NumPy >= 2 renders as "np.float64(...)" when the list is written
    # out as text, making the CSV cell unparseable as a plain list of numbers.
    "histogram": [mean_histogram.tolist()]
})

stats_save_path = os.path.join(BASE_PATH, "morphii_train_stats.csv")
stats_df.to_csv(stats_save_path, index=False)

print(f"Saved statistics to {stats_save_path}")