In [None]:
import os
import glob
import cv2
import numpy as np
import pandas as pd
from scipy.stats import kurtosis
from concurrent.futures import ProcessPoolExecutor
import matplotlib.pyplot as plt

# ─── Part 1: Batch Compute & Save Stats ───────────────────────────────────────

def load_yolo_boxes(txt_path, img_w, img_h):
    """Read a YOLO-style label file and convert its boxes to pixel corners.

    Every non-blank line must hold exactly five whitespace-separated numbers,
    ``class x_center y_center width height``, where the four geometry values
    are fractions of the image size.

    Args:
        txt_path: Path to the label text file.
        img_w: Image width in pixels.
        img_h: Image height in pixels.

    Returns:
        A list of ``(x1, y1, x2, y2, cls)`` tuples of Python ints. Corner
        coordinates are truncated toward zero and then clamped to
        ``[0, img_w - 1]`` horizontally and ``[0, img_h - 1]`` vertically.

    Raises:
        ValueError: If a non-blank line does not contain exactly five
            numeric fields.
    """
    def clamp(value, upper):
        # Same result as np.clip(value, 0, upper) for scalar ints.
        return min(max(value, 0), upper)

    boxes = []
    with open(txt_path, 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank / whitespace-only lines (e.g. a trailing empty line)
                # carry no box; skip them instead of failing the unpack.
                continue
            cls, xc, yc, w, h = map(float, fields)
            x1 = clamp(int((xc - w/2) * img_w), img_w - 1)
            x2 = clamp(int((xc + w/2) * img_w), img_w - 1)
            y1 = clamp(int((yc - h/2) * img_h), img_h - 1)
            y2 = clamp(int((yc + h/2) * img_h), img_h - 1)
            boxes.append((x1, y1, x2, y2, int(cls)))
    return boxes

def compute_stats(img, mask):
    """Return the mean and kurtosis of the pixels selected by a boolean mask.

    Args:
        img: Array of pixel values.
        mask: Boolean array used to index ``img``; the selected values are
            flattened before the statistics are computed.

    Returns:
        A ``(mean, kurtosis)`` pair. Kurtosis is computed by
        ``scipy.stats.kurtosis`` with ``fisher=False`` (Pearson definition)
        and ``bias=False``. When the mask selects no pixels both values are
        NaN.
    """
    vals = img[mask].ravel()
    if vals.size == 0:
        # Nothing selected (zero-area box, or the boxes cover the whole
        # frame): report NaN directly rather than letting mean()/kurtosis()
        # emit runtime warnings on an empty array.
        return np.nan, np.nan
    return vals.mean(), kurtosis(vals, fisher=False, bias=False)

def process_image(img_path):
    """Compute per-box and outside-of-boxes pixel statistics for one image.

    The label file is expected next to the image, with the same path stem and
    a ``.txt`` extension. The image is read as grayscale.

    Args:
        img_path: Path to the image file.

    Returns:
        A list with one dict per labeled box, holding the keys ``image``
        (file base name), ``box_idx``, ``class``, ``x1``, ``y1``, ``x2``,
        ``y2``, ``box_mean``, ``box_kurt``, ``outside_mean`` and
        ``outside_kurt``. The two ``outside_*`` values are computed once per
        image over every pixel not covered by any box slice and copied onto
        each row. Returns an empty list when the label file is missing or the
        image cannot be read.
    """
    stem, _ = os.path.splitext(img_path)
    txt_path = stem + '.txt'
    # Check for the label file first: it is a cheap filesystem lookup, while
    # decoding an image that has no labels would be wasted work.
    if not os.path.exists(txt_path):
        return []
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        return []
    h, w = img.shape[:2]
    boxes = load_yolo_boxes(txt_path, w, h)

    image_name = os.path.basename(img_path)
    outside_mask = np.ones((h, w), bool)
    rows = []
    for idx, (x1, y1, x2, y2, cls) in enumerate(boxes):
        # Slice the box region directly instead of allocating a full-frame
        # mask per box; the selected pixels (and their order) are the same.
        # The slice end is exclusive, so row y2 and column x2 are not included.
        crop = img[y1:y2, x1:x2]
        mean_inside, kurt_inside = compute_stats(crop, np.ones(crop.shape, bool))
        outside_mask[y1:y2, x1:x2] = False
        rows.append({
            'image': image_name,
            'box_idx': idx,
            'class': cls,
            'x1': x1, 'y1': y1, 'x2': x2, 'y2': y2,
            'box_mean': mean_inside,
            'box_kurt': kurt_inside
        })

    # Outside stats: everything left uncovered after all boxes were cleared.
    mean_out, kurt_out = compute_stats(img, outside_mask)
    for row in rows:
        row['outside_mean'] = mean_out
        row['outside_kurt'] = kurt_out

    return rows

def batch_compute(img_dir, out_csv):
    """Run ``process_image`` over a directory in parallel and save the rows.

    Args:
        img_dir: Directory to scan. Every entry matching ``*.*`` is handed to
            ``process_image``, which returns no rows for files it cannot use.
        out_csv: Destination path of the CSV file (written without the index).

    Returns:
        A ``pandas.DataFrame`` with one row per labeled box. If nothing was
        found, the frame is empty but still carries the expected column
        names, so the CSV has a header and can be read back.
    """
    # Column layout produced by process_image; used only for the empty case.
    columns = [
        'image', 'box_idx', 'class',
        'x1', 'y1', 'x2', 'y2',
        'box_mean', 'box_kurt',
        'outside_mean', 'outside_kurt',
    ]

    # glob order is OS-dependent; sort so the CSV row order is reproducible.
    img_paths = sorted(glob.glob(os.path.join(img_dir, '*.*')))

    all_rows = []
    if img_paths:
        with ProcessPoolExecutor() as executor:
            for rows in executor.map(process_image, img_paths):
                all_rows.extend(rows)

    if all_rows:
        df = pd.DataFrame(all_rows)
    else:
        # A column-less frame would be written as an empty file, which
        # pd.read_csv rejects with EmptyDataError.
        df = pd.DataFrame(columns=columns)
    df.to_csv(out_csv, index=False)
    return df

# Run batch computation.
# NOTE: 'path/to/your/images' is a placeholder; point it at the real image
# directory before running.
csv_path = '/mnt/data/all_box_stats.csv'
df_stats = batch_compute('path/to/your/images', csv_path)

# Display a preview of the results.
# ace_tools is not a standard package and may be missing in the environment
# running this file; fall back to a plain print in that case.
try:
    import ace_tools as tools
except ImportError:
    print(df_stats.head())
else:
    tools.display_dataframe_to_user(name="Box Statistics Preview", dataframe=df_stats.head())

# ─── Part 2: Global Visualization ─────────────────────────────────────────────

# Reload the saved statistics so the plots reflect what is on disk.
df = pd.read_csv(csv_path)

# 1) Histogram of box means vs outside
plt.figure(figsize=(8, 4))
plt.hist(df['box_mean'], bins=50, alpha=0.7, label='Inside Boxes')
# outside_mean is repeated on every box row of an image, so this average
# weights each image by its number of boxes.
plt.axvline(df['outside_mean'].mean(), linestyle='--', label='Avg Outside Mean')
plt.title("Distribution of Box Means")
plt.xlabel("Mean Pixel Value")
plt.ylabel("Count")
plt.legend()
plt.tight_layout()
plt.show()

# 2) Boxplot of kurtosis by class
# DataFrame.boxplot(by=...) opens its own figure unless it is handed an Axes,
# which would leave a blank (8, 4) figure behind and ignore the requested
# size. Create the Axes explicitly and pass it in.
fig, ax = plt.subplots(figsize=(8, 4))
df.boxplot(column='box_kurt', by='class', ax=ax)
fig.suptitle("")  # drop the automatic "Boxplot grouped by ..." super-title
ax.set_title("Box Kurtosis by Class")
ax.set_xlabel("Class")
ax.set_ylabel("Kurtosis")
fig.tight_layout()
plt.show()

# 3) Scatter of mean vs kurtosis colored by class
plt.figure(figsize=(6, 6))
for cls in sorted(df['class'].unique()):
    sub = df[df['class'] == cls]
    plt.scatter(sub['box_mean'], sub['box_kurt'], label=f'class {int(cls)}')
plt.title("Box Mean vs Box Kurtosis")
plt.xlabel("Mean")
plt.ylabel("Kurtosis")
plt.legend()
plt.tight_layout()
plt.show()

