In [17]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from scipy.ndimage import binary_fill_holes, label, generate_binary_structure, convolve
from scipy.stats import linregress


In [18]:
# --- CONFIG ---
LATTICE_SIZE = 20000
FILL_PROB = 0.405
AREA_THRESHOLD = 1000
RUNS = 1
SLICE_FRACS = np.linspace(0.001, 0.999, 100)
BUCKETS = [(1000, 5000), (5000, 10000), (10000, 20000), (20000, np.inf)]

# --- STRUCTURE ---
convolution_structure = generate_binary_structure(rank=2, connectivity=1)

In [19]:
# --- HELPERS ---
def generate_lattice(width, height, prob):
    return np.random.rand(height, width) < prob

def filter_features_over_threshold(labeled_features, num_features, threshold):
    counts = np.bincount(labeled_features.ravel())
    valid = counts > threshold
    valid[0] = False
    mask = valid[labeled_features]
    filtered_labels, new_count = label(mask, structure=convolution_structure)
    return filtered_labels, new_count

def count_edge_contacts(coords, shape):
    rows, cols = coords[:, 0], coords[:, 1]
    height, width = shape
    return np.array([
        np.sum(cols == 0),
        np.sum(rows == 0),
        np.sum(cols == width - 1),
        np.sum(rows == height - 1)
    ])

def interpret_edge_touch(edge_counts):
    left, top, right, bottom = edge_counts > 0
    num_touched = np.count_nonzero(edge_counts)
    if num_touched == 0:
        return 0
    elif num_touched == 1:
        return 1
    elif num_touched == 2:
        if (left and right) or (top and bottom):
            return -1
        else:
            return 2
    else:
        return -1

def crop_to_bounding_box(mask):
    rows, cols = np.where(mask)
    if rows.size == 0:
        return np.zeros((0, 0), dtype=bool)
    return mask[rows.min():rows.max()+1, cols.min():cols.max()+1]

def compute_area(mask):
    return np.count_nonzero(mask)

def compute_perimeter(mask):
    kernel = np.array([[0, 1, 0], [1, 0, 1], [0, 1, 0]])
    neighbors = convolve(mask.astype(np.uint8), kernel, mode='constant', cval=0)
    return np.sum(mask * (4 - neighbors))

def compute_loglog_stats(areas, perims):
    if len(areas) < 2 or len(perims) < 2:
        return None
    log_area = np.log(areas)
    log_perim = np.log(perims)
    slope, _, r_value, _, _ = linregress(log_area, log_perim)
    D = 2 * slope
    return D

def get_bucket(area):
    for low, high in BUCKETS:
        if low <= area < high:
            return f"{int(low)}-{int(high) if np.isfinite(high) else 'inf'}"
    return None

In [20]:
import matplotlib.pyplot as plt

def visualize_cloud_mirroring(cropped, sliced, mirrored, slice_frac, cloud_id=None):
    """
    Show cropped cloud, the sliced half, and the mirrored result.
    Optionally show the shared edge used in the correction.
    """
    fig, axs = plt.subplots(1, 3, figsize=(15, 5))

    axs[0].imshow(cropped, cmap='gray')
    axs[0].set_title(f"Original Cloud\n{cropped.shape}")

    axs[1].imshow(sliced, cmap='gray')
    axs[1].set_title(f"Sliced (frac={slice_frac:.3f})\n{sliced.shape}")

    axs[2].imshow(mirrored, cmap='gray')
    axs[2].set_title(f"Mirrored Cloud\n{mirrored.shape}")

    for ax in axs:
        ax.axis('off')

    if cloud_id is not None:
        fig.suptitle(f"Cloud ID {cloud_id}", fontsize=14)

    plt.tight_layout()
    plt.show()

def visualize_shared_edge(sliced):
    """
    Show which pixels were counted as the shared edge in slicing.
    Useful for confirming shared_edge_len logic.
    """
    shared_edge_mask = np.zeros_like(sliced, dtype=np.uint8)
    shared_edge_mask[:, 0] = sliced[:, 0]  # highlight first column

    plt.figure(figsize=(4, 4))
    plt.imshow(shared_edge_mask, cmap='Reds')
    plt.title("Shared Edge Pixels (Exposed Cut Column)")
    plt.axis('off')
    plt.tight_layout()
    plt.show()

In [21]:
# --- MAIN EXPERIMENT ---
results_by_bucket = {
    f"{int(low)}-{int(high) if np.isfinite(high) else 'inf'}": {frac: [] for frac in SLICE_FRACS}
    for (low, high) in BUCKETS
}
true_stats_by_bucket = {
    f"{int(low)}-{int(high) if np.isfinite(high) else 'inf'}": []
    for (low, high) in BUCKETS
}

for run in tqdm(range(RUNS), desc="Running experiments"):
    lattice = generate_lattice(LATTICE_SIZE, LATTICE_SIZE, FILL_PROB)
    filled = binary_fill_holes(lattice, structure=convolution_structure)
    labeled, num_features = label(filled, structure=convolution_structure)
    labeled, num_features = filter_features_over_threshold(labeled, num_features, AREA_THRESHOLD)

    for label_id in range(1, num_features + 1):
        mask = (labeled == label_id)
        coords = np.argwhere(mask)
        if interpret_edge_touch(count_edge_contacts(coords, mask.shape)) != 0:
            continue

        cropped = crop_to_bounding_box(mask)
        area = compute_area(cropped)
        perim = compute_perimeter(cropped)
        bucket = get_bucket(area)
        if bucket is None:
            continue

        true_stats_by_bucket[bucket].append((area, perim))

        h, w = cropped.shape
        for frac in SLICE_FRACS:
            cut_col = int(w * frac)
            if cut_col < 2 or cut_col >= w - 1:
                continue
            sliced = cropped[:, cut_col:]
            if np.count_nonzero(sliced) == 0:
                continue

            shared_edge_len = np.count_nonzero(sliced[:, 0])
            area_m = 2 * compute_area(sliced)
            perim_m = 2 * (compute_perimeter(sliced) - shared_edge_len)

            results_by_bucket[bucket][frac].append((area_m, perim_m))


# --- ANALYSIS, CSV EXPORT & PLOT ---
all_rows = []
for bucket in results_by_bucket:
    if len(true_stats_by_bucket[bucket]) < 2:
        continue

    areas_true, perims_true = zip(*true_stats_by_bucket[bucket])
    D_true = compute_loglog_stats(np.array(areas_true), np.array(perims_true))

    delta_Ds = []
    slice_fracs_valid = []

    for frac in SLICE_FRACS:
        if len(results_by_bucket[bucket][frac]) < 2:
            continue
        areas_m, perims_m = zip(*results_by_bucket[bucket][frac])
        D_mirror = compute_loglog_stats(np.array(areas_m), np.array(perims_m))
        if D_mirror is not None:
            delta_D = D_mirror - D_true
            delta_Ds.append(delta_D)
            slice_fracs_valid.append(frac)
            all_rows.append({
                "bucket": bucket,
                "slice_frac": frac,
                "D_true": D_true,
                "D_mirror": D_mirror,
                "delta_D": delta_D
            })

    if delta_Ds:
        plt.plot(slice_fracs_valid, delta_Ds, label=f"Bucket {bucket}")

# --- Save to CSV ---
df_results = pd.DataFrame(all_rows)
df_results.to_csv("delta_D_vs_slice_fraction.csv", index=False)

# --- Final Plot ---
plt.axhline(0, color='gray', linestyle='--')
plt.xlabel("Slice Fraction")
plt.ylabel("ΔD (D_mirror - D_true)")
plt.title("ΔD vs Slice Fraction per Bucket")
plt.legend()
plt.grid(True, linestyle='--')
plt.tight_layout()
plt.show()


# --- Per-Bucket: D_mirror vs slice_frac with D_true line ---
for bucket in df_results['bucket'].unique():
    bucket_df = df_results[df_results['bucket'] == bucket]
    if bucket_df.empty:
        continue

    plt.figure(figsize=(10, 5))
    plt.plot(bucket_df['slice_frac'], bucket_df['D_mirror'], 'o-', label='D_mirror')
    plt.axhline(bucket_df['D_true'].iloc[0], color='red', linestyle='--', label=f'D_true = {bucket_df["D_true"].iloc[0]:.4f}')
    plt.xlabel("Slice Fraction")
    plt.ylabel("D Value")
    plt.title(f"D_mirror vs Slice Fraction — Bucket {bucket}")
    plt.legend()
    plt.grid(True, linestyle='--')
    plt.tight_layout()
    plt.show()

Running experiments:   0%|          | 0/1 [21:58:34<?, ?it/s]


KeyboardInterrupt: 