# Processed Data — Refined Color Masks

Preview and analyze basil masks using the refined color segmentation (yellow label removed).

In [1]:
import sys
from pathlib import Path

def find_project_root(start: Path) -> Path:
    """Return the closest directory at or above ``start`` that contains ``src``.

    The search checks ``start`` itself first and then each of its parents,
    nearest first.

    Raises:
        FileNotFoundError: if no candidate directory has a ``src`` subdirectory.
    """
    candidates = (start, *start.parents)
    root = next((folder for folder in candidates if (folder / "src").is_dir()), None)
    if root is None:
        raise FileNotFoundError("Could not locate project root containing 'src'.")
    return root

# Locate the repository root by searching upward from the current working
# directory, then put it at the front of sys.path (only once) so that the
# `src` package imported below can be resolved.
PROJECT_ROOT = find_project_root(Path.cwd())
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

import cv2
import pandas as pd
from src.segmentation import compute_basil_metrics
import matplotlib.pyplot as plt

from src import basil_mask_color_refined, save_metrics_csv


In [2]:
# Gather every file directly inside processed_data whose name ends in
# 'top.png', in sorted order.
IMAGE_DIR = PROJECT_ROOT / "processed_data"
IMAGE_PATHS = sorted(IMAGE_DIR.glob('*top.png'))
len(IMAGE_PATHS)  # cell output: number of matching images


144

In [3]:
def preview_refined_color_masks(image_paths, *, down_long=1000, k_clusters=3, use_subset=None):
    """Plot each image side by side with its refined color mask.

    Parameters
    ----------
    image_paths : sliceable sequence of path objects
        Images to preview. Each entry is read with ``cv2.imread`` and its
        ``.name`` is used in the figure title.
    down_long, k_clusters
        Passed through unchanged to ``basil_mask_color_refined``.
    use_subset : int or None
        Preview only the first ``use_subset`` paths. ``None`` (the default)
        previews all of them; ``0`` previews none.

    Unreadable images are reported with a message and skipped.
    """
    # Compare against None explicitly: a plain truthiness test would treat
    # use_subset=0 as "no limit" and render every image.
    paths = image_paths if use_subset is None else image_paths[:use_subset]
    for img_path in paths:
        bgr = cv2.imread(str(img_path))
        if bgr is None:
            print(f'Skip unreadable: {img_path}')
            continue
        mask = basil_mask_color_refined(bgr, down_long=down_long, k_clusters=k_clusters)
        # OpenCV loads BGR; matplotlib expects RGB.
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

        fig, axes = plt.subplots(1, 2, figsize=(12, 6))
        fig.suptitle(f"{img_path.name} (down_long={down_long}, k={k_clusters})")
        axes[0].imshow(rgb)
        axes[0].set_title('Original RGB')
        axes[0].axis('off')

        axes[1].imshow(mask, cmap='gray')
        axes[1].set_title('Refined Color Mask')
        axes[1].axis('off')

        plt.tight_layout()
        plt.show()
        # Release the figure so previewing many images does not accumulate
        # open figures in memory.
        plt.close(fig)


In [4]:
# Folder that holds the per-image metrics CSVs; created if it is missing.
METRICS_DIR = PROJECT_ROOT / "processed_metrics_v2"
METRICS_DIR.mkdir(parents=True, exist_ok=True)

# Read back every CSV already present in that folder.
existing_csvs = sorted(METRICS_DIR.glob('*.csv'))
loaded_metrics = []
for csv_path in existing_csvs:
    df = pd.read_csv(csv_path)
    if 'image_id' not in df.columns:
        # No id column: use the file stem as the (single) index label.
        df.index = [csv_path.stem]
    else:
        df = df.set_index('image_id')
    loaded_metrics.append(df)

if not loaded_metrics:
    metrics_df = pd.DataFrame()
    print(f"No existing metrics found in {METRICS_DIR}. Run the next cell to compute them.")
else:
    metrics_df = pd.concat(loaded_metrics)
    print(f"Loaded {len(loaded_metrics)} metric files from {METRICS_DIR}")

metrics_df


No existing metrics found in c:\Users\Filip\604 proj. 4\processed_metrics_v2. Run the next cell to compute them.


In [9]:
import numpy as np

force_recompute = True  # Set True to rerun segmentation and refresh metrics

# Segment every image and compute its metrics, unless metrics were already
# loaded and a recompute was not requested.
if not (force_recompute or metrics_df.empty):
    print('Using metrics loaded from disk; toggle force_recompute to refresh.')
else:
    all_metrics = []
    for image_path in IMAGE_PATHS:
        try:
            bgr = cv2.imread(str(image_path))
            if bgr is not None:
                mask = basil_mask_color_refined(bgr, down_long=1000, k_clusters=3)
                rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
                # Any non-zero mask value counts as foreground (0/1 uint8).
                binary_mask = (mask > 0).astype(np.uint8)
                metrics_series = compute_basil_metrics(rgb, binary_mask)
                metrics_series.name = image_path.stem
                # Persist one CSV per image, then keep the row for the table.
                csv_path = METRICS_DIR / f"{image_path.stem}.csv"
                save_metrics_csv(metrics_series, image_path.stem, csv_path)
                all_metrics.append(metrics_series)
            else:
                print(f'Could not read {image_path.name}; skipping.')
        except Exception as exc:
            # Best effort: report the failure and carry on with the next image.
            print(f"Skipping {image_path.name} due to error: {exc}")

    metrics_df = pd.DataFrame(all_metrics)

metrics_df


Skipping exp2_day(4)_pot(6)_plant(7)_top.png due to error: Basil mask is empty; ensure segmentation succeeded before computing metrics.
Skipping exp2_day(4)_pot(6)_plant(8)_top.png due to error: Basil mask is empty; ensure segmentation succeeded before computing metrics.
Skipping exp2_day(5)_pot(4)_plant(1)_top.png due to error: Basil mask is empty; ensure segmentation succeeded before computing metrics.
Skipping exp2_day(5)_pot(5)_plant(8)_top.png due to error: Basil mask is empty; ensure segmentation succeeded before computing metrics.
Skipping exp2_day(5)_pot(6)_plant(7)_top.png due to error: Basil mask is empty; ensure segmentation succeeded before computing metrics.
Skipping exp2_day(5)_pot(6)_plant(8)_top.png due to error: Basil mask is empty; ensure segmentation succeeded before computing metrics.


Unnamed: 0,basil_pixels,coverage_fraction,mean_R,mean_G,mean_B,std_R,std_G,std_B,mean_hue_deg,mean_saturation,mean_value,std_saturation,std_value,mean_lab_L,mean_lab_a,mean_lab_b,freshness_index
exp2_day(0)_pot(4)_plant(1)_top,109030.0,0.008942,59.507246,76.000138,26.191424,15.519672,15.443391,12.757566,80.142034,169.792176,76.000523,25.982830,15.444815,76.025846,-15.373228,26.689232,0.343526
exp2_day(0)_pot(4)_plant(2)_top,559719.0,0.045906,58.145457,77.191702,17.290592,15.138522,14.921800,13.852947,79.234845,201.739564,77.192080,31.368152,14.924163,76.626002,-17.408371,31.064134,0.388619
exp2_day(0)_pot(4)_plant(3)_top,1344833.0,0.110298,56.329628,76.749793,17.343055,13.272863,13.500396,12.917219,80.663803,201.422032,76.749963,32.142623,13.501553,75.940216,-18.008661,30.708692,0.387988
exp2_day(0)_pot(4)_plant(4)_top,514540.0,0.042200,57.622039,75.073992,19.848813,17.276002,17.039222,16.114425,79.026979,193.588248,75.074301,35.550861,17.041031,74.648778,-16.297404,29.051224,0.375681
exp2_day(0)_pot(4)_plant(5)_top,232194.0,0.019044,50.917758,64.833075,24.189510,19.517823,18.431663,19.964581,80.345108,168.112940,64.834384,43.475163,18.436016,64.361771,-13.215458,22.215195,0.330421
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
exp2_day(5)_pot(6)_plant(2)_top,29526.0,0.002422,48.749306,50.609497,16.572987,16.940785,17.053636,13.321671,63.066992,180.417463,50.745241,39.136636,17.039351,51.124263,-6.559202,20.096661,0.330747
exp2_day(5)_pot(6)_plant(3)_top,18965.0,0.001555,65.076193,66.694543,27.606591,22.884705,21.491871,20.477271,63.560032,161.961983,67.184287,50.883218,21.790030,69.176641,-7.038967,22.522225,0.323209
exp2_day(5)_pot(6)_plant(4)_top,252787.0,0.020733,57.905612,68.245740,13.105425,19.786693,17.407354,15.840638,71.470883,213.542156,68.317948,40.230046,17.524846,68.572597,-12.815256,29.336837,0.395820
exp2_day(5)_pot(6)_plant(5)_top,109970.0,0.009019,63.402201,66.855397,29.825707,27.380060,26.026862,27.028630,65.053269,160.187187,67.140566,56.543986,26.353437,68.585432,-7.760535,21.110803,0.322900


In [13]:
# Write the combined metrics table to one CSV. The path is relative, so the
# file is created in the kernel's current working directory.
metrics_csv_path = "all_metrics_final.csv"
metrics_df.to_csv(metrics_csv_path)


In [10]:
def extract_day_pot_plant(name):
    """Parse ``day(N)``, ``pot(N)`` and ``plant(N)`` tokens out of an image name.

    ``name`` is split on underscores; for each token beginning with one of the
    three prefixes, the text between the prefix and the token's final
    character is converted with ``int``. If a field occurs more than once, the
    last occurrence wins.

    Returns:
        A ``(day, pot, plant)`` tuple; a field that never appears is ``None``.

    Raises:
        ValueError: if a matching token does not contain a valid integer.
    """
    found = {'day': None, 'pot': None, 'plant': None}
    for token in name.split('_'):
        for field in found:
            opener = field + '('
            if token.startswith(opener):
                # Drop the prefix and the trailing character (the ')').
                found[field] = int(token[len(opener):-1])
                break
    return found['day'], found['pot'], found['plant']

def compute_percentage_changes(df):
    """Compute the day 0 -> day 5 percentage change of two metrics per plant.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per image. The row label is parsed with
        ``extract_day_pot_plant``; the ``freshness_index`` and ``mean_B``
        values are read from each row (missing columns yield ``None``).

    Returns
    -------
    pandas.DataFrame
        Indexed by ``(pot, plant)`` with columns ``pct_change_freshness`` and
        ``pct_change_mean_B``. Plants lacking a value on either day, or whose
        day-0 value is zero, are dropped. An empty frame with those two
        columns is returned when ``df`` has no rows or when day 0 / day 5 is
        absent from the data altogether.
    """
    result_columns = ['pct_change_freshness', 'pct_change_mean_B']

    records = []
    for name, row in df.iterrows():
        day, pot, plant = extract_day_pot_plant(name)
        records.append({
            'image_id': name,
            'day': day,
            'pot': pot,
            'plant': plant,
            'freshness_index': row.get('freshness_index'),
            'mean_B': row.get('mean_B')
        })
    if not records:
        # A frame built from no records has no columns, so pivoting it on
        # 'pot'/'plant'/'day' would raise instead of reporting "no data".
        print('No metrics available for percentage change computation.')
        return pd.DataFrame(columns=result_columns)

    full_df = pd.DataFrame(records)
    # One column per (metric, day); repeated (pot, plant, day) entries are
    # combined by pivot_table's default aggregation.
    pivot = full_df.pivot_table(index=['pot', 'plant'], columns='day', values=['freshness_index', 'mean_B'])
    required = {('freshness_index', 0), ('freshness_index', 5), ('mean_B', 0), ('mean_B', 5)}
    if not required <= set(pivot.columns):
        print('Day 0 or Day 5 data missing for some pots/plants.')
        return pd.DataFrame(columns=result_columns)

    freshness_day0 = pivot[('freshness_index', 0)]
    freshness_day5 = pivot[('freshness_index', 5)]
    meanB_day0 = pivot[('mean_B', 0)]
    meanB_day5 = pivot[('mean_B', 5)]

    # Mask zero baselines with NaN so the division cannot blow up. A float NaN
    # keeps the series numeric (pd.NA can turn a float series into object
    # dtype); the affected rows are removed by dropna() below.
    nan = float('nan')
    pct_freshness = ((freshness_day5 - freshness_day0) / freshness_day0.replace(0, nan)) * 100
    pct_meanB = ((meanB_day5 - meanB_day0) / meanB_day0.replace(0, nan)) * 100
    changes_df = pd.DataFrame({
        'pct_change_freshness': pct_freshness,
        'pct_change_mean_B': pct_meanB
    }).dropna()
    return changes_df

# Percentage change per (pot, plant) between day 0 and day 5, derived from the
# metrics table; the bare name on the last line displays the result.
changes_df = compute_percentage_changes(metrics_df)
changes_df


Unnamed: 0_level_0,Unnamed: 1_level_0,pct_change_freshness,pct_change_mean_B
pot,plant,Unnamed: 2_level_1,Unnamed: 3_level_1
4,2,-5.193282,18.861437
4,3,-1.311313,-15.887588
4,4,1.392056,-8.118072
4,5,18.667606,-44.453091
4,6,6.232846,-4.398243
4,7,3.277052,-5.001292
4,8,-12.817482,-2.425629
5,1,2.804519,-31.268289
5,2,0.1497,-10.982776
5,3,-7.122774,6.351204


In [None]:
# Treatment assignment keyed by (pot, plant): 1 is labelled 'water' and
# 0 is labelled 'control' below.
treatment_map = {
    (4, 1): 0, (4, 2): 0, (4, 3): 1, (4, 4): 0,
    (4, 5): 1, (4, 6): 1, (4, 7): 1, (4, 8): 0,
    (5, 1): 1, (5, 2): 1, (5, 3): 0, (5, 4): 1,
    (5, 5): 0, (5, 6): 1, (5, 7): 0, (5, 8): 0,
    (6, 1): 1, (6, 2): 0, (6, 3): 0, (6, 4): 1,
    (6, 5): 1, (6, 6): 1, (6, 7): 0, (6, 8): 0,
}

if not changes_df.empty:
    changes_with_treatment = changes_df.copy()
    metric_columns = ['pct_change_freshness', 'pct_change_mean_B']

    treatments = []
    unassigned_keys = []
    for pot, plant in changes_with_treatment.index:
        assignment = treatment_map.get((pot, plant))
        if assignment == 1:
            treatments.append('water')
        elif assignment == 0:
            treatments.append('control')
        else:
            # A plant without a known assignment must not be counted as a
            # control; keep it in a separate group and report it.
            treatments.append('unassigned')
            unassigned_keys.append((pot, plant))
    if unassigned_keys:
        print(f"Warning: no treatment assignment for (pot, plant) {unassigned_keys}; labelled 'unassigned'.")
    changes_with_treatment['treatment'] = treatments

    # Mean, spread and sample size of each metric per treatment group.
    summary_stats = changes_with_treatment.groupby('treatment')[metric_columns].agg(['mean', 'std', 'count'])
    display(summary_stats)

    if set(changes_with_treatment['treatment']) >= {'water', 'control'}:
        mean_water = changes_with_treatment.loc[changes_with_treatment['treatment'] == 'water', metric_columns].mean()
        mean_control = changes_with_treatment.loc[changes_with_treatment['treatment'] == 'control', metric_columns].mean()
        diff = mean_water - mean_control
        print('Mean difference (water - control):')
        print(diff)
    else:
        print('Not enough treatment groups to compute differences.')
else:
    print('No percentage change data available.')

Unnamed: 0_level_0,pct_change_freshness,pct_change_freshness,pct_change_freshness,pct_change_mean_B,pct_change_mean_B,pct_change_mean_B
Unnamed: 0_level_1,mean,std,count,mean,std,count
treatment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
control,-10.22832,7.378332,8,25.612674,39.980646,8
water,-0.513185,8.543149,12,28.966094,62.21228,12


Mean difference (water - control):
pct_change_freshness    9.715135
pct_change_mean_B       3.353419
dtype: float64


In [None]:
# Histogram of each percentage-change column, one panel per metric.
if changes_df.empty:
    print('Insufficient data for percentage change computation.')
else:
    fig, axes = plt.subplots(1, 2, figsize=(12, 4))
    # (column, panel title, extra hist kwargs) for the left and right panels.
    panels = (
        ('pct_change_freshness', 'Freshness % Change (Day 0 -> Day 5)', {}),
        ('pct_change_mean_B', 'Mean B % Change (Day 0 -> Day 5)', {'color': 'orange'}),
    )
    for ax, (column, title, extra_kwargs) in zip(axes, panels):
        ax.hist(changes_df[column], bins=20, alpha=0.8, **extra_kwargs)
        ax.set_title(title)
        ax.set_xlabel('% change')
        ax.set_ylabel('Count')

    plt.tight_layout()
    plt.show()


In [14]:
# Preview subset (set use_subset to limit figures)
#preview_refined_color_masks(IMAGE_PATHS, down_long=1000, k_clusters=3)
