In [1]:
def median_region_hits(boxes, img_width, region_width=50, n_trials=10):
    """
    随机在水平范围 [0, img_width-region_width] 采样 n_trials 个起点，
    构造宽度为 region_width 的竖条区域，统计该区域与多少个框水平重叠，
    返回这 n_trials 个命中数的中位数。

    boxes: DataFrame，包含 xmin, xmax（像素）两列
    img_width: 图像宽度（像素）
    region_width: 区域宽度（像素），默认 50
    n_trials: 试验次数，默认 10
    """
    counts = []
    max_start = max(0, img_width - region_width)
    for _ in range(n_trials):
        x0 = np.random.uniform(0, max_start)
        x1 = x0 + region_width
        # 统计水平重叠：boxes.xmin <= x1 且 boxes.xmax >= x0
        hits = ((boxes["xmin"] <= x1) & (boxes["xmax"] >= x0)).sum()
        counts.append(int(hits))
    return float(np.mean(counts))



In [None]:
def load_yolo_boxes(txt_path, img_w, img_h):
    """
    读取 YOLO 格式的 txt，转换为像素坐标的 box 和中心点
    返回 DataFrame 包含：xmin, ymin, xmax, ymax, x_center, y_center, width, height
    """
    cols = ["cls", "x_c_norm", "y_c_norm", "w_norm", "h_norm"]
    df = pd.read_csv(txt_path, sep=" ", names=cols)
    # 转为像素
    df["x_center"] = df["x_c_norm"] * img_w
    df["y_center"] = df["y_c_norm"] * img_h
    df["width_px"] = df["w_norm"] * img_w
    df["height_px"] = df["h_norm"] * img_h
    df["xmin"] = df["x_center"] - df["width_px"] / 2
    df["ymin"] = df["y_center"] - df["height_px"] / 2
    df["xmax"] = df["x_center"] + df["width_px"] / 2
    df["ymax"] = df["y_center"] + df["height_px"] / 2
    return df[
        [
            "xmin",
            "ymin",
            "xmax",
            "ymax",
            "x_center",
            "y_center",
            "width_px",
            "height_px",
        ]
    ]


def compute_density(n_stomata, img_w, img_h):
    """气孔密度 = 气孔数 / 图像像素面积"""
    area = img_w * img_h
    return n_stomata / area


def count_rows(df, y_tol=None):
    """
    基于 PCA 把气孔中心点旋转到行方向坐标系，再在垂直于行（PC2）方向上聚类统计行数。

    df: 包含 x_center, y_center, height_px 的 DataFrame
    y_tol: 同一行在 PC2 方向上的最大距离阈值（默认用平均高度投影计算）
    """
    # 1. 提取中心坐标
    coords = df[["x_center", "y_center"]].values

    # 2. PCA：第一主成分 PC1 对应行的延伸方向，第二主成分 PC2 正交于行
    pca = PCA(n_components=2).fit(coords)
    # 在 PC2 方向上的投影
    pc2 = pca.transform(coords)[:, 1]

    # 3. 如果用户没给阈值，用平均高度在 PC2 轴上的投影长度做阈值
    if y_tol is None:
        # PC2 方向在原 y 轴分量 = |pca.components_[1,1]|
        proj = abs(pca.components_[1, 1])
        y_tol = df["height_px"].mean() * proj

    # 4. 对投影值排序，跳跃大于阈值时新起一行
    sorted_vals = np.sort(pc2)
    row_count = 1
    prev = sorted_vals[0]
    for v in sorted_vals[1:]:
        if v - prev > y_tol:
            row_count += 1
        prev = v

    return row_count


def count_rows_cluster_dbscan(df, eps=None):
    mask = (df["x_center"] >= 50) & (df["x_center"] <= 300)
    filtered_df = df.loc[mask]
    # 1. 做 PCA 找到正交于“行”的 PC2 轴
    coords = filtered_df[["x_center", "y_center"]].values

    pca = PCA(n_components=2).fit(coords)
    pc2 = pca.components_[1]  # PC2 单位向量，长度为 2

    # 2. 每个中心点在 PC2 轴上的投影距离
    proj_distances = coords.dot(pc2)
    filtered_df["proj_dist"] = proj_distances

    proj_coeff = abs(pc2[1])
    height_proj = filtered_df["height_px"] * proj_coeff
    filtered_df["height_proj"] = height_proj
    eps = height_proj.mean() * 3
    # 3. 在一维 PC2 上跑 DBSCAN
    clustering = DBSCAN(eps=eps, min_samples=2).fit(filtered_df[["y_center"]])
    labels = clustering.labels_
    # 不同 label 数即为行数
    return len(set(labels))


def compute_disorder(df):
    """
    以最近邻距的变异系数（CV = std / mean）衡量气孔排列“混乱程度”
    """
    pts = df[["x_center", "y_center"]].values
    D = distance_matrix(pts, pts)
    # 把自己到自己的距离设为大数以排除
    np.fill_diagonal(D, np.inf)
    nn_dist = D.min(axis=1)
    return nn_dist.std() / nn_dist.mean()


def compute_areas(df):
    """返回每个框的像素面积，以及总面积和平均面积"""
    df["area_px2"] = df["width_px"] * df["height_px"]
    return df["area_px2"].sum(), df["area_px2"].mean(), df["area_px2"]


In [204]:
image_path = "海南预测/81 (3)_det.jpg"  # 替换为你的图片路径
yolo_txt = "海南预测/81 (3).txt"  # 替换为你的检测结果 txt


# 1. 读取图像以获取尺寸
img = cv2.imread(image_path)
if img is None:
    raise FileNotFoundError(f"Cannot open image: {image_path}")
H, W = img.shape[:2]

# 2. 载入 YOLO 检测框
boxes = load_yolo_boxes(yolo_txt, img_w=W, img_h=H)
median_region_hits(boxes, W, region_width=20, n_trials=10)

11.4

In [205]:
if __name__ == "__main__":
    # --- 用户设置部分 ---
    image_path = "海南预测/81 (3)_det.jpg"  # 替换为你的图片路径
    yolo_txt = "海南预测/81 (3).txt"  # 替换为你的检测结果 txt
    # ------------------------

    # 1. 读取图像以获取尺寸
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"Cannot open image: {image_path}")
    H, W = img.shape[:2]

    # 2. 载入 YOLO 检测框
    boxes = load_yolo_boxes(yolo_txt, img_w=W, img_h=H)

    # 3. 统计气孔数 & 密度
    n = len(boxes)
    density = compute_density(n, W, H)

    # 4. 行数
    n_rows = count_rows_cluster_dbscan(boxes)

    # 5. 混乱程度
    disorder_cv = compute_disorder(boxes)

    # 6. 面积统计
    total_area, mean_area, area_series = compute_areas(boxes)

    # 7. 输出结果
    print(f"气孔总数: {n}")
    print(f"气孔密度 (每像素): {density:.3e}")
    print(f"气孔行数: {n_rows}")
    print(f"气孔排列混乱程度 (CV of NN distances): {disorder_cv:.3f}")
    print(f"气孔总面积 (px²): {total_area:.1f}")
    print(f"气孔平均面积 (px²): {mean_area:.1f}")

    # 如需将每个气孔面积保存为 CSV，可取消下面注释：
    # area_series.to_csv('stomata_areas.csv', index=False)


气孔总数: 320
气孔密度 (每像素): 1.665e-04
气孔行数: 10
气孔排列混乱程度 (CV of NN distances): 0.198
气孔总面积 (px²): 513933.0
气孔平均面积 (px²): 1606.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [103]:
import os
import cv2
import numpy as np
import pandas as pd
from scipy.spatial import distance_matrix

In [218]:
# —— 前面那些函数保持不变 —— #
# load_yolo_boxes, compute_density, count_rows, compute_disorder, compute_areas
# 请确保这几个函数已经和之前一样定义在脚本中


def process_detect_folder(image_dir, output_csv="stomata_summary.csv"):
    records = []

    for fname in os.listdir(image_dir):
        if not fname.lower().endswith((".jpg", ".jpeg", ".png", ".tif", ".tiff")):
            continue

        image_path = os.path.join(image_dir, fname)
        # 构造同名 txt 路径：去掉扩展名，换成 .txt
        base, _ = os.path.splitext(fname)
        base = base.replace("_det", "")
        yolo_txt = os.path.join(image_dir, base + ".txt")

        if not os.path.exists(yolo_txt):
            print(f"警告：未找到检测结果 {yolo_txt}，跳过 {fname}")
            continue

        # 1. 读取图像
        img = cv2.imread(image_path)
        if img is None:
            print(f"警告：无法读取 {fname}")
            continue
        H, W = img.shape[:2]

        # 2. 载入检测框
        boxes = load_yolo_boxes(yolo_txt, img_w=W, img_h=H)

        # 3–6 计算指标
        n = len(boxes)
        density = compute_density(n, W, H)
        n_rows = median_region_hits(boxes, W, region_width=30, n_trials=200)
        disorder = compute_disorder(boxes)
        total_a, mean_a, _ = compute_areas(boxes)

        # 记录
        records.append(
            {
                "image": fname,
                "n_stomata": n,
                "density_per_px": density,
                "n_rows": n_rows,
                "disorder_cv": disorder,
                "total_area_px2": total_a,
                "mean_area_px2": mean_a,
            }
        )

        print(
            f"[{fname}] 气孔: {n}, 密度: {density:.3e}, 行数: {n_rows}, 混乱度(CV): {disorder:.3f}"
        )

    # 汇总保存
    df = pd.DataFrame(records)
    df.to_csv(output_csv, index=False)
    print(f"\n已处理 {len(df)} 张图片，结果保存到 {output_csv}")


In [219]:
if __name__ == "__main__":
    IMAGE_FOLDER = "海南预测"  # 替换为你的图片+txt文件夹
    process_detect_folder(IMAGE_FOLDER, output_csv="海南叶片气孔统计.csv")
    IMAGE_FOLDER = "河南预测"  # 替换为你的图片+txt文件夹
    process_detect_folder(IMAGE_FOLDER, output_csv="河南叶片气孔统计.csv")

[121 (2)_det.jpg] 气孔: 278, 密度: 1.446e-04, 行数: 10.97, 混乱度(CV): 0.310
[121 (3)_det.jpg] 气孔: 208, 密度: 1.082e-04, 行数: 8.7, 混乱度(CV): 0.226
[121_det.jpg] 气孔: 219, 密度: 1.139e-04, 行数: 8.985, 混乱度(CV): 0.281
[122 (2)_det.jpg] 气孔: 275, 密度: 1.431e-04, 行数: 11.68, 混乱度(CV): 0.278
[122 (3)_det.jpg] 气孔: 295, 密度: 1.535e-04, 行数: 12.745, 混乱度(CV): 0.249
[122 (4)_det.jpg] 气孔: 303, 密度: 1.577e-04, 行数: 13.005, 混乱度(CV): 0.252
[122_det.jpg] 气孔: 295, 密度: 1.535e-04, 行数: 12.775, 混乱度(CV): 0.260
[123 (2)_det.jpg] 气孔: 290, 密度: 1.507e-04, 行数: 11.44, 混乱度(CV): 0.210
[123 (3)_det.jpg] 气孔: 232, 密度: 1.207e-04, 行数: 10.255, 混乱度(CV): 0.261
[123_det.jpg] 气孔: 214, 密度: 1.112e-04, 行数: 9.235, 混乱度(CV): 0.283
[124 (2)_det.jpg] 气孔: 161, 密度: 8.369e-05, 行数: 7.215, 混乱度(CV): 0.250
[124 (3)_det.jpg] 气孔: 189, 密度: 9.839e-05, 行数: 8.26, 混乱度(CV): 0.239
[124_det.jpg] 气孔: 177, 密度: 9.210e-05, 行数: 7.455, 混乱度(CV): 0.211
[126 (2)_det.jpg] 气孔: 189, 密度: 9.824e-05, 行数: 8.305, 混乱度(CV): 0.291
[126 (3)_det.jpg] 气孔: 202, 密度: 1.051e-04, 行数: 8.335, 混乱度(CV): 0

In [106]:
import os
import cv2
import numpy as np
import pandas as pd
from skimage.feature import graycomatrix, graycoprops


In [None]:
def compute_glcm_features(
    img_gray, distances=[1], angles=[0, np.pi / 4, np.pi / 2, 3 * np.pi / 4], levels=256
):
    """
    计算灰度共生矩阵（GLCM）纹理特征，并对角度取平均
    返回 dict：contrast, dissimilarity, homogeneity, ASM, energy, correlation
    """
    # 生成 GLCM，normalize=True 归一化
    glcm = graycomatrix(
        img_gray,
        distances=distances,
        angles=angles,
        levels=levels,
        symmetric=True,
        normed=True,
    )
    props = ["contrast", "dissimilarity", "homogeneity", "ASM", "energy", "correlation"]
    feats = {}
    for p in props:
        vals = graycoprops(glcm, p)
        # vals.shape = (len(distances), len(angles))
        feats[p] = vals.mean()
    return feats


def process_folder(image_dir, output_csv="texture_features.csv"):
    records = []
    for fname in os.listdir(image_dir):
        if not fname.lower().endswith((".jpg", ".jpeg", ".png", ".tif", ".tiff")):
            continue
        path = os.path.join(image_dir, fname)
        # 1. 读取并灰度化
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            print(f"警告：无法读取 {fname}")
            continue
        # 2. 可选：尺寸标准化 / ROI 截取，根据需要
        # img = cv2.resize(img, (512,512))
        # 3. 量化到 8-bit（如果不是）
        if img.dtype != np.uint8:
            img = cv2.normalize(img, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
        # 4. 计算纹理特征
        feats = compute_glcm_features(img)
        feats["image"] = fname
        records.append(feats)

    # 5. 保存为 CSV
    df = pd.DataFrame(records)
    df = df[
        [
            "image",
            "contrast",
            "dissimilarity",
            "homogeneity",
            "ASM",
            "energy",
            "correlation",
        ]
    ]
    df.to_csv(output_csv, index=False)
    print(f"已保存 {len(df)} 张图像的纹理特征到 {output_csv}")


In [None]:
if __name__ == "__main__":
    # 用户只需修改下面这一行
    IMAGE_FOLDER = "河南自交系穗位叶"
    process_folder(IMAGE_FOLDER, output_csv="河南叶片纹理特征.csv")
    IMAGE_FOLDER = "海南裁剪"
    process_folder(IMAGE_FOLDER, output_csv="海南叶片纹理特征.csv")


已保存 839 张图像的纹理特征到 河南叶片纹理特征.csv
已保存 631 张图像的纹理特征到 海南叶片纹理特征.csv
