In [9]:
import pyproj

# PROJ resource directory taken from the path below (a conda environment
# named "myenv"). It is registered with pyproj, and the directory pyproj
# reports afterwards is echoed so the setting is visible in the cell output.
PROJ_DATA_DIR = "/home/yifan/anaconda3/envs/myenv/share/proj"

pyproj.datadir.set_data_dir(PROJ_DATA_DIR)
print(pyproj.datadir.get_data_dir())

/home/yifan/anaconda3/envs/myenv/share/proj


In [10]:
import geopandas as gpd
import pandas as pd
import numpy as np
from shapely.geometry import box
from sklearn.metrics import precision_score, recall_score, f1_score, jaccard_score
import rasterio
from rasterio import features
from rasterio.features import geometry_mask
from rasterio.transform import from_origin

In [11]:
# Rasterization helper
def rasterize_polygon(gdf, transform, width, height):
    """Burn every geometry of ``gdf`` into a binary raster.

    Parameters
    ----------
    gdf : object with an iterable ``geometry`` attribute
        Typically a ``geopandas.GeoDataFrame``.
    transform : affine transform
        Passed unchanged to ``rasterio.features.rasterize``.
    width, height : int
        Output size in pixels; the returned array has shape
        ``(height, width)``.

    Returns
    -------
    numpy.ndarray
        ``int16`` array holding 1 where a geometry was burned and 0 elsewhere.
        An all-zero array is returned when ``gdf`` contributes no usable
        geometry.
    """
    # Null and empty geometries cannot be burned, so they are dropped up front.
    shapes = [
        (geom, 1)
        for geom in gdf.geometry
        if geom is not None and not geom.is_empty
    ]

    # rasterio can raise ValueError when it is handed no valid shapes; an
    # empty layer simply means "nothing burned", i.e. a raster of zeros.
    if not shapes:
        return np.zeros((height, width), dtype='int16')

    return features.rasterize(
        shapes,
        out_shape=(height, width),
        transform=transform,
        fill=0,
        dtype='int16',
    )

In [12]:
# Compute the change between two yearly forest masks and return it as a raster
def calculate_forest_loss(forest_mask_2020_path, forest_mask_2021_path, pixel_size=0.0001):
    """Rasterize two forest-mask vector files on a shared grid and diff them.

    Parameters
    ----------
    forest_mask_2020_path, forest_mask_2021_path : str or path-like
        Vector files readable by ``geopandas.read_file``.
    pixel_size : float, optional
        Target cell size, expressed in the coordinate units of the layers.
        Must be positive. Defaults to 0.0001.

    Returns
    -------
    tuple
        ``(difference_raster, transform, width, height)`` where
        ``difference_raster`` is an ``int16`` array with 1 = forest loss
        (forest in 2020, none in 2021), -1 = forest recovery and 0 = no
        change; ``transform`` is the affine transform of the grid and
        ``width`` / ``height`` are its size in pixels.

    Raises
    ------
    ValueError
        If ``pixel_size`` is not positive, or if both layers are empty so no
        extent can be derived.

    Notes
    -----
    The loss and recovery pixel counts are printed as a side effect.
    """
    # Validate before touching the file system so bad arguments fail fast.
    if not pixel_size > 0:
        raise ValueError(f"pixel_size must be a positive number, got {pixel_size!r}")

    # Read the 2020 and 2021 forest masks.
    forest_2020 = gpd.read_file(forest_mask_2020_path)
    forest_2021 = gpd.read_file(forest_mask_2021_path)

    # Both layers are burned onto one grid, so they must share a CRS; when
    # both declare one and they differ, bring 2021 into the 2020 CRS.
    if (
        forest_2020.crs is not None
        and forest_2021.crs is not None
        and forest_2020.crs != forest_2021.crs
    ):
        forest_2021 = forest_2021.to_crs(forest_2020.crs)

    # Union bounding box of both years. An empty layer reports NaN bounds,
    # which the nan-aware reductions ignore as long as the other layer has data.
    bounds = np.vstack([forest_2020.total_bounds, forest_2021.total_bounds])
    if np.isnan(bounds).all():
        raise ValueError("Both forest masks are empty; cannot derive a raster extent.")
    minx, miny = np.nanmin(bounds[:, :2], axis=0)
    maxx, maxy = np.nanmax(bounds[:, 2:], axis=0)

    # Grid size in pixels; clamp to one cell so an extent smaller than
    # pixel_size does not yield a zero-sized raster.
    width = max(1, int((maxx - minx) / pixel_size))
    height = max(1, int((maxy - miny) / pixel_size))

    # Affine transform mapping the grid onto the union bounding box.
    transform = rasterio.transform.from_bounds(minx, miny, maxx, maxy, width, height)

    # Burn both years onto the shared grid.
    raster_2020 = rasterize_polygon(forest_2020, transform, width, height)
    raster_2021 = rasterize_polygon(forest_2021, transform, width, height)

    # Difference raster: 1 = loss, -1 = recovery, 0 = unchanged.
    lost = np.logical_and(raster_2020 == 1, raster_2021 == 0)
    recovered = np.logical_and(raster_2020 == 0, raster_2021 == 1)
    difference_raster = np.zeros_like(raster_2020, dtype='int16')
    difference_raster[lost] = 1
    difference_raster[recovered] = -1

    # Pixel totals for each change class.
    forest_loss_count = np.sum(difference_raster == 1)
    forest_recovery_count = np.sum(difference_raster == -1)

    print(f"Forest loss area (in pixels): {forest_loss_count}")
    print(f"Forest recovery area (in pixels): {forest_recovery_count}")

    # Return the raster together with the grid metadata needed to reuse it.
    return difference_raster, transform, width, height

In [13]:
def manual_forest_loss(transform, width, height, merged_shp_path):
    """Rasterize the merged manual-annotation shapefile onto a given grid.

    Parameters
    ----------
    transform, width, height
        Grid definition forwarded to ``rasterize_polygon``.
    merged_shp_path : str or path-like
        Vector file with the merged annotations, read with
        ``geopandas.read_file``.

    Returns
    -------
    numpy.ndarray
        ``int16`` array that is 1 wherever the rasterized annotation is
        positive and 0 elsewhere.
    """
    annotations = gpd.read_file(merged_shp_path)
    burned = rasterize_polygon(annotations, transform, width, height)

    # Collapse to strict 0/1: any positive cell counts as annotated.
    return (burned > 0).astype('int16')

In [14]:
# Compare the difference raster against the manual annotation
def calculate_metrics(difference_raster, merged_raster):
    """Score predicted forest loss against the manual annotation, pixel-wise.

    Parameters
    ----------
    difference_raster : array-like
        Change raster; cells equal to 1 are treated as predicted forest loss,
        every other value as "not loss".
    merged_raster : array-like
        Annotation raster of the same shape; any non-zero cell is treated as
        annotated loss.

    Returns
    -------
    tuple
        ``(precision, recall, f1, iou)``. ``(None, None, None, None)`` is
        returned (after printing a warning) when either the annotation or the
        prediction has no positive pixel among the compared cells.

    Raises
    ------
    ValueError
        If the two rasters do not have the same shape.
    """
    difference_raster = np.asarray(difference_raster)
    merged_raster = np.asarray(merged_raster)

    # Rasters with equal size but different shape would otherwise be compared
    # cell-by-cell in the wrong order once flattened.
    if difference_raster.shape != merged_raster.shape:
        raise ValueError(
            "difference_raster and merged_raster must have the same shape, got "
            f"{difference_raster.shape} and {merged_raster.shape}"
        )

    # Only cells that are non-zero in at least one raster take part in the
    # comparison; cells that are zero in both are left out.
    annotated = merged_raster != 0
    valid_mask = (difference_raster != 0) | annotated

    # Boolean indexing yields 1-D arrays in row-major order for both rasters.
    y_true = annotated[valid_mask].astype('int16')  # manual annotation, binarised
    y_pred = (difference_raster[valid_mask] == 1).astype('int16')  # predicted loss

    # Without any positive in truth or prediction the scores are meaningless.
    if y_true.sum() == 0 or y_pred.sum() == 0:
        print("警告：y_true或y_pred中没有正样本或负样本，可能导致召回率或精确率异常。")
        return None, None, None, None

    # Evaluation metrics for the positive (loss) class.
    precision = precision_score(y_true, y_pred, zero_division=0)
    recall = recall_score(y_true, y_pred, zero_division=0)
    f1 = f1_score(y_true, y_pred, zero_division=0)
    iou = jaccard_score(y_true, y_pred, zero_division=0)

    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1-score: {f1}")
    print(f"IoU: {iou}")

    return precision, recall, f1, iou

In [15]:
def calculate_metrics_between_rasters(pred_raster, true_raster):
    """Count TP / FP / FN between two rasters and derive the usual scores.

    Parameters
    ----------
    pred_raster : array-like
        Prediction raster; cells equal to 1 are the positive class. Any other
        value (0, or e.g. -1 in a change raster) counts as "not predicted".
    true_raster : array-like
        Ground-truth raster of the same shape; 1 marks the positive class and
        0 the negative class.

    Returns
    -------
    tuple
        ``(TP, FP, FN, precision, recall, f1, iou)`` where
        TP = predicted 1 and truth 1, FP = predicted 1 and truth 0,
        FN = not predicted 1 and truth 1. Each score is 0.0 when its
        denominator is zero.

    Raises
    ------
    AssertionError
        If the two rasters do not have the same shape.
    """
    pred_raster = np.asarray(pred_raster)
    true_raster = np.asarray(true_raster)

    # The rasters must be aligned cell-for-cell.
    assert pred_raster.shape == true_raster.shape, "两个栅格的尺寸必须相同"

    predicted = pred_raster == 1
    actual = true_raster == 1

    TP = int(np.count_nonzero(predicted & actual))
    FP = int(np.count_nonzero(predicted & (true_raster == 0)))
    # A miss is any truth pixel that was not predicted as 1. Testing
    # `pred == 0` instead would silently drop cells holding other values
    # (such as -1) and overstate recall and IoU.
    FN = int(np.count_nonzero(~predicted & actual))

    # Each ratio falls back to 0.0 when its denominator is empty.
    precision = TP / (TP + FP) if TP + FP > 0 else 0.0
    recall = TP / (TP + FN) if TP + FN > 0 else 0.0

    if precision + recall > 0:
        f1 = 2 * (precision * recall) / (precision + recall)
    else:
        f1 = 0.0

    iou = TP / (TP + FP + FN) if TP + FP + FN > 0 else 0.0

    return TP, FP, FN, precision, recall, f1, iou

In [16]:
# Step 1: build the 2020 -> 2021 difference raster and the grid it lives on
difference_raster, transform, width, height = calculate_forest_loss(
    forest_mask_2020_path='Zone_Dataset_Mask_2020.shp',
    forest_mask_2021_path='Zone_Dataset_Mask_2021.shp',
)

# Step 2: rasterize the manual annotation onto that same grid
merged_raster = manual_forest_loss(
    transform=transform,
    width=width,
    height=height,
    merged_shp_path='622_975_2021.shp',
)

# Step 3: score the predicted loss against the annotation
precision, recall, f1, iou = calculate_metrics(
    difference_raster=difference_raster,
    merged_raster=merged_raster,
)

# Alternative (disabled): calculate_metrics_between_rasters(difference_raster, merged_raster)
# additionally returns the raw TP / FP / FN counts.


Forest loss area (in pixels): 63247
Forest recovery area (in pixels): 0
Precision: 0.7570477651113887
Recall: 0.6294334165899829
F1-score: 0.6873676579311929
IoU: 0.5236558904589003
