### 该文档旨在检查手工爬取数据的准确性

In [None]:
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
from shapely.ops import nearest_points
import geopandas as gpd
from tqdm import tqdm

In [None]:
# Load the processing-site layer and the building layer, reprojecting both to
# the same CRS (EPSG:32632) so distances between them are directly comparable.
# Forward slashes are used because they resolve on Windows as well as POSIX,
# whereas the backslash form only works on Windows.
gdf_processing = gpd.read_file("origin_data/04_transformation_industry").to_crs(epsg=32632)
gdf_buildings = gpd.read_file("origin_data/10_built_environment").to_crs(epsg=32632)

# Fetch the building geometry column once instead of on every iteration.
building_geoms = gdf_buildings.geometry

# For every processing site, collect the geometry of the closest building.
# NOTE: this is a brute-force scan (one distance pass over all buildings per
# site); the tqdm bar shows progress through the sites.
nearest_geoms = []
for site_geom in tqdm(gdf_processing.geometry, desc="查找最近建筑"):
    # Index label of the building with the smallest distance to this site.
    nearest_label = building_geoms.distance(site_geom).idxmin()
    nearest_geoms.append(building_geoms.loc[nearest_label])

# Store the result as a real geometry column that carries the buildings' CRS,
# rather than as an object column of bare shapely geometries.
gdf_processing["nearest_building_geom"] = gpd.GeoSeries(
    nearest_geoms, index=gdf_processing.index, crs=gdf_buildings.crs
)
# Copy of the sites whose active geometry is the matched building geometry.
gdf_processing_with_poly = gdf_processing.set_geometry("nearest_building_geom")

### 输出点和面的位置以方便查看

In [None]:
import os

import geopandas as gpd
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from matplotlib.patches import Patch

# Directory that receives one debug image per processing site.
output_dir =  "playground/Pro_Build_Debug"
os.makedirs(output_dir, exist_ok=True)

# Make the original "geometry" column the active geometry again.
gdf_processing_with_poly = gdf_processing_with_poly.set_geometry("geometry")

# Shared drawing style for the matched building geometry.
building_style = dict(facecolor='lightblue', edgecolor='black', linewidth=1, alpha=0.6)

# Legend entries are built by hand. geopandas draws polygons as a
# PatchCollection, for which matplotlib cannot derive a legend handle, so a
# plain `legend()` call would leave the building entry out of the legend.
legend_handles = [
    Patch(label='建筑面', **building_style),
    Line2D([], [], linestyle='none', marker='o', color='red', label='加工厂点'),
]

# One figure per row: the matched building geometry with the site on top.
for idx, row in gdf_processing.iterrows():
    fig, ax = plt.subplots(figsize=(6, 6))

    # Nearest building geometry.
    gpd.GeoSeries([row["nearest_building_geom"]]).plot(ax=ax, **building_style)

    # Processing-site geometry.
    gpd.GeoSeries([row["geometry"]]).plot(ax=ax, color='red', markersize=30)

    ax.set_title(f"Index {idx}")
    ax.axis('equal')
    ax.axis('off')
    ax.legend(handles=legend_handles)

    # Save the image, then close the figure so open figures do not pile up
    # across iterations.
    fig.tight_layout()
    fig.savefig(os.path.join(output_dir, f"{idx}.png"), dpi=150)
    plt.close(fig)

print(f"✅ 已保存 {len(gdf_processing)} 张图像到 {output_dir}/")

In [None]:
import os

# Destination shapefile for the matched building geometries.
output_path = "processing_data/img/processing_red/processing_building.shp"

# Select the building geometry as the active geometry *before* subsetting the
# columns, then rename it with rename_geometry. Subsetting first would drop
# the currently active geometry column, and a plain column rename does not
# reliably mark the renamed column as the active geometry.
gdf_to_save = (
    gdf_processing_with_poly
    .set_geometry("nearest_building_geom")[["DENOMINAZI", "Type", "nearest_building_geom"]]
    .rename_geometry("geometry")
)
gdf_to_save = gdf_to_save.set_crs("EPSG:32632")

# Create the target directory first so the write does not depend on it
# already existing.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
gdf_to_save.to_file(output_path)