In [1]:
# ==== Kurulum & Yol Tanımları ====
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# ---- Dosya/dizin yolları----
BASE_NEON = "/content/drive/MyDrive/NEON_struct-plant"
BASE_NTE  = "/content/drive/MyDrive/weecology-NeonTreeEvaluation-d0b90bc"

ANN_DIR   = f"{BASE_NTE}/annotations"
ANN_WS    = f"{BASE_NTE}/annotations_with_species"
POLY_GPKG = f"{ANN_WS}/voc_polygons_with_species_FIXED.gpkg"

REPORT_DIR = f"{ANN_WS}/reports"
import os
os.makedirs(REPORT_DIR, exist_ok=True)

print("NEON:", BASE_NEON)
print("NTE :", BASE_NTE)
print("Reports ->", REPORT_DIR)


Mounted at /content/drive
NEON: /content/drive/MyDrive/NEON_struct-plant
NTE : /content/drive/MyDrive/weecology-NeonTreeEvaluation-d0b90bc
Reports -> /content/drive/MyDrive/weecology-NeonTreeEvaluation-d0b90bc/annotations_with_species/reports


In [2]:
# ==== Kütüphaneler ====
!pip -q install geopandas fiona shapely pyproj rtree

import os, re, glob, xml.etree.ElementTree as ET
import numpy as np
import pandas as pd
import geopandas as gpd
import fiona


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.6/56.6 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.2/17.2 MB[0m [31m61.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m507.6/507.6 kB[0m [31m21.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
# ==== NEON CSV kontrolü(örnekleme) ====
for root, dirs, files in os.walk(BASE_NEON):
    level = root.replace(BASE_NEON, "").count(os.sep)
    indent = " " * (2 * level)
    print(f"{indent}{os.path.basename(root) or '/'}")
    for f in files[:10]:
        print(f"{indent}  - {f}")

csvs = glob.glob(os.path.join(BASE_NEON, "**/*.csv"), recursive=True)
print("\nToplam CSV:", len(csvs))
print("Örnek 5:", [os.path.basename(c) for c in csvs[:5]])

# İlk 5 dosyadan kolon başlıklarını denetle
for c in csvs[:5]:
    try:
        df = pd.read_csv(c, nrows=1)
        print(f"\n--- {os.path.basename(c)} ---")
        print("Kolonlar:", list(df.columns))
    except Exception as e:
        print(f"\n--- {os.path.basename(c)} --- Hata:", e)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
    - NEON.D02.BLAN.DP1.10098.001.vst_perplotperyear.2021-12.basic.20241118T073304Z.csv
    - NEON.D02.BLAN.DP0.10098.001.categoricalCodes.20241118T073304Z.csv
    - NEON.D02.BLAN.DP1.10098.001.EML.20211223-20211223.20250129T000730Z.xml
    - NEON.D02.BLAN.DP1.10098.001.variables.20241118T073304Z.csv
    - NEON.D02.BLAN.DP0.10098.001.validation.20241118T073304Z.csv
  NEON.D07.GRSM.DP1.10098.001.2020-11.basic.20250129T000730Z.RELEASE-2025
    - NEON.D07.GRSM.DP1.10098.001.EML.20201112-20201112.20250129T000730Z.xml
    - NEON.D07.GRSM.DP1.10098.001.readme.20250129T000730Z.txt
    - NEON.D07.GRSM.DP1.10098.001.vst_apparentindividual.2020-11.basic.20241118T160319Z.csv
    - NEON.D07.GRSM.DP1.10098.001.vst_mappingandtagging.basic.20241118T160319Z.csv
    - NEON.D07.GRSM.DP1.10098.001.vst_perplotperyear.2020-11.basic.20241118T160319Z.csv
    - NEON.D07.GRSM.DP0.10098.001.validation.20241118T160319Z.csv
    - NEON.D07.GRSM.DP0.1

In [4]:
# ==== Field noktaları (2017–2020) ====
itc_csvs = glob.glob(os.path.join(BASE_NEON, "*_individuals_with_coords.csv"))

gdfs = []
for p in itc_csvs:
    # yıl & site ismini dosya adından çıkar
    m_year = re.search(r"_(\d{4})-", os.path.basename(p))
    year = int(m_year.group(1)) if m_year else None
    if (year is None) or not (2017 <= year <= 2020):
        continue

    df = pd.read_csv(p, low_memory=False)
    # lon/lat için esnek seçim
    cl = {c.lower(): c for c in df.columns}
    lon = cl.get("lon") or cl.get("longitude")
    lat = cl.get("lat") or cl.get("latitude")
    if not lon or not lat:
        continue

    g = gpd.GeoDataFrame(
        df,
        geometry=gpd.points_from_xy(df[lon].astype(float), df[lat].astype(float)),
        crs="EPSG:4326"
    ).to_crs(5070)

    m_site = re.search(r"^([A-Z]{3,4})_", os.path.basename(p))
    g["site_field"] = m_site.group(1) if m_site else None
    g["year_field"] = year

    keep = [c for c in ["individualID","scientificName","year_field","site_field","geometry"] if c in g.columns or c=="geometry"]
    gdfs.append(g[keep])

field_pts = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True), crs=5070) if gdfs else gpd.GeoDataFrame(crs=5070)
print("Field noktaları:", len(field_pts))
field_pts.head(3)


Field noktaları: 582


Unnamed: 0,individualID,scientificName,year_field,site_field,geometry
0,NEON.PLA.D17.SJER.00037,Quercus wislizeni A. DC.,2019,SJER,POINT (-2067133.252 1822983.277)
1,NEON.PLA.D17.SJER.00148,Quercus wislizeni A. DC.,2019,SJER,POINT (-2067147.879 1822638.099)
2,NEON.PLA.D17.SJER.00149,Quercus wislizeni A. DC.,2019,SJER,POINT (-2067147.693 1822638.997)


In [5]:
# ==== Poligon okuma & yıl/site çıkarma ====
layers = fiona.listlayers(POLY_GPKG)
pref = ["polygons_with_species", "Tree_to_Species", "all_matches"]
layer = next((L for L in pref if L in layers), layers[0])
print("Seçilen layer:", layer)

polys = gpd.read_file(POLY_GPKG, layer=layer).to_crs(5070)
print("Poligon sayısı (ham):", len(polys))

# Kaynak kolon adayları (xml/rgb/file/name)
cand_keys = [c for c in polys.columns if any(k in c.lower() for k in ["source","xml","rgb","file","name","filename"])]

def extract_year_any(row):
    for c in cand_keys:
        m = re.search(r"(20\d{2})", str(row.get(c)))
        if m: return int(m.group(1))
    return None

def extract_site_any(row):
    for c in cand_keys:
        m = re.search(r"\b([A-Z]{3,4})\b", str(row.get(c)))
        if m: return m.group(1)
    return None

polys["year_ann"] = polys.apply(extract_year_any, axis=1)
polys["site_ann"] = polys.apply(extract_site_any, axis=1)
polys = polys[(polys["year_ann"]>=2017) & (polys["year_ann"]<=2020)]
print("Poligon (2017–2020):", len(polys))
polys.head(3)


Seçilen layer: polygons_with_species
Poligon sayısı (ham): 221958
Poligon (2017–2020): 221512


Unnamed: 0,source_xml_x,source_rgb_x,label_x,ann_index,source_xml_y,source_rgb_y,label_y,index_right,individualID,scientificName,lat,lon,plotID,_coord_src,dist_m,match_type,geometry,year_ann,site_ann
0,10.xml,2018_SJER_3_255000_4107000_image_298.tif,Tree,0,10.xml,2018_SJER_3_255000_4107000_image_298.tif,Tree,1569,NEON.PLA.D17.SJER.04149,Quercus douglasii Hook. & Arn.,37.082927,-119.743247,,LL,810.322945,,"POLYGON ((-2069836.038 1820410.01, -2069833.61...",2018.0,
1,10.xml,2018_SJER_3_255000_4107000_image_298.tif,Tree,0,10.xml,2018_SJER_3_255000_4107000_image_298.tif,Tree,1181,NEON.PLA.D17.SJER.04149,Quercus douglasii Hook. & Arn.,37.082927,-119.743247,,LL,810.322945,,"POLYGON ((-2069836.038 1820410.01, -2069833.61...",2018.0,
2,10.xml,2018_SJER_3_255000_4107000_image_298.tif,Tree,0,10.xml,2018_SJER_3_255000_4107000_image_298.tif,Tree,1957,NEON.PLA.D17.SJER.04149,Quercus douglasii Hook. & Arn.,37.082927,-119.743247,,LL,810.322945,,"POLYGON ((-2069836.038 1820410.01, -2069833.61...",2018.0,


In [6]:
# ==== Join edilmiş tabloyu yükle & species coalesce ====
JOIN_CSV = f"{ANN_WS}/Tree_to_FieldSpecies_joined_FIXED.csv"

joined = None
if os.path.exists(JOIN_CSV):
    joined = pd.read_csv(JOIN_CSV, low_memory=False)
    print("Yüklendi:", os.path.basename(JOIN_CSV), "| Satır:", len(joined))
    # species benzeri kolonları topla ve ilk dolu olanı 'species' olarak birleştir
    bases = ["scientificName", "taxonID", "species", "species_pred", "sci_name", "speciesName", "taxon"]
    cand_cols = []
    for c in joined.columns:
        lc = c.lower()
        for b in bases:
            if lc == b.lower() or lc.startswith(b.lower()+"_"):
                cand_cols.append(c)
    cand_cols = list(dict.fromkeys(cand_cols))

    joined["species"] = None
    for c in cand_cols:
        joined["species"] = joined["species"].fillna(joined[c])

    joined["species"] = joined["species"].replace(["", "None", "nan"], pd.NA)
    print("Etiketli (species not null):", int(joined["species"].notna().sum()))
else:
    print("Join CSV bulunamadı, bu hücreyi atlıyorum:", JOIN_CSV)


Yüklendi: Tree_to_FieldSpecies_joined_FIXED.csv | Satır: 221958
Etiketli (species not null): 221958


In [7]:
# ==== En iyi eşleşme seçimi & SAFE kaydet ====
if joined is not None:
    # match_type & mesafe kolonlarını tekilleştir
    match_candidates = [c for c in joined.columns if c.lower().startswith("match_type")]
    dist_candidates  = [c for c in joined.columns if c.lower().startswith("dist_m")]

    def coalesce_row(row, cols):
        for c in cols:
            v = row.get(c)
            if pd.notna(v):
                return v
        return np.nan

    joined["match_type_c"] = joined.apply(lambda r: coalesce_row(r, match_candidates), axis=1)
    joined["dist_m_c"]     = joined.apply(lambda r: coalesce_row(r, dist_candidates), axis=1)
    joined["match_type_c"] = joined["match_type_c"].fillna("no_hit")

    # yoksa ann_index = index
    if "ann_index" not in joined.columns:
        joined["ann_index"] = joined.index

    rank_map = {"within":0, "nearest_centroid":1, "stem_buffer_intersects":2, "nearest":3, "no_hit":99}
    joined["_rank"] = joined["match_type_c"].map(rank_map).fillna(99).astype(int)
    joined["_dist"] = pd.to_numeric(joined["dist_m_c"], errors="coerce").fillna(1e12)

    best = (
        joined.sort_values(["ann_index","_rank","_dist"])
              .drop_duplicates(subset=["ann_index"], keep="first")
              .copy()
    )

    # güven etiketi
    NEAREST_THRESH_M = 3.0
    def conf(mt, d):
        if mt == "within": return "high"
        if mt in ("nearest_centroid","stem_buffer_intersects","nearest"):
            if pd.isna(d): return "medium"
            return "medium" if float(d) <= NEAREST_THRESH_M else "none"
        return "none"

    best["confidence"] = [conf(mt, d) for mt, d in zip(best["match_type_c"], best["dist_m_c"])]

    # çıktılar
    OUT_DEDUP = f"{ANN_WS}/Tree_to_FieldSpecies_JOINED_dedup.csv"
    OUT_SAFE  = f"{ANN_WS}/Tree_to_FieldSpecies_JOINED_SAFE.csv"
    best.drop(columns=["geometry"], errors="ignore").to_csv(OUT_DEDUP, index=False)
    best[(best["confidence"].isin(["high","medium"])) & best["species"].notna()] \
        .drop(columns=["geometry"], errors="ignore").to_csv(OUT_SAFE, index=False)

    print("Kaydedildi:")
    print(" -", OUT_DEDUP)
    print(" -", OUT_SAFE)

    # kısa özet
    total_poly   = best["ann_index"].nunique()
    within_n     = int((best["match_type_c"]=="within").sum())
    nearcent_n   = int((best["match_type_c"]=="nearest_centroid").sum())
    bufint_n     = int((best["match_type_c"]=="stem_buffer_intersects").sum())
    nearest_n    = int((best["match_type_c"]=="nearest").sum())
    labeled_n    = int(best["species"].notna().sum())
    safe_n       = int(((best["confidence"].isin(["high","medium"])) & best["species"].notna()).sum())

    print(f"\nPoligon: {total_poly} | within:{within_n}  nearest_centroid:{nearcent_n}  buffer_intersects:{bufint_n}  nearest:{nearest_n}")
    print(f"Etiketli: {labeled_n} | SAFE (high/medium & species): {safe_n}")
else:
    print("joined yok, bu hücre atlandı.")


Kaydedildi:
 - /content/drive/MyDrive/weecology-NeonTreeEvaluation-d0b90bc/annotations_with_species/Tree_to_FieldSpecies_JOINED_dedup.csv
 - /content/drive/MyDrive/weecology-NeonTreeEvaluation-d0b90bc/annotations_with_species/Tree_to_FieldSpecies_JOINED_SAFE.csv

Poligon: 221958 | within:2992  nearest_centroid:10  buffer_intersects:260  nearest:140
Etiketli: 221958 | SAFE (high/medium & species): 3402


In [8]:
# ==== Tür raporları ====
# SAFE varsa onu kullanıyoruz yoksa DEDUP
SAFE = f"{ANN_WS}/Tree_to_FieldSpecies_JOINED_SAFE.csv"
DEDUP= f"{ANN_WS}/Tree_to_FieldSpecies_JOINED_dedup.csv"
path_in = SAFE if os.path.exists(SAFE) else (DEDUP if os.path.exists(DEDUP) else None)

if path_in is None:
    raise SystemExit("Rapor için SAFE/DEDUP bulunamadı.")

df = pd.read_csv(path_in, low_memory=False)
print("Kaynak:", os.path.basename(path_in), "| Satır:", len(df))

def pick_col(cands):
    for c in cands:
        if c in df.columns: return c
    return None

col_species = pick_col(["species","scientificName","scientificName_x","scientificName_y","taxonID"])
col_xml     = pick_col(["xml_file","source_xml","source_xml_x","source_xml_y","source","filename"])
col_rgb     = pick_col(["rgb_file","source_rgb","source_rgb_x","source_rgb_y","filename"])
col_match   = pick_col(["match_type_c","match_type","match_type_x","match_type_y"])
col_conf    = pick_col(["confidence"])
col_year    = pick_col(["year_ann","year_field"])
col_site    = pick_col(["site_ann","site_field"])

if not col_species:
    raise SystemExit("Tür kolonu bulunamadı (species/scientificName/taxonID).")

# xml & rgb "stem" oluştur (uzantısız gövde)
def stem(x):
    if pd.isna(x): return None
    b = os.path.basename(str(x))
    b = re.sub(r"\.(xml|tif|tiff)$", "", b, flags=re.IGNORECASE)
    return b

df["xml_file"] = df[col_xml].map(stem) if col_xml else None
df["rgb_file"] = df[col_rgb].map(stem) if col_rgb else None

labeled = df[df[col_species].notna()].copy()

# long & pivot tablolar
by_xml_species = labeled.groupby(["xml_file", col_species], dropna=False).size().reset_index(name="count") if "xml_file" in labeled.columns else pd.DataFrame()
by_rgb_species = labeled.groupby(["rgb_file", col_species], dropna=False).size().reset_index(name="count") if "rgb_file" in labeled.columns else pd.DataFrame()

pivot_xml = by_xml_species.pivot_table(index="xml_file", columns=col_species, values="count", fill_value=0, aggfunc="sum") if len(by_xml_species) else pd.DataFrame()
pivot_rgb = by_rgb_species.pivot_table(index="rgb_file", columns=col_species, values="count", fill_value=0, aggfunc="sum") if len(by_rgb_species) else pd.DataFrame()

xml_summary = (by_xml_species.groupby("xml_file")["count"].sum().reset_index().rename(columns={"count":"total_trees_labeled"})) if len(by_xml_species) else pd.DataFrame()
rgb_summary = (by_rgb_species.groupby("rgb_file")["count"].sum().reset_index().rename(columns={"count":"total_trees_labeled"})) if len(by_rgb_species) else pd.DataFrame()

top_species = labeled[col_species].value_counts().head(20).reset_index().rename(columns={"index":"species", col_species:"n"})

# opsiyonel ek
extra = {}
if col_match:
    extra["match_type_counts.csv"] = labeled[col_match].value_counts(dropna=False).rename_axis("match_type").reset_index(name="n")
if col_conf:
    extra["confidence_counts.csv"] = labeled[col_conf].value_counts(dropna=False).rename_axis("confidence").reset_index(name="n")
if col_year:
    extra["by_year_species.csv"] = labeled.groupby([col_year, col_species]).size().reset_index(name="count")
if col_site:
    extra["by_site_species.csv"] = labeled.groupby([col_site, col_species]).size().reset_index(name="count")

# Kaydet
p_xml_long  = os.path.join(REPORT_DIR, "per_xml_species_counts.csv")
p_rgb_long  = os.path.join(REPORT_DIR, "per_rgb_species_counts.csv")
p_xml_pivot = os.path.join(REPORT_DIR, "per_xml_species_pivot.csv")
p_rgb_pivot = os.path.join(REPORT_DIR, "per_rgb_species_pivot.csv")
p_xml_sum   = os.path.join(REPORT_DIR, "per_xml_summary.csv")
p_rgb_sum   = os.path.join(REPORT_DIR, "per_rgb_summary.csv")
p_top       = os.path.join(REPORT_DIR, "top20_species_overall.csv")

if len(by_xml_species): by_xml_species.to_csv(p_xml_long, index=False)
if len(by_rgb_species): by_rgb_species.to_csv(p_rgb_long, index=False)
if len(pivot_xml):      pivot_xml.to_csv(p_xml_pivot)
if len(pivot_rgb):      pivot_rgb.to_csv(p_rgb_pivot)
if len(xml_summary):    xml_summary.to_csv(p_xml_sum, index=False)
if len(rgb_summary):    rgb_summary.to_csv(p_rgb_sum, index=False)
top_species.to_csv(p_top, index=False)
for name, tbl in extra.items():
    tbl.to_csv(os.path.join(REPORT_DIR, name), index=False)

print("Raporlar kaydedildi:")
for p in [p_xml_long, p_rgb_long, p_xml_pivot, p_rgb_pivot, p_xml_sum, p_rgb_sum, p_top] + \
         [os.path.join(REPORT_DIR, n) for n in extra.keys()]:
    print(" -", p)

# Konsola kısa örnek
if len(by_xml_species):
    print("\n— by_xml_species örnek —")
    print(by_xml_species.head(10).to_string(index=False))
if len(xml_summary):
    print("\n— En çok 10 XML (etiketli ağaç sayısına göre) —")
    print(xml_summary.sort_values("total_trees_labeled", ascending=False).head(10).to_string(index=False))
print("\n— Top 15 tür —")
print(top_species.head(15).to_string(index=False))


Kaynak: Tree_to_FieldSpecies_JOINED_SAFE.csv | Satır: 3402
Raporlar kaydedildi:
 - /content/drive/MyDrive/weecology-NeonTreeEvaluation-d0b90bc/annotations_with_species/reports/per_xml_species_counts.csv
 - /content/drive/MyDrive/weecology-NeonTreeEvaluation-d0b90bc/annotations_with_species/reports/per_rgb_species_counts.csv
 - /content/drive/MyDrive/weecology-NeonTreeEvaluation-d0b90bc/annotations_with_species/reports/per_xml_species_pivot.csv
 - /content/drive/MyDrive/weecology-NeonTreeEvaluation-d0b90bc/annotations_with_species/reports/per_rgb_species_pivot.csv
 - /content/drive/MyDrive/weecology-NeonTreeEvaluation-d0b90bc/annotations_with_species/reports/per_xml_summary.csv
 - /content/drive/MyDrive/weecology-NeonTreeEvaluation-d0b90bc/annotations_with_species/reports/per_rgb_summary.csv
 - /content/drive/MyDrive/weecology-NeonTreeEvaluation-d0b90bc/annotations_with_species/reports/top20_species_overall.csv
 - /content/drive/MyDrive/weecology-NeonTreeEvaluation-d0b90bc/annotations_w

In [9]:
# ==== XML coverage ====
XMLS = glob.glob(os.path.join(ANN_DIR, "*.xml"))
print("XML dosyası sayısı:", len(XMLS))

# xml bazında toplam 'object' (poligon) say
rows = []
for f in XMLS:
    try:
        root = ET.parse(f).getroot()
        nobj = len(root.findall(".//object"))
    except Exception:
        nobj = 0
    rows.append({"xml_file": os.path.basename(f), "n_objects": nobj})
df_counts = pd.DataFrame(rows)
total_polys_ann = int(df_counts["n_objects"].sum())
print("Toplam poligon (annotations):", total_polys_ann)

# SAFE yükleme ve xml_file gövdesini üretme
SAFE = f"{ANN_WS}/Tree_to_FieldSpecies_JOINED_SAFE.csv"
if not os.path.exists(SAFE):
    raise SystemExit("SAFE bulunamadı, önce Cell 7'yi başarıyla üretmelisin.")

df_safe = pd.read_csv(SAFE, low_memory=False)

xml_cands = [c for c in ["xml_file","source_xml","source_xml_x","source_xml_y","source","filename","source_rgb","source_rgb_x","source_rgb_y"] if c in df_safe.columns]

def to_xml_name(v):
    if pd.isna(v): return None
    b = os.path.basename(str(v))
    if re.search(r"\.xml$", b, flags=re.IGNORECASE):
        return b
    # RGB isminden XML türet
    return re.sub(r"\.(tif|tiff)$", ".xml", b, flags=re.IGNORECASE)

if "xml_file" not in df_safe.columns:
    df_safe["xml_file"] = None
    for c in xml_cands:
        df_safe["xml_file"] = df_safe["xml_file"].fillna(df_safe[c].map(to_xml_name))
df_safe["xml_file"] = df_safe["xml_file"].fillna("UNKNOWN.xml")

safe_polys = len(df_safe)
coverage = 100.0 * safe_polys / total_polys_ann if total_polys_ann>0 else 0.0
print(f"Eşleşme oranı: {safe_polys}/{total_polys_ann} = %{coverage:.2f}")

# dosya bazlı coverage
by_file = df_safe["xml_file"].value_counts().rename_axis("xml_file").reset_index(name="safe_count")
df_merged = df_counts.merge(by_file, on="xml_file", how="left").fillna({"safe_count":0})
df_merged["safe_count"] = df_merged["safe_count"].astype(int)
df_merged["coverage_%"] = (df_merged["safe_count"] / df_merged["n_objects"] * 100).round(2)

# site bazlı coverage
def extract_site(s):
    m = re.search(r"\b([A-Z]{3,4})\b", str(s))
    return m.group(1) if m else "UNK"

df_merged["site"] = df_merged["xml_file"].map(extract_site)
site_cov = df_merged.groupby("site").agg(
    files=("xml_file","count"),
    ann_polys=("n_objects","sum"),
    safe_polys=("safe_count","sum")
).reset_index()
site_cov["coverage_%"] = (site_cov["safe_polys"] / site_cov["ann_polys"] * 100).round(2)

# Kaydet
out_per_file = os.path.join(REPORT_DIR, "coverage_per_file.csv")
out_per_site = os.path.join(REPORT_DIR, "coverage_per_site.csv")
df_merged.to_csv(out_per_file, index=False)
site_cov.to_csv(out_per_site, index=False)

print("\n Kaydedildi:")
print(" -", out_per_file)
print(" -", out_per_site)

print("\n— En yüksek kapsama oranına sahip 10 dosya —")
print(df_merged.sort_values("coverage_%", ascending=False).head(10).to_string(index=False))

print("\n— Site bazında özet —")
print(site_cov.sort_values("coverage_%", ascending=False).to_string(index=False))


XML dosyası sayısı: 227
Toplam poligon (annotations): 31044
Eşleşme oranı: 3402/31044 = %10.96

 Kaydedildi:
 - /content/drive/MyDrive/weecology-NeonTreeEvaluation-d0b90bc/annotations_with_species/reports/coverage_per_file.csv
 - /content/drive/MyDrive/weecology-NeonTreeEvaluation-d0b90bc/annotations_with_species/reports/coverage_per_site.csv

— En yüksek kapsama oranına sahip 10 dosya —
         xml_file  n_objects  safe_count  coverage_% site
SJER_004_2018.xml          1          10     1000.00  UNK
TEAK_062_2018.xml         39         288      738.46  UNK
SJER_050_2018.xml         15         110      733.33  UNK
SJER_063_2018.xml         13          90      692.31  UNK
TEAK_044_2018.xml         36         248      688.89  UNK
SJER_003_2018.xml          9          60      666.67  UNK
SJER_046_2018.xml         14          90      642.86  UNK
TEAK_046_2018.xml         39         240      615.38  UNK
TEAK_050_2018.xml         48         280      583.33  UNK
SJER_021_2018.xml          4 