In [2]:
# === 0) Mount Google Drive so the dataset under MyDrive is reachable
from google.colab import drive
drive.mount('/content/drive')

# === 1) Settings
# Single dataset root: the annotation folder and both output CSVs are derived
# from it, so relocating the dataset means editing one line only.
DATASET_DIR = "/content/drive/MyDrive/weecology-NeonTreeEvaluation-d0b90bc"
ANN_DIR = DATASET_DIR + "/annotations"  # directory scanned for *.xml annotation files
OUT_SUMMARY_CSV = DATASET_DIR + "/annotation_summary_per_file.csv"  # one row per XML file
OUT_CLASSES_CSV = DATASET_DIR + "/annotation_class_stats.csv"  # one row per class

# === 2) Kütüphaneler
import os, json, xml.etree.ElementTree as ET
import pandas as pd
from collections import Counter, defaultdict

# Annotation files to process: every entry in ANN_DIR whose name ends in
# ".xml" (case-insensitive), in sorted order.
xml_files = sorted(
    entry for entry in os.listdir(ANN_DIR) if entry.lower().endswith(".xml")
)

# Accumulators filled while the files are processed.
bbox_totals = defaultdict(int)  # per-class running count
class_totals = Counter()        # per-class object count over all files
rows = []                       # one summary dict per XML file

def get_text(node, tag, default=None):
    """Return the text of the first ``tag`` match under ``node``.

    Args:
        node: Element to search (must not be None).
        tag: Tag name or path passed to ``node.find``.
        default: Value returned when no element matches or the matching
            element has no text.

    Returns:
        The matched element's ``text``, or ``default``.
    """
    child = node.find(tag)
    if child is None or child.text is None:
        return default
    return child.text

def _parse_int(text, default=0):
    """Convert an annotation number to int, accepting "12" as well as "12.0".

    Args:
        text: Raw element text, or None when the element is missing or empty.
        default: Value returned for missing or blank text.

    Returns:
        The value truncated toward zero, or ``default``.

    Raises:
        ValueError: If the text is not numeric (or is NaN).
        OverflowError: If the text is an infinity.
    """
    if text is None or not text.strip():
        return default
    # int("12.0") raises ValueError, so go through float() first.
    return int(float(text))


def _mean(values):
    """Return the arithmetic mean of ``values``, or None for an empty list."""
    return sum(values) / len(values) if values else None


def _error_row(xml_name, message):
    """Build the summary row for a file that could not be processed.

    Uses the same keys, in the same order, as a successful row so the
    DataFrame columns line up.
    """
    return {
        "xml_file": xml_name, "image_file": None, "width": None, "height": None,
        "n_objects": 0, "classes": None, "per_class_counts": None,
        "avg_bbox_w": None, "avg_bbox_h": None, "avg_bbox_area_px2": None,
        "total_bbox_area_ratio_%": None, "parse_error": message,
    }


def _summarize_annotation(root):
    """Compute the per-file summary fields for one parsed annotation tree.

    Args:
        root: Root element of a parsed annotation XML. Reads ``filename``,
            ``size/width``, ``size/height`` and every ``object`` child
            (``name`` plus ``bndbox/xmin|ymin|xmax|ymax``).

    Returns:
        A ``(fields, class_counts)`` tuple: ``fields`` is a dict holding every
        summary column except ``xml_file`` and ``parse_error``;
        ``class_counts`` is a Counter of class name -> number of objects.

    Raises:
        ValueError, OverflowError: If a size or bounding-box value is not a
            finite number.
    """
    size = root.find("size")
    width = _parse_int(get_text(size, "width")) if size is not None else 0
    height = _parse_int(get_text(size, "height")) if size is not None else 0

    objects = root.findall("object")
    class_counts = Counter()
    box_widths, box_heights, box_areas = [], [], []

    for obj in objects:
        # Missing and blank names are pooled under "unknown" so they never
        # show up as an empty-string class in the CSV output.
        name = (get_text(obj, "name") or "").strip() or "unknown"
        class_counts[name] += 1

        box = obj.find("bndbox")
        if box is None:
            # Object is still counted, it just contributes no box statistics.
            continue
        xmin = _parse_int(get_text(box, "xmin"))
        ymin = _parse_int(get_text(box, "ymin"))
        xmax = _parse_int(get_text(box, "xmax"))
        ymax = _parse_int(get_text(box, "ymax"))
        # Clamp at 0 so inverted boxes (max < min) do not yield negative sizes.
        box_w = max(0, xmax - xmin)
        box_h = max(0, ymax - ymin)
        box_widths.append(box_w)
        box_heights.append(box_h)
        box_areas.append(box_w * box_h)

    # Box areas are summed independently, so overlapping boxes are counted
    # more than once and the ratio can exceed 100.
    if box_areas and width > 0 and height > 0:
        total_area_ratio = sum(box_areas) / (width * height) * 100.0
    else:
        total_area_ratio = None

    fields = {
        "image_file": get_text(root, "filename", default=None),
        "width": width, "height": height,
        "n_objects": len(objects),
        "classes": ";".join(sorted(class_counts)) if class_counts else None,
        "per_class_counts": json.dumps(class_counts, ensure_ascii=False),
        "avg_bbox_w": _mean(box_widths),
        "avg_bbox_h": _mean(box_heights),
        "avg_bbox_area_px2": _mean(box_areas),
        "total_bbox_area_ratio_%": total_area_ratio,
    }
    return fields, class_counts


for xf in xml_files:
    path = os.path.join(ANN_DIR, xf)
    try:
        root = ET.parse(path).getroot()
    except Exception as e:
        # Best effort: an unreadable or ill-formed file is recorded, not fatal.
        rows.append(_error_row(xf, str(e)))
        continue

    try:
        fields, class_counts = _summarize_annotation(root)
    except (ValueError, OverflowError) as e:
        # Well-formed XML carrying a non-numeric size/bbox value: record it in
        # parse_error instead of aborting the remaining files.
        rows.append(_error_row(xf, "invalid numeric value: " + str(e)))
        continue

    # Add this file to the running totals. bbox_totals receives the same
    # per-class object counts as class_totals (objects without a <bndbox>
    # are included in both).
    class_totals.update(class_counts)
    for k, v in class_counts.items():
        bbox_totals[k] += v

    row = {"xml_file": xf}
    row.update(fields)
    row["parse_error"] = None
    rows.append(row)

# === 3) DataFrames and export
# Explicit column lists keep both frames well-formed (and sortable) even when
# `rows` or `class_totals` is empty, e.g. when ANN_DIR holds no XML files.
# Without them an empty frame has no columns and sort_values raises KeyError.
SUMMARY_COLUMNS = [
    "xml_file", "image_file", "width", "height", "n_objects", "classes",
    "per_class_counts", "avg_bbox_w", "avg_bbox_h", "avg_bbox_area_px2",
    "total_bbox_area_ratio_%", "parse_error",
]
# Files with the most objects first; ties broken alphabetically by file name.
df = pd.DataFrame(rows, columns=SUMMARY_COLUMNS).sort_values(
    ["n_objects", "xml_file"], ascending=[False, True]
)
df_classes = pd.DataFrame(
    [{"class": k, "count": v} for k, v in class_totals.items()],
    columns=["class", "count"],
).sort_values("count", ascending=False)

print("Toplam XML:", len(xml_files))
print("Toplam etiketli nesne:", int(df["n_objects"].sum()))
print("Sınıflar:", ", ".join(df_classes["class"].astype(str).tolist()))
display(df.head(10))
display(df_classes)

df.to_csv(OUT_SUMMARY_CSV, index=False)
df_classes.to_csv(OUT_CLASSES_CSV, index=False)
print("Kaydedildi:")
print("  -", OUT_SUMMARY_CSV)
print("  -", OUT_CLASSES_CSV)

# === 4) Quick summary
print("\n--- Hızlı özet ---")
if df.empty:
    # idxmax() raises on an empty frame, so report the empty input instead.
    print("• XML dosyası bulunamadı:", ANN_DIR)
else:
    # Note: files recorded with a parse error carry n_objects == 0 and are
    # included in this mean.
    print("• Dosya başına ortalama ağaç:", round(df["n_objects"].mean(), 2))
    print("• En fazla ağaçlı dosya:", df.loc[df["n_objects"].idxmax(), "xml_file"],
          "→", int(df["n_objects"].max()))
    print("• Görüntü boyutları (eşleşenler):",
          df[["width","height"]].dropna().drop_duplicates().head().to_dict(orient="records"))


Mounted at /content/drive
Toplam XML: 227
Toplam etiketli nesne: 31044
Sınıflar: Tree


Unnamed: 0,xml_file,image_file,width,height,n_objects,classes,per_class_counts,avg_bbox_w,avg_bbox_h,avg_bbox_area_px2,total_bbox_area_ratio_%,parse_error
8,2018_NIWO_2_450000_4426000_image_crop.xml,2018_NIWO_2_450000_4426000_image_crop.tif,2511,4132,9730,Tree,"{""Tree"": 9730}",19.613258,20.556012,441.578726,41.410832,
46,2018_TEAK_3_315000_4094000_image_crop.xml,2018_TEAK_3_315000_4094000_image_crop.tif,6000,6000,3670,Tree,"{""Tree"": 3670}",51.107902,52.055313,3115.422888,31.760006,
42,2018_SJER_3_258000_4106000_image.xml,2018_SJER_3_258000_4106000_image.tif,10000,10000,1498,Tree,"{""Tree"": 1498}",82.391188,83.586115,7859.306409,11.773241,
7,2018_MLBS_3_541000_4140000_image_crop2.xml,2018_MLBS_3_541000_4140000_image_crop2.tif,1603,2213,1057,Tree,"{""Tree"": 1057}",51.235572,52.280038,2876.48912,85.708281,
43,2018_SJER_3_259000_4110000_image.xml,2018_SJER_3_259000_4110000_image.tif,10000,10000,1047,Tree,"{""Tree"": 1047}",73.885387,77.268386,7105.08405,7.439023,
80,2019_DSNY_5_452000_3113000_image_crop.xml,2019_DSNY_5_452000_3113000_image_crop.tif,6172,4394,888,Tree,"{""Tree"": 888}",55.006757,56.55518,3982.113739,13.038891,
6,2018_MLBS_3_541000_4140000_image_crop.xml,2018_MLBS_3_541000_4140000_image_crop.tif,1429,1864,864,Tree,"{""Tree"": 864}",47.534722,49.407407,2552.135417,82.782649,
83,2019_OSBS_5_405000_3287000_image_crop.xml,2019_OSBS_5_405000_3287000_image_crop.tif,1579,1920,699,Tree,"{""Tree"": 699}",35.918455,36.546495,1559.380544,35.953894,
84,2019_OSBS_5_405000_3287000_image_crop2.xml,2019_OSBS_5_405000_3287000_image_crop2.tif,2539,2353,617,Tree,"{""Tree"": 617}",40.090762,41.058347,1992.667747,20.579529,
88,2019_YELL_2_528000_4978000_image_crop2.xml,2019_YELL_2_528000_4978000_image_crop2.tif,2299,2472,574,Tree,"{""Tree"": 574}",41.761324,42.198606,2073.303136,20.94051,


Unnamed: 0,class,count
0,Tree,31044


Kaydedildi:
  - /content/drive/MyDrive/weecology-NeonTreeEvaluation-d0b90bc/annotation_summary_per_file.csv
  - /content/drive/MyDrive/weecology-NeonTreeEvaluation-d0b90bc/annotation_class_stats.csv

--- Hızlı özet ---
• Dosya başına ortalama ağaç: 136.76
• En fazla ağaçlı dosya: 2018_NIWO_2_450000_4426000_image_crop.xml → 9730
• Görüntü boyutları (eşleşenler): [{'width': 2511, 'height': 4132}, {'width': 6000, 'height': 6000}, {'width': 10000, 'height': 10000}, {'width': 1603, 'height': 2213}, {'width': 6172, 'height': 4394}]
