In [None]:
import json, os
from collections import defaultdict
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist
from PIL import Image
import networkx as nx

'ANNOT_PATH = "/Users/aumkeshchaudhary/Downloads/train/_annotations.coco.json"\nIMAGES_ROOT = "/Users/aumkeshchaudhary/Downloads/train"\nOUT = "results"\nos.makedirs(OUT, exist_ok=True)\n\nwith open(ANNOT_PATH, "r") as f:\n    coco = json.load(f)\n\n# map category id -> species name (skip generic "objects")\nid_to_species = {c["id"]: c["name"] for c in coco["categories"] if c["name"] != "objects"}\nspecies_list = sorted(id_to_species.values())\n\n# group annotations by image id\nann_by_image = defaultdict(list)\nfor ann in coco["annotations"]:\n    ann_by_image[ann["image_id"]].append(ann)\n\n# containers\nco_occurrence = defaultdict(int)          # (s1, s2) -> count\ndistances = defaultdict(list)            # (s1, s2) -> [distance...]\nfeeding_zones = defaultdict(list)        # species -> [(x_norm, y_norm)...]\ndom_weighted_dist_sum = defaultdict(float)  # species -> sum(dist * weight)\ndom_weight_sum = defaultdict(float)         # species -> sum(weight)\n\n# Main loop over the images

In [6]:
# Dataset locations. The defaults are the paths this notebook was written
# against; set FEEDER_IMAGES_ROOT and/or FEEDER_ANNOT_PATH in the environment
# to run it on another machine without editing this cell.
IMAGES_ROOT = os.environ.get("FEEDER_IMAGES_ROOT", "/Users/aumkeshchaudhary/Downloads/train")
ANNOT_PATH = os.environ.get(
    "FEEDER_ANNOT_PATH",
    os.path.join(IMAGES_ROOT, "_annotations.coco.json"),
)
OUT = "results"
os.makedirs(OUT, exist_ok=True)

# JSON interchange files are UTF-8; be explicit rather than relying on the
# platform's locale encoding (species names may contain non-ASCII characters).
with open(ANNOT_PATH, "r", encoding="utf-8") as f:
    coco = json.load(f)

# map category id -> species name (skip generic "objects")
id_to_species = {c["id"]: c["name"] for c in coco["categories"] if c["name"] != "objects"}
species_list = sorted(id_to_species.values())

# group annotations by image id
ann_by_image = defaultdict(list)
for ann in coco["annotations"]:
    ann_by_image[ann["image_id"]].append(ann)

# containers (filled by the main loop over the images)
co_occurrence = defaultdict(int)          # (s1, s2) -> count
distances = defaultdict(list)            # (s1, s2) -> [distance...]
feeding_zones = defaultdict(list)        # species -> [(x_norm, y_norm)...]
dom_weighted_dist_sum = defaultdict(float)  # species -> sum(dist * weight)
dom_weight_sum = defaultdict(float)         # species -> sum(weight)

In [7]:
# Main loop over the images
# For every annotated image this accumulates, into the module-level containers:
#   co_occurrence  : +1 per ordered pair of distinct species present together
#   feeding_zones  : normalized bbox centre of every annotation, per species
#   distances      : pairwise normalized centre distances, per species pair
#   dom_*          : N-weighted distance of each annotation to the image centroid
for img in coco["images"]:
    # id_to_species deliberately omits the generic "objects" category, so drop
    # annotations that do not map to a species instead of failing with KeyError
    # on the lookups below.
    anns = [a for a in ann_by_image.get(img["id"], []) if a["category_id"] in id_to_species]
    if not anns:
        continue

    img_path = os.path.join(IMAGES_ROOT, img["file_name"])
    try:
        # Only the size is needed; the context manager releases the file handle
        # right away instead of leaving it open until garbage collection.
        with Image.open(img_path) as im:
            W, H = im.size
    except Exception:
        # if image missing/unreadable, skip spatial stuff but still count co-occurrence
        W = H = None

    # species present in this image (one entry per annotation, same order as anns)
    species_in_img = [id_to_species[a["category_id"]] for a in anns]

    # Co-occurrence: count each ordered pair of distinct species once per image
    unique_species = set(species_in_img)
    for s1 in unique_species:
        for s2 in unique_species:
            if s1 != s2:
                co_occurrence[(s1, s2)] += 1

    # if we don't know image size, we can't do spatial metrics
    if W is None or H is None:
        continue

    # Compute centers (normalized) & collect for spacing + dominance + occupancy
    centers_norm = []
    for sp, a in zip(species_in_img, anns):
        x, y, w, h = a["bbox"]
        center = ((x + w / 2.0) / W, (y + h / 2.0) / H)
        centers_norm.append(center)
        # single-annotation images still contribute to the occupancy maps
        feeding_zones[sp].append(center)

    centers_norm = np.array(centers_norm)
    N = len(centers_norm)
    if N < 2:
        continue

    #  Social spacing (pairwise distances between birds)
    D = cdist(centers_norm, centers_norm)
    for i in range(N):
        for j in range(i + 1, N):
            s1 = species_in_img[i]
            s2 = species_in_img[j]
            d = float(D[i, j])
            distances[(s1, s2)].append(d)
            # Mirror the entry only for mixed pairs; for s1 == s2 both keys are
            # the same list and the distance would be recorded twice.
            if s1 != s2:
                distances[(s2, s1)].append(d)

    #  Dominance proxy: distance to group centroid in crowded scenes
    if N >= 3:  # only consider multi-bird situations
        centroid = centers_norm.mean(axis=0)
        dists_to_centroid = np.linalg.norm(centers_norm - centroid, axis=1)
        for sp, d in zip(species_in_img, dists_to_centroid):
            # weight each observation by the number of annotations in the image
            dom_weighted_dist_sum[sp] += d * N
            dom_weight_sum[sp] += N

In [8]:
#   1) Co-Occurrence Heatmap
# Square matrix in species_list order: entry [r, c] is the co_occurrence count
# stored for the ordered pair (species r, species c); missing pairs are 0.
n_species = len(species_list)
mat = np.array(
    [
        [co_occurrence.get((row_sp, col_sp), 0) for col_sp in species_list]
        for row_sp in species_list
    ],
    dtype=float,
).reshape(n_species, n_species)
tick_positions = range(n_species)

fig, ax = plt.subplots(figsize=(8, 8))
heat = ax.imshow(mat, cmap="viridis")
plt.xticks(tick_positions, species_list, rotation=70)
plt.yticks(tick_positions, species_list)
ax.set_title("Co-Occurrence Heatmap of Birds at Feeder")
fig.colorbar(heat, ax=ax, label="Frames with both species present")
fig.tight_layout()
fig.savefig(os.path.join(OUT, "cooccurrence_heatmap.png"), dpi=300)
plt.close(fig)

In [9]:
#   2) Social Spacing Boxplot
# Long-format table with one row per recorded distance; `pair` is the
# "<species>–<species>" label of the key it was stored under.
dist_rows = [
    {"pair": f"{s1}–{s2}", "distance": v}
    for (s1, s2), vals in distances.items()
    for v in vals
]
dist_df = pd.DataFrame(dist_rows)

if not dist_df.empty:
    # `distances` stores mixed pairs under both (a, b) and (b, a) with the same
    # values, so plot one orientation per pair instead of two identical boxes.
    # A key whose mirror is absent is always kept.
    pairs_to_plot = {
        f"{s1}–{s2}"
        for (s1, s2) in distances
        if s1 <= s2 or (s2, s1) not in distances
    }
    plot_df = dist_df[dist_df["pair"].isin(pairs_to_plot)]

    # DataFrame.boxplot(by=...) opens a brand-new figure unless it is handed an
    # Axes; passing `ax` makes the 12x6 size apply to the plot that is saved
    # and avoids leaving an empty, never-closed figure behind.
    fig, ax = plt.subplots(figsize=(12, 6))
    plot_df.boxplot(by="pair", column="distance", rot=60, ax=ax)
    fig.suptitle("")  # drop pandas' automatic "Boxplot grouped by pair" header
    ax.set_title("Social Spacing at Feeder (Normalized Pixel Distance)")
    ax.set_ylabel("Distance")
    fig.tight_layout()
    fig.savefig(os.path.join(OUT, "social_spacing_boxplot.png"), dpi=300)
    plt.close(fig)

<Figure size 1200x600 with 0 Axes>

In [10]:
#   3) Spatial Occupancy Heatmaps
# One 2-D histogram of normalized bbox centres per species, saved as
# occupancy_<species>.png in OUT.
for sp, coords in feeding_zones.items():
    pts = np.array(coords)
    if pts.size == 0:
        continue

    fig, ax = plt.subplots(figsize=(6, 6))
    # Bin over the full normalized frame rather than each species' own data
    # extent, so the axes really span 0..1 and maps of different species are
    # directly comparable. Centres outside [0, 1] are not binned.
    *_, mesh = ax.hist2d(
        pts[:, 0],
        pts[:, 1],
        bins=30,
        range=[[0.0, 1.0], [0.0, 1.0]],
        cmap="inferno",
    )
    ax.set_title(f"Spatial Occupancy Map — {sp}")
    ax.set_xlabel("X (normalized)")
    ax.set_ylabel("Y (normalized)")
    ax.invert_yaxis()  # often top-left origin in images
    fig.colorbar(mesh, ax=ax, label="Count")
    fig.tight_layout()

    # A path separator inside a species name would point into a non-existent
    # sub-directory of OUT; neutralise it in the file name only.
    safe_name = sp.replace("/", "_").replace(os.sep, "_")
    fig.savefig(os.path.join(OUT, f"occupancy_{safe_name}.png"), dpi=300)
    plt.close(fig)  # one figure per species: close each to keep memory flat

In [11]:
#   4) Social Network Graph
# Undirected graph: one node per species, one edge per pair of species that
# co-occurred, weighted by the co-occurrence count.
G = nx.Graph()
G.add_nodes_from(species_list)

# add edges with weights
# co_occurrence holds every pair under both (a, b) and (b, a); in an undirected
# graph those are the same edge, so take the count once instead of summing the
# two mirrored entries (which doubled every weight).
for (s1, s2), w in co_occurrence.items():
    if w > 0 and not G.has_edge(s1, s2):
        G.add_edge(s1, s2, weight=w)

# draw network
fig, ax = plt.subplots(figsize=(8, 8))
if G.number_of_nodes() > 0:
    pos = nx.spring_layout(G, seed=0)  # fixed seed -> reproducible layout

    # Nodes and labels are drawn even when no species ever co-occurred, so the
    # saved figure is never blank.
    nx.draw_networkx_nodes(G, pos, node_size=1200, node_color="lightblue", ax=ax)
    nx.draw_networkx_labels(G, pos, font_size=9, ax=ax)

    if G.number_of_edges() > 0:
        weights = [G[u][v]["weight"] for u, v in G.edges()]
        max_w = max(weights)
        # line width scales linearly with weight, from 0.5 up to 4.5
        widths = [0.5 + 4 * (w / max_w) for w in weights]
        nx.draw_networkx_edges(G, pos, width=widths, alpha=0.7, ax=ax)

ax.set_title("Species Co-Feeding Social Network")
ax.axis("off")
fig.tight_layout()
fig.savefig(os.path.join(OUT, "social_network_graph.png"), dpi=300)
plt.close(fig)

In [12]:
#   5) Dominance / Centrality Score
# Weighted mean distance to the image centroid for every species that has a
# positive accumulated weight.
dom_data = [
    {
        "species": sp,
        "mean_center_distance": dom_weighted_dist_sum[sp] / dom_weight_sum[sp],
    }
    for sp in species_list
    if dom_weight_sum[sp] > 0
]

dom_df = pd.DataFrame(dom_data)
if not dom_df.empty:
    # lower distance = more central -> min-max rescale and flip into [0, 1]
    center_dist = dom_df["mean_center_distance"]
    lo, hi = center_dist.min(), center_dist.max()
    # when every species has the same mean distance, all of them score 1.0
    scores = 1.0 - (center_dist - lo) / (hi - lo) if hi > lo else 1.0

    dom_df = dom_df.assign(dominance_score=scores).sort_values(
        "dominance_score", ascending=False
    )

    fig, ax = plt.subplots(figsize=(8, 5))
    ax.bar(dom_df["species"], dom_df["dominance_score"])
    plt.xticks(rotation=45, ha="right")
    ax.set_ylabel("Dominance / Centrality Score")
    ax.set_title("Estimated Species Dominance at Feeder (Centrality-Based)")
    fig.tight_layout()
    fig.savefig(os.path.join(OUT, "dominance_barplot.png"), dpi=300)
    plt.close(fig)