In [4]:
import json
from collections import defaultdict
from shapely.geometry import Polygon

def get_vgg_instance_stats_and_areas(vgg_json_path):
    """Count annotated regions and sum polygon areas per label in a VGG (VIA) JSON file.

    The JSON is expected to map an image key to an entry whose ``regions``
    holds that image's annotations. ``regions`` may be a dict (its values are
    the regions) or a list of regions; a missing or null ``regions`` means
    "no annotations for this image".

    For each region that is a dict:

    * the label is ``region_attributes['label']`` (``'undefined'`` if absent);
    * if the shape is a ``polygon`` with at least 3 points and equally long
      x/y lists, its area is added to the label's total. Invalid geometry is
      repaired with ``buffer(0)`` first, and polygons that end up empty are
      tallied separately;
    * the region is counted for its label, unless building the polygon
      raised, in which case a warning is printed and the region is skipped.

    A summary table (sorted by ascending count) is printed; the area column is
    the summed area divided by ``1024*1024``.

    Args:
        vgg_json_path: Path to the VGG/VIA annotation JSON file.

    Returns:
        tuple[dict, dict]: ``(counts, areas)`` where ``counts`` maps
        label -> number of counted regions and ``areas`` maps
        label -> summed polygon area (0.0 for labels without polygon area).
    """
    # JSON interchange is UTF-8; do not depend on the platform locale.
    with open(vgg_json_path, 'r', encoding='utf-8') as f:
        vgg_data = json.load(f)

    label_counts = defaultdict(int)
    label_empties = defaultdict(int)
    label_areas = defaultdict(float)
    print(len(vgg_data))  # number of top-level entries in the file
    for filename, entry in vgg_data.items():
        # `regions` is a dict keyed by index in some VIA exports and a plain
        # list in others; normalise both (and null/missing) to an iterable.
        regions = entry.get('regions') or {}
        if isinstance(regions, dict):
            regions = regions.values()

        for region in regions:
            if not isinstance(region, dict):
                continue

            attrs = region.get('region_attributes', {})
            label = attrs.get('label', 'undefined')

            shape = region.get('shape_attributes', {})
            if shape.get('name') == 'polygon':
                all_x = shape.get('all_points_x', [])
                all_y = shape.get('all_points_y', [])

                if len(all_x) >= 3 and len(all_x) == len(all_y):
                    coords = list(zip(all_x, all_y))
                    try:
                        poly = Polygon(coords)
                        # buffer(0) repairs invalid (e.g. self-intersecting)
                        # rings; the repaired geometry may be empty.
                        poly = poly if poly.is_valid else poly.buffer(0)
                        area = poly.area
                        if poly.is_empty:
                            label_empties[label] += 1
                        label_areas[label] += area
                    except Exception as e:
                        print(f"Warning: invalid polygon in {filename}, label '{label}': {e}")
                        # A region whose polygon cannot be built is not counted.
                        continue

            label_counts[label] += 1

    # Sort by instance count
    sorted_labels = sorted(label_counts.items(), key=lambda x: x[1])

    # Print stats
    print("==== VGG Instance & Area Stats ====")
    total_instances = sum(label_counts.values())
    print(f"Total labeled instances: {total_instances}")
    print(f"Number of unique labels: {len(label_counts)}\n")

    print(f"{'Label':30s} | {'Count':>6s} | {'nbr Empties':>15s} |{'Total Area (nbr_img)':>20s}")
    print("-" * 60)
    for label, count in sorted_labels:
        # Indexing the defaultdicts also registers labels that never had a
        # polygon, so they appear in the returned `areas` with 0.0.
        area = label_areas[label]
        empties = label_empties[label]
        print(f"{label:30s} | {count:6d} | {empties:>15} | {area/(1024*1024):20.2f}")

    return dict(label_counts), dict(label_areas)

# Example usage: compute the statistics for the Cherbourg annotation file.
# `counts` and `areas` become notebook-level variables that later cells read.
# NOTE(review): the path below is an absolute path on one machine; it must be
# edited before this cell can run anywhere else.
if __name__ == "__main__":
    vgg_json_path = "/home/adelb/Documents/Bpartners/Pleiades/dataset/bati_2022_cherbourg/pleiade_2014_cherbourg_bati.json"  # Replace with your file
    counts, areas = get_vgg_instance_stats_and_areas(vgg_json_path)


1130
==== VGG Instance & Area Stats ====
Total labeled instances: 24396
Number of unique labels: 1

Label                          |  Count |     nbr Empties |Total Area (nbr_img)
------------------------------------------------------------
Bati                           |  24396 |            1117 |                29.81


0.9309922265683023

In [7]:
# Scratch arithmetic: sum of nine hand-typed values minus 8811.
# NOTE(review): the origin of these numbers is not recorded in the notebook;
# document where they come from or remove this cell.
36.06+19.94+36.55+496.40+980.47+403.19+2824.57+1799.03+1180.43-8811

-1034.3599999999997

In [None]:
# Re-run the statistics on a second annotation file; rebinds the notebook-level
# `vgg_json_path`, `counts` and `areas`.
# NOTE(review): the recorded run of this cell was interrupted
# (KeyboardInterrupt), so in that session `counts`/`areas` kept their
# previous values.
vgg_json_path = "/home/adelb/Documents/Bpartners/Stanislas/all_vgg.json"  # Replace with your file
    
counts, areas = get_vgg_instance_stats_and_areas(vgg_json_path)

40689


KeyboardInterrupt: 

In [None]:
# NOTE(review): bare list literal in a cell that was never executed; its
# purpose is not evident from the notebook — confirm or delete.
[1, 3, 5, 8, 9, 10]

In [5]:
# Render nine roof class names as a single comma-separated string.
','.join(['roof_ardoise',
 'roof_asphalte_bitume',
 'roof_bac_acier',
 'roof_beton_brut',
 'roof_fibro_ciment',
 'roof_gravier',
 'roof_tole_ondulee',
 'roof_tuiles',
 'roof_zinc'])

'roof_ardoise,roof_asphalte_bitume,roof_bac_acier,roof_beton_brut,roof_fibro_ciment,roof_gravier,roof_tole_ondulee,roof_tuiles,roof_zinc'

In [4]:
# Alphabetically ordered labels found in `areas`, as one comma-separated string.
','.join(sorted(areas))

'cheminee,humidite_autres,moisissure_couleur_ardoise,moisissure_tuiles,obstacles,roof_ardoise,roof_autres,roof_tuiles,usure_ardoise,usure_autres,usure_tuiles,velux'

In [5]:
# Class list with 'background' first, followed by the annotation labels
# in alphabetical order.
class_names = [
    'background',
    'cheminee',
    'humidite_autres',
    'moisissure_couleur_ardoise',
    'moisissure_tuiles',
    'obstacles',
    'roof_ardoise',
    'roof_autres',
    'roof_tuiles',
    'usure_ardoise',
    'usure_autres',
    'usure_tuiles',
    'velux',
]
# Display the index -> class-name mapping.
dict(enumerate(class_names))

{0: 'background',
 1: 'cheminee',
 2: 'humidite_autres',
 3: 'moisissure_couleur_ardoise',
 4: 'moisissure_tuiles',
 5: 'obstacles',
 6: 'roof_ardoise',
 7: 'roof_autres',
 8: 'roof_tuiles',
 9: 'usure_ardoise',
 10: 'usure_autres',
 11: 'usure_tuiles',
 12: 'velux'}

In [5]:
# Area labelled 'roof' minus the combined area of every other label,
# divided by 1024*1024. Requires a 'roof' key in `areas`.
(areas['roof'] - sum(area for name, area in areas.items() if name != 'roof')) / (1024 * 1024)

2165.0764529913704

In [7]:
# Combined area of every label except 'roof', divided by 1024*1024.
sum(area for name, area in areas.items() if name != 'roof') / (1024 * 1024)

759.6095728781914

In [5]:
# Alphabetically ordered list of the labels present in `counts`.
sorted(counts)

['roof_ardoise',
 'roof_asphalte_bitume',
 'roof_bac_acier',
 'roof_beton_brut',
 'roof_fibro_ciment',
 'roof_gravier',
 'roof_tole_ondulee',
 'roof_tuiles',
 'roof_zinc']

In [1]:
# Convert a whitespace-separated list of class names into a comma-separated string.
','.join("background cheminee humidite_clair_autres humidite_intense_autres moisissure_clair_tuiles moisissure_couleur_ardoise moisissure_couleur_tuiles moisissure_noircie_tuiles obstacle pv roof_ardoise roof_autres roof_tuiles usure_importante_ardoise usure_importante_tuiles usure_legere_ardoise usure_legere_autres velux".split())

'background,cheminee,humidite_clair_autres,humidite_intense_autres,moisissure_clair_tuiles,moisissure_couleur_ardoise,moisissure_couleur_tuiles,moisissure_noircie_tuiles,obstacle,pv,roof_ardoise,roof_autres,roof_tuiles,usure_importante_ardoise,usure_importante_tuiles,usure_legere_ardoise,usure_legere_autres,velux'

In [7]:
# Preview of a 100-column '#' banner. The run name is doubled, which makes the
# padded text 98 characters wide, so the second line is almost entirely filled.
fd = 2 * 'bp_simp_tt_200eps_model3r_se_resnext50_32x4d_512'
print(format(' Metrics Avant Post Processing ', '#^100'))
print(format(' ' + fd + ' ', '#^100'))
print(100 * '#')

################################## Metrics Avant Post Processing ###################################
# bp_simp_tt_200eps_model3r_se_resnext50_32x4d_512bp_simp_tt_200eps_model3r_se_resnext50_32x4d_512 #
####################################################################################################
