Simplify VGG polygons

In [1]:
import json
from shapely.geometry import Polygon, MultiPolygon

def simplify_vgg(input_file, output_file, tolerance = 2.0):
    """Simplify the polygons of a VGG annotation file and save the result.

    Every region that carries at least three polygon points is rebuilt as a
    shapely ``Polygon`` and reduced with
    ``simplify(tolerance, preserve_topology=True)``. Regions that cannot be
    simplified (no ``all_points_x``/``all_points_y``, fewer than three
    points, or an empty simplification result) are copied through unchanged
    so that a single odd region does not abort the whole conversion.

    Args:
        input_file: Path of the VGG JSON file to read.
        output_file: Path of the VGG JSON file to write.
        tolerance: Distance tolerance forwarded to ``Polygon.simplify``.

    Raises:
        KeyError: If an image entry lacks ``filename``, ``size`` or
            ``regions``, or a region lacks ``shape_attributes`` or
            ``region_attributes``.
    """
    with open(input_file) as f:
        data = json.load(f)

    simplified_data = {}

    for img_file, info in data.items():
        regions_out = {}
        simplified_data[img_file] = {
            "filename": info["filename"],
            "size": info["size"],
            "regions": regions_out,
        }

        for idx, region in info['regions'].items():
            shape = region['shape_attributes']
            points = list(zip(shape.get('all_points_x') or (),
                              shape.get('all_points_y') or ()))

            simplified = None
            if len(points) >= 3:
                simplified = Polygon(points).simplify(tolerance, preserve_topology=True)

            if simplified is None or simplified.is_empty:
                # Nothing usable came out of the simplification: keep the
                # region's shape exactly as it was in the input file.
                new_shape = dict(shape)
            else:
                xs_new, ys_new = simplified.exterior.coords.xy
                new_shape = {
                    "name": "polygon",
                    "all_points_x": xs_new[:-1].tolist(),  # omit closing point
                    "all_points_y": ys_new[:-1].tolist(),
                }

            regions_out[idx] = {
                "shape_attributes": new_shape,
                "region_attributes": region['region_attributes'].copy(),
            }

    with open(output_file, "w") as f:
        json.dump(simplified_data, f, indent=4)

# if __name__ == "__main__":
#     input_file = "/home/adelb/Documents/Bpartners/VGG-PROCESSING/clean-vggs/damages_kept_classes_remap_no-intensity_no-roof_roofed_complemented.json"
#     output_file = "/home/adelb/Documents/Bpartners/VGG-PROCESSING/clean-vggs/damages_kept_classes_remap_no-intensity_no-roof_roofed_complemented_simplified_3.json"
#     tolerance = 3.0  # adjust for more or less smoothing

#     simplify_vgg(input_file, output_file, tolerance)
#     print(f"Simplified VGG file has been saved to {output_file}")

VGG STATS

In [2]:
import json
from collections import defaultdict
from shapely.geometry import Polygon

def get_vgg_instance_stats_and_areas(vgg_json_path, image_area=1024 * 1024):
    """Print and return per-label instance counts and polygon areas of a VGG file.

    Every dict region is counted under its ``label`` region attribute
    (``'undefined'`` when absent). For regions whose shape is a polygon with
    at least three points and matching x/y lengths, the shapely polygon area
    is added to the label's total. A region whose polygon cannot be built is
    reported with a warning and is not counted.

    Args:
        vgg_json_path: Path of the VGG JSON file to analyse.
        image_area: Area of one image in pixels, used only to express the
            printed totals in "number of images". Defaults to 1024 * 1024.

    Returns:
        A tuple ``(counts, areas)`` of plain dicts keyed by label; ``areas``
        holds the raw (un-normalised) summed polygon areas and contains an
        entry (possibly ``0.0``) for every counted label.

    Raises:
        ValueError: If ``image_area`` is not strictly positive.
    """
    if image_area <= 0:
        raise ValueError(f"image_area must be positive, got {image_area!r}")

    with open(vgg_json_path, 'r') as f:
        vgg_data = json.load(f)

    label_counts = defaultdict(int)
    label_areas = defaultdict(float)
    print(len(vgg_data))
    for filename, entry in vgg_data.items():
        regions = entry.get('regions', {})
        # Regions may be stored as a mapping (index -> region) or as a list.
        region_list = regions.values() if isinstance(regions, dict) else regions

        for region in region_list:
            if not isinstance(region, dict):
                continue

            label = region.get('region_attributes', {}).get('label', 'undefined')
            shape = region.get('shape_attributes', {})

            if shape.get('name') == 'polygon':
                all_x = shape.get('all_points_x', [])
                all_y = shape.get('all_points_y', [])

                if len(all_x) >= 3 and len(all_x) == len(all_y):
                    try:
                        area = Polygon(list(zip(all_x, all_y))).area
                    except Exception as e:
                        print(f"Warning: invalid polygon in {filename}, label '{label}': {e}")
                        continue
                    label_areas[label] += area

            label_counts[label] += 1

    # Sort by instance count
    sorted_labels = sorted(label_counts.items(), key=lambda x: x[1])

    # Print stats
    print("==== VGG Instance & Area Stats ====")
    total_instances = sum(label_counts.values())
    print(f"Total labeled instances: {total_instances}")
    print(f"Number of unique labels: {len(label_counts)}\n")

    print(f"{'Label':30s} | {'Count':>6s} | {'Total Area (nbr_img)':>20s}")
    print("-" * 60)
    for label, count in sorted_labels:
        # Indexing the defaultdict also registers labels without any polygon
        # area, so the returned mapping has an entry for every counted label.
        area = label_areas[label]
        print(f"{label:30s} | {count:6d} | {area / image_area:20.2f}")

    return dict(label_counts), dict(label_areas)

# Example usage
# if __name__ == "__main__":
#     vgg_json_path = ""  # Replace with your file
#     counts, areas = get_vgg_instance_stats_and_areas(vgg_json_path)

VGG TO MASK

In [3]:
import json, os
import numpy as np
import cv2, shutil
from shapely.geometry import Polygon, MultiPolygon
from PIL import Image, ImageEnhance
import albumentations as A 


def vgg_to_masks(in_vgg_path, dest_folder, imsize):
    """Rasterise the polygons of a VGG file into one label mask per image.

    Class ids are assigned from the sorted set of region labels found in the
    whole file, with ``'background'`` inserted at index 0; the class list is
    printed. For each image, polygons are drawn from largest to smallest
    area so that small shapes are painted over larger ones, and the mask is
    written as ``<image name without extension>.png`` in ``dest_folder``.

    Invalid polygons are repaired with ``buffer(0)``; when that yields a
    ``MultiPolygon`` each part is drawn separately. Regions without polygon
    points, or with fewer than three points, are skipped.

    Args:
        in_vgg_path: Path of the VGG JSON file to read.
        dest_folder: Directory receiving the masks (created if missing).
        imsize: Shape passed to ``np.zeros`` for each ``uint8`` mask.
    """
    with open(in_vgg_path) as f:
        vgg = json.load(f)

    os.makedirs(dest_folder, exist_ok=True)

    classes = {
        reg['region_attributes']['label']
        for file in vgg.values()
        for reg in file['regions'].values()
    }
    my_classes = sorted(classes)
    my_classes.insert(0, 'background')

    print(my_classes)
    cls_to_shade = {cls: i for i, cls in enumerate(my_classes)}

    for fn, file in vgg.items():
        mask = np.zeros(shape=imsize, dtype=np.uint8)
        labelled_polygons = []

        for reg in file['regions'].values():
            label = reg['region_attributes']['label']
            shape = reg['shape_attributes']
            points = list(zip(shape.get('all_points_x') or (),
                              shape.get('all_points_y') or ()))
            if len(points) < 3:
                # Not a drawable polygon (other shape type or too few points).
                continue

            geom = Polygon(points)
            if not geom.is_valid:
                geom = geom.buffer(0)

            if isinstance(geom, MultiPolygon):
                labelled_polygons.extend((part, label) for part in geom.geoms)
            elif isinstance(geom, Polygon):
                labelled_polygons.append((geom, label))
            # Any other geometry type produced by the repair is ignored.

        # Largest first, so smaller polygons end up on top in the mask.
        labelled_polygons.sort(key=lambda item: item[0].area, reverse=True)

        for polygon, label in labelled_polygons:
            if polygon.is_empty:
                print('empty poly')
                continue
            coords = [(int(x), int(y)) for x, y in polygon.exterior.coords]
            # Build the point array explicitly as 32-bit integers for fillPoly.
            contour = np.array(coords, dtype=np.int32).reshape(-1, 1, 2)
            cv2.fillPoly(mask, [contour], cls_to_shade[label])

        # Always write PNG: deriving the name from the stem keeps masks
        # lossless whatever the source image extension is.
        out_path = os.path.join(dest_folder, os.path.splitext(fn)[0] + '.png')
        if not cv2.imwrite(out_path, mask):
            print(f"Warning: could not write mask {out_path}")
        
# if __name__ == "__main__":
#     in_vgg_path = ""
#     dest_folder = ""
#     imsize = ()
    
#     vgg_to_masks(in_vgg_path, dest_folder, imsize)

  check_for_updates()


VGG TO COCO

In [4]:
import os, glob
import json
from shapely.geometry import Polygon
import uuid

def convert_vgg_to_coco(vgg_json_path, coco_json_path, class_mapping, default_width=1024, default_height=1024):
    """Convert a VGG polygon annotation file to COCO format.

    Each VGG image becomes a COCO image whose id is derived from a hash of
    its file name; each polygon region becomes one annotation with its
    bounding box, area and a closed segmentation ring. Categories are added
    the first time their label is encountered, using the id found in
    ``class_mapping`` as-is. Regions with fewer than three usable points, or
    whose polygon cannot be built, are skipped. Annotation ids are assigned
    sequentially starting at 1 so they are unique within the output file.

    Args:
        vgg_json_path: Path of the VGG JSON file to read.
        coco_json_path: Path of the COCO JSON file to write; its parent
            directory is created when needed.
        class_mapping: Mapping of category id -> label name.
        default_width: Image width used when the VGG entry has no ``width``.
        default_height: Image height used when the VGG entry has no ``height``.

    Raises:
        KeyError: If a region label is missing from ``class_mapping``.
    """
    cls2ids = {v: k for k, v in class_mapping.items()}

    def img_to_id(img_fn: str, n_digits: int = 4):
        """Hash a file name into a deterministic id below ``10 ** n_digits``."""
        return int(uuid.uuid5(uuid.NAMESPACE_DNS, img_fn).hex, 16) % 10 ** n_digits

    def closed_points(shape):
        """Return the polygon vertices as a closed ring, or None if unusable."""
        points = list(zip(shape.get('all_points_x') or (),
                          shape.get('all_points_y') or ()))
        if len(points) < 3:
            return None  # Invalid polygon
        if points[0] != points[-1]:
            # Close the ring on a local copy; the loaded VGG data stays untouched.
            points.append(points[0])
        return points

    # Create the parent directory of the COCO file; a bare file name has no
    # directory part and must not be passed to makedirs.
    coco_dir = os.path.dirname(coco_json_path)
    if coco_dir:
        os.makedirs(coco_dir, exist_ok=True)

    # Read the VGG file
    with open(vgg_json_path) as file:
        vgg_data = json.load(file)

    # Base COCO structure
    coco_data = {
        "images": [],
        "annotations": [],
        "categories": []
    }

    # Label name -> category id, for the categories already emitted
    category_ids = {}
    # Image id -> first file name that produced it, to report hash collisions
    seen_image_ids = {}

    for filename, image_info in vgg_data.items():
        image_id = img_to_id(filename, 4)
        if image_id in seen_image_ids:
            print(f"Warning: image id {image_id} of '{filename}' collides with '{seen_image_ids[image_id]}'")
        else:
            seen_image_ids[image_id] = filename

        coco_data['images'].append({
            "id": image_id,
            "width": image_info.get("width", default_width),
            "height": image_info.get("height", default_height),
            "file_name": filename
        })

        for region in image_info.get('regions', {}).values():
            category_name = region['region_attributes']['label']
            if category_name not in category_ids:
                # Keep the id exactly as given in class_mapping (no stringification).
                category_ids[category_name] = cls2ids[category_name]
                coco_data['categories'].append({
                    "id": category_ids[category_name],
                    "name": category_name
                })
            category_id = category_ids[category_name]

            points = closed_points(region['shape_attributes'])
            if points is None:  # Ignore invalid polygons
                continue
            try:
                polygon = Polygon(points)
            except ValueError:
                # Degenerate ring that shapely refuses to build.
                continue

            min_x, min_y, max_x, max_y = polygon.bounds
            bbox = [min_x, min_y, max_x - min_x, max_y - min_y]

            # Flatten [(x0, y0), (x1, y1), ...] into [x0, y0, x1, y1, ...]
            segmentation = [value for point in points for value in point]

            coco_data['annotations'].append({
                "id": len(coco_data['annotations']) + 1,
                "segmentation": [segmentation],
                "area": polygon.area,
                "iscrowd": 0,
                "image_id": image_id,
                "category_id": category_id,
                "bbox": bbox
            })

    # Save the COCO data
    with open(coco_json_path, 'w') as file:
        json.dump(coco_data, file, indent=4)

    print(f"Conversion terminée. Les données COCO sont enregistrées dans : {coco_json_path}")

# if __name__ == "__main__":
#     class_mapping: dict[int, str] = {}
#     vgg_path: str = ""
#     coco_path: str = ""
#     width: int = 0
#     height: int = 0
       
#     convert_vgg_to_coco(vgg_path, coco_path, class_mapping, width, height)


VGG TO COCO BOXES

In [5]:
import os, glob, json, uuid

def img_to_id(img_fn: str, n_digits: int = 4):
    """Derive a deterministic numeric id from an image file name.

    The name is hashed into a name-based (version 5) UUID in the DNS
    namespace and the UUID's integer value is reduced modulo
    ``10 ** n_digits``, so the id has at most ``n_digits`` digits.
    """
    modulus = 10 ** n_digits
    name_uuid = uuid.uuid5(uuid.NAMESPACE_DNS, img_fn)
    return name_uuid.int % modulus

def calculate_bbox_rect(rect):
    """Return the COCO bbox ``[x, y, width, height]`` of a VGG rectangle shape."""
    return [rect[key] for key in ('x', 'y', 'width', 'height')]

def calculate_area_rect(rect):
    """Return the area (width times height) of a VGG rectangle shape."""
    width, height = rect['width'], rect['height']
    return width * height

def segmentation_rect(rect):
    """Return the COCO segmentation of a VGG rectangle shape.

    The result is a list holding one flat ring
    ``[x0, y0, x1, y1, ...]`` that visits the four corners starting at
    ``(x, y)``: then ``(x + width, y)``, ``(x + width, y + height)`` and
    ``(x, y + height)``.
    """
    left, top = rect['x'], rect['y']
    right = left + rect['width']
    bottom = top + rect['height']

    ring = [
        left, top,
        right, top,
        right, bottom,
        left, bottom,
    ]
    return [ring]

def convert_vgg_to_coco(vgg_json_path, coco_json_path, class_mapping, default_width=1024, default_height=1024):
    """Convert a VGG file with rectangle annotations into COCO format.

    All categories of ``class_mapping`` are written up front. Each VGG image
    becomes a COCO image whose id is derived from a hash of its file name;
    each ``rect`` region with a known label becomes one annotation. Regions
    of another shape type, or whose label is not in ``class_mapping``, are
    ignored. Annotation ids are assigned sequentially starting at 1 so they
    are unique within the output file.

    Args:
        vgg_json_path: Path of the VGG JSON file to read.
        coco_json_path: Path of the COCO JSON file to write; its parent
            directory is created when needed.
        class_mapping: Mapping of category id -> label name.
        default_width: Image width used when the VGG entry has no ``width``.
        default_height: Image height used when the VGG entry has no ``height``.
    """
    # Label name -> category id
    category_ids = {v: k for k, v in class_mapping.items()}

    coco_data = {
        "images": [],
        "annotations": [],
        "categories": [{"id": cat_id, "name": name} for name, cat_id in category_ids.items()]
    }

    with open(vgg_json_path) as f:
        vgg_data = json.load(f)

    # Image id -> first file name that produced it, to report hash collisions
    seen_image_ids = {}

    for filename, image_info in vgg_data.items():
        image_id = img_to_id(filename, 4)
        if image_id in seen_image_ids:
            print(f"Warning: image id {image_id} of '{filename}' collides with '{seen_image_ids[image_id]}'")
        else:
            seen_image_ids[image_id] = filename

        coco_data['images'].append({"id": image_id,
                                   "width": image_info.get("width", default_width),
                                   "height": image_info.get("height", default_height),
                                   "file_name": filename})

        for region in image_info.get('regions', {}).values():
            shape = region['shape_attributes']

            if shape.get('name') != 'rect':
                continue  # ignore non-rects

            label = region['region_attributes']['label']

            if label not in category_ids:
                continue  # label outside class_mapping

            coco_data['annotations'].append({"id": len(coco_data['annotations']) + 1,
                                            "image_id": image_id,
                                            "category_id": category_ids[label],
                                            "bbox": calculate_bbox_rect(shape),
                                            "area": calculate_area_rect(shape),
                                            "iscrowd": 0,
                                            "segmentation": segmentation_rect(shape)})

    # A bare file name has no directory part and must not be passed to makedirs.
    coco_dir = os.path.dirname(coco_json_path)
    if coco_dir:
        os.makedirs(coco_dir, exist_ok=True)

    with open(coco_json_path, 'w') as f:
        json.dump(coco_data, f, indent=4)


# if __name__ == "__main__":
#     class_mapping: dict[int, str] = {}
#     vgg_path: str = ""
#     coco_path: str = ""
#     width: int = 0
#     height: int = 0
    
#     convert_vgg_to_coco(vgg_path, coco_path, class_mapping, width, height)
