# This notebook was only run on the hold set

In [13]:
import os
import json
import tifffile
import numpy as np
import matplotlib.pyplot as plt
import shapely
from shapely.geometry import shape
from collections import Counter
from shapely import wkt

In [4]:
# Folders containing the .tif image tiles and their .json label files.
image_dir = '/Users/ashleychen/Downloads/geotiffs/hold/images'
label_dir = '/Users/ashleychen/Downloads/geotiffs/hold/labels'

# List each folder once, keep only the expected extension, and sort so the
# resulting order is deterministic.
image_files = sorted(name for name in os.listdir(image_dir) if name.endswith('.tif'))
label_files = sorted(name for name in os.listdir(label_dir) if name.endswith('.json'))

print(f"Number of images: {len(image_files)}")
print(f"Number of label files: {len(label_files)}")

# Load the first image (in sorted order) to inspect its array shape.
img_name = image_files[0]
img_path = os.path.join(image_dir, img_name)
img = tifffile.imread(img_path)

print(f"Image shape (H x W x C): {img.shape}")


Number of images: 1866
Number of label files: 1866
Image shape (H x W x C): (1024, 1024, 3)


In [20]:
## Looking at what is in the labels file

# Only the first three label/image pairs are previewed: every feature prints
# one long line, so the output grows quickly.
for json_name, img_name in zip(label_files[:3], image_files[:3]):
    label_path = os.path.join(label_dir, json_name)
    # NOTE(review): img_path is not used in this cell; it is still assigned so
    # the notebook-level name ends up with the same value as before.
    img_path = os.path.join(image_dir, img_name)

    with open(label_path) as f:
        label_data = json.load(f)

    # Each entry of 'lng_lat' is read below for a 'properties' dict and a
    # 'wkt' string.
    lng_lat_features = label_data['features']['lng_lat']

    for feature in lng_lat_features:
        # Metadata describing the annotation
        properties = feature['properties']
        feature_type = properties.get('feature_type')
        subtype = properties.get('subtype')
        uid = properties.get('uid')

        # .get() yields None when a feature has no 'wkt' entry, and
        # wkt.loads() expects a string, so report and skip such features
        # instead of failing inside the parser.
        wkt_str = feature.get('wkt')
        if not wkt_str:
            print(f"Feature {uid}: skipped, no WKT geometry")
            continue

        # Convert WKT to a shapely geometry
        polygon = wkt.loads(wkt_str)

        # .exterior is only read for Polygon geometries; anything else the
        # WKT might describe is reported and skipped.
        if polygon.geom_type != 'Polygon':
            print(f"Feature {uid}: skipped, geometry is {polygon.geom_type}, not Polygon")
            continue

        # Invalid polygons were previously dropped without any trace.
        if not polygon.is_valid:
            print(f"Feature {uid}: skipped, polygon is not valid")
            continue

        # Index rather than unpack so a Z value, if present, is dropped
        # instead of raising; 2D coordinates come out unchanged.
        coords = [(point[0], point[1]) for point in polygon.exterior.coords]
        print(f"Feature {uid} (Type: {feature_type}, Subtype: {subtype}), Polygon coordinates: {coords}")


Feature 104d384e-412b-4a72-8361-3c6c75cafdf0 (Type: building, Subtype: major-damage), Polygon coordinates: [(-90.832067814151, 14.42973661152615), (-90.83198135159103, 14.42967876630043), (-90.83204507143566, 14.42958863344386), (-90.83208152595631, 14.42962017119394), (-90.83210793558972, 14.42958690007866), (-90.8322374224695, 14.42968516523737), (-90.83221349607304, 14.42970690267509), (-90.83224532274491, 14.4297382444936), (-90.832067814151, 14.42973661152615)]
Feature 9badd81e-16bf-4406-b8ad-9cb50a208011 (Type: building, Subtype: major-damage), Polygon coordinates: [(-90.83252997708334, 14.42954147154196), (-90.83240693146364, 14.42962261218812), (-90.83230910007273, 14.42950468856743), (-90.8324489988482, 14.42941164582603), (-90.83252806857338, 14.42951308446236), (-90.83251488806118, 14.42952147417456), (-90.83252851486947, 14.42954216734044), (-90.83252997708334, 14.42954147154196)]
Feature ea0cde31-1b8c-4951-ad5c-8f7bdd5b23e5 (Type: building, Subtype: major-damage), Polygon 

In [18]:
## Seeing unique values in the type and subtype fields in the labels files

# How many label files to scan. 3 keeps the original quick sample; set to None
# to scan every label file. The sets below only reflect the files scanned.
N_LABEL_FILES_TO_SCAN = 3

unique_feature_types = set()
unique_subtypes = set()

# Loop through each label file (paired with its image by sorted position)
for json_name, img_name in zip(label_files[:N_LABEL_FILES_TO_SCAN],
                               image_files[:N_LABEL_FILES_TO_SCAN]):
    label_path = os.path.join(label_dir, json_name)
    # NOTE(review): img_path is not used in this cell; it is still assigned so
    # the notebook-level name ends up with the same value as before.
    img_path = os.path.join(image_dir, img_name)

    with open(label_path) as f:
        label_data = json.load(f)

    # Extract the 'lng_lat' list from the features
    lng_lat_features = label_data['features']['lng_lat']

    # Collect the metadata values of every feature in this file
    for feature in lng_lat_features:
        properties = feature['properties']

        feature_type = properties.get('feature_type')
        subtype = properties.get('subtype')

        # Falsy values (missing key, None, empty string) are not recorded
        if feature_type:
            unique_feature_types.add(feature_type)
        if subtype:
            unique_subtypes.add(subtype)

# Print unique feature types and subtypes
print("Unique Feature Types:", unique_feature_types)
print("Unique Subtypes:", unique_subtypes)

Unique Feature Types: {'building'}
Unique Subtypes: {'no-damage', 'major-damage', 'minor-damage', 'destroyed'}


### Basic Format of Labels File: 

{

  "features": {

    "lng_lat": [

      {

        "properties": {

          "feature_type": "building",

          "subtype": "major-damage",

          "uid": "104d384e-412b-4a72-8361-3c6c75cafdf0"

        },
        
        "wkt": "POLYGON ((-90.832067814151 14.42973661152615, -90.83198135159103 14.42967876630043, ...))"

      },

      {

        "properties": {

          "feature_type": "building",

          "subtype": "major-damage",

          "uid": "9badd81e-16bf-4406-b8ad-9cb50a208011"

        },

        "wkt": "POLYGON ((-90.83252997708334 14.42954147154196, -90.83240693146364 14.42962261218812, ...))"

      },

      ...

    ]

  }
  
}

### Explanation:

`features` is a dictionary; its `lng_lat` key holds an array of feature records.

Each feature inside `lng_lat` has:

- `properties` - contains `feature_type`, `subtype`, and `uid`
- `wkt` - Well-Known Text representation of the polygon

UID - identifier that distinguishes one polygon annotation (one building) from another

Type (`feature_type`) - what kind of object is being annotated in the image

Subtype - indicates damage level 

Polygon Coordinates - (longitude, latitude) pairs that trace the geographic outline of the annotated object; the last point repeats the first point to close the polygon

**Note:** see the code above for how to extract these fields and parse the WKT with shapely.