In [1]:
import os
import rasterio
import numpy as np
from rasterio.enums import Resampling
from tqdm import tqdm


In [9]:
import os
import rasterio
import numpy as np
from tqdm import tqdm

# Configuration
input_dir = "VertexAI_Patches"      # Folder where the labeled GeoTIFF patches are stored
output_dir = "VertexAI_masks_png"   # Folder where the generated PNG masks will be saved
# File-name prefix -> pixel value written into every pixel of that file's mask.
class_map = {
    "wheat": 1,
    "persian_clover": 2,
    "sugarcane": 3,
    "barren": 4,
    "builtup": 5
}

# Create output folder if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Process each file: one single-band uint8 PNG mask per ".tif" patch.
for filename in tqdm(os.listdir(input_dir)):
    if not filename.endswith(".tif"):
        continue

    # Auto-detect the class from the file-name prefix (first matching key wins).
    class_id = next(
        (cid for prefix, cid in class_map.items() if filename.startswith(prefix)),
        None,
    )
    if class_id is None:
        print(f"Unknown class in file: {filename}")
        continue

    # Only the pixel dimensions of the source are needed, so the source
    # dataset is closed again before the mask is written.
    input_path = os.path.join(input_dir, filename)
    with rasterio.open(input_path) as src:
        height, width = src.height, src.width

    # The whole patch belongs to one class, so the mask is a constant image.
    mask = np.full((height, width), class_id, dtype=np.uint8)

    # Build the PNG profile from scratch instead of copying src.meta.
    # The copied metadata also carried the GeoTIFF's `crs` and `transform`;
    # PNG cannot embed georeferencing, so GDAL typically spills it into a
    # `<name>.png.aux.xml` sidecar next to every mask. Leaving those keys out
    # keeps the output folder to plain, non-georeferenced PNGs.
    mask_profile = {
        "driver": "PNG",
        "width": width,
        "height": height,
        "count": 1,
        "dtype": "uint8",
        "nodata": 0,  # 0 is reserved for background; class ids start at 1
    }

    # Save the mask next to the others as "<source stem>_mask.png".
    output_filename = os.path.splitext(filename)[0] + "_mask.png"
    output_path = os.path.join(output_dir, output_filename)
    with rasterio.open(output_path, "w", **mask_profile) as dst:
        dst.write(mask, 1)

print(f"Masks saved to: {output_dir}")

100%|██████████| 262/262 [00:02<00:00, 100.13it/s]

Masks saved to: VertexAI_masks_png





In [12]:
import json

# --- Cloud Storage layout -------------------------------------------------
bucket = "pabbi_bucket"
image_prefix = f"gs://{bucket}/images"
mask_prefix = f"gs://{bucket}/masks_png"   # where the PNG masks are expected
class_names = ["wheat", "persian_clover", "sugarcane", "barren", "builtup"]
images_per_class = 50   # entries are emitted for indices 0 .. images_per_class - 1

output_path = "vertex_segmentation_final_corrected-V7.jsonl"

# --- Pixel value -> display name -------------------------------------------
# Keys are the mask pixel values rendered as strings; the same mapping object
# is attached to every record below.
class_map = {
    "wheat": 1,
    "persian_clover": 2,
    "sugarcane": 3,
    "barren": 4,
    "builtup": 5
}
annotation_specs = {
    str(pixel_value): {"displayName": label}
    for label, pixel_value in class_map.items()
}

# --- Emit one JSON line per (class, index) pair -----------------------------
# File names are synthesized as "<class>_<index>.tif" and
# "<class>_<index>_mask.png"; nothing here checks that those objects exist.
# NOTE(review): the field names ("imageGcsUri", "maskGcsUri",
# "annotationSpecs") are kept exactly as written originally -- confirm them
# against the Vertex AI import schema before uploading.
with open(output_path, "w") as f:
    for label in class_names:
        for index in range(images_per_class):
            stem = f"{label}_{index}"
            record = {
                "imageGcsUri": f"{image_prefix}/{stem}.tif",
                "maskGcsUri": f"{mask_prefix}/{stem}_mask.png",
                "annotationSpecs": annotation_specs,
                # Every record is labeled with the "training" ml_use value.
                "dataItemResourceLabels": {
                    "aiplatform.googleapis.com/ml_use": "training"
                },
            }
            f.write(json.dumps(record) + "\n")

print(f"✅ Final corrected .jsonl written to: {output_path}")

✅ Final corrected .jsonl written to: vertex_segmentation_final_corrected-V7.jsonl
