# Necessary Steps

In [None]:
# Optional one-time setup: uncomment the next line to install/upgrade ultralytics.
#!pip install -U ultralytics

In [1]:
import json
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np
import shutil
from random import choice
import re
import csv
import zipfile
import pandas as pd
import glob
import math
import geopandas as gpd
from shapely.geometry import shape, Polygon, box
from PIL import Image
import requests
import time
from ultralytics import YOLO
from skimage.filters.rank import entropy
from skimage.morphology import disk
from skimage.util import img_as_ubyte
from shapely.ops import unary_union

In [2]:
cd /home/core-stack/Documents/Ponds

/home/core-stack/Documents/Ponds


In [3]:
# Capture the current working directory and display it as the cell output
pwd = os.getcwd()
pwd

'/home/core-stack/Documents/Ponds'

In [4]:
# Load the GeoJSON of the block for which ponds are to be detected
gdf = gpd.read_file("Shapefiles/Masalia_mws.geojson")

# Zoom level used when downloading tiles
zoom_level = 17

# Folder where the downloaded image tiles are saved
image_dir = "Data/Zoom17/Masalia"

# Scale of image tile
scale = 1           # scale of 1 = 256*256 dimensional image

# Path to the trained model weights
model_path = "Models/Ponds_best.pt"

# CSV file where the polygons of the detected objects are saved
csv_file = "CSV_Output/Masalia_Ponds.csv"

# Entropy cut-off: detections whose entropy is missing or above this value are
# skipped when the CSV is written.
# NOTE(review): the original note says entropy applies only to wet ponds — confirm
# against process_image.
entropy_threshold = 2.5


# Download Data for Inference

Get the bounding box automatically from the GeoJSON instead of drawing it manually on GEE.

In [None]:
# Overall extent of all geometries in the GeoJSON: (minx, miny, maxx, maxy)
minx, miny, maxx, maxy = gdf.total_bounds

# Corner points of that extent as [x, y] pairs, upper edge first
topLeft, topRight = [minx, maxy], [maxx, maxy]
bottomRight, bottomLeft = [maxx, miny], [minx, miny]


In [None]:
# Base URL of the tile server handed to the tile downloader
base_url = "https://mt1.google.com/vt/lyrs=s"

# Make sure the tile output folder is in place and report which case applied
if os.path.exists(image_dir):
    print(f"The folder already exists: {image_dir}")
else:
    os.makedirs(image_dir)
    print(f"Created the folder: {image_dir}")

In [None]:
from tile_downloader import download_map_tiles
# Call the tile downloader with the tile server URL, the output folder, the zoom
# level, the scale and the four corner points of the GeoJSON extent.
# NOTE(review): corners are passed as topLeft, topRight, bottomLeft, bottomRight —
# confirm this matches the parameter order of download_map_tiles.
download_map_tiles(base_url, image_dir, zoom_level, scale, topLeft, topRight, bottomLeft, bottomRight)

# SAVE PREDICTIONS IN CSV

In [None]:
# Class labels handed to process_image together with the thresholds below
class_names = ['Dry', 'Wet']

# Confidence threshold configured for each class
conf_thresholds = dict(Dry=0.75, Wet=0.6)

# One-letter abbreviation for each class label
class_abbreviations = dict(Dry='D', Wet='W')

Load the model

In [None]:
# Build the YOLO model object from the weights file at model_path
my_new_model = YOLO(model_path)

Predictions

In [None]:
# Gather the full path of every regular file found directly inside image_dir
image_files = []
for entry_name in os.listdir(image_dir):
    entry_path = os.path.join(image_dir, entry_name)
    if os.path.isfile(entry_path):
        image_files.append(entry_path)

In [None]:
# Number of tile images found in image_dir
len(image_files)

In [None]:
from image_processor import process_image, extract_xtile_ytile, tile_corners_to_latlon, calculate_tile_center

In [None]:
# Run the model on every tile and write one CSV row per kept detection.
# The CSV has a rectangular layout, so a first pass finds the largest polygon
# (in vertices) and the second pass pads every polygon to that width.
max_vertices = 0
image_files = [os.path.join(image_dir, f) for f in os.listdir(image_dir) if os.path.isfile(os.path.join(image_dir, f))]

# Results of process_image, kept so the model runs only once per tile
image_data = []

# First pass: find the maximum vertex count over all polygons and store the results
for image_path in image_files:
    _, _, polygons, pred_classes, conf_scores, entropies = process_image(
        image_path, conf_thresholds, my_new_model, class_names)
    if polygons:
        max_vertices = max(max_vertices, max(len(polygon) for polygon in polygons))
    image_data.append({
        'image_path': image_path,
        'polygons': polygons,
        'pred_classes': pred_classes,
        'entropies': entropies
    })

# Dynamic column headers for the polygon coordinates: X_1, Y_1, X_2, Y_2, ...
coordinate_headers = []
for i in range(1, max_vertices + 1):
    coordinate_headers.extend((f"X_{i}", f"Y_{i}"))

# Full header row: fixed tile columns followed by the coordinate columns
header = ["Image Path", "Predicted Class", "Center Latitude", "Center Longitude",
          "Top Left Latitude", "Top Left Longitude", "Top Right Latitude", "Top Right Longitude",
          "Bottom Left Latitude", "Bottom Left Longitude", "Bottom Right Latitude", "Bottom Right Longitude"] + coordinate_headers

# Make sure the CSV output folder exists so open() below cannot fail on a fresh checkout
csv_dir = os.path.dirname(csv_file)
if csv_dir:
    os.makedirs(csv_dir, exist_ok=True)

# Second pass: write the rows and time it
start_time = time.time()

with open(csv_file, 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerow(header)  # Write dynamic header row

    for data in image_data:
        image_path = data['image_path']
        polygons = data['polygons']
        pred_classes = data['pred_classes']
        entropies = data['entropies']

        # Tile indices come from the file name; the corners use the configured
        # zoom_level (the original referenced an undefined name `zoom` here).
        xtile, ytile = extract_xtile_ytile(image_path)
        top_left, top_right, bottom_left, bottom_right = tile_corners_to_latlon(xtile, ytile, zoom_level)
        latitude, longitude = calculate_tile_center(top_left, top_right, bottom_left, bottom_right)

        for pred_class, polygon, entropy_value in zip(pred_classes, polygons, entropies):
            # Detections without an entropy value, or above the threshold, are dropped.
            # NOTE(review): if process_image reports no entropy for some class, every
            # detection of that class is skipped here — confirm this is intended.
            if entropy_value is None or entropy_value > entropy_threshold:
                print(f"Skipped {pred_class} due to high entropy: {entropy_value if entropy_value is not None else 'None'}")
                continue

            row = [image_path, pred_class, latitude, longitude, top_left[0], top_left[1],
                   top_right[0], top_right[1], bottom_left[0], bottom_left[1], bottom_right[0], bottom_right[1]]

            # Flatten the polygon to x1, y1, x2, y2, ... and pad with None
            # (written as empty cells) up to the widest polygon
            flat_polygon = [coord for point in polygon for coord in point]
            flat_polygon += [None] * (2 * max_vertices - len(flat_polygon))

            row.extend(flat_polygon)
            csvwriter.writerow(row)

end_time = time.time()

print(f"CSV file '{csv_file}' saved successfully.")
print(f"Time taken: {end_time - start_time:.2f} seconds.")

# Add Buffer to combine nearby predicted objects

In [5]:
# NOTE(review): not referenced in the visible cells — presumably used by a later
# cell or helper; confirm or remove.
EARTH_CIRCUMFERENCE_DEGREES = 360  # degrees

In [6]:
# Read the predictions CSV and expose the 'Predicted Class' column as 'Class'
df = pd.read_csv(csv_file).rename(columns={'Predicted Class': 'Class'})

In [7]:
# Base name of the CSV without directory or extension
# (e.g. "Masalia_Ponds" from "CSV_Output/Masalia_Ponds.csv"); used to name the output files
csv_basename = os.path.splitext(os.path.basename(csv_file))[0]

In [9]:
from geo_utils import pixel_to_geo

# Turn every CSV row into a polygon in geographic coordinates, merge polygons that
# lie close together, and export the merged geometry as a zipped shapefile.

# Pixel size of one tile, used to map pixel coordinates onto the tile's extent.
# NOTE(review): assumes tiles were downloaded at scale = 1 (256x256) — confirm.
tile_width, tile_height = 256, 256

# Collected features: geometry plus the predicted class
geojson_features = []

# Iterate through each detection row
for _, row in df.iterrows():
    lat_top_left = row['Top Left Latitude']
    lon_top_left = row['Top Left Longitude']
    lat_bottom_right = row['Bottom Right Latitude']
    lon_bottom_right = row['Bottom Right Longitude']

    object_coords = []

    # Walk X_1/Y_1, X_2/Y_2, ... until the columns run out or the NaN padding starts
    i = 1
    while True:
        x_col = f'X_{i}'
        y_col = f'Y_{i}'

        if x_col not in row or y_col not in row:
            break  # No more coordinate columns

        x = row[x_col]
        y = row[y_col]

        if pd.isna(x) or pd.isna(y):
            break  # Padding reached: the polygon has no further vertices

        lon, lat = pixel_to_geo(x, y, lat_top_left, lon_top_left, lat_bottom_right, lon_bottom_right, tile_width, tile_height)
        # Non-finite conversions are dropped, but the walk continues
        if np.isfinite(lon) and np.isfinite(lat):
            object_coords.append((lon, lat))

        i += 1  # Move to the next set of coordinates

    # A polygon needs at least 3 points
    if len(object_coords) >= 3:
        object_coords.append(object_coords[0])  # Close the polygon
        polygon_geometry = Polygon(object_coords)
        feature = {
            "type": "Feature",
            "geometry": polygon_geometry,
            "properties": {
                "Class": row['Class']
            }
        }
        geojson_features.append(feature)

# Convert to GeoDataFrame
gdf_final = gpd.GeoDataFrame(
    [feature['properties'] for feature in geojson_features],
    geometry=[feature['geometry'] for feature in geojson_features],
    crs="EPSG:4326"
)

# Merge nearby/overlapping geometries: grow each polygon, union them, then shrink
# the union back by the same amount.
# NOTE(review): buffer_distance is in CRS units (degrees for EPSG:4326), so the
# buffer is not uniform on the ground and geopandas warns about it; consider
# reprojecting to a projected CRS before buffering.
buffer_distance = 0.0005
gdf_final['Buffered'] = gdf_final.geometry.buffer(buffer_distance)
combined_polygons = unary_union(gdf_final['Buffered'])
combined_polygons = combined_polygons.buffer(-buffer_distance)

# Convert back to GeoDataFrame
gdf_combined = gpd.GeoDataFrame(geometry=[combined_polygons], crs="EPSG:4326")

# Ensure the output folder exists
output_folder = 'Shapefile_Output'
os.makedirs(output_folder, exist_ok=True)

# Save the final shapefile in the 'Shapefile_Output' folder
shapefile_path = os.path.join(output_folder, f"{csv_basename}_COMBINED_GEOMETRY.shp")
gdf_combined.to_file(shapefile_path)

# Collect the shapefile components that go into the ZIP archive.
# The .zip itself is excluded: on a re-run the archive from the previous run also
# matches the glob and would otherwise be written into itself and then deleted by
# the cleanup loop below. The .csv file is excluded as before.
zip_filename = os.path.join(output_folder, f"{csv_basename}_COMBINED_GEOMETRY.zip")
shapefile_files = [
    file
    for file in glob.glob(os.path.join(output_folder, f"{csv_basename}_COMBINED_GEOMETRY.*"))
    if not file.endswith(('.csv', '.zip'))
]

# Add the shapefile files to the zip archive (stored under their bare file names)
with zipfile.ZipFile(zip_filename, "w") as zipf:
    for file in shapefile_files:
        zipf.write(file, os.path.basename(file))

print(f"Created ZIP file: {zip_filename}")

# Delete the original shapefile files after zipping, but keep the .csv file
for file in shapefile_files:
    os.remove(file)
    print(f"Deleted: {file}")

print("All shapefile components have been deleted after zipping, except for the .csv file.")


  gdf_final['Buffered'] = gdf_final.geometry.buffer(buffer_distance)


Created ZIP file: Shapefile_Output/Masalia_Ponds_COMBINED_GEOMETRY.zip
Deleted: Shapefile_Output/Masalia_Ponds_COMBINED_GEOMETRY.dbf
Deleted: Shapefile_Output/Masalia_Ponds_COMBINED_GEOMETRY.shx
Deleted: Shapefile_Output/Masalia_Ponds_COMBINED_GEOMETRY.shp
Deleted: Shapefile_Output/Masalia_Ponds_COMBINED_GEOMETRY.cpg
Deleted: Shapefile_Output/Masalia_Ponds_COMBINED_GEOMETRY.prj
All shapefile components have been deleted after zipping, except for the .csv file.
