In [1]:
import sys
sys.path.append('../mlai_research/')
import log
import utils
import cv2
import rasterio
import rasterio.plot
from rasterio.io import DatasetReader
from rasterio.mask import mask
from rasterio.enums import Resampling
from rasterio.warp import reproject, Resampling
from shapely.geometry import box, mapping, Polygon, Point
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
from PIL import Image
from typing import Tuple, Union, List

In [2]:
logger = log.get_logger(__name__)

In [3]:
conf = utils.load_config("base")

24-Dec-23 00:01:37 - INFO - Starting 'load_config'.
24-Dec-23 00:01:37 - INFO - Finished 'load_config' in 0.0233 secs.


In [4]:
gdf = gpd.read_file(f"{conf.data.path_base_points}{conf.data.fn_shp_combined}")

In [5]:
gdf

Unnamed: 0,Species,pid,geometry
0,Other,0,POINT (340729.445 7371235.519)
1,Other,1,POINT (340722.163 7371248.913)
2,Other,2,POINT (340713.499 7371248.818)
3,Xanthium,3,POINT (340717.911 7371249.236)
4,Other,4,POINT (340701.246 7371250.714)
...,...,...,...
301,Xanthium,301,POINT (369123.342 7352421.287)
302,Xanthium,302,POINT (369264.777 7352360.554)
303,Xanthium,303,POINT (370468.706 7351748.584)
304,Xanthium,304,POINT (370456.853 7351746.022)


In [4]:
def plot_raster(gdf, rasterimg):
    fig, ax = plt.subplots(figsize = (20,20))
    rasterio.plot.show(rasterimg, ax=ax)
    gdf.plot(column='Species',
                   categorical=True,
                   legend=True,
                   # markersize=45,
                   cmap="Set2",
                   ax=ax,
            aspect=1)
    ax.set_title("Letaba Points Subset")
    for x, y, label in zip(gdf.geometry.x, gdf.geometry.y, gdf.photoID):
        ax.annotate(label, xy=(x, y), xytext=(3, 3), textcoords="offset points")
    plt.show()

In [5]:
def load_raster(fn="../data/01_base/rgba_7cm/rgba_aoi_1.tif"):
    with rasterio.open(fn) as ds:
        bounds = ds.bounds
        transform = ds.transform
        array = ds.read(1)
        crs = ds.crs
    return bounds, transform, array, crs


def gen_random_points(num_points, bounds):
    minx, miny, maxx, maxy = bounds
    xs = np.random.uniform(minx, maxx, num_points)
    ys = np.random.uniform(miny, maxy, num_points)
    points = [Point(x, y) for x, y in zip(xs, ys)]
    return points

def filter_valid_points(points, transform, array):
    # Filter out points that fall within the no-data regions of the raster
    valid_points = []
    for point in points:
        # Convert the point's coordinates to row and column indices
        row, col = rasterio.transform.rowcol(transform, point.x, point.y)
        
        # Check if the point falls within the bounds of the array
        if 0 <= row < array.shape[0] and 0 <= col < array.shape[1]:
            # Check if the corresponding pixel value in the raster is not a no-data value
            if array[row, col] != 0:
                valid_points.append(point)
    return valid_points

In [6]:
# def poisson_disc_sampling(bounds, radius, seed=None):
#     """
#     Poisson disc sampling using Bridson's algorithm.

#     Parameters:
#     bounds (tuple): The bounds of the area to sample as (minx, miny, maxx, maxy).
#     radius (float): The minimum distance between samples.
#     seed (int, optional): The seed for the random number generator.

#     Returns:
#     list: The samples as a list of shapely.geometry.Point.
#     """
#     # Set the seed for the random number generator
#     if seed is not None:
#         np.random.seed(seed)

#     # Define the dimensions of the area to sample
#     minx, miny, maxx, maxy = bounds
#     width, height = maxx - minx, maxy - miny

#     # Define the cell size and create an empty grid
#     cell_size = radius / np.sqrt(2)
#     grid_width, grid_height = int(np.ceil(width / cell_size)), int(np.ceil(height / cell_size))
#     grid = -np.ones((grid_height, grid_width), dtype=int)

#     # Create a list to hold the samples and the active list
#     samples = []
#     active_list = []

#     # Add the first sample
#     first_sample = np.random.uniform([minx, miny], [maxx, maxy])
#     samples.append(Point(*first_sample))
#     grid[int((first_sample[1] - miny) / cell_size), int((first_sample[0] - minx) / cell_size)] = len(samples) - 1
#     active_list.append(first_sample)

#     # While the active list is not empty, add new samples
#     while active_list:
#         # Choose a random sample from the active list
#         sample = active_list.pop(np.random.randint(len(active_list)))

#         # Generate new samples in the annulus around the sample
#         for _ in range(30):  # 30 is a commonly used value
#             angle = 2 * np.pi * np.random.random()
#             distance = np.random.uniform(radius, 2 * radius)
#             new_sample = sample + distance * np.array([np.cos(angle), np.sin(angle)])

#             # Check if the new sample is inside the area and far enough from existing samples
#             if (minx <= new_sample[0] < maxx and miny <= new_sample[1] < maxy and
#                 grid[int((new_sample[1] - miny) / cell_size) - 2:int((new_sample[1] - miny) / cell_size) + 3,
#                      int((new_sample[0] - minx) / cell_size) - 2:int((new_sample[0] - minx) / cell_size) + 3] == -1).all():
#                 # Add the new sample to the samples list, grid, and active list
#                 samples.append(Point(*new_sample))
#                 grid[int((new_sample[1] - miny) / cell_size), int((new_sample[0] - minx) / cell_size)] = len(samples) - 1
#                 active_list.append(new_sample)

#     return samples

In [7]:
bounds, transform, array, crs = load_raster()

In [10]:
points = poisson_disc_sampling(bounds, 0.01, seed=42)
len(points)

: 

In [None]:
bounds, transform, array, crs = load_raster()
# points = gen_random_points(2100, bounds)

In [9]:
valid_points = filter_valid_points(points, transform, array)

In [10]:
len(valid_points)

0

In [None]:
# Create a GeoDataFrame from the valid points
gdf = gpd.GeoDataFrame(geometry=valid_points, crs=ds.crs)

In [None]:
gdf['photoID'] = np.arange(1000+0, 1000+len(gdf))
gdf['Species'] = 'Unknown'

In [None]:
raster = rasterio.open("../data/01_base/rgba_7cm/rgba_aoi_1.tif")

In [None]:
plot_raster(gdf, raster)

In [None]:
threshold = 0

In [None]:
# Create a mask where data values are greater than the threshold
mask = data > threshold

In [None]:
total_points = 100

In [None]:
# Calculate the number of points along each axis
points_per_axis = int(np.sqrt(total_points))

In [None]:
# Generate grid points based on the number of points along each axis
rows = np.linspace(0, raster.height - 1, points_per_axis)
cols = np.linspace(0, raster.width - 1, points_per_axis)

In [None]:
rows

In [None]:
cols

In [None]:
# Create meshgrid from rows and cols
cols, rows = np.meshgrid(cols, rows)
cols = cols.flatten()
rows = rows.flatten()

In [None]:
# Convert grid points to geographical coordinates
xs, ys = raster.transform * (cols, rows)


In [None]:
# Filter points by mask
points = [Point(x, y) for x, y in zip(xs, ys) if mask[int(y), int(x)]]
    

In [None]:
raster.shape

In [None]:
raster.crs

In [None]:
# Read the raster data and create a mask
data = raster.read(1)
mask = data != raster.nodata  # Assuming nodata represents empty areas

In [None]:
data

In [None]:
data.shape

In [None]:
mask.shape

In [None]:
mask[mask == False].shape

In [None]:
density = np.sum(mask) / mask.size

In [None]:
density

In [None]:
# Generate grid points based on the desired density
rows = np.linspace(0, raster.height - 1, int(raster.height / density))
cols = np.linspace(0, raster.width - 1, int(raster.width / density))

In [None]:
# Create meshgrid from rows and cols
cols, rows = np.meshgrid(cols, rows)
cols = cols.flatten()
rows = rows.flatten()

In [None]:
# Convert grid points to geographical coordinates
xs, ys = raster.transform * (cols, rows)

In [None]:
# Create a GeoDataFrame from the points
shapes = [Point(x, y) for x, y in zip(xs, ys)]
all_points = gpd.GeoDataFrame(geometry=shapes, crs=raster.crs.data)
all_points['geometry'] = all_points.buffer(raster.transform.a / 2.0)  # Buffer by half the pixel size
    

In [None]:
# Create a mask for the points
point_mask = [~mask[int(y), int(x)] for x, y in zip(cols, rows)]
    

In [None]:
all_points

In [None]:
point_mask

In [None]:
# Filter points by mask
points = all_points.geometry[point_mask]

In [None]:
points

In [None]:
# Create a GeoDataFrame
gdf = gpd.GeoDataFrame(geometry=points, crs=raster.crs.data)

In [None]:
# Update GeoDataFrame
gdf['photoID'] = np.arange(1000+0, 1000+len(gdf))
gdf['Species'] = 'Unknown'

In [None]:
gdf.head()

In [None]:
plot_raster(gdf, raster)

In [None]:
# # Use rasterio.features.geometry_mask to create a mask for the points
# shapes = [Point(x, y) for x, y in zip(xs, ys)]
# all_points = gpd.GeoDataFrame(geometry=shapes, crs=raster.crs.data)
# all_points['geometry'] = all_points.buffer(raster.transform.a / 2.0)  # Buffer by half the pixel size
# geom_mask = rasterio.features.geometry_mask(all_points.geometry, transform=raster.transform, invert=True, out_shape=(raster.height, raster.width))


In [None]:
# Filter points by mask
points = all_points.geometry[~geom_mask.flatten()]

In [None]:
# Read the raster data and create a mask
data = raster.read(1)
mask = data != raster.nodata  # Assuming nodata represents empty areas

In [None]:
# Generate grid points
rows, cols = np.indices((raster.height, raster.width))

In [None]:
# Apply the mask to rows and cols
rows = rows[mask]
cols = cols[mask]

In [None]:
# Convert grid points to geographical coordinates
xs, ys = raster.transform * (cols, rows)

In [None]:
# Filter points by mask
# points = [Point(x, y) for x, y, m in zip(xs, ys, mask.flatten()) if m]
# Create points using NumPy arrays directly
# points = np.array([Point(x, y) for x, y in zip(xs, ys)])

In [None]:
num_points = 100

In [None]:
# If there are more points than needed, sample them
if len(points) > num_points:
    indices = np.random.choice(points, size=num_points, replace=False)
    points = points[indices]

In [None]:
# Create a GeoDataFrame
gdf = gpd.GeoDataFrame(geometry=points, crs=raster.crs.data)

In [None]:
# Update GeoDataFrame
gdf['photoID'] = np.arange(1000+0, 1000+len(gdf))
gdf['Species'] = 'Unknown'

In [None]:
plot_raster(gdf, raster)

In [None]:
# fig, ax = plt.subplots(figsize = (20,20))
# rasterio.plot.show(raster, ax=ax)
# plt.show()

In [None]:
# Get the transform parameters
transform = raster.transform

In [None]:
# Get the bounds of the raster data
bounds = raster.bounds

# Number of points to generate
n_points = 1000

# Generate random points within the bounds
x_points = np.random.uniform(bounds.left, bounds.right, n_points)
y_points = np.random.uniform(bounds.bottom, bounds.top, n_points)

In [None]:
# Convert geographical coordinates to pixel coordinates
pixel_points = [raster.index(x, y) for x, y in zip(x_points, y_points)]

# Filter points that are inside the raster bounds
inside_points = [(x, y) for x, y in pixel_points if 0 <= x < raster.width and 0 <= y < raster.height]

# Convert pixel coordinates to geographical coordinates
geo_points = [raster.xy(x, y) for x, y in inside_points]

In [None]:
# Create a GeoDataFrame
gdf = gpd.GeoDataFrame(geometry=[Point(x, y) for x, y in geo_points], crs=raster.crs)
gdf['photoID'] = np.arange(1000+0, 1000+len(gdf))
gdf['Species'] = 'Unknown'

In [None]:
gdf.head()

In [None]:
def clip_gdf(gdf, bounds):
    clipped_gdf = gdf[
        (gdf.geometry.x > bounds.left) & 
        (gdf.geometry.x < bounds.right) &
        (gdf.geometry.y > bounds.bottom) &
        (gdf.geometry.y < bounds.top)
    ]
    logger.info(f"Original gdf: {gdf.shape}")
    logger.info(f"Clipped gdf: {clipped_gdf.shape}")
    logger.info(f"Species split: {clipped_gdf.Species.value_counts()}")
    return clipped_gdf

In [None]:
clipped_gdf = clip_gdf(gdf, bounds)

In [None]:
plot_raster(clipped_gdf, raster)