In [None]:
from gee_scripts.extract_values import find_pixel_values_for_coordinates, get_image_index, test_find_pixel_values_for_coordinates
from pathlib import Path

# 1. Retrieve all the images

In [None]:
# The SM values are extracted chip by chip, so no mosaics are built beforehand.

# Sub-folders of `base_path` that are searched for images
islands_path = [
    "ALL",
]

base_path = Path("bosf")

# Collect every .tif file below each folder. The matches are sorted because
# the filesystem returns them in arbitrary order; a stable order makes the
# image list (and anything derived from it) come out the same on every machine.
images_by_island = [
    sorted((base_path / island).rglob("*.tif")) for island in islands_path
]

# Number of images found per folder
print([len(group) for group in images_by_island])

# Flatten the per-folder groups into a single list of image paths
all_images = [image for island_images in images_by_island for image in island_images]
print(len(all_images))

# 2. Create a spatial index of all the images

In [None]:
%%time

# To improve efficiency, build an index that stores the bounds of every
# image. The index is later used to check whether an image contains any
# of the points to be queried.

index = get_image_index(all_images)

# 3. Extract the SM values from all images for all the points

In [None]:
import geopandas as gpd

In [None]:
# Load the point locations from the dipwells shapefile
dipwells_shp = "bosf/dipwells/bosf_dipwells.shp"
all_points = gpd.read_file(dipwells_shp)

# Preview the first rows (head() shows 5 by default)
all_points.head()

In [None]:
# Build an id for each point (intended to be unique) by joining its
# SiteCd, PointRef and PointIndex columns with underscores.
id_parts = [
    all_points[column].astype(str)
    for column in ("SiteCd", "PointRef", "PointIndex")
]
all_points["id"] = id_parts[0].str.cat(id_parts[1:], sep="_")

In [None]:
# Pair each point's coordinates with its id; the extraction function
# expects entries shaped as ((x, y), id).
all_coords = [
    ((point.x, point.y), point_id)
    for point, point_id in zip(all_points["geometry"], all_points["id"])
]

In [None]:
# Sanity check: number of points to query and size of the image index
len(all_coords), len(index)

In [None]:
%%time

# Run the extraction over the image index for every point.
# This step can take a long time.
results = find_pixel_values_for_coordinates(index, all_coords)

In [None]:
# Convert the extraction output into a pandas DataFrame
import pandas as pd
results_df = pd.DataFrame(results)
# Display the resulting table for inspection
results_df

In [None]:
# Store the results as CSV (without the DataFrame row index).
# The output folder is created first because `to_csv` fails when the
# target folder does not exist.
output_csv = Path("data/all_bosf_extracted_data.csv")
output_csv.parent.mkdir(parents=True, exist_ok=True)

results_df.to_csv(output_csv, index=False)

# 4. Test the value extraction with a sample of three points

The following cells check that the output of the `find_pixel_values_for_coordinates` function is <br>
consistent. We manually checked the values of some random points and images in local GIS software <br>
and then ran the process. The result returned by the function has to match those manually checked <br>
expected values.

In [None]:
from shapely.geometry import Point, Polygon
import geopandas as gpd
import pandas as pd
import random

In [None]:
# Read the test sample points from the shapefile and display them
test_sample_points = gpd.read_file("test_data/test_sample.shp")
test_sample_points

In [None]:
# Collect the (x, y) pair of every test point.
# NOTE(review): the main extraction cell documents the expected input as
# ((x, y), id); only bare (x, y) pairs are passed here — confirm that
# find_pixel_values_for_coordinates accepts this form.
sample_coords = [(point.x, point.y) for point in test_sample_points["geometry"]]

In [None]:
# Run the extraction for the test sample points against the same image index
test_results = find_pixel_values_for_coordinates(index, sample_coords)

In [None]:
# Fix the seed so the same entries are drawn on every run
random.seed(42)

# Draw 10 extraction results at random and keep only the image path and
# the extracted value of each one.
sampled_results = random.sample(test_results, 10)
random_sample = [(point["image"], point["value"]) for point in sampled_results]

# Save the first 5 sampled pairs as the reference values.
# `index=False` (rather than `None`) is the documented way to omit the row
# index and matches how the full results CSV is written.
random_sample_df = pd.DataFrame(random_sample, columns=["image_path", "sm"])
random_sample_df.iloc[:5].to_csv("test_data/extracted_values.csv", index=False)

In [None]:
# Show only the image name (file stem) instead of the full path
expected_values = random_sample_df.head(5).assign(
    image_path=lambda frame: frame["image_path"].map(lambda image: Path(image).stem)
)
expected_values

In [None]:
# Run the packaged check for find_pixel_values_for_coordinates.
# NOTE(review): presumably compares the function output against
# test_data/extracted_values.csv — confirm in gee_scripts.extract_values.
test_find_pixel_values_for_coordinates()