In [1]:
from gee_scripts.extract_values import find_pixel_values_for_coordinates, get_image_index, test_find_pixel_values_for_coordinates
from pathlib import Path

# 1. Retrieve all the images

In [4]:
# Soil-moisture (SM) values are sampled directly from the individual chips;
# no mosaic is created beforehand because it is not needed.

islands_path = [
    "ALL",
]

base_path = Path("bosf")

# Collect the GeoTIFF chips of every island folder. rglob() yields files in a
# filesystem-dependent order, so each group is sorted to keep the image order
# stable across runs and machines (later steps that depend on that order,
# e.g. seeded sampling of the results, may otherwise not be reproducible).
images_by_island = [
    sorted((base_path / island).rglob("*.tif")) for island in islands_path
]

print([len(group) for group in images_by_island])

# Flatten the per-island groups into a single list of image paths.
all_images = [image for island_images in images_by_island for image in island_images]
print(len(all_images))

[450]
450


# 2. Create a spatial index of all the images

In [5]:
%%time

# To improve efficiency, we build an index that stores the bounds of every
# image. The index is later used to check whether an image contains any of
# the points to be queried, so images without points can be skipped.

index = get_image_index(all_images)

CPU times: user 288 ms, sys: 63.6 ms, total: 352 ms
Wall time: 563 ms


# 3. Extract the SM values from all the images for all the points

In [7]:
import geopandas as gpd

In [11]:
# Load the dipwell locations from the shapefile into a GeoDataFrame
dipwells_shapefile = "bosf/dipwells/bosf_dipwells.shp"
all_points = gpd.read_file(dipwells_shapefile)

# Preview the first five rows
all_points.head()

Unnamed: 0,SiteCd,PointRef,PointIndex,Instrument,BlockCd,UTMZone,Easting,Northing,Longitude,Latitude,Count of o,geometry
0,KF-01,D,1,KF-01-D-01,A,50M,224339.6,9744788.9,114.52149,-2.30679,24,POINT (114.52149 -2.30679)
1,KF-01,D,2,KF-01-D-02,A,50M,224329.5,9744842.0,114.5214,-2.30631,24,POINT (114.52140 -2.30631)
2,KF-01,D,3,KF-01-D-03,A,50M,224345.0,9744889.6,114.52154,-2.30588,24,POINT (114.52154 -2.30588)
3,KF-01,D,4,KF-01-D-04,A,50M,224327.1,9744934.9,114.52138,-2.30547,24,POINT (114.52138 -2.30547)
4,KF-01,D,5,KF-01-D-05,A,50M,224349.2,9745041.2,114.52158,-2.30451,24,POINT (114.52158 -2.30451)


In [15]:
# Build a unique point id of the form "<SiteCd>_<PointRef>_<PointIndex>"

id_parts = [
    all_points[column].astype(str)
    for column in ("SiteCd", "PointRef", "PointIndex")
]
all_points["id"] = id_parts[0].str.cat(id_parts[1:], sep="_")

In [16]:
# Build the input for the extraction function, which expects one
# ((x, y), id) tuple per point

point_geometries = all_points.geometry
all_coords = [
    ((x, y), point_id)
    for x, y, point_id in zip(point_geometries.x, point_geometries.y, all_points["id"])
]

In [17]:
# Sanity check: number of points to sample and number of entries in the image index
len(all_coords), len(index)

(377, 450)

In [18]:
%%time

# Run the extraction for every point in all_coords using the image index.
# This is the slow step of the notebook (timed by the %%time magic of this cell).
results = find_pixel_values_for_coordinates(index, all_coords)

CPU times: user 19.7 s, sys: 429 ms, total: 20.1 s
Wall time: 21.4 s


In [20]:
# Convert the extraction output into a pandas DataFrame and display it
# (pandas truncates the display of long frames automatically)
import pandas as pd
results_df = pd.DataFrame(results)
results_df

Unnamed: 0,image,smm_value,coordinate,date,point_id
0,bosf/ALL/close_SMCmap_2017_10_13_DESC_sepal-us...,12.0,"(114.52149, -2.30679)",2017-10-13,KF-01_D_1
1,bosf/ALL/close_SMCmap_2017_10_13_DESC_sepal-us...,12.0,"(114.5214, -2.30631)",2017-10-13,KF-01_D_2
2,bosf/ALL/close_SMCmap_2017_10_13_DESC_sepal-us...,15.0,"(114.52154, -2.30588)",2017-10-13,KF-01_D_3
3,bosf/ALL/close_SMCmap_2017_10_13_DESC_sepal-us...,15.0,"(114.52138, -2.30547)",2017-10-13,KF-01_D_4
4,bosf/ALL/close_SMCmap_2017_10_13_DESC_sepal-us...,12.0,"(114.52158, -2.30451)",2017-10-13,KF-01_D_5
...,...,...,...,...,...
169645,bosf/ALL/close_SMCmap_2021_03_26_DESC_sepal-us...,21.0,"(114.46646, -2.2283)",2021-03-26,KF-25_D_4
169646,bosf/ALL/close_SMCmap_2021_03_26_DESC_sepal-us...,21.0,"(114.43207, -2.2357)",2021-03-26,KF-26_D_1
169647,bosf/ALL/close_SMCmap_2021_03_26_DESC_sepal-us...,20.0,"(114.43225, -2.23342)",2021-03-26,KF-26_D_2
169648,bosf/ALL/close_SMCmap_2021_03_26_DESC_sepal-us...,23.0,"(114.43218, -2.23127)",2021-03-26,KF-26_D_3


In [21]:
# Store the results as csv

output_csv = Path("data/all_bosf_extracted_data.csv")
# to_csv does not create missing folders, so make sure the target directory
# exists; otherwise the export fails on a fresh checkout.
output_csv.parent.mkdir(parents=True, exist_ok=True)
results_df.to_csv(output_csv, index=False)

# 4. Test the value extraction with a sample of three points

The following cells check that the output of the find_pixel_values_for_coordinates function is <br>
consistent: we manually checked the values of some random points and images, and then ran the process. <br>
The result returned by the function has to match the expected values that were manually checked <br>
in local GIS software.

In [None]:
from shapely.geometry import Point, Polygon
import geopandas as gpd
import pandas as pd
import random

In [None]:
# Read the shapefile with the test sample points into a GeoDataFrame and display it
test_sample_points = gpd.read_file("test_data/test_sample.shp")
test_sample_points

In [None]:
# Get the coordinates of each point in the format expected by
# find_pixel_values_for_coordinates: ((x, y), id).
# The test shapefile's attribute columns are not shown here, so the row
# position is used as the point id -- TODO confirm whether a dedicated id
# column should be used instead.
sample_coords = [
    ((p.x, p.y), point_id)
    for point_id, p in enumerate(test_sample_points["geometry"].tolist())
]

In [None]:
# Run the extraction on the test sample, reusing the image index built earlier in the notebook.
# NOTE(review): the full extraction passes ((x, y), id) tuples -- confirm sample_coords has the same structure.
test_results = find_pixel_values_for_coordinates(index, sample_coords)

In [None]:
# Fix the seed so the same records are drawn on every run
random.seed(42)

# Draw 10 records and keep only the (image path, soil-moisture value) pair of each.
# The extraction records expose the pixel value under the "smm_value" key
# (see the columns of the results table), not "value".
random_sample = random.sample(test_results, 10)
random_sample = [(point["image"], point["smm_value"]) for point in random_sample]

# Save the first 5 sampled records (without the row index) as the reference file
random_sample_df = pd.DataFrame(random_sample, columns=["image_path", "sm"])
random_sample_df.iloc[:5].to_csv("test_data/extracted_values.csv", index=False)

In [None]:
# Show only the image file name (no folder, no extension) for readability
expected_values = random_sample_df.head(5).assign(
    image_path=lambda frame: frame["image_path"].map(lambda image: Path(image).stem)
)
expected_values

In [None]:
# Run the project's test helper for the extraction function.
# NOTE(review): presumably it compares the extracted values with the manually
# checked ones stored under test_data/ -- confirm in gee_scripts.extract_values.
test_find_pixel_values_for_coordinates()