# 1) Feature value extraction - validation points

    This code processes satellite imagery to validate forest cover predictions by comparing raster data from .tif files with validation points in a .shp file, saving the results to CSV files for further analysis.

In [None]:
import os
import time

# geopandas (0.13/0.14) evaluates USE_PYGEOS while it is being imported, so the
# variable must already be in the environment before the import below.
# Setting it after `import geopandas` has no effect.
os.environ['USE_PYGEOS'] = '0'

import geopandas as gpd
import pandas as pd
import rasterio
from rasterio.plot import show
from shapely.geometry import box

shp_folder = "/path/to/tiles/validation_points/shp"
output_folder = 'path/to/save/extracted_feature_values/csv'
tif_folder = '/path/to/tif_files/sentinel2'


def get_raster_values(tif_file, coords):
    """Sample the first band of a raster at the given coordinates.

    Args:
        tif_file: Path of the raster file to open with rasterio.
        coords: Iterable of (x, y) pairs, expressed in the raster's CRS.

    Returns:
        A list with one value per coordinate pair, taken from the first
        band returned by ``src.sample``.
    """
    with rasterio.open(tif_file) as src:
        return [x[0] for x in src.sample(coords)]


# os.listdir() returns entries in arbitrary order; sort so that rasters are
# processed and logged in a reproducible order.
tif_files = sorted(file for file in os.listdir(tif_folder) if file.endswith('.tif'))

# DataFrame.to_csv() does not create missing directories.
os.makedirs(output_folder, exist_ok=True)

# NOTE: process_time() counts CPU time of this process, not wall-clock time.
start = time.process_time()

shp_file_name = "validation_points.shp"
shp_file_path = os.path.join(shp_folder, shp_file_name)
pointData = gpd.read_file(shp_file_path)
# Features without a geometry cannot be located in any raster.
pointData = pointData[pointData['geometry'].notnull()]

for tif_file_name in tif_files:
    tif_file_path = os.path.join(tif_folder, tif_file_name)
    base_name_without_extension = os.path.splitext(tif_file_name)[0]

    with rasterio.open(tif_file_path) as tif_src:
        tif_box = box(*tif_src.bounds)
        tif_crs = tif_src.crs

    # The bounds test and the sampling both work in raster coordinates, so the
    # points must be in the raster's CRS. Reproject only when both CRSs are
    # known; otherwise fall back to using the coordinates as they are.
    if pointData.crs is not None and tif_crs:
        points_in_tif_crs = pointData.to_crs(tif_crs.to_wkt())
    else:
        points_in_tif_crs = pointData

    # to_crs() keeps the index, so the mask selects matching rows in both frames.
    inside_tif = points_in_tif_crs.geometry.intersects(tif_box)
    pointData_in_tif = pointData[inside_tif]
    sample_points = points_in_tif_crs.geometry[inside_tif]

    coord_list = list(zip(sample_points.x, sample_points.y))
    predicted_values = get_raster_values(tif_file_path, coord_list)

    # Build the result directly instead of adding a column to a slice of
    # pointData: this avoids SettingWithCopyWarning and cannot clash with an
    # existing column when a raster's base name equals a column name.
    # The exported geometry stays in the shapefile's own CRS.
    result_df = pd.DataFrame({
        'geometry': pointData_in_tif['geometry'],
        'target': pointData_in_tif['class'],
        'predicted': predicted_values,
    })

    output_csv_name = f'{base_name_without_extension}_predicted.csv'
    output_csv_path = os.path.join(output_folder, output_csv_name)
    result_df.to_csv(output_csv_path, index=False)

    print(f"Saved result to {output_csv_path}, Number of matched points: {len(result_df)}")

print("\nProcessing time in [s]:", time.process_time() - start)
print("\nAll results saved successfully :)")
