# 1) Feature Value Extraction from Raster Data

    This code samples raster files (.tif) at the point coordinates stored in shapefiles (.shp), adds one column of extracted feature values per raster, removes rows that contain missing values, and saves the result of each shapefile to a CSV file for further analysis.

In [None]:
import os
import time
import geopandas as gpd
import rasterio
import numpy as np
import gc

# Extract raster values at shapefile point locations, one CSV per shapefile.
#
# Workflow:
#   1. Recursively collect every *.shp file below `shp_folder`.
#   2. For each shapefile, sample every *.tif in `tif_folder` at the point
#      coordinates and store the values in a column named after the raster.
#   3. Drop rows containing missing values and write the table to a CSV file
#      in `output_csv_folder`.

# NOTE: time.process_time() reports CPU time of this process, so time spent
# sleeping is not part of the durations printed below.
start = time.process_time()

shp_folder = '/path/to/grid_points/'
tif_folder = '/path/to/mosaic_clip/'
output_csv_folder = '/path/to/save/feature_value_extraction_csv/'


def get_raster_values(tif_file, coords):
    """Sample one raster at the given coordinates.

    Args:
        tif_file: File name of the raster inside ``tif_folder``.
        coords: Iterable of ``(x, y)`` pairs.
            NOTE(review): no reprojection is done here, so the coordinates
            are assumed to already be in the raster's CRS - confirm.

    Returns:
        list: The first value of every sample yielded by ``src.sample``
        (i.e. the first band), in the order of ``coords``.
    """
    with rasterio.open(os.path.join(tif_folder, tif_file)) as src:
        return [sample[0] for sample in src.sample(coords)]


# Walk the whole tree so shapefiles in sub-folders are picked up as well.
shp_files = [
    os.path.join(root, file)
    for root, _dirs, files in os.walk(shp_folder)
    for file in files
    if file.endswith(".shp")
]

for shp_file in shp_files:
    shapefile_path = shp_file
    print("Processing:", shapefile_path)

    s = time.process_time()
    gc.collect()

    # Best effort: a failure in one shapefile is reported and the loop moves
    # on to the next one.
    try:
        # Make sure the destination exists *before* the expensive sampling,
        # otherwise a missing folder is only discovered when writing the CSV.
        os.makedirs(output_csv_folder, exist_ok=True)

        grid_df = gpd.read_file(shapefile_path)

        tif_files = [file for file in os.listdir(tif_folder) if file.endswith(".tif")]
        # Sorted so the feature columns always appear in the same order.
        tif_files_sorted = sorted(tif_files)

        # The point coordinates depend only on the shapefile, so build the
        # list once instead of once per raster.
        coord_list = list(zip(grid_df["geometry"].x, grid_df["geometry"].y))

        for tif_file in tif_files_sorted:
            # Column is named after the raster file without its extension.
            column_name = os.path.splitext(tif_file)[0]
            grid_df[column_name] = get_raster_values(tif_file, coord_list)

        # Drops every row that has a missing value in ANY column.
        # NOTE(review): raster nodata stored as a regular number (not NaN)
        # is not removed by this step - confirm that is intended.
        grid_df_cleaned = grid_df.dropna()

        print("Cleaned DataFrame:")
        print(grid_df_cleaned)

        # File name suffix (including its spelling) is kept as-is so that
        # existing downstream paths keep working.
        output_csv = os.path.join(output_csv_folder, f"{os.path.splitext(os.path.basename(shp_file))[0]}_extracted_trening_samples_2022-12.csv")
        grid_df_cleaned.to_csv(output_csv, sep=",", index=False, header=True)

        # Drop the reference first so the collection below can reclaim it.
        del grid_df
        gc.collect()

    except Exception as e:
        print("Error processing:", shapefile_path)
        print("Error:", e)

    print("Processing time for spectral indexes in [min]", (time.process_time() - s) / 60)

print("Features extracted to CSV files")
print("Processing time in [min]", (time.process_time() - start) / 60)