### SOC Stock Model Work

In [3]:
## Prepare Training Data to estimate SOC stock throughout Angola

# load libraries
import geopandas as gpd
import pandas as pd
from rasterstats import point_query
import os

# Read the harmonized 0-30 cm SOC table from disk.
# X_coord and Y_coord are consumed below; per the original note the file is also
# expected to carry `profile` and `SOC` columns (not checked here — verify).
# NOTE(review): absolute local path — consider a configurable data directory.
soc_df = pd.read_csv(
    "/Users/inesschwartz/Desktop/cleandata/harmonized_soc_0_30cm.csv"
)

# Wrap the table as a GeoDataFrame: one point per row built from the coordinate
# columns, with the layer tagged as EPSG:32733.
gdf = gpd.GeoDataFrame(
    data=soc_df,
    geometry=gpd.points_from_xy(
        x=soc_df["X_coord"],
        y=soc_df["Y_coord"],
    ),
    crs="EPSG:32733",
)

In [None]:
## Raster inputs, grouped by theme. Each mapping goes from the output column
## name to the GeoTIFF that will be sampled for it.

# Terrain layers
dem_files = dict(
    dem_hillshade="/Volumes/One_Touch/angola_thesis_gis/GIS_Angola/data_processed/hillshade.tif",
    dem_aspect="/Volumes/One_Touch/angola_thesis_gis/GIS_Angola/data_processed/aspect.tif",
    dem_aspect_classes="/Volumes/One_Touch/angola_thesis_gis/GIS_Angola/data_processed/aspect_classes2.tif",
    dem_slope="/Volumes/One_Touch/angola_thesis_gis/GIS_Angola/data_processed/slope.tif",
    dem_elevation="/Volumes/One_Touch/angola_thesis_gis/GIS_Angola/data_processed/dem_utm.tif",
)

# Bioclimatic layers
clim_files = dict(
    clim_annual_temp="/Volumes/One_Touch/angola_thesis_gis/GIS_Angola/data_processed/bioclimaticdata/annual_mean_temp.tif",
    clim_temp_seasonality="/Volumes/One_Touch/angola_thesis_gis/GIS_Angola/data_processed/bioclimaticdata/temp_seasonality.tif",
    clim_annual_precip="/Volumes/One_Touch/angola_thesis_gis/GIS_Angola/data_processed/bioclimaticdata/annual_precip2.tif",
    clim_precip_seasonality="/Volumes/One_Touch/angola_thesis_gis/GIS_Angola/data_processed/bioclimaticdata/precip_seasonality2.tif",
)

# Rasterized thematic layers
# NOTE(review): the "landform" key points at landcover_raster.tif — confirm
# that this is the intended layer/name pairing.
extra_files = dict(
    soil_type="/Volumes/One_Touch/angola_thesis_gis/GIS_Angola/data_processed/angola_soil_data_raster.tif",
    lithology="/Volumes/One_Touch/angola_thesis_gis/GIS_Angola/data_processed/lithology_rasterized.tif",
    landform="/Volumes/One_Touch/angola_thesis_gis/GIS_Angola/data_processed/landcover_raster.tif",
)

# Single lookup of every raster, in group order (terrain, climate, thematic);
# on a duplicate name the later group would win.
all_rasters = {
    column: tif_path
    for group in (dem_files, clim_files, extra_files)
    for column, tif_path in group.items()
}

In [None]:
# Extract raster values at points
#
# Samples every raster in `all_rasters` at the point geometries in `gdf` and
# stores one list of sampled values per raster in `extracted_data`, keyed by the
# raster's column name. Rasters whose file is missing are skipped with a
# warning, so `extracted_data` may hold fewer keys than `all_rasters`.
# NOTE(review): assumes each raster shares the CRS declared on `gdf`; no
# reprojection is done in this cell — confirm.

# Layers that store class codes rather than continuous measurements.
# point_query interpolates bilinearly by default, which blends the four
# surrounding cells and would turn class codes into meaningless fractional
# values; these layers are therefore sampled with nearest-neighbour instead.
# NOTE(review): membership is inferred from the layer names — confirm. Circular
# data such as `dem_aspect` (if in degrees) may also be unsuited to bilinear.
categorical_rasters = {"dem_aspect_classes", "soil_type", "lithology", "landform"}

print("Starting raster extraction at points...")

extracted_data = {}

for col_name, raster_path in all_rasters.items():
    if not os.path.exists(raster_path):
        print(f"Warning: {raster_path} does not exist, skipping.")
        continue

    interpolation = "nearest" if col_name in categorical_rasters else "bilinear"

    print(f"Extracting {col_name} from {raster_path} ...")
    values = point_query(gdf, raster_path, interpolate=interpolation)

    # Defensive flattening: if results come back as per-band dicts, keep band 1.
    # The `values and` guard avoids an IndexError when there are no points.
    if values and isinstance(values[0], dict):
        values = [v[1] for v in values]

    extracted_data[col_name] = values

print("Raster extraction completed.")