In [6]:
import pandas as pd
import matplotlib.pyplot as plt
from soilgrids import SoilGrids
from tqdm import tqdm

# Initialize the SoilGrids object
soil_grids = SoilGrids()

# Load the CSV file with sample points
df = pd.read_csv("data/spain_positive_ready_with_el_aspect.csv")

# Define correction factors for each property
conversion_factors = {
    'bdod': 100,  # Bulk density (cg/cm³ to kg/dm³)
    'cec': 10,    # Cation Exchange Capacity (mmol(c)/kg to cmol(c)/kg)
    'cfvo': 10,   # Volumetric fraction of coarse fragments (cm3/dm³ to cm³/100cm³)
    'clay': 10,   # Proportion of clay particles (g/kg to g/100g)
    'nitrogen': 100,  # Total nitrogen (cg/kg to g/kg)
    'phh2o': 10,    # Soil pH (pHx10 to pH)
    'sand': 10,     # Proportion of sand particles (g/kg to g/100g)
    'silt': 10,     # Proportion of silt particles (g/kg to g/100g)
    'soc': 10,      # Soil organic carbon content (dg/kg to g/kg)
    'ocd': 10,      # Organic carbon density (hg/m³ to kg/m³)
    'ocs': 10,      # Organic carbon stocks (t/ha to kg/m²)
}

# Initialize new columns for soil properties (e.g., pH, organic carbon, etc.)
df['bdod'] = float('nan')  # Bulk density
df['cec'] = float('nan')   # Cation Exchange Capacity
df['cfvo'] = float('nan')  # Coarse fragments fraction
df['clay'] = float('nan')  # Clay content
df['nitrogen'] = float('nan')  # Nitrogen content
df['phh2o'] = float('nan')  # Soil pH
df['sand'] = float('nan')  # Sand content
df['silt'] = float('nan')  # Silt content
df['soc'] = float('nan')   # Soil organic carbon
df['ocd'] = float('nan')   # Organic carbon density
df['ocs'] = float('nan')   # Organic carbon stocks

# Function to get soil data for a specific point
def get_soil_data(lon, lat, properties=None, grid_size=(100, 100)):
    if properties is None:
        properties = list(conversion_factors.keys())  # Use all properties by default
    
    # Define the bounding box around the point (with a small buffer size)
    buffer_size = 0.001  # Adjust as necessary for your dataset
    west = lon - buffer_size
    east = lon + buffer_size
    south = lat - buffer_size
    north = lat + buffer_size
    
    # Fetch soil data for each property
    soil_data = {}
    for property in tqdm(properties):
        try:
            # Special case for Organic Carbon Stocks (ocs) as it requires a different coverage ID
            if property == 'ocs':
                coverage_id = f'{property}_0-30cm_mean'  # Choose the mean coverage ID for ocs
            else:
                coverage_id = f'{property}_0-5cm_mean'  # Default coverage ID for other properties
            
            # Fetch the data for each property with width and height parameters
            data = soil_grids.get_coverage_data(service_id=property, coverage_id=coverage_id, 
                                                 west=west, south=south, east=east, north=north, 
                                                 crs='urn:ogc:def:crs:EPSG::4326', output="temp/soil.tif", 
                                                 width=grid_size[0], height=grid_size[1])  # Add width and height
            
            # Apply the conversion factor and store the mean value
            mean_value = data.mean()  # Get the mean value of the grid cell
            soil_data[property] = float(mean_value / conversion_factors[property])
            print(soil_data)
        except Exception as e:
            print(f"Error fetching data for {property} at ({lon}, {lat}): {e}")
            soil_data[property] = None
    
    return soil_data

# Iterate over each row and get the soil data
for idx, row in tqdm(df.iterrows(), total=df.shape[0], desc="Fetching soil data"):
    lon, lat = row['longitude'], row['latitude']
    
    # Get the soil properties for this location
    soil_properties = get_soil_data(lon, lat)
    
    # Update the dataframe with the soil properties
    for property in soil_properties:
        df.at[idx, property] = soil_properties.get(property)
        print(df.at[idx, property])

# Save the updated DataFrame to a new CSV file
df.to_csv("data/spain_boletus_final.csv", index=False)

# Optional: Visualize a specific soil property (e.g., Soil pH)
df[['longitude', 'latitude', 'phh2o']].plot.scatter(x='longitude', y='latitude', c='phh2o', cmap='viridis')
plt.title('Soil pH (0-5 cm depth) across the locations')
plt.colorbar(label='Soil pH')
plt.show()


Fetching soil data:   0%|          | 0/222 [00:00<?, ?it/s]



{'bdod': 1.087398}




{'bdod': 1.087398, 'cec': 22.78699}
