In [1]:
from shroomradar.src.climate import append_climate_data_csv
import os

# --- Configuration ---
# Both CSVs live in the sibling "data" directory; the output name is the input
# name with a "_climate" suffix.
data_dir = os.path.join("..", "data")
input_path = os.path.join(data_dir, "negative_samples_within_land_10k_with_coords_topography.csv")
output_path = os.path.join(data_dir, "negative_samples_within_land_10k_with_coords_topography_climate.csv")
# NOTE(review): written with a doubled forward slash, unlike the os.path.join
# paths above; kept verbatim because append_climate_data_csv receives it as-is.
climate_data_folder = "..//climate_data"

# Keyword arguments are collected first so the call itself stays on one line.
climate_job = {
    "input_csv": input_path,
    "output_csv": output_path,
    "climate_base": climate_data_folder,
}

# --- Run ---
# The recorded output of this cell shows ~37 minutes for 3326 rows.
print("Starting climate data appending process...")
append_climate_data_csv(**climate_job)
print("Process finished.")


Starting climate data appending process...
Loaded 3326 rows from ..\data\negative_samples_within_land_10k_with_coords_topography.csv


Processing observations: 100%|██████████| 3326/3326 [36:57<00:00,  1.50it/s] 


✅ Data has been updated and saved to ..\data\negative_samples_within_land_10k_with_coords_topography_climate.csv
Process finished.


In [None]:
import pandas as pd
import rasterio
import numpy as np

# Raster that get_value_from_tif opens through the module-level name tif_path
tif_path = 'corine.tif'

# Point table; its 'x' and 'y' columns are the coordinates looked up in the raster
df = pd.read_csv('data/negative_samples_el_aspect.csv')

# Function to get the value from TIFF at given coordinates
def get_value_from_tif(row):
    """Return the band-1 raster value under one point, or NaN when unavailable.

    The raster is the file named by the module-level ``tif_path``.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide ``row['x']`` and ``row['y']``. The values are compared
        directly with the dataset bounds and passed to ``src.index``; no
        reprojection happens here, so they must already be expressed in the
        raster's own CRS.

    Returns
    -------
    scalar or float
        The pixel value, or ``np.nan`` when the point is outside the raster,
        the pixel is masked (nodata / dataset mask), or any error occurs while
        reading (the error is printed, not raised).
    """
    lon, lat = row['x'], row['y']

    try:
        # NOTE: the dataset is reopened on every call; simple, but it makes
        # DataFrame.apply over many rows slow.
        with rasterio.open(tif_path) as src:
            bounds = src.bounds

            # Cheap rejection of points that cannot fall on the raster
            if not (bounds.left <= lon <= bounds.right and
                    bounds.bottom <= lat <= bounds.top):
                return np.nan

            # Pixel indices of the cell containing the point
            row_idx, col_idx = src.index(lon, lat)

            # The bounds test above is inclusive, so a point exactly on the
            # right/bottom edge can still map to an index one past the end.
            if not (0 <= row_idx < src.height and 0 <= col_idx < src.width):
                return np.nan

            # Read a 1x1 window. masked=True makes rasterio flag nodata pixels
            # instead of handing back the raw nodata number, which would
            # otherwise be recorded as if it were a real value.
            pixel = src.read(
                1,
                window=((row_idx, row_idx + 1), (col_idx, col_idx + 1)),
                masked=True,
            )

            if pixel.size == 0:
                return np.nan

            value = pixel[0, 0]
            # Indexing a masked element yields the np.ma.masked singleton
            if value is np.ma.masked:
                return np.nan
            return value
    except Exception as e:
        # Best effort: report the failing point and keep processing the rest
        print(f"Error processing coordinates ({lon}, {lat}): {e}")
        return np.nan

# Sample the raster once per row and keep the result in a new 'LC' column
print("Processing coordinates and extracting land cover values...")
lc_values = df.apply(get_value_from_tif, axis=1)
df['LC'] = lc_values

# Report how many rows received a value. get_value_from_tif returns NaN both
# for points outside the raster and for rows whose lookup raised an error, so
# the last count covers both cases.
lc_missing = df['LC'].isna()
print(f"Total points: {len(df)}")
print(f"Points with valid LC values: {df['LC'].notna().sum()}")
print(f"Points outside bounds (NaN): {lc_missing.sum()}")

# Persist the enriched table next to the input file
df.to_csv('data/negative_samples_el_aspect_corine.csv', index=False)


In [None]:
import geopandas as gpd
import rasterio
from rasterio.mask import mask
import numpy as np

# Define the path to the TIFF file
tif_path = 'corine.tif'

# Load the GeoJSON file into a GeoDataFrame
spain = gpd.read_file("docker//data//base_maps//siena_05_with_elevation_aspect.geojson")

# Ensure the GeoDataFrame and raster have the same CRS.
# The CRS is read from the raster itself rather than hard-coded: the previous
# literal (EPSG:4623) looks like a transposed 4326 and, either way, nothing
# tied it to the CRS of the file the polygons are later masked against.
with rasterio.open(tif_path) as src:
    raster_crs = src.crs
if raster_crs is None:
    raise ValueError(f"{tif_path} has no CRS; cannot align the polygons with it")
spain = spain.to_crs(raster_crs)

# Function to get the mode value from TIFF within a polygon
def get_mode_value(geometry):
    """Return the most frequent raster value inside ``geometry``.

    The raster is the file named by the module-level ``tif_path``; the
    geometry must be expressed in that raster's CRS.

    Parameters
    ----------
    geometry : shapely geometry (or GeoJSON-like mapping)
        Polygon passed to ``rasterio.mask.mask`` as the only shape.

    Returns
    -------
    scalar or float
        The modal pixel value among the valid pixels covered by the geometry
        (ties resolve to the smallest value, because ``np.unique`` sorts and
        ``np.argmax`` returns the first maximum), or ``np.nan`` when no valid
        pixel is covered.

    Raises
    ------
    Whatever ``rasterio.open`` / ``rasterio.mask.mask`` raise, e.g. when the
    geometry does not overlap the raster.
    """
    with rasterio.open(tif_path) as src:
        # filled=False keeps the result as a masked array. With the default
        # (filled=True) every pixel of the cropped window that lies outside
        # the polygon is overwritten with the nodata/fill value, and those
        # fill pixels were then counted and could win the mode.
        out_image, _ = mask(src, [geometry], crop=True, filled=False)

    # Keep only unmasked pixels (inside the polygon and not nodata)
    valid_pixels = np.ma.compressed(out_image)
    if valid_pixels.size == 0:
        return np.nan

    unique_values, counts = np.unique(valid_pixels, return_counts=True)
    return unique_values[np.argmax(counts)]

# Compute the modal raster value of every polygon, then attach the result
# as the 'mode_value' column
mode_values = spain['geometry'].apply(get_mode_value)
spain['mode_value'] = mode_values

# Write the enriched polygons to a new GeoJSON file
output_geojson = "docker//data//base_maps//siena_05_with_elevation_aspect_LC.geojson"
spain.to_file(output_geojson, driver='GeoJSON')


In [None]:
import geopandas as gpd
import rasterio
from rasterio.mask import mask
import numpy as np
from concurrent.futures import ProcessPoolExecutor
from tqdm import tqdm

# Define the path to the TIFF file
tif_path = 'temp/corine.tif'

# Load the GeoJSON file into a GeoDataFrame
spain = gpd.read_file('data/spain_grid_3km.geojson')

# Ensure the GeoDataFrame and raster have the same CRS.
# The CRS is read from the raster itself rather than hard-coded: the previous
# literal (EPSG:4623) looks like a transposed 4326 and, either way, nothing
# tied it to the CRS of the file the polygons are later masked against.
with rasterio.open(tif_path) as src:
    raster_crs = src.crs
if raster_crs is None:
    raise ValueError(f"{tif_path} has no CRS; cannot align the polygons with it")
spain = spain.to_crs(raster_crs)

# Function to get the mode value from TIFF within a polygon
def get_mode_value(geometry):
    """Return the most frequent raster value inside ``geometry``.

    The raster is the file named by the module-level ``tif_path``; the
    geometry must be expressed in that raster's CRS. The dataset is opened
    inside the function, so each call works on its own file handle.

    Parameters
    ----------
    geometry : shapely geometry (or GeoJSON-like mapping)
        Polygon passed to ``rasterio.mask.mask`` as the only shape.

    Returns
    -------
    scalar or float
        The modal pixel value among the valid pixels covered by the geometry
        (ties resolve to the smallest value, because ``np.unique`` sorts and
        ``np.argmax`` returns the first maximum), or ``np.nan`` when no valid
        pixel is covered.

    Raises
    ------
    Whatever ``rasterio.open`` / ``rasterio.mask.mask`` raise, e.g. when the
    geometry does not overlap the raster.
    """
    with rasterio.open(tif_path) as src:
        # filled=False keeps the result as a masked array. With the default
        # (filled=True) every pixel of the cropped window that lies outside
        # the polygon is overwritten with the nodata/fill value, and those
        # fill pixels were then counted and could win the mode.
        out_image, _ = mask(src, [geometry], crop=True, filled=False)

    # Keep only unmasked pixels (inside the polygon and not nodata)
    valid_pixels = np.ma.compressed(out_image)
    if valid_pixels.size == 0:
        return np.nan

    unique_values, counts = np.unique(valid_pixels, return_counts=True)
    return unique_values[np.argmax(counts)]

# Worker entry point handed to executor.map
def process_polygon(polygon):
    """Return ``get_mode_value(polygon)`` for a single polygon."""
    mode_value = get_mode_value(polygon)
    return mode_value

# Fan the polygons out over a process pool (default worker count) and collect
# the per-polygon results in input order; tqdm shows progress as they arrive.
# NOTE(review): worker processes must be able to import process_polygon; with
# the "spawn" start method (the default on Windows and macOS) functions defined
# in a notebook cell may not be importable - confirm on the target platform.
geometries = spain['geometry']
with ProcessPoolExecutor() as executor:
    mode_iter = executor.map(process_polygon, geometries)
    results = list(tqdm(mode_iter, total=len(spain)))

# Attach the results as the 'mode_value' column
spain['mode_value'] = results

# Write the enriched grid to a new GeoJSON file
output_geojson = 'data/spain_with_mode_values.geojson'
spain.to_file(output_geojson, driver='GeoJSON')
