In [16]:
import pandas as pd
import rasterio
from pyproj import Transformer

# Load the CSV file into a DataFrame
df = pd.read_csv('boletus_spain_negative.csv')

# Define the path to the TIFF file
tif_path = 'corine.tif'

# Build the transformer from WGS84 longitude/latitude (EPSG:4326) to the CRS the
# raster is actually stored in. The target CRS is read from the file instead of
# being hard-coded, so the projected points always line up with the raster grid
# (the previous "epsg:4326" -> "epsg:4326" transformer was a no-op).
# always_xy=True pins the axis order to (x/longitude, y/latitude), which is the
# order transform_coordinates passes in and the order src.index() takes.
with rasterio.open(tif_path) as src:
    if src.crs is None:
        raise ValueError(f"{tif_path} has no CRS; cannot project the points onto it")
    raster_crs_wkt = src.crs.to_wkt()

transformer = Transformer.from_crs("EPSG:4326", raster_crs_wkt, always_xy=True)

# Project one row's longitude/latitude through the module-level `transformer`
def transform_coordinates(row):
    """Return the transformed coordinate pair for a single DataFrame row.

    Reads the row's 'longitude' and 'latitude' entries, passes them (in that
    order) to ``transformer.transform`` and returns the resulting 2-tuple.
    """
    x_out, y_out = transformer.transform(row['longitude'], row['latitude'])
    return x_out, y_out

# Apply transformation to coordinates.
# Transformer.transform accepts arrays, so all points are projected in a single
# vectorised call instead of one Python call per row. Unlike the row-wise
# zip(*df.apply(...)) form, this also works when the DataFrame has no rows.
# The '_3035' column names are kept because later code reads them by name.
df['longitude_3035'], df['latitude_3035'] = transformer.transform(
    df['longitude'].to_numpy(),
    df['latitude'].to_numpy(),
)

# Function to get the value from TIFF at given coordinates
def get_value_from_tif(row, src=None):
    """Return the band-1 pixel value of the raster under one point.

    Parameters
    ----------
    row : mapping
        Must provide 'longitude_3035' and 'latitude_3035'; they are passed to
        ``src.index`` as the x and y coordinates respectively.
    src : open rasterio dataset, optional
        Dataset to sample. When omitted, ``tif_path`` is opened and closed for
        this single call, which is slow when applied to many rows.

    Returns
    -------
    The value of the pixel the point falls in, or the dataset's nodata value
    (``None`` when the raster declares none) if the point lies outside the
    raster extent.
    """
    if src is None:
        # Backward-compatible path: open the raster just for this lookup.
        with rasterio.open(tif_path) as opened:
            return get_value_from_tif(row, opened)

    # Get the row and column indices corresponding to the given coordinates
    row_idx, col_idx = src.index(row['longitude_3035'], row['latitude_3035'])

    # A point outside the raster has no pixel of its own; reading that window
    # would otherwise either fail or pick up an unrelated pixel.
    if not (0 <= row_idx < src.height and 0 <= col_idx < src.width):
        return src.nodata

    # Read the single pixel at (row_idx, col_idx) from band 1
    val = src.read(1, window=((row_idx, row_idx + 1), (col_idx, col_idx + 1)))
    return val[0, 0]

# Add a new column 'LC' to the DataFrame containing values from the TIFF file.
# The raster is opened once and shared by every row lookup.
with rasterio.open(tif_path) as src:
    df['LC'] = df.apply(get_value_from_tif, axis=1, args=(src,))



# Persist the enriched table, without the pandas index, to a new CSV file
output_csv = 'negative_positive_ready.csv'
df.to_csv(output_csv, index=False)


In [1]:
import geopandas as gpd
import rasterio
from rasterio.mask import mask
import numpy as np

# Define the path to the TIFF file
tif_path = 'corine.tif'

# Load the GeoJSON file into a GeoDataFrame
spain = gpd.read_file('spain_grid_3km.geojson')

# Ensure the GeoDataFrame and raster have the same CRS by reading the CRS from
# the raster itself. The previous hard-coded epsg=4623 looks like a
# transposition of 4326 and is not guaranteed to match the raster, in which
# case the polygons would not line up with the pixels they are meant to mask.
with rasterio.open(tif_path) as src:
    if src.crs is None:
        raise ValueError(f"{tif_path} has no CRS; cannot align the grid with it")
    spain = spain.to_crs(src.crs.to_wkt())

# Function to get the mode value from TIFF within a polygon
def get_mode_value(geometry):
    """Return the most frequent valid raster value inside ``geometry``.

    The raster at ``tif_path`` is opened, cropped to the polygon and masked.
    Only unmasked pixels take part in the vote; pixels that the mask marks as
    outside the polygon or as nodata are ignored. Values from all bands are
    pooled, as before. On a tie the smallest value wins (``np.unique`` sorts
    its output and ``np.argmax`` returns the first maximum).

    Parameters
    ----------
    geometry : GeoJSON-like geometry accepted by ``rasterio.mask.mask``,
        expressed in the raster's CRS.

    Returns
    -------
    The modal pixel value, or the fill value (the raster's nodata, else 0)
    when the polygon contains no valid pixel at all.
    """
    with rasterio.open(tif_path) as src:
        # filled=False yields a masked array rather than writing the fill value
        # into masked cells. With crop=True the window is the polygon's bounding
        # box, so a filled result would count every bounding-box pixel outside
        # the polygon as a fill observation and could let the fill value win.
        out_image, _ = mask(src, [geometry], crop=True, filled=False)

        valid_pixels = out_image.compressed()
        if valid_pixels.size == 0:
            # Nothing valid to count: report the fill value, which is what a
            # fully masked window produced previously.
            fill_value = 0 if src.nodata is None else src.nodata
            return out_image.dtype.type(fill_value)

        # Count occurrences of each unique value and pick the most common one
        unique_values, counts = np.unique(valid_pixels, return_counts=True)
        return unique_values[np.argmax(counts)]

# Compute the modal raster value of every grid polygon and store it as 'mode_value'
mode_values = spain['geometry'].apply(get_mode_value)
spain['mode_value'] = mode_values

# Write the enriched grid out as a new GeoJSON file
spain.to_file('spain_with_mode_values.geojson', driver='GeoJSON')


In [5]:
import geopandas as gpd
import rasterio
from rasterio.mask import mask
import numpy as np
from concurrent.futures import ProcessPoolExecutor
from tqdm import tqdm

# Define the path to the TIFF file
tif_path = 'corine.tif'

# Load the GeoJSON file into a GeoDataFrame
spain = gpd.read_file('spain_grid_3km.geojson')

# Ensure the GeoDataFrame and raster have the same CRS by reading the CRS from
# the raster itself. The previous hard-coded epsg=4623 looks like a
# transposition of 4326 and is not guaranteed to match the raster, in which
# case the polygons would not line up with the pixels they are meant to mask.
with rasterio.open(tif_path) as src:
    if src.crs is None:
        raise ValueError(f"{tif_path} has no CRS; cannot align the grid with it")
    spain = spain.to_crs(src.crs.to_wkt())

# Function to get the mode value from TIFF within a polygon
def get_mode_value(geometry):
    """Return the most frequent valid raster value inside ``geometry``.

    The raster at ``tif_path`` is opened, cropped to the polygon and masked.
    Only unmasked pixels take part in the vote; pixels that the mask marks as
    outside the polygon or as nodata are ignored. Values from all bands are
    pooled, as before. On a tie the smallest value wins (``np.unique`` sorts
    its output and ``np.argmax`` returns the first maximum).

    Parameters
    ----------
    geometry : GeoJSON-like geometry accepted by ``rasterio.mask.mask``,
        expressed in the raster's CRS.

    Returns
    -------
    The modal pixel value, or the fill value (the raster's nodata, else 0)
    when the polygon contains no valid pixel at all.
    """
    with rasterio.open(tif_path) as src:
        # filled=False yields a masked array rather than writing the fill value
        # into masked cells. With crop=True the window is the polygon's bounding
        # box, so a filled result would count every bounding-box pixel outside
        # the polygon as a fill observation and could let the fill value win.
        out_image, _ = mask(src, [geometry], crop=True, filled=False)

        valid_pixels = out_image.compressed()
        if valid_pixels.size == 0:
            # Nothing valid to count: report the fill value, which is what a
            # fully masked window produced previously.
            fill_value = 0 if src.nodata is None else src.nodata
            return out_image.dtype.type(fill_value)

        # Count occurrences of each unique value and pick the most common one
        unique_values, counts = np.unique(valid_pixels, return_counts=True)
        return unique_values[np.argmax(counts)]

# Worker entry point handed to the process pool
def process_polygon(polygon):
    """Return ``get_mode_value(polygon)``; a named top-level wrapper for ``executor.map``."""
    mode_value = get_mode_value(polygon)
    return mode_value

# Create a ProcessPoolExecutor with the number of processes equal to the number of CPU cores
with ProcessPoolExecutor() as executor:
    # Apply the function to each polygon in parallel. chunksize batches many
    # polygons into one inter-process message instead of one message per
    # polygon (the default), which matters for tens of thousands of small
    # tasks. executor.map still yields results in input order.
    results = list(tqdm(
        executor.map(process_polygon, spain['geometry'], chunksize=256),
        total=len(spain),
    ))

# Assign the results to the GeoDataFrame (positional, same order as the polygons)
spain['mode_value'] = results

# Save the GeoDataFrame to a new GeoJSON file
spain.to_file('spain_with_mode_values.geojson', driver='GeoJSON')


100%|██████████| 57840/57840 [01:10<00:00, 822.14it/s] 
