In [66]:
import geopandas as gpd
import pandas as pd
import numpy as np
import rioxarray
import rasterio
import matplotlib.pyplot as plt

Status: works. The cell below loads and cleans the temperature data.

In [None]:
# Load the temperature data (GeoPackage)
temp = gpd.read_file('../data/temp_HD/sensor_data_20240107_31days.gpkg')

# Remove rows where 'temperature' is NaN. The explicit .copy() makes the
# filtered frame independent of the frame it was sliced from, so the column
# assignment below does not raise pandas' SettingWithCopyWarning.
temp = temp[temp['temperature'].notna()].copy()

# Ensure 'dateobserved' is in datetime format.
# errors='coerce' turns values that do not match the format into NaT instead of raising.
temp['dateobserved'] = pd.to_datetime(temp['dateobserved'], format="%Y-%m-%d %H:%M:%S", errors='coerce')

Status: the functions below work up to the point where the raster data is processed.

In [None]:
# --- Load and clean temp data
def load_temperature_data(filepath_temp):
    """
    Read the sensor observations and prepare them for aggregation.

    Parameters:
    - filepath_temp: path passed to `gpd.read_file`

    Returns:
    - the frame read from `filepath_temp` without the rows whose 'temperature'
      is missing, and with 'dateobserved' parsed to datetime (values that do not
      match "%Y-%m-%d %H:%M:%S" become NaT)
    """
    temp = gpd.read_file(filepath_temp)
    # .copy() detaches the filtered rows from the frame they were sliced from,
    # so the column assignment below does not raise SettingWithCopyWarning.
    temp = temp[temp['temperature'].notna()].copy()
    temp['dateobserved'] = pd.to_datetime(temp['dateobserved'], format="%Y-%m-%d %H:%M:%S", errors='coerce')
    return temp


# --- Calculate July daily mean temp per station
def calculate_july_daily_mean_temperature(temp):
    """
    Average the July observations per station and calendar day.

    Parameters:
    - temp: frame with 'stationname', 'temperature', 'geometry' and a
      datetime 'dateobserved' column, plus a `crs` attribute

    Returns:
    - GeoDataFrame with one row per (stationname, date), the column
      'daily_mean_temperature' and the station geometry, in the CRS of `temp`
    """
    # Keep only observations whose month is July; copy so that adding the
    # helper column below does not touch the caller's frame.
    is_july = temp['dateobserved'].dt.month == 7
    july_obs = temp.loc[is_july].copy()

    # Calendar day of every observation
    july_obs['date'] = july_obs['dateobserved'].dt.date

    # Mean temperature per station and day
    daily_means = july_obs.groupby(['stationname', 'date'], as_index=False).agg(
        daily_mean_temperature=('temperature', 'mean')
    )

    # One geometry per station (first occurrence), joined back onto the means
    geoms = july_obs[['stationname', 'geometry']].drop_duplicates('stationname')
    joined = pd.merge(daily_means, geoms, on='stationname', how='left')

    return gpd.GeoDataFrame(joined, geometry='geometry', crs=temp.crs)



def calculate_july_monthly_mean_temperature(temp):
    """
    Calculates mean temperature for each station in July.

    Parameters:
    - temp: DataFrame with at least 'temperature', 'stationname', and
      'dateobserved' columns. 'dateobserved' may already be datetime or may
      hold strings in the format "%Y-%m-%d %H:%M:%S". The frame is not modified.

    Returns:
    - DataFrame with the columns 'stationname' and 'monthly_mean_temperature'
      (one row per station that has at least one July observation)
    """
    # Parse into a local Series instead of writing the column back, so the
    # caller's frame is left untouched. Values that do not match the format
    # become NaT and therefore never pass the month filter.
    observed = pd.to_datetime(temp['dateobserved'], format="%Y-%m-%d %H:%M:%S", errors='coerce')

    # Filter for July
    july_data = temp[observed.dt.month == 7]

    # Group by station and calculate mean temperature
    july_monthly_mean_temp = (
        july_data
        .groupby('stationname', as_index=False)
        .agg(monthly_mean_temperature=('temperature', 'mean'))
    )

    return july_monthly_mean_temp


# --- Reproject temp data
def reproject_vector(temp, target_crs):
    """Return the result of `temp.to_crs(target_crs)`; `temp` itself is only read."""
    reprojected = temp.to_crs(target_crs)
    return reprojected


# --- 4. Load and reproject raster
def load_and_reproject_raster(raster_path, target_crs):
    """
    Open a raster with rioxarray and reproject it.

    Parameters:
    - raster_path: path passed to `rioxarray.open_rasterio` (opened with masked=True)
    - target_crs: CRS passed to `.rio.reproject`

    Returns:
    - the squeezed (length-1 dimensions dropped) raster, reprojected to `target_crs`
    """
    opened = rioxarray.open_rasterio(raster_path, masked=True)
    squeezed = opened.squeeze()
    return squeezed.rio.reproject(target_crs)


# --- 5. Mask canopy height raster
def mask_canopy_raster(raster, mask_values=(101, 102, 103)):
    """
    Blank out raster cells that hold one of the given code values.

    Parameters:
    - raster: object providing `.isin()` and `.where()` (e.g. an xarray DataArray)
    - mask_values: cell values to remove. The default (101, 102, 103) is
      presumably the set of non-height flag codes of the canopy height product
      - TODO confirm against the dataset documentation.

    Returns:
    - `raster` with every cell equal to one of `mask_values` replaced by the
      fill value of `.where()` (NaN for float data); all other cells unchanged
    """
    flagged = raster.isin(list(mask_values))
    return raster.where(~flagged)


# --- 6. Extract raster values to points
def extract_raster_values(temp_mean, raster, column_name: str):
    """
    Sample a raster at every point geometry and store the result in a new column.

    Parameters:
    - temp_mean: frame whose `geometry` holds points exposing `.x` and `.y`
    - raster: either an object with a `.sample(coords)` method (e.g. an open
      rasterio dataset) or an object with a `.sel(x=, y=, method=)` method
      (e.g. the xarray DataArray returned by `rioxarray.open_rasterio`)
    - column_name: name of the column that receives the sampled values

    Returns:
    - a copy of `temp_mean` with `column_name` added; the input is not modified

    Raises:
    - TypeError: if `raster` offers neither `.sample` nor `.sel`
      (for example a plain `(array, metadata)` tuple)
    """
    new_gdf = temp_mean.copy()  # do not change original gdf
    coords = [(point.x, point.y) for point in new_gdf.geometry]

    if hasattr(raster, "sample"):
        # rasterio-style dataset: one sequence of band values per point, keep the first band
        values = [band_values[0] for band_values in raster.sample(coords)]
    elif hasattr(raster, "sel"):
        # xarray-style raster: nearest-cell lookup per point. ravel()[0] keeps
        # the first value, which also covers rasters that still carry a band axis.
        # NOTE: with method="nearest" a point outside the raster extent gets the
        # value of the closest edge cell rather than a missing value.
        values = [
            np.asarray(raster.sel(x=x, y=y, method="nearest").values).ravel()[0]
            for x, y in coords
        ]
    else:
        raise TypeError(
            "raster must provide .sample() (rasterio dataset) or .sel() "
            f"(xarray DataArray), got {type(raster).__name__}"
        )

    new_gdf[column_name] = values

    return new_gdf

This test run was successful for the first couple of functions

In [None]:
# `temp` is expected to exist already (cleaned temperature observations);
# it can also be produced with
# load_temperature_data('../data/temp_HD/sensor_data_20240107_31days.gpkg').

# CRS that the station points are reprojected to
target_crs = "EPSG:32632"

# Daily July means per station, reprojected to the target CRS
temp_mean = reproject_vector(
    calculate_july_daily_mean_temperature(temp),
    target_crs,
)


Work in progress: trying to find out why reading and reprojecting the raster does not work.

In [73]:
import rasterio
from rasterio.warp import calculate_default_transform, reproject, Resampling
import numpy as np

def load_and_reproject_raster(raster_path, target_crs, resampling=None):
    """
    Loads a raster with rasterio and reprojects every band to `target_crs`.

    Parameters:
        raster_path (str): Path to the input raster file.
        target_crs: CRS to reproject to (passed to `calculate_default_transform`
            and `reproject`, and stored under 'crs' in the returned metadata).
        resampling (rasterio.warp.Resampling, optional): Resampling method.
            Defaults to `Resampling.bilinear` (the previous fixed behaviour).
            Bilinear interpolation averages neighbouring cells, so for rasters
            that contain coded or flag values `Resampling.nearest` keeps the
            codes intact.

    Returns:
        reprojected_array (np.ndarray): Array of shape (bands, height, width)
            holding the reprojected raster, with the dtype of the first band.
        kwargs (dict): Copy of the source metadata with 'crs', 'transform',
            'width' and 'height' updated for the reprojected raster.
    """
    if resampling is None:
        resampling = Resampling.bilinear

    with rasterio.open(raster_path) as src:
        # Grid (affine transform and size) of the raster in the target CRS
        transform, width, height = calculate_default_transform(
            src.crs, target_crs, src.width, src.height, *src.bounds
        )

        kwargs = src.meta.copy()
        kwargs.update({
            'crs': target_crs,
            'transform': transform,
            'width': width,
            'height': height
        })

        reprojected_array = np.empty((src.count, height, width), dtype=src.dtypes[0])

        # rasterio band indexes are 1-based, the numpy axis is 0-based
        for i in range(1, src.count + 1):
            reproject(
                source=rasterio.band(src, i),
                destination=reprojected_array[i - 1],
                src_transform=src.transform,
                src_crs=src.crs,
                dst_transform=transform,
                dst_crs=target_crs,
                resampling=resampling
            )

    return reprojected_array, kwargs

# NOTE(review): the loader defined in this cell returns a
# (reprojected_array, kwargs) tuple, so `dem` is bound to that tuple here.
dem = load_and_reproject_raster("../data/DEM/hd_elevation_4326.tif", target_crs)

In [None]:
# Step 4: Load rasters and match CRS
# NOTE(review): this notebook defines load_and_reproject_raster twice; whichever
# definition was executed last is the one called here. The rioxarray-based
# version returns a single raster object, the rasterio-based version returns an
# (array, kwargs) tuple - the steps below must receive the type they expect.
dem = load_and_reproject_raster("../data/DEM/hd_elevation_4326.tif", temp_mean.crs)
tch = load_and_reproject_raster("../data/Canopy_height/clip_Forest_height_2019_NAFR.tif", temp_mean.crs)
ghs_bh = load_and_reproject_raster("../data/building_height/clip_GHS_BUILT_H_AGBH_E2018_GLOBE_R2023A_54009_100_V1_0_R4_C19.tif", temp_mean.crs)

# Step 5: Clean the TCH raster
# NOTE(review): mask_canopy_raster calls .isin() and .where() on its argument,
# which a plain tuple does not provide.
tch = mask_canopy_raster(tch)

# Step 6: Extract raster values to points
# Each call returns a copy with one additional column, so temp_mean is rebound
# three times and ends up with all three raster columns.
temp_mean = extract_raster_values(temp_mean, dem, 'elevation')
temp_mean = extract_raster_values(temp_mean, ghs_bh, 'building_height')
temp_mean = extract_raster_values(temp_mean, tch, 'tree_canopy_height')

# Step 7: Done - check final result
print(temp_mean.head())