# **Landscape Metrics Calculation**

## **Import Dependencies**

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd
import rasterio as rio
from rasterio.merge import merge
import rioxarray
import os
from glob import glob
from tqdm.auto import tqdm
from landscape_window import calculate_shdi_for_grids
import pylandstats as pls
import zipfile

import warnings
warnings.filterwarnings('ignore')

plt.rcParams['font.family'] = 'DeJavu Serif'
plt.rcParams['font.serif'] = 'Times New Roman'

data_dir = r'/beegfs/halder/jupyter_playgroundnoconda_p3.12_1/jupyter_work/project/DATA'
out_dir = r'/beegfs/halder/jupyter_playgroundnoconda_p3.12_1/jupyter_work/project/GITHUB/Landscape-Analysis/data'

## **Process LULC Data**

### **Merge LULC Patches**

In [3]:
# Store all the file paths
file_paths = glob(os.path.join(data_dir, 'ESA_WORLDCOVER_10M_2021_V200', 'MAP', '*', '*Map.tif'))
print(f'Number of file paths: {len(file_paths)}')

# Open all the ratser data
src_file_to_mosaic = [rio.open(fp) for fp in file_paths]

# Merge the rasters
mosaic, out_trans = merge(src_file_to_mosaic)

# Close all the input files
for src in src_file_to_mosaic:
    src.close()

Number of file paths: 19


### **Save the Merged LULC**

In [7]:
# Prepare metadata
meta = src.meta.copy()
meta.update({
    "driver": "GTiff",
    "height": mosaic.shape[1],
    "width": mosaic.shape[2],
    "transform": out_trans,
    "count": 1,
    "dtype": "uint8",
    "compress": "lzw",  
    "predictor": 2
})

# Write output raster
output_path = os.path.join(out_dir, "ESA_WorldCover_2021_DE_WGS84.tif")
with rio.open(output_path, "w", **meta) as dst:
    dst.write(mosaic[0], 1)

print(f"Output saved to: {output_path}")

Output saved to: /beegfs/halder/jupyter_playgroundnoconda_p3.12_1/jupyter_work/project/GITHUB/Landscape-Analysis/data/ESA_WorldCover_2021_DE_WGS84.tif


## **Calculate Landscape Metrics**

In [4]:
# Read the exported data
file_path = os.path.join(out_dir, 'raster', 'ESA_WorldCover_2021_DE.tif')

grids_gdf = gpd.read_file(os.path.join(out_dir, 'vector', 'DE_Hexbins_5sqkm.shp'))
grids_gdf = grids_gdf.to_crs('EPSG:25832')
grids_gdf = grids_gdf[['id', 'geometry']]
grids_gdf['id'] = grids_gdf['id'].astype(int)

print(grids_gdf.shape)
grids_gdf.head()

(125, 8)


Unnamed: 0,id,left,top,right,bottom,row_index,col_index,geometry
0,103.0,1247152.0,6184975.0,1347152.0,6084975.0,12.0,6.0,"POLYGON ((11.20336 48.46744, 12.10168 48.46744..."
1,100.0,1247152.0,6481975.0,1347152.0,6381975.0,9.0,6.0,"POLYGON ((11.20336 50.20569, 12.10168 50.20569..."
2,101.0,1247152.0,6382975.0,1347152.0,6282975.0,10.0,6.0,"POLYGON ((11.20336 49.63309, 12.10168 49.63309..."
3,107.0,1346152.0,7273975.0,1446152.0,7173975.0,1.0,7.0,"POLYGON ((12.09269 54.54458, 12.99101 54.54458..."
4,104.0,1247152.0,6085975.0,1347152.0,5985975.0,13.0,6.0,"POLYGON ((11.20336 47.87434, 12.10168 47.87434..."


### **Shannon Diversity Index (SHDI)**

In [5]:
# calculate_shdi_for_grids(
#     grids_gdf=grids_gdf, 
#     input_raster_path=file_path, 
#     output_dir=os.path.join(out_dir, 'output', 'SHDI_Patch_27x27'),
#     window_size=27, 
#     n_jobs=70
# )

## **Process Crop Type Data**

### **Merge and Save the Crop Type Patches**

In [32]:
# for year in tqdm(range(2017, 2022)):
#     # Define the folder where your ZIP files are located
#     zip_folder = os.path.join(data_dir, 'DE_Crop_Types_2017_2021', str(year))
#     output_folder = os.path.join(out_dir, 'output', 'Crop_Type_Map', str(year))
    
#     os.makedirs(output_folder, exist_ok=True)
    
#     for zip_path in glob(os.path.join(zip_folder, "*.zip")):
#         with zipfile.ZipFile(zip_path, 'r') as zip_ref:
#             # Extract the ZIP file
#             zip_ref.extractall(output_folder)
    
#     # Store all the file paths
#     file_paths = glob(os.path.join(output_folder, '*.tif'))
#     print(f'Number of file paths: {len(file_paths)}')
    
#     # Open all the ratser data
#     src_file_to_mosaic = [rio.open(fp) for fp in file_paths]
    
#     # Merge the rasters
#     mosaic, out_trans = merge(src_file_to_mosaic)
    
#     # Close all the input files
#     for src in src_file_to_mosaic:
#         src.close()
    
#     # Prepare metadata
#     meta = src.meta.copy()
#     meta.update({
#         "driver": "GTiff",
#         "height": mosaic.shape[1],
#         "width": mosaic.shape[2],
#         "transform": out_trans,
#         "count": 1,
#         "dtype": "uint16",
#         "compress": "lzw",  
#         "predictor": 2
#     })
    
#     # Write output raster
#     output_path = os.path.join(out_dir, 'output', 'Crop_Type_Map', f"DE_Crop_Type_{year}.tif")
#     with rio.open(output_path, "w", **meta) as dst:
#         dst.write(mosaic[0], 1)
    
#     print(f"Output saved to: {output_path}")

In [31]:
out_path = os.path.join(out_dir, 'output', 'Crop_Type_Map', 'DE_Crop_Type_2017.tif')

# Open the raster
raster = rioxarray.open_rasterio(out_path, masked=True)

# Define the target CRS
target_crs = "EPSG:25832"

# Reproject the raster
raster_reprojected = raster.rio.reproject(target_crs)

In [None]:
raster_reprojected.sum()

In [34]:
out_projected_path = os.path.join(out_dir, 'output', 'Crop_Type_Map', 'DE_Crop_Type_2017_EPSG_25832.tif')
raster_reprojected.rio.to_raster(out_projected_path)

ValueError: cannot convert float NaN to integer

In [33]:
# Read the LULC using PyLandStats package
za = pls.ZonalAnalysis(raster_reprojected, zones=grids_gdf.iloc[:5], zone_index='id', neighborhood_rule=8)
# Extract all the class metrics
class_metrics_df = za.compute_class_metrics_df().reset_index()
# Extract the landscape metrics
# landscape_metrics_df = za.compute_landscape_metrics_df().reset_index()

TypeError: invalid path or file: <xarray.DataArray (band: 1, y: 101122, x: 81261)> Size: 33GB
array([[[nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        ...,
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan]]], dtype=float32)
Coordinates:
  * x            (x) float64 650kB 2.424e+05 2.424e+05 ... 1.056e+06 1.056e+06
  * y            (y) float64 809kB 6.159e+06 6.159e+06 ... 5.147e+06 5.147e+06
  * band         (band) int64 8B 1
    spatial_ref  int64 8B 0
Attributes:
    AREA_OR_POINT:  Area
    scale_factor:   1.0
    add_offset:     0.0
    _FillValue:     nan

In [1]:
# ----------------------------------------------------------------------------------------------------
# Import Required Libraries
# ----------------------------------------------------------------------------------------------------
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd
import os
from glob import glob
import pylandstats as pls
import warnings
import rioxarray

# Suppress warnings to keep output clean
warnings.filterwarnings('ignore')

# ----------------------------------------------------------------------------------------------------
# Define File and Directory Paths
# ----------------------------------------------------------------------------------------------------
# Main directory containing raw raster and vector datasets
MAIN_DATA_DIR = r'/beegfs/halder/jupyter_playgroundnoconda_p3.12_1/jupyter_work/project/DATA'

# GitHub-linked project directory where processed data and results are stored
PROJECT_DATA_DIR = r'/beegfs/halder/jupyter_playgroundnoconda_p3.12_1/jupyter_work/project/GITHUB/Landscape-Analysis/data'

# Temporary directory used for storing intermediate files
TEMP_DIR = r'/beegfs/halder/GITHUB/Landscape-Analysis/data/temp'


# ----------------------------------------------------------------------------------------------------
# Load Hexagonal Grid (5 km² resolution) for Germany
# ----------------------------------------------------------------------------------------------------
EPSG = 25832  # Use ETRS89 / UTM Zone 32N as the projection (suitable for Germany)

# Path to grid shapefile
GRID_PATH = os.path.join(PROJECT_DATA_DIR, 'VECTOR', f'DE_Hexbins_5sqkm_EPSG_{EPSG}.shp')

# Load grid as a GeoDataFrame and retain relevant columns
grids_gdf = gpd.read_file(GRID_PATH)
grids_gdf = grids_gdf[['id', 'geometry']]
grids_gdf['id'] = grids_gdf['id'].astype(int)

print('Successfully read the grids!')


# ----------------------------------------------------------------------------------------------------
# Compute Landscape Metrics Using PyLandStats
# ----------------------------------------------------------------------------------------------------
out_dir = os.path.join(PROJECT_DATA_DIR, 'OUTPUT', 'Landscape_Metrics')
os.makedirs(out_dir, exist_ok=True)

# Path to ESA WorldCover LULC raster (10 m resolution, 2021)
lulc_file_path = os.path.join(MAIN_DATA_DIR, 'ESA_WORLDCOVER_10M_2021_V200', 'ESA_WorldCover_2021_DE.tif')

# Open and reproject the raster to match the grid CRS (ETRS89 / UTM Zone 32N)
raster = rioxarray.open_rasterio(lulc_file_path, masked=True)
target_crs = f"EPSG:{EPSG}"
raster_reprojected = raster.rio.reproject(target_crs)

# Save reprojected raster to temporary directory
reprojected_raster_path = os.path.join(TEMP_DIR, f'ESA_WorldCover_2021_DE_EPSG_{EPSG}.tif')
raster_reprojected.rio.to_raster(reprojected_raster_path)

# Create ZonalAnalysis object for computing landscape metrics per grid zone
za = pls.ZonalAnalysis(
    reprojected_raster_path,  # Use the reprojected raster
    zones=grids_gdf,
    zone_index='id',
    neighborhood_rule=8       # 8-neighbor connectivity for landscape pattern analysis
)

# Compute class-level metrics (per land cover class) for each zone
class_metrics_df = za.compute_class_metrics_df().reset_index()
class_metrics_df.to_csv(os.path.join(out_dir, 'class_metrics.csv'), index=False)

# Compute landscape-level metrics (overall structure) for each zone
landscape_metrics_df = za.compute_landscape_metrics_df().reset_index()
landscape_metrics_df.to_csv(os.path.join(out_dir, 'landscape_metrics.csv'), index=False)

print('Landscape metrics computation complete!')

Successfully read the grids!


KeyboardInterrupt: 

Exception ignored in: 'rasterio._err.log_error'
Traceback (most recent call last):
  File "/usr/lib/python3.12/logging/__init__.py", line 1529, in info
    def info(self, msg, *args, **kwargs):

KeyboardInterrupt: 


RasterioIOError: Read failed. See previous exception for details.