# 1) Calculating Land Cover Classification Statistics for Specific Regions

    This code computes land cover statistics for specified regions from a classification raster and boundary shapefiles. It reads the raster holding the land cover classification, masks it with the vector boundaries from each shapefile, computes the area and percentage share of every land cover class inside those boundaries, and saves the statistics to a text file for each region.

In [None]:
import os
import rasterio
import geopandas as gpd
import numpy as np
from rasterio.features import geometry_mask
from rasterio.transform import from_origin


# Directory that receives the statistics text output.
output_folder_path = "/path/to/save/statistics/"

# Folder and file name of the land cover classification raster.
folder_path = "/path/to/merged_classification_files/"
file_name = "classification.tif"

# Boundary shapefiles; each one is processed as a separate region.
shp_files = [
    "/path/to/area_boundaries/boundaries.shp",
    "/path/to/area_boundaries/region1.shp",
    "/path/to/area_boundaries/region2.shp",
]

def calculate_statistics(raster, transform, shp_file_path, region_name):
    """Compute per-class area and percentage share of a raster inside a region.

    Args:
        raster: 2-D array of class values (one raster band).
        transform: Affine transform of ``raster`` (pixel -> map coordinates).
        shp_file_path: Path of the vector file whose geometries delimit the
            region. The geometries are used as-is, so they must already be in
            the same coordinate reference system as ``raster``.
        region_name: Label used only in the printed progress message.

    Returns:
        dict mapping ``'Class <value>'`` to a dict with the keys
        ``'Total area [m²]'``, ``'Total area [km²]'`` and
        ``'Percentage share'`` (share of the pixels that fall inside the
        region). Empty when no pixel falls inside the region.

    Notes:
        * Areas are expressed in squared CRS units; the ``m²``/``km²`` labels
          assume a projected CRS in metres -- TODO confirm for the input data.
        * Nodata pixels inside the region are not filtered out (the nodata
          value is not known here) and are counted like any other class.
          NaN pixels of a floating-point raster are ignored.
    """
    boundaries = gpd.read_file(shp_file_path)

    # geometry_mask (invert=False by default) flags pixels OUTSIDE the shapes.
    outside = geometry_mask(
        boundaries.geometry, transform=transform, out_shape=raster.shape
    )

    # Select the in-region pixels directly instead of copying the whole raster
    # to float just to mark the outside with NaN.
    values = raster[~outside]
    if np.issubdtype(values.dtype, np.floating):
        values = values[~np.isnan(values)]

    # One pass yields both the classes and their pixel counts.
    unique_values, pixel_counts = np.unique(values, return_counts=True)
    print(f'Unique classes for {region_name}:', unique_values)

    # |determinant| of the affine is the area of one pixel; unlike a * -e it
    # stays positive for bottom-up rasters and is correct for rotated grids.
    pixel_area = abs(transform.a * transform.e - transform.b * transform.d)
    total_area_pixels = values.size

    stats = {}
    for class_value, class_pixels in zip(unique_values, pixel_counts):
        class_area_m2 = float(class_pixels) * pixel_area
        stats[f'Class {int(class_value)}'] = {
            'Total area [m²]': class_area_m2,
            'Total area [km²]': class_area_m2 / 1e6,
            'Percentage share': float(class_pixels) / total_area_pixels * 100,
        }

    return stats

# Read the classification band once; the dataset can be closed right after,
# because only the in-memory array and its transform are needed below.
with rasterio.open(os.path.join(folder_path, file_name)) as src:
    raster = src.read(1)
    transform = src.transform

# Create the destination folder up front so open(..., 'w') cannot fail on a
# missing directory.
if output_folder_path:
    os.makedirs(output_folder_path, exist_ok=True)

for shp_file_path in shp_files:
    # Region name = shapefile name without directory and extension.
    region_name = os.path.splitext(os.path.basename(shp_file_path))[0]

    stats = calculate_statistics(raster, transform, shp_file_path, region_name)

    # Name the file after the region: naming it after the raster made every
    # region overwrite the same file, leaving only the last region's results.
    output_file_path = os.path.join(output_folder_path, f'statistics_{region_name}.txt')

    lines = [f'Statistics for region: {region_name}\n']
    for class_name, class_stats in stats.items():
        lines.append(f'\nStatistics for {class_name}:\n')
        lines.extend(
            f'{stat_name}: {value:.2f}\n'
            for stat_name, value in class_stats.items()
        )

    with open(output_file_path, 'w', encoding='utf-8') as f:
        f.writelines(lines)