This notebook takes the reprojected carbon stock and flux layers and calculates zonal statistics (sums and means) across the Y2Y region for each land cover class.

In [1]:
# import packages
import geopandas as gpd
import numpy as np
import rioxarray as rxr
import xarray

import pandas as pd
from pandas.api.types import is_numeric_dtype

from geocube.api.core import make_geocube
from geocube.vector import vectorize

import warnings

In [2]:
# define print raster function to help compare reprojections
def print_raster(raster):
    """Print a short summary of a raster, used to compare reprojections.

    Reports the shape, resolution, bounds, CRS and nodata settings exposed by
    the raster's ``rio`` accessor, together with the sum of all values
    (``raster.sum().item()``). Nothing is returned.
    """
    report = [
        ("shape", raster.rio.shape),
        ("resolution", raster.rio.resolution()),
        ("bounds", raster.rio.bounds()),
        ("sum", raster.sum().item()),
        ("CRS", raster.rio.crs),
        ("NoDataValue", raster.rio.nodata),
        ("NoDataOriginalValues", raster.rio.encoded_nodata),
    ]
    # one "label: value" line per entry; the last line's "\n" plus print's own
    # newline leaves a blank line after the summary
    print("".join(f"{label}: {value}\n" for label, value in report))

In [3]:
# # load 2020 landcover raster clipped to y2y and reprojected
# lc_ras = rxr.open_rasterio(
#     './land_cover/landcover-2020-classification-y2y_laea.tif')
# lc_ras.name = 'landcover'

# print_raster(lc_ras)

In [4]:
# # convert to vector
# # LC values are integers
# lc_gdf = vectorize(lc_ras.astype('int16'))
# lc_gdf

In [5]:
# # dissolve geometries based on landcover
# lc_gdf = lc_gdf.dissolve(by='landcover')

# # reset index
# lc_gdf = lc_gdf.reset_index()
# lc_gdf

In [6]:
# # write to disk since processing was slow
# lc_gdf.to_file('./land_cover/landcover-2020-classification-y2y_laea_dissolve.shp')

In [None]:
# read the dissolved land-cover polygons that were written to disk earlier,
# rather than repeating the slow vectorize/dissolve steps
lc_gdf = gpd.read_file(
    filename='./land_cover/landcover-2020-classification-y2y_laea_dissolve.shp'
)
lc_gdf

Unnamed: 0,landcover,geometry
0,1.0,"MULTIPOLYGON (((-717621.367 1114755.976, -7176..."
1,2.0,"MULTIPOLYGON (((-468621.367 715505.976, -46837..."
2,5.0,"MULTIPOLYGON (((-508371.367 729755.976, -50862..."
3,6.0,"MULTIPOLYGON (((-728121.367 1121255.976, -7283..."
4,8.0,"MULTIPOLYGON (((-713621.367 1113755.976, -7141..."
5,10.0,"MULTIPOLYGON (((-687871.367 1080755.976, -6881..."
6,11.0,"MULTIPOLYGON (((-680371.367 1353505.976, -6803..."
7,12.0,"MULTIPOLYGON (((-613121.367 1102005.976, -6131..."
8,14.0,"MULTIPOLYGON (((-728621.367 1121505.976, -7283..."
9,15.0,"MULTIPOLYGON (((135878.633 -41494.024, 135878...."


In [8]:
# define function to extract zonal stats
def extract_stats(dat_fp, dat_name, vector, vect_var, stat):
    """Compute one zonal statistic of a raster for each zone of a vector layer.

    The raster is clipped to the vector geometries, the zone attribute is
    rasterized onto the clipped raster's grid with ``make_geocube``, and the
    raster values are then grouped by zone and reduced with ``stat``.
    ``UserWarning`` is suppressed while the function runs.

    Parameters
    ----------
    dat_fp : str
        Path of the raster, opened with ``rxr.open_rasterio(masked=True)``.
    dat_name : str
        Name given to the raster variable; it becomes the column name of the
        returned table.
    vector : geopandas.GeoDataFrame
        Zone polygons (must expose ``geometry`` and ``crs``). A copy is used,
        so the caller's frame is not modified.
    vect_var : str
        Column of ``vector`` that identifies the zones. A non-numeric column
        is factorized to integer keys for rasterizing and mapped back to the
        original values in the result.
    stat : {'sum', 'mean'}
        Reduction applied to the raster values within each zone.

    Returns
    -------
    pandas.DataFrame
        Table indexed by the ``vect_var`` values with a ``dat_name`` column
        holding the requested statistic.

    Raises
    ------
    ValueError
        If ``stat`` is neither ``'sum'`` nor ``'mean'``.
    """
    # fail fast, before any raster I/O, instead of reaching an unbound
    # `table` after the expensive clip/rasterize steps
    if stat not in ('sum', 'mean'):
        raise ValueError(
            f"unsupported stat {stat!r}; expected 'sum' or 'mean'")

    # Suppress UserWarning within this function
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", UserWarning)

        # create new vect variable so don't edit data in place
        vect = vector.copy()

        # geocube needs a numeric attribute to burn into the grid, so replace
        # a non-numeric zone column by integer keys and keep the originals
        numeric = is_numeric_dtype(vect[vect_var])
        if not numeric:
            vect['key'] = pd.factorize(vect[vect_var])[0]
            vect = vect.rename(
                columns={vect_var: 'orig_name', 'key': vect_var})

        # clip data to vector layer
        dat = rxr.open_rasterio(dat_fp, masked=True).rio.clip(
            vect.geometry.values, vect.crs, from_disk=True)
        dat.name = dat_name

        # rasterize the zone attribute on the same grid as the clipped data
        out_grid = make_geocube(
            vector_data=vect,
            measurements=[vect_var],
            like=dat
        )

        # merge the datacube with the data
        out_grid[dat_name] = (dat.dims, dat.values,
                              dat.attrs, dat.encoding)

        # group data by vector variable
        grouped = out_grid.drop_vars(['spatial_ref']).groupby(vect_var)

        # calculate stats (stat was validated above)
        table = grouped.sum() if stat == 'sum' else grouped.mean()

        # flatten to a DataFrame and drop the band level from the index
        table = table.to_dataframe().reset_index(level='band', drop=True)

        # reset keys to original values
        if not numeric:
            mapping = dict(zip(vect[vect_var], vect['orig_name']))
            table.index = table.index.map(mapping)

        return table

In [9]:
# inputs for the zonal stats function: three parallel lists, where entry i of
# each one describes a single raster (file path, output column, statistic)

# raster file paths
carbon_fp = [
    './carbon_stock_data/output_layers/carbon_sothe_spawn_t_laea.tif',
    './carbon_stock_data/output_layers/soc_0_1m_t_laea.tif',
    './carbon_flux_data/output_layers/emissions_gfw_t_yr_laea.tif',
    './carbon_flux_data/output_layers/removals_gfw_t_yr_laea.tif',
    './carbon_stock_data/output_layers/carbon_sothe_spawn_t_ha_laea.tif',
    './carbon_stock_data/output_layers/soc_0_1m_t_ha_laea.tif',
    './carbon_flux_data/output_layers/emissions_gfw_t_ha_laea.tif',
    './carbon_flux_data/output_layers/removals_gfw_t_ha_laea.tif',
]

# column name used for each raster in the output table
carbon_names = [
    'carbon_t',
    'soc_t',
    'emissions_t_yr',
    'removals_t_yr',
    'carbon_t_ha',
    'soc_t_ha',
    'emissions_t_ha',
    'removals_t_ha',
]

# statistic per raster: the first four layers are summed per zone,
# the last four (the *_t_ha layers) are averaged
stat_names = ['sum'] * 4 + ['mean'] * 4

In [12]:
# extract zonal stats by landcover type: one single-column table per raster,
# joined side by side on the landcover index with a single concat (instead of
# re-concatenating the growing table on every iteration)
assert len(carbon_fp) == len(carbon_names) == len(stat_names), \
    "carbon_fp, carbon_names and stat_names must have the same length"

stats = pd.concat(
    [
        extract_stats(dat_fp=fp, dat_name=name, vector=lc_gdf,
                      vect_var='landcover', stat=stat)
        for fp, name, stat in zip(carbon_fp, carbon_names, stat_names)
    ],
    axis=1,
)

In [14]:
# keep an independent snapshot of the raw zonal stats; a plain `stats2 = stats`
# only aliases the same DataFrame, so in-place edits to `stats` would show up
# in the "backup" as well
stats2 = stats.copy()

In [15]:
# add total area to stats
# geometry.area is expressed in squared CRS units; the / 1e+6 below assumes
# the layer's CRS is in metres (the file name suggests LAEA) — TODO confirm
lc_gdf['area'] = lc_gdf.geometry.area

# one-column table of class areas in km2, indexed by landcover so that it
# aligns with the stats index
new_row = pd.DataFrame(
    {'landcover': lc_gdf['landcover'], 'area_km2': lc_gdf['area'] / 1e+6}
).set_index('landcover')

# concat to stats; drop any area_km2 left over from an earlier run of this
# cell first, so re-running it does not append a duplicate column
stats = pd.concat(
    [stats.drop(columns='area_km2', errors='ignore'), new_row], axis=1)
stats

Unnamed: 0_level_0,carbon_t,soc_t,emissions_t_yr,removals_t_yr,carbon_t_ha,soc_t_ha,emissions_t_ha,removals_t_ha,area_km2
landcover,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1.0,4131145000.0,13466380000.0,13443440.0,50881410.0,60.432924,197.230959,85.795299,19.067295,683967.0625
2.0,4575359.0,127079600.0,171797.2,24194.4,25.384296,706.193741,156.064182,5.283974,1821.4375
5.0,49329070.0,226070800.0,40091.7,347162.6,61.37222,281.785008,57.743366,10.297272,8052.0
6.0,262099900.0,1015614000.0,1059184.0,2863461.0,54.558336,211.585159,89.48311,14.392955,48121.9375
8.0,641478000.0,4897889000.0,9031610.0,5608891.0,26.05068,199.082876,75.639468,15.047124,246553.6875
10.0,260902000.0,2338343000.0,8167859.0,1848595.0,21.579889,193.614149,73.590063,12.613257,121039.5625
11.0,2123335.0,40007550.0,42039.3,3016.3,17.994243,338.742017,145.541957,6.6386,1187.75
12.0,82009.7,2551342.0,2634.0,136.3,8.782932,273.236144,156.345161,7.046479,93.625
14.0,86076490.0,1296484000.0,1178364.0,603667.3,20.158542,304.251543,121.291066,11.597789,42780.625
15.0,17845020.0,295040200.0,12500.3,36770.3,7.953352,131.718797,77.577066,14.225171,22548.25


In [16]:
# land-cover class names keyed by class code; the codes are the consecutive
# integers 1..19, so they are generated by numbering the names from 1
landcover_mapping = dict(enumerate([
    "Temperate or sub-polar needleleaf forest",             # 1
    "Sub-polar taiga needleleaf forest",                    # 2
    "Tropical or sub-tropical broadleaf evergreen forest",  # 3
    "Tropical or sub-tropical broadleaf deciduous forest",  # 4
    "Temperate or sub-polar broadleaf deciduous forest",    # 5
    "Mixed forest",                                         # 6
    "Tropical or sub-tropical shrubland",                   # 7
    "Temperate or sub-polar shrubland",                     # 8
    "Tropical or sub-tropical grassland",                   # 9
    "Temperate or sub-polar grassland",                     # 10
    "Sub-polar or polar shrubland-lichen-moss",             # 11
    "Sub-polar or polar grassland-lichen-moss",             # 12
    "Sub-polar or polar barren-lichen-moss",                # 13
    "Wetland",                                              # 14
    "Cropland",                                             # 15
    "Barren land",                                          # 16
    "Urban and built-up",                                   # 17
    "Water",                                                # 18
    "Snow and ice",                                         # 19
], start=1))

# the index holds the class codes, so call it id
stats.index.name = 'id'

# add a landcover column holding the class name looked up from each code
stats['landcover'] = stats.index.to_series().map(landcover_mapping)
stats

Unnamed: 0_level_0,carbon_t,soc_t,emissions_t_yr,removals_t_yr,carbon_t_ha,soc_t_ha,emissions_t_ha,removals_t_ha,area_km2,landcover
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1.0,4131145000.0,13466380000.0,13443440.0,50881410.0,60.432924,197.230959,85.795299,19.067295,683967.0625,Temperate or sub-polar needleleaf forest
2.0,4575359.0,127079600.0,171797.2,24194.4,25.384296,706.193741,156.064182,5.283974,1821.4375,Sub-polar taiga needleleaf forest
5.0,49329070.0,226070800.0,40091.7,347162.6,61.37222,281.785008,57.743366,10.297272,8052.0,Temperate or sub-polar broadleaf deciduous forest
6.0,262099900.0,1015614000.0,1059184.0,2863461.0,54.558336,211.585159,89.48311,14.392955,48121.9375,Mixed forest
8.0,641478000.0,4897889000.0,9031610.0,5608891.0,26.05068,199.082876,75.639468,15.047124,246553.6875,Temperate or sub-polar shrubland
10.0,260902000.0,2338343000.0,8167859.0,1848595.0,21.579889,193.614149,73.590063,12.613257,121039.5625,Temperate or sub-polar grassland
11.0,2123335.0,40007550.0,42039.3,3016.3,17.994243,338.742017,145.541957,6.6386,1187.75,Sub-polar or polar shrubland-lichen-moss
12.0,82009.7,2551342.0,2634.0,136.3,8.782932,273.236144,156.345161,7.046479,93.625,Sub-polar or polar grassland-lichen-moss
14.0,86076490.0,1296484000.0,1178364.0,603667.3,20.158542,304.251543,121.291066,11.597789,42780.625,Wetland
15.0,17845020.0,295040200.0,12500.3,36770.3,7.953352,131.718797,77.577066,14.225171,22548.25,Cropland


In [17]:
# export the land-cover summary table to an Excel workbook, keeping the id index
stats.to_excel('./outputs/y2y_carbon_landcover.xlsx', index=True)
