In [None]:
"""This script takes a first look at the potential forest area results of Xu et al. (2023), PNAS, https://doi.org/10.1073/pnas.2304988120.
author:shiyu deng
email:shiyu.deng.23@ucl.ac.uk
"""
import scipy.io
from osgeo import gdal, osr
import numpy as np
import h5py

# Source .mat files (figure 2 source data in Xu et al. 2023 PNAS).
mat_file = '.../Datashare_PNAS/processed/best_suit_2100.mat'  # potential forest distribution for best_sustainable
mat_file2 = '.../Datashare_PNAS/processed/best_bio_2100.mat'  # potential forest distribution for best_biomass

# Load both files with scipy; each result is a dict keyed by MATLAB variable name.
mat_data, mat_data2 = (scipy.io.loadmat(mat_path) for mat_path in (mat_file, mat_file2))

# Pull out the raster arrays: 'data3' holds the best_suit grid and 'data4' the
# best_bio grid.
data_array, data_array2 = mat_data['data3'], mat_data2['data4']

# Georeferencing shared by both output rasters.
# Pixel size is 30/3600 of a CRS unit; with EPSG:4326 that is 30 arc-seconds.
pixel_size = 30 / 3600
# GDAL geotransform order: (x of upper-left corner, pixel width, row rotation,
#                           y of upper-left corner, column rotation, pixel height).
# The negative pixel height makes rows run downwards from y = 55.
geotransform = (70, pixel_size, 0, 55, 0, -pixel_size)

srs = osr.SpatialReference()
srs.ImportFromEPSG(4326)  # geographic WGS84

driver = gdal.GetDriverByName('GTiff')


def _write_geotiff(array, out_path):
    """Write a 2-D array to a single-band Float32 GeoTIFF.

    The raster is georeferenced with the module-level ``geotransform`` and
    ``srs`` defined above.

    Args:
        array: 2-D array; ``shape[0]`` is the row count and ``shape[1]`` the
            column count of the output raster.
        out_path: Destination path of the GeoTIFF.

    Raises:
        RuntimeError: If GDAL cannot create ``out_path``.
    """
    n_rows, n_cols = array.shape[0], array.shape[1]
    dataset = driver.Create(out_path, n_cols, n_rows, 1, gdal.GDT_Float32)
    # Without gdal.UseExceptions(), Create() signals failure by returning None;
    # fail here with a clear message instead of an AttributeError further down.
    if dataset is None:
        raise RuntimeError(
            f"GDAL could not create {out_path!r}; "
            "check that the output directory exists and is writable."
        )

    dataset.SetGeoTransform(geotransform)
    dataset.SetProjection(srs.ExportToWkt())

    band = dataset.GetRasterBand(1)
    band.WriteArray(array)
    band.FlushCache()

    # Drop the band before its owning dataset so no handle outlives the
    # dataset; releasing the dataset closes the file.
    band = None
    dataset = None


# Potential forest distribution: best_sustainable, then best_biomass.
_write_geotiff(data_array, 'Datashare_PNAS/processed/best_suit_2100.tif')
_write_geotiff(data_array2, 'Datashare_PNAS/processed/best_bio_2100.tif')



# Read the existing forest distribution data.
mat_file_path = '/.../Datashare_PNAS/new_veg_2013_2017.mat'

# This file is opened with h5py rather than scipy.io.loadmat
# (presumably it is a MATLAB v7.3 / HDF5 file -- confirm).
with h5py.File(mat_file_path, 'r') as file:
    # Load the whole 'forest_type' dataset into memory and transpose it
    # (presumably to restore MATLAB's row/column order -- confirm).
    data_array = file['forest_type'][()].T

# Georeferencing of the output raster.
# Pixel size is 30/3600 of a CRS unit; with EPSG:4326 that is 30 arc-seconds.
pixel_size = 30 / 3600
# GDAL geotransform order: (x of upper-left corner, pixel width, row rotation,
#                           y of upper-left corner, column rotation, pixel height).
geotransform = (70, pixel_size, 0, 55, 0, -pixel_size)

srs = osr.SpatialReference()
srs.ImportFromEPSG(4326)  # geographic WGS84

# Create the single-band Float32 GeoTIFF (width = columns, height = rows).
driver = gdal.GetDriverByName('GTiff')
out_path = 'Datashare_PNAS/processed/new_veg_2013_2017.tif'
out_ds = driver.Create(out_path, data_array.shape[1], data_array.shape[0], 1, gdal.GDT_Float32)
# Without gdal.UseExceptions(), Create() signals failure by returning None;
# fail here with a clear message instead of an AttributeError on the next line.
if out_ds is None:
    raise RuntimeError(
        f"GDAL could not create {out_path!r}; "
        "check that the output directory exists and is writable."
    )

# Set geospatial information.
out_ds.SetGeoTransform(geotransform)
out_ds.SetProjection(srs.ExportToWkt())

# Write the data array to the TIFF file.
out_band = out_ds.GetRasterBand(1)
out_band.WriteArray(data_array)

# Flush, then release the band before its owning dataset so no handle
# outlives the dataset; releasing the dataset closes the file.
out_band.FlushCache()
out_band = None
out_ds = None

dict_keys(['__header__', '__version__', '__globals__', 'data3'])
dict_keys(['__header__', '__version__', '__globals__', 'data4'])


In [None]:
"""This script calculates the potential forest area and existing forest in each GCAM basin in China. 
The afforestation area in each GCAM basin is calculated by subtracting the existing forest area from the potential forest distribution data.
author:shiyu deng
email:shiyu.deng.23@ucl.ac.uk
"""

import geopandas as gpd
import rasterio
from rasterio.mask import mask
from rasterstats import zonal_stats
import pandas as pd
import numpy as np
import json
from rasterio.warp import calculate_default_transform, reproject, Resampling
import scipy.io
from osgeo import gdal, osr
import h5py
import matplotlib.pyplot as plt

# PROJ definition of the Albers equal-area projection used for China.
albers_projection = (
    "+proj=aea +lat_1=25 +lat_2=47 +lat_0=0 +lon_0=105 "
    "+x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs"
)

# Load the shapefile of China's GCAM basins.
# NOTE(review): `path` is not defined in the visible code; it must be set
# before this cell runs.
shapefile = gpd.read_file(path+"/Export_Output.shp")
shapefile.plot(edgecolor='black', linewidth=0.15)

# Quick inspection: column labels and the basin-name column.
columns = shapefile.columns
glu_data = shapefile['glu_nm']
print(glu_data)

# Reproject the basin polygons to the Albers projection.
china_shp_albers = shapefile.to_crs(albers_projection)



# Reproject the forest raster to the Albers projection and save it as a new file.
# Replace 'new_veg_2013_2017.tif' with 'best_bio_2100.tif' or 'best_suit_2100.tif'
# to calculate the potential forest area instead.
with rasterio.open('Datashare_PNAS/processed/new_veg_2013_2017.tif') as src:

    src_crs, src_transform = src.crs, src.transform
    src_width, src_height = src.width, src.height

    # Grid (affine transform and size) of the raster in the target projection.
    transform, width, height = calculate_default_transform(
        src_crs, albers_projection, src_width, src_height, *src.bounds)

    # Output profile: the source metadata with CRS and grid overridden.
    kwargs = dict(
        src.meta,
        crs=albers_projection,
        transform=transform,
        width=width,
        height=height,
    )

    # Create a new TIFF file for the reprojected data.
    with rasterio.open('Datashare_PNAS/processed/new_veg_2013_2017_transformed.tif', 'w', **kwargs) as dst:
        # Reproject and write every band; nearest-neighbour resampling copies
        # source values without interpolating between them.
        for band_idx in range(1, src.count + 1):
            reproject(
                source=rasterio.band(src, band_idx),
                destination=rasterio.band(dst, band_idx),
                src_transform=src_transform,
                src_crs=src_crs,
                dst_transform=transform,
                dst_crs=albers_projection,
                resampling=Resampling.nearest)
            

# Reprojected existing-forest raster produced by the previous step.
tiff_path = "Datashare_PNAS/processed/new_veg_2013_2017_transformed.tif"

area_data = []
total_non_zero_area2 = 0  # not referenced again in the visible code
with rasterio.open(tiff_path) as src:
    # Area of one grid cell in squared CRS units (pixel width * pixel height).
    grid_area = src.res[0] * src.res[1]

    for _, basin in china_shp_albers.iterrows():
        # Clip the raster to this basin's polygon.
        basin_shapes = [basin['geometry'].__geo_interface__]
        clipped, _clip_transform = mask(src, basin_shapes, crop=True)
        classes = clipped[0]  # assuming the relevant data is in the first band

        # Cell counts per forest group: values 1-8 are counted as softwood,
        # values 9-16 as hardwood.
        softwood_cells = ((classes >= 1) & (classes <= 8)).sum()
        hardwood_cells = ((classes >= 9) & (classes <= 16)).sum()

        # Dividing by 1e9 turns m^2 into thousand km^2, assuming the raster
        # units are metres (the Albers definition uses +units=m) -- TODO confirm
        # this is the intended reporting unit.
        area_data.append({
            'GLU_Name': basin['glu_nm'],
            'Softwood_Forest_Area': softwood_cells * grid_area/1000000000,
            'Hardwood_Forest_Area': hardwood_cells * grid_area/1000000000,
        })

df = pd.DataFrame(area_data)

# Save the existing forest area results to Excel.
excel_path = "/new_veg_2013_2017_G7.xlsx"
df.to_excel(excel_path, index=False)


数据已保存至 C:\Users\ucbvsd0\OneDrive - University College London\论文记录\forestation\Datashare_PNAS\processed\new_veg_2013_2017_G7.1.xlsx
