In [None]:
import pandas as pd
import numpy as np
import rasterio
import os

# Input folders for the monthly rasters read by the cells below.
# VPD files are looked up as "VPD_X<year>.<MM>.tif" under vpd_path and
# SM files as "sm_<year><MM>.tif" under sm_path.
# NOTE(review): absolute, machine-specific Windows paths — adjust before running elsewhere.
vpd_path = r'G:\paper01\era5\VPD_mon_Clip'
sm_path = r'G:\paper01\era5\sm_layer2_mon_res\1'

# Switch the process working directory to the workspace drive.
# NOTE(review): on Windows a bare drive spec such as 'G:' presumably selects the
# current directory of that drive rather than its root — confirm that 'G:\\' was
# not intended. No relative path is used in the visible cells.
workspace_path = 'G:' 
os.chdir(workspace_path)

In [None]:
# Define the date range: one timestamp per calendar month, January 1982 – December 2018.
# The month-start alias 'MS' is used instead of the month-end alias 'M': 'M' has been
# deprecated since pandas 2.2, whereas 'MS' is accepted by old and new pandas alike.
# The cells below only read .year and .month from each entry, so the day of month
# carried by the timestamps does not matter.
date_range = pd.date_range('1982-01-01', '2018-12-31', freq='MS')

# Global grids are 360x720 (VPD) and 361x720 (SM); each hemisphere is stored as a
# 180x720 half-grid with one layer per entry of date_range.
# The arrays are created already filled with NaN, so positions that are never
# written cannot be mistaken for data in subsequent calculations.
# NOTE(review): the function cells below bind local dictionaries under these same
# names; nothing in the visible cells reads these module-level arrays.
northern_hemisphere_vpd = np.full((len(date_range), 180, 720), np.nan, dtype=np.float32)
southern_hemisphere_vpd = np.full((len(date_range), 180, 720), np.nan, dtype=np.float32)
northern_hemisphere_sm = np.full((len(date_range), 180, 720), np.nan, dtype=np.float32)
southern_hemisphere_sm = np.full((len(date_range), 180, 720), np.nan, dtype=np.float32)

# Growing-season calendar months for each hemisphere.
northern_months = list(range(4, 10))                # April–September
southern_months = [*range(10, 13), *range(1, 4)]    # October–March (wraps the year end)

# Row ranges used to cut each hemisphere out of a global raster: rows 0–179 are
# taken as the northern half and rows 180–359 as the southern half.
# NOTE(review): the SM grid is stated to have 361 rows, so row 360 is never read
# and SM rows may not line up exactly with VPD rows — confirm the alignment.
vpd_northern_hemisphere_rows, vpd_southern_hemisphere_rows = slice(0, 180), slice(180, 360)
sm_northern_hemisphere_rows, sm_southern_hemisphere_rows = slice(0, 180), slice(180, 360)


Percentiles and Standard Deviation

In [None]:
def calculate_monthly_90th_percentile(vpd_path, start_year, end_year, num_rows, num_cols):
    """Per-calendar-month 90th percentile and standard deviation of VPD, by hemisphere.

    For every month from January ``start_year`` to December ``end_year`` the file
    ``{vpd_path}/VPD_X<year>.<MM>.tif`` is read (band 1) if it exists; missing
    files are skipped.  Northern-hemisphere rows are collected for the months in
    ``northern_months`` and southern-hemisphere rows for the months in
    ``southern_months``.  The statistics are then taken across years, pixel by
    pixel, separately for each calendar month.

    Values below -9999 are converted to NaN before any statistic is computed and
    NaN-aware reductions are used, so fill values cannot leak into the percentile
    or the standard deviation.  This is the same threshold the notebook applies
    to the loaded series afterwards.

    Relies on the module-level names ``northern_months``, ``southern_months``,
    ``vpd_northern_hemisphere_rows`` and ``vpd_southern_hemisphere_rows``.

    Parameters
    ----------
    vpd_path : str
        Folder containing the monthly VPD rasters.
    start_year, end_year : int
        First and last year (both inclusive) to include.
    num_rows, num_cols : int
        Shape of one hemisphere half-grid; it must match the row slices above.

    Returns
    -------
    tuple of four float64 arrays, each shaped (12, num_rows, num_cols):
        northern 90th percentile, southern 90th percentile, northern standard
        deviation, southern standard deviation.  Index ``month - 1`` holds the
        values of that calendar month; months outside a hemisphere's season (or
        without any file) stay 0, and pixels with no valid value are NaN.
    """
    import warnings  # local import: only needed for the diagnostics in this function

    nodata_below = -9999  # same threshold the notebook uses when masking the series

    # Initialize output arrays
    northern_vpd_90th_percentile = np.zeros((12, num_rows, num_cols))
    southern_vpd_90th_percentile = np.zeros((12, num_rows, num_cols))
    nor_vpd_std = np.zeros((12, num_rows, num_cols))
    sou_vpd_std = np.zeros((12, num_rows, num_cols))

    # One list of yearly layers per in-season calendar month
    northern_layers = {month: [] for month in northern_months}
    southern_layers = {month: [] for month in southern_months}

    # Honour the requested period instead of depending on a global date index
    for year in range(start_year, end_year + 1):
        for month in range(1, 13):
            vpd_file_path = f"{vpd_path}/VPD_X{year}.{month:02d}.tif"
            if not os.path.exists(vpd_file_path):
                continue
            with rasterio.open(vpd_file_path) as vpd_src:
                vpd_data = vpd_src.read(1)  # Read the full global VPD data

            # NaN can only be stored in floating-point arrays
            if not np.issubdtype(vpd_data.dtype, np.floating):
                vpd_data = vpd_data.astype(np.float32)
            vpd_data[vpd_data < nodata_below] = np.nan

            if month in northern_months:
                northern_layers[month].append(vpd_data[vpd_northern_hemisphere_rows, :])
            if month in southern_months:
                southern_layers[month].append(vpd_data[vpd_southern_hemisphere_rows, :])

    if not any(northern_layers.values()) and not any(southern_layers.values()):
        warnings.warn(
            f"No VPD raster found under {vpd_path!r} for {start_year}-{end_year}; "
            "all returned statistics are 0.",
            RuntimeWarning,
            stacklevel=2,
        )

    # Pixels that are NaN in every year trigger "All-NaN slice" warnings; the NaN
    # result is the intended outcome there, so the warnings are silenced.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)
        for layers_by_month, percentile_out, std_out in (
            (northern_layers, northern_vpd_90th_percentile, nor_vpd_std),
            (southern_layers, southern_vpd_90th_percentile, sou_vpd_std),
        ):
            for month, layers in layers_by_month.items():
                if not layers:  # no file found for this calendar month
                    continue
                stack = np.stack(layers)
                percentile_out[month - 1] = np.nanpercentile(stack, 90, axis=0)
                std_out[month - 1] = np.nanstd(stack, axis=0)

    return northern_vpd_90th_percentile, southern_vpd_90th_percentile, nor_vpd_std, sou_vpd_std

# Study period (inclusive) and the shape of one hemisphere half-grid.
start_year, end_year = 1982, 2018
num_rows, num_cols = 180, 720

# Monthly VPD 90th percentiles and standard deviations for both hemispheres.
(northern_vpd_90th_percentile,
 southern_vpd_90th_percentile,
 nor_vpd_std,
 sou_vpd_std) = calculate_monthly_90th_percentile(
    vpd_path=vpd_path,
    start_year=start_year,
    end_year=end_year,
    num_rows=num_rows,
    num_cols=num_cols,
)

In [None]:
def calculate_monthly_10th_percentile(sm_path, start_year, end_year, num_rows, num_cols):
    """Per-calendar-month 10th percentile and standard deviation of SM, by hemisphere.

    For every month from January ``start_year`` to December ``end_year`` the file
    ``{sm_path}/sm_<year><MM>.tif`` is read (band 1) if it exists; missing files
    are skipped.  Northern-hemisphere rows are collected for the months in
    ``northern_months`` and southern-hemisphere rows for the months in
    ``southern_months``.  The statistics are then taken across years, pixel by
    pixel, separately for each calendar month.

    Values below -9999 are converted to NaN before any statistic is computed and
    NaN-aware reductions are used.  Without this, very negative fill values would
    drag the low percentile down and inflate the standard deviation.  This is the
    same threshold the notebook applies to the loaded series afterwards.

    Relies on the module-level names ``northern_months``, ``southern_months``,
    ``sm_northern_hemisphere_rows`` and ``sm_southern_hemisphere_rows``.

    Parameters
    ----------
    sm_path : str
        Folder containing the monthly SM rasters.
    start_year, end_year : int
        First and last year (both inclusive) to include.
    num_rows, num_cols : int
        Shape of one hemisphere half-grid; it must match the row slices above.

    Returns
    -------
    tuple of four float64 arrays, each shaped (12, num_rows, num_cols):
        northern 10th percentile, southern 10th percentile, northern standard
        deviation, southern standard deviation.  Index ``month - 1`` holds the
        values of that calendar month; months outside a hemisphere's season (or
        without any file) stay 0, and pixels with no valid value are NaN.
    """
    import warnings  # local import: only needed for the diagnostics in this function

    nodata_below = -9999  # same threshold the notebook uses when masking the series

    # Initialize output arrays
    northern_sm_10th_percentile = np.zeros((12, num_rows, num_cols))
    southern_sm_10th_percentile = np.zeros((12, num_rows, num_cols))
    nor_sm_std = np.zeros((12, num_rows, num_cols))
    sou_sm_std = np.zeros((12, num_rows, num_cols))

    # One list of yearly layers per in-season calendar month
    northern_layers = {month: [] for month in northern_months}
    southern_layers = {month: [] for month in southern_months}

    # Honour the requested period instead of depending on a global date index
    for year in range(start_year, end_year + 1):
        for month in range(1, 13):
            sm_file_path = f"{sm_path}/sm_{year}{month:02d}.tif"
            if not os.path.exists(sm_file_path):
                continue
            with rasterio.open(sm_file_path) as sm_src:
                sm_data = sm_src.read(1)  # Read the full global SM data

            # NaN can only be stored in floating-point arrays
            if not np.issubdtype(sm_data.dtype, np.floating):
                sm_data = sm_data.astype(np.float32)
            sm_data[sm_data < nodata_below] = np.nan

            if month in northern_months:
                northern_layers[month].append(sm_data[sm_northern_hemisphere_rows, :])
            if month in southern_months:
                southern_layers[month].append(sm_data[sm_southern_hemisphere_rows, :])

    if not any(northern_layers.values()) and not any(southern_layers.values()):
        warnings.warn(
            f"No SM raster found under {sm_path!r} for {start_year}-{end_year}; "
            "all returned statistics are 0.",
            RuntimeWarning,
            stacklevel=2,
        )

    # Pixels that are NaN in every year trigger "All-NaN slice" warnings; the NaN
    # result is the intended outcome there, so the warnings are silenced.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)
        for layers_by_month, percentile_out, std_out in (
            (northern_layers, northern_sm_10th_percentile, nor_sm_std),
            (southern_layers, southern_sm_10th_percentile, sou_sm_std),
        ):
            for month, layers in layers_by_month.items():
                if not layers:  # no file found for this calendar month
                    continue
                stack = np.stack(layers)
                percentile_out[month - 1] = np.nanpercentile(stack, 10, axis=0)
                std_out[month - 1] = np.nanstd(stack, axis=0)

    return northern_sm_10th_percentile, southern_sm_10th_percentile, nor_sm_std, sou_sm_std

# Study period (inclusive) and the shape of one hemisphere half-grid.
start_year, end_year = 1982, 2018
num_rows, num_cols = 180, 720

# Monthly SM 10th percentiles and standard deviations for both hemispheres.
(northern_sm_10th_percentile,
 southern_sm_10th_percentile,
 nor_sm_std,
 sou_sm_std) = calculate_monthly_10th_percentile(
    sm_path=sm_path,
    start_year=start_year,
    end_year=end_year,
    num_rows=num_rows,
    num_cols=num_cols,
)


In [None]:
# Initialize storage arrays with a time dimension (i.e., the length of date_range).
# They start out as NaN: np.empty leaves the buffer uninitialized, so any month that
# is never written below (off-season months, or months whose file is missing) would
# otherwise hold arbitrary values that later take part in threshold comparisons.
NH_vpd = np.full((len(date_range), 180, 720), np.nan, dtype=np.float32)
SH_vpd = np.full((len(date_range), 180, 720), np.nan, dtype=np.float32)
NH_sm = np.full((len(date_range), 180, 720), np.nan, dtype=np.float32)
SH_sm = np.full((len(date_range), 180, 720), np.nan, dtype=np.float32)

# Read VPD and SM data month by month; idx is the position along the time axis
for idx, date in enumerate(date_range):
    month = date.month
    year = date.year

    # Read VPD data (skipped silently when the file does not exist)
    vpd_file_path = f"{vpd_path}/VPD_X{year}.{month:02d}.tif"
    if os.path.exists(vpd_file_path):
        with rasterio.open(vpd_file_path) as vpd_src:
            vpd_data = vpd_src.read(1)  # Read the global VPD data

            # Northern Hemisphere rows are stored for April–September only
            if month in northern_months:
                NH_vpd[idx, :, :] = vpd_data[vpd_northern_hemisphere_rows, :]

            # Southern Hemisphere rows are stored for October–March only
            if month in southern_months:
                SH_vpd[idx, :, :] = vpd_data[vpd_southern_hemisphere_rows, :]

    # Read SM data (skipped silently when the file does not exist)
    sm_file_path = f"{sm_path}/sm_{year}{month:02d}.tif"
    if os.path.exists(sm_file_path):
        with rasterio.open(sm_file_path) as sm_src:
            sm_data = sm_src.read(1)  # Read the global SM data

            # Northern Hemisphere rows are stored for April–September only
            if month in northern_months:
                NH_sm[idx, :, :] = sm_data[sm_northern_hemisphere_rows, :]

            # Southern Hemisphere rows are stored for October–March only
            if month in southern_months:
                SH_sm[idx, :, :] = sm_data[sm_southern_hemisphere_rows, :]


In [6]:
# Overwrite, in place, every value strictly below -9999 with NaN in the four
# monthly series (presumably the rasters' nodata fill — confirm against the files).
# A value of exactly -9999 is left untouched by the strict comparison.
np.putmask(NH_vpd, NH_vpd < -9999, np.nan)
np.putmask(SH_vpd, SH_vpd < -9999, np.nan)
np.putmask(NH_sm, NH_sm < -9999, np.nan)
np.putmask(SH_sm, SH_sm < -9999, np.nan)

In [7]:
# Apply the same in-place masking (values strictly below -9999 become NaN) to the
# monthly threshold grids. The standard-deviation grids are not masked here.
np.putmask(northern_vpd_90th_percentile, northern_vpd_90th_percentile < -9999, np.nan)
np.putmask(southern_vpd_90th_percentile, southern_vpd_90th_percentile < -9999, np.nan)
np.putmask(northern_sm_10th_percentile, northern_sm_10th_percentile < -9999, np.nan)
np.putmask(southern_sm_10th_percentile, southern_sm_10th_percentile < -9999, np.nan)

Northern Hemisphere

In [None]:
def find_drought_events(vpd_data, sm_data, vpd_90th, sm_10th,
                        season_months=(4, 5, 6, 7, 8, 9), n_years=None):
    """Detect compound drought months and summarise event count, duration and frequency.

    A month counts as a drought month at a pixel when it belongs to
    ``season_months`` and both conditions hold: VPD is above the 90th-percentile
    threshold and SM is below the 10th-percentile threshold of that calendar
    month.  An event is a run of consecutive drought months along the time axis.
    Comparisons involving NaN are False, so NaN pixels never produce events.

    NOTE: this notebook defines several functions called ``find_drought_events``;
    the definition from the most recently executed cell is the one in effect.

    Parameters
    ----------
    vpd_data, sm_data : ndarray, shape (T, rows, cols)
        Monthly series; time index 0 is treated as January and months cycle
        every 12 steps.
    vpd_90th, sm_10th : ndarray, shape (12, rows, cols)
        Thresholds per calendar month (index ``month - 1``).
    season_months : iterable of int, optional
        Calendar months (1–12) in which events may occur.  Defaults to
        April–September.
    n_years : float, optional
        Number of years used to normalise the frequency.  Defaults to ``T / 12``
        (37 for the 444-month series of this notebook).

    Returns
    -------
    event : bool ndarray (T, rows, cols)
        Drought-month mask.
    event_duration_sum : int32 ndarray (rows, cols)
        Total number of drought months.
    total_event_count : int32 ndarray (rows, cols)
        Number of events (runs).
    average_event_duration : float32 ndarray (rows, cols)
        Mean run length; 0 where no event occurred.
    frequency : float32 ndarray (rows, cols)
        Events per year; 0 where no event occurred.
    """
    n_steps = vpd_data.shape[0]

    # Off-season months stay False; in-season months get the threshold test
    event = np.zeros(vpd_data.shape, dtype=bool)
    for month_index in range(n_steps):
        month = month_index % 12 + 1  # Map index to month (1–12)
        if month in season_months:
            percentile_index = month - 1
            event[month_index] = (
                (vpd_data[month_index] > vpd_90th[percentile_index])
                & (sm_data[month_index] < sm_10th[percentile_index])
            )

    # A run starts at every True that is not preceded by a True, so the number of
    # runs is the number of starts and the summed run length is simply the number
    # of True months.  This replaces a per-pixel Python loop with array operations.
    run_start = event.copy()
    run_start[1:] &= ~event[:-1]
    total_event_count = run_start.sum(axis=0, dtype=np.int32)
    event_duration_sum = event.sum(axis=0, dtype=np.int32)

    if n_years is None:
        n_years = n_steps / 12

    # Calculate average duration and frequency, avoiding division by zero
    average_event_duration = np.zeros(event.shape[1:], dtype=np.float32)
    frequency = np.zeros(event.shape[1:], dtype=np.float32)
    mask = total_event_count > 0
    average_event_duration[mask] = event_duration_sum[mask] / total_event_count[mask]
    frequency[mask] = total_event_count[mask] / n_years  # Normalize by number of years

    return event, event_duration_sum, total_event_count, average_event_duration, frequency

# Northern Hemisphere: drought-month mask, summed duration, event count,
# mean duration and yearly frequency.
(event00,
 event_duration_sum00,
 total_event_count00,
 average_event_duration00,
 frequency00) = find_drought_events(
    vpd_data=NH_vpd,
    sm_data=NH_sm,
    vpd_90th=northern_vpd_90th_percentile,
    sm_10th=northern_sm_10th_percentile,
)


In [None]:
# Calculate intensity

def find_drought_events(vpd_data, sm_data, vpd_90th, sm_10th, vpd_std, sm_std,
                        season_months=(4, 5, 6, 7, 8, 9)):
    """Compute the intensity of compound drought events for every pixel.

    A month is a drought month at a pixel when it belongs to ``season_months``
    and VPD exceeds the 90th-percentile threshold while SM is below the
    10th-percentile threshold of that calendar month.  An event is a run of
    consecutive drought months.  For an event of ``d`` months the intensity is

        sqrt( sum_t [ ((SM_t - SM10) / SMstd)^2 + ((VPD_t - VPD90) / VPDstd)^2 ] / (2 * d) )

    with thresholds and standard deviations taken for the calendar month of ``t``.
    A zero standard deviation at a drought pixel yields inf through the division.

    NOTE: this definition reuses the name ``find_drought_events`` with a different
    signature and return value than the duration/frequency version; whichever
    cell ran last is the one in effect.

    Parameters
    ----------
    vpd_data, sm_data : ndarray, shape (T, rows, cols)
        Monthly series; time index 0 is treated as January.
    vpd_90th, sm_10th, vpd_std, sm_std : ndarray, shape (12, rows, cols)
        Thresholds and standard deviations per calendar month (index ``month - 1``).
    season_months : iterable of int, optional
        Calendar months (1–12) in which events may occur.  Defaults to
        April–September.

    Returns
    -------
    average_event_intensity : float32 ndarray (rows, cols)
        Mean intensity over the events of each pixel; 0 where no event occurred.
    total_intensity : float32 ndarray (rows, cols)
        Sum of the event intensities of each pixel.
    """
    n_steps = vpd_data.shape[0]
    grid_shape = vpd_data.shape[1:]

    total_event_count = np.zeros(grid_shape, dtype=np.int32)
    total_intensity = np.zeros(grid_shape, dtype=np.float32)

    # State of the run currently open at each pixel
    run_length = np.zeros(grid_shape, dtype=np.int32)
    run_sm_sq = np.zeros(grid_shape, dtype=np.float64)
    run_vpd_sq = np.zeros(grid_shape, dtype=np.float64)

    def _close_runs(ended):
        """Record the intensity of the runs flagged in `ended` and reset their state."""
        if not ended.any():
            return
        total_event_count[ended] += 1
        total_intensity[ended] += np.sqrt(
            (run_sm_sq[ended] + run_vpd_sq[ended]) / (2 * run_length[ended])
        )
        run_length[ended] = 0
        run_sm_sq[ended] = 0.0
        run_vpd_sq[ended] = 0.0

    # Step through time once, updating all pixels together, instead of walking
    # the whole time series separately for every pixel in Python.
    for month_index in range(n_steps):
        month = month_index % 12 + 1  # Map index to month (1–12)

        if month not in season_months:
            # Off-season month: every open run ends here
            _close_runs(run_length > 0)
            continue

        idx = month - 1  # index into the per-calendar-month arrays
        active = (vpd_data[month_index] > vpd_90th[idx]) & (sm_data[month_index] < sm_10th[idx])

        # Runs that were open but are not continued this month are finished
        _close_runs(~active & (run_length > 0))

        if active.any():
            # Standardised departures are evaluated only at drought pixels
            sm_dev = (sm_data[month_index][active] - sm_10th[idx][active]) / sm_std[idx][active]
            vpd_dev = (vpd_data[month_index][active] - vpd_90th[idx][active]) / vpd_std[idx][active]
            run_length[active] += 1
            run_sm_sq[active] += sm_dev ** 2
            run_vpd_sq[active] += vpd_dev ** 2

    # Runs still open after the last month are recorded as well
    _close_runs(run_length > 0)

    # Calculate average intensity, avoid division by zero
    average_event_intensity = np.zeros(grid_shape, dtype=np.float32)
    mask = total_event_count > 0
    average_event_intensity[mask] = total_intensity[mask] / total_event_count[mask]

    return average_event_intensity, total_intensity

# Northern Hemisphere: mean and summed event intensity per pixel.
(average_event_intensity00,
 total_intensity00) = find_drought_events(
    vpd_data=NH_vpd,
    sm_data=NH_sm,
    vpd_90th=northern_vpd_90th_percentile,
    sm_10th=northern_sm_10th_percentile,
    vpd_std=nor_vpd_std,
    sm_std=nor_sm_std,
)


Southern Hemisphere

In [None]:

def find_drought_events(vpd_data, sm_data, vpd_90th, sm_10th,
                        season_months=(1, 2, 3, 10, 11, 12), n_years=None):
    """Detect compound drought months and summarise event count, duration and frequency.

    Southern-Hemisphere variant: by default events may occur in October–March.
    A month counts as a drought month at a pixel when it belongs to
    ``season_months`` and both conditions hold: VPD is above the 90th-percentile
    threshold and SM is below the 10th-percentile threshold of that calendar
    month.  An event is a run of consecutive drought months along the time axis,
    so a run may continue from December into the following January.
    Comparisons involving NaN are False, so NaN pixels never produce events.

    NOTE: this notebook defines several functions called ``find_drought_events``;
    the definition from the most recently executed cell is the one in effect.

    Parameters
    ----------
    vpd_data, sm_data : ndarray, shape (T, rows, cols)
        Monthly series; time index 0 is treated as January and months cycle
        every 12 steps.
    vpd_90th, sm_10th : ndarray, shape (12, rows, cols)
        Thresholds per calendar month (index ``month - 1``).
    season_months : iterable of int, optional
        Calendar months (1–12) in which events may occur.  Defaults to
        October–March.
    n_years : float, optional
        Number of years used to normalise the frequency.  Defaults to ``T / 12``
        (37 for the 444-month series of this notebook).

    Returns
    -------
    event : bool ndarray (T, rows, cols)
        Drought-month mask.
    event_duration_sum : int32 ndarray (rows, cols)
        Total number of drought months.
    total_event_count : int32 ndarray (rows, cols)
        Number of events (runs).
    average_event_duration : float32 ndarray (rows, cols)
        Mean run length; 0 where no event occurred.
    frequency : float32 ndarray (rows, cols)
        Events per year; 0 where no event occurred.
    """
    n_steps = vpd_data.shape[0]

    # Off-season months stay False; in-season months get the threshold test
    event = np.zeros(vpd_data.shape, dtype=bool)
    for month_index in range(n_steps):
        month = month_index % 12 + 1  # Map index to month (1–12)
        if month in season_months:
            percentile_index = month - 1
            event[month_index] = (
                (vpd_data[month_index] > vpd_90th[percentile_index])
                & (sm_data[month_index] < sm_10th[percentile_index])
            )

    # A run starts at every True that is not preceded by a True, so the number of
    # runs is the number of starts and the summed run length is simply the number
    # of True months.  This replaces a per-pixel Python loop with array operations.
    run_start = event.copy()
    run_start[1:] &= ~event[:-1]
    total_event_count = run_start.sum(axis=0, dtype=np.int32)
    event_duration_sum = event.sum(axis=0, dtype=np.int32)

    if n_years is None:
        n_years = n_steps / 12

    # Calculate average duration and frequency, avoiding division by zero
    average_event_duration = np.zeros(event.shape[1:], dtype=np.float32)
    frequency = np.zeros(event.shape[1:], dtype=np.float32)
    mask = total_event_count > 0
    average_event_duration[mask] = event_duration_sum[mask] / total_event_count[mask]
    frequency[mask] = total_event_count[mask] / n_years  # Normalize by number of years

    return event, event_duration_sum, total_event_count, average_event_duration, frequency

# Southern Hemisphere: drought-month mask, summed duration, event count,
# mean duration and yearly frequency.
(event01,
 event_duration_sum01,
 total_event_count01,
 average_event_duration01,
 frequency01) = find_drought_events(
    vpd_data=SH_vpd,
    sm_data=SH_sm,
    vpd_90th=southern_vpd_90th_percentile,
    sm_10th=southern_sm_10th_percentile,
)


In [None]:
# Calculate intensity

def find_drought_events(vpd_data, sm_data, vpd_90th, sm_10th, vpd_std, sm_std,
                        season_months=(1, 2, 3, 10, 11, 12)):
    """Compute the intensity of compound drought events for every pixel.

    Southern-Hemisphere variant: by default events may occur in October–March.
    A month is a drought month at a pixel when it belongs to ``season_months``
    and VPD exceeds the 90th-percentile threshold while SM is below the
    10th-percentile threshold of that calendar month.  An event is a run of
    consecutive drought months (a run may continue from December into January).
    For an event of ``d`` months the intensity is

        sqrt( sum_t [ ((SM_t - SM10) / SMstd)^2 + ((VPD_t - VPD90) / VPDstd)^2 ] / (2 * d) )

    with thresholds and standard deviations taken for the calendar month of ``t``.
    A zero standard deviation at a drought pixel yields inf through the division.

    NOTE: this definition reuses the name ``find_drought_events`` with a different
    signature and return value than the duration/frequency version; whichever
    cell ran last is the one in effect.

    Parameters
    ----------
    vpd_data, sm_data : ndarray, shape (T, rows, cols)
        Monthly series; time index 0 is treated as January.
    vpd_90th, sm_10th, vpd_std, sm_std : ndarray, shape (12, rows, cols)
        Thresholds and standard deviations per calendar month (index ``month - 1``).
    season_months : iterable of int, optional
        Calendar months (1–12) in which events may occur.  Defaults to
        October–March.

    Returns
    -------
    average_event_intensity : float32 ndarray (rows, cols)
        Mean intensity over the events of each pixel; 0 where no event occurred.
    total_intensity : float32 ndarray (rows, cols)
        Sum of the event intensities of each pixel.
    """
    n_steps = vpd_data.shape[0]
    grid_shape = vpd_data.shape[1:]

    total_event_count = np.zeros(grid_shape, dtype=np.int32)
    total_intensity = np.zeros(grid_shape, dtype=np.float32)

    # State of the run currently open at each pixel
    run_length = np.zeros(grid_shape, dtype=np.int32)
    run_sm_sq = np.zeros(grid_shape, dtype=np.float64)
    run_vpd_sq = np.zeros(grid_shape, dtype=np.float64)

    def _close_runs(ended):
        """Record the intensity of the runs flagged in `ended` and reset their state."""
        if not ended.any():
            return
        total_event_count[ended] += 1
        total_intensity[ended] += np.sqrt(
            (run_sm_sq[ended] + run_vpd_sq[ended]) / (2 * run_length[ended])
        )
        run_length[ended] = 0
        run_sm_sq[ended] = 0.0
        run_vpd_sq[ended] = 0.0

    # Step through time once, updating all pixels together, instead of walking
    # the whole time series separately for every pixel in Python.
    for month_index in range(n_steps):
        month = month_index % 12 + 1  # Map index to month (1–12)

        if month not in season_months:
            # Off-season month: every open run ends here
            _close_runs(run_length > 0)
            continue

        idx = month - 1  # index into the per-calendar-month arrays
        active = (vpd_data[month_index] > vpd_90th[idx]) & (sm_data[month_index] < sm_10th[idx])

        # Runs that were open but are not continued this month are finished
        _close_runs(~active & (run_length > 0))

        if active.any():
            # Standardised departures are evaluated only at drought pixels
            sm_dev = (sm_data[month_index][active] - sm_10th[idx][active]) / sm_std[idx][active]
            vpd_dev = (vpd_data[month_index][active] - vpd_90th[idx][active]) / vpd_std[idx][active]
            run_length[active] += 1
            run_sm_sq[active] += sm_dev ** 2
            run_vpd_sq[active] += vpd_dev ** 2

    # Runs still open after the last month are recorded as well
    _close_runs(run_length > 0)

    # Calculate average intensity, avoid division by zero
    average_event_intensity = np.zeros(grid_shape, dtype=np.float32)
    mask = total_event_count > 0
    average_event_intensity[mask] = total_intensity[mask] / total_event_count[mask]

    return average_event_intensity, total_intensity

# Southern Hemisphere: mean and summed event intensity per pixel.
(average_event_intensity01,
 total_intensity01) = find_drought_events(
    vpd_data=SH_vpd,
    sm_data=SH_sm,
    vpd_90th=southern_vpd_90th_percentile,
    sm_10th=southern_sm_10th_percentile,
    vpd_std=sou_vpd_std,
    sm_std=sou_sm_std,
)



Save the Results

In [None]:
# Output results
from rasterio.transform import from_origin  # not used in this cell; kept in case later cells rely on it

# Take the georeferencing (affine transform and CRS) from the first VPD raster
# found on disk. The file name uses a single ".tif" extension, matching the cells
# that load the data; the metadata is read while the dataset is still open, and
# the search stops at the first hit instead of reading every raster.
# Assumes all monthly rasters share one grid — the analysis above already stacks
# them pixel by pixel.
transform = None
crs = None
for date in date_range:
    month = date.month
    year = date.year
    vpd_file_path = f"{vpd_path}/VPD_X{year}.{month:02d}.tif"
    if os.path.exists(vpd_file_path):
        with rasterio.open(vpd_file_path) as vpd_src:
            transform = vpd_src.transform
            crs = vpd_src.crs
        break

if transform is None:
    raise FileNotFoundError(
        f"No VPD raster found under {vpd_path!r}; cannot georeference the output."
    )

# Output file path
output_path = r"G:\paper01\TEST\frequency00.tif"

# Create the output raster file.
# frequency00 covers rows 0–179 of the global grid, i.e. it starts at the same
# upper-left corner, so the global transform applies unchanged. A southern-half
# grid (rows 180–359) would need a transform shifted by 180 rows.
with rasterio.open(
    output_path, 'w',
    driver='GTiff',
    height=frequency00.shape[0],
    width=frequency00.shape[1],
    count=1,
    dtype=rasterio.float32,
    crs=crs,
    transform=transform,
    nodata=np.nan
) as dst:
    dst.write(frequency00.astype(np.float32), 1)

print("Data successfully written to:", output_path)
