# Import libraries

In [1]:
import numpy as np
import os
import xarray as xr
import numpy as np

# Set root directory

In [2]:
# Load all files
root_dir = r"D:\Ghana\Merged_files_IMERG_SEVIRI\Condition_set_0.02_20_all"
#root_dir = r"Z:\cluster_projects\ro\1149_10\earthformer-multisource-to-inca\data_adapted_condition_0.01_5_all"

# Calculate mean and standard deviation

In [3]:
# Traverse through all subdirectories and files in the given path
files_path = []
for root, dirs, files in os.walk(root_dir):
    for file in files:
        if file.endswith(".hdf5"):
            files_path.append(os.path.join(root, file))

In [4]:
seviri_channel_means = np.zeros((11, len(files_path)))
imerg_means = []
  

for i, file in enumerate(files_path):
    #print(file)
    ds = xr.open_dataset(file, engine='netcdf4')

    # Extract the IMERG and SEVIRI variables
    imerge = ds["IMERG"]  # Shape: (1, 248, 184)
    seviri = ds["SEVIRI"]  # Shape: (9, 248, 184, 11)

    # Compute the mean for IMERG over all dimensions (time, height, width)
    imerg_mean = imerge.mean(dim=("phony_dim_0", "phony_dim_1", "phony_dim_2")).values  # Single value
    imerg_means.append(imerg_mean)

    # Compute the mean for SEVIRI over time and spatial dimensions, separately for each channel
    seviri_channel_mean = []
    for channel in range(seviri.shape[-1]):  # Loop through the 11 channels
        channel_mean = seviri.mean(dim=("phony_dim_1", "phony_dim_2", "phony_dim_3")).values
        seviri_channel_means[:, i]  = channel_mean

In [5]:
# # Final results
print('IMERG Mean:', np.mean(imerg_means))
print('SEVIRI Channel Means:', np.mean(seviri_channel_means, axis = 1))

IMERG Mean: 0.6195765
SEVIRI Channel Means: [ 10.25720179  12.5617076    8.52811886 273.61663637 230.79820342
 244.10878277 263.99073923 253.54030633 263.60335884 260.92204897
 247.74221263]


In [7]:
global_mean = np.mean(imerg_means)

# Pass 2: Calculate global variance
squared_diff_sum = 0
total_count = 0

for file in files_path:
    with xr.open_dataset(file, engine='netcdf4') as h5file:
        data = h5file['IMERG'][0, :, :]  
        squared_diff_sum += np.sum((data - global_mean) ** 2)
        total_count += data.size

global_variance = squared_diff_sum / total_count
global_std = np.sqrt(global_variance)

print("Global Standard Deviation for IMERG-Final:", global_std)

Global Standard Deviation for IMERG-Final: <xarray.DataArray 'IMERG' ()> Size: 4B
np.float32(2.1691806)


In [13]:
# Assuming `seviri_channel_means` is a precomputed array with means for each channel
squared_diff_sum_seviri = np.zeros(len(seviri_channel_means))  # To store squared differences per channel
pixels_per_channel = 0  # To count pixels for one channel

seviri_channel_mean_mean = np.mean(seviri_channel_means, axis = 1)
total_count = 0
# Loop through files
for file in files_path:
    #print(file)
    with xr.open_dataset(file, engine='netcdf4') as h5file:
        data_seviri = h5file['SEVIRI'][:, :, :, :]  # Replace with your dataset name

        # Get pixel count for one channel (same for all channels in this dataset)
        if pixels_per_channel == 0:  # Calculate once
            pixels_per_channel = data_seviri.shape[0] * data_seviri.shape[1] * data_seviri.shape[2]

        # Loop through channels and time steps
        for channel in range(data_seviri.shape[-1]):  # Loop through channels
            for time in range(data_seviri.shape[0]):  # Loop through time
                # Compute squared differences and accumulate for the current channel
                squared_diff_sum_seviri[channel] += np.sum(
                    (data_seviri[time, :, :, channel] - seviri_channel_mean_mean[channel]) ** 2
                )

        total_count += pixels_per_channel

# Calculate global variance and standard deviation for each channel
global_variance_seviri = squared_diff_sum_seviri / total_count
global_std_seviri = np.sqrt(global_variance_seviri)

print("Global Standard Deviation for SEVIRI Channels:", global_std_seviri)

Global Standard Deviation for SEVIRI Channels: [16.86940958 18.52551459 11.51099045 28.85905664 11.1876696  17.51180968
 28.80451948 19.34965801 29.88327903 29.32706102 21.25034604]
