In [3]:
import h5py
import matplotlib.pyplot as plt
from pathlib import Path
import pandas as pd

# Set the path to the normalized data
data_path = Path("/mnt/data/Normalized_Data")

# Function to read and display information from h5 files
def read_h5_files(path):
    # Get all h5 files in the directory
    h5_files = list(path.glob("*.h5"))
    
    if not h5_files:
        print(f"No .h5 files found in {path}")
        return
    
    print(f"Found {len(h5_files)} .h5 files")
    
    # Read the first file to see its structure
    sample_file = h5_files[0]
    print(f"\nExamining sample file: {sample_file.name}")
    
    with h5py.File(sample_file, 'r') as f:
        # Print the keys/groups in the file
        print("\nFile structure:")
        for key in f.keys():
            print(f"- {key}")
            
            # If it's a dataset, show its shape and dtype
            if isinstance(f[key], h5py.Dataset):
                print(f"  Shape: {f[key].shape}, Type: {f[key].dtype}")
            
            # If it's a group, show its keys
            elif isinstance(f[key], h5py.Group):
                print(f"  Subgroups/datasets: {list(f[key].keys())}")
    
    return h5_files

# Read the h5 files
h5_files = read_h5_files(data_path)

Found 38 .h5 files

Examining sample file: aia131_batch5_data_20250710_043518.h5

File structure:
- normalized_data_0
  Shape: (13866348,), Type: float64
- normalized_data_1
  Shape: (13935107,), Type: float64
- normalized_data_10
  Shape: (13782150,), Type: float64
- normalized_data_100
  Shape: (13896950,), Type: float64
- normalized_data_101
  Shape: (13970019,), Type: float64
- normalized_data_102
  Shape: (13783233,), Type: float64
- normalized_data_103
  Shape: (13856191,), Type: float64
- normalized_data_104
  Shape: (13979851,), Type: float64
- normalized_data_105
  Shape: (13964276,), Type: float64
- normalized_data_106
  Shape: (13758919,), Type: float64
- normalized_data_107
  Shape: (13822880,), Type: float64
- normalized_data_108
  Shape: (13904279,), Type: float64
- normalized_data_109
  Shape: (13828810,), Type: float64
- normalized_data_11
  Shape: (13864790,), Type: float64
- normalized_data_110
  Shape: (13980940,), Type: float64
- normalized_data_111
  Shape: (137858

Task: Calculate the 99th percentile pixel intensity for each AIA wavelength from the given H5 files. This will help identify the threshold values for extreme brightness in the solar images across different wavelengths.

In [7]:
h5_files[1]

PosixPath('/mnt/data/Normalized_Data/aia171_batch2_data_20250710_050922.h5')

In [8]:
with h5py.File(h5_files[0], 'r') as f:
    # Print the structure
    print("File structure:")


    def print_structure(name, obj):
        if isinstance(obj, h5py.Dataset):
            print(f"{name}: shape={obj.shape}, dtype={obj.dtype}")


    f.visititems(print_structure)

    # Load the first dataset found
    first_dataset = next(iter(f.values()))
    data = first_dataset[:]
    print("\nSample data:")
    print(data[:5])  # Show first 5 elements

File structure:
normalized_data_0: shape=(13866348,), dtype=float64
normalized_data_1: shape=(13935107,), dtype=float64
normalized_data_10: shape=(13782150,), dtype=float64
normalized_data_100: shape=(13896950,), dtype=float64
normalized_data_101: shape=(13970019,), dtype=float64
normalized_data_102: shape=(13783233,), dtype=float64
normalized_data_103: shape=(13856191,), dtype=float64
normalized_data_104: shape=(13979851,), dtype=float64
normalized_data_105: shape=(13964276,), dtype=float64
normalized_data_106: shape=(13758919,), dtype=float64
normalized_data_107: shape=(13822880,), dtype=float64
normalized_data_108: shape=(13904279,), dtype=float64
normalized_data_109: shape=(13828810,), dtype=float64
normalized_data_11: shape=(13864790,), dtype=float64
normalized_data_110: shape=(13980940,), dtype=float64
normalized_data_111: shape=(13785868,), dtype=float64
normalized_data_112: shape=(13916100,), dtype=float64
normalized_data_113: shape=(13773842,), dtype=float64
normalized_data_11

This code will:
1. Open the first H5 file in read mode
2. Print the complete structure of the file showing all datasets and their properties 
3. Load the first dataset found in the file
4. Show a sample of the loaded data (first 5 elements)

This will help us understand the structure and content of the H5 files before further processing.


In [13]:

from tqdm import tqdm
import numpy as np

# Dictionary to store 99th percentiles for each wavelength
percentiles = {}

# Process each h5 file
for h5_file in tqdm(h5_files, desc="Processing H5 files"):
    # Extract wavelength from filename
    wavelength = h5_file.name.split('_')[0].replace('aia', '')

    with h5py.File(h5_file, 'r') as f:
        # Assuming data is stored in a dataset called 'data'
        for key in f.keys():
            if isinstance(f[key], h5py.Dataset):
                data = f[key][:]
                break
            elif isinstance(f[key], h5py.Group):
                # Try to get the first dataset in the group
                for subkey in f[key].keys():
                    if isinstance(f[key][subkey], h5py.Dataset):
                        data = f[key][subkey][:]
                        break
                break

        # Calculate 99th percentile
        p99 = np.percentile(data, 99)

        if wavelength in percentiles:
            percentiles[wavelength].append(p99)
        else:
            percentiles[wavelength] = [p99]

# Calculate mean of 99th percentiles for each wavelength
mean_percentiles = {wave: np.mean(vals) for wave, vals in percentiles.items()}

# Create DataFrame for better visualization
df_percentiles = pd.DataFrame({
    'Wavelength': list(mean_percentiles.keys()),
    '99th Percentile': list(mean_percentiles.values())
})
df_percentiles = df_percentiles.sort_values('Wavelength')
df_percentiles

Processing H5 files: 100%|██████████| 38/38 [00:09<00:00,  4.15it/s]


Unnamed: 0,Wavelength,99th Percentile
0,131,29.702076
1,171,730.570419
3,193,922.675491
2,94,9.519744


In [None]:
plt.figure(figsize=(10, 6))
plt.bar(df_percentiles['Wavelength'], df_percentiles['99th Percentile'])
plt.title('99th Percentile Pixel Intensity by AIA Wavelength')
plt.xlabel('Wavelength (Å)')
plt.ylabel('99th Percentile Intensity')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
