#  Decipher NOAA JPSS Files



In [None]:
import os, re
# Files treated as SDR input: HDF5 files in the working directory whose names
# start with "GATMO" or "SATMS".
sdr_files = sorted(
    name for name in os.listdir(".")
    if name.endswith(".h5") and name.startswith( ("GATMO", "SATMS") )
)
# Every other HDF5 file in the working directory.
other_files = sorted(
    name for name in os.listdir(".")
    if name.endswith(".h5") and not name.startswith( ("GATMO", "SATMS") )
)
# One file name per line, SDR files first.
print( "\n".join( [ *sdr_files, *other_files ] ) )

## h5py

Try to make sense of the files using the Python package h5py. First, define a recursive function that 
lists every object (group or dataset) in an HDF5 file. 

In [None]:
def printkeys( obj, prefix=None ): 
    """Recursively print the path of every object reachable from `obj`.

    Paths are printed one per line, with nested names joined by "/", in
    sorted order at each level so the listing is the same on every run.

    Parameters
    ----------
    obj : mapping-like
        Root container. It must provide `keys()` and accept slash-joined
        paths as indices (`obj["a/b"]`), as an open `h5py.File` does.
    prefix : str, optional
        Path, relative to `obj`, of the node to list. None (the default)
        lists the children of `obj` itself without printing a line for it.

    Returns
    -------
    None
    """
    if prefix is None: 
        node = obj
    else: 
        print( prefix )
        try: 
            node = obj[prefix]
        except KeyError: 
            # The name is listed by its parent but cannot be resolved; it has
            # already been printed, and there is nothing to descend into.
            return

    try: 
        keys = sorted( node.keys() )
    except AttributeError: 
        # A node without keys() is a leaf; recursion stops here.
        return

    for key in keys: 
        child = key if prefix is None else "/".join( [ prefix, key ] )
        printkeys( obj, prefix=child )

    return

Open each SDR HDF5 file and list the groups and datasets it contains. 

In [None]:
import os
import h5py
import numpy as np

# Print the object listing of every SDR file found earlier.
for file in sdr_files: 
    print( f"\n==========\nfile = {file}" )
    # The context manager closes the file even if printkeys raises part-way.
    with h5py.File( file, 'r' ) as d: 
        printkeys( d )
    

Probe one SDR file for brightness temperature data and geolocation data.

In [None]:
import h5py

# Open one SDR file read-only and show the attribute names attached to its
# brightness-temperature dataset. The context manager releases the file handle
# when the block ends, so `d` and `e` are only usable inside it.
with h5py.File( "SATMS_j02_d20250410_t0000298_e0001014_b12509_c20250410004550539000_oeac_ops.h5", 'r' ) as d: 
    e = d['All_Data/ATMS-SDR_All/BrightnessTemperature']
    print( e.attrs.keys() )

## satpy

Experiment with satpy on the same files. 



In [None]:
import satpy
# Build a satpy Scene over the SDR file list with the "atms_sdr_hdf5" reader.
d = satpy.Scene( sdr_files, reader="atms_sdr_hdf5" )

# Full help text for the Scene object (long output).
print( "\nhelp( d ) = " )
help( d )

# What the Scene reports as available, first by name and then by id.
print( "\navailable_dataset_names = " )
print( d.available_dataset_names() )

print( "\navailable_dataset_ids = " )
print( d.available_dataset_ids() )


In [None]:
# Load the datasets named "1" through "22" into the Scene
# (presumably one per instrument channel; confirm against the reader).
d.load( list( map( str, range( 1, 23 ) ) ) )

# Convert the loaded Scene to an xarray dataset. For a wider look, inspect
# e itself, e.coords.keys() or e.data_vars.keys().
e = d.to_xarray_dataset()
print( e.data_vars['1'][:] )

## NetCDF

In [None]:
from netCDF4 import Dataset

file = "SFR_v2r0_n21_s202504100000298_e202504100001014_c202504100046490.nc"

# Open read-only inside a context manager so the file is closed once its
# summary and variable table have been printed.
with Dataset( file, 'r' ) as d: 
    print( d )
    print()
    print( d.variables )

## Pan's code. 

In [None]:
import convert_satms_hdf5torad as pan
from netCDF4 import Dataset
import os

# Take the first SATMS file and the first GATMO file from the sorted SDR list.
# NOTE(review): this assumes the two first entries belong to the same granule,
# and raises IndexError if either kind of file is missing. Confirm before
# relying on the output.
inputfile = [ f for f in sdr_files if f[:5] == "SATMS" ][0]
inputfile_geo = [ f for f in sdr_files if f[:5] == "GATMO" ][0]
outputfile = "out_atms_sdr.nc"

# Run the three steps of the imported converter module: read, convert, write.
# Its internals are not visible here; see convert_satms_hdf5torad for details.
sdr = pan.sdrReader( inputfile, inputfile_geo )
rad = pan.sdr2rad( sdr )
pan.radWriter( outputfile, rad )

# Re-open the written file to inspect it. The context manager closes it even
# if one of the prints or the 'date' lookup fails.
with Dataset( outputfile, 'r' ) as d: 
    print( d )
    print( "\n\nVariables\n=========\n")
    for var in d.variables.values(): 
        print( var )
        print( )

    # First 20 entries of the 'date' variable.
    print( d.variables['date'][:20] )
