# <center>**Delayed replications**</center> 

In this case, we want to encode a variable number of observations, and we will use delayed replication to do so. We are going to create a BUFR sounding from data retrieved from the University of Wyoming.


In [76]:
import pandas as pd 
import eccodes as ecc
import numpy as np
from datetime import datetime


In [77]:
# Sounding request sent to the University of Wyoming server; the query string
# is split so that each parameter (datetime, id, type, src) sits on its own line.
URL = (
    "https://weather.uwyo.edu/wsgi/sounding"
    "?datetime=2025-10-07%2000:00:00"
    "&id=72305"
    "&type=TEXT:CSV"
    "&src=BUFR"
)

In [78]:
# Read the CSV served at URL into a DataFrame and report how many rows came back.
df_sounding = pd.read_csv(URL)
print(f" number of measurements {len(df_sounding.index)}")

 number of measurements 5927


In [79]:
# List the column labels available in the downloaded sounding.
print(df_sounding.columns)

Index(['time', 'longitude', 'latitude', 'pressure_hPa',
       'geopotential height_m', 'temperature_C', 'dew point temperature_C',
       'ice point temperature_C', 'relative humidity_%', 'humidity wrt ice_%',
       'mixing ratio_g/kg', 'wind direction_degree', 'wind speed_m/s'],
      dtype='object')


In [80]:

# Keep only the time, position, pressure and temperature columns.
df = df_sounding.loc[
    :, ['time', 'longitude', 'latitude', 'pressure_hPa', 'temperature_C']
]
print(df)

                     time  longitude  latitude  pressure_hPa  temperature_C
0     2025-10-06 23:09:56   -76.8777   34.7758        1023.0           22.6
1     2025-10-06 23:09:57   -76.8778   34.7758        1022.0           22.7
2     2025-10-06 23:09:58   -76.8779   34.7758        1021.1           22.9
3     2025-10-06 23:09:59   -76.8779   34.7758        1020.2           23.0
4     2025-10-06 23:10:00   -76.8780   34.7758        1019.2           23.1
...                   ...        ...       ...           ...            ...
5922  2025-10-07 00:48:38   -76.6612   34.5066          14.1          -48.1
5923  2025-10-07 00:48:39   -76.6613   34.5066          14.1          -48.1
5924  2025-10-07 00:48:40   -76.6613   34.5066          14.1          -48.1
5925  2025-10-07 00:48:41   -76.6613   34.5066          14.0          -48.1
5926  2025-10-07 00:48:42   -76.6614   34.5066          14.0          -48.0

[5927 rows x 5 columns]


In [86]:
# Sampling stride: keep one row out of every NN, i.e. rows 0, 200, 400, ...
NN = 200
# Positional slicing, so the selection does not depend on the index labels.
w_df = df.iloc[::NN]
print(f" number of levels {w_df.index.size}")
print(w_df.head())

 number of levels 30
                    time  longitude  latitude  pressure_hPa  temperature_C
0    2025-10-06 23:09:56   -76.8777   34.7758        1023.0           22.6
200  2025-10-06 23:13:16   -76.8952   34.7800         902.9           15.6
400  2025-10-06 23:16:36   -76.9043   34.7837         795.4           11.7
600  2025-10-06 23:19:56   -76.9058   34.7864         704.0           10.1
800  2025-10-06 23:23:16   -76.9010   34.7813         629.3            2.6


To encode, we need to choose a sequence of descriptors. Some variables may have to be converted to the proper units first (for example, `pressure_hPa` must be converted to pressure in Pa).

<table>
    <thead>
        <tr>
            <th>variable</th><th>BUFR key</th><th>BUFR descriptor</th>
        </tr>
    </thead>
    <tbody>
        <tr><td>pressure</td><td>pressureReducedToMeanSeaLevel</td><td>10051</td></tr>
        <tr><td>temperature_C</td><td>temperature</td><td>12023 in Celsius</td></tr>
        <tr><td>longitude</td><td>longitude</td><td>6001</td></tr>
        <tr><td>latitude</td><td>latitude</td><td>5001</td></tr>
        <tr><td>time</td><td>combination of descriptors</td><td>4001, 4002 ...</td></tr>
    </tbody>
</table>

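As we may not know the number of levels in advance, we can use delayed replication to make the sequence flexible. To this end, we start the sequence with the replication descriptor 109000 (repeat the following 9 descriptors, with the number of repetitions given in the data) followed by 31001 (the delayed descriptor replication factor), and we pass the actual number of levels through the key `inputDelayedDescriptorReplicationFactor`.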

In [87]:
def encode_sounding(df,outputFilename):
    """Encode the rows of *df* as a single BUFR message and write it to a file.

    Every row of *df* becomes one repetition of the replicated descriptor
    group (date/time, latitude, longitude, pressure, temperature). The number
    of repetitions is taken from the number of rows, which is why delayed
    replication (109000 + 31001) is used.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``time``, ``latitude``, ``longitude``,
        ``pressure_hPa`` and ``temperature_C``. ``time`` may hold either
        strings formatted as ``%Y-%m-%d %H:%M:%S`` or datetime objects.
        Only year, month, day, hour and minute are encoded; seconds are
        dropped because the sequence contains no descriptor for them.
    outputFilename : str
        Path of the BUFR file to create (opened in binary write mode).

    Returns
    -------
    None

    Notes
    -----
    ``pressure_hPa`` is multiplied by 100 before being stored;
    ``temperature_C`` is stored as given.
    The BUFR handle is released even if encoding or writing fails.
    """
    HPA_TO_PA=100 # convert hPa to Pa
    TIME_FORMAT='%Y-%m-%d %H:%M:%S'

    nlevels=df.index.size
    print(f" number of levels : {nlevels}")
    # get the handle to the bufr message
    bid=ecc.codes_bufr_new_from_samples('BUFR4')
    try:
        # populate the header keys (table versions etc.)
        ecc.codes_set(bid,'masterTablesVersionNumber',42)
        ecc.codes_set(bid,'localTablesVersionNumber',0)
        # IMPORTANT: populate inputDelayedDescriptorReplicationFactor before
        # unexpandedDescriptors, as this information goes in the data section
        # (section 4)
        ecc.codes_set_array(bid,'inputDelayedDescriptorReplicationFactor',(nlevels,))
        ecc.codes_set(bid,'numberOfSubsets',1)
        ecc.codes_set(bid,"compressedData",0) # uncompressed data
        # 109000: replicate the next 9 descriptors, count given by 31001
        unexpandedDesc=[ 109000,31001, 4001,4002,4003,4004,4005,5001,6001,10051,12023]
        ecc.codes_set_array(bid,'unexpandedDescriptors',unexpandedDesc)
        # one row per level; k is the 1-based rank used in the "#k#key" names
        for k,(_,row) in enumerate(df.iterrows(),start=1):
            dtTime=row['time']
            if not isinstance(dtTime,datetime):
                dtTime=datetime.strptime(dtTime,TIME_FORMAT)
            ecc.codes_set(bid,f"#{k}#year",dtTime.year)
            ecc.codes_set(bid,f"#{k}#month",dtTime.month)
            ecc.codes_set(bid,f"#{k}#day",dtTime.day)
            ecc.codes_set(bid,f"#{k}#hour",dtTime.hour)
            ecc.codes_set(bid,f"#{k}#minute",dtTime.minute)

            # each key has its own rank: #2#latitude etc.
            ecc.codes_set(bid,f'#{k}#latitude',row['latitude'])
            ecc.codes_set(bid,f"#{k}#longitude",row['longitude'])
            ecc.codes_set(bid,f"#{k}#pressureReducedToMeanSeaLevel",row['pressure_hPa']*HPA_TO_PA)
            ecc.codes_set(bid,f"#{k}#temperature",row['temperature_C'])
        # close the message by calling pack
        ecc.codes_set(bid,'pack',1)
        # write to the output file
        with open(outputFilename,'wb') as fout:
            ecc.codes_write(bid,fout)
    finally:
        # always release the BUFR handle to avoid memory leaks, also on error
        ecc.codes_release(bid)
    print(f" creating output BUFR file {outputFilename}")

In [88]:
def main():
    """Encode the sub-sampled sounding ``w_df`` into the file ``sounding.b``."""
    encode_sounding(w_df, 'sounding.b')


# Run the encoding only when this code is executed as the main program.
if __name__ == "__main__":
    main()

 number of levels : 30
 creating output BUFR file sounding.b


In [49]:
# Dump the content of sounding.b with the bufr_dump command-line tool.
# The command must be available on PATH: the recorded run of this cell
# failed with "bufr_dump: command not found".
!bufr_dump sounding.b

/usr/bin/bash: bufr_dump: command not found
