# <center>**Adding more data to the BUFR file** </center>

Here,we will add more observations to the BUFR file. For example we may observations for different times. 

In [16]:
import pandas as pd
fname="nassau3.csv"
df_obs=pd.read_csv(fname)
print(df_obs.head())

      id       ymd  hhmm  latitude  longitude  wheight  avg_period  pressure
0  41049  20251001  1540     27.51     -62.27      6.6         9.9    1009.4
1  41049  20251001  1600     27.54     -62.77      5.5         7.5    1012.2
2  41049  20251001  1710     27.51     -62.77      5.2         7.8    1014.5


so we have now 3 more observations taken at different times from the same station
Our sequence has to accommodate this. There are two ways of doing this
1) using compressedData=1 ( compressed data,the values of each descriptor are grouped together across all subsets)
2) using compressedData=0 ( individual subsets are stored individually and one after the other)

In [17]:
import eccodes as ecc
from datetime import datetime
import numpy as np

In [18]:
def encode_bufr(df_obs,outputFilename):
    '''
    encodes df_obs into a BUFR file refereed by outputFilename, uses compressedData=1
    '''
    nobs=df_obs.index.size
    bid=ecc.codes_bufr_new_from_samples('BUFR4') # creates a bufr edition 4 ( WMO recommended)
    
    
    ecc.codes_set(bid,'masterTablesVersionNumber',42)
    ecc.codes_set(bid,'localTablesVersionNumber',0)
    
    ecc.codes_set(bid,'numberOfSubsets',nobs)
    # here, we read the YMD and HHMM to build the time stamp
    # this timestamp is used to populate the BUFR time keys
    ymd=df_obs['ymd'].values[0]
    hhmm=df_obs['hhmm'].values[0] 
    str_ymdhm=str(ymd)+str(hhmm)
    timeStamp=datetime.strptime(str_ymdhm,'%Y%m%d%H%M')
    ecc.codes_set(bid,"typicalYear",timeStamp.year)
    ecc.codes_set(bid,"typicalMonth",timeStamp.month)
    ecc.codes_set(bid,"typicalDay",timeStamp.day)
    ecc.codes_set(bid,"typicalHour",timeStamp.hour)
    ecc.codes_set(bid,"typicalMinute",timeStamp.minute)
    ecc.codes_set(bid,'numberOfSubsets',nobs)
    ecc.codes_set(bid,"observedData",1)
    ecc.codes_set(bid,'compressedData',1) # data is 'compressed'
    # here we define the BUFR structure through the descriptors we want to use
    unexpandedDescr=[ 1015,4001,4002,4003,4004,4005,
                     5001,6001,22021,22011,10051]
    ecc.codes_set_array(bid,'unexpandedDescriptors',unexpandedDescr)
    station_id=np.array([str(x) for x in df_obs['id'].values])
    ecc.codes_set_array(bid,'stationOrSiteName',station_id)

    ymd_values  = df_obs['ymd'].values
    hhmm_values = df_obs['hhmm'].values
    dt_dates    = [datetime.strptime(str(x)+str(y),'%Y%m%d%H%M') for x,y in zip(ymd_values,hhmm_values)]

    years      = [t.year for t in dt_dates]
    months     = [t.month for t in dt_dates]
    days       = [t.day for t in dt_dates]
    hours      = [t.hour for t in dt_dates]
    minutes    = [t.minute for t in dt_dates]
    ecc.codes_set_array(bid,'year',years)
    ecc.codes_set_array(bid,'month',months)
    ecc.codes_set_array(bid,'day',days)
    ecc.codes_set_array(bid,'hour',hours)
    ecc.codes_set_array(bid,'minute',minutes)
   
    lat=df_obs['latitude'].values
    lon=df_obs['longitude'].values
    ecc.codes_set_array(bid,"latitude",lat)
    ecc.codes_set_array(bid,"longitude",lon)
    waveHeight=df_obs['wheight'].values
    ecc.codes_set_array(bid,"heightOfWaves",waveHeight)
    avg_period=df_obs['avg_period'].values
    ecc.codes_set_array(bid,"periodOfWaves",avg_period)
    pressure=df_obs['pressure'].values
    ecc.codes_set_array(bid,'pressureReducedToMeanSeaLevel',pressure)
    ecc.codes_set(bid,"pack",1)
    with open(outputFilename,"wb") as fout:
        ecc.codes_write(bid,fout)
    ecc.codes_release(bid)
    print(f" created file {outputFilename}")
    return 

def main():
    inputFile='/home/marg/ECCODES_2025/ecTrain/nassau3.csv'
    outputFile='/home/marg/ECCODES_2025/ecTrain/nassau3.b'
    df=pd.read_csv(inputFile)
    print(df.head())
    encode_bufr(df,outputFile)

if __name__=="__main__":
    main()


      id       ymd  hhmm  latitude  longitude  wheight  avg_period  pressure
0  41049  20251001  1540     27.51     -62.27      6.6         9.9    1009.4
1  41049  20251001  1600     27.54     -62.77      5.5         7.5    1012.2
2  41049  20251001  1710     27.51     -62.77      5.2         7.8    1014.5
 created file /home/marg/ECCODES_2025/ecTrain/nassau3.b


If we use compressedData=1 (compressed data) we then see arrays of data for each descriptor

 {
                        "key" : "periodOfWaves",
                        "value" :
                        [
                          10, 8, 8 
                        ],
                        "units" : "s"
                      },
                      {
                        "key" : "pressureReducedToMeanSeaLevel",
                        "value" :
                        [
                          1010, 1010, 1010 
                        ],
                        "units" : "Pa"
                      }


we can modify the code above to use compressedData=0 so the data will be spit in individual subsets each containing all the descriptors 

In [19]:
def encode_bufr_uncompressed(df_obs,outputFilename):
    '''
    encodes df_obs into a bufr file referred by outputFilename uncompressed data
    '''
    nobs=df_obs.index.size
    bid=ecc.codes_bufr_new_from_samples('BUFR4') # creates a bufr edition 4 ( WMO recommended)
    
    
    ecc.codes_set(bid,'masterTablesVersionNumber',42)
    ecc.codes_set(bid,'localTablesVersionNumber',0)
    
    ecc.codes_set(bid,'numberOfSubsets',nobs)
    # here, we read the YMD and HHMM to build the time stamp
    # this timestamp is used to populate the BUFR time keys
    ymd=df_obs['ymd'].values[0]
    hhmm=df_obs['hhmm'].values[0] 
    str_ymdhm=str(ymd)+str(hhmm)
    timeStamp=datetime.strptime(str_ymdhm,'%Y%m%d%H%M')
    ecc.codes_set(bid,"typicalYear",timeStamp.year)
    ecc.codes_set(bid,"typicalMonth",timeStamp.month)
    ecc.codes_set(bid,"typicalDay",timeStamp.day)
    ecc.codes_set(bid,"typicalHour",timeStamp.hour)
    ecc.codes_set(bid,"typicalMinute",timeStamp.minute)
    ecc.codes_set(bid,'numberOfSubsets',nobs)
    ecc.codes_set(bid,"observedData",1)
    ecc.codes_set(bid,'compressedData',0) # data is  NOT 'compressed'
    # here we define the BUFR structure through the descriptors we want to use
    unexpandedDescr=[ 1015,4001,4002,4003,4004,4005,
                     5001,6001,22021,22011,10051]
    ecc.codes_set_array(bid,'unexpandedDescriptors',unexpandedDescr)
    
    for i,row in df_obs.iterrows():
        st_id  = int(row['id'])
        ecc.codes_set(bid,f'#{i+1}#stationOrSiteName',str(st_id))
        ymd    = str(df_obs['ymd'].values[0])
        hhmm   = str(df_obs['hhmm'].values[0])
    
        date   = datetime.strptime(ymd+hhmm,"%Y%m%d%H%M")

 
        ecc.codes_set(bid,f'#{i+1}#year',date.year)
        ecc.codes_set(bid,f'#{i+1}#month',date.month)
        ecc.codes_set(bid,f'#{i+1}#day',date.day)
        ecc.codes_set(bid,f'#{i+1}#hour',date.hour)
        ecc.codes_set(bid,f'#{i+1}#minute',date.minute)
   
        lat=df_obs['latitude'].values[0]
        lon=df_obs['longitude'].values[0]
        ecc.codes_set(bid,f"#{i+1}#latitude",lat)
        ecc.codes_set(bid,f"#{i+1}#longitude",lon)
        waveHeight=df_obs['wheight'].values[0]
        ecc.codes_set(bid,f"#{i+1}#heightOfWaves",waveHeight)
        avg_period=df_obs['avg_period'].values[0]
        ecc.codes_set(bid,f"#{i+1}#periodOfWaves",avg_period)
        pressure=df_obs['pressure'].values[0]
        ecc.codes_set(bid,f'#{i+1}#pressureReducedToMeanSeaLevel',pressure)
    ecc.codes_set(bid,"pack",1)
    with open(outputFilename,"wb") as fout:
        ecc.codes_write(bid,fout)
    ecc.codes_release(bid)
    print(f" created file {outputFilename}")
    return 

def main():
    inputFile='/home/marg/ECCODES_2025/ecTrain/nassau3.csv'
    outputFile='/home/marg/ECCODES_2025/ecTrain/nassau3_unc.b'
    df=pd.read_csv(inputFile)
    print(df.head())
    encode_bufr_uncompressed(df,outputFile)

if __name__=="__main__":
    main()


      id       ymd  hhmm  latitude  longitude  wheight  avg_period  pressure
0  41049  20251001  1540     27.51     -62.27      6.6         9.9    1009.4
1  41049  20251001  1600     27.54     -62.77      5.5         7.5    1012.2
2  41049  20251001  1710     27.51     -62.77      5.2         7.8    1014.5
 created file /home/marg/ECCODES_2025/ecTrain/nassau3_unc.b


In this case, as the data is uncompressed, we have it split into individual subsets


    {
          "key" : "subsetNumber",
          "value" : 1
        },
        {
          "key" : "stationOrSiteName",
          "value" : "41049",
          "units" : "CCITT IA5"
        },
        [

          {
            "key" : "year",
            "value" : 2025,
            "units" : "a"
          },
          [

            {
              "key" : "month",
              "value" : 10,
              "units" : "mon"
