# <center>**Bitmap adding quality information**</center>

In this case we want to attach extra quality information (an error estimate) to some keys of our message. To do this we need to define a **bitmap** before setting the **unexpandedDescriptors**.
If we use delayed replication, the replication factors must also be defined before we set the unexpandedDescriptors.
Whether the data are compressed or uncompressed (compressedData=1/0) affects the way we define the bitmap.

In [10]:
import eccodes as ecc
import numpy as np
import pandas as pd
from datetime import datetime 

In [11]:
# Path to the sample CSV file read below.
inputFile="/home/marg/ECCODES_2025/ecTrain/bitmap.csv"
# Load the CSV into a DataFrame and print its first rows to check the columns.
df=pd.read_csv(inputFile)
print(df.head())

      id       ymd  hhmm  latitude  longitude  wheight  avg_period  pressure  \
0  41047  20250929  1810     27.56     -71.48      4.1         9.9    1009.4   
1  40081  20250929  1915     28.01     -72.00      4.4         9.2    1002.4   
2  42052  20250929  2015     27.20     -73.40      4.3         9.7    1001.4   
3  40087  20250929  2310     27.20     -72.90      4.2         9.8    1001.4   

   pressure_err  
0           125  
1           150  
2           147  
3           147  


In [12]:
def bufr_encode(df,outputFile):
    """Encode the rows of ``df`` into one BUFR edition 4 message with a bitmap.

    The message uses compressed data and a delayed replication (111000) of an
    11-element sequence. A bitmap with a single 0 per sequence attaches a
    first-order statistical value to the last element of the sequence
    (``pressureReducedToMeanSeaLevel``).

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``id``, ``ymd`` (YYYYMMDD), ``hhmm`` (HHMM),
        ``latitude``, ``longitude``, ``wheight``, ``avg_period``, ``pressure``
        and ``pressure_err``.
    outputFile : str
        Path of the BUFR file to write (opened in binary write mode).

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If a ``ymd``/``hhmm`` pair cannot be parsed as a timestamp.
    """
    SEQUENCE_LENGTH = 11  # element descriptors inside the 111000 replication
    nobs = df.index.size

    # Zero-pad before parsing: pandas reads ymd/hhmm as integers, so 00:30
    # arrives as 30 and would otherwise be misread as 03:00 by strptime.
    timeStamps = [
        datetime.strptime(f"{int(x):08d}{int(y):04d}", '%Y%m%d%H%M')
        for x, y in zip(df['ymd'].values, df['hhmm'].values)
    ]

    # (key name, one value per observation), in the order they are set.
    per_observation = (
        ("stationOrSiteName", [str(x) for x in df['id'].values]),
        ("year", [t.year for t in timeStamps]),
        ("month", [t.month for t in timeStamps]),
        ("day", [t.day for t in timeStamps]),
        ("hour", [t.hour for t in timeStamps]),
        ("minute", [t.minute for t in timeStamps]),
        ("latitude", df['latitude'].values),
        ("longitude", df['longitude'].values),
        ("heightOfWaves", df['wheight'].values),
        ("periodOfWaves", df['avg_period'].values),
        ("pressureReducedToMeanSeaLevel", df['pressure'].values),
        ("pressureReducedToMeanSeaLevel->firstOrderStatisticalValue",
         df['pressure_err'].values),
    )

    bid = ecc.codes_bufr_new_from_samples('BUFR4')
    try:
        ecc.codes_set(bid, 'edition', 4)

        # Bitmap for one sequence: 0 marks the element that gets a statistic
        # (the 11th one), 1 marks elements without it. Repeated per observation.
        dpi = np.array([1,1,1,1,1,1,1,1,1,1,0],dtype=int)
        dpi = np.tile(dpi, nobs)
        print(f" bitmap size :  {dpi.size} \n bit map {dpi}")
        ecc.codes_set_array(bid, 'inputDataPresentIndicator', dpi)

        # One factor per 31001 in unexpandedDesc, in order of appearance:
        # sequence replications, bitmap length, number of 224255 markers.
        delayedRep = np.array([nobs, nobs*SEQUENCE_LENGTH, nobs])
        ecc.codes_set_array(bid, 'inputDelayedDescriptorReplicationFactor', delayedRep)

        ecc.codes_set(bid, 'masterTablesVersionNumber', 42)
        ecc.codes_set(bid, 'numberOfSubsets', nobs)  # important to set the number of subsets
        # NOTE(review): numberOfSubsets and the 111000 replication factor are
        # both nobs, and the values below are set as scalars -- confirm that
        # this layout is the intended one.
        unexpandedDesc = (
            111000, 31001, 1015, 4001, 4002,
            4003, 4004, 4005, 5001, 6001,
            22021, 22011, 10051, 224000, 236000,
            101000, 31001, 31031, 8023, 101000,
            31001, 224255,)  # 22 descriptors
        ecc.codes_set(bid, 'compressedData', 1)  # compressed data

        # Create the structure of the data section. The bitmap and the
        # replication factors above must already be in place at this point.
        ecc.codes_set_array(bid, 'unexpandedDescriptors', unexpandedDesc)

        for i in range(nobs):
            for key, values in per_observation:
                ecc.codes_set(bid, f"#{i+1}#{key}", values[i])
        # Set fos to 9: best estimate of standard deviation.
        ecc.codes_set(bid, 'firstOrderStatistics', 9)

        # Encode the data section from the values set above.
        ecc.codes_set(bid, "pack", 1)
        with open(outputFile, 'wb') as fout:
            ecc.codes_write(bid, fout)
        print(f" encoded bufr into file : {outputFile}")
    finally:
        # Release the handle even when encoding or writing fails.
        ecc.codes_release(bid)


def main():
    """Read the sample CSV and encode it into a BUFR file with a bitmap."""
    csv_path = "/home/marg/ECCODES_2025/ecTrain/bitmap.csv"
    bufr_path = "/home/marg/ECCODES_2025/ecTrain/bitmap.b"
    bufr_encode(pd.read_csv(csv_path), bufr_path)


# Run the encoding only when executed as a script or notebook cell.
if __name__ == "__main__":
    main()
    

 bitmap size :  44 
 bit map [1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1
 1 1 1 1 1 1 0]
 encoded bufr into file : /home/marg/ECCODES_2025/ecTrain/bitmap.b


If we want to add statistical information to other keys, we just need to add more zeros (0) to the bitmap and close it accordingly at the end, providing as many 224255 descriptors as there are zeros in the bitmap.

In [13]:
def bufr_encode_with2e(df,outputFile):
    """Encode ``df`` into a BUFR message with statistics on two keys.

    One subset is written per row of ``df`` using compressed data, so each
    key is set once with an array holding one value per subset. The bitmap
    covers the last four element descriptors before 224000; its two zeros
    attach a first-order statistical value to ``periodOfWaves`` and
    ``pressureReducedToMeanSeaLevel``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``id``, ``ymd`` (YYYYMMDD), ``hhmm`` (HHMM),
        ``latitude``, ``longitude``, ``wheight``, ``avg_period``, ``pressure``
        and ``pressure_err``.
    outputFile : str
        Path of the BUFR file to write (opened in binary write mode).

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If a ``ymd``/``hhmm`` pair cannot be parsed as a timestamp.
    """
    nobs = df.index.size
    print(f" number of obs: {nobs}")

    # Zero-pad before parsing: pandas reads ymd/hhmm as integers, so 00:30
    # arrives as 30 and would otherwise be misread as 03:00 by strptime.
    timeStamps = [
        datetime.strptime(f"{int(x):08d}{int(y):04d}", '%Y%m%d%H%M')
        for x, y in zip(df['ymd'].values, df['hhmm'].values)
    ]
    stations = [str(x) for x in df['id'].values]

    bid = ecc.codes_bufr_new_from_samples('BUFR4')
    try:
        ecc.codes_set(bid, 'edition', 4)

        # Bitmap affecting the last 2 keys: 0 marks an element that gets a
        # statistic, 1 an element without it.
        bitmap = [1, 1, 0, 0]
        dpi = np.array(bitmap, dtype=int)
        print(f" bitmap size :  {dpi.size} \n bit map {dpi}")
        ecc.codes_set_array(bid, 'inputDataPresentIndicator', dpi)

        # One factor per 31001 in unexpandedDesc, in order of appearance:
        # the bitmap length, then the number of 224255 markers (one per 0).
        # Both are derived from the bitmap so they stay consistent for any
        # number of observations, not only when nobs equals the bitmap size.
        delayedRep = np.array([len(bitmap), bitmap.count(0)])
        ecc.codes_set_array(bid, 'inputDelayedDescriptorReplicationFactor', delayedRep)

        ecc.codes_set(bid, 'masterTablesVersionNumber', 42)
        ecc.codes_set(bid, 'numberOfSubsets', nobs)  # important to set the number of subsets
        ecc.codes_set(bid, "compressedData", 1)  # compressed data
        unexpandedDesc = (
            1015, 4001, 4002,
            4003, 4004, 4005, 5001, 6001,
            22021, 22011, 10051, 224000, 236000,
            101000, 31001, 31031, 8023, 101000,
            31001, 224255,)

        # Create the structure of the data section. The bitmap and the
        # replication factors above must already be in place at this point.
        ecc.codes_set_array(bid, 'unexpandedDescriptors', unexpandedDesc)

        ecc.codes_set_array(bid, "stationOrSiteName", stations)
        ecc.codes_set_array(bid, 'year', [t.year for t in timeStamps])
        ecc.codes_set_array(bid, 'month', [t.month for t in timeStamps])
        ecc.codes_set_array(bid, 'day', [t.day for t in timeStamps])
        ecc.codes_set_array(bid, 'hour', [t.hour for t in timeStamps])
        ecc.codes_set_array(bid, 'minute', [t.minute for t in timeStamps])
        ecc.codes_set_array(bid, 'latitude', df['latitude'].values)
        ecc.codes_set_array(bid, 'longitude', df['longitude'].values)

        ecc.codes_set_array(bid, 'heightOfWaves', df['wheight'].values)
        ecc.codes_set_array(bid, 'periodOfWaves', df['avg_period'].values)
        ecc.codes_set_array(bid, 'pressureReducedToMeanSeaLevel', df['pressure'].values)
        ecc.codes_set_array(bid, 'pressureReducedToMeanSeaLevel->firstOrderStatisticalValue', df['pressure_err'].values)
        # The wave-period statistic is a constant 2 for every observation.
        ecc.codes_set_array(bid, 'periodOfWaves->firstOrderStatisticalValue', 2*np.ones(nobs))
        # Set the fos to 9: best estimate of standard deviation.
        ecc.codes_set(bid, 'firstOrderStatistics', 9)

        # Encode the data section from the values set above.
        ecc.codes_set(bid, "pack", 1)
        with open(outputFile, 'wb') as fout:
            ecc.codes_write(bid, fout)
        print(f" encoded bufr into file : {outputFile}")
    finally:
        # Release the handle even when encoding or writing fails.
        ecc.codes_release(bid)


def main():
    """Read the sample CSV and encode it into a BUFR file with two statistics."""
    csv_path = "/home/marg/ECCODES_2025/ecTrain/bitmap.csv"
    bufr_path = "/home/marg/ECCODES_2025/ecTrain/bitmap_2e.b"
    bufr_encode_with2e(pd.read_csv(csv_path), bufr_path)


# Run the encoding only when executed as a script or notebook cell.
if __name__ == "__main__":
    main()

 number of obs: 4
 bitmap size :  4 
 bit map [1 1 0 0]
 encoded bufr into file : /home/marg/ECCODES_2025/ecTrain/bitmap_2e.b


Some of the possible values of the first-order statistics descriptor (008023):
2 Maximum value
3 Minimum value
4 Mean value
...

9 Best estimate of standard deviation (N-1)