# Convert CSV to BUFR

This is inspired by example:
https://github.com/wmo-im/CSV2BUFR


In [1]:
# Let's first install CONDA on the google Colab to make easier to install ecCodes
################################################################################
# INSTALL CONDA ON GOOGLE COLAB
################################################################################
! wget https://repo.anaconda.com/miniconda/Miniconda3-py37_4.8.2-Linux-x86_64.sh
! chmod +x Miniconda3-py37_4.8.2-Linux-x86_64.sh
! bash ./Miniconda3-py37_4.8.2-Linux-x86_64.sh -b -f -p /usr/local
import sys
sys.path.append('/usr/local/lib/python3.7/site-packages/')

--2021-11-03 19:48:00--  https://repo.anaconda.com/miniconda/Miniconda3-py37_4.8.2-Linux-x86_64.sh
Resolving repo.anaconda.com (repo.anaconda.com)... 104.16.130.3, 104.16.131.3, 2606:4700::6810:8303, ...
Connecting to repo.anaconda.com (repo.anaconda.com)|104.16.130.3|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 85055499 (81M) [application/x-sh]
Saving to: ‘Miniconda3-py37_4.8.2-Linux-x86_64.sh’


2021-11-03 19:48:01 (165 MB/s) - ‘Miniconda3-py37_4.8.2-Linux-x86_64.sh’ saved [85055499/85055499]

PREFIX=/usr/local
Unpacking payload ...
Collecting package metadata (current_repodata.json): - \ | done
Solving environment: - done

## Package Plan ##

  environment location: /usr/local

  added / updated specs:
    - _libgcc_mutex==0.1=main
    - asn1crypto==1.3.0=py37_0
    - ca-certificates==2020.1.1=0
    - certifi==2019.11.28=py37_0
    - cffi==1.14.0=py37h2e261b9_0
    - chardet==3.0.4=py37_1003
    - conda-package-handling==1.6.0=py37h7b6447c_0
    

In [2]:
# Install ecCodes
# That make take a little while
!conda install -c conda-forge eccodes

!pip install eccodes

Collecting package metadata (current_repodata.json): - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - done
Solving environment: | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - done


  current version: 4.8.2
  latest version: 4.10.3

Please update conda by running

    $ conda update -n base -c defaults conda



## Package Plan ##

  environment location: /usr/local

  added / updated specs:
    - eccodes


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    _libgcc_mutex-0.1          |      conda_forge           3 KB  conda-forge
    _openmp_mute

# Import Packages

In [3]:
from eccodes import *

import pandas as pd
import os
from ipywidgets import interact_manual

import json
import re

# Review BUFR Tables

In [4]:
# Search BUFR Tables on  github
pd.options.display.max_rows = 200
table = ['A','B','C','D']
search = ''
def get_bufr_table(table,search):
    bufr_link = f"https://raw.githubusercontent.com/wmo-im/BUFR4/master/txt/BUFR_Table{table}_en.txt"
    df_bufr = pd.read_csv(bufr_link)
    if search:
        df_bufr = df_bufr.query(search)
    return df_bufr

interact_manual(get_bufr_table,table=table,search=search)

interactive(children=(Dropdown(description='table', options=('A', 'B', 'C', 'D'), value='A'), Text(value='', d…

<function __main__.get_bufr_table>

# Test ecCodes locally 

In [6]:
# Just Test it locally and see if the installation worked
ibufr = codes_bufr_new_from_samples('BUFR4')       # Creates a new valid message id from a BUFR sample
codes_set(ibufr, 'edition', 4)                     # BUFR edition number
codes_set(ibufr, 'masterTableNumber', 0)           # BUFR master table. Zero: standard WMO FM 94 BUFR tables
codes_set(ibufr, 'masterTablesVersionNumber', 31)  # Version number of master table used
 
ivalues = (307092)                                 # Template to be used
codes_set(ibufr, 'unexpandedDescriptors', ivalues) # Key name to encode the sequence number is unexpandedDescriptors

fout = open('reference.bufr', 'wb')                 # Open output file
codes_write(ibufr, fout)                           # Write the message to output file
codes_release(ibufr)                               # Release the BUFR message from memory
fout.close()                                       # Close the file

# Convert BUFR generated file to a plain ASCII readable format 
!bufr_dump -p reference.bufr > reference.plain
!bufr_dump -j s reference.bufr > reference.json

# Print the plain format
with open('reference.plain') as f:
    bufr_plain = f.read()
    print(bufr_plain)

# And a python code to encode such data template
!bufr_dump -js -Epython reference.bufr > enconding.py 

delayedDescriptorReplicationFactor= {
      1, 1, 1, 1}
shortDelayedDescriptorReplicationFactor= {
      1, 1, 1, 1, 1, 1, 1, 1, 1}
edition=4
masterTableNumber=0
bufrHeaderCentre=98
bufrHeaderSubCentre=0
updateSequenceNumber=0
dataCategory=1
internationalDataSubCategory=255
dataSubCategory=110
masterTablesVersionNumber=31
localTablesVersionNumber=0
typicalYear=2012
typicalMonth=10
typicalDay=31
typicalHour=0
typicalMinute=2
typicalSecond=0
numberOfSubsets=1
observedData=1
compressedData=0
unexpandedDescriptors=307092
wigosIdentifierSeries=MISSING
wigosIssuerOfIdentifier=MISSING
wigosIssueNumber=MISSING
wigosLocalIdentifierCharacter=MISSING
blockNumber=MISSING
stationNumber=MISSING
longStationName=MISSING
year=MISSING
month=MISSING
day=MISSING
hour=MISSING
minute=MISSING
latitude=MISSING
longitude=MISSING
heightOfStationGroundAboveMeanSeaLevel=MISSING
observationSequenceNumber=MISSING
heightOfBarometerAboveMeanSeaLevel=MISSING
nonCoordinatePressure=MISSING
nonCoordinatePressure->associa

In [21]:
# Try to generate a json file use to map BUFR data to a dataset variables and attributes
mapping_dict = {key:value.replace('\n','') for key, value in re.findall('(.*)\=(.*\{.*\n*.*\}|.*)',bufr_plain) if '->' not in key}

# Write Mapping to a json file
with open('mapping.json','w') as f:
    mapping_json = json.dumps(mapping_dict,indent=4)
    f.write(mapping_json)

print(mapping_json)

# TODO the JSON s format as more information that could be usefull while building the mapping

{
    "delayedDescriptorReplicationFactor": " {      1, 1, 1, 1}",
    "shortDelayedDescriptorReplicationFactor": " {      1, 1, 1, 1, 1, 1, 1, 1, 1}",
    "edition": "4",
    "masterTableNumber": "0",
    "bufrHeaderCentre": "98",
    "bufrHeaderSubCentre": "0",
    "updateSequenceNumber": "0",
    "dataCategory": "1",
    "internationalDataSubCategory": "255",
    "dataSubCategory": "110",
    "masterTablesVersionNumber": "31",
    "localTablesVersionNumber": "0",
    "typicalYear": "2012",
    "typicalMonth": "10",
    "typicalDay": "31",
    "typicalHour": "0",
    "typicalMinute": "2",
    "typicalSecond": "0",
    "numberOfSubsets": "1",
    "observedData": "1",
    "compressedData": "0",
    "unexpandedDescriptors": "307092",
    "wigosIdentifierSeries": "MISSING",
    "wigosIssuerOfIdentifier": "MISSING",
    "wigosIssueNumber": "MISSING",
    "wigosLocalIdentifierCharacter": "MISSING",
    "blockNumber": "MISSING",
    "stationNumber": "MISSING",
    "longStationName": "MISSIN

# Try a with a CIOOS dataset on ERDDAP


## Retrieve data through ERDDAP

In [8]:
# Single Surface Buoy Dataset Real-time Data
erddap =  "https://catalogue.hakai.org/erddap"
dataset_id = "HakaiKCBuoyResearch"

df = pd.read_csv(f"{erddap}/tabledap/{dataset_id}.csv",skiprows=[1])
meta = pd.read_csv(f"{erddap}/info/{dataset_id}/index.csv")

In [None]:
# QARTOD Could potentially run some QARTOD test here

## Define BUFR Template

### Define Mapping

In [44]:
# Define mapping to BUFR within a dictionary
# Copy and paste the mapping.json file here
bufr_mapping ={
    "delayedDescriptorReplicationFactor": ( 1, 1, 1, 1,),
    "shortDelayedDescriptorReplicationFactor": (1, 1, 1, 1, 1, 1, 1, 1, 1,),
    "edition": "4",
    "masterTableNumber": "0",
    "bufrHeaderCentre": "98",
    "bufrHeaderSubCentre": "0",
    "updateSequenceNumber": "0",
    "dataCategory": "1",
    "internationalDataSubCategory": "255",
    "dataSubCategory": "110",
    "masterTablesVersionNumber": "31",
    "localTablesVersionNumber": "0",
    "typicalYear": "2012",
    "typicalMonth": "10",
    "typicalDay": "31",
    "typicalHour": "0",
    "typicalMinute": "2",
    "typicalSecond": "0",
    "numberOfSubsets": "1",
    "observedData": "1",
    "compressedData": "0",
    "unexpandedDescriptors": "307092",
    "wigosIdentifierSeries": "MISSING",
    "wigosIssuerOfIdentifier": "MISSING",
    "wigosIssueNumber": "MISSING",
    "wigosLocalIdentifierCharacter": "MISSING",
    "blockNumber": "MISSING",
    "stationNumber": "MISSING",
    "longStationName": "MISSING",
    "year": "MISSING",
    "month": "MISSING",
    "day": "MISSING",
    "hour": "MISSING",
    "minute": "MISSING",
    "latitude": "MISSING",
    "longitude": "MISSING",
    "heightOfStationGroundAboveMeanSeaLevel": "MISSING",
    "observationSequenceNumber": "MISSING",
    "heightOfBarometerAboveMeanSeaLevel": "MISSING",
    "nonCoordinatePressure": "MISSING",
    "pressureReducedToMeanSeaLevel": "MISSING",
    "pressure": "MISSING",
    "nonCoordinateGeopotentialHeight": "MISSING",
    "#1#heightOfSensorAboveLocalGroundOrDeckOfMarinePlatform": "MISSING",
    "#1#surfaceQualifierForTemperatureData": "MISSING",
    "airTemperature": "MISSING",
    "dewpointTemperature": "MISSING",
    "#1#relativeHumidity": "MISSING",
    "#2#relativeHumidity": "MISSING",
    "#2#heightOfSensorAboveLocalGroundOrDeckOfMarinePlatform": "MISSING",
    "#2#surfaceQualifierForTemperatureData": "MISSING",
    "#1#depthBelowLandSurface": "MISSING",
    "soilTemperature": "MISSING",
    "soilMoisture": "MISSING",
    "#2#depthBelowLandSurface": "MISSING",
    "attributeOfFollowingValue": "MISSING",
    "horizontalVisibility": "MISSING",
    "cloudCoverTotal": "MISSING",
    "#1#verticalSignificanceSurfaceObservations": "MISSING",
    "cloudAmount": "MISSING",
    "heightOfBaseOfCloud": "MISSING",
    "#2#verticalSignificanceSurfaceObservations": "MISSING",
    "stateOfGround": "MISSING",
    "totalSnowDepth": "MISSING",
    "#1#timePeriod": "MISSING",
    "presentWeather": "MISSING",
    "#2#timePeriod": "MISSING",
    "totalPrecipitationOrTotalWaterEquivalent": "MISSING",
    "#3#heightOfSensorAboveLocalGroundOrDeckOfMarinePlatform": "MISSING",
    "#1#timeSignificance": "MISSING",
    "#3#timePeriod": "MISSING",
    "windDirection": "MISSING",
    "windSpeed": "MISSING",
    "#2#timeSignificance": "MISSING",
    "maximumWindGustDirection": "MISSING",
    "maximumWindGustSpeed": "MISSING",
    "#4#heightOfSensorAboveLocalGroundOrDeckOfMarinePlatform": "MISSING",
    "#4#timePeriod": "MISSING",
    "totalSunshine": "MISSING",
    "#5#timePeriod": "MISSING",
    "#1#longWaveRadiationIntegratedOverPeriodSpecified": "MISSING",
    "#2#longWaveRadiationIntegratedOverPeriodSpecified": "MISSING",
    "shortWaveRadiationIntegratedOverPeriodSpecified": "MISSING",
    "globalSolarRadiationIntegratedOverPeriodSpecified": "MISSING",
    "diffuseSolarRadiationIntegratedOverPeriodSpecified": "MISSING",
    "directSolarRadiationIntegratedOverPeriodSpecified": "MISSING",
    "#6#timePeriod": "MISSING",
    "#1#spectrographicWavelength": "MISSING",
    "#1#spectrographicWidth": "MISSING",
    "#1#globalUvIrradiation": "MISSING",
    "#2#spectrographicWavelength": "MISSING",
    "#2#spectrographicWidth": "MISSING",
    "#2#globalUvIrradiation": "MISSING"
}

In [45]:
def encode_bufr(mapping,df):


    for id,row in df.iterrows():
        ibufr = codes_bufr_new_from_samples('BUFR4')
        for key, value in mapping.items():
            print(f"{key}: {value}")
            if key in ['inputDelayedDescriptorReplicationFactor','inputShortDelayedDescriptorReplicationFactor']:
                codes_set_array(ibufr,key,value)
            elif value in df:
                codes_set(ibufr,key,row[value])
            else:
                codes_set(ibufr,key,value)

        # Encode the keys back in the data section
        codes_set(ibufr, 'pack', 1)

        outfile = open('outfile.bufr','wb')
        codes_write(ibufr, oufile)
        codes_release(ibufr)
        


In [48]:
# Let's give a try
encode_bufr(bufr_mapping,df)

delayedDescriptorReplicationFactor: (1, 1, 1, 1)


GribInternalError: ignored