# Convert ERDDAP to BUFR 
## Explore BUFR Format

This notebook is inspired by the CSV2BUFR example:
https://github.com/wmo-im/CSV2BUFR

## GOOGLE COLAB Installation


In [None]:
# First install conda on Google Colab; it makes ecCodes easier to install.
################################################################################
# INSTALL CONDA ON GOOGLE COLAB
################################################################################
# Download the pinned Miniconda installer (py37, release 4.8.2, Linux x86_64)
! wget https://repo.anaconda.com/miniconda/Miniconda3-py37_4.8.2-Linux-x86_64.sh
# Make the downloaded installer executable
! chmod +x Miniconda3-py37_4.8.2-Linux-x86_64.sh
# Run the installer with /usr/local as the install prefix (-p)
! bash ./Miniconda3-py37_4.8.2-Linux-x86_64.sh -b -f -p /usr/local
import sys
# Add the Python 3.7 site-packages directory under /usr/local to the module
# search path of the running kernel.
# NOTE(review): this path is tied to the py37 installer above — keep both in sync.
sys.path.append('/usr/local/lib/python3.7/site-packages/')

In [None]:
# Install ecCodes
# This may take a little while
# -y answers conda's confirmation prompt automatically, so the cell does not
# stall waiting for keyboard input when the notebook is run non-interactively
!conda install -y -c conda-forge eccodes

# Install the `eccodes` package from PyPI as well
!pip install eccodes

# Import Packages

In [None]:
from eccodes import *

import pandas as pd
from ipywidgets import interact_manual

import json
import re

# Review BUFR Tables

Here is a small interactive tool to review the different BUFR tables hosted on the wmo-im GitHub.

In [None]:
# Search BUFR tables on GitHub
# Allow long tables to be displayed in full
pd.set_option("display.max_rows", 800)
# Seed values for the interactive widget: table letters to choose from and an
# empty query string
table = list("ACD")
search = str()


def get_bufr_table(table, search):
    """Download one BUFR4 table from the wmo-im GitHub repository.

    Parameters
    ----------
    table : str
        Table letter inserted into the file name ``BUFR_Table{table}_en.txt``.
    search : str
        Expression handed to ``DataFrame.query``. When empty (falsy), the
        complete table is returned unfiltered.

    Returns
    -------
    pandas.DataFrame
        The table as read by ``pd.read_csv``, filtered by ``search`` if given.
    """
    url = f"https://raw.githubusercontent.com/wmo-im/BUFR4/master/txt/BUFR_Table{table}_en.txt"
    bufr_table = pd.read_csv(url)
    return bufr_table.query(search) if search else bufr_table


# Expose get_bufr_table as a manually triggered interactive widget, seeded with
# the `table` choices and the `search` default.
interact_manual(get_bufr_table, table=table, search=search)


In [None]:
# List the distinct Table D templates whose category mentions "Ocean"
pd.set_option("display.max_colwidth", None)  # do not truncate long titles
df_tableD = pd.read_csv(
    "https://raw.githubusercontent.com/wmo-im/BUFR4/master/txt/BUFR_TableD_en.txt"
)
# na=False: rows with a missing category count as non-matching instead of
# putting NaN into the mask, which cannot be used for boolean indexing
is_ocean = df_tableD["CategoryOfSequences_en"].str.contains("Ocean", na=False)
df_tableD.loc[
    is_ocean, ["Category", "CategoryOfSequences_en", "FXY1", "Title_en"]
].drop_duplicates()


# Test ecCodes locally 

By default, let's use the template **Sequence for the representation of data from moored buoys** (315008).

In [None]:
# Smoke test: check that the ecCodes installation works by writing a message
# built from template 315008 to reference.bufr
ibufr = codes_bufr_new_from_samples("BUFR4")  # Creates a new valid message id from a BUFR sample
try:
    codes_set(ibufr, "edition", 4)  # BUFR edition number
    codes_set(ibufr, "masterTableNumber", 0)  # BUFR master table. Zero: standard WMO FM 94 BUFR tables
    codes_set(ibufr, "masterTablesVersionNumber", 31)  # Version number of master table used

    ivalues = 315008  # Template to be used (a single descriptor, passed as a plain int)
    codes_set(ibufr, "unexpandedDescriptors", ivalues)  # Key name to encode the sequence number is unexpandedDescriptors

    # The context manager closes the output file even if writing fails
    with open("reference.bufr", "wb") as fout:
        codes_write(ibufr, fout)  # Write the message to output file
finally:
    # Always release the BUFR message from memory, including on error
    codes_release(ibufr)

# Convert the generated BUFR file to a plain, human-readable ASCII dump (-p)
!bufr_dump -p reference.bufr > reference.plain
# Also dump it as JSON (-j s)
!bufr_dump -j s reference.bufr > reference.json

# Read the plain dump back and print it; `bufr_plain` is kept as a notebook
# variable and reused by the following cells
with open('reference.plain') as f:
    bufr_plain = f.read()
    print(bufr_plain)

# Also generate Python code that encodes this data template
# NOTE(review): the output file name is spelled "enconding.py"; presumably
# "encoding.py" was intended — left unchanged here, confirm before renaming.
!bufr_dump -js -Epython reference.bufr > enconding.py 

In [None]:
# Normalise the plain dump, applying the substitutions strictly in this order:
# join each "{" with the line that follows it, replace double spaces with a
# single space, turn braces into parentheses, and quote MISSING values.
for _old, _new in (
    ("{\n", "{"),
    ("  ", " "),
    ("{", "("),
    ("}", ")"),
    (": MISSING", ': "MISSING"'),
):
    bufr_plain = bufr_plain.replace(_old, _new)


In [None]:
# Try to generate a JSON file used to map BUFR keys to dataset variables and attributes.
# Each "key=value" entry of the plain dump becomes one item; keys containing
# "->" are skipped.
# The pattern is a raw string: "\=", "\{" and "\}" are regex escapes, not
# Python string escapes, and would otherwise raise invalid-escape warnings.
mapping_dict = {
    key: value.replace("\n", "")
    for key, value in re.findall(r"(.*)\=(.*\{.*\n*.*\}|.*)", bufr_plain)
    if "->" not in key
}

# Write the mapping to a JSON file.
# Serialise first, so that a serialisation failure cannot leave behind a
# truncated, empty mapping.json.
mapping_json = json.dumps(mapping_dict, indent=4)
with open("mapping.json", "w") as f:
    f.write(mapping_json)

print(mapping_json)

# TODO: the structured JSON dump (`bufr_dump -j s`) has more information that could be useful while building the mapping


# Try with a CIOOS dataset on ERDDAP


## Retrieve data through ERDDAP

In [None]:
# Real-time data from a single surface buoy dataset
erddap = "https://catalogue.hakai.org/erddap"
dataset_id = "HakaiKCBuoyResearch"

# Data table as CSV; row index 1 is skipped
# (presumably the ERDDAP units row that follows the header — TODO confirm)
_data_url = f"{erddap}/tabledap/{dataset_id}.csv"
df = pd.read_csv(_data_url, skiprows=[1])

# Dataset info table (attributes and variables) as CSV
_info_url = f"{erddap}/info/{dataset_id}/index.csv"
meta = pd.read_csv(_info_url)


In [None]:
# TODO: QARTOD quality-control tests could potentially be run here


## Define BUFR Template

### Workflow
- Load mapping.json file
- Try to make an educated guess at the mapping of the different terms to a metadata or data variable
     - Use standard_name
     - Confirm units are the same or add a scale_factor term
     - Ideally, all the terms retrieved should be based on some metadata standard: >= CF 1.6, >= ACDD 1.3 or >= IOOS 1.2 Profile Standard
- Save the resulting json mapping in a public repo


Once the mapping is available, use mapping to convert to BUFR from ERDDAP. This could potentially be automated if all the terms are matched to some standard global and variable attributes.

## What IOOS does
The following link has a lot of information on how IOOS is ingesting ERDDAP data to GTS and the required ERDDAP dataset format and attributes:

https://ioos.github.io/ioos-metadata/ioos-metadata-profile-v1-2.html#requirements-for-ioos-dataset-ndbcgts-ingest

## Manual mapping

In [None]:
# Define the mapping to BUFR within a dictionary
# (copy and paste the content of the mapping.json file here).
#
# How encode_bufr interprets each value:
#   - list/tuple        -> written with codes_set_array
#   - name of a df column -> replaced, per row, by that column's value
#   - anything else     -> written as-is with codes_set
# "MISSING" entries are presumably placeholders to be replaced by the name of
# the matching dataset column — TODO confirm how they should be encoded.
# NOTE(review): keys of the form "#n#name" look like ecCodes rank syntax for the
# n-th occurrence of a repeated key — confirm.
bufr_mapping = {
    # Replication factors (tuples, so they are set as arrays)
    "delayedDescriptorReplicationFactor": (1, 1, 1, 1),
    "shortDelayedDescriptorReplicationFactor": (1, 1, 1, 1, 1, 1, 1),
    # Message-level keys, with the literal values taken from the generated mapping
    "edition": "4",
    "masterTableNumber": "0",
    "bufrHeaderCentre": "98",
    "bufrHeaderSubCentre": "0",
    "updateSequenceNumber": "0",
    "dataCategory": "1",
    "internationalDataSubCategory": "255",
    "dataSubCategory": "110",
    "masterTablesVersionNumber": "31",
    "localTablesVersionNumber": "0",
    "typicalYear": "2012",
    "typicalMonth": "10",
    "typicalDay": "31",
    "typicalHour": "0",
    "typicalMinute": "2",
    "typicalSecond": "0",
    "numberOfSubsets": "1",
    "observedData": "1",
    "compressedData": "0",
    # Template (sequence) number used for the message
    "unexpandedDescriptors": "315008",
    # Keys of the template, all still unmapped
    "marineObservingPlatformIdentifier": "MISSING",
    "stationOrSiteName": "MISSING",
    "dataBuoyType": "MISSING",
    "year": "MISSING",
    "month": "MISSING",
    "day": "MISSING",
    "hour": "MISSING",
    "minute": "MISSING",
    "latitude": "MISSING",
    "longitude": "MISSING",
    "nonCoordinatePressure": "MISSING",
    "pressureReducedToMeanSeaLevel": "MISSING",
    "#1#heightOfSensorAboveWaterSurface": "MISSING",
    "airTemperature": "MISSING",
    "dewpointTemperature": "MISSING",
    "relativeHumidity": "MISSING",
    "#2#heightOfSensorAboveWaterSurface": "MISSING",
    "#1#timeSignificance": "MISSING",
    "#1#timePeriod": "MISSING",
    "windDirection": "MISSING",
    "windSpeed": "MISSING",
    "#2#timeSignificance": "MISSING",
    "#2#timePeriod": "MISSING",
    "maximumWindGustSpeed": "MISSING",
    "#3#timePeriod": "MISSING",
    "#3#heightOfSensorAboveWaterSurface": "MISSING",
    "#1#temperatureObservationPrecision": "MISSING",
    "#1#depthBelowWaterSurface": "MISSING",
    "seaSurfaceTemperature": "MISSING",
    "horizontalVisibility": "MISSING",
    "#4#timePeriod": "MISSING",
    "totalPrecipitationOrTotalWaterEquivalent": "MISSING",
    "#5#timePeriod": "MISSING",
    "longWaveRadiationIntegratedOverPeriodSpecified": "MISSING",
    "shortWaveRadiationIntegratedOverPeriodSpecified": "MISSING",
    "netRadiationIntegratedOverPeriodSpecified": "MISSING",
    "globalSolarRadiationIntegratedOverPeriodSpecified": "MISSING",
    "diffuseSolarRadiationIntegratedOverPeriodSpecified": "MISSING",
    "directSolarRadiationIntegratedOverPeriodSpecified": "MISSING",
    "#1#durationOfWaveRecord": "MISSING",
    "significantWaveHeight": "MISSING",
    "maximumWaveHeight": "MISSING",
    "averageWavePeriod": "MISSING",
    "spectralPeakWavePeriod": "MISSING",
    "directionFromWhichDominantWavesAreComing": "MISSING",
    "directionalSpreadOfDominantWave": "MISSING",
    "#2#durationOfWaveRecord": "MISSING",
    "maximumNonDirectionalSpectralWaveDensity": "MISSING",
    "wavebandCentralFrequency": "MISSING",
    "spectralWaveDensity": "MISSING",
    "meanDirectionFromWhichWavesAreComing": "MISSING",
    "principalDirectionFromWhichWavesAreComing": "MISSING",
    "firstNormalizedPolarCoordinateFromFourierCoefficients": "MISSING",
    "secondNormalizedPolarCoordinateFromFourierCoefficients": "MISSING",
    "#2#temperatureObservationPrecision": "MISSING",
    "#1#indicatorForDigitization": "MISSING",
    "#2#depthBelowWaterSurface": "MISSING",
    "#1#oceanographicWaterTemperature": "MISSING",
    "#3#temperatureObservationPrecision": "MISSING",
    "#2#indicatorForDigitization": "MISSING",
    "methodOfSalinityOrDepthMeasurement": "MISSING",
    "#3#depthBelowWaterSurface": "MISSING",
    "#2#oceanographicWaterTemperature": "MISSING",
    "salinity": "MISSING",
    "durationAndTimeOfCurrentMeasurement": "MISSING",
    "#4#depthBelowWaterSurface": "MISSING",
    "currentDirection": "MISSING",
    "speedOfCurrent": "MISSING",
}


In [None]:
#TODO Make this work !
def encode_bufr(mapping, df):
    """
    Encode a BUFR file from a mapping dictionary and a pandas dataframe.

    One BUFR message is built per row of ``df`` and the messages are written,
    in row order, to ``outfile.bufr`` in the current working directory.

    Parameters
    ----------
    mapping : dict
        BUFR key -> value. A list or tuple is set with ``codes_set_array``;
        a value that names a column of ``df`` is replaced by the current
        row's value of that column; any other value is set as-is with
        ``codes_set``.
    df : pandas.DataFrame
        Source data; each row produces one message.

    Returns
    -------
    None
    """
    # Open the output once for the whole run. Opening it with "wb" inside the
    # row loop would truncate the file for every row (keeping only the last
    # message) and leave the handle unclosed.
    with open("outfile.bufr", "wb") as outfile:
        for _, row in df.iterrows():
            ibufr = codes_bufr_new_from_samples("BUFR4")
            try:
                for key, value in mapping.items():
                    print(f"{key}: {value}")
                    if isinstance(value, (list, tuple)):
                        codes_set_array(ibufr, key, value)
                    elif value in df:
                        # The mapping value is a column name: take the row's data
                        codes_set(ibufr, key, row[value])
                    else:
                        codes_set(ibufr, key, value)

                # Encode the keys back in the data section
                codes_set(ibufr, "pack", 1)
                codes_write(ibufr, outfile)
            finally:
                # Release the message handle even if a key could not be set
                codes_release(ibufr)


In [None]:
# Let's give it a try: encode the ERDDAP dataframe with the manual mapping
encode_bufr(bufr_mapping, df)
