# Change long names
This notebook is designed to modify the `long_name` attribute of the variables, using the `altLabel` from the P35 Vocabulary.
In addition the `long_name` is modified so that the variable name appears first.

## Examples
```bash
Water body dissolved inorganic nitrogen -> Inorganic nitrogen_dissolved 
Deepest depth for Water body dissolved oxygen concentration → Oxygen deepest depth
```

## Input
The notebook will loop over the netCDF files located inside the directory `datadir`.

<div class="alert alert-block alert-info">
<b>Info:</b> If you need to modify the variable names, use the script <code>./src/bash/change_varnames.bash</code>.
</div>

In [1]:
using Pkg
Pkg.activate("../")
Pkg.instantiate()
using NCDatasets
using Glob
using DIVAnd

[32m[1m  Activating[22m[39m project at `~/Projects/EMODnet/EMODnet-Chemistry/src/julia`


In [37]:
standard_names_dict =  Dict(
    "Water_body_ammonium" => "mole_concentration_of_ammonium_in_sea_water",
    "Water_body_chlorophyll-a" => "mass_concentration_of_chlorophyll_a_in_sea_water",
    "Water_body_phosphate" => "mole_concentration_of_phosphate_in_sea_water",
    "Water_body_dissolved_oxygen_concentration" => "mole_concentration_of_dissolved_molecular_oxygen_in_sea_water",
    "Water_body_silicate" => "mole_concentration_of_silicate_in_sea_water",
    "Water_body_dissolved_inorganic_nitrogen_(DIN)" => "mole_concentration_of_dissolved_inorganic_nitrogen_in_sea_water",
    "Water_body_dissolved_inorganic_nitrogen" => "mole_concentration_of_dissolved_inorganic_nitrogen_in_sea_water",
    "Water_body_dissolved_oxygen_saturation" => "fractional_saturation_of_oxygen_in_sea_water",
    "Water body_ammonium" => "mole_concentration_of_ammonium_in_sea_water",
    "Water body chlorophyll-a" => "mass_concentration_of_chlorophyll_a_in_sea_water",
    "Water body phosphate" => "mole_concentration_of_phosphate_in_sea_water",
    "Water body dissolved oxygen concentration" => "mole_concentration_of_dissolved_molecular_oxygen_in_sea_water",
    "Water body silicate" => "mole_concentration_of_silicate_in_sea_water",
    "Water body dissolved inorganic nitrogen (DIN)" => "mole_concentration_of_dissolved_inorganic_nitrogen_in_sea_water",
    "Water body dissolved inorganic nitrogen" => "mole_concentration_of_dissolved_inorganic_nitrogen_in_sea_water",
    "Water body dissolved oxygen saturation" => "fractional_saturation_of_oxygen_in_sea_water")

Dict{String, String} with 16 entries:
  "Water_body_ammonium"     => "mole_concentration_of_ammonium_in_sea_water"
  "Water_body_dissolved_ox… => "fractional_saturation_of_oxygen_in_sea_water"
  "Water body chlorophyll-… => "mass_concentration_of_chlorophyll_a_in_sea_wate…
  "Water_body_phosphate"    => "mole_concentration_of_phosphate_in_sea_water"
  "Water body dissolved in… => "mole_concentration_of_dissolved_inorganic_nitro…
  "Water body dissolved ox… => "fractional_saturation_of_oxygen_in_sea_water"
  "Water_body_silicate"     => "mole_concentration_of_silicate_in_sea_water"
  "Water_body_dissolved_in… => "mole_concentration_of_dissolved_inorganic_nitro…
  "Water_body_dissolved_ox… => "mole_concentration_of_dissolved_molecular_oxyge…
  "Water_body_dissolved_in… => "mole_concentration_of_dissolved_inorganic_nitro…
  "Water body dissolved ox… => "mole_concentration_of_dissolved_molecular_oxyge…
  "Water body dissolved in… => "mole_concentration_of_dissolved_inorganic_nitro…
  "Wate

## Generate list of files

In [56]:
#datadir = "/home/ctroupin/data/EMODnet-Chemistry/Eutrophication2024/Results/ogs04/All_European_Seas-water_body/"

# datadir = "/media/ctroupin/T7 Shield/data/EMODnet-Chemistry/Eutrophication2024/Results/By_sea_regions-water_body/Mediterranean_Sea/"
# datadir = "/media/ctroupin/T7 Shield/data/EMODnet-Chemistry/Eutrophication2024/Results/By_sea_regions-water_body/Arctic_Ocean"
# datadir = "/media/ctroupin/T7 Shield/data/EMODnet-Chemistry/Eutrophication2024/Results/By_sea_regions-water_body/Baltic_Sea/"
datadir = "/media/ctroupin/T7 Shield/data/EMODnet-Chemistry/Eutrophication2024/Results/By_sea_regions-water_body/Northeast_Atlantic_Ocean/"
# datadir = "/media/ctroupin/T7 Shield/data/EMODnet-Chemistry/Eutrophication2024/Results/Coastal_areas-water_body/Mediterranean_Sea_-_Po_River"
# datadir = "/media/ctroupin/T7 Shield/data/EMODnet-Chemistry/Eutrophication2024/Results/Coastal_areas-water_body/Baltic_Sea_-_Gulf_of_Riga/"
# datadir = "/media/ctroupin/T7 Shield/data/EMODnet-Chemistry/Eutrophication2024/Results/Coastal_areas-water_body/Black_Sea-_Danube_Delta"
# datadir = "/media/ctroupin/T7 Shield/data/EMODnet-Chemistry/Eutrophication2024/Results/Coastal_areas-water_body/Northeast_Atlantic_Ocean_-_Loire_River"

datafilelist = glob("*nc", datadir);
@show(datafilelist);

datafilelist = ["/media/ctroupin/T7 Shield/data/EMODnet-Chemistry/Eutrophication2024/Results/By_sea_regions-water_body/Northeast_Atlantic_Ocean/Water_body_chlorophyll-a.4Danl.nc", "/media/ctroupin/T7 Shield/data/EMODnet-Chemistry/Eutrophication2024/Results/By_sea_regions-water_body/Northeast_Atlantic_Ocean/Water_body_dissolved_inorganic_nitrogen.4Danl.nc", "/media/ctroupin/T7 Shield/data/EMODnet-Chemistry/Eutrophication2024/Results/By_sea_regions-water_body/Northeast_Atlantic_Ocean/Water_body_dissolved_oxygen_concentration.4Danl.nc", "/media/ctroupin/T7 Shield/data/EMODnet-Chemistry/Eutrophication2024/Results/By_sea_regions-water_body/Northeast_Atlantic_Ocean/Water_body_phosphate.4Danl.nc", "/media/ctroupin/T7 Shield/data/EMODnet-Chemistry/Eutrophication2024/Results/By_sea_regions-water_body/Northeast_Atlantic_Ocean/Water_body_silicate.4Danl.nc"]


### Change the long_name attribute

In [57]:
for datafile in datafilelist
    @info("Working on file $(datafile)")
    NCDataset(datafile, "a") do ds
        varlist = keys(ds)

        # Identify the main variable name
        mainvar_ = first(varlist)
        # mainvar_ = first(split(last(keys(ds)), "_"))
        mainvarshort = replace(mainvar_, "Water_body_" => "", "Water body " => "", "_" => " ")
        @info("Main variable: $(mainvar_), $(mainvarshort)")

        # Get the parameter URN
        parameter_keyword_urn = ds.attrib["parameter_keyword_urn"]
        @info(parameter_keyword_urn)

        # Generate alternative label using Vocab
        newlabel = Vocab.altLabel(parameter_keyword_urn)
        @info("The new label is $(newlabel)")
        @info("")

        # Loop on variables
        for varname in varlist
            @debug(varname)
            
            # Work only on variables related to parameter (not coordinates etc)
            if startswith(varname, mainvar_)

                varlongname = ds[varname].attrib["long_name"]
                @debug("Long name: $(varlongname)")

                # replace "for" with "of"
                newvarlongname = replace(varlongname, "for " => "of ", "_" => " ")
                newvarlongname = replace(newvarlongname, "Water body " => "")
                @debug(newvarlongname)

                namesplit = split(newvarlongname, mainvarshort)
                if namesplit[end] == ""
                    newvarlongname = "$(newlabel) $(replace(first(namesplit), " of " => ""))"
                else
                    newvarlongname = "$(newlabel) $(replace(first(namesplit), " of " => "")) $(last(namesplit))"
                end
                newvarlongname = replace(newvarlongname, "   " => " ", "  " => " ", "(DIN) " => "", "(din) " => "")
                newvarlongname = uppercasefirst(lowercase(newvarlongname))

                @info("$(varlongname) → $(newvarlongname)")

                # Perform change
                ds[varname].attrib["long_name"] = rstrip(newvarlongname)
            end
        end
    end
end

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mWorking on file /media/ctroupin/T7 Shield/data/EMODnet-Chemistry/Eutrophication2024/Results/By_sea_regions-water_body/Northeast_Atlantic_Ocean/Water_body_chlorophyll-a.4Danl.nc
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mMain variable: Water_body_chlorophyll-a, chlorophyll-a
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mSDN:P35::EPC00105
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mThe new label is Chlorophyll-a_particulate
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mChlorophyll-a_particulate → Chlorophyll-a_particulate chlorophyll-a particulate chlorophyll-a particulate
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mChlorophyll-a_particulate masked using relative error threshold 0.3 → Chlorophyll-a_particulate chlorophyll-a particulate masked using relative error threshold 0.3 chlorophyll-a particulate masked using relative error threshold 0.3
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m

## Modify the standard names
https://cfconventions.org/Data/cf-standard-names/current/build/cf-standard-name-table.html

In [54]:
for datafile in datafilelist
    @info("Working on file $(datafile)")
    NCDataset(datafile, "a") do ds
        varlist = keys(ds)

        # Identify the main variable name
        mainvar_ = ds.attrib["parameter_keyword"]
        mainvar_ = replace(mainvar_, " " => "_")
        # mainvar_ = first(varlist)
        # mainvar_ = first(split(last(keys(ds)), "_"))

        # Loop on variables
        for varname in varlist
            @debug("Variable name: $(varname)")
            
            # Work only on variables related to parameter (not coordinates etc)
            if startswith(varname, mainvar_)
                varattribs = ds[varname].attrib

                # Check if there is a standard name
                if haskey(varattribs, "standard_name")
                    stdname = varattribs["standard_name"]
                    @info("++++++++++++++++++++");

                    # Perform change
                    @info("$(stdname) => $(standard_names_dict[stdname])")
                    ds[varname].attrib["standard_name"] = standard_names_dict[stdname]
                end  
            end
        end
    end
end

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mWorking on file /media/ctroupin/T7 Shield/data/EMODnet-Chemistry/Eutrophication2024/Results/Coastal_areas-water_body/Northeast_Atlantic_Ocean_-_Loire_River/Water_body_chlorophyll-a.nc
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m++++++++++++++++++++
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mWater_body_chlorophyll-a => mass_concentration_of_chlorophyll_a_in_sea_water
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m++++++++++++++++++++
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mWater_body_chlorophyll-a => mass_concentration_of_chlorophyll_a_in_sea_water
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m++++++++++++++++++++
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mWater_body_chlorophyll-a => mass_concentration_of_chlorophyll_a_in_sea_water
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m++++++++++++++++++++
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mWater_body_chlorophyll-a => mass_concentration_of_chlorophyll_a_in_sea_water
[36m[1m[