# Change long names
This notebook modifies the `long_name` attribute of the variables, using the `altLabel` from the P35 vocabulary.
In addition, the `long_name` is rewritten so that the variable name appears first.

## Examples
```text
Water body dissolved inorganic nitrogen → Inorganic nitrogen_dissolved
Deepest depth for Water body dissolved oxygen concentration → Oxygen deepest depth
```

## Input
The notebook will loop over the netCDF files located inside the directory `datadir`.

<div class="alert alert-block alert-info">
<b>Info:</b> If you need to modify the variable names, use the script <code>./src/bash/change_varnames.bash</code>.
</div>

In [70]:
using Pkg
Pkg.activate("../")
Pkg.instantiate()
using NCDatasets
using Glob
using DIVAnd
include("../emodnetchemistry.jl")

[32m[1m  Activating[22m[39m project at `~/Projects/EMODnet/EMODnet-Chemistry/src/julia`


Main.EMODnetChemistry

## Generate list of files

In [83]:
#datadir = "/home/ctroupin/data/EMODnet-Chemistry/Eutrophication2024/Results/ogs04/All_European_Seas-water_body/"

# datadir = "/media/ctroupin/T7 Shield/data/EMODnet-Chemistry/Eutrophication2024/Results/By_sea_regions-water_body/Mediterranean_Sea/"
# datadir = "/media/ctroupin/T7 Shield/data/EMODnet-Chemistry/Eutrophication2024/Results/By_sea_regions-water_body/Arctic_Ocean"
# datadir = "/media/ctroupin/T7 Shield/data/EMODnet-Chemistry/Eutrophication2024/Results/By_sea_regions-water_body/Baltic_Sea/"
# datadir = "/media/ctroupin/T7 Shield/data/EMODnet-Chemistry/Eutrophication2024/Results/By_sea_regions-water_body/Northeast_Atlantic_Ocean/"
# datadir = "/media/ctroupin/T7 Shield/data/EMODnet-Chemistry/Eutrophication2024/Results/By_sea_regions-water_body/North_Sea/"
datadir = "/media/ctroupin/T7 Shield/data/EMODnet-Chemistry/Eutrophication2024/Results/By_sea_regions-water_body/"

# datadir = "/media/ctroupin/T7 Shield/data/EMODnet-Chemistry/Eutrophication2024/Results/Coastal_areas-water_body/Mediterranean_Sea_-_Po_River"
# datadir = "/media/ctroupin/T7 Shield/data/EMODnet-Chemistry/Eutrophication2024/Results/Coastal_areas-water_body/Baltic_Sea_-_Gulf_of_Riga/"
# datadir = "/media/ctroupin/T7 Shield/data/EMODnet-Chemistry/Eutrophication2024/Results/Coastal_areas-water_body/Black_Sea-_Danube_Delta"
# datadir = "/media/ctroupin/T7 Shield/data/EMODnet-Chemistry/Eutrophication2024/Results/Coastal_areas-water_body/Northeast_Atlantic_Ocean_-_Loire_River"

# Build the list of netCDF files to process from `datadir`.
datafilelist = EMODnetChemistry.get_file_list(datadir);
@info("Found $(length(datafilelist)) files")

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mNo variable selected
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mNo season selected
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mFound 27 files


In [84]:
datafilelist

27-element Vector{Any}:
 "/media/ctroupin/T7 Shield/data/"[93m[1m ⋯ 104 bytes ⋯ [22m[39m"_oxygen_concentration.4Danl.nc"
 "/media/ctroupin/T7 Shield/data/"[93m[1m ⋯ 83 bytes ⋯ [22m[39m"/Water_body_phosphate.4Danl.nc"
 "/media/ctroupin/T7 Shield/data/"[93m[1m ⋯ 82 bytes ⋯ [22m[39m"n/Water_body_silicate.4Danl.nc"
 "/media/ctroupin/T7 Shield/data/"[93m[1m ⋯ 85 bytes ⋯ [22m[39m"er_body_chlorophyll-a.4Danl.nc"
 "/media/ctroupin/T7 Shield/data/"[93m[1m ⋯ 100 bytes ⋯ [22m[39m"ed_inorganic_nitrogen.4Danl.nc"
 "/media/ctroupin/T7 Shield/data/"[93m[1m ⋯ 102 bytes ⋯ [22m[39m"_oxygen_concentration.4Danl.nc"
 "/media/ctroupin/T7 Shield/data/"[93m[1m ⋯ 81 bytes ⋯ [22m[39m"/Water_body_phosphate.4Danl.nc"
 "/media/ctroupin/T7 Shield/data/"[93m[1m ⋯ 80 bytes ⋯ [22m[39m"a/Water_body_silicate.4Danl.nc"
 "/media/ctroupin/T7 Shield/data/"[93m[1m ⋯ 84 bytes ⋯ [22m[39m"er_body_chlorophyll-a.4Danl.nc"
 "/media/ctroupin/T7 Shield/data/"[93m[1m ⋯ 99 bytes ⋯ [22m[39m"ed_in

## Change the `long_name` attribute

In [65]:
# Rewrite the `long_name` attribute of every variable whose name starts with
# the main variable name, in each file of `datafilelist`.
# The new long name is the vocabulary alternative label of the file's
# `parameter_keyword_urn`, followed by a description chosen from the variable
# name suffix (relative error, deepest value, masked field, ...).
@info("Working in directory $(datadir)");
for datafile in datafilelist
    @info("Working on file $(basename(datafile))")
    NCDataset(datafile, "a") do ds
        varlist = keys(ds)

        # Identify the main variable name.
        # NOTE(review): this assumes the first variable in the file is the main
        # parameter -- confirm for files with a different variable order.
        mainvar_ = first(varlist)
        mainvarshort = replace(mainvar_, "Water_body_" => "", "Water body " => "", "_" => " ")
        @info("Main variable: $(mainvar_), $(mainvarshort)")

        # Get the parameter URN
        parameter_keyword_urn = ds.attrib["parameter_keyword_urn"]
        @info(parameter_keyword_urn)

        # Generate alternative label using Vocab
        newlabel = Vocab.altLabel(parameter_keyword_urn)
        @info("The new label is $(newlabel)")
        @info("")

        # Loop on variables
        for variable in varlist
            # Skip coordinates and other variables not derived from the main one
            if startswith(variable, mainvar_)
                @info(" ")
                @info(variable)

                # The first matching suffix wins, so the "deepest_L*" tests
                # have to stay ahead of the generic "_L*" tests.
                if endswith(variable, "relerr")
                    @debug("Relative error")
                    new_longname = "$(newlabel) relative error"
                elseif endswith(variable, "deepest")
                    @debug("Deepest value")
                    new_longname = "$(newlabel) deepest values"
                elseif endswith(variable, "deepest_depth")
                    @debug("Deepest depth")
                    new_longname = "$(newlabel) deepest depth"
                elseif endswith(variable, "deepest_L2")
                    @debug("Deepest masked at 50%")
                    new_longname = "$(newlabel) deepest values masked using relative error threshold 0.5"
                elseif endswith(variable, "deepest_L1")
                    @debug("Deepest masked at 30%")
                    new_longname = "$(newlabel) deepest values masked using relative error threshold 0.3"
                elseif endswith(variable, "_L2")
                    @debug("Masked at 50%")
                    new_longname = "$(newlabel) masked using relative error threshold 0.5"
                elseif endswith(variable, "_L1")
                    @debug("Masked at 30%")
                    new_longname = "$(newlabel) masked using relative error threshold 0.3"
                else
                    @debug("Full variable")
                    new_longname = "$(newlabel)"
                end
                @info(new_longname)

                # Perform change (trailing whitespace is removed first)
                ds[variable].attrib["long_name"] = rstrip(new_longname)
            end
        end
    end
    @info("");
end

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mWorking in directory /media/ctroupin/T7 Shield/data/EMODnet-Chemistry/Eutrophication2024/Results/By_sea_regions-water_body/North_Sea/
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mWorking on file Water_body_chlorophyll-a.4Danl.nc
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mMain variable: Water_body_chlorophyll-a, chlorophyll-a
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mSDN:P35::EPC00105
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mThe new label is Chlorophyll-a_particulate
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m 
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mWater_body_chlorophyll-a
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mChlorophyll-a_particulate
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m 
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mWater_body_chlorophyll-a_L1
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mChlorophyll-a_particulate masked using relative error threshold 

## Modify the standard names
The CF standard names are listed in the [CF standard name table](https://cfconventions.org/Data/cf-standard-names/current/build/cf-standard-name-table.html).

In [66]:
# Replace the `standard_name` attribute of every variable related to the main
# parameter, using the mapping `EMODnetChemistry.standard_names_dict`
# (current standard name => new standard name).
# Standard names without an entry in the mapping are reported and left as they
# are, so the cell can be run again on files that were already processed.
for datafile in datafilelist
    @info("Working on file $(datafile)")
    NCDataset(datafile, "a") do ds
        # Identify the main variable name from the global attribute
        mainvar_ = replace(ds.attrib["parameter_keyword"], " " => "_")

        # Always go through the module: it is only `include`d, so its names
        # are not available unqualified.
        # NOTE(review): assumed to behave like a Dict (supports `haskey`) -- confirm.
        stdnames = EMODnetChemistry.standard_names_dict

        # Loop on variables
        for varname in keys(ds)
            @debug("Variable name: $(varname)")

            # Work only on variables related to parameter (not coordinates etc)
            startswith(varname, mainvar_) || continue

            # Nothing to do when the variable has no standard name
            varattribs = ds[varname].attrib
            haskey(varattribs, "standard_name") || continue

            stdname = varattribs["standard_name"]
            @info("++++++++++++++++++++");

            if haskey(stdnames, stdname)
                # Perform change
                newstdname = stdnames[stdname]
                @info("$(stdname) => $(newstdname)")
                ds[varname].attrib["standard_name"] = newstdname
            else
                @warn("No replacement defined for standard name $(stdname) (variable $(varname)); left unchanged")
            end
        end
    end
end

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mWorking on file /media/ctroupin/T7 Shield/data/EMODnet-Chemistry/Eutrophication2024/Results/By_sea_regions-water_body/North_Sea/Water_body_chlorophyll-a.4Danl.nc
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m++++++++++++++++++++
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mWater_body_chlorophyll-a => mass_concentration_of_chlorophyll_a_in_sea_water
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m++++++++++++++++++++
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mWater_body_chlorophyll-a => mass_concentration_of_chlorophyll_a_in_sea_water
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m++++++++++++++++++++
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mWater_body_chlorophyll-a => mass_concentration_of_chlorophyll_a_in_sea_water
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39m++++++++++++++++++++
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mWater_body_chlorophyll-a => mass_concentration_of_chlorophyll_a_in_sea_water
[36m[1m[ [22m[39m[36m[1mIn

## Create the JSON files

In [77]:
# For each netCDF file, write a JSON file next to it (same path with ".json"
# appended) by calling `EMODnetChemistry.write_json` with the main variable name.
for datafile in datafilelist
    @info("Working on file $(datafile)")
    # Read-only access is sufficient: only a global attribute is read.
    NCDataset(datafile, "r") do ds
        # Identify the main variable name
        mainvar_ = replace(ds.attrib["parameter_keyword"], " " => "_")
        @info(mainvar_)

        jsonfile = datafile * ".json"
        @info("Writing JSON file $(basename(jsonfile))");
        EMODnetChemistry.write_json(jsonfile, mainvar_);
    end
end

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mWorking on file /media/ctroupin/T7 Shield/data/EMODnet-Chemistry/Eutrophication2024/Results/By_sea_regions-water_body/Arctic_Ocean/Water_body_dissolved_oxygen_concentration.4Danl.nc
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mWater_body_dissolved_oxygen_concentration
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mWriting JSON file Water_body_dissolved_oxygen_concentration.4Danl.nc.json
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mWorking on file /media/ctroupin/T7 Shield/data/EMODnet-Chemistry/Eutrophication2024/Results/By_sea_regions-water_body/Arctic_Ocean/Water_body_phosphate.4Danl.nc
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mWater_body_phosphate
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mWriting JSON file Water_body_phosphate.4Danl.nc.json
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mWorking on file /media/ctroupin/T7 Shield/data/EMODnet-Chemistry/Eutrophication2024/Results/By_sea_regions-water_body/Arctic_Ocean/Water_body_sil

## Create list of DOIs

In [105]:
# Print one LaTeX table row per file: region & variable & DOI.
# The region is the name of the directory containing the file, the variable is
# the first variable of the dataset, and the DOI is the global `doi` attribute
# without its "https://doi.org/" prefix.
for datafile in datafilelist
    # The do-block closes the dataset even if an attribute lookup throws.
    NCDataset(datafile) do ds
        varname = replace(first(keys(ds)), "_" => " ")
        regionname = replace(basename(dirname(datafile)), "_" => " ")
        doi = replace(ds.attrib["doi"], "https://doi.org/" => "")
        println("$(regionname)\t & $(varname) \t & \\doi{$(doi)} \\\\")
    end
end

Arctic Ocean	 & Water body dissolved oxygen concentration 	 & \doi{10.13120/ac099579-5e6a-4c57-b458-df45cebb36e0} \\
Arctic Ocean	 & Water body phosphate 	 & \doi{10.13120/f13fe89f-e231-4956-9d37-b8170827896f} \\
Arctic Ocean	 & Water body silicate 	 & \doi{10.13120/f6e78f49-c9c2-45a6-8f59-35dde0a934c8} \\
Baltic Sea	 & Water body chlorophyll-a 	 & \doi{10.13120/7aac0cc4-0001-4505-830f-a5d1169c6ba5} \\
Baltic Sea	 & Water body dissolved inorganic nitrogen 	 & \doi{10.13120/08218893-c36c-4cde-b728-3437c8674900} \\
Baltic Sea	 & Water body dissolved oxygen concentration 	 & \doi{10.13120/7ec4c23e-84eb-4e9c-97b4-b2a1c02137db} \\
Baltic Sea	 & Water body phosphate 	 & \doi{10.13120/e34d5805-8f85-46b9-8fa7-0e3fa5d2bd05} \\
Baltic Sea	 & Water body silicate 	 & \doi{10.13120/: 63ec087e-be7c-4eba-b74d-7d4baddd01a0} \\
Black Sea	 & Water body chlorophyll-a 	 & \doi{10.13120/8e74967a-5655-44fc-90b7-7eca192b7851} \\
Black Sea	 & Water body dissolved inorganic nitrogen 	 & \doi{10.13120/e910dea4-

## Edit DOI [optional]

In [106]:
# Select the single file whose DOI has to be edited.
# Note: this overwrites the `datadir` defined earlier in the notebook.
datadir = "/media/ctroupin/T7 Shield/data/EMODnet-Chemistry/Eutrophication2024/Results/By_sea_regions-water_body/Baltic_Sea/"
datafile = joinpath(datadir, "Water_body_silicate.4Danl.nc")

"/media/ctroupin/T7 Shield/data/EMODnet-Chemistry/Eutrophication2024/Results/By_sea_regions-water_body/Baltic_Sea/Water_body_silicate.4Danl.nc"

NCDatasets.NetCDFError: NetCDF error: [31mNetCDF: Not a valid ID[39m (NetCDF error code: -33)

In [112]:
# Clean the `product_id` global attribute (drop spaces and colons) and rebuild
# the `doi` attribute from the cleaned identifier.
NCDataset(datafile, "a") do ds
    cleaned_id = replace(ds.attrib["product_id"], " " => "", ":" => "")
    ds.attrib["product_id"] = cleaned_id
    # The assignment is the last expression, so the new DOI is the cell's value
    ds.attrib["doi"] = "https://doi.org/10.13120/$(cleaned_id)"
end

"https://doi.org/10.13120/63ec087e-be7c-4eba-b74d-7d4baddd01a0"

In [109]:
# Scratch value: an identifier with a stray leading ": ", used to try out the clean-up
zz = ": 63ec087e-be7c-4eba-b74d-7d4baddd01a0"

": 63ec087e-be7c-4eba-b74d-7d4baddd01a0"

In [110]:
# Remove every space and colon from the scratch value
replace(zz, " " => "", ":" => "")

"63ec087e-be7c-4eba-b74d-7d4baddd01a0"