This notebook explains how to convert an ODV spreadsheet data file to netCDF format.

In [None]:
include("./ODVspreadsheet.jl")
using Logging
using NetCDF
using ODVspreadsheet

# Configuration

## Logging

In [8]:
# Set the logging level to INFO for the messages emitted in this notebook.
Logging.configure(level=INFO);

# Input file

In [3]:
# Directory holding the test data, and full path of the ODV spreadsheet to convert.
datadir = "./test/data/";
ODVfile = joinpath(datadir, "BlackSea_2profiles.txt");

Check if the file exists:

In [4]:
# Log the name of the file being processed, or log an error when it is missing.
isfile(ODVfile) ? info("Working on file $ODVfile") : err("File $ODVfile doesn't exist")

# Read the file

The function will return an object that stores all the information contained in the spreadsheet.

In [5]:
# Parse the spreadsheet. The returned object is used below through its
# `metadata`, `columnLabels` and `profileList` fields.
ODVdata = readODVspreadsheet(ODVfile);

# Create a new netCDF file

Following documentation: https://github.com/JuliaGeo/NetCDF.jl.

## Global attributes

In [None]:
# NOTE(review): this cell was never executed. `nccreate` receives the integer 1
# where the signature pasted in the last cell of this notebook expects a
# variable name followed by its dimensions -- confirm the intended call.
nccreate("output.nc", 1);
# Write the metadata read from the ODV file as attributes under the name "Global".
# NOTE(review): presumably these become the file's global attributes -- verify
# against the NetCDF.jl documentation of `ncputatt`.
ncputatt("./output.nc", "Global", ODVdata.metadata);

## Define a function to find the indices of the columns

In [16]:
global ColumnIndex
# Composite type storing, for each quantity of interest, the index of the
# matching column of the ODV spreadsheet (filled by `getColumnIndex`).
type ColumnIndex
    cruise::Int         # column labelled "Cruise"
    station::Int        # column labelled "Station"
    stationtype::Int    # column labelled "Type"
    depth::Int          # column labelled "Depth [m]"
    lon::Int            # first column whose label contains "Longitude"
    lat::Int            # first column whose label contains "Latitude"
    temperature::Int    # first column whose label contains "Temperature"
    salinity::Int       # first column whose label contains "Salinity"
end



In [17]:
# Normalise the result of `findfirst`: depending on the Julia version a failed
# search yields either 0 or `nothing`; always report "not found" as 0.
_indexorzero(idx) = idx === nothing ? 0 : idx

"""
    getColumnIndex(columnLabels)

Return a `ColumnIndex` storing the indices of the columns of interest.

# Arguments
- `columnLabels`: `Array{SubString{String},1}` containing the names of the ODV
  file columns, as obtained by using function `readODVspreadsheet`.

# Returns
- `ColumnIndex`: a composite type storing the indices (integers) of the columns
  specified by their title. "Cruise", "Station", "Type" and "Depth [m]" must
  match a label exactly; "Latitude", "Longitude", "Temperature" and "Salinity"
  only have to be contained in a label. A title without a matching column gets
  the index 0.
"""
function getColumnIndex(columnLabels)
    # Index of the first label equal to `title` (0 when absent).
    exact(title) = _indexorzero(findfirst(columnLabels .== title))
    # Index of the first label containing `word` (0 when absent).
    partial(word) = _indexorzero(findfirst(x -> contains(x, word), columnLabels))

    cruiseindex = exact("Cruise")
    stationindex = exact("Station")
    typeindex = exact("Type")
    depthindex = exact("Depth [m]")
    latindex = partial("Latitude")
    lonindex = partial("Longitude")
    temperatureindex = partial("Temperature")
    salinityindex = partial("Salinity")

    # Argument order follows the field order of `ColumnIndex` (lon before lat).
    return ColumnIndex(cruiseindex, stationindex, typeindex, depthindex,
        lonindex, latindex, temperatureindex, salinityindex)
end



getColumnIndex (generic function with 1 method)

## Extract coordinates

In [9]:
# Count the profiles stored in the spreadsheet object and report the number.
nprofiles = length(ODVdata.profileList)
info(string("Number of profiles: ", nprofiles))

19-juin 16:02:32:INFO:root:Number of profiles: 2


Loop over the profiles in order to create the lists of variables.<br>
The indices are obtained using the `getColumnIndex` function. 

In [18]:
# Look up the column positions once; they are reused when looping over the profiles.
colInd = getColumnIndex(ODVdata.columnLabels)

ColumnIndex(1,2,3,10,5,6,12,14)

In [25]:
# Per-profile coordinates: one element is appended for every profile.
lon = Float64[]
lat = Float64[]
stationlist = []
timelist = Float64[]
datelist = []
# Reference date of the time axis, and the same instant in epoch milliseconds.
dateref = "1970-01-01 00:00:00"
epochref = Dates.datetime2epochms(Dates.DateTime(dateref, "yyyy-mm-dd HH:MM:SS"))

for profile in ODVdata.profileList
    # Only the first row of each column is read: one value per profile.
    # NOTE(review): the date is taken from hard-coded column 4 -- presumably the
    # ISO 8601 date/time column of the spreadsheet; confirm against the file.
    datestring = profile[4][1]
    push!(datelist, datestring)
    push!(stationlist, profile[colInd.station][1])
    push!(lon, parse(Float64, profile[colInd.lon][1]))
    push!(lat, parse(Float64, profile[colInd.lat][1]))

    # Convert dates to seconds since the reference date. `datetime2epochms`
    # works in milliseconds, hence the division by 1000.
    dd = Dates.DateTime(datestring, "yyyy-mm-ddTHH:MM")
    push!(timelist, (Dates.datetime2epochms(dd) - epochref) / 1000)
end

In [26]:
# Display the station identifiers collected in the loop above.
stationlist

2-element Array{Any,1}:
 "11570900"
 "11570901"

## Define some attributes of the variable (optional)

In [30]:
# Attribute dictionaries attached to the netCDF dimensions/variables.

# Station identifier.
stationatts = Dict(
    "long_name" => "Unique identifier for each feature instance",
    "cf_role" => "timeseries_id"
)

# Sea water temperature. "C" alone denotes coulomb in udunits, so the unit is
# spelled out.
tempatts = Dict(
    "standard_name" => "sea_water_temperature",
    "long_name" => "Temperature",
    "units"    => "degree_Celsius"
)

# Longitude: X axis, with the unit spelled with an underscore as for latitude.
lonatts = Dict(
    "long_name" => "Longitude",
    "standard_name" => "longitude",
    "units" => "degrees_east",
    "axis" => "X",
    "valid_min" => -180.0, 
    "valid_max" => 180.0, 
    "_FillValue" => -999.9
);

# Latitude: Y axis.
latatts = Dict(
    "long_name" => "Latitude",
    "standard_name" => "latitude",
    "units" => "degrees_north",
    "axis" => "Y",
    "valid_min" => -90.0, 
    "valid_max" => 90.0, 
    "_FillValue" => -999.9
);

# Time, counted from the reference date `dateref`.
# NOTE(review): the "julian" calendar is kept from the original; confirm it is
# the calendar intended for the dates of the ODV file.
timatts = Dict(
    "long_name" => "Time",
    "standard_name" => "time",
    "units" => string("seconds since ", dateref),
    "calendar" => "julian",
    "axis" => "T",
    "_FillValue" => 0.0
);

# Depth: Z axis, in metres.
depthatts = Dict(
    "long_name" => "Depth",
    "standard_name" => "depth",
    "units" => "m",
    "axis" => "Z",
    "_FillValue" => -9999.9

)

Dict{String,Any} with 5 entries:
  "units"         => "m"
  "long_name"     => "Depth"
  "axis"          => "Z"
  "standard_name" => "depth"
  "_FillValue"    => -9999.9

## Create dimensions

According to the [reference](https://www.nodc.noaa.gov/data/formats/netcdf/v1.1/timeSeriesProfileIncomVIncomT.cdl), we define 3 dimensions:
1. nzMax, the maximal number of depth levels,
2. station, the station identifier,
3. ntimeMax, the maximal number of time instances.

In [29]:
# Dimensions of the output file, each carrying its attribute dictionary.
# NOTE(review): 20 depth levels is a hard-coded placeholder -- confirm it covers
# the deepest profile of the input file.
depthDim = NcDim("depthDim", collect(1:20), depthatts)
# The station and time dimensions are sized from the number of profiles read
# from the file instead of a hard-coded 2.
stationDim = NcDim("stationDim", collect(1:nprofiles), stationatts)
# The time attribute dictionary is defined under the name `timatts`.
timeDim = NcDim("timeDim", collect(1:nprofiles), timatts)

LoadError: UndefVarError: depthatts not defined

## Create variables

In [None]:
# Longitude variable: one value per station, described by `lonatts`.
# Float64 matches both the parsed longitudes and the Float64 `_FillValue`.
# NOTE(review): the dimension list assumes longitude varies with the station
# only, as in the referenced NODC template -- confirm.
lonovar = NcVar("lon", [stationDim], atts=lonatts, t=Float64)

In [None]:
# NOTE(review): this is the generic form of the `nccreate` call, not a runnable
# statement: `varname`, `dimensions`, `atts`, `gatts`, `compress`, `t` and `mode`
# are not defined in the cells shown here and must be replaced by actual values.
nccreate("./output.nc", varname, dimensions ..., atts=atts,gatts=gatts,compress=compress,t=t,mode=mode)