Explain how to read an ODV spreadsheet data file using `Julia`

In [1]:
using Logging
Logging.configure(level=INFO);



# Input file

In [2]:
datadir = "./data/";
ODVfile = joinpath(datadir, "SmallODV2.txt");

Check if the file exists:

In [3]:
if isfile(ODVfile)
    info("Working on file $ODVfile")
else
    err("File $ODVfile doesn't exist")
end

14-juin 16:05:43:INFO:root:Working on file ./data/SmallODV2.txt


# Read the file

Loop on the file lines.<br>
According to ODV guide, there are 3 types of lines:
1. comment lines, 
2. the column labels line, and 
3. data lines. 

First we define a function that will create a profile:

In [4]:
function initProfileList(line)
    """
    Create an empty list of lists, 
    the number of internal lists is the number of columns
    
    List of lists is preferred because the length of each list is 
    not always the same.
    """
    debug("Creating new profile (list of list) with empty values")
    
    # Compute number of columns
    ncolumns = length(line);
    debug("No. of columns: " * string(ncolumns))
    
    profile = []
    for i in 1:ncolumns
        push!(profile, [line[i]]) 
    end
    
    return profile
end

initProfileList (generic function with 1 method)

We need to be able to distinguish between:
* *header* lines, i.e. lines that contain metadata about a given profiles,
* *data* lines, which really contain the measured values.
To do so, we count the number of empty values and compare it to the expect number of columns, obtained as the 1st line after the comments. 

In [5]:
function getNonEmptyInd(line)
    nonempty(x) = length(x) > 0;
    nonempty_ind = find(nonempty, line);
    return nonempty_ind;
end

getNonEmptyInd (generic function with 1 method)

In [6]:
global ODVspreadsheet3
type ODVspreadsheet3
        metadata::Dict{String,String}
        columnLabels::Array{SubString{String},1}
        profileList::Array{Any,1}
end

In [7]:
function readODVspreadsheet(datafile)
    
    """
    The function will return a composite type that will store:
    1. The general metadata of the spreadsheet
    2. The labels of the columns
    3. The individual profiles
    """

    # metadata will be stored in a dictionary
    # ODV doc: Comment lines start with two slashes  // as first two characters
    metadata = Dict{String, String}()

    # Context manager
    open(datafile, "r") do f
        line = readline(f)

        # Read the metadata (lines starting with //)
        while line[1:2] == "//"

            # Identify metadata fields using regex
            # (name of the field is between < > and </ >)
            m = match(r"<(\w+)>(.+)</(\w+)>", line)

            if m != nothing
                debug("Match found")
                println(m[1] * ": " * m[2])
                # Add key - value in the dictionnary
                metadata[String(m[1])] = String(m[2])
            end
            line = readline(f);
        end

        # Read the column labels and set number of columns
        #ODV doc: must be the first non-comment line in the file
        #ODV doc: must provide columns for all mandatory meta-variables
        columnline = line
        columnLabels = split(chomp(columnline), '\t')
        ncols = length(columnLabels);
        debug("No. of columns: " * string(ncols))

        # Create an array that will store all the profiles
        profileList = []

        # Loop on the lines
        jj = 0
        profile = [];
        nprofiles = 0;

        while !eof(f)
            jj += 1;
            line = split(chomp(readline(f)), "\t");

            # Count empty values
            nonempty_ind = getNonEmptyInd(line);
            debug("Indices of the non-empty columns :")
            debug(nonempty_ind);

            # If the first value (Station) is not empty, 
            # then it's a header line
            if (nonempty_ind[1] == 1)
                debug("Working with a header line")
                debug("Adding the profile to the array")
                push!(profileList, profile)

                # Initiate a profile (list of lists)
                nprofiles += 1;
                debug("Create a new, empty profile")
                profile = initProfileList(line)
            else
                debug("Adding values to the existing profile")
                for ii in nonempty_ind
                    push!(profile[ii], line[ii]);
                end
            end
        end

        info("No. of profiles in the file: " * string(nprofiles))
        ODVdata = ODVspreadsheet3(metadata, columnLabels, profileList)
        return ODVdata
    end
end

readODVspreadsheet (generic function with 1 method)

In [8]:
ODVstuff = readODVspreadsheet(ODVfile)

Version: ODV Spreadsheet V4.0


14-juin 16:05:43:INFO:root:No. of profiles in the file: 2


Creator: charles@gher13.phys.ulg.ac.be
CreateTime: 2011-12-19T12:42:52
Software: Ocean Data View Version 4.2.1 - 2009
Source: /home/charles/DIVA/BlackSea4diva/New_ODV4_Collection.odv
SourceLastModified: 2011-12-19T12:42:03
DataField: Ocean
DataType: Profiles
MissingValueIndicators: -99


ODVspreadsheet3(Dict("DataType"=>"Profiles","Source"=>"/home/charles/DIVA/BlackSea4diva/New_ODV4_Collection.odv","DataField"=>"Ocean","Software"=>"Ocean Data View Version 4.2.1 - 2009","SourceLastModified"=>"2011-12-19T12:42:03","Version"=>"ODV Spreadsheet V4.0","Creator"=>"charles@gher13.phys.ulg.ac.be","CreateTime"=>"2011-12-19T12:42:52","MissingValueIndicators"=>"-99"),SubString{String}["Cruise","Station","Type","yyyy-mm-ddThh:mm:ss.sss","Longitude [degrees_east]","Latitude [degrees_north]","Bot. Depth [m]:METAVAR:FLOAT:4","Originator's Cruise:METAVAR:TEXT:20","Originator's Station:METAVAR:TEXT:20","Depth [m]","QV:WOD","Temperature [C]","QV:WOD","Salinity [psu]","QV:WOD","QV:ODV:SAMPLE"],Any[Any[],Any[SubString{String}["WOD05_BG000003"],SubString{String}["11570900"],SubString{String}["B"],SubString{String}["1991-09-03T16:25"],SubString{String}["28.3333"],SubString{String}["43.167"],SubString{String}["36"],SubString{String}[""],SubString{String}["CoMSBlack91"],SubString{String}["0.0"

In [9]:
ODVstuff.metadata

Dict{String,String} with 9 entries:
  "DataType"               => "Profiles"
  "Source"                 => "/home/charles/DIVA/BlackSea4diva/New_ODV4_Collec…
  "DataField"              => "Ocean"
  "Software"               => "Ocean Data View Version 4.2.1 - 2009"
  "SourceLastModified"     => "2011-12-19T12:42:03"
  "Version"                => "ODV Spreadsheet V4.0"
  "Creator"                => "charles@gher13.phys.ulg.ac.be"
  "CreateTime"             => "2011-12-19T12:42:52"
  "MissingValueIndicators" => "-99"