Explain how to read an ODV spreadsheet data file using `Julia`

In [54]:
using Logging
Logging.configure(level=DEBUG);

# Input file

In [51]:
datadir = "./data/";
ODVfile = joinpath(datadir, "SmallODV2.txt");

Check if the file exists:

In [52]:
if isfile(ODVfile)
    info("Working on file $ODVfile")
else
    err("File $ODVfile doesn't exist")
end

# Read the file

Loop on the file lines.<br>
According to ODV guide, there are 3 types of lines:
1. comment lines, 
2. the column labels line, and 
3. data lines. 

In [61]:
# metadata will be stored in a dictionary
# ODV doc: Comment lines start with two slashes // as first two characters
metadata = Dict{String, String}()

# Context manager
open(ODVfile, "r") do f
    line = readline(f)
    
    # Read the metadata (lines starting with //)
    while line[1:2] == "//"
        
        # Identify metadata fields using regex
        # (name of the field is between < > and </ >)
        m = match(r"<(\w+)>(.+)</(\w+)>", line)
        
        if m != nothing
            debug("Match found")
            println(m[1] * ": " * m[2])
            # Add key - value in the dictionnary
            metadata[String(m[1])] = String(m[2])
        end
        line = readline(f)
    end
    
    # Read the column labels and set number of columns
    #ODV doc: must be the first non-comment line in the file
    #ODV doc: must provide columns for all mandatory meta-variables
    global columnnames
    global columnline
    global ncolumns
    columnline = line
    columnnames = split(line, '\t');
    ncolumns = length(columnnames);
    
    # Read metadata of the first profile
    line = readline(f)
    global metadataprofile
    metadataprofile = line
    
    global dataline
    dataline = readline(f)
    
end

13-juin 09:25:16:DEBUG:root:Match found
13-juin 09:25:16:DEBUG:root:Match found
13-juin 09:25:16:DEBUG:root:Match found
13-juin 09:25:16:DEBUG:root:Match found
13-juin 09:25:16:DEBUG:root:Match found
13-juin 09:25:16:DEBUG:root:Match found
13-juin 09:25:16:DEBUG:root:Match found
13-juin 09:25:16:DEBUG:root:Match found
13-juin 09:25:16:DEBUG:root:Match found


Version: ODV Spreadsheet V4.0
Creator: charles@gher13.phys.ulg.ac.be
CreateTime: 2011-12-19T12:42:52
Software: Ocean Data View Version 4.2.1 - 2009
Source: /home/charles/DIVA/BlackSea4diva/New_ODV4_Collection.odv
SourceLastModified: 2011-12-19T12:42:03
DataField: Ocean
DataType: Profiles
MissingValueIndicators: -99


In [71]:
columnnames

16-element Array{SubString{String},1}:
 "Cruise"                              
 "Station"                             
 "Type"                                
 "yyyy-mm-ddThh:mm:ss.sss"             
 "Longitude [degrees_east]"            
 "Latitude [degrees_north]"            
 "Bot. Depth [m]:METAVAR:FLOAT:4"      
 "Originator's Cruise:METAVAR:TEXT:20" 
 "Originator's Station:METAVAR:TEXT:20"
 "Depth [m]"                           
 "QV:WOD"                              
 "Temperature [C]"                     
 "QV:WOD"                              
 "Salinity [psu]"                      
 "QV:WOD"                              
 "QV:ODV:SAMPLE\n"                     

In [73]:
split(metadataprofile, '\t')

16-element Array{SubString{String},1}:
 "WOD05_BG000003"  
 "11570900"        
 "B"               
 "1991-09-03T16:25"
 "28.3333"         
 "43.167"          
 "36"              
 ""                
 "CoMSBlack91"     
 "0.0"             
 "0"               
 "22.208"          
 "0"               
 "15.7020"         
 "0"               
 "1\n"             

In [74]:
profile.cruise = 'ok'

LoadError: syntax: invalid character literal

In [27]:
split(dataline)

7-element Array{SubString{String},1}:
 "10.0"   
 "0"      
 "22.260" 
 "0"      
 "15.7380"
 "0"      
 "1"      

In [30]:
columnnames

25-element Array{SubString{String},1}:
 "Cruise"                 
 "Station"                
 "Type"                   
 "yyyy-mm-ddThh:mm:ss.sss"
 "Longitude"              
 "[degrees_east]"         
 "Latitude"               
 "[degrees_north]"        
 "Bot."                   
 "Depth"                  
 "[m]:METAVAR:FLOAT:4"    
 "Originator's"           
 "Cruise:METAVAR:TEXT:20" 
 "Originator's"           
 "Station:METAVAR:TEXT:20"
 "Depth"                  
 "[m]"                    
 "QV:WOD"                 
 "Temperature"            
 "[°C]"                   
 "QV:WOD"                 
 "Salinity"               
 "[psu]"                  
 "QV:WOD"                 
 "QV:ODV:SAMPLE"          

Check what's inside the dictionnary.

In [168]:
metadata

Dict{String,String} with 9 entries:
  "DataType"               => "Profiles"
  "Source"                 => "/home/charles/DIVA/BlackSea4diva/New_ODV4_Collec…
  "DataField"              => "Ocean"
  "Software"               => "Ocean Data View Version 4.2.1 - 2009"
  "SourceLastModified"     => "2011-12-19T12:42:03"
  "Version"                => "ODV Spreadsheet V4.0"
  "Creator"                => "charles@gher13.phys.ulg.ac.be"
  "CreateTime"             => "2011-12-19T12:42:52"
  "MissingValueIndicators" => "-99"

In [175]:
columnnames

"Cruise  Station Type    yyyy-mm-ddThh:mm:ss.sss Longitude [degrees_east]        Latitude [degrees_north]        Bot. Depth [m]:METAVAR:FLOAT:4  Originator's Cruise:METAVAR:TEXT:20     Originator's Station:METAVAR:TEXT:20    Depth [m]       QV:WOD  Temperature [°C]        QV:WOD  Salinity [psu]  QV:WOD  QV:ODV:SAMPLE\n"

In [178]:
columnnames = split(columnnames)

25-element Array{SubString{String},1}:
 "Cruise"                 
 "Station"                
 "Type"                   
 "yyyy-mm-ddThh:mm:ss.sss"
 "Longitude"              
 "[degrees_east]"         
 "Latitude"               
 "[degrees_north]"        
 "Bot."                   
 "Depth"                  
 "[m]:METAVAR:FLOAT:4"    
 "Originator's"           
 "Cruise:METAVAR:TEXT:20" 
 "Originator's"           
 "Station:METAVAR:TEXT:20"
 "Depth"                  
 "[m]"                    
 "QV:WOD"                 
 "Temperature"            
 "[°C]"                   
 "QV:WOD"                 
 "Salinity"               
 "[psu]"                  
 "QV:WOD"                 
 "QV:ODV:SAMPLE"          

In [179]:
columnnames[1]

"Cruise"