-
Notifications
You must be signed in to change notification settings - Fork 1
/
Write_SEABASS_Level2_UVP_zoo.m
587 lines (524 loc) · 24.8 KB
/
Write_SEABASS_Level2_UVP_zoo.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
function Write_SEABASS_Level2_UVP_zoo
%% FUNCTION WRITE_SEABASS_LEVEL2_UVP_ZOO
% Description:
% Reads in detailed ODV formatted file exported from Ecotaxa Particle
% Module and creates a file for submission to NASA's SEABASS database
% https://seabass.gsfc.nasa.gov/wiki/Data_Submission/
%
% This will be Level 2 data (abundance and biovolumes)
%
% Steps:
% 1. USER writes metadata m file to define SeaBASS required headers
% format: {cruiseid}_UVP_metadata.m
% 2. USER defines projectdir, cruiseid, odv_rfile (read file), sb_wfile
% (write file), and r2r_elog (if available and must edit
% UVP_merge_R2R.m for cruise specific r2r log formatting).
% 3. Script reads in detailed ODV ZOO file
% 4. Script formats fields and column names following SeaBASS format
% 5. Script queries WoRMS database for taxa scientificNames and scientificNameIDs
% 6. Script formats metadata header that will be printed to .sb file
% 7. Script writes sb file with all taxonomic data
%
% References:
% Picheral M, Colin S, Irisson J-O (2017). EcoTaxa, a tool for the
% taxonomic classification of images. http://ecotaxa.obs-vlfr.fr.
%
% Neeley, A., S. Beaulieu, C. Proctor, I. Cetinic, J. Futrelle, I.
% Ramos-Santos, H. Sosik, E. Devred, L. Karp-Boss, M. Picheral, N.
% Poulton, C. Roesler, and A. Shepherd. (2021) Standards and practices for
% reporting plankton and other particle observations from images. 38 pp.
% DOI:10.1575/1912/27377
%
% Code availabile: https://github.com/britairving/UVP_submission_formatting
%
% Author:
% Brita Irving <bkirving@alaska.edu>
% with input from
% Andrew McDonnell <amcdonnell@alaska.edu>
% Emmanuel Boss <emmanuel.boss@maine.edu>
% Lee Karp-Boss <lee.karp-boss@maine.edu>
%% Uncertainty description
cfg.include_uncertainty_description = 1; % 1 = uncertainty description written to file header
cfg.remove_temporary_fields = 0; % 1 = removes all temporary fields from odv data before writting
%% Define read and write filenames
%% EXPORTSNA cruise on the Sarmiento De Gamboa UVP6-LP deployed on a float
cruiseid = 'SG2105_UVP6';
odv_rfile = fullfile('export_detailed_20220628_22_58','export_detailed_20220628_22_58_ZOO_odv.txt');
sb_wfile = 'EXPORTS-EXPORTSNA_UVP6-TaxonomicLevel2_sdg_20210504-20210519_R1.sb';
r2r_elog = 'EXPORTSNA_SarmientoDeGamboa_r2r_logs_original.xlsx';
%% EXPORTSNP survey cruise R/V Sally Ride
% cruiseid = 'SR1812';
% odv_rfile = fullfile('export_detailed_20210201_19_26','export_detailed_20210201_19_26_ZOO_odv.txt');
% sb_wfile = 'EXPORTS-EXPORTSNP_UVP5-TaxonomicLevel2_survey_20180814-20180909_R0.sb';
%% EXPORTSNP process cruise R/V Roger Revelle
% cruiseid = 'RR1813';
% odv_rfile = fullfile('export_detailed_20210128_08_11','export_detailed_20210128_08_11_ZOO_odv.txt');
% sb_wfile = 'EXPORTS-EXPORTSNP_UVP5-TaxonomicLevel2_process_20180814-20180909_R0.sb';
%% USER INPUT REQUIRED: Specify full path
if ismac
projectdir = fullfile('/Users/bkirving/Documents/MATLAB/UVP_project_data',cruiseid);
else
projectdir = fullfile('D:','MATLAB','UVP_project_data',cruiseid);
end
odv_rfile = fullfile(projectdir,odv_rfile);
r2r_elog = fullfile(projectdir,r2r_elog);
%% Specify write filename
% Release number R0 suggested if data_type=preliminary
% specify write file, this is not computer specific because uses same path
% Write files to submit folder
submit_dir = fullfile(projectdir,'submit');
if ~isdir(submit_dir)
try
mkdir(submit_dir);
catch
error('cannot create "submit" directory, maybe a permissions error?')
end
end
% Delete file if it is release R0 - i.e. preliminary
if exist(fullfile(submit_dir,sb_wfile),'file') && contains(sb_wfile,'R0')
delete(fullfile(submit_dir,sb_wfile))
else % rename if file already exists so don't overwrite
release_num = 1;
while exist(fullfile(submit_dir,sb_wfile),'file')
sb_wfile = strrep(sb_wfile,['R' num2str(release_num) '.sb'],['R' num2str(release_num+1) '.sb']);
release_num = release_num + 1;
end
end
%% Read in basic metadata for project
try
% Change directory to project folder so can easily call metadata file
pwd_now = pwd;
cd(projectdir)
eval(['hdr = ' cruiseid '_UVP_metadata'])
% Change directory back to previous working directory
cd(pwd_now);
catch % catch and explain why script stopped
fprintf('Cannot load hdr for this project\n')
fprintf('Need to set up %s_UVP_metadata.m script, see example provided\n',cruiseid)
keyboard
end
% Test that hdr loaded
if ~exist('hdr','var')
error('*** Need "hdr" structure - called from [cruiseid]_UVP_metadata.m')
end
%% Read ODV zoo data
fprintf('\n Reading ODV Zooplankton data from... %s\n',odv_rfile)
% check that file exists
if ~exist(odv_rfile,'file')
fprintf('File does not exist - make sure path and name are correct: %s\n',odv_rfile)
end
% STEP1 - read header and table options
% Get file options delimiter can be ';' or '\t' (tab)
table_options = detectImportOptions(odv_rfile);
% Read header
% column names usually at line 7, but go to line 20 in case extra
% header information is in file
hdr_odv = cell(20,1);
fileID = fopen(odv_rfile);
for nline = 1:20
hdr_odv{nline} = fgetl(fileID);
end
fclose(fileID);
column_hdrline = find(contains(hdr_odv,'Cruise'));
% resize header
hdr_odv = hdr_odv(1:column_hdrline);
cols = strsplit(hdr_odv{column_hdrline},table_options.Delimiter); % split hdr_odv by odv delimiter
% store original column names
cols_orig = cols;
for ic = 1:numel(cols)
cols{ic} = strrep(cols{ic},' ','_');
cols{ic} = strrep(cols{ic},'__','_'); % clean up unnecessary underscores
end
% ecotaxa changed the format of the unit part of column names
% this happend sometime in 2019 (I think)
% old: [#/m3] [ppm]
% new: [# m-3] [mm3 l-1]
% For simplicity, change column names back to original format
cols = strrep(cols,'_[#_m-3]','_[#/m^3]');
cols = strrep(cols,'_[mm3_l-1]','_[mm^3/L]');
%% Reformat variable names
meta_idx = find(contains(cols,'METAVAR'));
% remove unnecessary text from table column names
remove_text_from_columns = {'_METAVAR_TEXT_40','_METAVAR_TEXT_20', '_METAVAR_TEXT_10', '__METAVAR_DOUBLE','__PRIMARYVAR_DOUBLE','_degrees_east','_degrees_north'};
table_options.VariableNames = erase(table_options.VariableNames,remove_text_from_columns);
% now remove unnecessary text from original column names
remove_text_from_columns = strrep(remove_text_from_columns,'_',':');
remove_text_from_columns = strrep(remove_text_from_columns,'::',':');
cols = erase(cols,remove_text_from_columns);
% make sure all variables read in correctly as number and not strings
table_options.VariableTypes(meta_idx(end):end) = {'double'};
table_options.VariableNames(1:meta_idx(end)) = lower(table_options.VariableNames(1:meta_idx(end)));
table_options.VariableNames{contains(table_options.VariableNames,'yyyy_mm_ddhh_mm')} = 'datetime';
table_options.VariableNames{contains(table_options.VariableNames,'station')} = 'profile';
table_options.VariableNames{contains(table_options.VariableNames,'Depth_m')} = 'Depth';
% STEP2 - read ODV particle data from file
odv = readtable(odv_rfile,table_options);
%% Read R2R eventlog
try
% Change directory to project folder so can easily call metadata file
pwd_now = pwd;
cd(projectdir)
eval(['[odv,cols] = ' cruiseid '_UVP_R2R_merge(odv,cols);'])
% Change directory back to previous working directory
cd(pwd_now);
catch % catch and explain why script stopped
fprintf('Cannot load R2R merge script for this project\n')
%fprintf('Need to set up %s_UVP_R2R_merge.m script, see example provided\n',cruiseid)
%odv.R2R_Event = [];
%keyboard
end
%% Define new table
% this is just to preserve original data so any manipulations can be
% tracked
odv2 = odv;
cols2 = cols;
%% Fill empty meta variables
% This is necessary because ODV format has multiple rows of data for each
% profile but after the first row, all subsequent rows for that profile are
% empty
fnames = fieldnames(odv2);
for iv = 1:numel(meta_idx)
vname = fnames{meta_idx(iv)};
odv2.(vname) = fillmissing(odv2.(vname),'previous');
end % loop through meta variables
% also fill out r2r event
if ismember('R2R_Event',fnames)
% first, need to replace 0x0 double entries with ''
odv2.R2R_Event(cellfun('isempty', odv2.R2R_Event)) = {''};
odv2.R2R_Event = fillmissing(odv2.R2R_Event,'previous');
end
% %% Identify all empty variables - i.e. no real values
% rm_fields = [];
% for irm = meta_idx(end)+1:size(odv2,2)
% nam = fnames{irm};
% if all(isnan(odv2.(nam)))
% rm_fields = [rm_fields; irm];
% elseif all(odv2.(nam) == 0)
% rm_fields = [rm_fields; irm];
% end
% end
%
% %% Remove empty variables
% odv2(:,rm_fields) = [];
% cols2(rm_fields) = [];
%% Remove temporary fields
if cfg.remove_temporary_fields
temporary_fields = find(contains(cols2,'temporary'));
odv2(:,temporary_fields) = [];
cols2(temporary_fields) = [];
end
%% Remove unwanted columns
% define variables that we want to remove
remove_fields = {'DataOwner' 'Cruise' 'Rawfilename' 'Instrument' 'SN' 'CTDrosette'};
remthis = find(contains(cols2,remove_fields));
odv2(:,remthis) = [];
cols2(remthis) = [];
%% Create date and time variables and sort by datetime
fnames = fieldnames(odv2);
% Convert datetime variable to string
idx_date_field = find(contains(cols2,'yyyy'));
date_field = fnames{idx_date_field};
if isdatetime(odv2.(date_field))
% sort odv2 table by datetime field
odv2 = sortrows(odv2,idx_date_field);
odv2.date = datestr(odv2.(date_field),'yyyymmdd');
odv2.time = datestr(odv2.(date_field),'HH:MM:SS');
% now convert original date_field to string
odv2.(date_field) = datestr(odv2.(date_field),'yyyy-mm-dd HH:MM:SS');
cols2{1,end+1}='date';
cols2{1,end+1}='time';
else % add catch so can fix before errors are generated to avoid confusion
fprintf('Unexpected format! Stopping here so can figure out why datetime variable is not in datetime format\n')
keyboard
end
%% switch order of columns and remove datetime
ncols = numel(odv2.Properties.VariableNames);
% find index location of variables we want to reorder
i_datetime = find(strcmp(odv2.Properties.VariableNames,'datetime'));
i_date = find(strcmp(odv2.Properties.VariableNames,'date'));
i_time = find(strcmp(odv2.Properties.VariableNames,'time'));
new_order = [1:i_datetime i_date i_time i_datetime+1:ncols];
% Reorder cell array with full column names
cols2 = cols2(new_order); % replaced cols2=[cols2(1,1:dt-1) , cols2(1,d) , cols2(1,d+1) , cols2(1,dt+1:d-1) , cols2(1,d+2:end)];
% added catch for older versions of MATLAB
try % movevars introduced in R2018a
odv2=movevars(odv2,'date','After','datetime');
odv2=movevars(odv2,'time','After','date');
catch % if older version of MATLAB
% this will work for all versions of matlab but kept the above method for
% clarity
odv2 = odv2(:,new_order);
% delete redundant variables that result from reordering
odv2.date_1 = [];
odv2.time_1 = [];
end
% sort by datetime then remove the datetime column
odv2.datetime = [];
cols2(i_datetime) = [];
cols2(end-1:end) = []; % date and time at the end
% Now move r2r event to just after depth
if ismember('R2R_Event',cols2)
ncols = numel(odv2.Properties.VariableNames);
% find index location of variables we want to reorder
i_r2revent = find(strcmp(odv2.Properties.VariableNames,'R2R_Event'));
i_time = find(strcmp(odv2.Properties.VariableNames,'time'));
new_order = [1:i_time i_r2revent i_time+1:ncols];
% Reorder cell array with full column names
cols2 = cols2(new_order); % replaced cols2=[cols2(1,1:dt-1) , cols2(1,d) , cols2(1,d+1) , cols2(1,dt+1:d-1) , cols2(1,d+2:end)];
odv2 = odv2(:,new_order);
% delete redundant variables that result from reordering
odv2.R2R_Event_1 = [];
cols2(i_r2revent+1) = [];
end
%% Query WoRMS to pull out AphiaID match for each taxonomic name
% save_taxa_filename = [cruiseid 'taxa_' date '.mat'];
save_taxa_filename = fullfile(projectdir,[cruiseid 'taxa_level2.mat']);
if ~exist(save_taxa_filename,'file')
% First step | parse taxonomic names from Ecotaxa fields
% Only need to look at abundance names, because column names are the same
% for biovolume and avgesd.
original_fields = cols2(contains(cols2,'[#/m^3]'));
original_fields = erase(original_fields,'_[#/m^3]');
% Initialize table used as input to WoRMS_AphiaID_taxa_match.m script.
% Inputs:
% taxa | table with fields Name, Name_parent, & Name_original
% Example
% taxa = table();
% taxa.Name = {};
% taxa.Name_parent = {};
% taxa.Name_original = {};
taxa = table();
taxa.ID = {}; % suffix added to abun_zoop_, biovol_, avgesd_ fields. This field is not required for WoRMS_AphiaID_taxa_match.m
taxa.Name = {}; % taxa name, lowest level. Required field in WoRMS_AphiaID_taxa_match.m.
taxa.Name_parent = {}; % taxa parent name, if available. Required field in WoRMS_AphiaID_taxa_match.m.
taxa.Name_original = {}; % name of original variable. This field is not required for WoRMS_AphiaID_taxa_match.m.
% Loop through and prase column names new fieldnames with associated ID
for n_id = 1:numel(original_fields)
taxa.ID{n_id} = [num2str(n_id) 'id'];
taxa.Name_original{n_id} = original_fields{n_id};
% First, parse ecotaxa name to pull out taxa name and parent name
names = strsplit(original_fields{n_id},'(');
% Save name to table
taxa.Name{n_id} = names{1};
% See if parent name available
if numel(names) > 1
taxa.Name_parent{n_id} = erase(names{2},')');
else
taxa.Name_parent{n_id} = '';
end
end
% Call script to go through and query AphiaID matches
check_children_manual = false; % true = checks full classifcation of parent if child doesn't have a match and asks user. false = skips this.
taxa = WoRMS_AphiaID_taxa_match(taxa,check_children_manual,'ecotaxa');
fprintf('Saving taxa matches to %s\n',[pwd filesep save_taxa_filename])
save(save_taxa_filename,'taxa');
else % Load data instead of rerunning everything
fprintf('Loading..%s\n',save_taxa_filename)
load(save_taxa_filename);
end
%% Limit data to specific taxa as defined by cfg.limit_to_taxa
% Cell array with taxonomic names selected by the user
% For example: cfg.limit_to_taxa = {'Salpida' 't008'};
% Used for EXPORTSNP UVP Dataset: salps and salp pellets is fine with me
% as it was carefully checked by experts (Deb, Karen).
if isfield(cfg,'limit_to_taxa')
idx_taxa = ismember(taxa.Name,cfg.limit_to_taxa);
taxa = taxa(idx_taxa,:);
% Pull out abundance data [#/m^3],biovolume data [mm^3/L], & esd data [mm]
idx_taxa = find(contains(cols2,{'_[#/m^3]' '_biovolume_[mm^3/L]' '_avgesd_[mm]'}));
idx_remove_taxa = ~contains(cols2(idx_taxa),cfg.limit_to_taxa); % do not include metadata columns
idx_remove_taxa = idx_taxa(idx_remove_taxa); % reindex to full size
cols2(idx_remove_taxa) = [];
odv2(:,idx_remove_taxa) = [];
% Redo taxa ID, in case any were removed
% First step | parse taxonomic names from Ecotaxa fields
% Only need to look at abundance names, because column names are the same
% for biovolume and avgesd.
original_fields = cols2(contains(cols2,'[#/m^3]'));
original_fields = erase(original_fields,'_[#/m^3]');
for n_id = 1:numel(original_fields)
taxa.ID{n_id} = [num2str(n_id) 'id'];
end
end
%% Configure fieldnames to fit expected SeaBASS fields
%Correct some variable names to match with SEABASS requirements
cols2 = strrep(cols2,'Site','station');
cols2 = strrep(cols2,'Station','station_alt_id');
cols2 = strrep(cols2,'Latitude_[degrees_north]','lat');
cols2 = strrep(cols2,'Longitude_[degrees_east]','lon');
% Variable SHOULD be bin_depth but fcheck throws and error
% "Measurement depth required as either a header value or a column in the
% data block. All data MUST have an associated depth"
%cols2= strrep(cols2,'Depth','bin_depth'); % bin_depth = Nominal or center depth for each data bin
cols2 = strrep(cols2,'Depth_[m]','depth'); % depth = Depth of measurement
cols2 = strrep(cols2,'Sampled_volume_[L]','volume');
% Set up units
units = cell(size(cols2));
if ismember('R2R_Event',cols2)
units(1:9) = {'none' 'none' 'yyyymmdd' 'hh:mm:ss' 'none' 'degrees' 'degrees' 'm' 'L'};
descr(1:9) = {'Station' 'Profile name' 'Date' 'Time (UTC)' 'Rolling Deck to Repository (R2R) Unique sample event number' 'Latitude in decimal degrees north' 'Longitude in decimal degrees north' 'Depth, midpoint of 5m depth bin' 'Total volume of water sampled within each depth bin'};
else
units(1:8) = {'none' 'none' 'yyyymmdd' 'hh:mm:ss' 'degrees' 'degrees' 'm' 'L'};
descr(1:8) = {'Station' 'Profile name' 'Date' 'Time (UTC)' 'Latitude in decimal degrees north' 'Longitude in decimal degrees north' 'Depth, midpoint of 5m depth bin' 'Total volume of water sampled within each depth bin'};
end
% Notes about variable names from NASA
% I did a few modifications to the field names in the taxonomic file to
% adjust to our system who can only handle one type of unit per field.
% Abun was taken with cell/L units, so we created a new field call
% abun_zoop, same for biovol.
%
% For the avgsed, we went with a long torturing one! The IFCB had set
% equivalent_spherical_diameter for the level-1b in um units, so we created
% yours as equivalent_spherical_diameter_avgmm.
% Pull out abundance data [#/m^3]
idx_abun = contains(cols2,'_[#/m^3]');
cols2(idx_abun) = strcat('abun_zoop_',taxa.ID'); % Build variables as abun_1id,abun_2id... etc
units(idx_abun) = {'organisms/L'}; % NASA prefers 'organisms/L' to 'number/m^3'
abun = table2array(odv2(:,idx_abun)); % first need to convert to an array so can use the * operator
odv2(:,idx_abun) = array2table(abun./1000.0); % convert to organisms/L = number/m^3 / 1000.00.
abun_desc = strcat('Abundance of',{' '},taxa.Name_original);
descr(idx_abun) = abun_desc; %strcat(abun_desc,{' '},erase(cols2(idx_abun),'abundance_'));
idx_biovol = contains(cols2,'_biovolume_[mm^3/L]');
cols2(idx_biovol) = strcat('biovol_zoop_',taxa.ID'); % Build variables as abun_1id,abun_2id... etc
% units(idx_biovol) = {'mm^3/L'};
% convert biovolume from [mm^3/L] to [µL/m^3] --> VSD * 1000.0; (1uL=1mm^3, 1000L=1m^3)
units(idx_biovol) = {'uL/m^3'};
biovol = table2array(odv2(:,idx_biovol)); % first need to convert to an array so can use the * operator
odv2(:,idx_biovol) = array2table(biovol.*1000.0); % [µL/m^3]
biovol_desc = strcat('Biovolume of',{' '},taxa.Name_original);
descr(idx_biovol) = biovol_desc; %strcat(biovol_desc,{' '},erase(cols2(idx_biovol),'biovolume_'));
% pull out esd data [mm]
idx_esd = contains(cols2,'_avgesd_[mm]');
cols2(idx_esd) = strcat('equivalent_spherical_diameter_avgmm_',taxa.ID'); % Build variables as abun_1id,abun_2id... etc
units(idx_esd) = {'mm'};
esd_desc = strcat('Average equivalent spherical diameter of',{' '},taxa.Name_original);
descr(idx_esd) = esd_desc; %strcat(esd_desc,{' '},erase(cols2(idx_esd),'biovolume_'));
% Combine all units into a single string
colunits = strjoin(units,','); %[colstr,cols2{i},','];
% Combine column names into a single string
colstr = strjoin(cols2,','); % colstr=[colstr,cols2{i},','];
%% Generate SeaBASS header text
% Pull out raw filename
[~,original_file,ext] = fileparts(odv_rfile);
original_file = [original_file ext];
% pull out max/min latitude, longitude, date, and time
latmax = max(odv2.latitude);
latmin = min(odv2.latitude);
lonmax = max(odv2.longitude);
lonmin = min(odv2.longitude);
% Find the first and last date/time for SEABASS metadata header
% since odv2 table has already been sorted by datetime (early on), just
% pull out first and last
datemax = odv2.date(end,:);
datemin = odv2.date(1 ,:);
timemax = odv2.time(end,:);
timemin = odv2.time(1 ,:);
hdr_SEABASS={'/begin_header';
['/investigators=' hdr.investigators];
['/affiliations=' hdr.affiliations];
['/contact=' hdr.contact];
['/experiment=' hdr.experiment];
['/cruise=' hdr.cruise];
['/station=' hdr.station];
['/data_file_name=' sb_wfile];
['/associated_files=' original_file]; % The original name of the data file exported from Ecotaxa
'/associated_file_types=raw';
['/documents=' strjoin(hdr.documents.ZOO,',')];
['/data_type=' hdr.data_type];
['/data_status=' hdr.data_status.ZOO];
['/start_date=' datemin];
['/end_date=' datemax];
['/start_time=' timemin '[GMT]'];
['/end_time=' timemax '[GMT]'];
['/north_latitude=' num2str(latmax) '[DEG]'];
['/south_latitude=' num2str(latmin) '[DEG]'];
['/east_longitude=' num2str(lonmax) '[DEG]'];
['/west_longitude=' num2str(lonmin) '[DEG]'];
'/water_depth=NA';
['/missing=' hdr.missing];
['/delimiter=' hdr.delimiter];
['/instrument_manufacturer=' hdr.inst_mfr];
['/instrument_model=' hdr.inst_model];
['/calibration_files=' hdr.calfiles];
['/calibration_date=' hdr.caldates];
'!'};
if cfg.include_uncertainty_description
hdr.comments.ZOO = [hdr.comments.ZOO; ...
'!';...
'! Uncertainties can be calculated based on counting statistics';...
'! For example:' ;...
'! relative uncertainty [none]';...
'! relative_unc = sqrt(abun_zoop_{#}id*volume)/(abun_zoop_{#}id*volume)';
'! uncertainty in the abundance [organisms/L]';...
'! abun_zoop_{#}id_unc = relative_unc*abun_zoop_{#}id';
'! uncertainty in the biovolume [uL/m^3]';...
'! biovol_zoop_{#}id_unc = relative_unc*biovol_zoop_{#}id'];%...
%'! uncertainty in the average equivalent spherical diameter [mm]';...
%'! avgesd_id{#}_unc = ... not sure'};];
end
% Insert comments, then finish with /fields and /units
hdr_SEABASS = [hdr_SEABASS; hdr.comments.ZOO;...
'!';
['/fields=' colstr];
['/units=' colunits];
'/end_header'];
% check if there is whitespace in any metadata headers
% whitespace in comments is OKAY
if any(contains(hdr_SEABASS,' ') & ~contains(hdr_SEABASS,'!'))
fprintf('White space in metadata header, must remove to pass fcheck\n')
keyboard
end
%% Insert taxa definitions to comments
% AphiaID returns NULL if no match found, so replace all NaNs with NULL
% actually, use 'none' instead of 'NULL' so more human readable
taxa.AphiaID(strcmp(taxa.AphiaID,'NULL')) = {'none'};
taxa.AphiaID_parent(strcmp(taxa.AphiaID_parent,'NULL')) = {'none'};
% First, build text cells to insert
id_fields_definitions = '';
insert_ID_desc = cell(size(taxa,1)+2,1); % +2 because want to add note and column names
insert_ID_desc{1} = '! Taxomonic identifications have been matched to WoRMS AphiaIDs when possible.';
insert_ID_desc{2} = '! ID scientificName AphiaID data_provider_category scientificNameID';
for nn = 1:size(taxa,1)
insert_ID_desc{nn+2} = ['! ' pad(taxa.ID{nn},5) ' ' pad(taxa.Name{nn},18) ' ' pad(taxa.AphiaID{nn},8) ' ' pad(taxa.Name_original{nn},31) ' ' taxa.scientificNameID{nn}];
id_fields_definitions = [id_fields_definitions [taxa.ID{nn} ':' taxa.Name{nn}] ','];
end
% remove the last comma from the metadata header /id_fields_definitions
id_fields_definitions(end) = [];
id_fields_definitions = ['/id_fields_definitions=' id_fields_definitions];
idx_fields = find(contains(hdr_SEABASS,'/fields='));
hdr_SEABASS = [hdr_SEABASS(1:idx_fields-1); insert_ID_desc; '!'; id_fields_definitions; hdr_SEABASS(idx_fields:end)];
%% Specify file writing format
col_n = length(cols2);
col_n_text = find(contains(cols2,'lat')) - 1; %Latitude is the first DOUBLE column,
fmt_txt = repmat( '%s,', [1 col_n_text] );
fmt_dbl = repmat( '%f,', [1 (col_n - col_n_text)]);
% replace delimiter with new line at end of format string
fmt_dbl(end) = []; fmt_dbl = [fmt_dbl '\n'];
fmt = [fmt_txt fmt_dbl];
% reformat odv2 table to temporary variable to enable simply write to file
odv_write = table2cell(odv2); % convert to cell array to handle different variable types
odv_write = odv_write'; % transpose because fprintf function prints data columnwise
% convert NaNs to missing identifier
% odv_write(cellfun(@(x) isnumeric(x) && isnan(x), odv_write)) = {''}; % convert NaNs to '' for ODV format
odv_write(cellfun(@(x) isnumeric(x) && isnan(x), odv_write)) = {-9999}; % missing=-9999 SeaBASS required header field
%% Write odv table to file
fprintf('\n Deleting data file, if it exist (if it does not exist, you may get a WARNING, but this is OK: %s\n',sb_wfile);
eval(['delete ' sb_wfile])
fprintf('\n Writing data in ODV format to: %s\n',fullfile(submit_dir,sb_wfile));
fileID = fopen(fullfile(submit_dir,sb_wfile),'w'); % open file
if fileID < 0
fprintf(' *** Error opening file %s\n',fullfile(submit_dir,sb_wfile))
keyboard
end
fprintf(fileID,'%s\n',hdr_SEABASS{:});% write header
fprintf(fileID,fmt,odv_write{:}); % write data
fclose(fileID); % close file
% To troubleshoot why not printing correctly, comment above and just print to screen
%fprintf('%s\n',hdr_SEABASS{:}) % write header
%fprintf(fmt,odv_write{:}) % write data
%% END OF MAIN FUNCTION - SUBFUNCTIONS BELOW
fprintf('workflow is finished\n')
%% FUNCTION META = FORMAT_UVP_SEABASS_METADATA_HEADER
function hdr_SEABASS = format_UVP_SeaBASS_metadata_header
end %% FUNCTION META = FORMAT_UVP_SEABASS_METADATA_HEADER
end %% MAIN FUNCTION