# Process CTD data
<img src="CDTVikingPenguin.webp" alt="Notebook Mascot" width="400"/>

## HOWTO
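The cell below creates an ODV-compatible data file for each vp2 file in the specified directory, plus one combined file containing the data from all of them. A tab-separated summary of the files is also copied to the clipboard.
To execute the cell, press the play button in the menu above, or select the cell and press ``CTRL+Enter``.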

In [47]:
#######################
### Input Variables ###
#######################

# Variables that change each time
voyage_id = 'XPO250205'
voyage_type = 'Antarctic Explorer'
input_dir = './'
output_dir = './' # If None then input_dir is used

# Names of the file created. These files will be placed in the output_dir
# These are not f-strings as data_filename will be different for each file.
# You can use the following placeholders to define the file names
# ``{filename}`` will insert the name of the vp2 file (minus the extension)
# ``{voyage_id}`` will insert the voyage id given above
data_filename = '{filename}_ODV.csv'
all_data_filename = '{voyage_id}_all_ODV.csv'

# Data in the HEADER section to add to each row in the DATA section
# There needs to be a unit string for each HEADER item. Use empty string to skip units
metadata_to_include = ['Latitude', 'Longitude']
metadata_units = ['DD', 'DD']

#################
### Main Code ###
#################
# This could be put in a Python file and called from the notebook.
# That way, if something happens to the notebook, the code isn't lost ;)

import csv, os
import pyperclip

# Lookup from a zero-padded month number (as text) to its three letter abbreviation
month_names = {
    f'{number:02d}': abbreviation
    for number, abbreviation in enumerate(
        ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
         'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'),
        start=1,
    )
}

def save_data(data, filepath):
    """Write a dictionary of equally sized columns to ``filepath`` as a CSV file.

    ``data`` maps each column label to the list of values in that column.
    The labels are written as the first row, followed by one row per value
    index. The file is written as UTF-8 using the csv 'excel' dialect.

    Raises ValueError when the columns do not all share one length, which
    includes the case where ``data`` has no columns at all.
    """
    print(f'Saving data: {filepath}')

    # Rows can only be formed when every column has the same length
    column_lengths = set(map(len, data.values()))
    if len(column_lengths) != 1:
        raise ValueError('Not all columns have the same number of rows')

    labels = list(data)
    with open(filepath, 'w', newline='', encoding='utf-8') as csv_file:
        out = csv.writer(csv_file, dialect='excel')

        # Column labels first ...
        out.writerow(labels)

        # ... then the values; zip(*columns) turns per-column lists into rows
        out.writerows(zip(*data.values()))
        

def _read_vp2(filepath, metadata_to_include, metadata_units):
    """Parse one vp2 file into its header metadata and its data columns.

    Returns a ``(header_metadata, data, number_of_rows)`` tuple:

    * ``header_metadata`` - dict of the ``key=value`` lines of the [HEADER] section
    * ``data`` - dict mapping column label to a list of cell strings, or None
      when the file has no [DATA] section
    * ``number_of_rows`` - number of data rows that were read
    """
    header_metadata = {}
    data = None
    number_of_rows = 0

    # Label of each metadata column: the HEADER key plus its unit, if there is one
    metadata_labels = [
        f'{name} [{unit}]' if unit != '' else name
        for name, unit in zip(metadata_to_include, metadata_units)
    ]

    # newline='' lets the csv module do its own newline handling (see csv docs)
    with open(filepath, 'r', newline='') as vp2_file:
        reader = csv.reader(vp2_file, delimiter='\t')

        for section_row in reader:
            if len(section_row) == 0:
                continue

            if section_row[0] == '[HEADER]':
                for row in reader:
                    if len(row) == 0 or row[0] == '':
                        break # end of section
                    key, separator, value = row[0].partition('=')
                    if separator: # lines without '=' carry no key/value pair
                        header_metadata[key] = value

            elif section_row[0] == '[DATA]':
                # First row is the column titles, second row is the column units.
                # A file that ends early simply yields no columns / no units.
                column_titles = [c.strip() for c in next(reader, [])]
                column_units = [c.strip() for c in next(reader, [])]

                # The column label is the title plus the unit, if there is one.
                # Units without a matching title are ignored.
                column_labels = []
                for i, title in enumerate(column_titles):
                    unit = column_units[i] if i < len(column_units) else ''
                    column_labels.append(f'{title} [{unit}]' if unit != '' else title)

                data = {label: [] for label in column_labels}
                for label in metadata_labels:
                    data[label] = []

                # Read the data row by row until the section ends
                number_of_rows = 0
                for row in reader:
                    if len(row) == 0 or row[0] == '':
                        break # end of section
                    number_of_rows += 1

                    for c, label in enumerate(column_labels):
                        # Short rows are padded with '' so every column stays the same length
                        data[label].append(row[c].strip() if c < len(row) else '')

                    # The metadata is constant so the same value goes on every row
                    for label, name in zip(metadata_labels, metadata_to_include):
                        data[label].append(header_metadata.get(name, ''))

    return header_metadata, data, number_of_rows


def _summary_date_time(date_start_time):
    """Split a 'YYYY/MM/DD H:M:S' string into ('YEAR-Mon-DAY', time) for the summary.

    Values that do not have that shape are passed through as they are instead
    of raising, so one odd header cannot stop the whole run.
    """
    parts = date_start_time.strip().split(None, 1)
    date_text = parts[0] if parts else ''
    time_text = parts[1].strip() if len(parts) > 1 else ''

    date_parts = date_text.split('/')
    if len(date_parts) == 3:
        year, month, day = date_parts
        date_text = f'{year}-{month_names.get(month, month)}-{day}'

    return date_text, time_text


def process_vp2(voyage_id, voyage_type,
                input_dir, output_dir,
                data_filename, all_data_filename,
                metadata_to_include, metadata_units):
    """Convert every vp2 file in ``input_dir`` into ODV style CSV files.

    For each vp2 file one CSV file is written, and all data is also collected
    into one combined CSV file. A tab separated summary (one line per file) is
    copied to the clipboard; if the clipboard is unavailable it is printed.
    Files are processed in alphabetical order. A file without a [DATA] section
    is reported and skipped.

    Args:
        voyage_id: Voyage id; fills the ``{voyage_id}`` placeholder and the summary.
        voyage_type: Voyage type; used in the voyage name column of the summary.
        input_dir: Directory that is searched for ``.vp2`` files.
        output_dir: Directory the CSV files are written to. None means input_dir.
        data_filename: Name template of the per-file CSV. Supports the
            ``{filename}`` and ``{voyage_id}`` placeholders.
        all_data_filename: Name template of the combined CSV.
        metadata_to_include: HEADER keys whose value is added to every data row.
        metadata_units: Unit string for each entry of ``metadata_to_include``
            ('' for no unit).

    Raises:
        ValueError: If ``metadata_units`` has fewer entries than
            ``metadata_to_include``, or if ``save_data`` rejects the columns.
    """
    if len(metadata_units) < len(metadata_to_include):
        raise ValueError('metadata_units needs a unit string (use an empty string '
                         'for no unit) for every item in metadata_to_include')

    # data_filename and all_data_filename should end with csv
    if not data_filename.endswith('.csv'):
        data_filename += '.csv'

    if not all_data_filename.endswith('.csv'):
        all_data_filename += '.csv'

    # Directory paths should end with a slash
    if not input_dir.endswith('/'):
        input_dir = input_dir + '/'

    if output_dir is None:
        output_dir = input_dir
    elif not output_dir.endswith('/'):
        output_dir = output_dir + '/'

    # Names (minus the extension) of all vp2 files in the input directory.
    # Sorted so the combined file and the summary come out in the same order every run.
    vp2_stems = sorted(entry.removesuffix('.vp2')
                       for entry in os.listdir(input_dir)
                       if entry.endswith('.vp2'))

    if not vp2_stems:
        print(f'No vp2 files found in {input_dir}')
        return

    # These are shared amongst all files
    all_data = {}
    summary = []
    all_data_size = 0

    for filename in vp2_stems:
        header_metadata, data, number_of_rows = _read_vp2(
            f'{input_dir}{filename}.vp2', metadata_to_include, metadata_units)

        if data is None:
            print(f'Skipping {filename}.vp2: no [DATA] section found')
            continue

        # Save data to file
        output_filepath = output_dir+data_filename.format(voyage_id = voyage_id, filename=filename)
        save_data(data, output_filepath)

        # Update all_data with data from this file
        for label in data:
            if label not in all_data:
                # A column that is new to all_data is first filled with empty values
                # for all earlier rows, in case the columns differ between files
                all_data[label] = [''] * all_data_size

            all_data[label].extend(data[label])

        # Columns this file does not have are padded so all columns stay the same size
        for label in all_data:
            if label not in data:
                all_data[label].extend([''] * number_of_rows)

        all_data_size += number_of_rows

        # Parse date
        date_text, time_text = _summary_date_time(
            header_metadata.get('DateStartTime', 'YYYY/MM/DD H:M:S'))

        # Update summary
        summary.append([
            '', # Benthic Photo Name
            filename, # Filename
            f'{voyage_id} - {voyage_type}', # Voyage Name
            date_text, # Date (YEAR-MONTH-DAY)
            time_text, # Time (UTC)
            '', # Location
            '', # Dive Number
            header_metadata.get('Latitude', ''), # Latitude (DD)
            header_metadata.get('Longitude', ''), # Longitude (DD)
            header_metadata.get('MaxDepthM', ''), # Maximum Depth (m)
            '', # Mode
            '', # Comments
        ])

    if not all_data:
        print('None of the vp2 files contained data, nothing more to save')
        return

    # Save all_data to file
    output_filepath = output_dir+all_data_filename.format(voyage_id = voyage_id, filename='filename')
    save_data(all_data, output_filepath)

    # copy summary to clipboard
    clipboard_text = '\n'.join('\t'.join(row) for row in summary)
    try:
        pyperclip.copy(clipboard_text)
    except pyperclip.PyperclipException:
        # No clipboard available (e.g. a remote/headless session): all files are
        # already saved, so show the summary instead of failing at the last step
        print('Could not copy the summary to the clipboard, here it is instead:')
        print(clipboard_text)
    else:
        print('Summary copied to clipboard')
    print('All Done!')

###############
### Execute ###
###############

# If you place the code above in a separate file then uncomment the following line
# from <pythonfilename> import process_vp2

# Run the conversion with the input variables defined at the top of the cell
process_vp2(
    voyage_id=voyage_id,
    voyage_type=voyage_type,
    input_dir=input_dir,
    output_dir=output_dir,
    data_filename=data_filename,
    all_data_filename=all_data_filename,
    metadata_to_include=metadata_to_include,
    metadata_units=metadata_units,
)

Saving data: ./VL_92775_250209114751_ODV.csv
Saving data: ./VL_92775_250209114751 Duplicate_ODV.csv
Saving data: ./XPO250205_all_ODV.csv
Summary copied to clipboard
All Done!
