# Ocean Data Processing

In [122]:
#IMPORT REQUIRED MODULES
import os
import xarray as xr
import pandas as pd
from datetime import datetime, timedelta
from collections import defaultdict
import numpy as np
import json
import math
import gsw # Gibbs-SeaWater (GSW) Oceanographic Toolbox 

## OMG ALAMO Floats (F9250 & F9313)

The following code processes the data recorded by two OMG ALAMO floats deployed within Disko Bay, F9250 and F9313 (https://podaac.jpl.nasa.gov/dataset/OMG_L1_FLOAT_ALAMO). The data are provided as JSON files. To begin, the number of JSON files within each directory is printed. The JSON files are then processed in order to sample oceanic variables at a specified depth; for the purposes of this study, oceanic variables were sampled at a depth of 240 m within Disko Bay. Temperature, salinity and pressure are extracted from each profile, with the date, longitude and latitude of each dive also recorded from the 'DiveStart' metadata. It should be noted that depth is not measured directly and is therefore calculated from pressure, using the `gsw` Python package, an implementation of the Gibbs-SeaWater (GSW) Oceanographic Toolbox (https://www.teos-10.org/software.htm#1).

In [119]:
# Define a function to count the JSON files found beneath a specified directory.
def count_json_files(directory):
    """Return how many files ending in ``.json`` live under *directory*.

    The directory tree is walked recursively with ``os.walk``, so files in
    sub-directories are counted as well. The suffix comparison is
    case-sensitive. A directory that does not exist yields a count of 0,
    because ``os.walk`` simply produces nothing for it.

    Args:
        directory: Path of the root directory to search.

    Returns:
        The number of matching files as an ``int``.
    """
    root = os.path.normpath(directory)
    total = 0
    for _, _, names in os.walk(root):
        # Each boolean counts as 0 or 1, so the sum is the number of matches.
        total += sum(name.endswith(".json") for name in names)
    return total

# Root folders holding the JSON dive files of each ALAMO float.
ALAMO_F9250_directory = 'R:/JAKOBSHAVN/CODE/github/jakobshavn_isbrae/OMG_ALAMO_F9250/'
ALAMO_F9313_directory = 'R:/JAKOBSHAVN/CODE/github/jakobshavn_isbrae/OMG_ALAMO_F9313/'

# Count the JSON files of each float, then report both totals.
f9250_json_total = count_json_files(ALAMO_F9250_directory)
f9313_json_total = count_json_files(ALAMO_F9313_directory)
print(f"Number of JSON files in ALAMO_F9250 directory: {f9250_json_total}")
print(f"Number of JSON files in ALAMO_F9313 directory: {f9313_json_total}")

Number of JSON files in ALAMO_F9250 directory: 90
Number of JSON files in ALAMO_F9313 directory: 94


In [123]:
# SPECIFY THE INPUT DIRECTORY, DESIRED SAMPLING DEPTH AND OUTPUT FILENAME.
directory = ALAMO_F9250_directory
desired_depth = 240
output_filename = 'ALAMO_F9250_240m.csv'

# Fixed latitude (degrees north) handed to gsw.z_from_p for the pressure-to-depth conversion.
latitude = 69.2

# Column order of the output CSV.
output_columns = ['date', 'lon', 'lat', 'temperature', 'salinity', 'pressure', 'depth']


def _sample_dive_at_depth(json_path, target_depth, conversion_latitude):
    """Return the binned ascending sample closest to *target_depth* for one dive file.

    The file is expected to hold a JSON list whose first element has a
    'dives' list; only the first dive and its first GPS fix are read
    (schema inferred from the keys accessed here).

    Args:
        json_path: Path of the JSON file to read.
        target_depth: Depth to sample at, in the units returned by
            ``gsw.z_from_p`` (sign-flipped so that depth is positive).
        conversion_latitude: Latitude passed to ``gsw.z_from_p``.

    Returns:
        A dict with the keys 'date', 'lon', 'lat', 'temperature',
        'salinity', 'pressure' and 'depth', or ``None`` when the file must
        be skipped: no dive or GPS fix, lat/lon missing or equal to 0,
        temperature/salinity/pressure empty or of different lengths, or no
        finite depth value.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(json_path, 'r') as openfile:
        CTD_data = json.load(openfile)

    # A file without a dive or without a GPS fix is skipped instead of
    # aborting the whole run with an IndexError.
    try:
        dive = CTD_data[0]['dives'][0]
        gps_fix = dive['trajectory']['gps'][0]
    except (KeyError, IndexError, TypeError):
        return None

    # Skip files with no position: either the value is absent or it is 0.
    lat = gps_fix.get('lat')
    lon = gps_fix.get('lon')
    if lat is None or lon is None or lat == 0 or lon == 0:
        return None

    binned = dive.get('science', {}).get('ascending', {}).get('binned', {})
    temperature = binned.get('temperature', [])
    salinity = binned.get('salinity', [])
    pressure = np.array(binned.get('pressure', []))
    if pressure.dtype == object:
        # JSON null entries arrive as None; turn them into NaN so the
        # depth conversion receives a numeric array.
        pressure = pressure.astype(float)

    # All three profiles must be non-empty and aligned bin-for-bin.
    if not (len(temperature) == len(salinity) == len(pressure) > 0):
        return None

    # Calculate depth based on pressure, using the gsw package. z_from_p
    # returns height (negative below the surface), hence the sign flip.
    depth = np.asarray(-1 * gsw.z_from_p(pressure, conversion_latitude), dtype=float)

    # Pick the bin closest to the target depth, ignoring NaN depths.
    # (Series.argsort marks NaN entries with -1, which would select the
    # wrong row, so a NaN-aware arg-min is used instead.)
    offset = np.abs(depth - target_depth)
    if np.all(np.isnan(offset)):
        return None
    nearest = int(np.nanargmin(offset))

    return {
        'date': gps_fix.get('datetime'),
        'lon': lon,
        'lat': lat,
        'temperature': temperature[nearest],
        'salinity': salinity[nearest],
        'pressure': pressure[nearest],
        'depth': depth[nearest],
    }


# Within the specified directory, sample each JSON file. Files are visited in
# sorted order so that the row order of the output is reproducible.
sampled_rows = []
for filename in sorted(os.listdir(directory)):
    if not filename.endswith(".json"):
        continue
    sampled_row = _sample_dive_at_depth(os.path.join(directory, filename), desired_depth, latitude)
    if sampled_row is not None:
        sampled_rows.append(sampled_row)

# Build the combined DataFrame once, rather than concatenating inside the loop.
combined_df = pd.DataFrame(sampled_rows, columns=output_columns)

# Convert the 'date' column to the desired format
combined_df['date'] = pd.to_datetime(combined_df['date']).dt.strftime('%d/%m/%Y')
output_csv = os.path.join(directory, output_filename)  # Use the predefined variable for the output filename

# Save the combined DataFrame to an output CSV.
combined_df.to_csv(output_csv, index=False)
print(f"Sampled data saved to {output_csv}")

Sampled data saved to R:/JAKOBSHAVN/CODE/github/jakobshavn_isbrae/OMG_ALAMO_F9250/ALAMO_F9250_240m.csv


## OMG APEX Float (F9184)

## Greenland Ecosystem Monitoring 