In [32]:
# IMPORT REQUIRED MODULES
import pandas as pd
import json
import numpy as np
import xarray as xr
import datetime
import os
import glob
import gsw
import gsw.density as gsw_density

## OMG ALAMO Floats (F9250 & F9313)

The ALAMO F9250 & F9313 datasets are provided as JSON files. To begin, the number of JSON files in each directory is printed.

In [22]:
# Calculate the number of JSON files within a specified directory
def count_json_files(directory):
    """Count the files whose names end in ".json" under `directory`.

    The directory tree is walked recursively, so files in sub-directories
    are included. The suffix match is case-sensitive.

    Parameters
    ----------
    directory : str
        Path of the root directory to search.

    Returns
    -------
    int
        Number of matching files (0 if none are found).
    """
    root = os.path.normpath(directory)
    return sum(
        1
        for _, _, names in os.walk(root)
        for name in names
        if name.endswith(".json")
    )

# Root folders holding the JSON profiles of each ALAMO float.
ALAMO_F9250_directory = 'R:/JAKOBSHAVN/CODE/github/jakobshavn_isbrae/data/omg_alamo_f9250/'
ALAMO_F9313_directory = 'R:/JAKOBSHAVN/CODE/github/jakobshavn_isbrae/data/omg_alamo_f9313/'

# Count the JSON files for each float, then report both totals.
f9250_json_count = count_json_files(ALAMO_F9250_directory)
f9313_json_count = count_json_files(ALAMO_F9313_directory)
print(f"Number of JSON files in ALAMO_F9250 directory: {f9250_json_count}")
print(f"Number of JSON files in ALAMO_F9313 directory: {f9313_json_count}")

Number of JSON files in ALAMO_F9250 directory: 90
Number of JSON files in ALAMO_F9313 directory: 94


In the following code block, specify the directory to be used (i.e. either ALAMO_F9250_directory or ALAMO_F9313_directory), and the desired csv output filename.

The potential density range from which data is selected is based upon the potential density values provided by Gladish et al. (2015a) - i.e. between 27.20 and 27.31 kg/m3.

In [23]:
# Specify the input directory, desired potential density (sigma_theta) range, and output filename.
# NOTE(review): the output path below is hard-coded to the f9250 folder and does not follow
# `directory`; it must be edited by hand when switching to ALAMO_F9313_directory.
directory = ALAMO_F9250_directory
# NOTE(review): the filename says "27.10" but the lower bound set below is 27.20 - confirm which is intended.
output_filename = 'R:/JAKOBSHAVN/CODE/github/jakobshavn_isbrae/data/omg_alamo_f9250/f9250_pd_27.10_27.31.csv'
# Columns, in order, written to the output CSV.
column_order = ['date', 'lon', 'lat', 'temperature', 'salinity', 'pressure', 'depth', 'potential_density', 'sigma_theta']
# Inclusive bounds on sigma_theta (density minus 1000) used to select rows from each profile.
desired_sigma_theta_min = 27.20
desired_sigma_theta_max = 27.31

The following code block extracts all data within the desired potential density range, outputting the results to a csv. 

Note that depth is calculated from pressure, and potential density is calculated from salinity and temperature, both using the gsw package from the Gibbs-SeaWater (GSW) Oceanographic Toolbox (https://www.teos-10.org/software.htm#1).

In [11]:
# Collect one filtered DataFrame per profile; they are concatenated once at the end.
dfs = []

# Nominal latitude (degrees N) used for the pressure-to-depth conversion of every profile.
latitude = 69.2

# Within the specified directory, open each JSON file in read mode.
# The listing is sorted so that the row order of the output CSV is reproducible.
for filename in sorted(os.listdir(directory)):
    if not filename.endswith(".json"):
        continue
    json_path = os.path.join(directory, filename)
    with open(json_path, 'r') as openfile:
        CTD_data = json.load(openfile)
    CTD_data_dict = CTD_data[0]

    # Use the first dive and its first GPS fix. Files with no dive record, no GPS fix,
    # or a missing/zero longitude or latitude are skipped.
    dives = CTD_data_dict.get('dives', [])
    if not dives:
        continue
    dive = dives[0]
    gps_fixes = dive.get('trajectory', {}).get('gps', [])
    if not gps_fixes:
        continue
    gps_fix = gps_fixes[0]
    lat = gps_fix.get('lat', None)
    lon = gps_fix.get('lon', None)
    if lat is None or lon is None or lat == 0 or lon == 0:
        continue
    date = gps_fix.get('datetime', None)

    # Binned temperature, salinity and pressure from the ascending part of the dive.
    binned = dive.get('science', {}).get('ascending', {}).get('binned', {})
    temperature = binned.get('temperature', [])
    salinity = binned.get('salinity', [])
    pressure = binned.get('pressure', [])

    # All three variables must be non-empty and the same length, otherwise the
    # bins cannot be matched up and the profile is skipped.
    if not (len(temperature) == len(salinity) == len(pressure) > 0):
        continue
    n_bins = len(pressure)

    # Missing (null) bins become NaN and are dropped by the range filter below.
    temperature = np.asarray(temperature, dtype=float)
    salinity = np.asarray(salinity, dtype=float)
    pressure = np.asarray(pressure, dtype=float)

    # Using the gsw package, calculate depth (positive downwards) from pressure.
    depth = -1 * gsw.z_from_p(pressure, latitude)

    # Potential density. gsw.rho(SA, CT, p) returns IN-SITU density, which includes the
    # compression due to pressure and must not be used as sigma_theta. Potential density
    # referenced to 0 dbar is obtained from Absolute Salinity and Conservative Temperature
    # via gsw.sigma0, which returns the anomaly (potential density - 1000 kg/m3).
    # NOTE(review): assumes the JSON holds practical salinity and in-situ temperature
    # (the usual CTD outputs) - confirm against the ALAMO data format.
    absolute_salinity = gsw.SA_from_SP(salinity, pressure, lon, lat)
    conservative_temperature = gsw.CT_from_t(absolute_salinity, temperature, pressure)
    sigma_theta = gsw.sigma0(absolute_salinity, conservative_temperature)
    potential_density = sigma_theta + 1000

    # Assemble the profile; position and date are repeated for every bin.
    data_dict = {'temperature': temperature, 'salinity': salinity, 'pressure': pressure, 'depth': depth,
                 'potential_density': potential_density, 'sigma_theta': sigma_theta,
                 'lon': [lon] * n_bins,
                 'lat': [lat] * n_bins,
                 'date': [date] * n_bins}
    df = pd.DataFrame(data_dict)

    # Keep only the rows inside the desired (inclusive) range of potential density.
    filtered_df = df[df['sigma_theta'].between(desired_sigma_theta_min, desired_sigma_theta_max)]
    dfs.append(filtered_df)

# pd.concat fails with an opaque message on an empty list, so report the real cause instead.
if not dfs:
    raise ValueError(f'No JSON profiles with usable GPS and CTD data were found in {directory}')

# Save the combined DataFrame to a CSV file
combined_df = pd.concat(dfs, ignore_index=True)
combined_df.to_csv(output_filename, index=False, columns=column_order)
print(f'Data with potential density in the desired range saved to {output_filename}')

Data with potential density in the desired range saved to R:/JAKOBSHAVN/CODE/github/jakobshavn_isbrae/data/omg_alamo_f9313/f9313_pd_27.10_27.31.csv


## Greenland Ecosystem Monitoring 

The GEM Database was searched using ‘MarineBasis Disko’ - ‘Water Column’ - ‘CTD Measurements’ - ‘2018-01-01 to 2022-12-31’. The resulting CSV was downloaded and saved as 'ctd_download_disko_bay.csv'.

The following block of code extracts all data within the specified potential density range, outputting the results to a csv named 'ctd_filtered_disko_bay'.

In [31]:
# Open the GEM CTD data provided for Disko Bay between 2018 and 2023.
gem_data = 'R:/JAKOBSHAVN/CODE/github/jakobshavn_isbrae/data/gem_ctd_disko_bay/ctd_download_disko_bay.csv'
df = pd.read_csv(gem_data, encoding='ISO-8859-1')

# Filter the GEM CTD data within the specified (inclusive) density range.
# NOTE(review): the download's 'Density (kg/m^3)' column is treated here as potential
# density - confirm that it is not in-situ density.
desired_pd_min = 1027.20
desired_pd_max = 1027.31
filtered_df = df[df['Density (kg/m^3)'].between(desired_pd_min, desired_pd_max)]

# Export each profile, defined by date, to a CSV.
grouped = filtered_df.groupby('Date')
output_folder = 'R:/JAKOBSHAVN/CODE/github/jakobshavn_isbrae/data/gem_ctd_disko_bay/individual_profiles/'
os.makedirs(output_folder, exist_ok=True)  # the per-profile export fails if the folder is missing
profiles = []
for date, group in grouped:
    # '/' is not allowed in a file name, so dates such as 01/02/2018 become 01-02-2018.
    formatted_date = str(date).replace('/', '-')
    output_filename = os.path.join(output_folder, f'GEM_CSV_profile_{formatted_date}.csv')
    group.to_csv(output_filename, index=False)
    profiles.append(group)

# Output and save a combined csv of all profiles filtered between the desired potential density threshold.
# The combined table is built from the profiles exported above rather than by re-reading
# every CSV in the folder: re-reading would also pick up stale files left by earlier runs
# (e.g. with other thresholds), and decoding the UTF-8 files written by to_csv as
# ISO-8859-1 corrupts any non-ASCII characters.
if profiles:
    combined_df = pd.concat(profiles, ignore_index=True)
else:
    # Nothing in range: still write a header-only file.
    combined_df = filtered_df.iloc[0:0]
output_file = 'R:/JAKOBSHAVN/CODE/github/jakobshavn_isbrae/data/gem_ctd_disko_bay/ctd_filtered_disko_bay.csv'
combined_df.to_csv(output_file, index=False)