# Zero offset correction: calibrate pressure sensor

## Load and inspect data
Load pickle file and inspect contents

In [None]:
import os
import pickle

# Import necessary pyologger utilities
from pyologger.load_data.datareader import DataReader
from pyologger.load_data.metadata import Metadata
from pyologger.plot_data.plotter import *
from pyologger.process_data.sampling import upsample
from pyologger.calibrate_data.zoc import *
from pyologger.plot_data.plotter import plot_depth_correction

# Run from the pyologger repository root; data_dir below is derived from the
# working directory. The default is the original hard-coded checkout. Set the
# PYOLOGGER_ROOT environment variable to point at a different checkout instead
# of editing (or commenting out) the path in this cell.
os.chdir(os.environ.get("PYOLOGGER_ROOT", "/Users/jessiekb/Documents/GitHub/pyologger"))

root_dir = os.getcwd()
data_dir = os.path.join(root_dir, "data")

# Verify the current working directory
print(f"Current working directory: {root_dir}")

In [None]:
# Fetch the metadata databases once, then keep a handle on each table.
metadata = Metadata()
metadata.fetch_databases(verbose=False)

deployment_db, logger_db, recording_db, animal_db = (
    metadata.get_metadata(db_name)
    for db_name in ("deployment_DB", "logger_DB", "recording_DB", "animal_DB")
)

# check_deployment_folder is given the deployment table and the data
# directory; its result is used below as the path of the deployment folder.
datareader = DataReader()
deployment_folder = datareader.check_deployment_folder(deployment_db, data_dir)

In [None]:
# Load the previously saved data object from the deployment's outputs folder.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only open
# pickles that were produced by this project.
pkl_path = os.path.join(deployment_folder, 'outputs', 'data.pkl')

with open(pkl_path, 'rb') as file:
    data_pkl = pickle.load(file)

# Report the sampling frequency recorded for each logger, if any.
for logger_id, info in data_pkl.logger_info.items():
    sampling_frequency = info.get('datetime_metadata', {}).get('fs')
    if sampling_frequency is not None:
        # Format the sampling frequency to 5 significant digits
        print(f"Sampling frequency for {logger_id}: {sampling_frequency:.5g} Hz")
    else:
        print(f"No sampling frequency available for {logger_id}")

## Find dives
Dive detection runs on depth that has first been zero-offset corrected (ZOC) with the helpers from `pyologger.calibrate_data.zoc`.

In [None]:
# IPython help query: show the signature/docstring of smooth_downsample_derivative.
?smooth_downsample_derivative

In [None]:
# Display the info entry stored for the pressure sensor.
data_pkl.sensor_info['pressure']


In [None]:
# Display the data stored for logger 'CC-35'.
# NOTE(review): the logger ID is hard-coded; presumably it must be one of the
# IDs present in this deployment's pickle — adjust for other deployments.
data_pkl.logger_data['CC-35']

In [None]:
# Display the pressure sensor data (its 'pressure' and 'datetime' columns are used below).
data_pkl.sensor_data['pressure']

In [None]:
# Load the depth and temperature data
depth_data = data_pkl.sensor_data['pressure']['pressure']
depth_datetime = data_pkl.sensor_data['pressure']['datetime']
depth_fs = data_pkl.sensor_info['pressure']['sampling_frequency']
temp_data = data_pkl.sensor_data['temperature']['temp']
temp_fs = data_pkl.sensor_info['temperature']['sampling_frequency']

# Zero offset correction (ZOC) parameters
first_deriv_threshold = 0.1  # m/s (recorded as "mps" in the transformation log)
min_duration = 30  # seconds
depth_threshold = 5  # meters
apply_temp_correction = False

# Dive detection parameters
min_depth_threshold = 1.0  # meters
dive_duration_threshold = 10  # seconds
smoothing_window = 5
downsampled_sampling_rate = 1  # Hz

# Number of original samples per downsampled sample. Computed once from
# downsampled_sampling_rate so the datetime decimation and the later
# upsampling stay consistent if that rate is changed.
upsampling_factor = int(depth_fs / downsampled_sampling_rate)

# Process depth data
first_derivative, downsampled_depth = smooth_downsample_derivative(
    depth_data,
    original_sampling_rate=depth_fs,
    downsampled_sampling_rate=downsampled_sampling_rate,
)

# Create the transformation log using values
# NOTE(review): smoothing_window is not passed to smooth_downsample_derivative
# above, so the "smoothed_..." entry assumes that function's own smoothing
# matches this value — confirm against its signature.
transformation_log = [
    f"downsampled_{downsampled_sampling_rate}Hz",
    f"smoothed_{smoothing_window}s"
]

# Detect flat chunks (potential surface intervals)
flat_chunks = detect_flat_chunks(
    depth=downsampled_depth,
    datetime_data=depth_datetime[::upsampling_factor],  # Adjust datetime data to match downsampled depth
    first_derivative=first_derivative,  # first derivative data
    threshold=first_deriv_threshold,  # first derivative threshold
    min_duration=min_duration,  # minimum surface duration
    depth_threshold=depth_threshold,
    original_sampling_rate=depth_fs,
    downsampled_sampling_rate=downsampled_sampling_rate,
)
num_flat_chunks = len(flat_chunks)
print(f"Number of potential surface intervals detected: {num_flat_chunks}")

# Apply zero offset correction
corrected_depth_temp, corrected_depth_no_temp, depth_correction = apply_zero_offset_correction(
    depth=downsampled_depth,
    temp=temp_data.values if temp_data is not None else None,
    flat_chunks=flat_chunks,
)

# Upsample and adjust the corrected depths to match original sampling rate
repeated_corrected_depth_temp = upsample(corrected_depth_temp, upsampling_factor, len(depth_data))
repeated_corrected_depth_no_temp = upsample(corrected_depth_no_temp, upsampling_factor, len(depth_data))

# Detect dives in the corrected depth data (the non-temperature-corrected series)
dives = find_dives(
    depth_series=repeated_corrected_depth_no_temp,
    datetime_data=depth_datetime,
    min_depth_threshold=min_depth_threshold,
    sampling_rate=depth_fs,
    duration_threshold=dive_duration_threshold,
    smoothing_window=smoothing_window,
)
num_dives = len(dives)
print(f"Number of dives detected: {num_dives}")

# Append the additional transformations
transformation_log += [
    f"ZOC_settings__first_deriv_threshold_{first_deriv_threshold}mps__minimum_duration_for_zoc_{min_duration}s__max_depth_for_surface_interval_{depth_threshold}m",
    f"DIVE_detection_settings__min_depth_threshold_{min_depth_threshold}m__dive_duration_threshol_{dive_duration_threshold}s__smoothing_window_{smoothing_window}"
]

transformation_log

### Plot data

In [None]:
# Display the upsampling factor; the plotting cell below derives dec_factor from it.
upsampling_factor

In [None]:
# IPython help query: show the signature/docstring of plot_depth_correction.
?plot_depth_correction

In [None]:
# Plot the raw depth, its derivative, both corrected depth series, the applied
# correction, and the detected dives / flat chunks.
# dec_factor is ten times the upsampling factor (presumably a plotting
# decimation step — confirm with the plot_depth_correction docstring).
dec_factor = 10 * int(upsampling_factor)

fig = plot_depth_correction(
    depth_datetime,
    dec_factor,
    depth_data,
    first_derivative,
    repeated_corrected_depth_temp,
    repeated_corrected_depth_no_temp,
    depth_correction,
    dives,
    flat_chunks,
    temp_data,
    apply_temp_correction,
)
fig.show()


### Add dives to event data

In [None]:
# Display the detected dives returned by find_dives.
dives

In [10]:
# Existing events from the loaded data object; new dive events are merged into these below.
event_data = data_pkl.event_data

# Dive duration in seconds: elapsed time between each dive's start and end.
dive_span = dives['end_time'] - dives['start_time']
dives['dive_duration'] = dive_span.dt.total_seconds()

# Function to create the event DataFrame
def create_dive_events(dives):
    # Generate the event DataFrame in one go
    events_df = pd.DataFrame({
        'date': dives['start_time'].dt.floor('D').dt.strftime('%Y-%m-%d %H:%M:%S'),
        'time': dives['start_time'].dt.strftime('%H:%M:%S.%f').str[:-3],
        'value': dives['max_depth'],
        'type': 'state',
        'key': 'dive',
        'duration': dives['dive_duration'],
        'short_description': 'dive_start',
        'long_description': 'NaN',  # Optional field, left blank
        'datetime': dives['start_time']
    })
    
    return events_df

# Generate the events from dives
new_events = create_dive_events(dives)

# Start from the new events when there is no usable existing event table
# (None, a dict, any other non-DataFrame value, or an empty DataFrame);
# otherwise append the new events to the existing ones.
if not isinstance(event_data, pd.DataFrame) or event_data.empty:
    event_data = new_events
else:
    # NOTE(review): nothing here de-duplicates. If event_data already holds
    # these dive events (e.g. this cell is re-run, or the pickle was saved
    # after a previous run), they are appended again.
    event_data = pd.concat([event_data, new_events], ignore_index=True)

data_pkl.event_data = event_data


In [None]:
# Display the merged event table.
event_data

In [12]:
# Record the distinct event keys as event_info. Series.unique() already
# returns each key once, in order of first appearance, so no further
# duplicate check is needed.
event_info = list(event_data['key'].unique())

data_pkl.event_info = event_info

In [None]:
# Display the event table now stored on the data object.
data_pkl.event_data

## Save data to pickle

In [None]:
# Sensors and transformations that produced the derived depth channel.
# A copy of transformation_log is used so that re-running this cell cannot
# add duplicate entries to the shared log.
derived_from_sensors = ["pressure"]
depth_transformation_log = list(transformation_log)

# Pick the corrected depth series to store and describe how it was derived.
if apply_temp_correction:
    corrected_depth = repeated_corrected_depth_temp
    original_name = 'Temp-corrected Depth (m)'
    # list.append returns None, so extend the lists here and reference the
    # lists themselves in derived_info below.
    derived_from_sensors.append("temperature")
    depth_transformation_log.append("temperature_correction")
else:
    corrected_depth = repeated_corrected_depth_no_temp
    original_name = 'Depth (m)'

# Save the corrected depth back to the data structure
depth_df = pd.DataFrame({
    'datetime': depth_datetime,
    'depth': corrected_depth
})
derived_info = {
    "channels": ["depth"],
    "metadata": {
        'depth': {'original_name': original_name,
                  'unit': 'm',
                  'sensor': 'pressure'}
    },
    "derived_from_sensors": derived_from_sensors,
    "transformation_log": depth_transformation_log
}

data_pkl.derived_data['depth'] = depth_df
data_pkl.derived_info['depth'] = derived_info

# Dynamically generate and print the statement
print(f"`{', '.join(derived_from_sensors)}` sensor data was transformed into derived data `depth` by applying these transformations: {', '.join(depth_transformation_log)}")

# Overwrite the pickle at pkl_path with the updated data object.
with open(pkl_path, 'wb') as file:
    pickle.dump(data_pkl, file)

In [None]:
# Export to NetCDF in the selected deployment's outputs folder (the same
# folder the pickle lives in), instead of a path hard-coded to one deployment.
data_pkl.save_to_netcdf(os.path.join(deployment_folder, 'outputs', 'deployment_data_processed.nc'))