In [None]:
import os
import pickle
import pandas as pd

# Import necessary pyologger utilities
from pyologger.utils.config_manager import ConfigManager
from pyologger.load_data.datareader import DataReader
from pyologger.load_data.metadata import Metadata
from pyologger.plot_data.plotter import *
from pyologger.process_data.sampling import upsample
from pyologger.calibrate_data.tag2animal import *
from pyologger.calibrate_data.zoc import *
from pyologger.plot_data.plotter import plot_depth_correction

# Run from the pyologger repository root so that relative paths (e.g. ./data) resolve.
# Point the PYOLOGGER_ROOT environment variable at your own checkout, for example
# PYOLOGGER_ROOT=/Users/fbar/Documents/GitHub/pyologger; when it is unset the
# original hard-coded location is used, so existing setups keep working.
os.chdir(os.environ.get("PYOLOGGER_ROOT", "/Users/jessiekb/Documents/GitHub/pyologger"))

# Paths derived from the repository root and reused by later cells
root_dir = os.getcwd()
data_dir = os.path.join(root_dir, "data")
color_mapping_path = os.path.join(root_dir, "color_mappings.json")

# Verify the current working directory
print(f"Current working directory: {root_dir}")

In [None]:
# Create the Metadata helper and load its databases without verbose output
metadata = Metadata()
metadata.fetch_databases(verbose=False)

# Keep a handle on each database; the lookups run in the order listed
deployment_db, logger_db, recording_db, animal_db = (
    metadata.get_metadata(db_name)
    for db_name in ("deployment_DB", "logger_DB", "recording_DB", "animal_DB")
)

# Resolve the deployment folder and ID under data_dir, then build the
# ConfigManager for that deployment
datareader = DataReader()
deployment_folder, deployment_id = datareader.check_deployment_folder(deployment_db, data_dir)
config_manager = ConfigManager(deployment_folder=deployment_folder, deployment_id=deployment_id)

## Upload data to DiveDB

Make sure your local DiveDB servers are running. To do so:
- Navigate to the DiveDB directory
- Run the command: `make up`
- Wait until all services are running (Django, Postgres, Jupyter)
- Make sure you have run the latest migrations: `make migrate`
- Make sure you have imported the latest logger and animal databases: `make importmetadata`

Then, you're ready to upload data!

In [None]:
# Let Django's synchronous-only code run from inside the notebook kernel.
# NOTE(review): kept before the DiveDB import on purpose — presumably the import
# initializes Django; confirm before moving this import to the top of the notebook.
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"

from DiveDB.services.data_uploader import DataUploader

data_uploader = DataUploader()

# Use a dedicated name for the upload payload: `metadata` already refers to the
# pyologger Metadata instance created earlier in the notebook, and rebinding it
# to a dict would break any re-run of code that calls its methods.
upload_metadata = {
    "animal": datareader.animal_info["Animal ID"],
    "deployment": datareader.deployment_info["Deployment ID"],
    # "Recording ID" is split on ", " and the SECOND entry is uploaded.
    # NOTE(review): raises IndexError when only one ID is listed — confirm that
    # index 1 (not 0) is the intended recording.
    "recording": datareader.deployment_info["Recording ID"].split(", ")[1],
}

# NOTE(review): the NetCDF path is specific to deployment 2024-01-16_oror-002a;
# update it when uploading a different deployment.
data_uploader.upload_netcdf('./data/2024-01-16_oror-002a/outputs/deployment_data.nc', metadata=upload_metadata)

In [None]:
from DiveDB.services.duck_pond import DuckPond

duckpond = DuckPond()

# Query the same animal ID that the upload cell sends to DiveDB instead of a
# hard-coded literal, so the request always matches the uploaded deployment.
uploaded_animal_id = datareader.animal_info["Animal ID"]

df = duckpond.get_delta_data(
    signal_names=[
        "sensor_data_ecg",
        "sensor_data_light",
        "sensor_data_temperature",
        "sensor_data_depth",
    ],
    animal_ids=uploaded_animal_id,
    # NOTE(review): presumably the target sampling frequency in Hz — confirm
    # against DuckPond.get_delta_data.
    frequency=100,
)

display(df)

## In progress: export to EDF

Ideally, the EDF export would preserve each sensor's own sampling frequency from `sensor_data` and take the metadata from `sensor_info`.

In [None]:
import mne

def export_concatenated_to_edf(concatenated_df, highest_sampling_frequency, latest_start_time, edf_filename_template):
    """
    Exports the concatenated DataFrame to an EDF file.

    Each remaining column of `concatenated_df` becomes one MNE channel of type
    'misc'. Columns holding timestamps or durations (datetime / timedelta
    dtypes) are left out because they are not signal samples.

    Parameters:
    - concatenated_df: The DataFrame containing concatenated data from all loggers.
    - highest_sampling_frequency: The highest sampling frequency among the loggers;
                                  used as the sampling frequency of every channel.
    - latest_start_time: The latest start time among the loggers. Must provide
                         `.timestamp()`, `.replace()` and `.microsecond`
                         (e.g. datetime.datetime or pandas.Timestamp); it is
                         stored as the recording's measurement date.
    - edf_filename_template: Template string for the EDF filename.
                             The string should contain `{sensor}` to be replaced with 'ALL'.

    Returns:
    - None. A message is printed and nothing is written when there is no data
      or no exportable channel.
    """
    if concatenated_df is None or concatenated_df.empty:
        print("No data available for export. Exiting.")
        return

    # Drop time-typed columns: they cannot be converted to the numeric sample
    # matrix that mne.io.RawArray needs.
    channel_df = concatenated_df.select_dtypes(exclude=["datetime", "datetimetz", "timedelta"])
    ch_names = channel_df.columns.tolist()

    # Reachable when the frame only carried time columns
    if len(ch_names) == 0:
        print("No valid channels found to export. Exiting.")
        return

    info = mne.create_info(
        ch_names=ch_names,
        sfreq=highest_sampling_frequency,
        ch_types='misc',  # Adjust ch_types as necessary
    )

    # (seconds, microseconds) tuple for the measurement date. The microsecond
    # part is read from the attribute rather than from the float timestamp,
    # whose limited precision can truncate to one microsecond too few; the
    # seconds are taken from a copy with the microseconds zeroed so the float
    # is a whole number before it is converted to int.
    whole_seconds = int(latest_start_time.replace(microsecond=0).timestamp())
    meas_date = (whole_seconds, latest_start_time.microsecond)

    # Create MNE RawArray with shape (n_channels, n_samples)
    data = channel_df.values.T
    raw = mne.io.RawArray(data, info)
    raw.set_meas_date(meas_date)

    # Define the EDF filename and save the EDF file
    edf_filename = edf_filename_template.format(sensor='ALL')

    print(f"Saving EDF file as {edf_filename} with shape {data.shape}.")

    # NOTE(review): no explicit range check happens here; the physical range is
    # left to the exporter's defaults — confirm this suits the sensor values.
    raw.export(edf_filename, fmt='edf')

    print(f"EDF file saved as {edf_filename}")


In [None]:
from pyologger.process_data.sampling import *
import pandas as pd
def concatenate_logger_data(datareader):
    """
    Concatenates data from all loggers stored in `datareader.data`.

    The visible implementation works in stages: it records each logger's
    first and last 'datetime' value plus an estimated sampling frequency,
    derives the time window shared by all loggers (latest start to earliest
    end), crops every logger to that window, and upsamples the non-"extra"
    columns of slower loggers toward the highest sampling frequency.

    Parameters:
    - datareader: The DataReader object containing logger data in `datareader.data`
                  (logger_id -> DataFrame with a 'datetime' column). Channel
                  descriptions are read from `datareader.sensor_info`, where each
                  entry provides 'channels' and 'metadata'[column]['sensor'].

    Returns:
    - concatenated_df: A pandas DataFrame with the concatenated data from all loggers.
    - highest_sampling_frequency: The highest sampling frequency found among the loggers.
    - latest_start_time: The latest start time among the loggers.
    (None, None, None) is returned when no logger supplies a usable DataFrame.

    NOTE(review): the part of this function visible here ends inside the
    column-filtering loop and never reaches a return statement on the success
    path — confirm whether the implementation continues or is unfinished.
    """
    logger_data_info = {}

    # Step 1: Extract start time, end time, and sampling frequency for each logger
    for logger_id, df in datareader.data.items():
        if not isinstance(df, pd.DataFrame):
            print(f"Logger {logger_id} does not contain a valid DataFrame. Skipping.")
            continue

        if 'datetime' not in df.columns:
            print(f"Logger {logger_id} does not have a 'datetime' column. Skipping.")
            continue

        # First and last rows are taken as the recording bounds
        # (assumes rows are in chronological order — TODO confirm).
        start_time = df['datetime'].iloc[0]
        end_time = df['datetime'].iloc[-1]
        # Frequency estimate = 1 / mean sample interval, rounded to an integer.
        # NOTE(review): a mean interval above 2 s rounds to 0 Hz, which would
        # make the division in Step 4 raise ZeroDivisionError.
        sampling_frequency = round(1 / df['datetime'].diff().dt.total_seconds().mean())

        logger_data_info[logger_id] = {
            'start_time': start_time,
            'end_time': end_time,
            'sampling_frequency': sampling_frequency
        }

        print(f"Logger {logger_id}: start_time={start_time}, end_time={end_time}, sampling_frequency={sampling_frequency} Hz")

    if not logger_data_info:
        print("No valid logger data found. Exiting.")
        return None, None, None

    # Step 2: Determine the latest start time, earliest end time, and highest sampling frequency
    # (latest start .. earliest end is the window covered by every logger)
    latest_start_time = max(info['start_time'] for info in logger_data_info.values())
    earliest_end_time = min(info['end_time'] for info in logger_data_info.values())
    highest_sampling_frequency = max(info['sampling_frequency'] for info in logger_data_info.values())

    print(f"Latest start time: {latest_start_time}")
    print(f"Earliest end time: {earliest_end_time}")
    print(f"Highest sampling frequency: {highest_sampling_frequency} Hz")

    # Step 3: Initialize an empty DataFrame for concatenation
    concatenated_df = pd.DataFrame()

    # Step 4: Crop dataframes, upsample as necessary, and concatenate
    for logger_id, df in datareader.data.items():
        # NOTE(review): only non-DataFrame entries are skipped here. A DataFrame
        # without a 'datetime' column was skipped in Step 1 but not here, so the
        # crop below would raise KeyError for it.
        if not isinstance(df, pd.DataFrame):
            continue

        # Crop dataframe to the shared window (both bounds inclusive)
        df_cropped = df[(df['datetime'] >= latest_start_time) & (df['datetime'] <= earliest_end_time)]
        print(f"Logger {logger_id}: Cropped data from {len(df)} rows to {len(df_cropped)} rows.")

        # Determine upsampling factor (true division, so this may be non-integer)
        upsampling_factor = highest_sampling_frequency / logger_data_info[logger_id]['sampling_frequency']

        # Only loggers slower than the fastest one are upsampled
        if upsampling_factor > 1:
            original_length = len(df_cropped)
            df_cropped = df_cropped.set_index('datetime')

            # Upsample each sensor column that is not "extra"
            for column in df_cropped.columns:
                sensor_info = None

                # Search for the sensor type in `datareader.sensor_info`
                for sensor_name, sensor_details in datareader.sensor_info.items():
                    if column in sensor_details['channels']:
                        sensor_info = sensor_details
                        break

                # Columns not listed under any sensor are left untouched
                if not sensor_info:
                    continue

                sensor_type = sensor_info['metadata'][column]['sensor']
                if sensor_type != 'extra':
                    print(f"Upsampling column {column} from logger {logger_id} by factor {upsampling_factor}.")
                    # int() truncates a fractional factor (e.g. 2.5 -> 2).
                    # NOTE(review): the result is written back into a column of
                    # the cropped frame, so this assumes `upsample` returns
                    # exactly len(df_cropped) values — TODO confirm.
                    df_cropped[column] = upsample(df_cropped[column].values, int(upsampling_factor), original_length)

            # Restore 'datetime' as a regular column
            df_cropped = df_cropped.reset_index()

        # Remove "extra" sensor columns and append to the concatenated DataFrame
        # NOTE(review): in the visible code `columns_to_keep` is never filled and
        # nothing is appended to `concatenated_df` — confirm the remaining logic.
        columns_to_keep = []
        for column in df_cropped.columns:
            sensor_info = None

#datareader.sensor_data['accelerometer']
#datareader.files_info