In [None]:
import os
import pickle
import sys

import matplotlib.pyplot as plt
import nbformat
import numpy as np
import pandas as pd
import plotly.express as px
import pytz
from dotenv import load_dotenv
from notion_client import Client

from datareader import DataReader
from metadata import Metadata
# Print the installed nbformat version as a quick environment sanity check.
# NOTE(review): presumably here because plotly's notebook rendering depends on
# nbformat being installed — confirm, and drop this line if it is no longer needed.
print(nbformat.__version__)


In [None]:
# Initialize the Metadata class
# `Metadata` comes from the project-local `metadata` module. fetch_databases()
# is called right after construction; a later cell reads from the instance via
# metadata.get_metadata(...).
# NOTE(review): presumably fetch_databases() pulls from Notion (notion_client is
# imported above) and may need credentials/network — confirm.
# NOTE(review): this cell runs before the working directory is changed in the
# next cell; if Metadata reads a relative .env/config file the order may
# matter — confirm. The same two calls are repeated in the last cell.
metadata = Metadata()
metadata.fetch_databases()

In [None]:
# Change the current working directory to the project root so the relative
# paths used by later cells (e.g. "outputs/...") resolve correctly.
# The root can be overridden without editing the notebook by setting the
# FINESCALE_HR_ROOT environment variable; the original location is the fallback.
project_root = os.environ.get(
    "FINESCALE_HR_ROOT", "/Users/jessiekb/Documents/GitHub/Finescale-HR"
)
if not os.path.isdir(project_root):
    # Fail with an actionable message instead of a bare chdir error.
    raise FileNotFoundError(
        f"Project root not found: {project_root!r}. "
        "Set the FINESCALE_HR_ROOT environment variable to the repository root."
    )
os.chdir(project_root)

# Verify the current working directory
print(f"Current working directory: {os.getcwd()}")

In [None]:
# Collect this recording's CSV exports from outputs/ and stitch them into one
# DataFrame.
RecID = '2024-06-17_oror-002-001a_CO-68'
folder_path = os.path.join(os.getcwd(), "outputs")

# Keep only "<RecID>..._<n>.csv" files and order them by the trailing integer
# <n>; a plain lexicographic sort would place "_10" before "_2".
csv_files = sorted(
    (name for name in os.listdir(folder_path)
     if name.endswith('.csv') and name.startswith(RecID)),
    key=lambda name: int(name.split('_')[-1].split('.')[0]),
)
print(csv_files)

# pd.concat([]) fails with the unhelpful "No objects to concatenate"; say what
# was actually being looked for instead.
if not csv_files:
    raise ValueError(
        f"No CSV files starting with {RecID!r} found in {folder_path!r}"
    )

# Read every file, in order, into its own dataframe
dfs = [pd.read_csv(os.path.join(folder_path, name)) for name in csv_files]

# Concatenate all the dataframes; ignore_index gives a continuous 0..n-1 index
final_df = pd.concat(dfs, ignore_index=True)

# Print the final dataframe
print(final_df)


In [None]:
# TODO: handle the leading spaces in the CSV column names (e.g. " Date (local)", " Time (local)") when loading the data.



In [None]:
# Function to read and concatenate CSV files in order
def read_and_concatenate_csvs(folder_path, RecID):
    """Load every CSV chunk of one recording into a single DataFrame.

    A file is selected when its name starts with ``RecID`` and ends with
    ``.csv``. Selected files are ordered by the integer found between the last
    underscore and the next dot of the file name (``..._<n>.csv``), so ``_2``
    comes before ``_10``.

    Parameters
    ----------
    folder_path : str
        Directory to scan (sub-directories are not searched).
    RecID : str
        File-name prefix identifying the recording.

    Returns
    -------
    pandas.DataFrame
        Rows of all matching files in chunk order, with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If no file matches, or a matching file name has no integer chunk
        number after its last underscore.
    """
    def chunk_number(name):
        # "<prefix>_<n>.csv" -> n
        return int(name.split('_')[-1].split('.')[0])

    csv_files = sorted(
        (name for name in os.listdir(folder_path)
         if name.endswith('.csv') and name.startswith(RecID)),
        key=chunk_number,
    )

    # pd.concat([]) would raise an opaque "No objects to concatenate";
    # keep the exception type but report what was being looked for.
    if not csv_files:
        raise ValueError(
            f"No CSV files starting with {RecID!r} found in {folder_path!r}"
        )

    frames = [pd.read_csv(os.path.join(folder_path, name)) for name in csv_files]
    return pd.concat(frames, ignore_index=True)

# Path and RecID
folder_path = os.path.join(os.getcwd(), "outputs")
RecID = '2024-06-17_oror-002-001a_CO-68'

# Read and concatenate data
final_df = read_and_concatenate_csvs(folder_path, RecID)

# Get datetime
# Build one timestamp per row from the local date and time text columns (both
# column names carry a leading space), then mark it as US Pacific local time.
final_df['datetime'] = pd.to_datetime(
    final_df[" Date (local)"] + ' ' + final_df[" Time (local)"],
    format='%d.%m.%Y %H:%M:%S.%f',
)
final_df['datetime'] = final_df['datetime'].dt.tz_localize(pytz.timezone('America/Los_Angeles'))
print(final_df['datetime'][0])

# Calculate time differences and cumulative sum of differences
# sec_diff[0] is NaN because the first row has no predecessor.
sec_diff = final_df['datetime'].diff().dt.total_seconds()
final_df['cum_diff'] = sec_diff.cumsum()

# Check for inconsistencies (time jumps)
# A step counts as a jump when it exceeds twice the mean step.
mean_diff = sec_diff.mean()
time_jumps = sec_diff[sec_diff > mean_diff * 2]  # Define a threshold for time jumps

# Report any inconsistencies
# The sampling frequency is only reported when no jumps were found.
if not time_jumps.empty:
    print(f"Time jumps detected:\n{time_jumps}")
else:
    print("No significant time jumps detected.")
    print(f"Sampling frequency: {1 / mean_diff} Hz")

# Plot cumulative differences
# Requires matplotlib.pyplot to be imported as `plt` in the notebook's import cell.
fig, ax = plt.subplots()
ax.plot(final_df['datetime'], final_df['cum_diff'])
ax.set_xlabel('Time')
ax.set_ylabel('Cumulative Difference (seconds)')
ax.set_title('Cumulative Difference over Time')
plt.show()

In [None]:
# Plot prep
# CO_df is another name for final_df (no copy is made).
CO_df = final_df

# Estimate the CATS sampling rate from the spacing of the first two samples and
# report the largest gap anywhere in the record. Positional .iloc access is
# used so the lookup does not depend on the index labels.
CO_first_step = CO_df['datetime'].iloc[1] - CO_df['datetime'].iloc[0]
print(CO_first_step)
CO_fs = 1 / CO_first_step.total_seconds()
# Series.diff() stays vectorized for timezone-aware timestamps.
CO_max_timediff = CO_df['datetime'].diff().max()
print(f"CATS Sampling frequency: {CO_fs} Hz with a maximum time difference of {CO_max_timediff}")

# Load the data_reader object from the pickle file
# SECURITY: pickle.load can execute arbitrary code embedded in the file; only
# load pickles that were produced by this project's own pipeline.
with open('outputs/data_reader.pkl', 'rb') as file:
    data_reader = pickle.load(file)

# Get the ECG and timestamp data
# NOTE: the 'datetime' column is added to the frame stored inside data_reader
# (the frame is not copied first).
ecg_df = data_reader.data_raw['2024-06-17_oror-002-001a_UF-04_001']
ecg_df['datetime'] = pd.to_datetime(ecg_df['timestamp'])
ecg_df['datetime'] = ecg_df['datetime'].dt.tz_localize(pytz.timezone('America/Los_Angeles'))
print(ecg_df['datetime'].iloc[0])
print(ecg_df)

# Same sampling-rate estimate and largest-gap report for the ECG record.
ecg_first_step = ecg_df['datetime'].iloc[1] - ecg_df['datetime'].iloc[0]
print(ecg_first_step)
ecg_fs = 1 / ecg_first_step.total_seconds()
ecg_max_timediff = ecg_df['datetime'].diff().max()
print(f"ECG Sampling frequency: {ecg_fs} Hz with a maximum time difference of {ecg_max_timediff}")

In [None]:
# Subsample both records to a common rate for a quick static overview plot.
new_sampling_rate = 10

# Keep every k-th row. The measured rates are floats derived from timestamp
# spacing, so the ratio is rounded rather than truncated (int() would turn
# 39.99 into 39), and floored at 1 so a source rate below the target can never
# produce a zero slice step.
ecg_conversion = max(1, int(round(ecg_fs / new_sampling_rate)))
CATS_conversion = max(1, int(round(CO_fs / new_sampling_rate)))

ecg_df10 = ecg_df.iloc[::ecg_conversion, :]  # ECG rows at ~new_sampling_rate Hz
CO_df10 = CO_df.iloc[::CATS_conversion, :]  # CATS rows at ~new_sampling_rate Hz

# CATS columns drawn on the first four panels; the column name doubles as the
# y-axis label.
CATS_channels = [
    'Accelerometer X [m/s²]',
    'Accelerometer Y [m/s²]',
    'Accelerometer Z [m/s²]',
    'Depth (100bar) [m]',
]

# Requires matplotlib.pyplot to be imported as `plt` in the notebook's import cell.
fig, axs = plt.subplots(5, 1, figsize=(10, 10))

for ax, channel in zip(axs, CATS_channels):
    ax.plot(CO_df10['datetime'], CO_df10[channel])
    ax.set_ylabel(channel)

# ECG goes on the last panel, which also carries the shared x-axis label.
axs[4].plot(ecg_df10['datetime'], ecg_df10['ecg'])
axs[4].set_ylabel('ECG [mV]')
axs[4].set_xlabel('Datetime')

plt.show()

In [None]:
import plotly.graph_objs as go
from plotly.subplots import make_subplots

# Target rates for the interactive figure: ECG is kept denser than the CATS
# channels.
new_CATS_sampling_rate = 10
new_ecg_sampling_rate = 50

# Keep every k-th row. Round the float ratio instead of truncating it, and
# never let the slice step drop to zero.
ecg_conversion = max(1, int(round(ecg_fs / new_ecg_sampling_rate)))
CATS_conversion = max(1, int(round(CO_fs / new_CATS_sampling_rate)))

ecg_df50 = ecg_df.iloc[::ecg_conversion, :]  # ECG rows at ~new_ecg_sampling_rate Hz
CO_df10 = CO_df.iloc[::CATS_conversion, :]  # CATS rows at ~new_CATS_sampling_rate Hz

# Create subplots
fig = make_subplots(rows=6, cols=1, shared_xaxes=True, vertical_spacing=0.01)

# One entry per subplot row, top to bottom:
# (source frame, column, trace name / y-axis title, line colour).
# The ECG row uses the 50 Hz subsample computed above.
panels = [
    (ecg_df50, 'ecg', 'ECG [mV]', 'orange'),
    (CO_df10, 'Depth (100bar) [m]', 'Depth [m]', 'purple'),
    (CO_df10, 'Accelerometer X [m/s²]', 'Accel X [m/s²]', 'blue'),
    (CO_df10, 'Accelerometer Y [m/s²]', 'Accel Y [m/s²]', 'green'),
    (CO_df10, 'Accelerometer Z [m/s²]', 'Accel Z [m/s²]', 'red'),
    (CO_df10, 'Gyroscope X [mrad/s]', 'Gyr X [mrad/s]', 'pink'),
]

for row, (frame, column, label, colour) in enumerate(panels, start=1):
    fig.add_trace(
        go.Scatter(
            x=frame['datetime'],
            y=frame[column],
            mode='lines',
            name=label,
            line=dict(color=colour),
        ),
        row=row,
        col=1,
    )
    fig.update_yaxes(title_text=label, row=row, col=1)

# Depth increases downwards, so flip that panel's y-axis.
fig.update_yaxes(autorange="reversed", row=2, col=1)

# Update layout
fig.update_layout(height=800, width=1000, title_text="Subsampled Data Plots", showlegend=False)
fig.update_xaxes(title_text="Datetime", row=6, col=1)

# Show plot
fig.show()


In [None]:
# Build a Metadata instance and have it fetch its databases
metadata = Metadata()
metadata.fetch_databases()

# Pull the logger table out of the fetched metadata
logger_db = metadata.get_metadata("logger_DB")

# Distinct LoggerID values present in the logger table
logger_ids = {logger_id for logger_id in logger_db['LoggerID']}
print(f"Unique Logger IDs: {logger_ids}")

# Count loggers for every (Manufacturer, Type) pair and flatten the result
# into a regular table with a 'Count' column
per_type_counts = logger_db.groupby(['Manufacturer', 'Type']).size()
logger_breakdown = per_type_counts.reset_index(name='Count')
print("Logger Breakdown by Manufacturer and Type:", logger_breakdown, sep="\n")

