In [None]:
import pandas as pd
import netCDF4 as nc4
from glob import glob
import os
import numpy as np
import matplotlib.pyplot as plt

List the deployments that have been manually labeled

In [None]:
# Folder holding the labelling spreadsheet ("BreathingVideo.xlsx") and "breaths.csv".
# NOTE(review): absolute, user-specific path; consider moving it to a config cell.
box_dir = "/Users/frank/Box/Research/1 In progress/Gas exchange/Breathing ethogram/"
available_data = pd.read_excel(box_dir + "BreathingVideo.xlsx",
                               engine="openpyxl",
                               sheet_name="Overview",
                               index_col=0)

# Keep only rows whose index contains a deployment-style ID (two letters, six digits).
# The character class is spelled `[A-Za-z]` because `[A-z]` also matches the six
# punctuation characters that sit between "Z" and "a" in ASCII.
# `na=False` treats missing index values as non-matches (previously done via `== True`).
is_deployment = available_data.index.str.contains(r"[A-Za-z]{2}[0-9]{6}", na=False)
# Copy so the column assignment below writes to an independent frame, not a slice.
available_data = available_data[is_deployment].copy()

# NOTE(review): astype(bool) maps missing cells (NaN) to True - confirm that the
# "aligned" and "prh_nc" columns have no blanks for deployment rows.
available_data[["aligned", "prh_nc"]] = available_data[["aligned", "prh_nc"]].astype(bool)

# Only deployments flagged in both columns are usable downstream.
available_data = available_data.query("aligned & prh_nc")
available_data

Pick a deployment and read the PRH

In [None]:
# Deployment to explore in the cells below.
export_id = "be180423-42"
nc_path = glob("/Volumes/COPYCATSdat/CATS/tag_data/prh/{}*.nc".format(export_id))
print(nc_path)
# Fail with a descriptive error instead of a bare IndexError when nothing matches
# (for example when the data volume is not mounted).
if not nc_path:
    raise FileNotFoundError(
        "No PRH .nc file found for deployment {}".format(export_id)
    )
# If several files match, the first one returned by glob is used.
prh_nc = nc4.Dataset(nc_path[0])

In [None]:
# Fixed-offset zone name built from the tag's recorded offset. The sign is negated
# before formatting (POSIX "Etc/GMT+N" names denote UTC-N).
whale_tz = "Etc/GMT%+i" % -prh_nc.dephist_device_tzone

# "DN" minus 719529 is interpreted as days since the Unix epoch
# (presumably MATLAB datenums - TODO confirm), then shifted to the whale's zone.
days_since_epoch = np.array(prh_nc["DN"]) - 719529
dt = pd.to_datetime(days_since_epoch, unit="D", utc=True).tz_convert(whale_tz)

depth = np.array(prh_nc["P"])
pitch = np.array(prh_nc["pitch"])
roll = np.array(prh_nc["roll"])

# Squared sample-to-sample change in "Aw", summed over the first axis. The appended
# NaN keeps the output the same length as the input, so the last value is NaN.
accel_step = np.diff(np.array(prh_nc["Aw"]), append=np.nan)
jerk = (accel_step ** 2).sum(axis=0)

prh_data = pd.DataFrame(
    {"depth": depth, "pitch": pitch, "roll": roll, "jerk": jerk},
    index=dt,
)
prh_data.head()

In [None]:
# Dive profile for the whole deployment; depth is negated so deeper plots lower.
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(prh_data.index, -prh_data["depth"])
plt.show()

Match video coverage to PRH data

In [None]:
# One row per aligned video for the chosen deployment.
# `.loc[[export_id]]` (list selector) always yields a DataFrame, so a deployment with
# a single video row no longer collapses to a Series and breaks the `.dt` calls below.
# `.copy()` makes the column assignments write to an independent frame.
video_coverage = pd.read_excel(box_dir + "BreathingVideo.xlsx",
                               engine="openpyxl",
                               sheet_name="Time alignment",
                               index_col=0,
                               converters={"boristime1": str, "boristime2": str}
                               ).loc[[export_id]].copy()

# Camera times are read without a zone; attach the whale's zone so they can be
# compared with the PRH index.
for col in ["camtime1", "camtime2"]:
    video_coverage[col] = video_coverage[col].dt.tz_localize(whale_tz)

# BORIS times are read as text (see `converters`) and converted to elapsed seconds.
for col in ["boristime1", "boristime2"]:
    video_coverage[col] = pd.to_timedelta(video_coverage[col]).dt.total_seconds()

video_coverage

In [None]:
# Dive profile with every video's time span shaded in yellow.
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(prh_data.index, -prh_data["depth"])
for vid_start, vid_end in zip(video_coverage["camtime1"], video_coverage["camtime2"]):
    ax.axvspan(vid_start, vid_end, color="y", alpha=0.5, lw=0)
plt.show()

Pick out a sample and highlight labeled breaths

In [None]:
# Sample window for the deployment selected above ("be180423-42").
# The previous window was dated 2019-02-28, which lies outside this 2018-04-23
# deployment and produced an empty sample on a fresh Run All. These are the bounds
# listed for this deployment in the export table further down.
sample_start, sample_end = "2018-04-23 10:53:00", "2018-04-23 15:37:45"
sample_data = prh_data.loc[sample_start:sample_end]

plt.figure(figsize=(10, 4))
plt.plot(sample_data.index, -sample_data["depth"])

# A video overlaps the window when it starts before the window ends and ends after
# the window starts.
is_overlap = np.logical_and(video_coverage["camtime1"] < sample_end,
                            video_coverage["camtime2"] > sample_start)
overlap_vids = video_coverage[is_overlap]
for row in range(len(overlap_vids)):
    plt.axvspan(overlap_vids["camtime1"].iloc[row],
                overlap_vids["camtime2"].iloc[row],
                facecolor="y", alpha=0.5, lw=0)
plt.show()

In [None]:
def boristime_to_camtime(boristime, coverage=None):
    """Map BORIS times (seconds) to camera timestamps by linear interpolation.

    Each row of the coverage table contributes two anchor points,
    (boristime1, camtime1) and (boristime2, camtime2), taken in row order.

    Parameters
    ----------
    boristime : scalar or array-like of float
        BORIS time(s) in seconds.
    coverage : pandas.DataFrame, optional
        Table with numeric "boristime1"/"boristime2" columns and timestamp
        "camtime1"/"camtime2" columns. Defaults to the notebook-level
        ``video_coverage`` so existing one-argument calls keep working.

    Returns
    -------
    Camera time(s): the earliest anchor timestamp plus the interpolated offset.
    An array-like input yields a DatetimeIndex.

    Notes
    -----
    ``np.interp`` expects the BORIS anchors to be increasing and clamps inputs
    that fall outside the anchor range to the first/last anchor.
    """
    if coverage is None:
        coverage = video_coverage

    # Interleave the anchors: start1, end1, start2, end2, ...
    xs, ys = [], []
    for boris_a, boris_b, cam_a, cam_b in zip(coverage["boristime1"],
                                              coverage["boristime2"],
                                              coverage["camtime1"],
                                              coverage["camtime2"]):
        xs.extend([boris_a, boris_b])
        ys.extend([cam_a, cam_b])

    # np.interp works on floats, so express camera times as seconds from the earliest.
    origin = min(ys)
    ys_seconds = [(y - origin).total_seconds() for y in ys]
    cam_seconds = np.interp(boristime, xs, ys_seconds)
    return origin + pd.to_timedelta(cam_seconds, unit="s")
    
# Labelled breaths for the chosen deployment. `.loc[[export_id]]` keeps a DataFrame
# even if only one row matches; `.copy()` makes the column assignments below safe.
breath_data = pd.read_csv(box_dir + "breaths.csv",
                          index_col=0,
                          usecols=[0, 3, 11, 12]).loc[[export_id]].copy()
breath_data["breath_start"] = boristime_to_camtime(breath_data["Start (s)"])
breath_data["breath_end"] = boristime_to_camtime(breath_data["Stop (s)"])

# Each breath is represented by the midpoint of its labelled interval.
# The duration is (end - start); the previous (start - end) placed the point half a
# duration BEFORE the start instead of in the middle.
breaths = breath_data["breath_start"] + (breath_data["breath_end"] - breath_data["breath_start"]) / 2
breaths

In [None]:
# Breaths that fall strictly inside the sample window.
overlap_breaths = breaths[np.logical_and(breaths > sample_start, breaths < sample_end)]

plt.figure(figsize=(10, 4))
plt.plot(sample_data.index, -sample_data["depth"])

# Shade the videos that overlap the sample window.
is_overlap = np.logical_and(video_coverage["camtime1"] < sample_end,
                            video_coverage["camtime2"] > sample_start)
overlap_vids = video_coverage[is_overlap]
for row in range(len(overlap_vids)):
    plt.axvspan(overlap_vids["camtime1"].iloc[row],
                overlap_vids["camtime2"].iloc[row],
                facecolor="y", alpha=0.5, lw=0)

# Iterate over the values directly: `overlap_breaths[i]` relied on integer keys being
# treated as positions on a label-indexed Series, which pandas has deprecated.
for breath_time in overlap_breaths:
    plt.axvline(breath_time, color="r")
plt.show()

# Emit a line that can be pasted into the export table.
print("\"{}\": (\"{}\", \"{}\")".format(export_id, sample_start, sample_end))

In [None]:
# Sample window to export for each deployment: deployment ID -> (start, stop).
# Entries have the same shape as the line printed at the end of the breath-overlay
# cell (`"<id>": ("<start>", "<stop>")`), so they can be pasted in from there.
# The strings carry no UTC offset; the export loop uses them to slice each
# deployment's time-zone-aware PRH index, so they are presumably local (tag) times
# - TODO confirm.
export = {
    "bb180125-30": ("2018-01-25 16:00:18", "2018-01-25 17:00:16"),
    "bb190309-52": ("2019-03-09 12:10:00", "2019-03-09 12:40:00"),
    "be180423-42": ("2018-04-23 10:53:00", "2018-04-23 15:37:45"),
    "bp180526-42": ("2018-05-26 13:00:07", "2018-05-26 16:39:38"),
    "bp180526-44": ("2018-05-26 11:57:57", "2018-05-26 17:01:32"),
    "bs190322-47": ("2019-03-22 15:18:30", "2019-03-22 17:01:00"),
    "bs190322-49": ("2019-03-22 18:15:36", "2019-03-22 18:45:23"),
    "bw180828-49": ("2018-08-28 11:52:50", "2018-08-28 19:27:28"),
    "bw180904-48": ("2018-09-04 11:17:33", "2018-09-04 14:57:00"),
    "bw180905-53": ("2018-09-05 11:55:07", "2018-09-05 13:20:01"),
    "er160505-25": ("2016-05-05 13:54:58", "2016-05-05 18:04:23"),
    "mn170810-42": ("2017-08-10 11:17:51", "2017-08-10 16:30:52"),
    "mn170815-20": ("2017-08-15 12:31:52", "2017-08-15 13:42:36"),
    "mn190228-42": ("2019-02-28 09:17:54", "2019-02-28 12:49:03")
}

In [None]:
def _load_prh(deployid):
    """Read one deployment's PRH file into a time-indexed frame.

    Returns ``(prh_data, whale_tz)``: a DataFrame with "depth", "pitch", "roll" and
    "jerk" columns indexed by time in the whale's zone, and that zone's name.
    Raises FileNotFoundError when no PRH file matches the deployment ID.
    """
    nc_path = glob("/Volumes/COPYCATSdat/CATS/tag_data/prh/{}*.nc".format(deployid))
    if not nc_path:
        raise FileNotFoundError(
            "No PRH .nc file found for deployment {}".format(deployid)
        )
    # The context manager closes the file once the arrays are copied into memory.
    with nc4.Dataset(nc_path[0]) as prh_nc:
        # Offset sign is negated before formatting (POSIX "Etc/GMT+N" denotes UTC-N).
        whale_tz = "Etc/GMT%+i" % -prh_nc.dephist_device_tzone
        # "DN" minus 719529 is read as days since the Unix epoch.
        dt = pd.to_datetime(np.array(prh_nc["DN"]) - 719529, unit="D", utc=True).tz_convert(whale_tz)
        depth, pitch, roll = [np.array(prh_nc[var]) for var in ["P", "pitch", "roll"]]
        # Squared sample-to-sample change in "Aw" summed over the first axis; the
        # appended NaN keeps the length equal to the input (last value is NaN).
        jerk = np.sum(np.diff(np.array(prh_nc["Aw"]), append=np.nan) ** 2, axis=0)
    prh_data = pd.DataFrame(index=dt, data={"depth": depth, "pitch": pitch, "roll": roll, "jerk": jerk})
    return prh_data, whale_tz


def _boris_to_cam(boristime, coverage):
    """Linearly interpolate BORIS seconds onto camera time using ``coverage`` anchors."""
    xs, ys = [], []
    for boris_a, boris_b, cam_a, cam_b in zip(coverage["boristime1"],
                                              coverage["boristime2"],
                                              coverage["camtime1"],
                                              coverage["camtime2"]):
        xs.extend([boris_a, boris_b])
        ys.extend([cam_a, cam_b])
    # np.interp works on floats: use seconds since the earliest camera time.
    origin = min(ys)
    ys_seconds = [(y - origin).total_seconds() for y in ys]
    return origin + pd.to_timedelta(np.interp(boristime, xs, ys_seconds), unit="s")


def _load_breath_midpoints(deployid, whale_tz, coverage_all, breath_labels_all):
    """Return one deployment's breath times (interval midpoints) in camera time."""
    # List selectors keep DataFrames even when a single row matches.
    coverage = coverage_all.loc[[deployid]].copy()
    for col in ["camtime1", "camtime2"]:
        coverage[col] = coverage[col].dt.tz_localize(whale_tz)
    for col in ["boristime1", "boristime2"]:
        coverage[col] = pd.to_timedelta(coverage[col]).dt.total_seconds()

    breath_data = breath_labels_all.loc[[deployid]].copy()
    breath_data["breath_start"] = _boris_to_cam(breath_data["Start (s)"], coverage)
    breath_data["breath_end"] = _boris_to_cam(breath_data["Stop (s)"], coverage)
    # Midpoint = start + half the duration (end - start). The previous (start - end)
    # shifted every breath to before its own start.
    return breath_data["breath_start"] + (breath_data["breath_end"] - breath_data["breath_start"]) / 2


# The spreadsheet and CSV do not depend on the deployment: read them once.
coverage_all = pd.read_excel(box_dir + "BreathingVideo.xlsx",
                             engine="openpyxl",
                             sheet_name="Time alignment",
                             index_col=0,
                             converters={"boristime1": str, "boristime2": str})
breath_labels_all = pd.read_csv(box_dir + "breaths.csv",
                                index_col=0,
                                usecols=[0, 3, 11, 12])

# Collect per-deployment samples and concatenate once at the end:
# DataFrame.append was removed in pandas 2.0 and re-copied the frame on every call.
prh_samples = []
breaths_all = pd.MultiIndex.from_arrays([[], []])
for deployid, (starttime, stoptime) in export.items():
    print("{}: {} - {}".format(deployid, starttime, stoptime))
    prh_data, whale_tz = _load_prh(deployid)
    breaths = _load_breath_midpoints(deployid, whale_tz, coverage_all, breath_labels_all)

    # subset and collect, keyed by (deployid, time)
    sample_data = prh_data.loc[starttime:stoptime].copy()
    sample_data.index = pd.MultiIndex.from_product([[deployid], sample_data.index])
    prh_samples.append(sample_data)

    sample_breaths = breaths[np.logical_and(breaths > starttime, breaths < stoptime)]
    sample_breaths = pd.MultiIndex.from_product([[deployid], sample_breaths])
    breaths_all = breaths_all.union(sample_breaths)

# NOTE(review): deployments may use different fixed-offset zones; check how the
# combined "time" level is represented after concatenation.
prh_all = pd.concat(prh_samples)
prh_all.index.names = ["deployid", "time"]
breaths_all.names = ["deployid", "time"]

In [None]:
# display(prh_all.reset_index())
def duration_hrs(t: pd.Series):
    """Return the hours elapsed between the first and last timestamps in ``t``."""
    first, last = t.iloc[0], t.iloc[-1]
    elapsed = last - first
    return elapsed.total_seconds() / 3600
# Per-deployment sample duration (first to last PRH timestamp) ...
by_deployment = prh_all.reset_index().groupby("deployid")
prh_summ = by_deployment.agg({"time": [duration_hrs]})
# ... and number of labelled breaths.
breath_summ = breaths_all.to_series().groupby("deployid").count()

# Side-by-side table with a grand-total row appended.
summ = pd.concat([prh_summ, breath_summ], axis=1)
summ.columns = ["Duration (hrs)", "Breaths (count)"]
summ.loc["Total"] = summ.sum()
display(summ)

In [None]:
# Expected to show an empty frame: every row listed has at least one missing value.
has_missing = prh_all.isna().any(axis=1)
display(prh_all[has_missing])

In [None]:
# Persist the combined PRH samples.
prh_all.to_pickle("../data/multi_prh.pkl")

# Persist the breath events as an all-True Series keyed by (deployid, time).
breath_events = pd.Series(True, index=breaths_all, name="event")
breath_events.to_pickle("../data/multi_breaths.pkl")