In [None]:
import sklearn.metrics
from sklearn.metrics import confusion_matrix
%load_ext autoreload
%autoreload 2
import sys
import os
import numpy as np
import random
import pandas as pd
import netCDF4 as nc
import matplotlib.pyplot as plt
sys.path.append('../src/')
from Biologging_Toolkit.applications.Drift_Dives import DriftDives
from Biologging_Toolkit.plot.dives import *

In [None]:
# Deployment identifiers that the notebook can process.
depids = [
    'ml17_280a', 'ml18_296a', 'ml18_294b', 'ml19_292a',
    'ml19_293a', 'ml19_292b', 'ml19_294a', 'ml20_293a',
    'ml20_296b', 'ml20_313a', 'ml21_295a', 'ml21_305b',
]
# Deployment used by the single-deployment cells that follow.
depid = depids[1]

# Deployment folder, its per-dive acoustic files, and the CTD sensor file.
path = f'D:/individus_brut/individus/{depid}/'
acoustic_path = os.path.join(path, 'dives')
sens_path = f'D:/individus_brut/CTD/{depid}/{depid}sens5.nc'

### Get drift dives


In [None]:
# Build the drift-dive analysis object for the selected deployment.
inst = DriftDives(depid, path=path, sens_path=sens_path)

#### Choose the method used to identify drift dives ('inertial', 'depth')

In [None]:
# First clustering pass over the per-dive acoustic files, with every
# setting spelled out explicitly.
inst.acoustic_cluster(
    acoustic_path=os.path.join(path, 'dives'),
    freqs=[0, 500],
    bathy=[-30000, -300],
    sort=True,
    tmin=1,
    tmax=12,
    timestep=1,
    freq_sampling='linear',
    nfeatures=513,
    min_cluster_size=15,
    min_samples=10,
)

In [None]:
# Second pass with larger cluster-size settings.
# NOTE(review): `computed=True` presumably reuses what the first pass
# already computed -- confirm against DriftDives.acoustic_cluster.
inst.acoustic_cluster(
    min_cluster_size=40,
    min_samples=30,
    computed=True,
)

### SEE CLUSTERING RESULTS

In [None]:
 # Plot the clustering output; `save=True` is passed together with the target folder.
 plot_clusters(
     inst,
     save=True,
     save_path='C:/Users/grosmaan/Desktop/Results',
 )

In [None]:
# Run the instance's metric computation restricted to cluster 0.
inst.compute_metric(
    cluster=[0],
)

In [None]:
# Inspect clusters 2, 3 and 4 with the plotting helper.
isolate_clusters(
    inst,
    clusters=[2, 3, 4],
)

In [None]:
# Save clusters 4 and 5, overwriting any earlier result; the remaining
# keyword arguments are forwarded to save_cluster unchanged.
inst.save_cluster(
    cluster=[4, 5],
    overwrite=True,
    nfeature=513,
    type='linear',
    tmin=1,
    tmax=11,
    timestep=1,
)

In [None]:
# Pull the per-sample arrays out of the deployment dataset in one pass.
dives, acc_drifts, inertial_drifts, cluster_drifts = (
    inst.ds[_name][:].data
    for _name in ('dives', 'acc_drift', 'inertial_drift', 'cluster_drifts')
)
# Cluster-drift values of the samples that belong to dive 455.
cluster_drifts[dives == 455]

In [None]:
# Distinct values present in the per-sample cluster-drift array.
np.unique(cluster_drifts)

In [None]:
# Plot every dive whose first sample falls inside an acoustic cluster-0 time
# window, and build per-dive labels from the accelerometer AND depth criteria.
acc_drift = inst.ds['acc_drift'][:].data.astype(bool)
depth_drift = inst.ds['depth_drift'][:].data.astype(bool)
# A sample counts as drift only when both criteria flag it.
drift = (acc_drift & depth_drift).astype(int)
dives = inst.ds['dives'][:].data

# Start/stop times of the entries that the clusterer assigned to label 0.
in_cluster = np.isin(inst.clusterer.labels_, [0])
start = inst.start[in_cluster]
stop = inst.stop[in_cluster]
timestamps = inst.ds['time'][:]
# Per-sample flag: 1 inside any cluster-0 window, 0 elsewhere.
drifts = np.zeros((len(timestamps)))
for _start, _stop in zip(start, stop):
    drifts[(timestamps >= _start) & (timestamps <= _stop)] = 1

bank_angle = inst.ds['bank_angle'][:].data
depths = inst.ds['depth'][:].data
unique_dives = np.unique(dives)

# Dive number = the five characters that precede the 4-character extension
# of each clustered file name; parsed once and reused by both passes below.
cluster_dives = [int(_dive[-9:-4]) for _dive in inst.cluster_fns]

# 1 when at least one sample of the dive is flagged as drift, otherwise 0.
label_drift = [int(np.any(drift[dives == dive] != 0)) for dive in cluster_dives]

count = 0
for dive in cluster_dives:
    in_dive = dives == dive
    # A dive without any sample in the sensor record cannot be plotted;
    # skipping it avoids an IndexError on the `[0]` lookup.
    if not in_dive.any():
        continue
    # Only dives whose first sample lies inside a cluster-0 window are shown.
    if drifts[in_dive][0] != 1:
        continue
    count += 1
    fig, ax = plt.subplots(1, 2)
    ax1 = ax[0].twinx()
    # Left panel: depth profile with bank angle on a secondary axis.
    ax[0].plot(depths[in_dive])
    ax1.scatter(np.arange(0, len(bank_angle[in_dive])),
                bank_angle[in_dive], s=2, c='orange')
    # Right panel: the dive's spectrogram; the archive is closed after reading.
    with np.load(f'D:/individus_brut/individus/{depid}/dives/acoustic_dive_{int(dive):05}.npz') as archive:
        ax[1].imshow(archive['spectro'].T, origin='lower')
    fig.show()
print(count)

### Threshold

In [None]:
# Apply the acoustic feature threshold at several levels and collect the
# `acoustic_drifts` attribute after each run.
# NOTE(review): if acoustic_feature_threshold updates `inst.acoustic_drifts`
# in place, every list entry aliases the same object -- confirm.
thresholds = [-15, -10, -5]
acoustic_drift = []
for thresh in thresholds:
    inst.acoustic_feature_threshold(
        frequency=list(range(30, 45)),
        acoustic_path=acoustic_path,
        threshold=thresh,
    )
    acoustic_drift.append(inst.acoustic_drifts)

In [None]:
# Per-sample drift flags and dive numbers for the current deployment.
acc_drift, depth_drift, dives, inertial = (
    inst.ds[_name][:].data
    for _name in ('acc_drift', 'depth_drift', 'dives', 'inertial_drift')
)
# Sorted list of the dive numbers present in the record.
unique_dives = np.unique(dives)

In [None]:
# Dive-level flags: True when any sample of the dive carries the value 1.
acc_dive, depth_dive, inert_dive = [], [], []
thres1, thres2, thres3 = [], [], []
for dive in unique_dives:
    in_dive = dives == dive
    acc_dive.append(np.any(acc_drift[in_dive] == 1))
    depth_dive.append(np.any(depth_drift[in_dive] == 1))
    inert_dive.append(np.any(inertial[in_dive] == 1))

In [None]:
# Reference label: dives flagged by both the accelerometer and depth criteria.
label = np.array(acc_dive) & np.array(depth_dive)
# One confusion matrix per detection method, in the order acc, depth, inertial.
for _predicted in (acc_dive, depth_dive, inert_dive):
    print(confusion_matrix(label, _predicted))

### Save daily drift dives

In [None]:
print(depid)
path = f'D:/individus_brut/individus/{depid}/'
ds = nc.Dataset(os.path.join(path, f'{depid}_sens.nc'))

# One row per sample of the sensor record.
sample_columns = {
    'time': ds['time'][:].data,
    'lat': ds['lat'][:].data,
    'lon': ds['lon'][:].data,
    'dive': ds['dives'][:].data,
    'inert': ds['inertial_drift'][:].data.astype(bool),
    'cluster': ds['cluster_drifts'][:].data,
}
df = pd.DataFrame(sample_columns)

# Collapse to one row per dive: mean time and position, maximum cluster value.
df = df.groupby('dive').agg({'time': 'mean', 'cluster': 'max', 'lat': 'mean', 'lon': 'mean'})

# Derive a month-day key from the mean dive time (seconds since epoch, UTC).
df = df.assign(datetime=pd.to_datetime(df["time"], unit="s", utc=True))
df = df.assign(month_day=df["datetime"].dt.strftime("%m-%d"))

# Collapse to one row per calendar day: mean position, summed cluster value.
df = df.groupby("month_day").agg({'lat': 'mean', 'lon': 'mean', 'cluster': 'sum'})

# Disabled experiment kept for reference:
#df.inert[df.bathy > -1] = 0
#df['inert_drift'] = 3*df.inert
df.to_csv(f'C:/Users/grosmaan/Desktop/QGIS/{depid}_deep.csv')

In [None]:
# Display the `cluster` column of the daily table.
df['cluster']

In [None]:
# Scatter the positions in `df`, sizing each marker by a drift measure.
# `df` has no `inert_drift` column unless one is added explicitly, and
# reading it as an attribute then raises AttributeError; fall back to the
# `cluster` column in that case.
if 'inert_drift' in df.columns:
    marker_size = df['inert_drift'] / 100
else:
    marker_size = df['cluster']
plt.scatter(df.lon, df.lat, s=marker_size)

In [None]:
# Depth of the samples whose cluster_drifts value is 0, limited to a
# fixed 10 000-sample window of that selection.
_outside_cluster = ds['cluster_drifts'][:].data == 0
_depth_outside = ds['depth'][:].data[_outside_cluster]
plt.plot(_depth_outside[200000:210000])

### Check results

In [None]:
# For every deployment, compare the dives flagged through `cluster_drifts`
# with the reference label (accelerometer AND depth criteria), dive by dive.
for depid in depids:
    path = f'D:/individus_brut/individus/{depid}/'
    acoustic_path = os.path.join(path, 'dives')
    sens_path = f'D:/individus_brut/CTD/{depid}/{depid}sens5.nc'
    inst = DriftDives(depid,
                      path=path,
                      sens_path=sens_path)
    acc_drift = inst.ds['acc_drift'][:].data
    depth_drift = inst.ds['depth_drift'][:].data
    dives = inst.ds['dives'][:].data
    acoustic = inst.ds['cluster_drifts'][:].data
    # Dive numbers must be taken from THIS deployment; a `unique_dives` left
    # over from an earlier cell describes a different record.
    unique_dives = np.unique(dives)
    acc_dive, depth_dive, inert_dive, clust_dive = [], [], [], []
    for dive in unique_dives:
        in_dive = dives == dive
        # True when any sample of the dive carries the value 1.
        acc_dive.append(np.any(acc_drift[in_dive] == 1))
        depth_dive.append(np.any(depth_drift[in_dive] == 1))
        clust_dive.append(np.any(acoustic[in_dive] == 1))
    print(depid)
    label = np.array(acc_dive) & np.array(depth_dive)
    print(confusion_matrix(label, clust_dive))


### SHOW SPECTROGRAMS FROM ONE CLUSTER

In [None]:
# Show up to nine randomly chosen spectrograms from one acoustic cluster.
cluster = 0
# Select the files whose clusterer label equals `cluster`.
# NOTE(review): assumes `inst.acoustic_cluster(...)` has been run on this
# `inst` and that `inst.clusterer.labels_` is aligned with
# `inst.cluster_fns` -- confirm.
cluster_files = np.asarray(inst.cluster_fns)[inst.clusterer.labels_ == cluster]
# random.sample leaves the source untouched and copes with fewer than nine files.
drift = random.sample(list(cluster_files), k=min(9, len(cluster_files)))
fig, ax = plt.subplots(3, 3, figsize=(15, 15))
ax = ax.flatten()
for i, _drift in enumerate(drift):
    # Close each archive once its spectrogram has been read.
    with np.load(_drift) as archive:
        ax[i].imshow(archive['spectro'].T, origin='lower')

In [None]:
# Hand cluster 0 to the instance's save_cluster method.
cluster = 0
inst.save_cluster(
    cluster=cluster,
)

In [None]:
# Close the dataset handle held by the current DriftDives instance.
inst.ds.close()

### RUN ALL SES

In [None]:
# Pool sample-level reference labels and cluster-drift predictions over all
# deployments except the last one in `depids`.
ground_truth = []
preds = []
for depid in depids[:-1]:
    path = f'D:/individus_brut/individus/{depid}/'
    sens_path = f'D:/individus_brut/CTD/{depid}/{depid}sens5.nc'
    inst = DriftDives(depid,
                      path=path,
                      sens_path=sens_path)
    # Reference label: samples flagged by both the depth and accelerometer criteria.
    _ground_truth = inst.ds['depth_drift'][:].data.astype(bool) & inst.ds['acc_drift'][:].data.astype(bool)
    ground_truth.extend(_ground_truth.astype(int))
    if 'cluster_drifts' in inst.ds.variables.keys():
        _preds = inst.ds['cluster_drifts'][:].data
    else:
        # Pad with NaN using THIS deployment's sample count; the cumulative
        # length of `ground_truth` would push `preds` out of alignment.
        _preds = np.full(len(_ground_truth), np.nan)
    preds.extend(_preds)

In [None]:
# Sample-level confusion matrix over the pooled `ground_truth` / `preds` lists.
# Samples whose prediction is NaN (deployments without `cluster_drifts`) are
# dropped first, because confusion_matrix rejects NaN input.
_truth = np.asarray(ground_truth)
_pred = np.asarray(preds, dtype=float)
_has_pred = ~np.isnan(_pred)
confusion_matrix(_truth[_has_pred], _pred[_has_pred].astype(int))

In [None]:
import umap
import hdbscan
# Build the pooled feature list `X` and one accelerometer-drift label per
# clustered file, across all deployments.
labels = []
X = []
for depid in depids:
    path = f'D:/individus_brut/individus/{depid}/'
    sens_path = f'D:/individus_brut/CTD/{depid}/{depid}sens5.nc'
    inst = DriftDives(depid,
                      path=path,
                      analysis_length=60,
                      sens_path=sens_path)
    inst.acoustic_cluster(acoustic_path=os.path.join(path, 'dives'))
    X.extend(inst.X)
    acc_drifts = inst.ds['acc_drift'][:].data
    dives = inst.ds['dives'][:].data
    for fn in inst.cluster_fns:
        # Dive number = the digits after the last underscore of the file stem
        # (files are named `acoustic_dive_{dive:05}.npz`). Taking the whole
        # field keeps five-digit dive numbers intact, and working on the
        # basename makes dots elsewhere in the path harmless.
        stem = os.path.splitext(os.path.basename(fn))[0]
        dive = int(stem.rsplit('_', 1)[-1])
        # 1 when any sample of the dive has a non-zero acc_drift value.
        labels.append(int(np.any(acc_drifts[dives == dive] != 0)))

In [None]:
# Embed the pooled features in 2-D with UMAP and cluster the embedding.
# A fixed seed keeps the layout, and therefore the clusters, repeatable.
project = umap.UMAP(random_state=42)
embed = project.fit_transform(X)
clusterer = hdbscan.HDBSCAN(min_cluster_size=50, min_samples=10).fit(embed)

fig, ax = plt.subplots(1, 2, figsize=(15, 8))
# Left panel is coloured by `labels`, right panel by the HDBSCAN labels.
# Underscore-prefixed loop names avoid overwriting the notebook-level `label`.
for _axis, _colours in zip(ax, (labels, clusterer.labels_)):
    scatter = _axis.scatter(embed[:, 0], embed[:, 1], c=_colours, s=1)
    # Empty scatters serve as legend handles, one per distinct value.
    for _value in np.unique(_colours):
        _axis.scatter([], [], color=scatter.cmap(scatter.norm(_value)), label=f'Cluster {_value}')
    _axis.legend(title="Clusters")
fig.show()

In [None]:
from scipy.interpolate import interp1d
# Add end-of-dive positions and per-dive drift flags to each deployment's
# dive table, then write the table back to the same CSV file.
for depid in depids:
    path = f'D:/individus_brut/individus/{depid}/'
    df = pd.read_csv(os.path.join(path, f'{depid}_dive.csv'))
    # Read every array up front so the NetCDF handle is released right away.
    with nc.Dataset(os.path.join(path, f'{depid}_sens.nc')) as ds:
        timestamps = ds['time'][:].data
        lat = ds['lat'][:].data
        lon = ds['lon'][:].data
        acc = ds['acc_drift'][:].data
        dives = ds['dives'][:].data
        cluster_drifts = ds['cluster_drifts'][:].data
    # Position at the end of each dive, interpolated from the sample times.
    lat_interp = interp1d(timestamps, lat)
    lon_interp = interp1d(timestamps, lon)
    df['end_lat'] = lat_interp(df.end_time)
    df['end_lon'] = lon_interp(df.end_time)
    _acc = []
    _clust = []
    for dive in np.unique(dives):
        in_dive = dives == dive
        # 1 when any sample of the dive is non-zero. Both flags follow the
        # same convention so that 1 always marks a flagged dive.
        _acc.append(int(np.any(acc[in_dive] != 0)))
        _clust.append(int(np.any(cluster_drifts[in_dive] != 0)))
    df['acc_drifts'] = _acc
    df['cluster_drifts'] = _clust
    df.to_csv(os.path.join(path, f'{depid}_dive.csv'), index=None)

In [None]:
# For each deployment, report the share of cluster-flagged dives that are
# also flagged by the accelerometer criterion.
for depid in depids:
    path = f'D:/individus_brut/individus/{depid}/'
    df = pd.read_csv(os.path.join(path, f'{depid}_dive.csv'))
    is_cluster = df.cluster_drifts == 1
    is_acc = df.acc_drifts == 1
    n_cluster = is_cluster.sum()
    # No cluster-flagged dive means the ratio is undefined; report NaN
    # rather than dividing by zero.
    overlap = (is_cluster & is_acc).sum() / n_cluster if n_cluster else np.nan
    print(depid, overlap)