In [None]:
import sys
import os
import numpy as np
import random
import matplotlib.pyplot as plt
sys.path.append('../src/')
from Biologging_Toolkit.applications.Drift_Dives import DriftDives

In [None]:
# Deployment identifiers available for analysis.
depids = [
    'ml17_280a', 'ml18_296a', 'ml18_294b', 'ml19_292a',
    'ml19_292b', 'ml19_293a', 'ml19_294a', 'ml20_293a',
    'ml20_296b', 'ml20_313a', 'ml21_295a', 'ml21_305b',
]
# Deployment handled by the single-deployment cells below.
depid = depids[1]
# Folder of the selected deployment and location of its 'sens5' NetCDF file.
path = f'D:/individus_brut/individus/{depid}/'
sens_path = f'D:/individus_brut/CTD/{depid}/{depid}sens5.nc'

### Get drift dives


In [None]:
# Build the drift-dive analysis object for the selected deployment.
inst = DriftDives(depid, path=path, sens_path=sens_path)

#### Choose method to identify drift dives ('inertial', 'depth')

In [None]:
# Run the drift-dive identification in 'depth' mode, with overwrite enabled.
inst(mode='depth', overwrite=True)

In [None]:
# Cluster the acoustic data found in the deployment's 'dives' sub-folder.
dives_dir = os.path.join(path, 'dives')
inst.acoustic_cluster(acoustic_path=dives_dir)

### SEE CLUSTERING RESULTS

In [None]:
# Two views of the same 2-D embedding (inst.embed):
#   left  - each clustered file coloured by its accelerometer drift flag,
#   right - each clustered file coloured by its acoustic cluster label.
fig, ax = plt.subplots(1, 2, figsize=(15, 8))

dives = inst.ds['dives'][:]
acc_drifts = inst.ds['acc_drift'][:]

# One flag per clustered file: 1 when at least one 'acc_drift' sample of the
# corresponding dive is non-zero, 0 otherwise.
acc_labels = []
for fn in inst.cluster_fns:
    # The dive number is read from the last 4 characters before the first '.'.
    dive = int(fn.split('.')[0][-4:])
    acc_labels.append(1 if np.any(acc_drifts[dives == dive] != 0) else 0)

# `labels` is left holding the cluster labels once this cell has run.
labels = inst.clusterer.labels_

panels = [
    (ax[0], acc_labels, 'Acc drift', 'Accelerometer drift flag'),
    (ax[1], labels, 'Cluster', 'Acoustic clusters'),
]
for axis, values, prefix, title in panels:
    scatter = axis.scatter(inst.embed[:, 0], inst.embed[:, 1], c=values, s=1)
    # Empty scatters act as legend proxies carrying the colour of each value.
    for value in np.unique(values):
        axis.scatter([], [], color=scatter.cmap(scatter.norm(value)),
                     label=f'{prefix} {value}')
    axis.legend(title=title)
    axis.set_title(title)

# plt.show() rather than fig.show(): the latter only emits a warning on
# non-GUI (inline) backends.
plt.show()

### SHOW SPECTROGRAMS FROM ONE CLUSTER

In [None]:
# Cluster whose spectrograms are displayed (up to 9 randomly chosen files).
cluster = 0

# Read the labels from the clusterer directly instead of relying on a
# `labels` variable left behind by another cell, and select on `cluster`
# (the mask was previously hard-coded to 1, leaving `cluster` unused).
cluster_labels = np.asarray(inst.clusterer.labels_)
drift = np.asarray(inst.cluster_fns)[cluster_labels == cluster]

fig, ax = plt.subplots(3, 3, figsize=(15, 15))
ax = ax.flatten()
random.shuffle(drift)
for i, _drift in enumerate(drift[:9]):
    # Each file is expected to expose a 'spectro' array; it is transposed for display.
    ax[i].imshow(np.load(_drift)['spectro'].T, origin='lower')
# Hide the panels left empty when the cluster holds fewer than 9 files.
for unused in ax[len(drift[:9]):]:
    unused.axis('off')
fig.suptitle(f'Cluster {cluster}')
plt.show()

In [None]:
# Label of the acoustic cluster handed to save_cluster.
cluster = 0
inst.save_cluster(cluster=cluster)

In [None]:
# Close the dataset held by the analysis object once the single-deployment work is done.
inst.ds.close()

### RUN ALL SES

In [None]:
import umap
import hdbscan
# Collect, over every deployment, the clustering features (X) and one
# accelerometer drift flag per clustered file (labels).
labels = []
X = []
for depid in depids:
    path = f'D:/individus_brut/individus/{depid}/'
    sens_path = f'D:/individus_brut/CTD/{depid}/{depid}sens5.nc'
    inst = DriftDives(depid,
                      path=path,
                      analysis_length=60,
                      sens_path=sens_path)
    try:
        inst.acoustic_cluster(acoustic_path=os.path.join(path, 'dives'))
        X.extend(inst.X)
        acc_drifts = inst.ds['acc_drift'][:].data
        dives = inst.ds['dives'][:].data
        for fn in inst.cluster_fns:
            # The dive number is read from the last 4 characters before the first '.'.
            dive = int(fn.split('.')[0][-4:])
            # 1 when at least one 'acc_drift' sample of the dive is non-zero.
            labels.append(1 if np.any(acc_drifts[dives == dive] != 0) else 0)
    finally:
        # Release the dataset of this deployment before moving to the next one,
        # so a dozen datasets are not left open by the loop.
        inst.ds.close()

In [None]:
# Embed the features of all deployments in 2-D with UMAP, then cluster the
# embedding with HDBSCAN. A fixed random_state makes the embedding (and
# therefore the clusters) reproducible from one run to the next.
project = umap.UMAP(random_state=42)
embed = project.fit_transform(X)
clusterer = hdbscan.HDBSCAN(min_cluster_size=50, min_samples=10).fit(embed)

# Left: accelerometer drift flag of each file; right: HDBSCAN cluster label.
fig, ax = plt.subplots(1, 2, figsize=(15, 8))
panels = [
    (ax[0], labels, 'Acc drift', 'Accelerometer drift flag'),
    (ax[1], clusterer.labels_, 'Cluster', 'Acoustic clusters'),
]
for axis, values, prefix, title in panels:
    scatter = axis.scatter(embed[:, 0], embed[:, 1], c=values, s=1)
    # Empty scatters act as legend proxies carrying the colour of each value.
    for value in np.unique(values):
        axis.scatter([], [], color=scatter.cmap(scatter.norm(value)),
                     label=f'{prefix} {value}')
    axis.legend(title=title)
    axis.set_title(title)

# plt.show() rather than fig.show(): the latter only emits a warning on
# non-GUI (inline) backends.
plt.show()

In [None]:
import netCDF4 as nc
import pandas as pd
from scipy.interpolate import interp1d
# For every deployment, add to its dive table:
#   end_lat / end_lon - position linearly interpolated at each dive's end_time,
#   acc_drifts        - per-dive flag derived from 'acc_drift',
#   cluster_drifts    - per-dive flag derived from 'cluster_drifts',
# then write the table back to the same CSV file.
for depid in depids:
    path = f'D:/individus_brut/individus/{depid}/'
    dive_csv = os.path.join(path, f'{depid}_dive.csv')

    # The context manager guarantees the NetCDF file is closed, even on error.
    # `[:]` loads the values in memory, so the arrays stay usable afterwards.
    with nc.Dataset(os.path.join(path, f'{depid}_sens.nc')) as ds:
        timestamps = ds['time'][:].data
        lat = ds['lat'][:].data
        lon = ds['lon'][:].data
        acc = ds['acc_drift'][:].data
        dives = ds['dives'][:].data
        cluster_drifts = ds['cluster_drifts'][:].data

    df = pd.read_csv(dive_csv)
    # interp1d raises ValueError if an end_time falls outside the time axis.
    df['end_lat'] = interp1d(timestamps, lat)(df.end_time)
    df['end_lon'] = interp1d(timestamps, lon)(df.end_time)

    dive_ids = np.unique(dives)
    if len(dive_ids) != len(df):
        raise ValueError(
            f'{depid}: {len(dive_ids)} dives in the NetCDF file but '
            f'{len(df)} rows in the dive table'
        )

    # 1 when at least one 'acc_drift' sample of the dive is non-zero.
    _acc = [1 if np.any(acc[dives == dive] != 0) else 0 for dive in dive_ids]
    # NOTE(review): the polarity is the opposite of `acc_drifts` above - a dive
    # gets 1 when ALL its 'cluster_drifts' samples are zero. Kept as written;
    # confirm against how save_cluster encodes 'cluster_drifts'.
    _clust = [0 if np.any(cluster_drifts[dives == dive] != 0) else 1 for dive in dive_ids]

    df['acc_drifts'] = _acc
    df['cluster_drifts'] = _clust
    df.to_csv(dive_csv, index=False)

In [None]:
import netCDF4 as nc
import pandas as pd
from scipy.interpolate import interp1d
# For each deployment, print the fraction of dives flagged in `cluster_drifts`
# that are also flagged in `acc_drifts`.
for depid in depids:
    path = f'D:/individus_brut/individus/{depid}/'
    df = pd.read_csv(os.path.join(path, f'{depid}_dive.csv'))
    in_cluster = df.cluster_drifts == 1
    in_acc = df.acc_drifts == 1
    print((in_cluster & in_acc).sum() / in_cluster.sum())