In [None]:
import sys, os
import pandas as pd
import netCDF4 as nc
from datetime import datetime, timezone
import numpy as np
from sklearn import metrics

sys.path.append('../src/')
from Biologging_Toolkit.applications.Mixed_Layer_Depth import MixedLayerDepth
from Biologging_Toolkit.processing.Dives import Dives
from Biologging_Toolkit.utils.format_utils import get_start_time_sens
from Biologging_Toolkit.utils.inertial_utils import coa
from Biologging_Toolkit.applications.Wind import Wind
from Biologging_Toolkit.utils.plot_utils import subplots_centered
from Biologging_Toolkit.models.MLD_Model import MLDModel
from scipy.interpolate import interp1d
from scipy.signal import find_peaks, medfilt
from scipy.ndimage import median_filter
from scipy.optimize import curve_fit
from scipy.ndimage import generic_filter
from scipy import odr
import matplotlib.pyplot as plt
from matplotlib import colormaps
plt.rcParams.update({
    "text.usetex": True,                # Enable LaTeX text rendering
    "font.family": "serif",             # Use a serif font
    "font.serif": ["Computer Modern"],  # Set font to Computer Modern (LaTeX default)
})
import warnings
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
import seaborn as sns
def norm(x) :
    """Min-max scale `x` to the [0, 1] range, ignoring NaNs when finding the extrema."""
    lowest = np.nanmin(x)
    span = np.nanmax(x) - lowest
    return (x - lowest) / span
import umap
import hdbscan
import sklearn.cluster as cluster
import importlib

In [None]:
# Identifiers of the deployments processed below; each one names a sub-directory of `path`.
depids = ['ml18_296a','ml18_294b','ml19_292a','ml19_292b','ml19_293a','ml19_294a','ml20_293a','ml20_296b','ml20_313a','ml21_295a','ml21_305b','ml17_280a']
#path = '/run/media/grosmaan/LaCie/individus_brut/individus/'
# NOTE(review): machine-specific absolute path — point this at the local data root before running.
path = 'D:/individus_brut/individus/'
# One directory per deployment under the data root.
paths = [os.path.join(path, depid) for depid in depids]

In [None]:
# Select the deployments that carry a usable mixed-layer-depth column.
#   corrected_mld = True  -> keep deployments whose dive table has a 'corr_mld' column
#   corrected_mld = False -> keep deployments whose 'meop_mld' column exists and is not all-NaN
# Each dive table is read exactly once (the previous version read every CSV twice and
# discarded the result of the first pass whenever corrected_mld was set).
corrected_mld = True
depids_with_mld = []
for depid in depids :
    df = pd.read_csv(os.path.join(path, depid, f'{depid}_dive.csv'))
    if corrected_mld :
        has_mld = 'corr_mld' in df.columns
    else :
        has_mld = ('meop_mld' in df.columns
                   and not np.all(np.isnan(df.meop_mld.to_numpy())))
    if has_mld :
        depids_with_mld.append(depid)
print(depids_with_mld)

In [None]:
# Feature names handed to MLDModel through its `params` argument.
params = ['wind', 'temp', 'density', 'gradient', 'hour', 'lat', 'lon', 'depid_id']
# Accumulators initialised here; none of the visible cells appends to them.
mae, r2 = [], []
coeffs = []

In [None]:
# Build the model over every deployment that has MLD data.
# NOTE(review): `depid` is the value left behind by the loop in the selection cell
# (the last entry of `depids`) — confirm this is the intended test deployment.
model = MLDModel(path, depids_with_mld,
                 test_depid=depid, params=params,
                 norm = False,
                 deepening = True,
                 target='mld')

In [None]:
# Populate model.data; the meaning of t0/t1/size/filter is defined in MLDModel (not visible here).
model.construct_2D_structure(t0 = 0, t1 = 24, size = 100, filter = 1)

In [None]:
# Rebuild, for a single deployment, the fixed-length look-back windows that precede every dive.
N_POINTS = 40         # samples per resampled window
LOOKBACK_HOURS = 15   # how far back each window reaches before the dive

df = pd.read_csv(paths[5]+ f'/{depids[5]}_dive.csv')

# `norm` is the min-max helper defined in the import cell. The previous `if norm:` tested the
# function object itself (always truthy); an explicit flag makes the choice visible.
normalise = True
scale = norm if normalise else (lambda x: x)

timeframe = df.begin_time.to_numpy()
# Work on a float copy so that masking outliers does not write back into `df`.
mld = df.meop_mld.to_numpy(dtype=float, copy=True)
temp, wind, gradient, density = (scale(df[col].to_numpy())
                                 for col in ('temp10m', 'lstm', 'gradient', 'density10m'))
lat, lon = df.lat.to_numpy(), df.lon.to_numpy()
# nanquantile: np.quantile returns NaN as soon as one value is NaN, which disabled the clip.
mld[mld > np.nanquantile(mld, 0.99)] = np.nan


def _window_profile(times, values, smooth = True):
    """Resample `values` sampled at `times` onto N_POINTS evenly spaced instants.

    Returns an all-NaN vector when the window is empty. `smooth` applies the size-1
    median filter used for the physical variables (a no-op kept for parity).
    """
    if len(times) == 0:
        return np.full(N_POINTS, np.nan)
    if smooth:
        values = median_filter(values, size=1, mode='nearest')
    return interp1d(times, values)(np.linspace(times[0], times[-1], N_POINTS))


wind_data, temp_data, gradient_data, density_data, previous_mld = [], [], [], [], []
lat_data, lon_data, time_data = [], [], []
for i in range(len(mld)):
    low_bound = (timeframe >= timeframe[i] - LOOKBACK_HOURS * 3600)
    high_bound = (timeframe <= timeframe[i])
    in_window = low_bound & high_bound
    _time = timeframe[in_window]
    _mld = mld[in_window]
    # Oldest MLD in the window, only when the window is not clipped by the start of the record.
    previous_mld.append(_mld[0] if (not low_bound.all() and len(_mld) != 0) else np.nan)
    wind_data.append(_window_profile(_time, wind[in_window]))
    temp_data.append(_window_profile(_time, temp[in_window]))
    density_data.append(_window_profile(_time, density[in_window]))
    gradient_data.append(_window_profile(_time, gradient[in_window]))
    lat_data.append(_window_profile(_time, lat[in_window], smooth = False))
    lon_data.append(_window_profile(_time, lon[in_window], smooth = False))
    # Guarded like the other series: an empty window (e.g. NaN timestamp) yields NaNs.
    time_data.append(np.linspace(_time[0], _time[-1], N_POINTS) if len(_time) != 0
                     else np.full(N_POINTS, np.nan))


In [None]:
# Pull arrays out of model.data. `wind` keeps only column 20 of the 2D wind array;
# the other variables are taken whole.
wind = model.data['wind'][:,20]
mld = model.data['mld']
temp = model.data['temp']
density = model.data['density']
gradient = model.data['gradient']
def coa(lat, lon):
    """Cosine of the central angle between two positions.

    `lat` and `lon` are two-element sequences (first point at index 0, second at index 1).
    Values are passed straight to np.sin / np.cos, so they are interpreted as radians.
    NOTE(review): this definition shadows the `coa` imported from inertial_utils.
    """
    lat_a, lat_b = lat[0], lat[1]
    delta_lon = lon[1] - lon[0]
    sin_term = np.sin(lat_b) * np.sin(lat_a)
    cos_term = np.cos(lat_a) * np.cos(lat_b) * np.cos(delta_lon)
    return sin_term + cos_term
# coa() value for every pair of consecutive rows, using column 20 of the lat/lon arrays.
# NOTE(review): coa() feeds its arguments to np.sin/np.cos (radians) and returns a cosine,
# not a distance — confirm the units of model.data['lat'/'lon'] and the intended metric.
distance = [coa(model.data['lat'][i:i+2, 20], model.data['lon'][i:i+2,20]) for i in range(len(model.data['lat'])-1)]
# 20-sample windows taken every 5 samples: summed coa() values against the wind/MLD correlation.
# The wind slice used to be `wind[i:i+2:20]` (a single element), which made np.corrcoef
# fail on mismatched lengths; both series now cover the same window.
for i in range(0, len(mld), 5):
    plt.scatter(np.sum(distance[i:i+20]), np.corrcoef(mld[i:i+20], wind[i:i+20])[0,1])

In [None]:
# Quick manual check of coa(). NOTE(review): 40/44 and 3/5 look like degrees, but coa()
# passes its inputs to np.sin/np.cos unchanged (radians) — confirm.
coa([40, 44], [3,5])

In [None]:
# Inspect column 20 of the latitude array for rows 40 and 41.
model.data['lat'][40:42, 20]

In [None]:
# For every deployment: correlation between MLD and each of the 60 wind columns (left panel)
# and the sorted values of wind column 0 (right panel).
# NOTE(review): this loop rebinds `model` and `mld`; later cells see the last deployment's
# single-deployment model.
colors = ["b", "#ff7f0e", "g", "r", "#9467bd", "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "c", "k", "y", "m", "#1e90ff", "#008080"]
fig, ax = plt.subplots(1,2)
for j, depid in enumerate(depids_with_mld):
    color = colors[j % len(colors)]  # wrap around instead of failing past 15 deployments
    model = MLDModel(path, [depid],
                     test_depid=depid, params=params,
                     deepening = True,
                     target='mld')
    model.construct_2D_structure(t0 = 0, t1 = 24, size = 60, filter = 1)
    mld = model.data['mld']
    for i in range(60) :
        wind = model.data['wind'][:,i]
        # Rows where both series are defined; computed once per column.
        valid = ~np.isnan(wind) & ~np.isnan(mld)
        ax[0].scatter(i, np.corrcoef(wind[valid], mld[valid])[0,1], c = color)
    # A single sort is enough (the array used to be sorted twice).
    ax[1].plot(np.sort(model.data['wind'][:,0]), c = color)

In [None]:
# Visual sanity check of one resampled window (left) against the raw dive table (right).
k = 312
windows_time = np.array(time_data)
windows_wind = np.array(wind_data)
fig, ax = plt.subplots(1,2, figsize = (15,8))
ax1, ax2 = ax[0].twinx(), ax[1].twinx()
# Left: resampled wind window k with the MLD value at index k on a secondary axis.
ax[0].plot(windows_time[k,:], windows_wind[k,:])
ax1.scatter(windows_time[k, -1], np.array(mld[k]), c = 'orange')
# Right: last 50 raw samples strictly before the end of window k+1, and the last raw MLD.
before_next = df.begin_time < windows_time[k+1, -1]
ax[1].plot(df.begin_time[before_next][-50:], df['lstm'][before_next][-50:])
ax2.scatter(df.begin_time[before_next].iloc[-1], df['meop_mld'][before_next].iloc[-1], c = 'red')
fig.show()

In [None]:
# Run the CNN_LSTM once per deployment, setting that deployment as model.test_depid,
# and pool the estimates and the ground truth over all runs.
# NOTE(review): `model` is whatever the previous cells left bound — after the per-deployment
# correlation cell it is the model built for the last deployment only; confirm.
ground_truth, preds = [],[]
for depid in depids_with_mld :
    model.test_depid = depid
    model.neural_network(model_type = 'CNN_LSTM', input_size = 175, learning_rate = 0.0001, nepoch = 15)
    preds.extend(model.neural_network_estimation)
    ground_truth.extend(model.ground_truth)


In [None]:
# 2D density of predictions against ground truth. Rebinds `df`.
df = pd.DataFrame({'preds':preds, 'gt':ground_truth})
sns.kdeplot(df, x = 'preds', y = 'gt')
#plt.scatter(preds, ground_truth)

In [None]:
import plotly.express as px
# Overlay estimates and targets as two lines; `time` is only a normalised sample index
# (np.linspace from 0 to 1), not a timestamp.
df = pd.DataFrame({"estimations":np.array(preds).flatten(),
                   "target":ground_truth,
                   'time' :np.linspace(0,1,len(ground_truth))})
# Long format so that plotly colours by variable name.
df = df.melt(id_vars = 'time')
px.line(df, y = 'value', x = 'time', color = "variable")

In [None]:
import torch
# Histogram of model.data['mld_diff'] with the curve tanh(x/20)^2 drawn on a secondary axis.
fig, ax = plt.subplots()
ax.hist(model.data['mld_diff'], bins = 100)
ax1 = ax.twinx()
ax1.plot(np.linspace(-500,500,1000), np.tanh(np.linspace(-500,500,1000)/20)**2, c = 'orange')
# Only the range [-1, 200] is shown.
ax.set_xlim(-1,200)

In [None]:
def hour(x, tz = timezone.utc) :
    """Return the hour of day (0-23) of each POSIX timestamp in `x` as a NumPy array.

    Parameters
    ----------
    x : iterable of float or int
        POSIX timestamps in seconds.
    tz : datetime.tzinfo or None, default UTC
        Time zone in which the hour is read. The default makes the result independent of
        the machine running the notebook; pass ``tz=None`` to get the previous behaviour
        (local time zone of the host).

    Raises
    ------
    ValueError / OverflowError / OSError
        Propagated from ``datetime.fromtimestamp`` for NaN or out-of-range timestamps.
    """
    return np.array([datetime.fromtimestamp(elem, tz).hour for elem in x])
# mld_diff against hour of day: scatter coloured by the per-row maximum wind,
# followed by a box plot per hour.
plt.scatter(hour(model.data['time']), model.data['mld_diff'],
            c = np.nanmax(model.data['wind'], axis = 1), s = 1)
# Rebinds `df`.
df = pd.DataFrame({'time': hour(model.data['time']), 'mld_diff': model.data['mld_diff']})
sns.boxplot(x="time", y="mld_diff",
            data=df)
plt.ylim(-20,20)

In [None]:
# Display the hour-of-day array for every row of model.data['time'].
hour(model.data['time'])

In [None]:
# 2D density of estimates against targets, with the identity line as reference. Rebinds `df`.
fig, ax = plt.subplots()
df = pd.DataFrame({'target':ground_truth, 'estimations':preds})
sns.kdeplot(df,
            x = 'target',
            y = 'estimations',
            ax = ax)
# y = x reference line.
ax.plot([-200,200], [-200,200], '--', c = 'k')
ax.set_xlim([-200,200])
ax.set_ylim([-200,200])

In [None]:
def get_profiles(df, data = 'lstm', t0 = 10, t1 = 25, filter=1, norm = True) :
    """Build, for every dive, fixed-length profiles of the conditions that preceded it.

    For dive ``i`` the window is every dive whose ``begin_time`` lies between
    ``t1`` hours and ``t0`` hours before ``begin_time[i]`` (bounds included). Each
    variable is median-filtered inside the window and linearly resampled onto 40
    evenly spaced instants.

    Parameters
    ----------
    df : pandas.DataFrame
        Dive table with columns ``begin_time`` (seconds), ``meop_mld``, ``temp10m``,
        ``gradient``, ``density10m`` and the column named by ``data``. Not modified.
    data : str
        Name of the wind column.
    t0, t1 : float
        Near and far edges of the look-back window, in hours (``t0 <= t1``).
    filter : int
        Size of the median filter applied inside each window.
    norm : bool
        If true, min-max scale temperature, wind, gradient and density over the whole table.

    Returns
    -------
    mld : numpy.ndarray
        Float copy of ``meop_mld`` with values above the 0.99 quantile (NaNs ignored)
        set to NaN.
    previous_mld : list
        Oldest MLD inside each window, NaN when the window is empty.
    wind_data, temp_data, density_data, gradient_data : list of numpy.ndarray
        One length-40 profile per dive; all-NaN when the window is empty, constant
        when the window holds a single dive.
    """
    n_points = 40

    def _scale(values) :
        # Min-max scaling ignoring NaNs, or identity when normalisation is off.
        if not norm :
            return values
        return (values - np.nanmin(values)) / (np.nanmax(values) - np.nanmin(values))

    def _resample(times, values) :
        # Smooth then resample one window onto n_points instants.
        if len(times) == 0 :
            return np.full(n_points, np.nan)
        smoothed = median_filter(values, size=filter, mode = 'nearest')
        if len(times) == 1 :
            # A single sample cannot be interpolated; repeat it.
            return np.full(n_points, smoothed[0], dtype=float)
        return interp1d(times, smoothed)(np.linspace(times[0], times[-1], n_points))

    timeframe = df.begin_time.to_numpy()
    # Float copy: masking outliers must not write back into the caller's DataFrame.
    mld = df.meop_mld.to_numpy(dtype=float, copy=True)
    # nanquantile: np.quantile returns NaN when any value is NaN, which disabled the clip.
    mld[mld > np.nanquantile(mld, 0.99)] = np.nan
    temp = _scale(df.temp10m.to_numpy())
    wind = _scale(df[data].to_numpy())
    gradient = _scale(df.gradient.to_numpy())
    density = _scale(df.density10m.to_numpy())

    wind_data, temp_data, previous_mld, gradient_data, density_data = [], [], [], [], []
    for i in range(len(mld)) :
        in_window = ((timeframe >= timeframe[i] - t1*3600)
                     & (timeframe <= timeframe[i] - t0*3600))
        _time = timeframe[in_window]
        _mld = mld[in_window]
        # With t0 > 0 the earliest dives have no predecessor in range: record NaN
        # instead of raising IndexError.
        previous_mld.append(_mld[0] if len(_mld) != 0 else np.nan)
        wind_data.append(_resample(_time, wind[in_window]))
        temp_data.append(_resample(_time, temp[in_window]))
        density_data.append(_resample(_time, density[in_window]))
        gradient_data.append(_resample(_time, gradient[in_window]))
    return mld, previous_mld, wind_data, temp_data, density_data, gradient_data

In [None]:
# Concatenate the get_profiles() output of every deployment into flat lists.
# Rebinds mld / wind / temp / density / gradient / previous_mld.
mld, wind, temp, previous_mld, gradient, density = [],[],[],[],[],[]
# Deployment id repeated once per profile, aligned with the lists above.
depid_data = []
for i, depid in enumerate(depids_with_mld) :
    _df = pd.read_csv(os.path.join(path, depid, f'{depid}_dive.csv'))
    _mld, _previous_mld, _wind, _temp, _density, _gradient = get_profiles(_df, t0=0, t1=30, filter = 5, norm = True)
    mld.extend(_mld)
    wind.extend(_wind)
    temp.extend(_temp)
    depid_data.extend([depid]*len(_wind))
    previous_mld.extend(_previous_mld)
    gradient.extend(_gradient)
    density.extend(_density)

In [None]:
# Compare one column of the hand-built matrix with one column of the model's train/test inputs.
# NOTE(review): `data` is only assigned in a later cell, so this fails on a top-to-bottom run.
plt.plot(data[:,41])
plt.plot(model.X[:,81])
# Test samples are drawn after the training samples on the x axis.
plt.plot(np.arange(len(model.X), len(model.X)+len(model.x_test),1), model.x_test[:,81])

In [None]:
# Same comparison for the targets.
# NOTE(review): `target` is only assigned in a later cell, so this fails on a top-to-bottom run.
plt.plot(target)
plt.plot(model.Y)
plt.plot(np.arange(len(model.X), len(model.X)+len(model.x_test),1), model.y_test)

In [None]:
# Turn the accumulated lists into arrays.
mld = np.array(mld)
depid_data = np.array(depid_data)
previous_mld = np.array(previous_mld)
wind = np.array(wind)
temp = np.array(temp)
density = np.array(density)
gradient = np.array(gradient)
# Feature matrix: wind, temperature and gradient profiles side by side (density is left out).
data = np.hstack((wind, temp, gradient))
# Target: MLD minus the MLD recorded at the start of the look-back window.
target = mld - previous_mld
# NaNs in the features are replaced by 0; `target` keeps its NaNs.
data = np.nan_to_num(data)

In [None]:
from torch import nn, utils
import torch
# Fully connected regressor: 200 inputs -> 512 -> 256 -> 1 output, LeakyReLU activations.
model_MLP = nn.Sequential(
    nn.Linear(200, 512),
    nn.LeakyReLU(),
    nn.Linear(512, 256),
    nn.LeakyReLU(),
    nn.Linear(256, 1)
)
class LoadData(utils.data.Dataset) :
    """Minimal dataset pairing feature rows with targets, both stored as float32 tensors."""

    def __init__(self, X, Y):
        # torch.FloatTensor converts list / array input to float32.
        self.X, self.Y = torch.FloatTensor(X), torch.FloatTensor(Y)

    def __len__(self) :
        """Number of samples (size of the first dimension of X)."""
        return self.X.shape[0]

    def __getitem__(self, idx) :
        """Return the (features, target) pair stored at position `idx`."""
        features, label = self.X[idx], self.Y[idx]
        return features, label
# Cross-validation of model_MLP: each deployment in turn is set as model.test_depid,
# the network is trained on model.X / model.Y and evaluated on model.x_test / model.y_test.
import copy

N_EPOCHS = 15
estimations, labels_r = [], []
'''data = data[~np.isnan(target)]
depid_data = depid_data[~np.isnan(target)]
target = target[~np.isnan(target)]'''
losses = []
# Snapshot of the untrained weights. Without resetting, the network evaluated on a
# deployment had already been trained on it while an earlier deployment was held out.
initial_state = copy.deepcopy(model_MLP.state_dict())
criterion = nn.MSELoss()
for depid in depids_with_mld :
    model_MLP.load_state_dict(initial_state)

    #X, Y = data[depid_data != depid], target[depid_data != depid]
    #xtest, ytest = data[depid_data == depid], target[depid_data == depid]
    model.test_depid = depid
    # nepoch = 0: called only so that the model exposes X / Y / x_test / y_test for this split.
    model.multilayer_perceptron(nepoch = 0)
    X, Y = model.X, model.Y
    xtest, ytest = model.x_test, model.y_test
    trainloader = utils.data.DataLoader(LoadData(X,Y), 32, shuffle = True)
    testloader = utils.data.DataLoader(LoadData(xtest,ytest), 32, shuffle = False)
    optimizer = torch.optim.Adam(model_MLP.parameters(), lr=0.001, weight_decay = 0)

    model_MLP.train()
    for epoch in range(1, N_EPOCHS + 1) :
        for inputs, labels in trainloader:
            optimizer.zero_grad()
            outputs = model_MLP(inputs)
            loss = criterion(outputs.squeeze(dim = 1), labels)
            loss.backward()
            # Only the first epoch of each split is logged.
            if epoch == 1:
                losses.append(loss.item())
            optimizer.step()

    # Evaluate once, after the last epoch, without tracking gradients.
    model_MLP.eval()
    with torch.no_grad():
        for inputs, labels in testloader:
            outputs = model_MLP(inputs)
            estimations.extend(outputs.numpy().flatten())
            labels_r.extend(labels.numpy())
    model_MLP.train()
estimations = np.array(estimations)
labels = np.array(labels_r)

In [None]:
# Batch losses recorded during the first epoch of every split, in order.
plt.plot(losses)

In [None]:
# 2D density of MLP estimates against the test labels, with the identity line. Rebinds `df`.
fig, ax = plt.subplots()
df = pd.DataFrame({'target':labels, 'estimations':np.array(estimations).flatten()})
sns.kdeplot(df,
            x = 'target',
            y = 'estimations',
            ax = ax)
# y = x reference line.
ax.plot([-200,200], [-200,200], '--', c = 'k')
ax.set_xlim([-200,200])
ax.set_ylim([-200,200])

In [None]:
# Estimates for the last held-out deployment against its targets. The slice length is
# derived from `ytest` instead of a hard-coded 2309.
plt.scatter(ytest, estimations[len(estimations) - len(ytest):], s = 1, alpha = 0.2)

In [None]:
import plotly.express as px
# Overlay estimates and targets as two lines; `time` is a normalised sample index.
# NOTE(review): requires `estimations` and `target` to have the same length and order — confirm.
df = pd.DataFrame({"estimations":np.array(estimations).flatten(),
                   "target":target.flatten(),
                   'time' :np.linspace(0,1,len(target))})
df = df.melt(id_vars = 'time')
px.line(df, y = 'value', x = 'time', color = "variable")

In [None]:
# Pearson correlation matrix and R² between targets and estimates.
# NOTE(review): `target` may still contain NaNs (only `data` went through nan_to_num) — confirm.
print(np.corrcoef(target.flatten(),np.array(estimations).flatten()))
from sklearn.metrics import r2_score
print(r2_score(target, estimations))

In [None]:
# Signed error (estimate minus target) per deployment. Rebinds `df`.
df = pd.DataFrame({'estimations':estimations,'target':target,'depid_data':depid_data, "error":estimations-target})
sns.boxplot(x="depid_data", y="error",
            data=df)

In [None]:
# Per deployment: mean absolute error, and the same value divided by the mean absolute target.
for depid in depids_with_mld :
    print(depid)
    print('MAE : ', np.mean(abs(target-estimations)[depid_data==depid]))
    print('RMAE : ', np.mean(abs(target-estimations)[depid_data==depid])/np.mean(abs(target[depid_data==depid])))

In [None]:
# Check that the feature matrix and the estimates have the same number of rows.
len(data), len(estimations)

In [None]:
# Signed error against the row-wise mean of feature columns 40..79.
plt.scatter(np.nanmean(data[:, 40:80], axis = 1), estimations-target)

In [None]:
# Column-wise variance over rows 40..79.
# NOTE(review): the scatter cell just above slices columns (`data[:, 40:80]`); here rows are
# sliced — confirm which was intended.
np.nanvar(data[40:80], axis = 0)

In [None]:
# Embed the feature matrix with UMAP's default settings.
# NOTE(review): no random_state is given, so the embedding may differ between runs.
reducer = umap.UMAP()
embedding = reducer.fit_transform(data)

In [None]:
#clus = cluster.KMeans(n_clusters=6).fit(data)
#labels = clus.labels_
# Cluster the raw feature matrix with HDBSCAN and colour the UMAP embedding by cluster.
# Rebinds `labels` and `colors`, which earlier cells used for other purposes.
labels = hdbscan.HDBSCAN(min_samples = 100, min_cluster_size = 1000).fit_predict(data)
fig, ax = plt.subplots(figsize = (15,8))
unique_labels = np.unique(labels)
c = np.array(['darkorchid', 'indianred', 'cyan', 'orange', 'midnightblue', 'seagreen', 'salmon', 'gold'])
# 'grey' is appended last so that a label of -1 indexes it.
c =  np.append(c[:len(unique_labels)], 'grey')
colors = [c[label] for label in labels]
ax.scatter(embedding[:,0], embedding[:,1], c = colors, s = 4)
# Empty scatters that only create one legend entry per cluster (`color` is unused).
for label, color in zip(unique_labels, colors):
    ax.scatter([], [], label=f'Cluster {label}', color=c[label])
ax.legend()
'''label = [-1,0,3,5]
#df = pd.DataFrame({'data':data[np.isin(labels,label)][:, 0], 'mld':mld[np.isin(labels,label)].flatten()})
df = pd.DataFrame({'data':np.nanmax(data[:,:wind.shape[1]], axis = 1)[np.isin(labels,label)], 'mld':mld[np.isin(labels,label)].flatten()})
sns.kdeplot(df, x = 'data', y = 'mld', ax = ax[1])
ax[0].legend()'''

In [None]:
# UMAP embedding coloured by the per-row maximum of `temp` (left) and by cluster label (right).
fig, ax = plt.subplots(1,2,figsize = (10, 6))
ax[0].scatter(embedding[:,0], embedding[:,1], c = np.nanmax(temp, axis = 1), cmap = 'viridis_r', s = 4)
ax[1].scatter(embedding[:,0], embedding[:,1], c = labels, s = 4, cmap = 'viridis')

In [None]:
# One wind-vs-target density panel per cluster label.
# NOTE(review): this first frame is overwritten inside the loop before being used.
df = pd.DataFrame({'mld':target, 'wind':np.nanmax(wind, axis = 1), 'temp':labels})
fig, ax = plt.subplots(2,2, sharex=True, sharey=True)
ax = ax.flatten()
for label in np.unique(labels):
    df = pd.DataFrame({'mld':target[labels == label], 'wind':np.nanmax(wind, axis = 1)[labels == label]})
    # label + 1 maps label -1 to panel 0; only four panels exist, so labels above 2 overflow.
    sns.kdeplot(df, x = 'wind', y = 'mld', ax = ax[label+1])

In [None]:
from pygam import LinearGAM, GAM
#data = data[np.isin(labels, [-1,0,2])]
'''target = target[np.isin(labels, [-1,1,2,3])]
data = data[np.isin(labels, [-1,1,2,3])]
depid_data = depid_data[np.isin(labels, [-1,1,2,3])]
data = data[~np.isnan(target)]
depid_data = depid_data[~np.isnan(target)]
target = target[~np.isnan(target)]
target -= np.nanmin(target)-1'''
# GAM settings: Poisson distribution with a log link.
# NOTE(review): the disabled block above shifts `target` to be >= 1; with it disabled,
# `target` (an MLD difference) can be negative or NaN — confirm this suits a Poisson fit.
default = {'distribution':'poisson','link':'log'}
# Rebinds `estimations` (previously the MLP output).
estimations = []
# Fit on all deployments but one, predict the one left out.
for depid in depids_with_mld :
    X, Y = data[depid_data != depid], target[depid_data != depid]
    xtest, ytest = data[depid_data == depid], target[depid_data == depid]
    gam = GAM(link=default['link'], distribution=default['distribution']).fit(X, Y)
    estimations.extend(gam.predict(xtest))

In [None]:
     # Smallest non-NaN target value.
     np.nanmin(target)

In [None]:
# Display the target array.
target

In [None]:
# GAM estimates against targets; the y axis is limited to [0, 600].
plt.scatter(target, estimations, s = 4, alpha = 0.2)
plt.ylim(0,600)

In [None]:
# Least-squares fit of the target on [features, features², 1], restricted to the clusters
# listed in `label` and to rows whose maximum temperature value is at least 5.
# NOTE(review): `data`, `labels` and `target` are overwritten in place, so running this cell
# twice squares and filters the already-augmented matrix again.
label = [-1,0,2]
target = np.nan_to_num(target)
#data_mask = target > 0
#target = mld
# Binary flag: 0 below 5, 1 at or above 5 (in whatever units `temp` holds).
temperature = np.nanmax(temp, axis = 1)
temperature[temperature < 5] = 0
temperature[temperature >= 5] = 1
# Design matrix: original columns, their squares, and a constant column.
data = np.hstack((data, data**2, np.ones((len(data),1))))
data = data[temperature == 1]
labels = labels[temperature == 1]
target = target[temperature == 1]
x, residual, rank, s = np.linalg.lstsq(data[np.isin(labels, label)],  target[np.isin(labels, label)])

In [None]:
# Density of least-squares predictions against targets for the selected clusters. Rebinds `df`.
fig, ax = plt.subplots(figsize = (10,10))
#ax[0].scatter(mld[labels == label], data[labels == label] @ x, alpha = 0.1)
df = pd.DataFrame({'pred': (data[np.isin(labels, label)] @ x).flatten(), 'target' : target.flatten()[np.isin(labels, label)]})
'''temperature = np.nanmax(temp, axis = 1)[np.isin(labels, label)]
temperature[temperature < 5] = 0
temperature[temperature >= 5] = 1'''
sns.kdeplot(df, x = 'target', y = 'pred', ax = ax)
# y = x reference line.
ax.plot([0, 200], [0, 200], '--', c = 'k')
ax.set_xlim([-30, 300])
ax.set_ylim([-30, 300])

In [None]:
# Residual (prediction minus target) against target for the selected clusters.
plt.scatter(target.flatten()[np.isin(labels, label)], (data[np.isin(labels, label)] @ x).flatten() - target.flatten()[np.isin(labels, label)])

In [None]:
# Shapes of the MLD and wind arrays.
mld.shape, wind.shape

In [None]:
# Side by side: least-squares predictions (left) and the quadratic fit g(., *popt) on feature
# column 0 (right), both against MLD, with the mean absolute error printed for each.
# NOTE(review): `g` and `popt` are defined in the following cell, so this fails on a
# top-to-bottom run.
fig, ax = plt.subplots(1, 2, sharex = True, sharey = True, figsize = (15, 8))
#ax[0].scatter(mld[labels == label], data[labels == label] @ x, alpha = 0.1)
df = pd.DataFrame({'pred': (data[np.isin(labels, label)] @ x).flatten(), 'mld' : mld.flatten()[np.isin(labels, label)]})
sns.kdeplot(df, x = 'mld', y = 'pred', ax = ax[0])
print(np.nanmean(abs(df.pred.to_numpy() - df.mld.to_numpy())))
df = pd.DataFrame({'pred':g(data[np.isin(labels, label)][:, 0], *popt), 'mld':mld.flatten()[np.isin(labels, label)]})
sns.kdeplot(df, x = 'mld', y = 'pred', ax = ax[1])
print(np.nanmean(abs(df.pred.to_numpy() - df.mld.to_numpy())))
#ax[1].scatter(mld[labels == label], g(data[labels == label][:, 0], *popt), alpha = 0.1)
# y = x reference lines.
ax[0].plot([0, 400], [0, 400], '--', c = 'k')
ax[1].plot([0, 400], [0, 400], '--', c = 'k')
ax[0].set_ylim(0, 300)
ax[0].set_xlim(0, 300)
ax[0].grid()
ax[1].grid()

In [None]:
def g(x, a, b, c) :
    """Quadratic model a*x² + b*x + c, used as the curve_fit target."""
    quadratic = a * x**2
    linear = b * x
    return quadratic + linear + c
# Fit g on feature column 0 against MLD for the selected clusters.
# Bounds: a >= 0, b unbounded, 0 <= c <= 50.
bounds = [[0,-np.inf, 0],[np.inf, np.inf, 50]]
# Rows where both the feature and the MLD are defined.
nan_mask = (~np.isnan(data[np.isin(labels,label)][:, 0]) & ~np.isnan(mld[np.isin(labels,label)].flatten()))
# Rows whose first temperature sample is below 8.
temp_mask = temp[np.isin(labels,label)][:,0] < 8
popt, _ = curve_fit(g, data[np.isin(labels,label)][:, 0][nan_mask & temp_mask], mld[np.isin(labels,label)].flatten()[nan_mask & temp_mask], bounds = bounds)
plt.scatter(data[np.isin(labels,label)][:, 0][temp_mask], mld[np.isin(labels,label)][temp_mask].flatten(), s=5, alpha = 0.4, c = temp[np.isin(labels,label)][:, 0][temp_mask])
# Fitted curve evaluated at x = 0..19.
plt.plot(list(range(0,20)), g(np.array(list(range(0,20))), *popt))

In [None]:
def get_wind_gust(df, data = 'lstm', t0 = 15, t1 = 25, sort = False) :
    """Collect, for every dive, the raw wind and temperature samples that preceded it.

    For dive ``i`` the window is every dive whose ``begin_time`` lies between ``t1`` hours
    and ``t0`` hours before ``begin_time[i]`` (bounds included). NaN samples are dropped,
    so the returned sequences have variable length.

    Parameters
    ----------
    df : pandas.DataFrame
        Dive table with columns ``begin_time`` (seconds), ``meop_mld``, the wind column
        named by ``data`` and a 10 m temperature column named ``temp_10m`` or ``temp10m``
        (the spelling used by the other cells of this notebook).
    data : str
        Name of the wind column.
    t0, t1 : float
        Near and far edges of the look-back window, in hours.
    sort : bool
        If true, the wind samples of each window are sorted in decreasing order
        (temperature samples keep their chronological order).

    Returns
    -------
    mld : numpy.ndarray
        The ``meop_mld`` column.
    previous_mld : list
        MLD of the oldest dive within ``t1`` hours before each dive (NaN if there is none).
    wind_data, temp_data : list of list
        Non-NaN samples of each window.
    """
    timeframe = df.begin_time.to_numpy()
    mld = df.meop_mld.to_numpy()
    # Accept both spellings of the temperature column; keep the historical one when present.
    temp_col = 'temp_10m' if 'temp_10m' in df.columns else 'temp10m'
    temp = df[temp_col].to_numpy()
    wind = df[data].to_numpy()
    wind_data, temp_data, previous_mld = [], [], []
    for t_dive in timeframe :
        low_bound = (timeframe >= t_dive - t1*3600)
        in_window = low_bound & (timeframe <= t_dive - t0*3600)
        # Empty only when the timestamp itself is NaN; record NaN instead of raising.
        since_low = mld[low_bound]
        previous_mld.append(since_low[0] if len(since_low) != 0 else np.nan)
        _wind = wind[in_window]
        _wind = _wind[~np.isnan(_wind)]
        _temp = temp[in_window]
        _temp = _temp[~np.isnan(_temp)]
        if sort :
            _wind = np.sort(_wind)[::-1]
        wind_data.append(list(_wind))
        temp_data.append(list(_temp))
    return mld, previous_mld, wind_data, temp_data

In [None]:
# Gather the variable-length wind / temperature windows of every deployment and pad them
# into rectangular arrays.
# The call used to target get_profiles(), which has no `sort` argument and returns six
# fixed-length profiles; the padding below only makes sense for get_wind_gust() output.
mld, wind, temp, previous_mld = [],[],[], []
for depid in depids_with_mld :
    _df = pd.read_csv(os.path.join(path, depid, f'{depid}_dive.csv'))
    _mld, _previous_mld, _wind, _temp = get_wind_gust(_df, t0=8, t1=30, sort = True)
    mld.extend(_mld)
    wind.extend(_wind)
    temp.extend(_temp)
    previous_mld.extend(_previous_mld)


def _pad_with_nan(sequences):
    """Right-pad every list with NaN up to the longest one and stack them into a 2D array."""
    width = max((len(seq) for seq in sequences), default=0)
    return np.array([list(seq) + [np.nan] * (width - len(seq)) for seq in sequences])


wind = _pad_with_nan(wind)
temp = _pad_with_nan(temp)
# Column vector of MLD values, one row per dive.
mld = np.vstack(mld)
previous_mld = np.array(previous_mld)

data = np.column_stack((wind, temp))
# previous_mld is 1-D (one scalar per dive); reshape it to a column so the difference stays
# aligned row by row with `mld` (the former `previous_mld[:,0]` indexed a 1-D array).
target = mld - previous_mld.reshape(-1, 1)
#data = wind