In [None]:
import os
import pandas as pd
import netCDF4 as nc
from datetime import datetime, date, timezone, timedelta, tzinfo
import numpy as np
import scipy
from sklearn.metrics import r2_score

from Biologging_Toolkit.plot.mixed_layer_depth import plot_wind_average_correlation, plot_wind_correlation
from Biologging_Toolkit.applications.Mixed_Layer_Depth import MixedLayerDepth
from Biologging_Toolkit.models.MLD_Model import MLDModel
from Biologging_Toolkit.processing.Dives import Dives
from Biologging_Toolkit.utils.format_utils import get_start_time_sens
from Biologging_Toolkit.plot.mixed_layer_depth import *
from Biologging_Toolkit.plot.mixed_layer_depth import plot_wind_gust_detector
from Biologging_Toolkit.plot.mixed_layer_depth import plot_regression_results

import matplotlib.pyplot as plt
from matplotlib import colormaps
# Render all figure text through LaTeX, using the Computer Modern serif face (the LaTeX default).
plt.rc("text", usetex=True)
plt.rc("font", family="serif", serif=["Computer Modern"])

### Make sure csv structure for dive data exists

In [None]:
# Deployment processed in the next cells, and where its files live on disk.
depid = 'ml18_294b'
path = os.path.join('D:/individus_brut/individus/', depid)
# Folder handed to Dives as `path`; the sensor NetCDF file sits inside it.
ref_path = os.path.join(path, 'data', 'auxiliary', 'instrument')
sens_path = os.path.join(ref_path, f'{depid}sens5.nc')

In [None]:
# Build the Dives object for this deployment from the reference folder and the sensor file.
dive = Dives(depid, path = ref_path, sens_path = sens_path)

In [None]:
# Run the Dives object (invokes its __call__).
dive()

### Add temperature data to reference structure

In [None]:
# Read the temperature record from the sensor file. The context manager closes the file as soon
# as the values are in memory, so no handle stays open on it.
with nc.Dataset(sens_path) as ds:
    temperature = ds['T'][:].data
    start_time = get_start_time_sens(ds.dephist_device_datetime_start)
# One time stamp per sample: start time plus the sample index divided by 5
# (presumably a 5 Hz sampling rate, matching the "sens5" file name — TODO confirm).
temp_time = start_time + np.arange(0, len(temperature))/5

In [None]:
# Register the temperature samples and their time stamps as a new variable of the dive structure.
dive.create_variable('temperature', var_data=temperature, var_time=temp_time)

In [None]:
# Display the dataset held by the Dives object.
dive.ds

In [None]:
# Close the dataset held by the Dives object.
dive.ds.close()

### Compute mixed layer depth

In [None]:
# Deployments for which the mixed layer depth is computed.
depids = [
    'ml19_292b', 'ml19_293a', 'ml19_294a',
    'ml20_293a', 'ml20_296b', 'ml20_313a',
    'ml21_295a', 'ml21_305b', 'ml17_280a',
]
for depid in depids:
    path = os.path.join('D:/individus_brut/individus/', depid)
    # One MixedLayerDepth instance per deployment, fed with its CTD profile file, then run.
    inst = MixedLayerDepth(
        depid,
        path=path,
        meop_path=f'D:/individus_brut/CTD1/{depid}_fr1_prof.nc',
    )
    inst()

In [None]:
# Re-run the last MixedLayerDepth instance left behind by the loop above.
inst()

### Wind correlation with MLD

In [None]:
# Full list of deployments considered for the wind / MLD analysis.
depids = [
    'ml18_296a', 'ml18_294b',
    'ml19_292a', 'ml19_292b', 'ml19_293a', 'ml19_294a',
    'ml20_293a', 'ml20_296b', 'ml20_313a',
    'ml21_295a', 'ml21_305b',
    'ml17_280a',
]
# Root folder holding one sub-folder per deployment.
path = 'D:/individus_brut/individus/'
paths = [os.path.join(path, deployment) for deployment in depids]

In [None]:
# Select the deployments that carry usable MLD values in their dive table.
#   corrected_mld = False -> keep deployments whose `meop_mld` column exists and is not all-NaN
#   corrected_mld = True  -> keep deployments whose dive table has a `corr_mld` column
# Each CSV is read once, and a missing column is detected explicitly instead of by catching
# the AttributeError raised by attribute access.
corrected_mld = False
depids_with_mld = []
for depid in depids :
    df = pd.read_csv(os.path.join(path, depid, f'{depid}_dive.csv'))
    if corrected_mld :
        has_mld = 'corr_mld' in df.columns
    else :
        has_mld = 'meop_mld' in df.columns and not np.all(np.isnan(df['meop_mld'].to_numpy()))
    if has_mld :
        depids_with_mld.append(depid)
print(depids_with_mld)

In [None]:
# Plot the averaged wind / MLD correlation for the selected deployments, using the 'lstm'
# wind estimate and grouping by 'gradient'.
plot_wind_average_correlation(
    depids_with_mld,
    path='D:/individus_brut/individus',
    data='lstm',
    group='gradient',
    save=False,
    # The folder name had a mis-decoded "è" ("th√®se"); restored so the path is valid
    # when `save` is switched on.
    save_path='C:/Users/grosm/Desktop/thèse/Figures/',
)

In [None]:
# Wind estimates to compare (dive-table column name -> legend label), in plotting order.
wind_sources = {
    'wind_speed': 'ERA5',
    'hildebrand': 'Hildebrand',
    'pensieri': 'Pensieri',
    'lstm': 'LSTM',
}
data = list(wind_sources.keys())
labels = list(wind_sources.values())
plot_wind_correlation(
    depids_with_mld,
    path='D:/individus_brut/individus',
    data=data,
    labels=labels,
)

### MLD | Wind relationship model

In [None]:
# Predictors handed to the MLD model.
params = ['peaks', 'gradient', 'temp10', 'previous_mld', 'temp_diff', 'density']
mae, r2, coeffs = [], [], []
model = MLDModel(
    path,
    depids_with_mld,
    # NOTE(review): `depid` is whatever value an earlier cell left behind — confirm it is intended.
    test_depid=depid,
    params=params,
    target='mld_diff',
    deepening=False,
    smoothing=True,
    structure='complete',
)
model.create_gust_dataframe()
# NOTE(review): constant offset of 486 added to the target — presumably to keep it positive
# for the gamma GAM mentioned in the next cell; confirm.
model.df.mld_diff += 486
# Keep only the rows whose temp_diff is below 5.
model.df = model.df[model.df.temp_diff <  5]

In [None]:
# Set each deployment in turn as the model's `test_depid` and run the random forest with plots.
for depid in depids_with_mld :
    model.test_depid = [depid]
    # Alternative model, currently disabled:
    #model.generalized_additive_model(link = 'inverse', distribution = 'gamma')
    model.random_forest(plot = True)

In [None]:
# Random-forest prediction against the (offset) MLD difference: density contours plus raw points.
fig, ax = plt.subplots()
sns.kdeplot(model.df, x='mld_diff', y='RF_pred')
ax.scatter(
    model.df.mld_diff,
    model.df.RF_pred,
    c='orange',
    s=5,
    alpha=0.1,
)
ax.set_ylim(200, 800)
# Dashed 1:1 reference line.
ax.plot([300, 700], [300, 700], '--', c='k')

In [None]:
from sklearn.metrics import r2_score
# Correlation matrix between the random-forest predictions and the target column.
np.corrcoef(model.df['RF_pred'], model.df['mld_diff'])

In [None]:
# GAM prediction against the MLD difference: density contours, 1:1 line and raw points.
fig, ax = plt.subplots()
sns.kdeplot(model.df, x='mld_diff', y='GAM_pred', ax=ax)
ax.plot([-120, 120], [-120, 120], '--', c='k')
ax.scatter(
    model.df.mld_diff,
    model.df.GAM_pred,
    c='orange',
    s=5,
    alpha=0.3,
)
# Same window on both axes so the 1:1 line is the diagonal.
ax.set_ylim(-120, 120)
ax.set_xlim(-120, 120)

In [None]:
import umap
import hdbscan
import matplotlib.colors as mcolors

# Columns used for the 2-D UMAP embedding and for the HDBSCAN clustering.
params = ['peaks', 'gradient', 'previous_mld', 'temp10', 'temp_diff']
reducer = umap.UMAP()
data = np.nan_to_num(model.df[params])   # NaN replaced by 0 before fitting
embedding = reducer.fit_transform(data)
# Clusters are computed on the original feature space, not on the 2-D embedding.
labels = hdbscan.HDBSCAN(min_samples = 50, min_cluster_size = 200).fit_predict(data)
fig, ax = plt.subplots(1, 2, figsize = (15,8))
unique_labels = np.unique(labels)
# One colour per cluster label; 'grey' is appended last so that label -1 indexes it.
c = np.array(['darkorchid', 'indianred', 'cyan', 'midnightblue', 'seagreen'])
c =  np.append(c[:len(unique_labels)], 'grey')
colors = [c[label] for label in labels]
# Disabled alternative: colour the embedding by cluster label and add a legend.
#ax[0].scatter(embedding[:,0], embedding[:,1], c = colors, s = 4)
#for label, color in zip(unique_labels, colors):
#    ax[0].scatter([], [], label=f'Cluster {label}', color=c[label])
#ax[0].legend()
ax[0].scatter(embedding[:,0], embedding[:,1], c = model.df.mld, s = 4, cmap = 'viridis')
# `ground_truth` used to be defined only inside the disabled block above, so masking it raised
# a NameError. It is now built as a masked copy (|mld_diff| > 50 -> NaN) without touching model.df.
# NOTE(review): it is not plotted yet; the threshold of 50 looks meant for the un-offset
# mld_diff, whereas an earlier cell adds 486 to that column — confirm before using it as colour.
ground_truth = model.df.mld_diff.where(model.df.mld_diff.abs() <= 50)
ax[1].scatter(embedding[:,0], embedding[:,1], c = model.df.mld_diff, s = 4, cmap = 'viridis')

In [None]:
# GLM prediction against the model target: density contours with a dashed 1:1 reference line.
fig, ax = plt.subplots()
sns.kdeplot(
    model.df,
    x=model.target,
    y='GLM_pred',
)
ax.set_ylim(-20, 200)
ax.plot([0, 200], [0, 200], '--', c='k')

In [None]:
# Show the wind-gust detection for the last deployment of the list with the given peak settings.
plot_wind_gust_detector(path, depids_with_mld[-1], prominence = 0, distance = 1, height = 6)

In [None]:
### PCA FOR INDEPENDENT VARIABLES
from statsmodels.multivariate.pca import PCA


def _draw_zero_axes():
    """Draw dashed horizontal and vertical lines through the origin of the current axes."""
    plt.axhline(0, color='black', linestyle='--', linewidth=0.8)
    plt.axvline(0, color='black', linestyle='--', linewidth=0.8)


def _draw_loadings(loadings, names):
    """Draw, on the current axes, one labelled arrow per variable from the origin to its
    (PC1, PC2) loading. The label is placed 20 % beyond the arrow tip."""
    for (pc1, pc2), name in zip(loadings[:, :2], names):
        plt.arrow(0, 0, pc1, pc2, color='r', alpha=0.8, head_width=0.05)
        plt.text(pc1 * 1.2, pc2 * 1.2, name, color='g', ha='center', va='center')


pca_data = model.df[params]
# Never request more components than there are columns: asking for 6 with fewer variables
# only triggers a statsmodels warning before being capped anyway.
pca_model = PCA(pca_data, ncomp=min(6, pca_data.shape[1]), standardize=True, method='eig')  # Standardized PCA
factors = pca_model.factors.to_numpy()  # Principal components (scores)
loadings = pca_model.loadings.to_numpy()  # Loadings (eigenvectors)
explained_variance = pca_model.eigenvals / np.sum(pca_model.eigenvals)  # Proportion of variance explained

# 2.1 Scree Plot
plt.figure(figsize=(8, 5))
plt.plot(range(1, len(explained_variance) + 1), explained_variance, marker='o', linestyle='--', color='b')
plt.title('Scree Plot')
plt.xlabel('Principal Component')
plt.ylabel('Proportion of Variance Explained')
plt.show()

# 2.2 Score Plot (PC1 vs. PC2)
plt.figure(figsize=(8, 5))
plt.scatter(factors[:, 0], factors[:, 1], alpha=0.7, edgecolors='k', label='Scores')
_draw_zero_axes()
plt.title('Score Plot (PC1 vs. PC2)')
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.legend()
plt.show()

# 2.3 Loading Plot (PC1 vs. PC2)
plt.figure(figsize=(8, 5))
_draw_loadings(loadings, pca_data.columns)
_draw_zero_axes()
plt.title('Loading Plot (PC1 vs. PC2)')
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.grid()
plt.show()

# 2.4 Biplot
plt.figure(figsize=(8, 5))
plt.scatter(factors[:, 0], factors[:, 1], alpha=0.7, edgecolors='k', label='Scores')
_draw_loadings(loadings, pca_data.columns)
_draw_zero_axes()
plt.title('Biplot (Scores and Loadings)')
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.legend()
plt.show()

# 2.5 Cumulative Variance Explained Plot
cumulative_variance = np.cumsum(explained_variance)
plt.figure(figsize=(8, 5))
plt.plot(range(1, len(cumulative_variance) + 1), cumulative_variance, marker='o', linestyle='--', color='g')
plt.title('Cumulative Variance Explained')
plt.xlabel('Number of Principal Components')
plt.ylabel('Cumulative Proportion of Variance Explained')
plt.axhline(y=0.9, color='r', linestyle='--', label='90% Variance Explained')
plt.legend()
plt.show()

In [None]:
# R2 of the GLS regression as a function of the time differential, for two predictor sets.
params = {
    1: ['peaks', 'gradient'],
    2: ['peaks', 'previous_mld'],
}
time_diffs = list(range(1, 72, 2))
fig, ax = plt.subplots()
for feature_set in params.values():
    model = MLDModel(path, depids_with_mld, structure='complete', params=feature_set, deepening=False)
    R2 = []
    for time_diff in time_diffs:
        model.create_gust_dataframe(time_diff=time_diff)
        # Run the regression once with each deployment set as `test_depid`.
        for depid in depids_with_mld:
            model.test_depid = [depid]
            model.gls_regression()
        R2.append(r2_score(model.df.mld, model.df.GLS_pred))
    ax.plot(time_diffs, R2, label='_'.join(feature_set))
ax.legend()
ax.grid()
fig.tight_layout()
#fig.savefig('C:/Users/grosm/Desktop/Models_R2_MLD_with_PMLD.pdf')

In [None]:
import gsw
depid = 'ml19_293a'
df = pd.read_csv(os.path.join(path, depid, f'{depid}_dive.csv'))
meop_path = f'D:/individus_brut/MEOP_profiles/meop_{depid}.nc'
sal_var = 'PSAL_ADJUSTED'
temp_var = 'TEMP_ADJUSTED'
# Read every variable once and close the file right away; masked entries become NaN.
with nc.Dataset(meop_path) as meop:
    juld = np.ma.getdata(meop['JULD'][:])
    temp = np.ma.filled(meop[temp_var][:], np.nan)
    sal = np.ma.filled(meop[sal_var][:], np.nan)
# JULD is converted as a number of days since 1950-01-01, expressed as timezone-aware UTC datetimes.
ctd_time = np.array([datetime(1950,1,1,0,0,0, tzinfo=timezone.utc) + timedelta(float(elem)) for elem in juld])
# NOTE(review): gsw.sigma0 is documented for Absolute Salinity / Conservative Temperature,
# while the adjusted salinity and temperature variables are passed directly — confirm the
# approximation is acceptable here.
sigma0 = gsw.density.sigma0(sal, temp)

In [None]:
# Threshold estimate of the MLD for every profile: index of the first level below level 10
# whose sigma0 differs from the level-10 value by more than 0.03; NaN when no level qualifies
# or when the profile has no level beyond index 10.
# The "nothing found" case is tested explicitly instead of swallowing every exception.
test_mld = []
for elem in sigma0 :
    if len(elem) > 11 :
        exceed = np.flatnonzero(abs(elem[11:] - elem[10]) > 0.03)
    else :
        exceed = np.array([], dtype = int)
    test_mld.append(exceed[0] + 11 if exceed.size else np.nan)

In [None]:
# Compare the threshold-based MLD of the CTD profiles with the MLD stored in the dive table.
plt.plot(ctd_time, test_mld)
# Dive end times are converted to timezone-aware UTC datetimes so they share the time base of
# `ctd_time` (datetime.utcfromtimestamp is deprecated and returns naive objects).
dive_end_utc = df.end_time.apply(lambda ts : datetime.fromtimestamp(ts, tz=timezone.utc))
plt.plot(dive_end_utc, df.meop_mld)
plt.xlim(date(2018,11,1), date(2018,11,6))

In [None]:
# Indices of CTD profiles and of dives that occur after the cutoff date.
cutoff = datetime(2018,10,30,tzinfo=timezone.utc)
print(np.where(ctd_time > cutoff))
# Convert directly to UTC: fromtimestamp(x) alone yields local time, and stamping it with
# tzinfo=UTC afterwards would shift every dive by the machine's UTC offset.
dive_time = df.end_time.apply(lambda x : datetime.fromtimestamp(x, tz=timezone.utc))
print(np.where(dive_time > cutoff))

In [None]:
# CTD profile times as POSIX timestamps (seconds), in the same order as `ctd_time`.
epoch_ctd = [profile_time.timestamp() for profile_time in ctd_time]

In [None]:
# Distribution of all sigma0 values, every profile and level pooled together.
plt.hist(
    np.array(sigma0).flatten(),
    bins=50,
)

In [None]:
import matplotlib.dates as mdates
# Density (sigma0) section, one column per CTD profile, with the dive-table MLD drawn on top.
fig, ax = plt.subplots(figsize=(8,5))
cmap = plt.get_cmap('viridis_r')


#indices_mld = np.searchsorted(dive_ds.begin_time, ctd_time[time_mld < self.dive_ds.end_time.iloc[-1]])
# For every dive ending no later than the last CTD profile, find where its start time would be
# inserted among the CTD profile times.
indices = np.searchsorted(epoch_ctd, df.begin_time[df.end_time <= epoch_ctd[-1]])
# One MLD slot per CTD profile, NaN where no dive is mapped.
final_dive = np.full((len(epoch_ctd)), np.nan)
# NOTE(review): a dive starting before the first profile gets index 0, so `indices - 1` is -1
# and writes into the LAST slot — confirm this cannot happen. When several dives map to the
# same profile the last one wins.
final_dive[indices - 1] = df.meop_mld[df.end_time <= epoch_ctd[-1]]
#final_mld = np.full(len(self.dive_ds), np.nan)
#final_mld[indices_mld-1] = mld[time_mld < self.dive_ds.end_time.iloc[-1]]

# `mcolors` comes from an `import matplotlib.colors as mcolors` executed in an earlier cell.
im = ax.imshow(sigma0.T, origin='lower', aspect='auto', cmap=cmap, norm=mcolors.LogNorm(vmin=26.5, vmax=28))
# NOTE(review): the MLD is drawn in the image's level-index coordinates — confirm that level
# index and MLD share the same unit.
ax.plot(final_dive, label='MLD', color='red')

plt.xticks(rotation=45)
# Show only the first 200 profiles.
ax.set_xlim(0,200)
#ax.set_ylim(0, 400)
ax.legend()
fig.tight_layout()

In [None]:
# Number of rows in the dive table.
len(df)

In [None]:
import matplotlib.colors as mcolors
#norm = mcolors.LogNorm(vmin=0, vmax=np.nanmax(temp)-np.nanmin(temp))

# Zoom on profiles 4700-4800 of the sigma0 section, restricted to the 26.5-27.5 range.
fig, ax = plt.subplots(figsize=(8, 5))
cmap = colormaps.get_cmap('viridis')  # viridis is the default colormap for imshow
# Values outside the displayed range are blanked IN PLACE: `sigma0` stays modified for later cells.
sigma0[sigma0 < 26.5] = np.nan
sigma0[sigma0 > 27.5] = np.nan
ax.imshow(
    sigma0.T,
    origin='lower',
    aspect='auto',
    cmap=cmap,
    vmin=26.5,
    vmax=27.5,
)
ax.plot()  # draws nothing; placeholder for the disabled MLD overlay below
#ax.set_xticks(np.arange(0, len(ctd_time))[::1000])
#ax.set_xticklabels(ctd_time[::1000])
#ax.plot(df.begin_time.apply(datetime.fromtimestamp)[df.meop_mld < 350], df.meop_mld[df.meop_mld < 350], label = 'MLD')
#ax.legend()
ax.set_xlim(4700, 4800)
ax.set_ylim(0, 600)
fig.tight_layout()

In [None]:
# Distribution of the sigma0 values as they currently stand (an earlier cell blanks values in place).
plt.hist(
    sigma0.flatten(),
    bins=50,
)

In [None]:
from datetime import datetime, date, timezone
# MLD (left axis) and LSTM wind estimate (right axis) against dive start time.
df = pd.read_csv(os.path.join(path, depid, f'{depid}_dive.csv'))
fig, ax = plt.subplots(figsize=(8, 5))
# Convert the dive start times to UTC explicitly. A bare datetime.fromtimestamp uses the
# machine's local time zone, which shifts the curve relative to the UTC-based plots elsewhere.
dive_start = df.begin_time.apply(lambda ts : datetime.fromtimestamp(ts, tz=timezone.utc))
# Only MLD values below 350 are drawn.
shallow = df.meop_mld < 350
lines = []
line = ax.plot(dive_start[shallow], df.meop_mld[shallow], label = 'MLD')
ax1 = ax.twinx()
line1 = ax1.plot(dive_start, df.lstm, c = 'orange', label = 'WIND')
ax.grid()
ax.set_xlim([date(2018, 11, 9), date(2018, 11, 15)])
# Merge the handles of both axes into a single legend.
lines.extend(line)
lines.extend(line1)
labels = [l.get_label() for l in lines]
ax.legend(lines, labels)
fig.tight_layout()

In [None]:
# Save the figure currently bound to `fig` (the one built in the previous cell when run in order).
fig.savefig('C:/Users/grosm/Desktop/MLD.pdf')

In [None]:
# R2 of the OLS regression on `mld` as a function of the time differential, for four predictor sets.
params = {
    1: ['peaks', 'duration', 'gradient', 'density', 'temp10'],
    2: ['peaks', 'gradient', 'density', 'temp10'],
    3: ['peaks', 'gradient', 'temp10'],
    4: ['peaks', 'duration', 'gradient'],
}
time_diffs = list(range(1, 72, 2))
fig, ax = plt.subplots()
for feature_set in params.values():
    model = MLDModel(path, depids_with_mld, target='mld', params=feature_set, deepening=False)
    R2 = []
    for time_diff in time_diffs:
        model.create_gust_dataframe(time_diff=time_diff)
        # Run the regression once with each deployment set as `test_depid`.
        for depid in depids_with_mld:
            model.test_depid = [depid]
            model.ols_regression()
        R2.append(r2_score(model.df.mld, model.df.OLS_pred))
    ax.plot(time_diffs, R2, label='_'.join(feature_set))
ax.legend()
ax.grid()
fig.tight_layout()

In [None]:
 # Display the model's current data frame.
 model.df

In [None]:
# Plot the temp10 column against the data-frame index.
plt.plot(model.df.temp10)

In [None]:
# Observed MLD against the random-forest prediction.
# NOTE(review): needs an `RF_pred` column, while the model built in the cell above only runs
# ols_regression — confirm this cell still works on a fresh run.
plt.scatter(model.df.mld, model.df.RF_pred)

In [None]:
# Mean OLS R2 (averaged over test deployments) against the time differential, for every
# combination of predictor set (with / without previous_mld) and target (mld / mld_diff).
params = {1 : ['peaks', 'duration', 'gradient', 'density', 'temp10', 'previous_mld'],
          2 : ['peaks', 'duration', 'gradient', 'density', 'temp10']}
targets = {1 : 'mld', 2 : 'mld_diff'}
# Ordered by target first (mld, then mld_diff) and, within a target, with previous_mld first.
labels = ['MLD with previous MLD', 'MLD', 'MLD diff with previous MLD', 'MLD diff']
fig, ax = plt.subplots()
for j in range(1,3):
    for k in range(1,3):
        final = []
        model = MLDModel(path, depids_with_mld, params = params[j], deepening = True, target = targets[k])
        for i in range(1,72,2) :
            R2 = []
            model.create_gust_dataframe(time_diff=i)
            for depid in depids_with_mld:
                model.test_depid = [depid]
                model.ols_regression()
                R2.append(model.OLS_r_squared)
            final.append(np.nanmean(R2))
        # The label index follows (target k, predictor set j). The previous j*2+k-3 formula
        # swapped the 'MLD' and 'MLD diff with previous MLD' captions.
        ax.plot(list(range(1,72,2)), final, label = labels[2*(k-1) + (j-1)])
ax.legend()
ax.grid()
fig.tight_layout()
fig.savefig('C:/Users/grosm/Desktop/Models_R2_MLD_with_PMLD.pdf')

In [None]:
### GET OLS RESULTS FOR DIFFERENT TIME DIFFS
params = ['peaks', 'duration', 'gradient', 'density', 'temp10']
# One entry per test deployment is appended to each of these lists.
mae, r2, coeffs = [], [], []
for depid in depids_with_mld:
    model = MLDModel(
        path,
        depids_with_mld,
        test_depid=depid,
        params=params,
        target='mld_diff',
        deepening=True,
        find_peaks_params={'prominence':0.9, 'height':6, 'distance':3},
    )
    model.temporal_linear_regression(tmax=48)
    # Collect the model's OLS error, R2 and fitted results for this test deployment.
    mae.append(model.OLS_mae)
    r2.append(model.OLS_r_squared)
    coeffs.append(model.OLS_results)

In [None]:
# Save pyplot's current figure.
# NOTE(review): run as a separate cell, the current figure may no longer be the one produced
# by the regression above — confirm the saved PDF is not empty.
plt.savefig('C:/Users/grosm/Desktop/OLS_mld_diff_deepening_profiles.pdf')

In [None]:
# Mean MAE (left axis) and mean R2 (right axis) across test deployments, per time differential.
fig, ax = plt.subplots()
ax1 = ax.twinx()
line = ax.plot(np.nanmean(mae, axis = 0), label = 'MAE', c = 'red')
line1 = ax1.plot(np.nanmean(r2, axis = 0), label = 'R2')
ax.grid()
ax.set_ylabel('MAE (m)')
ax1.set_ylabel('$R^{2}$')
# Single legend combining the handles of both axes. The earlier bare ax.legend() call was
# dropped because this one replaced it immediately.
lines = line+line1
ax_labels = [l.get_label() for l in lines]
ax.legend(lines, ax_labels, loc='upper left')
ax.set_xlabel('Time differential (h)')
fig.tight_layout()

In [None]:
# Apply the hand-written linear model (coefficients linear in the time differential) at four
# time differentials and compare the estimate with the observed MLD.
fig, ax = plt.subplots(2,2, figsize = (8,8))
ax = ax.flatten()
params = ['peaks', 'duration', 'gradient', 'density', 'temp10','previous_mld']
model = MLDModel(path, depids_with_mld,
                     test_depid = depid, params = params,
                     target = 'mld',
                     deepening = True,
                     find_peaks_params = {'prominence':0.9, 'height':6, 'distance':3})
for i, time_diff in enumerate([12, 24, 36, 48]):
    model.create_gust_dataframe(time_diff=time_diff)
    # The `peaks` coefficient is piecewise linear in time_diff with a break at 24 h;
    # every other term is shared by both regimes.
    if time_diff > 24 :
        peaks_coeff = -0.03*time_diff + 1.41
    else :
        peaks_coeff = 0.03*time_diff - 0.02
    estimation = peaks_coeff * model.df.peaks.to_numpy() \
        + (-1.43*time_diff - 28) * model.df.gradient.to_numpy() \
        + (-.32*time_diff - 5.25) * model.df.density.to_numpy() \
        + (-0.04*time_diff - 2.58) * model.df.temp10.to_numpy() \
        + 1.04 * model.df.previous_mld.to_numpy() \
        + 9.26*time_diff + 171
    ax[i].plot([0, 350], [0, 350], '--', c = 'k')
    ax[i].scatter(model.df.mld.to_numpy(), estimation, s = 5, c = model.df.other_peaks)
    # The printed MAE now refers to the estimate that is actually plotted. It used to be
    # computed from the <= 24 h formula even for the 36 h and 48 h panels.
    print(time_diff, np.nanmean(abs(model.df.mld.to_numpy() - estimation)))
    ax[i].set_title(f'MLD estimation after {time_diff} h')
    ax[i].grid()
# Figure-level labels and layout only need to be set once.
fig.supxlabel("Mixed Layer Depth (MLD)")
fig.supylabel("Estimation Value")
fig.tight_layout()


In [None]:
# Save the figure currently bound to `fig`.
fig.savefig('C:/Users/grosm/Desktop/OLS_estimation_final_model.pdf')

In [None]:
def fit_func(x, a, b) :
    """Straight line: return ``a*x + b`` (slope ``a``, intercept ``b``)."""
    slope_term = a * x
    return slope_term + b
# For every regression term, average its coefficient across test deployments for each time
# differential, then fit straight line(s) to the coefficient-versus-time curve.
# 'peaks' and 'duration' get two segments (first 24 points, then the rest); every other term
# gets a single line.
for var in list(coeffs[0].keys()) :
    var_mean = np.mean(np.column_stack([coeffs[i][var][0] for i in range(len(depids_with_mld))]),axis=1)
    # Time axis derived from the data (1, 2, ..., n) instead of a hard-coded 47-point range.
    hours = np.arange(1, len(var_mean) + 1)
    if var in ('peaks', 'duration') :
        segments = [slice(0, 24), slice(24, None)]
    else :
        segments = [slice(None)]
    fig, ax = plt.subplots()
    ax.scatter(hours, var_mean)
    fits = []
    for segment in segments :
        popt, _ = scipy.optimize.curve_fit(fit_func, hours[segment], var_mean[segment])
        ax.plot(hours[segment], fit_func(hours[segment], *popt))
        fits.append(popt)
    # Fitted (slope, intercept) of each segment, in order.
    print(*fits)


In [None]:
# Save the figure currently bound to `fig`.
# NOTE(review): the previous cell rebinds `fig` for every variable, so only the last
# variable's figure is written despite the "all_vars" file name — confirm.
fig.savefig('C:/Users/grosm/Desktop/OLS_model_all_vars.pdf')

In [None]:
# Single OLS run at a 15 h time differential with 'ml17_280a' as the test deployment.
params = ['peaks', 'duration', 'gradient', 'density', 'temp10', 'previous_mld']
model = MLDModel(
    path,
    depids_with_mld,
    params=params,
)
model.create_gust_dataframe(time_diff=15)
model.test_depid = ['ml17_280a']
model.ols_regression()

In [None]:
### GET ALL MODEL ESTIMATIONS FOR GIVEN TIME DIFF
params = ['peaks', 'duration', 'gradient', 'density', 'temp10', 'previous_mld']
model = MLDModel(path, depids_with_mld, params=params)
model.create_gust_dataframe(time_diff=15)
# Run the regression once with each deployment set as `test_depid`.
for depid in depids_with_mld:
    model.test_depid = [depid]
    model.ols_regression()
    #model.random_forest(plot = True)
    #model.linear_gam()

fig, ax = plt.subplots(figsize=(7, 7))
# Dashed 1:1 reference line.
ax.plot([0, 400], [0, 400], '--', c='k')
for pred in ['OLS_pred']:  # other candidates: 'RF_pred', 'GAM_pred'
    abs_error = abs(model.df.mld.to_numpy() - model.df[pred].to_numpy())
    print(pred, np.nanmean(abs_error))
    sns.kdeplot(model.df, x='mld', y=pred, ax=ax, alpha=0.5, color='orange')
    sc = ax.scatter(model.df.mld, model.df[pred], label=pred, alpha=0.6, s=5)
ax.grid()
ax.set_xlabel('MLD (m)')
ax.set_ylabel('MLD estimation (m)')
fig.tight_layout()

In [None]:
# Save the figure currently bound to `fig`.
fig.savefig('C:/Users/grosm/Desktop/OLS_estimation.pdf')

In [None]:
# Call scatter_wind_mld on the model data with a {column name: display label} mapping.
# NOTE(review): the function is not imported by name in this notebook — presumably it is
# provided by the star import from the plotting module; confirm.
scatter_wind_mld(model.df, {'var_mld':'MLD Variance', 'gradient':'Density gradient at MLD', 'temp10':'10m temperature', 'density':'MLD Density'})

In [None]:
# Annotated correlation matrix of the predictors together with the observed MLD.
fig, ax = plt.subplots(figsize=(9, 9))
params = ['peaks', 'duration', 'gradient', 'density', 'temp10', 'previous_mld']
sns.heatmap(
    model.df[params + ['mld']].corr(),
    cmap='Reds',
    square=True,
    annot=True,
    ax=ax,
)
fig.tight_layout()
fig.savefig('C:/Users/grosm/Desktop/correlation_matrix.pdf')

In [None]:
# Temporal linear regression on three predictors, followed by the regression-results plot.
params = ['gradient', 'peaks','temp10']
alg = 'OLS'
model = MLDModel(
    path,
    depids_with_mld,
    test_depid='ml18_294b',
    norm=False,
    params=params,
    target='mld_diff',
    find_peaks_params={'prominence':0.9, 'height':0, 'distance':5},
)
model.create_gust_dataframe(time_diff=15)
# Keep only the rows whose temp10 is below 4.
model.df = model.df[model.df.temp10 < 4]
model.temporal_linear_regression(tmax=48, model=alg)
# Display label for each regression term.
labels = {
    'peaks': 'Maximum wind speed',
    'duration': 'Wind gust duration',
    'density': 'Density at MLD',
    'gradient': 'Gradient at MLD',
    'previous_mld': 'MLD during wind gust',
    'temp10': '10m Temperature',
    'const': 'Constant',
}
plot_regression_results(model, labels, model=alg)


In [None]:
# Distribution of the `gradient` column of the model data.
plt.hist(
    model.df.gradient.to_numpy(),
    bins=50,
)

In [None]:
# Display the model's current parameter list.
model.params

In [None]:
from statsmodels.stats.outliers_influence import variance_inflation_factor
import pandas as pd
model.params = ['peaks', 'previous_mld', 'const']

# Variance inflation factor of every regressor except the trailing 'const'.
features = model.params[:-1]
# Extract the design matrix once, as a plain ndarray: variance_inflation_factor indexes its
# input positionally, and the previous code rebuilt the DataFrame slice on every call.
# NOTE(review): no intercept column is included in the matrix — confirm this is intended,
# since it affects the VIF values.
exog = model.train_df[features].to_numpy()
vif_data = pd.DataFrame({
    "Feature": features,
    "VIF": [variance_inflation_factor(exog, i) for i in range(exog.shape[1])],
})

print(vif_data)


In [None]:
import statsmodels.api as sm
# Load the Longley example dataset bundled with statsmodels.
# Note: this rebinds `data`, which earlier cells use for other objects.
data = sm.datasets.longley.load()

In [None]:
# Display the explanatory variables of the loaded example dataset.
data.exog

In [None]:
### COMPUTE POLLARD'S LAW
# Mixed-layer depth reached after a time t of constant wind forcing:
#   h = u* * (4 * (1 - cos(f*t)) / (N^2 * f^2))**0.25,   with u* = sqrt(wind_stress / density)
x = np.arange(0,20,1)   # wind speeds the law is evaluated for
#wind_stress = np.nanmean(1.225 * 0.0014 * df.peaks.to_numpy()**2)
# Quadratic wind stress; 1.225 and 0.0014 are presumably the air density (kg/m3) and the drag
# coefficient — TODO confirm.
wind_stress = 1.225 * 0.0014 * x**2
density = np.nanmean(df.density.to_numpy() + 1000)
N2 = np.nanmean(9.81 / 1027 * df.mld_gradient.to_numpy())
# Coriolis parameter f = 2 * Omega * sin(latitude), with Omega = 7.2921e-5 rad/s.
# The previous "7.2921 * 10e-5" evaluated to 7.2921e-4, ten times too large.
# NOTE(review): latitude is assumed to be stored in degrees and is converted to radians for
# np.sin — confirm the unit of df.latitude.
f = np.nanmean(2 * 7.2921e-5 * np.sin(np.deg2rad(df.latitude.to_numpy())))
# Forcing duration: 15 hours, in seconds (the unused hourly time vector was removed).
t = 15*3600
h = np.sqrt(wind_stress / density) * (4*(1-np.cos(f*t)) / (N2 * f**2))**(0.25)
