# PySINDy with DEC (discrete exterior calculus) data

In [1]:
import dctkit as dt
from dctkit import config
import os
from scipy.io import loadmat
import numpy as np
import matplotlib.pyplot as plt
from dctkit.mesh import util
from dctkit.mesh.simplex import SimplicialComplex
from dctkit.dec import cochain as C
from dctkit.dec.vector import flat_PDP as flat
from matplotlib import cm
import pysindy as ps
from alpine.data.util import load_dataset
from alpine.data.burgers.burgers_dataset import data_path
from dctkit.physics import burgers as b
import math
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings("ignore")
#import gmsh

#gmsh.initialize()

## Data preparation

In [2]:
# Initialise dctkit through its `config` helper, using its default arguments.
# NOTE(review): the concrete settings this applies are not visible in this
# notebook -- check dctkit's `config` documentation.
config()

In [3]:
# ---------------------------------------------------------------------------
# Spatial discretisation
# ---------------------------------------------------------------------------
dx = 2**4 / 2**8            # spatial resolution
L = 2**4 + dx               # domain length
dx_norm = dx / L            # resolution rescaled by the domain length
L_norm = 1

# number of spatial grid points (shared by the normalised problem)
num_x_points = int(math.ceil(L / dx))
num_x_points_norm = num_x_points

# grid nodes, centred on the origin, and midpoints between consecutive nodes
x = np.linspace(-L / 2, L / 2, num_x_points)
x_circ = 0.5 * (x[1:] + x[:-1])

# initial velocity: unit-amplitude Gaussian centred at x = -L/8, sampled on the
# midpoints
u_0 = np.exp(-((x_circ + L / 8)**2))
umax = np.max(u_0)

# ---------------------------------------------------------------------------
# Temporal discretisation
# ---------------------------------------------------------------------------
T = 10                      # final time
T_norm = T*umax/L

# temporal resolution
# NOTE: this rebinds `dt`, which the import cell also uses as the alias of the
# `dctkit` package; from here on `dt` is the time step.
dt = 10 / 2**9
dt_norm = dt*umax/L

# number of temporal grid points, derived from the normalised quantities
num_t_points_norm = int(math.ceil(T_norm / dt_norm))
num_t_points = num_t_points_norm

t = np.linspace(0, T, num_t_points)
t_norm = np.linspace(0, T_norm, num_t_points_norm)

# ---------------------------------------------------------------------------
# Viscosity and boundary data
# ---------------------------------------------------------------------------
epsilon = 0.1
epsilon_norm = epsilon / (L * umax)
print(epsilon_norm)

# homogeneous boundary values at both ends, one entry per time step
nodes_BC = {side: np.zeros(num_t_points_norm) for side in ('left', 'right')}

0.006231811307617956


In [4]:
# Generate the 1D line mesh on [-L/2, L/2] and build the complex from it.
# The gmsh "expert mode" switch below was meant to silence the warnings gmsh
# emits for meshes with many (> 1000) points; it is currently disabled.
#gmsh.option.setNumber("General.ExpertMode", 1)
mesh, _ = util.generate_line_mesh(num_x_points, L, x_min=-L/2)
S = util.build_complex_from_mesh(mesh)
# NOTE(review): these two calls presumably compute and cache the Hodge star and
# the flat_PDP weights on `S` -- confirm against the dctkit documentation.
S.get_hodge_star()
S.get_flat_PDP_weights()
# First coordinate of every entry of S.circ[1]; used later as the spatial grid.
# NOTE(review): presumably the x-positions of the edge circumcentres -- confirm.
circ = S.circ[1][:,0]

In [5]:
# Construct the full solution u (only used for plotting and for u_dot).
# `dt` here is the time step defined in the parameter cell, not the dctkit alias.
prb = b.Burgers(S, T, dt, u_0, nodes_BC, epsilon)
# NOTE(review): "parabolic" presumably selects the time-stepping scheme --
# confirm in dctkit.physics.burgers.
prb.run("parabolic")

In [6]:
# Solution and time derivative as stored by the solver. Both are later indexed
# as [space, time].
u = prb.u
u_dot_true = prb.u_dot
# Alternative kept for reference: estimate u_dot by finite differences along
# the time axis instead of taking it from the solver.
#u_dot_true = ps.FiniteDifference(axis=1)._differentiate(u, t=dt)

## Extract results

In [7]:
def grid_search(threshold_list, alpha_list, pde_lib, train, val, l0=0.001):
    """Pick the STLSQ hyper-parameters with the lowest penalised validation error.

    Every ``(threshold, alpha)`` pair is used to fit a ``ps.SINDy`` model on
    the training data. The model is scored on the validation data with

        mean_over_columns(||u_dot_true - u_dot_pred||_2^2) + l0 * nnz(coefficients)

    so that, at equal accuracy, sparser models are preferred.

    Parameters
    ----------
    threshold_list : iterable
        Candidate values for the ``threshold`` argument of ``ps.STLSQ``.
    alpha_list : iterable
        Candidate values for the ``alpha`` argument of ``ps.STLSQ``.
    pde_lib :
        Feature library passed to ``ps.SINDy``.
    train : tuple
        ``(t_training, u_training)`` forwarded to ``model.fit``.
    val : tuple
        ``(u_val, u_dot_val_true)``; both the prediction and the reference
        derivative are read through ``[:, :, 0]``, i.e. they are 3-D arrays.
    l0 : float, optional
        Weight of the sparsity penalty (number of non-zero coefficients).

    Returns
    -------
    tuple
        ``(best_error, (best_threshold, best_alpha))``.

    Raises
    ------
    ValueError
        If no candidate yields a comparable (non-NaN) validation error, which
        includes the case of an empty grid.
    """
    t_training, u_training_reshape = train
    u_val_reshape, u_dot_val_true = val
    u_dot_val_target = u_dot_val_true[:, :, 0]

    # Start from +inf rather than a magic number so that the first valid
    # candidate is always accepted, however large its error.
    curr_best_error = np.inf
    best_params = None
    for threshold in threshold_list:
        for alpha in alpha_list:
            optimizer = ps.STLSQ(threshold=threshold, alpha=alpha, normalize_columns=True, verbose=False)
            model = ps.SINDy(feature_library=pde_lib, optimizer=optimizer)
            model.fit(u_training_reshape, t=t_training)
            u_dot_val = model.predict(u_val_reshape)[:, :, 0]
            fit_err = np.mean(np.linalg.norm(u_dot_val_target - u_dot_val, axis=0)**2)
            # The sparsity penalty belongs to the score, not to the prediction:
            # adding it to u_dot_val would bias every predicted value instead
            # of penalising model complexity.
            val_err = fit_err + l0 * np.count_nonzero(model.coefficients())
            if val_err < curr_best_error:
                curr_best_error = val_err
                best_params = (threshold, alpha)

    if best_params is None:
        raise ValueError(
            "grid_search found no valid (threshold, alpha) candidate: "
            "the grid is empty or every validation error is NaN")
    return curr_best_error, best_params

In [8]:
def prepare_data(time_idx, t_sample, u_T, u_dot_sample):
    """Assemble the time vector, state and derivative arrays for one data split.

    Parameters
    ----------
    time_idx : index array
        Positions of the split's time instants inside ``t_sample``.
    t_sample : array
        Time instants of the (sub-sampled) dataset.
    u_T : 2-D array
        State of the split with time along the first axis; it is transposed
        before being returned.
    u_dot_sample : 2-D array
        Time derivative of the whole dataset with time along the second axis;
        only the columns in ``time_idx`` are kept.

    Returns
    -------
    tuple
        ``(t, u, u_dot)`` where ``t = t_sample[time_idx]`` and ``u`` / ``u_dot``
        carry an extra trailing axis of length 1.
    """
    split_times = t_sample[time_idx]

    # state: put the first axis of u_T last-but-one, then append a unit axis
    state = u_T.T[:, :, None]

    # derivative: keep the split's columns, then append a unit axis
    state_dot = u_dot_sample[:, time_idx][:, :, None]

    return split_times, state, state_dot

In [9]:
def get_results(u, u_dot, skip_dxs, skip_dts, thresholds, alphas):
    """Fit SINDy on sub-sampled data and report whether the expected model is found.

    For every pair of spatial stride ``skip_dx`` and temporal stride
    ``skip_dt``:

    1. ``u``, ``u_dot``, the time vector and the spatial grid are sub-sampled;
    2. the time instants are split, in order and without shuffling, into
       train+validation (90%) and test (10%), and train+validation is split
       again 90/10 into train and validation;
    3. the STLSQ hyper-parameters are chosen with ``grid_search`` on
       train/validation;
    4. the model is refitted on train+validation and evaluated on the test set;
    5. one summary line is printed.

    Parameters
    ----------
    u : 2-D array
        State, indexed as ``u[space, time]``.
    u_dot : 2-D array
        Time derivative of ``u`` with the same layout.
    skip_dxs, skip_dts : iterable of int
        Spatial and temporal sub-sampling strides to try.
    thresholds, alphas : iterable
        STLSQ hyper-parameter grid forwarded to ``grid_search``.

    Notes
    -----
    The function also reads the notebook-level variables ``t`` (time vector),
    ``circ`` (spatial grid) and ``epsilon`` (viscosity). Nothing is returned;
    results are printed.
    """
    # Columns of the feature library expected to be active. The check below
    # compares their coefficients to `epsilon` and -1 respectively.
    # NOTE(review): presumably the u_xx and u*u_x terms of the PDELibrary
    # ordering -- confirm with model.get_feature_names().
    true_non_zero_idx = np.array([5, 7])

    for skip_dx in skip_dxs:
        for skip_dt in skip_dts:

            # define sub-sampled mesh and time interval
            t_sample = t[::skip_dt]
            circ_sample = circ[::skip_dx]

            u_sample = u[::skip_dx, ::skip_dt]
            # Use the derivative handed in by the caller, not the notebook
            # global, so that the function honours its own arguments.
            u_dot_sample = u_dot[::skip_dx, ::skip_dt]

            # split the dataset in training+val and test set
            time_train_val, time_test, u_train_val_T, u_test_T = train_test_split(
                np.arange(len(t_sample)), u_sample.T, test_size=0.1, random_state=42, shuffle=False)
            # split training+val in training and validation set
            time_train, time_val, u_train_T, u_val_T = train_test_split(
                time_train_val, u_train_val_T, test_size=0.1, random_state=42, shuffle=False)

            # prepare data (reshaping and extracting info)
            t_training, u_training_reshaped, _ = prepare_data(
                time_train, t_sample, u_train_T, u_dot_sample)
            t_train_val, u_train_val_reshaped, _ = prepare_data(
                time_train_val, t_sample, u_train_val_T, u_dot_sample)
            _, u_val_reshaped, u_val_dot_reshaped = prepare_data(
                time_val, t_sample, u_val_T, u_dot_sample)
            _, u_test_reshaped, u_test_dot_reshaped = prepare_data(
                time_test, t_sample, u_test_T, u_dot_sample)

            # init pysindy: polynomial terms up to degree 3 and spatial
            # derivatives up to order 3; the name lambdas act on strings
            library_functions = [lambda x: x, lambda x: x*x, lambda x: x*x*x]
            library_function_names = [lambda x: x, lambda x: x+x, lambda x: x+x+x]
            pde_lib = ps.PDELibrary(
                library_functions=library_functions,
                function_names=library_function_names,
                derivative_order=3,
                spatial_grid=circ_sample,
                is_uniform=True,
                include_bias=True)

            # launch GS
            train = (t_training, u_training_reshaped)
            val = (u_val_reshaped, u_val_dot_reshaped)
            _, best_params = grid_search(thresholds, alphas, pde_lib, train, val)

            # training on train+val with best params
            optimizer = ps.STLSQ(threshold=best_params[0], alpha=best_params[1], normalize_columns=True, verbose=False)
            model = ps.SINDy(feature_library=pde_lib, optimizer=optimizer)
            model.fit(u_train_val_reshaped, t=t_train_val)

            # extract stats
            coefficients = model.coefficients()
            non_zero_idx = np.nonzero(coefficients[0, :])[0]

            u_dot_test = model.predict(u_test_reshaped)[:, :, 0]
            test_MSE = np.mean(np.linalg.norm(u_test_dot_reshaped[:, :, 0] - u_dot_test, axis=0)**2)

            # The printed labels assume 2**8 spatial and 2**9 temporal samples
            # at full resolution.
            if np.array_equal(non_zero_idx, true_non_zero_idx):
                identified = coefficients[0, non_zero_idx]
                # percentage errors relative to the true coefficients
                # (epsilon for the first term, -1 for the second)
                pct_errors = np.array([
                    (identified[0] - epsilon)*100/epsilon,
                    (identified[1] + 1)*100,
                ])
                error = np.mean(np.abs(pct_errors))
                print(f"({2**8//skip_dx},{2**9//skip_dt}): Works! Coeffs error = {round(error,3)}, test error = {test_MSE}")
            else:
                print(f"({2**8//skip_dx},{2**9//skip_dt}): Doesn't work, test error = {test_MSE}")
        


In [10]:
# Sub-sampling strides: powers of two, up to 8 in space and 32 in time
skip_dxs = [2**k for k in range(4)]
skip_dts = [2**k for k in range(6)]

# STLSQ hyper-parameter grid explored by the grid search
threshold_list = [1, 2.5, 5, 7.5, 10]
alpha_list = [1e-1, 1e-2, 1e-3, 1e-4, 1e-5]

# record=True collects any warning raised during the sweep instead of showing it
with warnings.catch_warnings(record=True):
    get_results(
        u,
        u_dot_true,
        skip_dxs,
        skip_dts,
        threshold_list,
        alpha_list,
    )

(256,512): Works! Coeffs error = 0.952, test error = 1.4890818188907674e-05
(256,256): Works! Coeffs error = 0.971, test error = 1.5459472765688934e-05
(256,128): Works! Coeffs error = 1.028, test error = 1.7280243446959207e-05
(256,64): Works! Coeffs error = 1.261, test error = 2.2994463016950227e-05
(256,32): Works! Coeffs error = 2.084, test error = 3.750968265721926e-05
(256,16): Works! Coeffs error = 3.782, test error = 0.0006655205434750594
(128,512): Works! Coeffs error = 0.718, test error = 7.45128986467338e-06
(128,256): Works! Coeffs error = 0.671, test error = 7.518103767526679e-06
(128,128): Works! Coeffs error = 0.54, test error = 7.69799433664559e-06
(128,64): Works! Coeffs error = 0.664, test error = 8.444444037700486e-06
(128,32): Works! Coeffs error = 0.724, test error = 9.999519061251622e-06
(128,16): Works! Coeffs error = 4.169, test error = 0.00029707754696087056
(64,512): Works! Coeffs error = 6.112, test error = 3.229635205556469e-05
(64,256): Works! Coeffs error 